-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsimple_data_storage_format.h
More file actions
1494 lines (1302 loc) * 52.9 KB
/
simple_data_storage_format.h
File metadata and controls
1494 lines (1302 loc) * 52.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#ifndef _SIMPLE_DATA_STORAGE_FORMAT_H_
#define _SIMPLE_DATA_STORAGE_FORMAT_H_
/*
Simple data storage format (sdsf)
This library provides functions for serializing and deserializing sdsf files
=== SIMPLE DATA STORAGE FORMAT DESCRIPTION ===
Simple data storage format (sdsf) is a text-based format.
Data is stored using prebuilt types. Those types are:
1) boolean
2) integer
3) floating point
4) string
5) binary
6) array
7) composite
- Boolean values are represented using 't' or 'f' characters
- Integer values are represented using decimals. Example : 0, -1, 1, 999999, -999999, etc.
- Floating point values are represented using decimals. Example : 0.0, 0.1, -0.1, 999.999, -999.999, etc.
- String values are represented using double quotation marks. Example : "string of text", etc.
- Binary values are a special kind of value, used to store binary data in the file. A binary value points to a region of the binary blob at the end of the file.
Binary values start with 'b' character and are followed by two integer values separated by '-' character. Example : b0-100, b99-1024, etc.
- Arrays can store multiple member (child) values. Array members must have no name. Arrays start with '[' character, each member is separated with ','
character. Arrays end with ']' character. Example : [0, t, "string", b0-123, [1, 2, 3]]
- Composite values can also store multiple children. But, unlike arrays, composite children must have names and must not be separated by the ',' character.
Example :
{
int 1
float 0.1
array [0, 1, 2, { text "composite value inside of an array" anotherValue 42 }]
}
All values, except array members, must be paired with identifiers. An identifier is a string which:
- can have any letter
- can have numbers
- can't begin with number
- can't have ',' '[' ']' '{' '}' '"' '@' '.' '-' characters
Values and identifiers are separated using:
- skip-characters - ' ' '\n' '\t' '\r'
- reserved characters - ',' '[' ']' '{' '}' '"' '@'
Values and identifiers can be separated by *any* number of skip-characters
Binary data blob is stored at the end of file. Binary data blob starts with '@' character
Example of sdsf file with binary blob:
some_value 10
another_value 123
binary_value b0-71
@this is binary data blob and it can store anything, not only plain text
Sdsf files are expected to use utf-8 encoding
=== LIBRARY DESCRIPTION ===
To deserialize file user must:
1) read file into a memory buffer
2) provide SdsfAllocator for library to use
3) preallocate SdsfDeserializedResult value (can be on stack)
4) call sdsf_deserialize function with all required args
5) check for SdsfDeserializationError value
6) process result
7) call sdsf_deserialized_result_free
Important - if the C file API is used to read the file (fopen, fread, etc.), "rb" mode must be used, because text mode ("r") may translate line endings and thereby change the number of bytes read
Important - even if deserialization fails sdsf_deserialized_result_free must be called
To serialize file user must:
1) provide SdsfAllocator for library to use
2) call sdsf_serializer_begin
3) call sdsf_serialize_* functions to store data to sdsf
3.1) check for SdsfSerializationError if necessary
4) call sdsf_serializer_end
5) use SdsfSerializedResult data as needed
6) free SdsfSerializedResult using sdsf_serialized_result_free
Important - if SdsfSerializationError occurs, user can continue serialization process. Serialization error invalidates only a single command
For example, if following sequence of commands was executed:
sdsf_serialize_string(&sdsf, "a", "first string");
sdsf_serialize_string(&sdsf, NULL, "this command will fail because name is null");
sdsf_serialize_string(&sdsf, "c", "third string");
Resulting sdsf file will look like this:
a "first string"
c "third string"
User can alter library behaviour using preprocessor definitions:
SDSF_VALUES_ARRAY_DEFAULT_CAPACITY - defines default size for SdsfValueArray
SDSF_VALUES_PTR_ARRAY_DEFAULT_CAPACITY - defines default size for SdsfValuePtrArray
SDSF_STRING_ARRAY_DEFAULT_CAPACITY - defines default size for SdsfStringArray
SDSF_SERIALIZER_STAGING_BUFFER_CAPACITY - defines size for serializer's staging buffer (used for converting integers and floats to string)
SDSF_SERIALIZER_MAIN_BUFFER_DEFAULT_CAPACITY - defines default size for serializer's main (aka result) buffer
SDSF_SERIALIZER_STACK_DEFAULT_CAPACITY - defines default size for serializer's _SdsfSerializerStackEntry stack
SDSF_SERIALIZER_BINARY_DATA_BUFFER_DEFAULT_CAPACITY - defines default size for serializer's binary data buffer
Library does not check SdsfAllocator::alloc result. Valid pointer is always expected
Both serialization and deserialization operations return error codes (SdsfDeserializationError / SdsfSerializationError)
On success error code is 0, so user can do error check using if statement:
SdsfDeserializationError err = sdsf_deserialize(&dr, data, dataSize, allocator);
if (err)
{
// Handle error
}
If error occurs, SdsfDeserializedResult and SdsfSerializer will have error description in errorMsg member
To access error message in unified manner sdsf_get_error_message macro can be used
*/
#ifdef __cplusplus
extern "C" {
#endif
#include
#include
// Tunable capacities. Each macro is only defined here when the user has not already defined it,
// so a value can be overridden by defining the macro before this header is included.

// Number of SdsfValue elements allocated by the first _sdsf_val_array_add call on an empty SdsfValueArray
#ifndef SDSF_VALUES_ARRAY_DEFAULT_CAPACITY
# define SDSF_VALUES_ARRAY_DEFAULT_CAPACITY 1024
#endif
// Number of SdsfValue* slots allocated by the first _sdsf_val_ptr_array_add call on an empty SdsfValuePtrArray
#ifndef SDSF_VALUES_PTR_ARRAY_DEFAULT_CAPACITY
# define SDSF_VALUES_PTR_ARRAY_DEFAULT_CAPACITY 8
#endif
// Minimum number of bytes allocated by the first _sdsf_string_array_save call on an empty SdsfStringArray
#ifndef SDSF_STRING_ARRAY_DEFAULT_CAPACITY
# define SDSF_STRING_ARRAY_DEFAULT_CAPACITY 2048
#endif
// Size of the serializer's staging buffer (per the header comment: used for converting integers and floats to string)
#ifndef SDSF_SERIALIZER_STAGING_BUFFER_CAPACITY
# define SDSF_SERIALIZER_STAGING_BUFFER_CAPACITY 128
#endif
// Default size of the serializer's _SdsfSerializerStackEntry stack
#ifndef SDSF_SERIALIZER_STACK_DEFAULT_CAPACITY
# define SDSF_SERIALIZER_STACK_DEFAULT_CAPACITY 32
#endif
// Default size of the serializer's binary data buffer
#ifndef SDSF_SERIALIZER_BINARY_DATA_BUFFER_DEFAULT_CAPACITY
# define SDSF_SERIALIZER_BINARY_DATA_BUFFER_DEFAULT_CAPACITY 128
#endif
// Default size of the serializer's main (aka result) buffer
#ifndef SDSF_SERIALIZER_MAIN_BUFFER_DEFAULT_CAPACITY
# define SDSF_SERIALIZER_MAIN_BUFFER_DEFAULT_CAPACITY 2048
#endif
// Kind of data stored in an SdsfValue (see SdsfValue::type).
// Enumerator values are used as indices into SDSF_VALUE_TYPE_TO_STR, so the order of both lists must match.
typedef enum
{
// Value 0: the state of a freshly zeroed SdsfValue. The deserializer treats it as
// "identifier was read, value not parsed yet".
SDSF_VALUE_UNDEFINED,
SDSF_VALUE_BOOL,
SDSF_VALUE_INT,
SDSF_VALUE_FLOAT,
SDSF_VALUE_STRING,
SDSF_VALUE_BINARY,
SDSF_VALUE_ARRAY,
SDSF_VALUE_COMPOSITE,
} SdsfValueType;
// Human-readable names of the SdsfValueType enumerators, indexed by enumerator value.
// The table is static and fully const: a plain (external, writable) definition in a header
// produces a duplicate-symbol link error as soon as two translation units include this file.
static const char* const SDSF_VALUE_TYPE_TO_STR[] =
{
    "SDSF_VALUE_UNDEFINED",
    "SDSF_VALUE_BOOL",
    "SDSF_VALUE_INT",
    "SDSF_VALUE_FLOAT",
    "SDSF_VALUE_STRING",
    "SDSF_VALUE_BINARY",
    "SDSF_VALUE_ARRAY",
    "SDSF_VALUE_COMPOSITE",
};
// Result code of sdsf_deserialize. Zero means success, so the code can be tested with a plain `if (err)`.
// Enumerator values are used as indices into SDSF_DESERIALIZATION_ERROR_TO_STR, so the order of both lists must match.
typedef enum
{
SDSF_DESERIALIZATION_ERROR_ALL_FINE = 0,
// A token could not be classified (_sdsf_match_string returned _SDSF_TOKEN_TYPE_INVALID)
SDSF_DESERIALIZATION_ERROR_TOKENIZER_FAILED,
// A literal appeared where no identifier or enclosing array is available to receive it
SDSF_DESERIALIZATION_ERROR_EXPECTED_IDENTIFIER,
// One of , [ ] { } appeared in a position where it is not allowed
SDSF_DESERIALIZATION_ERROR_UNEXPECTED_RESERVED_SYMBOL,
// '@' was found although no binary literal announced a binary data blob
SDSF_DESERIALIZATION_ERROR_UNEXPECTED_BINARY_DATA_BLOB,
// An identifier followed another identifier, or appeared inside a non-composite value
SDSF_DESERIALIZATION_ERROR_UNEXPECTED_IDENTIFIER,
SDSF_DESERIALIZATION_ERROR_INVALID_BINARY_LITERAL,
} SdsfDeserializationError;
// Human-readable names of the SdsfDeserializationError enumerators, indexed by enumerator value.
// The table is static and fully const: a plain (external, writable) definition in a header
// produces a duplicate-symbol link error as soon as two translation units include this file.
static const char* const SDSF_DESERIALIZATION_ERROR_TO_STR[] =
{
    "SDSF_DESERIALIZATION_ERROR_ALL_FINE",
    "SDSF_DESERIALIZATION_ERROR_TOKENIZER_FAILED",
    "SDSF_DESERIALIZATION_ERROR_EXPECTED_IDENTIFIER",
    "SDSF_DESERIALIZATION_ERROR_UNEXPECTED_RESERVED_SYMBOL",
    "SDSF_DESERIALIZATION_ERROR_UNEXPECTED_BINARY_DATA_BLOB",
    "SDSF_DESERIALIZATION_ERROR_UNEXPECTED_IDENTIFIER",
    "SDSF_DESERIALIZATION_ERROR_INVALID_BINARY_LITERAL",
};
// User-supplied memory callbacks used for every allocation the library makes.
typedef struct
{
// Must return a block of at least `size` bytes. The library does not check the result,
// so a valid pointer is always expected.
void* (*alloc)(size_t size, void* userData);
// Releases a block obtained from alloc. The library passes the same byte size it requested for that block.
void (*dealloc)(void* ptr, size_t size, void* userData);
// Opaque pointer handed back unchanged as the last argument of both callbacks
void* userData;
} SdsfAllocator;
// Growable array of SdsfValue objects (managed by _sdsf_val_array_add / _sdsf_val_array_clear).
typedef struct
{
// Storage for `capacity` elements; only meaningful while capacity is non-zero
struct SdsfValue* ptr;
// Number of elements handed out so far
size_t size;
// Number of elements the current storage can hold; 0 means nothing is allocated
size_t capacity;
} SdsfValueArray;
// Growable array of pointers to SdsfValue (managed by _sdsf_val_ptr_array_add / _sdsf_val_ptr_array_clear).
// Used for the list of top-level values and for the child lists of arrays and composites.
typedef struct
{
// Storage for `capacity` pointers; only meaningful while capacity is non-zero
struct SdsfValue** ptr;
// Number of pointer slots handed out so far
size_t size;
// Number of pointer slots the current storage can hold; 0 means nothing is allocated
size_t capacity;
} SdsfValuePtrArray;
// Growable byte buffer that stores strings back to back (managed by _sdsf_string_array_save / _sdsf_string_array_clear).
// Every saved string occupies its length plus one zero byte.
typedef struct
{
// Zero-initialized storage of `capacity` bytes; only meaningful while capacity is non-zero
char* ptr;
// Number of bytes used so far
size_t size;
// Number of bytes allocated; 0 means nothing is allocated
size_t capacity;
} SdsfStringArray;
// A single deserialized value: a node of the value tree produced by sdsf_deserialize.
typedef struct SdsfValue
{
// Enclosing array or composite value; NULL for top-level values
struct SdsfValue* parent;
// Identifier of the value, stored in SdsfDeserializedResult::strings.
// NULL for array members, which are created without a name.
const char* name;
// Kind of data stored in this value
SdsfValueType type;
// Payload. NOTE(review): presumably the member matching `type` is the valid one - confirm against the
// part of the deserializer that fills in literal values.
union
{
bool asBool;
int32_t asInt;
float asFloat;
const char* asString;
// Reference to a region of the binary data blob (binary literal "b<int>-<int>")
struct
{
size_t dataOffset;
size_t dataSize;
} asBinary;
// Unnamed members of an array value, in order of appearance
struct
{
SdsfValuePtrArray childs;
} asArray;
// Named members of a composite value, in order of appearance
struct
{
SdsfValuePtrArray childs;
} asComposite;
};
} SdsfValue;
// Output of sdsf_deserialize. Owns every allocation made during deserialization;
// release it with sdsf_deserialized_result_free (also after a failed deserialization).
typedef struct
{
// Copy of the allocator passed to sdsf_deserialize
SdsfAllocator allocator;
// Pointers to the values that have no parent, in order of appearance
SdsfValuePtrArray topLevelValues;
// Storage for all values of the file, top-level and nested
SdsfValueArray values;
// Storage for value names (identifiers)
SdsfStringArray strings;
// Copy of the bytes that follow the '@' character; NULL when the file has no binary data blob
void* binaryData;
// Size of binaryData in bytes
size_t binaryDataSize;
// Description of the error when deserialization failed; NULL otherwise
const char* errorMsg;
} SdsfDeserializedResult;
// Result code of the sdsf_serialize_* functions and of sdsf_serializer_end.
// Zero means success, so the code can be tested with a plain `if (err)`.
// Per the header comment, an error invalidates only the failing command; serialization may continue.
// Enumerator values are used as indices into SDSF_SERIALIZATION_ERROR_TO_STR, so the order of both lists must match.
typedef enum
{
SDSF_SERIALIZATION_ERROR_ALL_FINE = 0,
SDSF_SERIALIZATION_ERROR_NO_NAME_PROVIDED,
SDSF_SERIALIZATION_ERROR_INVALID_NAME,
SDSF_SERIALIZATION_ERROR_NO_VALUE_PROVIDED,
SDSF_SERIALIZATION_ERROR_UNABLE_TO_CONVERT_VALUE_TO_STRING,
SDSF_SERIALIZATION_ERROR_UNABLE_TO_END_ARRAY,
SDSF_SERIALIZATION_ERROR_UNABLE_TO_END_COMPOSITE,
SDSF_SERIALIZATION_ERROR_UNFINISHED_ARRAY_OR_COMPOSITE_VALUES,
} SdsfSerializationError;
// Human-readable names of the SdsfSerializationError enumerators, indexed by enumerator value.
// The table is static and fully const: a plain (external, writable) definition in a header
// produces a duplicate-symbol link error as soon as two translation units include this file.
static const char* const SDSF_SERIALIZATION_ERROR_TO_STR[] =
{
    "SDSF_SERIALIZATION_ERROR_ALL_FINE",
    "SDSF_SERIALIZATION_ERROR_NO_NAME_PROVIDED",
    "SDSF_SERIALIZATION_ERROR_INVALID_NAME",
    "SDSF_SERIALIZATION_ERROR_NO_VALUE_PROVIDED",
    "SDSF_SERIALIZATION_ERROR_UNABLE_TO_CONVERT_VALUE_TO_STRING",
    "SDSF_SERIALIZATION_ERROR_UNABLE_TO_END_ARRAY",
    "SDSF_SERIALIZATION_ERROR_UNABLE_TO_END_COMPOSITE",
    "SDSF_SERIALIZATION_ERROR_UNFINISHED_ARRAY_OR_COMPOSITE_VALUES",
};
// Element type of the SdsfSerializer::stack array.
// NOTE(review): presumably each entry records which kind of container (array / composite) the serializer
// is currently writing into - confirm against the serializer implementation.
typedef enum
{
_SDSF_SERIALIZER_IN_NOTHING,
_SDSF_SERIALIZER_IN_ARRAY,
_SDSF_SERIALIZER_IN_COMPOSITE,
} _SdsfSerializerStackEntry;
// State of a serialization in progress. Created by sdsf_serializer_begin and finished by sdsf_serializer_end.
typedef struct
{
// Allocator used by the serializer
SdsfAllocator allocator;
// Staging buffers (per the header comment: used for converting integers and floats to string)
char* stagingBuffer1;
char* stagingBuffer2;
// Stack of _SdsfSerializerStackEntry values with its used size and allocated capacity
_SdsfSerializerStackEntry* stack;
size_t stackSize;
size_t stackCapacity;
// Binary data buffer with its used size and allocated capacity
void* binaryDataBuffer;
size_t binaryDataBufferSize;
size_t binaryDataBufferCapacity;
// Main (aka result) buffer with its used size and allocated capacity
void* mainBuffer;
size_t mainBufferSize;
size_t mainBufferCapacity;
// Description of the most recent error; read it through sdsf_get_error_message
const char* errorMsg;
} SdsfSerializer;
// Output of sdsf_serializer_end. Release it with sdsf_serialized_result_free.
typedef struct
{
// Allocator associated with `buffer`
// NOTE(review): presumably the one sdsf_serialized_result_free uses to release it - confirm.
SdsfAllocator allocator;
// Serialized sdsf data
void* buffer;
// NOTE(review): presumably the number of valid bytes in `buffer` - confirm against sdsf_serializer_end.
size_t bufferSize;
// NOTE(review): presumably the number of bytes allocated for `buffer` - confirm against sdsf_serializer_end.
size_t bufferCapacity;
} SdsfSerializedResult;
// Parses `dataSize` bytes of sdsf text at `data` into `*result`, using `allocator` for all allocations.
// `*result` is overwritten, so it does not need to be initialized by the caller.
// Returns 0 (SDSF_DESERIALIZATION_ERROR_ALL_FINE) on success; on failure the error message is stored in result->errorMsg.
SdsfDeserializationError sdsf_deserialize(SdsfDeserializedResult* result, const void* data, size_t dataSize, SdsfAllocator allocator);
// Releases a result filled by sdsf_deserialize. Must be called even if deserialization failed.
void sdsf_deserialized_result_free(SdsfDeserializedResult* sdsf);
// Starts a serialization that uses `allocator`; pass the returned object to the sdsf_serialize_* functions.
SdsfSerializer sdsf_serializer_begin(SdsfAllocator allocator);
// sdsf_serialize_* functions store one value in the sdsf output. Each returns 0 on success.
// A failing call invalidates only that single command; subsequent calls can still succeed.
SdsfSerializationError sdsf_serialize_bool(SdsfSerializer* sdsf, const char* name, bool value);
SdsfSerializationError sdsf_serialize_int(SdsfSerializer* sdsf, const char* name, int32_t value);
SdsfSerializationError sdsf_serialize_float(SdsfSerializer* sdsf, const char* name, float value);
SdsfSerializationError sdsf_serialize_string(SdsfSerializer* sdsf, const char* name, const char* value);
SdsfSerializationError sdsf_serialize_binary(SdsfSerializer* sdsf, const char* name, const void* value, size_t size);
// NOTE(review): presumably every *_start call must be paired with the matching *_end call before
// sdsf_serializer_end (see SDSF_SERIALIZATION_ERROR_UNFINISHED_ARRAY_OR_COMPOSITE_VALUES) - confirm.
SdsfSerializationError sdsf_serialize_array_start(SdsfSerializer* sdsf, const char* name);
SdsfSerializationError sdsf_serialize_array_end(SdsfSerializer* sdsf);
SdsfSerializationError sdsf_serialize_composite_start(SdsfSerializer* sdsf, const char* name);
SdsfSerializationError sdsf_serialize_composite_end(SdsfSerializer* sdsf);
// Finishes the serialization and provides the serialized data through `*result`.
SdsfSerializationError sdsf_serializer_end(SdsfSerializer* sdsf, SdsfSerializedResult* result);
// Releases a result filled by sdsf_serializer_end.
void sdsf_serialized_result_free(SdsfSerializedResult* sdsf);
// Uniform accessor for the errorMsg member of SdsfDeserializedResult and SdsfSerializer.
// `obj` is the object itself, not a pointer to it.
#define sdsf_get_error_message(obj) ((obj).errorMsg)
#ifdef __cplusplus
}
#endif
#endif //_SIMPLE_DATA_STORAGE_FORMAT_H_
#define SDSF_IMPL
#ifdef SDSF_IMPL
#ifndef _SDSF_IMPL_INNER_
#define _SDSF_IMPL_INNER_
#ifdef __cplusplus
extern "C" {
#endif
#include
#include
// Internal indentation constants. Both are reserved for the library and must not be predefined by the user.
// _SDSF_INDENT_SIZE is the length of the _SDSF_INDENT literal, so the two definitions must stay in sync:
// a size of 4 paired with a shorter literal would make any copy of _SDSF_INDENT_SIZE bytes read past the literal.
#ifdef _SDSF_INDENT_SIZE
#   error User should not redefine _SDSF_INDENT_SIZE value
#endif
#define _SDSF_INDENT_SIZE 4
#ifdef _SDSF_INDENT
#   error User should not redefine _SDSF_INDENT value
#endif
#define _SDSF_INDENT "    "
// ==============================================================================================================
//
//
// Deserializer
//
//
// ==============================================================================================================
// Raw lexeme produced by _sdsf_consume_string: a span inside the source buffer.
// The span is not zero-terminated; always use `size`.
// (The type name is spelled "Comsumed" throughout the file.)
typedef struct
{
// First character of the span (points into the buffer being tokenized)
const char* ptr;
// Length of the span in bytes
size_t size;
} _SdsfComsumedString;
// Classification of a token. All kinds except STRING_LITERAL are produced by _sdsf_match_string;
// STRING_LITERAL is assigned directly by _sdsf_consume_token for the text between two '"' characters.
typedef enum
{
// The token could not be classified
_SDSF_TOKEN_TYPE_INVALID,
_SDSF_TOKEN_TYPE_IDENTIFIER,
// One of , [ ] { } " @
_SDSF_TOKEN_TYPE_RESERVED_SYMBOL,
// Single character 't' or 'f'
_SDSF_TOKEN_TYPE_BOOL_LITERAL,
_SDSF_TOKEN_TYPE_INT_LITERAL,
_SDSF_TOKEN_TYPE_FLOAT_LITERAL,
// Token that starts with 'b' and contains '-', e.g. b0-100
_SDSF_TOKEN_TYPE_BINARY_LITERAL,
_SDSF_TOKEN_TYPE_STRING_LITERAL,
} _SdsfTokenType;
// Progress of the tokenizer through a "..." string literal (see _sdsf_consume_token).
typedef enum
{
// Not inside a string literal
_SDSF_STRING_LITERAL_NONE,
// Opening '"' was consumed; the next consume call reads the literal text
_SDSF_STRING_LITERAL_BEGIN,
// Literal text was consumed; the next '"' token closes the literal and resets the state to NONE
_SDSF_STRING_LITERAL_END,
} _SdsfStringLiteralState;
// Tokenizer state that is carried between _sdsf_consume_token calls.
typedef struct
{
// Buffer being tokenized
const char* data;
// Size of `data` in bytes
size_t dataSize;
// Position inside a string literal, if any
_SdsfStringLiteralState stringLiteralState;
// Index of the next unread byte in `data`
size_t stringConsumePtr;
} _SdsfTokenizerData;
// Token produced by _sdsf_consume_token: a span of the source buffer plus its classification.
typedef struct
{
// First character of the token (points into the buffer being tokenized, not zero-terminated)
const char* stringPtr;
// Length of the token in bytes
size_t stringSize;
// Classification of the token
_SdsfTokenType tokenType;
} _SdsfConsumedToken;
// Returns true if `c` is a skip-character (space, line feed, carriage return or tab).
// Skip-characters separate tokens and carry no other meaning.
// Declared `static inline`: a plain `inline` definition in C99/C11 does not emit an external definition,
// so any call the compiler decides not to inline would fail at link time.
static inline bool _sdsf_is_skipped_char(char c)
{
    return (c == ' ') || (c == '\n') || (c == '\r') || (c == '\t');
}
// Returns true if `c` is one of the reserved characters , [ ] { } " @
// A reserved character always forms a token of its own.
// Declared `static inline`: a plain `inline` definition in C99/C11 does not emit an external definition,
// so any call the compiler decides not to inline would fail at link time.
static inline bool _sdsf_is_reserved_symbol(char c)
{
    switch (c)
    {
        case ',':
        case '[':
        case ']':
        case '{':
        case '}':
        case '\"':
        case '@':
            return true;
        default:
            return false;
    }
}
// Returns true if `c` is an ASCII decimal digit ('0'..'9').
// Declared `static inline`: a plain `inline` definition in C99/C11 does not emit an external definition,
// so any call the compiler decides not to inline would fail at link time.
static inline bool _sdsf_is_number(char c)
{
    return c >= '0' && c <= '9';
}
// Extracts the next raw lexeme from `sourceBuffer`, starting at index `*consumePtr`,
// and advances `*consumePtr` past the consumed characters.
//
// sourceBuffer / sourceBufferSize - buffer being tokenized (not required to be zero-terminated)
// consumePtr                      - in/out index of the next unread byte
// result                          - receives the lexeme span (points into sourceBuffer); written only when true is returned
// isStringLiteral                 - false: skip leading skip-characters, then return either a single reserved character
//                                   or a run of characters up to the next skip/reserved character
//                                   true:  return everything up to (not including) the next '"' or the end of the
//                                   buffer; the span may be empty and the closing '"' is left unconsumed
//
// Returns false when there is nothing left to consume.
bool _sdsf_consume_string(const char* sourceBuffer, size_t sourceBufferSize, size_t* consumePtr, _SdsfComsumedString* result, bool isStringLiteral)
{
    if (*consumePtr >= sourceBufferSize)
    {
        return false;
    }
    if (isStringLiteral)
    {
        const size_t literalStart = *consumePtr;
        while (*consumePtr < sourceBufferSize && sourceBuffer[*consumePtr] != '\"')
        {
            *consumePtr += 1;
        }
        result->ptr = sourceBuffer + literalStart;
        result->size = *consumePtr - literalStart;
        return true;
    }
    // Skip spaces and stuff
    while (*consumePtr < sourceBufferSize && _sdsf_is_skipped_char(sourceBuffer[*consumePtr]))
    {
        *consumePtr += 1;
    }
    // Only skip-characters were left (e.g. a trailing newline). The buffer must not be indexed at
    // sourceBufferSize, so report the end of input here instead of inspecting the next character.
    if (*consumePtr >= sourceBufferSize)
    {
        return false;
    }
    // A reserved symbol is always a token of its own
    if (_sdsf_is_reserved_symbol(sourceBuffer[*consumePtr]))
    {
        result->ptr = sourceBuffer + *consumePtr;
        result->size = 1;
        *consumePtr += 1;
        return true;
    }
    // Consume characters until a reserved or skip symbol. The current character is neither of them,
    // so the resulting span always contains at least one character.
    const size_t beginning = *consumePtr;
    while (*consumePtr < sourceBufferSize)
    {
        const char c = sourceBuffer[*consumePtr];
        if (_sdsf_is_skipped_char(c) || _sdsf_is_reserved_symbol(c))
        {
            break;
        }
        *consumePtr += 1;
    }
    result->ptr = sourceBuffer + beginning;
    result->size = *consumePtr - beginning;
    return true;
}
// Classifies a raw lexeme produced by _sdsf_consume_string (string literal bodies are not passed here).
//
// sdsf - only sdsf->errorMsg is written: every path that returns _SDSF_TOKEN_TYPE_INVALID sets it
// str  - lexeme to classify
//
// Returns the token type, or _SDSF_TOKEN_TYPE_INVALID when the lexeme is not a valid token.
//
// Single-character lexemes are handled separately: reserved symbol, digit (int literal) or 't'/'f' (bool literal).
// NOTE(review): every other single character is rejected, so one-letter identifiers such as "a" are not accepted
// by the tokenizer - confirm whether that is intended.
//
// Longer lexemes are classified by elimination: the first character selects the set of possible token kinds
// and every following character removes the kinds it cannot belong to.
_SdsfTokenType _sdsf_match_string(SdsfDeserializedResult* sdsf, const _SdsfComsumedString* str)
{
    if (!str->ptr || !str->size)
    {
        sdsf->errorMsg = "Tokenzer error - invalid string provided";
        return _SDSF_TOKEN_TYPE_INVALID;
    }
    if (str->size == 1)
    {
        const char c = *str->ptr;
        if (_sdsf_is_reserved_symbol(c))
        {
            return _SDSF_TOKEN_TYPE_RESERVED_SYMBOL;
        }
        if (_sdsf_is_number(c))
        {
            return _SDSF_TOKEN_TYPE_INT_LITERAL;
        }
        if (c == 't' || c == 'f')
        {
            return _SDSF_TOKEN_TYPE_BOOL_LITERAL;
        }
        // The message must be set before returning: the caller relies on errorMsg being filled in here
        sdsf->errorMsg = "Tokenzer error - invalid bool literal";
        return _SDSF_TOKEN_TYPE_INVALID;
    }
    typedef enum
    {
        _POSSIBLE_IDENTIFIER = 1 << 0,
        _POSSIBLE_INT_LITERAL = 1 << 1,
        _POSSIBLE_FLOAT_LITERAL = 1 << 2,
        _POSSIBLE_BINARY_LITERAL = 1 << 3,
    } _PossibilitySpaceValue;
    int32_t possibilitySpace = 0;
    bool dotFound = false;
    bool dashFound = false;
    const char firstChar = *str->ptr;
    if (firstChar == 'b')
    {
        possibilitySpace = _POSSIBLE_BINARY_LITERAL | _POSSIBLE_IDENTIFIER;
    }
    else if (_sdsf_is_number(firstChar) || firstChar == '-')
    {
        possibilitySpace = _POSSIBLE_INT_LITERAL | _POSSIBLE_FLOAT_LITERAL;
        if (firstChar == '-') dashFound = true;
    }
    else if (firstChar == '.')
    {
        possibilitySpace = _POSSIBLE_FLOAT_LITERAL;
        dotFound = true;
    }
    else if (_sdsf_is_skipped_char(firstChar) || _sdsf_is_reserved_symbol(firstChar))
    {
        sdsf->errorMsg = "Tokenzer error - unexpected character";
        return _SDSF_TOKEN_TYPE_INVALID;
    }
    else
    {
        possibilitySpace = _POSSIBLE_IDENTIFIER;
    }
    for (size_t it = 1; it < str->size; it++)
    {
        const char c = str->ptr[it];
        if (_sdsf_is_number(c))
        {
            continue; // can be anything
        }
        if (_sdsf_is_skipped_char(c) || _sdsf_is_reserved_symbol(c))
        {
            sdsf->errorMsg = "Tokenzer error - unexpected character";
            return _SDSF_TOKEN_TYPE_INVALID;
        }
        switch (c)
        {
            case '.':
            {
                if (dotFound)
                {
                    sdsf->errorMsg = "Tokenzer error - two '.' characters in single literal";
                    return _SDSF_TOKEN_TYPE_INVALID;
                }
                // only floats can have '.'
                possibilitySpace &= _POSSIBLE_FLOAT_LITERAL;
                dotFound = true;
            } break;
            case '-':
            {
                if (dashFound)
                {
                    sdsf->errorMsg = "Tokenzer error - two '-' characters in single literal";
                    return _SDSF_TOKEN_TYPE_INVALID;
                }
                // A '-' after the first character is only valid as the separator of a binary literal (b0-100).
                // The sign of an int/float literal is its first character and is handled above, so a lexeme
                // such as "1-2" must not be classified as a number.
                possibilitySpace &= _POSSIBLE_BINARY_LITERAL;
                dashFound = true;
            } break;
            default:
            {
                // only identifier can have something other than number, dot or dash (binary literal 'b' prefix is checked earlier)
                possibilitySpace &= _POSSIBLE_IDENTIFIER;
            } break;
        }
    }
    if (possibilitySpace & _POSSIBLE_IDENTIFIER)
    {
        // It is possible to have both _POSSIBLE_IDENTIFIER and _POSSIBLE_BINARY_LITERAL in possibility space,
        // but if we have _POSSIBLE_IDENTIFIER that means binary literal wasn't fully completed (it must have '-' symbol)
        return _SDSF_TOKEN_TYPE_IDENTIFIER;
    }
    if (possibilitySpace & _POSSIBLE_INT_LITERAL)
    {
        // It is possible to have both _POSSIBLE_INT_LITERAL and _POSSIBLE_FLOAT_LITERAL in possibility space,
        // but if we have _POSSIBLE_INT_LITERAL that means float literal wasn't fully completed (it must have '.' symbol)
        return _SDSF_TOKEN_TYPE_INT_LITERAL;
    }
    if (possibilitySpace & _POSSIBLE_FLOAT_LITERAL)
    {
        return _SDSF_TOKEN_TYPE_FLOAT_LITERAL;
    }
    if (possibilitySpace & _POSSIBLE_BINARY_LITERAL)
    {
        return _SDSF_TOKEN_TYPE_BINARY_LITERAL;
    }
    sdsf->errorMsg = "Tokenzer error - failed to match token";
    return _SDSF_TOKEN_TYPE_INVALID;
}
// Reads the next token from the tokenizer state `data` into `*result`.
//
// sdsf   - receives the error message when the token cannot be classified
// result - filled with the token span and its type; written only when true is returned
// data   - tokenizer state; the read position and the string literal state are advanced
//
// Returns false when the input is exhausted. A token of type _SDSF_TOKEN_TYPE_INVALID still returns true.
//
// String literals are delivered as three tokens: the opening '"' (reserved symbol), the literal text
// (_SDSF_TOKEN_TYPE_STRING_LITERAL) and the closing '"' (reserved symbol).
bool _sdsf_consume_token(SdsfDeserializedResult* sdsf, _SdsfConsumedToken* result, _SdsfTokenizerData* data)
{
    // The opening quote was the previous token, so this call reads the literal text verbatim
    const bool readingLiteralText = (data->stringLiteralState == _SDSF_STRING_LITERAL_BEGIN);
    _SdsfComsumedString lexeme;
    const bool consumed = _sdsf_consume_string(data->data, data->dataSize, &data->stringConsumePtr, &lexeme, readingLiteralText);
    if (!consumed)
    {
        return false;
    }
    result->stringPtr = lexeme.ptr;
    result->stringSize = lexeme.size;
    if (readingLiteralText)
    {
        data->stringLiteralState = _SDSF_STRING_LITERAL_END;
        result->tokenType = _SDSF_TOKEN_TYPE_STRING_LITERAL;
        return true;
    }
    result->tokenType = _sdsf_match_string(sdsf, &lexeme);
    const bool isQuote = (result->tokenType == _SDSF_TOKEN_TYPE_RESERVED_SYMBOL) && (lexeme.ptr[0] == '\"');
    if (isQuote)
    {
        // A quote either opens a literal (NONE -> BEGIN) or closes the one just read (END -> NONE)
        data->stringLiteralState = (data->stringLiteralState == _SDSF_STRING_LITERAL_NONE)
            ? _SDSF_STRING_LITERAL_BEGIN
            : _SDSF_STRING_LITERAL_NONE;
    }
    return true;
}
// Appends one zero-initialized SdsfValue to `array` and returns a pointer to it.
// The first call allocates SDSF_VALUES_ARRAY_DEFAULT_CAPACITY elements; a full array is doubled.
// The result of allocator->alloc is not checked.
//
// NOTE(review): growing copies the elements into a new block and releases the old one, so every SdsfValue*
// obtained earlier (parent pointers, child pointer arrays, top-level value pointers) refers to freed memory
// afterwards. This cannot be repaired inside this function - the storage scheme needs to be revisited.
SdsfValue* _sdsf_val_array_add(SdsfValueArray* array, const SdsfAllocator* allocator)
{
    const size_t elementSize = sizeof(SdsfValue);
    if (array->capacity == 0)
    {
        const size_t initialBytes = SDSF_VALUES_ARRAY_DEFAULT_CAPACITY * elementSize;
        array->ptr = (SdsfValue*)allocator->alloc(initialBytes, allocator->userData);
        memset(array->ptr, 0, initialBytes);
        array->size = 0;
        array->capacity = SDSF_VALUES_ARRAY_DEFAULT_CAPACITY;
    }
    else if (array->size == array->capacity)
    {
        const size_t previousCapacity = array->capacity;
        const size_t grownCapacity = previousCapacity * 2;
        const size_t previousBytes = previousCapacity * elementSize;
        SdsfValue* const grown = (SdsfValue*)allocator->alloc(grownCapacity * elementSize, allocator->userData);
        memcpy(grown, array->ptr, previousBytes);
        allocator->dealloc(array->ptr, previousBytes, allocator->userData);
        // Only the newly added tail needs zeroing; the head was copied from the old block
        memset(grown + previousCapacity, 0, (grownCapacity - previousCapacity) * elementSize);
        array->ptr = grown;
        array->capacity = grownCapacity;
    }
    SdsfValue* const slot = array->ptr + array->size;
    array->size += 1;
    return slot;
}
// Releases the storage of `array` through `allocator` and resets the array to the empty state.
// Resetting the fields makes a repeated clear a no-op (instead of a double free) and allows the
// array to be reused by _sdsf_val_array_add.
void _sdsf_val_array_clear(SdsfValueArray* array, const SdsfAllocator* allocator)
{
    if (array->capacity)
    {
        allocator->dealloc(array->ptr, array->capacity * sizeof(SdsfValue), allocator->userData);
    }
    array->ptr = NULL;
    array->size = 0;
    array->capacity = 0;
}
// Appends one zero-initialized (NULL) pointer slot to `array` and returns the address of that slot.
// The first call allocates SDSF_VALUES_PTR_ARRAY_DEFAULT_CAPACITY slots; a full array is doubled.
// The result of allocator->alloc is not checked.
// The returned address is only valid until the next call that grows the array, so the slot should be filled immediately.
SdsfValue** _sdsf_val_ptr_array_add(SdsfValuePtrArray* array, const SdsfAllocator* allocator)
{
    const size_t slotSize = sizeof(SdsfValue*);
    if (array->capacity == 0)
    {
        const size_t initialBytes = SDSF_VALUES_PTR_ARRAY_DEFAULT_CAPACITY * slotSize;
        array->ptr = (SdsfValue**)allocator->alloc(initialBytes, allocator->userData);
        memset(array->ptr, 0, initialBytes);
        array->size = 0;
        array->capacity = SDSF_VALUES_PTR_ARRAY_DEFAULT_CAPACITY;
    }
    else if (array->size == array->capacity)
    {
        const size_t previousCapacity = array->capacity;
        const size_t grownCapacity = previousCapacity * 2;
        const size_t previousBytes = previousCapacity * slotSize;
        SdsfValue** const grown = (SdsfValue**)allocator->alloc(grownCapacity * slotSize, allocator->userData);
        memcpy(grown, array->ptr, previousBytes);
        allocator->dealloc(array->ptr, previousBytes, allocator->userData);
        // Only the newly added tail needs zeroing; the head was copied from the old block
        memset(grown + previousCapacity, 0, (grownCapacity - previousCapacity) * slotSize);
        array->ptr = grown;
        array->capacity = grownCapacity;
    }
    SdsfValue** const slot = array->ptr + array->size;
    array->size += 1;
    return slot;
}
// Releases the storage of `array` through `allocator` and resets the array to the empty state.
// Only the pointer slots are released, not the values they point to.
// Resetting the fields makes a repeated clear a no-op (instead of a double free) and allows the
// array to be reused by _sdsf_val_ptr_array_add.
void _sdsf_val_ptr_array_clear(SdsfValuePtrArray* array, const SdsfAllocator* allocator)
{
    if (array->capacity)
    {
        allocator->dealloc(array->ptr, array->capacity * sizeof(SdsfValue*), allocator->userData);
    }
    array->ptr = NULL;
    array->size = 0;
    array->capacity = 0;
}
// Reserves `stringLength + 1` bytes at the end of `array`, copies `string` into them and returns the start of the copy.
//
// array        - destination storage; grown when the new string does not fit
// allocator    - used for (re)allocation; the result of alloc is not checked
// string       - characters to copy (not required to be zero-terminated); may be NULL, in which case the
//                reserved bytes are left zeroed
// stringLength - number of characters to copy
//
// The storage is kept zero-filled, so the byte that follows the copy acts as the string terminator.
//
// NOTE(review): growing copies the bytes into a new block and releases the old one, so every pointer
// returned by an earlier call (e.g. SdsfValue::name) refers to freed memory afterwards. This cannot be
// repaired inside this function - the storage scheme needs to be revisited.
char* _sdsf_string_array_save(SdsfStringArray* array, const SdsfAllocator* allocator, const char* string, size_t stringLength)
{
    if (array->capacity == 0)
    {
        const size_t initialCapacity = (stringLength + 1) > SDSF_STRING_ARRAY_DEFAULT_CAPACITY ? (stringLength + 1) : SDSF_STRING_ARRAY_DEFAULT_CAPACITY;
        // Explicit cast, as in the other allocations of this file: void* does not convert implicitly in C++
        array->ptr = (char*)allocator->alloc(initialCapacity, allocator->userData);
        memset(array->ptr, 0, initialCapacity);
        array->size = 0;
        array->capacity = initialCapacity;
    }
    else if ((array->size + stringLength + 1) >= array->capacity)
    {
        const size_t requiredCapacity = array->size + stringLength + 1;
        const size_t doubledCapacity = array->capacity * 2;
        const size_t newCapacity = (requiredCapacity > doubledCapacity) ? requiredCapacity : doubledCapacity;
        const size_t oldCapacity = array->capacity;
        char* const newMem = (char*)allocator->alloc(newCapacity, allocator->userData);
        memcpy(newMem, array->ptr, oldCapacity);
        allocator->dealloc(array->ptr, oldCapacity, allocator->userData);
        // Only the newly added tail needs zeroing; the head was copied from the old block
        memset(newMem + oldCapacity, 0, newCapacity - oldCapacity);
        array->ptr = newMem;
        array->capacity = newCapacity;
    }
    char* const result = &array->ptr[array->size];
    if (string)
    {
        memcpy(result, string, stringLength);
    }
    // +1 keeps one zero byte after the copy as terminator
    array->size += stringLength + 1;
    return result;
}
// Releases the storage of `array` through `allocator` and resets the array to the empty state.
// All pointers previously returned by _sdsf_string_array_save become invalid.
// Resetting the fields makes a repeated clear a no-op (instead of a double free) and allows the
// array to be reused by _sdsf_string_array_save.
void _sdsf_string_array_clear(SdsfStringArray* array, const SdsfAllocator* allocator)
{
    if (array->capacity)
    {
        allocator->dealloc(array->ptr, array->capacity, allocator->userData);
    }
    array->ptr = NULL;
    array->size = 0;
    array->capacity = 0;
}
SdsfDeserializationError sdsf_deserialize(SdsfDeserializedResult* sdsf, const void* data, size_t dataSize, SdsfAllocator allocator)
{
_SdsfTokenizerData tokenizerData;
tokenizerData.data = (const char*)data;
tokenizerData.dataSize = dataSize;
tokenizerData.stringLiteralState = _SDSF_STRING_LITERAL_NONE;
tokenizerData.stringConsumePtr = 0;
_SdsfConsumedToken token;
_SdsfConsumedToken previousToken = {0};
*sdsf = (SdsfDeserializedResult){0};
sdsf->allocator = allocator;
SdsfValuePtrArray* topLevelValues = &sdsf->topLevelValues;
SdsfValueArray* values = &sdsf->values;
SdsfStringArray* strings = &sdsf->strings;
SdsfValue* currentValue = NULL;
bool expectsBinaryDataBlob = false;
bool shouldRun = true;
while (shouldRun && _sdsf_consume_token(sdsf, &token, &tokenizerData))
{
if (token.tokenType == _SDSF_TOKEN_TYPE_INVALID)
{
// Error message is set in _sdsf_match_string
return SDSF_DESERIALIZATION_ERROR_TOKENIZER_FAILED;
}
if (token.tokenType == _SDSF_TOKEN_TYPE_IDENTIFIER)
{
if (currentValue)
{
if (currentValue->type == SDSF_VALUE_UNDEFINED)
{
sdsf->errorMsg = "Unexpected identifier - got two identifiers in a row";
return SDSF_DESERIALIZATION_ERROR_UNEXPECTED_IDENTIFIER;
}
if (currentValue->type != SDSF_VALUE_COMPOSITE)
{
sdsf->errorMsg = "Unexpected identifier - only composite values can have named childs";
return SDSF_DESERIALIZATION_ERROR_UNEXPECTED_IDENTIFIER;
}
}
SdsfValue* const val = _sdsf_val_array_add(values, &allocator);
val->parent = currentValue;
val->name = _sdsf_string_array_save(strings, &allocator, token.stringPtr, token.stringSize);
if (currentValue)
{
SdsfValue** const ptr = _sdsf_val_ptr_array_add(¤tValue->asComposite.childs, &allocator);
*ptr = val;
}
else
{
SdsfValue** const ptr = _sdsf_val_ptr_array_add(topLevelValues, &allocator);
*ptr = val;
}
currentValue = val;
}
else if (token.tokenType == _SDSF_TOKEN_TYPE_RESERVED_SYMBOL)
{
switch (token.stringPtr[0])
{
case ',':
{
if (!currentValue || currentValue->type != SDSF_VALUE_ARRAY)
{
sdsf->errorMsg = "Unexpected ',' character - commas can be used in arrays only";
return SDSF_DESERIALIZATION_ERROR_UNEXPECTED_RESERVED_SYMBOL;
}
if (currentValue->asArray.childs.size == 0)
{
sdsf->errorMsg = "Unexpected ',' character - commas must be used only after first array child";
return SDSF_DESERIALIZATION_ERROR_UNEXPECTED_RESERVED_SYMBOL;
}
if (previousToken.tokenType == _SDSF_TOKEN_TYPE_RESERVED_SYMBOL && previousToken.stringPtr[0] == ',')
{
sdsf->errorMsg = "Unexpected ',' character - can't have multiple commas in a row";
return SDSF_DESERIALIZATION_ERROR_UNEXPECTED_RESERVED_SYMBOL;
}
// @NOTE : comma doesn't allocate new values, new value is allocated when processing literals or creating composites/arrays
} break;
case ']':
{
if (!currentValue || currentValue->type != SDSF_VALUE_ARRAY)
{
sdsf->errorMsg = "Unexpected ']' character - only arrays can end with this symbol";
return SDSF_DESERIALIZATION_ERROR_UNEXPECTED_RESERVED_SYMBOL;
}
currentValue = currentValue->parent;
} break;
case '}':
{
if (!currentValue || currentValue->type != SDSF_VALUE_COMPOSITE)
{
sdsf->errorMsg = "Unexpected '}' character - only composites can end with this symbol";
return SDSF_DESERIALIZATION_ERROR_UNEXPECTED_RESERVED_SYMBOL;
}
currentValue = currentValue->parent;
} break;
case '[':
{
if (!currentValue)
{
sdsf->errorMsg = "Unexpected '[' character - new array value can be created only after identifier or as child of another array";
return SDSF_DESERIALIZATION_ERROR_UNEXPECTED_RESERVED_SYMBOL;
}
if (currentValue->type == SDSF_VALUE_UNDEFINED)
{
// Array with an identifier
currentValue->type = SDSF_VALUE_ARRAY;
}
else if (currentValue->type == SDSF_VALUE_ARRAY)
{
// Array in array
SdsfValue* const newChild = _sdsf_val_array_add(values, &allocator);
SdsfValue** const newChildPtr = _sdsf_val_ptr_array_add(¤tValue->asArray.childs, &allocator);
*newChildPtr = newChild;
newChild->parent = currentValue;
currentValue = newChild;
currentValue->type = SDSF_VALUE_ARRAY;
}
else
{
sdsf->errorMsg = "New array value can be created only after identifier or in the another array";
return SDSF_DESERIALIZATION_ERROR_UNEXPECTED_RESERVED_SYMBOL;
}
} break;
case '{':
{
if (!currentValue)
{
sdsf->errorMsg = "Unexpected '[' character - new composite value can be created only after identifier or as child of the array";
return SDSF_DESERIALIZATION_ERROR_UNEXPECTED_RESERVED_SYMBOL;
}
if (currentValue->type == SDSF_VALUE_UNDEFINED)
{
// Composite with an identifier
currentValue->type = SDSF_VALUE_COMPOSITE;
}
else if (currentValue->type == SDSF_VALUE_ARRAY)
{
// Composite in an array
SdsfValue* const newChild = _sdsf_val_array_add(values, &allocator);
SdsfValue** const newChildPtr = _sdsf_val_ptr_array_add(¤tValue->asArray.childs, &allocator);
*newChildPtr = newChild;
newChild->parent = currentValue;
currentValue = newChild;
currentValue->type = SDSF_VALUE_COMPOSITE;
}
else
{
sdsf->errorMsg = "Unexpected '[' character - new composite value can be created only after identifier or as child of the array";
return SDSF_DESERIALIZATION_ERROR_UNEXPECTED_RESERVED_SYMBOL;
}
} break;
case '\"':
{
// Nothing
} break;
case '@':
{
if (!expectsBinaryDataBlob)
{
sdsf->errorMsg = "Unexpected binary data blob - no binary literals were used";
return SDSF_DESERIALIZATION_ERROR_UNEXPECTED_BINARY_DATA_BLOB;
}
const size_t binaryDataSize = tokenizerData.dataSize - tokenizerData.stringConsumePtr;
if (binaryDataSize)
{
void* const memory = allocator.alloc(binaryDataSize, allocator.userData);
sdsf->binaryData = memory;
sdsf->binaryDataSize = binaryDataSize;
const void* const from = tokenizerData.data + tokenizerData.stringConsumePtr;
memcpy(memory, from, binaryDataSize);
}
// Binary data blob is always in the end of file
shouldRun = false;
} break;
}
}
else
{
if (!currentValue)
{
sdsf->errorMsg = "Values must be associated with identifier or array";
return SDSF_DESERIALIZATION_ERROR_EXPECTED_IDENTIFIER;
}
SdsfValue* valueToUpdate = NULL;
if (currentValue->type == SDSF_VALUE_ARRAY)
{
valueToUpdate = _sdsf_val_array_add(values, &allocator);
valueToUpdate->parent = currentValue;
SdsfValue** const ptr = _sdsf_val_ptr_array_add(¤tValue->asArray.childs, &allocator);
*ptr = valueToUpdate;
}
else
{
if (!currentValue->name)
{
sdsf->errorMsg = "Unexpected unnamed value. Only arrays can have values without names";
return SDSF_DESERIALIZATION_ERROR_EXPECTED_IDENTIFIER;
}
valueToUpdate = currentValue;
currentValue = currentValue->parent;