Raw File
1 /*
2 Copyright (c) 2010, Matthieu Labas
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without modification,
6 are permitted provided that the following conditions are met:
7
8 1. Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10
11 2. Redistributions in binary form must reproduce the above copyright notice,
12 this list of conditions and the following disclaimer in the documentation
13 and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
19 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
24 OF SUCH DAMAGE.
25
26 The views and conclusions contained in the software and documentation are those of the
27 authors and should not be interpreted as representing official policies, either expressed
28 or implied, of the FreeBSD Project.
29 */
30 #if defined(WIN32) || defined(WIN64)
31 #pragma warning(disable : 4996)
32 #else
33 #ifndef strdup
34 #define _GNU_SOURCE
35 #endif
36 #endif
37
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <ctype.h>
42 #include "sxmlc.h"
43
/*
 Return 'ret' from the enclosing function when 'node' fails XMLNode_is_valid().
 Wrapped in do/while(0) so the macro behaves as a single statement and cannot
 capture a following 'else'.
*/
#define CHECK_NODE(node,ret) do { if (!XMLNode_is_valid(node)) return (ret); } while (0)
45
46 /* UTF8 handling for Windows */
47 #ifndef SXMLC_UNICODE
48 #if defined(WIN32) || defined(WIN64)
49 #include <windows.h>
50 #endif
51 #endif
52
/*
 Determine if character is not ASCII (i.e. outside the [0, 127] range once converted to 'int').
 The argument is parenthesized so that compound expressions are evaluated as a whole.
 NB: 'c' is evaluated twice; do not pass expressions with side effects.
*/
#define sx_isunicode(c) ((int)(c) < 0 || (int)(c) > 127)
55
56 #if defined(WIN32) || defined(WIN64)
57 FILE* sx_fopen(const SXML_CHAR* filename, const SXML_CHAR* mode)
58 {
59 FILE* ret = NULL;
60 int is_unicode = false;
61 const char* p;
62
63 for (p = filename; p && *p; p++) {
64 if (sx_isunicode(*p)) {
65 is_unicode = true;
66 break;
67 }
68 }
69
70 if (is_unicode) {
71 wchar_t* wmode = mode[0] == 'w'
72 ? mode[1] == 'b' ? L"wb" : L"wt"
73 : mode[1] == 'b' ? L"rb" : L"rt";
74 wchar_t* wide = NULL;
75 const int length = MultiByteToWideChar(CP_UTF8, 0, filename, -1, NULL, 0);
76
77 if (length > 1) {
78 wide = (wchar_t*)malloc(length * sizeof(wchar_t));
79 if (wide) {
80 MultiByteToWideChar(CP_UTF8, 0, filename, -1, wide, length);
81 ret = _wfopen(wide, wmode);
82 free(wide);
83 }
84 }
85 } else
86 ret = fopen(filename, mode);
87
88 return ret;
89 }
90 #endif
91
92 /**
93 * \brief Definition of "special" tags such as "<? ?>" or "<![CDATA[ ]]/>".
94 *
95 * These tags are considered having a start and an end with some data in between that will
96 * be stored in the 'tag' member of an XMLNode.
97 * The `tag_type` member is a constant that is associated to such tag.
98 * All `len_*` members are basically the "sx_strlen()" of 'start' and 'end' members.
99 */
100 typedef struct _Tag {
101 TagType tag_type; /**< The tag type. */
102 SXML_CHAR* start; /**< The string representing the tag "opening". *Must start with <*. */
103 int len_start; /**< The `strlen(start)`. */
104 SXML_CHAR* end; /**< The string representing the tag "closing". *Must end with >*. */
105 int len_end; /**< The `strlen(end)`. */
106 } _TAG;
107
108 /*
109 List of "special" tags handled by sxmlc.
110 NB the "<!DOCTYPE" tag has a special handling because its 'end' changes according
111 to its content ('>' or ']>').
112 */
113 static _TAG _spec[] = {
114 { TAG_INSTR, C2SX("<?"), 2, C2SX("?>"), 2 },
115 { TAG_COMMENT, C2SX("<!--"), 4, C2SX("-->"), 3 },
116 { TAG_CDATA, C2SX("<![CDATA["), 9, C2SX("]]>"), 3 }
117 };
118 static int NB_SPECIAL_TAGS = (int)(sizeof(_spec) / sizeof(_TAG)); /* Auto computation of number of special tags */
119
120 /*
121 User-registered tags.
122 */
123 static struct _SpecialTag {
124 _TAG *tags;
125 int n_tags;
126 } _user_tags = { NULL, 0 };
127
128 int XML_register_user_tag(TagType tag_type, SXML_CHAR* start, SXML_CHAR* end)
129 {
130 _TAG* p;
131 int i, n, le;
132
133 if (tag_type < TAG_USER)
134 return -1;
135
136 if (start == NULL || end == NULL || *start != C2SX('<'))
137 return -1;
138
139 le = sx_strlen(end);
140 if (end[le-1] != C2SX('>'))
141 return -1;
142
143 i = _user_tags.n_tags;
144 n = i + 1;
145 p = __realloc(_user_tags.tags, n * sizeof(_TAG));
146 if (p == NULL)
147 return -1;
148
149 p[i].tag_type = tag_type;
150 p[i].start = start;
151 p[i].end = end;
152 p[i].len_start = sx_strlen(start);
153 p[i].len_end = le;
154 _user_tags.tags = p;
155 _user_tags.n_tags = n;
156
157 return i;
158 }
159
160 int XML_unregister_user_tag(int i_tag)
161 {
162 _TAG* pt;
163
164 if (i_tag < 0 || i_tag >= _user_tags.n_tags)
165 return -1;
166
167 if (_user_tags.n_tags == 1)
168 pt = NULL;
169 else {
170 pt = __malloc((_user_tags.n_tags - 1) * sizeof(_TAG));
171 if (pt == NULL)
172 return -1;
173 }
174
175 if (pt != NULL) {
176 memcpy(pt, _user_tags.tags, i_tag * sizeof(_TAG));
177 memcpy(&pt[i_tag], &_user_tags.tags[i_tag + 1], (_user_tags.n_tags - i_tag - 1) * sizeof(_TAG));
178 }
179 if (_user_tags.tags != NULL)
180 __free(_user_tags.tags);
181 _user_tags.tags = pt;
182 _user_tags.n_tags--;
183
184 return _user_tags.n_tags;
185 }
186
187 int XML_get_nb_registered_user_tags(void)
188 {
189 return _user_tags.n_tags;
190 }
191
192 int XML_get_registered_user_tag(TagType tag_type)
193 {
194 int i;
195
196 for (i = 0; i < _user_tags.n_tags; i++)
197 if (_user_tags.tags[i].tag_type == tag_type)
198 return i;
199
200 return -1;
201 }
202
203 /* --- XMLNode methods --- */
204
205 /*
206 Add 'node' to given '*children_array' of '*len_array' elements.
207 '*len_array' is overwritten with the number of elements in '*children_array' after its reallocation.
208 Return the index of the newly added 'node' in '*children_array', or '-1' for memory error.
209 */
210 static int _add_node(XMLNode*** children_array, int* len_array, XMLNode* node)
211 {
212 XMLNode** pt = __realloc(*children_array, (*len_array+1) * sizeof(XMLNode*));
213
214 if (pt == NULL)
215 return -1;
216
217 pt[*len_array] = node;
218 *children_array = pt;
219
220 return (*len_array)++;
221 }
222
223 int XMLNode_init(XMLNode* node)
224 {
225 if (node == NULL)
226 return false;
227
228 if (node->init_value == XML_INIT_DONE)
229 return true; /*(void)XMLNode_free(node);*/
230
231 node->tag = NULL;
232 node->text = NULL;
233
234 node->attributes = NULL;
235 node->n_attributes = 0;
236
237 node->father = NULL;
238 node->children = NULL;
239 node->n_children = 0;
240
241 node->tag_type = TAG_NONE;
242 node->active = true;
243
244 node->init_value = XML_INIT_DONE;
245
246 return true;
247 }
248
249 XMLNode* XMLNode_allocN(int n)
250 {
251 int i;
252 XMLNode* p;
253
254 if (n <= 0)
255 return NULL;
256
257 p = __calloc(n, sizeof(XMLNode));
258 if (p == NULL)
259 return NULL;
260
261 for (i = 0; i < n; i++)
262 (void)XMLNode_init(&p[i]);
263
264 return p;
265 }
266
267 XMLNode* XMLNode_new(const TagType tag_type, const SXML_CHAR* tag, const SXML_CHAR* text)
268 {
269 XMLNode* node = XMLNode_alloc();
270 if (node == NULL)
271 return NULL;
272
273 if (!XMLNode_set_tag(node, tag) || (text != NULL && !XMLNode_set_text(node, text))) {
274 __free(node);
275 return NULL;
276 }
277
278 node->tag_type = tag_type;
279
280 return node;
281 }
282
283 XMLNode* XMLNode_dup(const XMLNode* node, int copy_children)
284 {
285 XMLNode* n;
286
287 if (node == NULL)
288 return NULL;
289
290 n = __calloc(1, sizeof(XMLNode));
291 if (n == NULL)
292 return NULL;
293
294 XMLNode_init(n);
295 if (!XMLNode_copy(n, node, copy_children)) {
296 XMLNode_free(n);
297
298 return NULL;
299 }
300
301 return n;
302 }
303
304 int XMLNode_free(XMLNode* node)
305 {
306 CHECK_NODE(node, false);
307
308 if (node->tag != NULL) {
309 __free(node->tag);
310 node->tag = NULL;
311 }
312
313 XMLNode_remove_text(node);
314 XMLNode_remove_all_attributes(node);
315 XMLNode_remove_children(node);
316
317 node->tag_type = TAG_NONE;
318
319 return true;
320 }
321
322 int XMLNode_copy(XMLNode* dst, const XMLNode* src, int copy_children)
323 {
324 int i;
325
326 if (dst == NULL || (src != NULL && src->init_value != XML_INIT_DONE))
327 return false;
328
329 (void)XMLNode_free(dst); /* 'dst' is freed first */
330
331 /* NULL 'src' resets 'dst' */
332 if (src == NULL)
333 return true;
334
335 /* Tag */
336 if (src->tag != NULL) {
337 dst->tag = sx_strdup(src->tag);
338 if (dst->tag == NULL) goto copy_err;
339 }
340
341 /* Text */
342 if (dst->text != NULL) {
343 dst->text = sx_strdup(src->text);
344 if (dst->text == NULL) goto copy_err;
345 }
346
347 /* Attributes */
348 if (src->n_attributes > 0) {
349 dst->attributes = __calloc(src->n_attributes, sizeof(XMLAttribute));
350 if (dst->attributes== NULL) goto copy_err;
351 dst->n_attributes = src->n_attributes;
352 for (i = 0; i < src->n_attributes; i++) {
353 dst->attributes[i].name = sx_strdup(src->attributes[i].name);
354 dst->attributes[i].value = sx_strdup(src->attributes[i].value);
355 if (dst->attributes[i].name == NULL || dst->attributes[i].value == NULL) goto copy_err;
356 dst->attributes[i].active = src->attributes[i].active;
357 }
358 }
359
360 dst->tag_type = src->tag_type;
361 dst->father = src->father;
362 dst->user = src->user;
363 dst->active = src->active;
364
365 /* Copy children if required (and there are any) */
366 if (copy_children && src->n_children > 0) {
367 dst->children = __calloc(src->n_children, sizeof(XMLNode*));
368 if (dst->children == NULL) goto copy_err;
369 dst->n_children = src->n_children;
370 for (i = 0; i < src->n_children; i++) {
371 if (!XMLNode_copy(dst->children[i], src->children[i], true)) goto copy_err;
372 }
373 }
374
375 return true;
376
377 copy_err:
378 (void)XMLNode_free(dst);
379
380 return false;
381 }
382
383 int XMLNode_set_active(XMLNode* node, int active)
384 {
385 CHECK_NODE(node, false);
386
387 node->active = active;
388
389 return true;
390 }
391
392 int XMLNode_set_tag(XMLNode* node, const SXML_CHAR* tag)
393 {
394 SXML_CHAR* newtag;
395 if (node == NULL || tag == NULL || node->init_value != XML_INIT_DONE)
396 return false;
397
398 newtag = sx_strdup(tag);
399 if (newtag == NULL)
400 return false;
401 if (node->tag != NULL) __free(node->tag);
402 node->tag = newtag;
403
404 return true;
405 }
406
407 int XMLNode_set_type(XMLNode* node, const TagType tag_type)
408 {
409 CHECK_NODE(node, false);
410
411 switch (tag_type) {
412 case TAG_ERROR:
413 case TAG_END:
414 case TAG_PARTIAL:
415 case TAG_NONE:
416 return false;
417
418 default:
419 node->tag_type = tag_type;
420 return true;
421 }
422 }
423
424 int XMLNode_set_attribute(XMLNode* node, const SXML_CHAR* attr_name, const SXML_CHAR* attr_value)
425 {
426 XMLAttribute* pt;
427 int i;
428
429 if (node == NULL || attr_name == NULL || attr_name[0] == NULC || node->init_value != XML_INIT_DONE)
430 return -1;
431
432 i = XMLNode_search_attribute(node, attr_name, 0);
433 if (i >= 0) { /* Attribute found: update it */
434 SXML_CHAR* value = NULL;
435 if (attr_value != NULL && (value = sx_strdup(attr_value)) == NULL)
436 return -1;
437 pt = node->attributes;
438 if (pt[i].value != NULL)
439 __free(pt[i].value);
440 pt[i].value = value;
441 } else { /* Attribute not found: add it */
442 SXML_CHAR* name = sx_strdup(attr_name);
443 SXML_CHAR* value = (attr_value == NULL ? NULL : sx_strdup(attr_value));
444 if (name == NULL || (value == NULL && attr_value != NULL)) {
445 if (value != NULL)
446 __free(value);
447 if (name != NULL)
448 __free(name);
449 return -1;
450 }
451 i = node->n_attributes;
452 pt = __realloc(node->attributes, (i+1) * sizeof(XMLAttribute));
453 if (pt == NULL) {
454 if (value != NULL)
455 __free(value);
456 __free(name);
457 return -1;
458 }
459
460 pt[i].name = name;
461 pt[i].value = value;
462 pt[i].active = true;
463 node->attributes = pt;
464 node->n_attributes = i + 1;
465 }
466
467 return node->n_attributes;
468 }
469
470 int XMLNode_get_attribute_with_default(XMLNode* node, const SXML_CHAR* attr_name, const SXML_CHAR** attr_value, const SXML_CHAR* default_attr_value)
471 {
472 XMLAttribute* pt;
473 int i;
474
475 if (node == NULL || attr_name == NULL || attr_name[0] == NULC || attr_value == NULL || node->init_value != XML_INIT_DONE)
476 return false;
477
478 i = XMLNode_search_attribute(node, attr_name, 0);
479 if (i >= 0) {
480 pt = node->attributes;
481 if (pt[i].value != NULL) {
482 *attr_value = sx_strdup(pt[i].value);
483 if (*attr_value == NULL)
484 return false;
485 } else
486 *attr_value = NULL; /* NULL but returns 'true' as 'NULL' is the actual attribute value */
487 } else if (default_attr_value != NULL) {
488 *attr_value = sx_strdup(default_attr_value);
489 if (*attr_value == NULL)
490 return false;
491 } else
492 *attr_value = NULL;
493
494 return true;
495 }
496
497 int XMLNode_get_attribute_count(const XMLNode* node)
498 {
499 int i, n;
500
501 CHECK_NODE(node, -1);
502
503 for (i = n = 0; i < node->n_attributes; i++)
504 if (node->attributes[i].active) n++;
505
506 return n;
507 }
508
509 int XMLNode_search_attribute(const XMLNode* node, const SXML_CHAR* attr_name, int i_search)
510 {
511 int i;
512
513 if (node == NULL || attr_name == NULL || attr_name[0] == NULC || i_search < 0 || i_search >= node->n_attributes)
514 return -1;
515
516 for (i = i_search; i < node->n_attributes; i++)
517 if (node->attributes[i].active && !sx_strcmp(node->attributes[i].name, attr_name))
518 return i;
519
520 return -1;
521 }
522
523 int XMLNode_remove_attribute(XMLNode* node, int i_attr)
524 {
525 XMLAttribute* pt;
526 if (node == NULL || node->init_value != XML_INIT_DONE || i_attr < 0 || i_attr >= node->n_attributes)
527 return -1;
528
529 /* Before modifying first see if we run out of memory */
530 if (node->n_attributes == 1)
531 pt = NULL;
532 else {
533 pt = __malloc((node->n_attributes - 1) * sizeof(XMLAttribute));
534 if (pt == NULL)
535 return -1;
536 }
537
538 /* Can't fail anymore, free item */
539 if (node->attributes[i_attr].name != NULL) __free(node->attributes[i_attr].name);
540 if (node->attributes[i_attr].value != NULL) __free(node->attributes[i_attr].value);
541
542 if (pt != NULL) {
543 memcpy(pt, node->attributes, i_attr * sizeof(XMLAttribute));
544 memcpy(&pt[i_attr], &node->attributes[i_attr + 1], (node->n_attributes - i_attr - 1) * sizeof(XMLAttribute));
545 }
546 if (node->attributes != NULL)
547 __free(node->attributes);
548 node->attributes = pt;
549 node->n_attributes--;
550
551 return node->n_attributes;
552 }
553
554 int XMLNode_remove_all_attributes(XMLNode* node)
555 {
556 int i;
557
558 CHECK_NODE(node, false);
559
560 if (node->attributes != NULL) {
561 for (i = 0; i < node->n_attributes; i++) {
562 if (node->attributes[i].name != NULL)
563 __free(node->attributes[i].name);
564 if (node->attributes[i].value != NULL)
565 __free(node->attributes[i].value);
566 }
567 __free(node->attributes);
568 node->attributes = NULL;
569 }
570 node->n_attributes = 0;
571
572 return true;
573 }
574
575 int XMLNode_set_text(XMLNode* node, const SXML_CHAR* text)
576 {
577 SXML_CHAR* p;
578 CHECK_NODE(node, false);
579
580 if (text == NULL) { /* We want to remove it => free node text */
581 if (node->text != NULL) {
582 __free(node->text);
583 node->text = NULL;
584 }
585
586 return true;
587 }
588
589 p = sx_strdup(text);
590 if (p == NULL)
591 return false;
592 if (node->text != NULL)
593 __free(node->text);
594 node->text = p;
595
596 return true;
597 }
598
599 int XMLNode_add_child(XMLNode* node, XMLNode* child)
600 {
601 if (node == NULL || child == NULL || node->init_value != XML_INIT_DONE || child->init_value != XML_INIT_DONE)
602 return false;
603
604 if (_add_node(&node->children, &node->n_children, child) >= 0) {
605 node->tag_type = TAG_FATHER;
606 child->father = node;
607 return true;
608 } else
609 return false;
610 }
611
612 int XMLNode_insert_child(XMLNode* node, XMLNode* child, int index)
613 {
614 int i, j;
615
616 CHECK_NODE(node, -1);
617
618 /* We could process cases "first" and "last" in an optimized way, but we prefer readability to (micro-)optimization */
619 if (index < 0) /* Before first => first */
620 index = 0;
621 if (index >= node->n_children) /* After last => last */
622 index = node->n_children - 1;
623
624 for (i = 0; i < node->n_children; i++) {
625 if (!node->children[i]->active || index-- > 0)
626 continue;
627 /* Insert it here, at 'i' */
628 if (_add_node(&node->children, &node->n_children, child) >= 0) {
629 node->tag_type = TAG_FATHER;
630 child->father = node;
631 /* Erase 'child', which is the last node ('n_children' has been incremented by '_add_node()') */
632 for (j = node->n_children - 1; j >= i; j--)
633 node->children[j] = node->children[j-1];
634 node->children[i] = child; /* Set it */
635 return true;
636 } else
637 return false;
638 }
639
640 return false; /* Oops! */
641 }
642
643 int XMLNode_move_child(XMLNode* node, int from, int to)
644 {
645 XMLNode* nfrom;
646
647 CHECK_NODE(node, false);
648 if (from < 0 || from >= node->n_children)
649 return false;
650 if (to < 0) /* Before first => first */
651 to = 0;
652 if (to >= node->n_children) /* After last => last */
653 to = node->n_children - 1;
654
655 nfrom = node->children[from];
656 if (to > from) { /* Move forward: bring following nodes (up to 'to') backward one position */
657 int i;
658 for (i = from; i < to; i++)
659 node->children[i] = node->children[i+1];
660 } else { /* Move backward: bring previous nodes (up to 'from') forward one position */
661 int i;
662 for (i = from - 1; i >= to; i--)
663 node->children[i+1] = node->children[i];
664 }
665 node->children[to] = nfrom;
666
667 return true;
668 }
669
670
671 int XMLNode_get_children_count(const XMLNode* node)
672 {
673 int i, n;
674
675 CHECK_NODE(node, -1);
676
677 for (i = n = 0; i < node->n_children; i++)
678 if (node->children[i]->active)
679 n++;
680
681 return n;
682 }
683
684 int XMLNode_get_index(const XMLNode* node)
685 {
686 int i, i_child;
687
688 CHECK_NODE(node, -1);
689
690 if (node->father == NULL)
691 return 0;
692
693 for (i = i_child = 0; i < node->father->n_children; i++) {
694 if (!node->father->children[i]->active)
695 continue;
696 if (node->father->children[i] == node)
697 return i_child;
698 i_child++;
699 }
700
701 return -2; /* Oops! */
702 }
703
704 XMLNode* XMLNode_get_child(const XMLNode* node, int i_child)
705 {
706 int i;
707
708 if (node == NULL || node->init_value != XML_INIT_DONE || i_child < 0 || i_child >= node->n_children)
709 return NULL;
710
711 for (i = 0; i < node->n_children; i++) {
712 if (!node->children[i]->active)
713 i_child++;
714 else if (i == i_child)
715 return node->children[i];
716 }
717
718 return NULL;
719 }
720
721 int XMLNode_remove_child(XMLNode* node, int i_child, int free_child)
722 {
723 int i;
724 XMLNode** pt;
725
726 if (node == NULL || node->init_value != XML_INIT_DONE || i_child < 0 || i_child >= node->n_children)
727 return -1;
728
729 /* Lookup 'i_child'th active child */
730 for (i = 0; i < node->n_children; i++) {
731 if (!node->children[i]->active)
732 i_child++;
733 else if (i == i_child)
734 break;
735 }
736 if (i >= node->n_children)
737 return -1; /* Children is not found */
738
739 /* Before modifying first see if we run out of memory */
740 if (node->n_children == 1) {
741 pt = NULL;
742 } else {
743 pt = __malloc((node->n_children - 1) * sizeof(XMLNode*));
744 if (pt == NULL)
745 return -1;
746 }
747
748 /* Can't fail anymore, free item */
749 (void)XMLNode_free(node->children[i_child]);
750 if (free_child)
751 __free(node->children[i_child]);
752
753 if (pt != NULL) {
754 memcpy(pt, node->children, i_child * sizeof(XMLNode*));
755 memcpy(&pt[i_child], &node->children[i_child + 1], (node->n_children - i_child - 1) * sizeof(XMLNode*));
756 }
757 if (node->children != NULL)
758 __free(node->children);
759 node->children = pt;
760 node->n_children--;
761 if (node->n_children == 0)
762 node->tag_type = TAG_SELF;
763
764 return node->n_children;
765 }
766
767 int XMLNode_remove_children(XMLNode* node)
768 {
769 int i;
770
771 CHECK_NODE(node, false);
772
773 if (node->children != NULL) {
774 for (i = 0; i < node->n_children; i++)
775 if (node->children[i] != NULL) {
776 (void)XMLNode_free(node->children[i]);
777 __free(node->children[i]);
778 }
779 __free(node->children);
780 node->children = NULL;
781 }
782 node->n_children = 0;
783
784 return true;
785 }
786
787 int XMLNode_equal(const XMLNode* node1, const XMLNode* node2)
788 {
789 int i, j;
790
791 if (node1 == node2)
792 return true;
793
794 if (node1 == NULL || node2 == NULL || node1->init_value != XML_INIT_DONE || node2->init_value != XML_INIT_DONE)
795 return false;
796
797 if (sx_strcmp(node1->tag, node2->tag))
798 return false;
799
800 /* Test all attributes from 'node1' */
801 for (i = 0; i < node1->n_attributes; i++) {
802 if (!node1->attributes[i].active)
803 continue;
804 j = XMLNode_search_attribute(node2, node1->attributes[i].name, 0);
805 if (j < 0)
806 return false;
807 if (sx_strcmp(node1->attributes[i].value, node2->attributes[j].value))
808 return false;
809 }
810
811 /* Test other attributes from 'node2' that might not be in 'node1' */
812 for (i = 0; i < node2->n_attributes; i++) {
813 if (!node2->attributes[i].active)
814 continue;
815 j = XMLNode_search_attribute(node1, node2->attributes[i].name, 0);
816 if (j < 0)
817 return false;
818 if (sx_strcmp(node2->attributes[i].name, node1->attributes[j].name))
819 return false;
820 }
821
822 return true;
823 }
824
825 XMLNode* XMLNode_next_sibling(const XMLNode* node)
826 {
827 int i;
828 XMLNode* father;
829
830 if (node == NULL || node->init_value != XML_INIT_DONE || node->father == NULL)
831 return NULL;
832
833 father = node->father;
834 for (i = 0; i < father->n_children && father->children[i] != node; i++) ;
835 i++; /* father->children[i] is now 'node' next sibling */
836
837 return i < father->n_children ? father->children[i] : NULL;
838 }
839
840 static XMLNode* _XMLNode_next(const XMLNode* node, int in_children)
841 {
842 XMLNode* node2;
843
844 CHECK_NODE(node, NULL);
845
846 /* Check first child */
847 if (in_children && node->n_children > 0)
848 return node->children[0];
849
850 /* Check next sibling */
851 if ((node2 = XMLNode_next_sibling(node)) != NULL)
852 return node2;
853
854 /* Check next uncle */
855 return _XMLNode_next(node->father, false);
856 }
857
858 XMLNode* XMLNode_next(const XMLNode* node)
859 {
860 return _XMLNode_next(node, true);
861 }
862
863 /* --- XMLDoc methods --- */
864
865 int XMLDoc_init(XMLDoc* doc)
866 {
867 if (doc == NULL)
868 return false;
869
870 doc->filename[0] = NULC;
871 memset(&doc->bom, 0, sizeof(doc->bom));
872 doc->nodes = NULL;
873 doc->n_nodes = 0;
874 doc->i_root = -1;
875 doc->init_value = XML_INIT_DONE;
876
877 return true;
878 }
879
880 int XMLDoc_free(XMLDoc* doc)
881 {
882 int i;
883
884 if (doc == NULL || doc->init_value != XML_INIT_DONE)
885 return false;
886
887 for (i = 0; i < doc->n_nodes; i++) {
888 (void)XMLNode_free(doc->nodes[i]);
889 __free(doc->nodes[i]);
890 }
891 __free(doc->nodes);
892 doc->nodes = NULL;
893 doc->n_nodes = 0;
894 doc->i_root = -1;
895
896 return true;
897 }
898
899 int XMLDoc_set_root(XMLDoc* doc, int i_root)
900 {
901 if (doc == NULL || doc->init_value != XML_INIT_DONE || i_root < 0 || i_root >= doc->n_nodes)
902 return false;
903
904 doc->i_root = i_root;
905
906 return true;
907 }
908
909 int XMLDoc_add_node(XMLDoc* doc, XMLNode* node)
910 {
911 if (doc == NULL || node == NULL || doc->init_value != XML_INIT_DONE)
912 return -1;
913
914 if (_add_node(&doc->nodes, &doc->n_nodes, node) < 0)
915 return -1;
916
917 if (node->tag_type == TAG_FATHER)
918 doc->i_root = doc->n_nodes - 1; /* Main root node is the last father node */
919
920 return doc->n_nodes;
921 }
922
923 int XMLDoc_remove_node(XMLDoc* doc, int i_node, int free_node)
924 {
925 XMLNode** pt;
926 if (doc == NULL || doc->init_value != XML_INIT_DONE || i_node < 0 || i_node > doc->n_nodes)
927 return false;
928
929 /* Before modifying first see if we run out of memory */
930 if (doc->n_nodes == 1)
931 pt = NULL;
932 else {
933 pt = __malloc((doc->n_nodes - 1) * sizeof(XMLNode*));
934 if (pt == NULL)
935 return false;
936 }
937
938 /* Can't fail anymore, free item */
939 (void)XMLNode_free(doc->nodes[i_node]);
940 if (free_node) __free(doc->nodes[i_node]);
941
942 if (pt != NULL) {
943 memcpy(pt, &doc->nodes[i_node], i_node * sizeof(XMLNode*));
944 memcpy(&pt[i_node], &doc->nodes[i_node + 1], (doc->n_nodes - i_node - 1) * sizeof(XMLNode*));
945 }
946
947 if (doc->nodes != NULL)
948 __free(doc->nodes);
949 doc->nodes = pt;
950 doc->n_nodes--;
951
952 return true;
953 }
954
955 /*
956 Helper functions to print formatting before a new tag.
957 Returns the new number of characters in the line.
958 */
959 static int _count_new_char_line(const SXML_CHAR* str, int nb_char_tab, int cur_sz_line)
960 {
961 for (; *str; str++) {
962 if (*str == C2SX('\n'))
963 cur_sz_line = 0;
964 else if (*str == C2SX('\t'))
965 cur_sz_line += nb_char_tab;
966 else
967 cur_sz_line++;
968 }
969
970 return cur_sz_line;
971 }
972 static int _print_formatting(const XMLNode* node, FILE* f, const SXML_CHAR* tag_sep, const SXML_CHAR* child_sep, int nb_char_tab, int cur_sz_line)
973 {
974 if (tag_sep != NULL) {
975 sx_fputs(tag_sep, f);
976 cur_sz_line = _count_new_char_line(tag_sep, nb_char_tab, cur_sz_line);
977 }
978 if (child_sep != NULL) {
979 for (node = node->father; node != NULL; node = node->father) {
980 sx_fputs(child_sep, f);
981 cur_sz_line = _count_new_char_line(child_sep, nb_char_tab, cur_sz_line);
982 }
983 }
984
985 return cur_sz_line;
986 }
987
988 static int _XMLNode_print_header(const XMLNode* node, FILE* f, const SXML_CHAR* tag_sep, const SXML_CHAR* child_sep, const SXML_CHAR* attr_sep, int sz_line, int cur_sz_line, int nb_char_tab)
989 {
990 int i;
991 SXML_CHAR* p;
992
993 if (node == NULL || f == NULL || !node->active || node->tag == NULL || node->tag[0] == NULC)
994 return -1;
995
996 /* Special handling of DOCTYPE */
997 if (node->tag_type == TAG_DOCTYPE) {
998 /* Search for an unescaped '[' in the DOCTYPE definition, in which case the end delimiter should be ']>' instead of '>' */
999 for (p = sx_strchr(node->tag, C2SX('[')); p != NULL && *(p-1) == C2SX('\\'); p = sx_strchr(p+1, C2SX('['))) ;
1000 cur_sz_line += sx_fprintf(f, C2SX("<!DOCTYPE%s%s>"), node->tag, p != NULL ? C2SX("]") : C2SX(""));
1001 return cur_sz_line;
1002 }
1003
1004 /* Check for special tags first */
1005 for (i = 0; i < NB_SPECIAL_TAGS; i++) {
1006 if (node->tag_type == _spec[i].tag_type) {
1007 sx_fprintf(f, C2SX("%s%s%s"), _spec[i].start, node->tag, _spec[i].end);
1008 cur_sz_line += sx_strlen(_spec[i].start) + sx_strlen(node->tag) + sx_strlen(_spec[i].end);
1009 return cur_sz_line;
1010 }
1011 }
1012
1013 /* Check for user tags */
1014 for (i = 0; i < _user_tags.n_tags; i++) {
1015 if (node->tag_type == _user_tags.tags[i].tag_type) {
1016 sx_fprintf(f, C2SX("%s%s%s"), _user_tags.tags[i].start, node->tag, _user_tags.tags[i].end);
1017 cur_sz_line += sx_strlen(_user_tags.tags[i].start) + sx_strlen(node->tag) + sx_strlen(_user_tags.tags[i].end);
1018 return cur_sz_line;
1019 }
1020 }
1021
1022 /* Print tag name */
1023 cur_sz_line += sx_fprintf(f, C2SX("<%s"), node->tag);
1024
1025 /* Print attributes */
1026 if (attr_sep == NULL)
1027 attr_sep = C2SX(" ");
1028 for (i = 0; i < node->n_attributes; i++) {
1029 if (!node->attributes[i].active)
1030 continue;
1031 cur_sz_line += sx_strlen(node->attributes[i].name) + sx_strlen(node->attributes[i].value) + 3;
1032 if (sz_line > 0 && cur_sz_line > sz_line) {
1033 cur_sz_line = _print_formatting(node, f, tag_sep, child_sep, nb_char_tab, cur_sz_line);
1034 /* Add extra separator, as if new line was a child of the previous one */
1035 if (child_sep != NULL) {
1036 sx_fputs(child_sep, f);
1037 cur_sz_line = _count_new_char_line(child_sep, nb_char_tab, cur_sz_line);
1038 }
1039 }
1040 /* Attribute name */
1041 cur_sz_line = _count_new_char_line(attr_sep, nb_char_tab, cur_sz_line);
1042 sx_fprintf(f, C2SX("%s%s="), attr_sep, node->attributes[i].name);
1043
1044 /* Attribute value */
1045 (void)sx_fputc(XML_DEFAULT_QUOTE, f);
1046 cur_sz_line += fprintHTML(f, node->attributes[i].value) + 2;
1047 (void)sx_fputc(XML_DEFAULT_QUOTE, f);
1048 }
1049
1050 /* End the tag if there are no children and no text */
1051 if (node->n_children == 0 && (node->text == NULL || node->text[0] == NULC)) {
1052 cur_sz_line += sx_fprintf(f, C2SX("/>"));
1053 } else {
1054 (void)sx_fputc(C2SX('>'), f);
1055 cur_sz_line++;
1056 }
1057
1058 return cur_sz_line;
1059 }
1060
1061 int XMLNode_print_header(const XMLNode* node, FILE* f, int sz_line, int nb_char_tab)
1062 {
1063 return _XMLNode_print_header(node, f, NULL, NULL, NULL, sz_line, 0, nb_char_tab) < 0 ? false : true;
1064 }
1065
1066 static int _XMLNode_print(const XMLNode* node, FILE* f, const SXML_CHAR* tag_sep, const SXML_CHAR* child_sep, const SXML_CHAR* attr_sep, int keep_text_spaces, int sz_line, int cur_sz_line, int nb_char_tab, int depth)
1067 {
1068 int i;
1069 SXML_CHAR* p;
1070
1071 if (node != NULL && node->tag_type==TAG_TEXT) { /* Text has to be printed: check if it is only spaces */
1072 if (!keep_text_spaces) {
1073 for (p = node->text; p != NULL && *p != NULC && sx_isspace(*p); p++) ; /* 'p' points to first non-space character, or to '\0' if only spaces */
1074 } else
1075 p = node->text; /* '*p' won't be '\0' */
1076 if (p != NULL && *p != NULC)
1077 cur_sz_line += fprintHTML(f, node->text);
1078 return cur_sz_line;
1079 }
1080
1081 if (node == NULL || f == NULL || !node->active || node->tag == NULL || node->tag[0] == NULC)
1082 return -1;
1083
1084 if (nb_char_tab <= 0)
1085 nb_char_tab = 1;
1086
1087 /* Print formatting */
1088 if (depth < 0) /* UGLY HACK: 'depth' forced negative on very first line so we don't print an extra 'tag_sep' (usually "\n" when pretty-printing) */
1089 depth = 0;
1090 else
1091 cur_sz_line = _print_formatting(node, f, tag_sep, child_sep, nb_char_tab, cur_sz_line);
1092
1093 _XMLNode_print_header(node, f, tag_sep, child_sep, attr_sep, sz_line, cur_sz_line, nb_char_tab);
1094
1095 if (node->text != NULL && node->text[0] != NULC) {
1096 /* Text has to be printed: check if it is only spaces */
1097 if (!keep_text_spaces) {
1098 for (p = node->text; *p != NULC && sx_isspace(*p); p++) ; /* 'p' points to first non-space character, or to '\0' if only spaces */
1099 } else
1100 p = node->text; /* '*p' won't be '\0' */
1101 if (*p != NULC) cur_sz_line += fprintHTML(f, node->text);
1102 } else if (node->n_children <= 0) /* Everything has already been printed */
1103 return cur_sz_line;
1104
1105 /* Recursively print children */
1106 for (i = 0; i < node->n_children; i++)
1107 (void)_XMLNode_print(node->children[i], f, tag_sep, child_sep, attr_sep, keep_text_spaces, sz_line, cur_sz_line, nb_char_tab, depth+1);
1108
1109 /* Print tag end after children */
1110 /* Print formatting */
1111 if (node->n_children > 0)
1112 cur_sz_line = _print_formatting(node, f, tag_sep, child_sep, nb_char_tab, cur_sz_line);
1113 cur_sz_line += sx_fprintf(f, C2SX("</%s>"), node->tag);
1114
1115 return cur_sz_line;
1116 }
1117
1118 int XMLNode_print_attr_sep(const XMLNode* node, FILE* f, const SXML_CHAR* tag_sep, const SXML_CHAR* child_sep, const SXML_CHAR* attr_sep, int keep_text_spaces, int sz_line, int nb_char_tab)
1119 {
1120 return _XMLNode_print(node, f, tag_sep, child_sep, attr_sep, keep_text_spaces, sz_line, 0, nb_char_tab, 0);
1121 }
1122
1123 int XMLDoc_print_attr_sep(const XMLDoc* doc, FILE* f, const SXML_CHAR* tag_sep, const SXML_CHAR* child_sep, const SXML_CHAR* attr_sep, int keep_text_spaces, int sz_line, int nb_char_tab)
1124 {
1125 int i, depth, cur_sz_line;
1126
1127 if (doc == NULL || f == NULL || doc->init_value != XML_INIT_DONE)
1128 return false;
1129
1130 /* Write BOM if it exist */
1131 if (doc->sz_bom > 0) fwrite(doc->bom, sizeof(unsigned char), doc->sz_bom, f);
1132
1133 depth = -1; /* UGLY HACK: 'depth' forced negative on very first line so we don't print an extra 'tag_sep' (usually "\n") */
1134 for (i = 0, cur_sz_line = 0; i < doc->n_nodes; i++) {
1135 cur_sz_line = _XMLNode_print(doc->nodes[i], f, tag_sep, child_sep, attr_sep, keep_text_spaces, sz_line, cur_sz_line, nb_char_tab, depth);
1136 depth = 0;
1137 }
1138 /* TODO: Find something more graceful than 'depth=-1', even though everyone knows I probably never will ;) */
1139
1140 return true;
1141 }
1142
1143 /* --- */
1144
1145 int XML_parse_attribute_to(const SXML_CHAR* str, int to, XMLAttribute* xmlattr)
1146 {
1147 const SXML_CHAR *p;
1148 int i, n0, n1, remQ = 0;
1149 int ret = 1;
1150 SXML_CHAR quote = '\0';
1151
1152 if (str == NULL || xmlattr == NULL)
1153 return 0;
1154
1155 if (to < 0)
1156 to = sx_strlen(str) - 1;
1157
1158 /* Search for the '=' */
1159 /* 'n0' is where the attribute name stops, 'n1' is where the attribute value starts */
1160 for (n0 = 0; n0 != to && str[n0] != C2SX('=') && !sx_isspace(str[n0]); n0++) ; /* Search for '=' or a space */
1161 for (n1 = n0; n1 != to && sx_isspace(str[n1]); n1++) ; /* Search for something not a space */
1162 if (str[n1] != C2SX('='))
1163 return 0; /* '=' not found: malformed string */
1164 for (n1++; n1 != to && sx_isspace(str[n1]); n1++) ; /* Search for something not a space */
1165 if (isquote(str[n1])) { /* Remove quotes */
1166 quote = str[n1];
1167 remQ = 1;
1168 }
1169
1170 xmlattr->name = __malloc((n0+1)*sizeof(SXML_CHAR));
1171 xmlattr->value = __malloc((to+1 - n1 - 2*remQ + 1) * sizeof(SXML_CHAR)); /* 2*remQ because we expect 2 quotes */
1172 xmlattr->active = true;
1173 if (xmlattr->name != NULL && xmlattr->value != NULL) {
1174 /* Copy name */
1175 sx_strncpy(xmlattr->name, str, n0);
1176 xmlattr->name[n0] = NULC;
1177 /* (void)str_unescape(xmlattr->name); do not unescape the name */
1178 /* Copy value (p starts after the quote (if any) and stops at the end of 'str'
1179 (skipping the quote if any, hence the '*(p+remQ)') */
1180 for (i = 0, p = str + n1 + remQ; i + n1 + 2*remQ < to && *(p+remQ) != NULC; i++, p++)
1181 xmlattr->value[i] = *p;
1182 xmlattr->value[i] = NULC;
1183 (void)html2str(xmlattr->value, NULL); /* Convert HTML escape sequences, do not str_unescape(xmlattr->value) */
1184 if (remQ && *p != quote)
1185 ret = 2; /* Quote at the beginning but not at the end: probable presence of '>' inside attribute value, so we need to read more data! */
1186 } else
1187 ret = 0;
1188
1189 if (ret == 0) {
1190 if (xmlattr->name != NULL) {
1191 __free(xmlattr->name);
1192 xmlattr->name = NULL;
1193 }
1194 if (xmlattr->value != NULL) {
1195 __free(xmlattr->value);
1196 xmlattr->value = NULL;
1197 }
1198 }
1199
1200 return ret;
1201 }
1202
1203 static TagType _parse_special_tag(const SXML_CHAR* str, int len, _TAG* tag, XMLNode* node)
1204 {
1205 if (sx_strncmp(str, tag->start, tag->len_start))
1206 return TAG_NONE;
1207
1208 if (sx_strncmp(str + len - tag->len_end, tag->end, tag->len_end)) /* There probably is a '>' inside the tag */
1209 return TAG_PARTIAL;
1210
1211 node->tag = __malloc((len - tag->len_start - tag->len_end + 1)*sizeof(SXML_CHAR));
1212 if (node->tag == NULL)
1213 return TAG_ERROR;
1214 sx_strncpy(node->tag, str + tag->len_start, len - tag->len_start - tag->len_end);
1215 node->tag[len - tag->len_start - tag->len_end] = NULC;
1216 node->tag_type = tag->tag_type;
1217
1218 return node->tag_type;
1219 }
1220
1221 /*
1222 Reads a string that is supposed to be an xml tag like '<tag (attribName="attribValue")* [/]>' or '</tag>'.
1223 Fills the 'xmlnode' structure with the tag name and its attributes.
1224 Returns 'TAG_ERROR' if an error occurred (malformed 'str' or memory). 'TAG_*' when string is recognized.
 In detail, as implemented below:
 - 'TAG_ERROR' when 'str' or 'xmlnode' is NULL, when an allocation fails or when an attribute
   cannot be parsed,
 - 'TAG_NONE' when 'str' does not start with '<' or does not end with '>',
 - 'TAG_PARTIAL' when a special tag or a DOCTYPE start is recognized but its end marker is missing,
 - the type of the matching special or user tag, 'TAG_DOCTYPE', 'TAG_END', 'TAG_SELF' or
   'TAG_FATHER' otherwise (also stored in 'xmlnode->tag_type').
 When leaving through 'parse_err', 'xmlnode' is released with 'XMLNode_free'.
1225 */
1226 TagType XML_parse_1string(const SXML_CHAR* str, XMLNode* xmlnode)
1227 {
1228 SXML_CHAR *p;
1229 XMLAttribute* pt;
1230 int n, nn, len, rc, tag_end = 0;
1231
1232 if (str == NULL || xmlnode == NULL)
1233 return TAG_ERROR;
1234 len = sx_strlen(str);
1235
1236 /* Check for malformed string */
/* For an empty string, 'str[0]' is the terminator so 'str[len-1]' is never evaluated */
1237 if (str[0] != C2SX('<') || str[len-1] != C2SX('>'))
1238 return TAG_NONE; /* Syntax error */
1239
/* Try the built-in special tags first: anything but TAG_NONE (a recognized tag, TAG_PARTIAL or TAG_ERROR) is returned as is */
1240 for (nn = 0; nn < NB_SPECIAL_TAGS; nn++) {
1241 n = (int)_parse_special_tag(str, len, &_spec[nn], xmlnode);
1242 switch (n) {
1243 case TAG_NONE: break; /* Nothing found => do nothing */
1244 default: return (TagType)n; /* Tag found => return it */
1245 }
1246 }
1247
1248 /* "<!DOCTYPE" requires a special handling because it can end with "]>" instead of ">" if a '[' is found inside */
1249 if (str[1] == C2SX('!')) {
1250 /* DOCTYPE */
1251 if (!sx_strncmp(str, C2SX("<!DOCTYPE"), 9)) { /* 9 = sizeof("<!DOCTYPE") */
1252 for (n = 9; str[n] && str[n] != C2SX('['); n++) ; /* Look for a '[' inside the DOCTYPE, which would mean that we should be looking for a "]>" tag end */
/* 'nn' is the number of extra closing characters to drop from the stored text: 1 for the ']' of "]>", 0 otherwise */
1253 nn = 0;
1254 if (str[n]) { /* '[' was found */
1255 if (sx_strncmp(str+len-2, C2SX("]>"), 2)) /* There probably is a '>' inside the DOCTYPE */
1256 return TAG_PARTIAL;
1257 nn = 1;
1258 }
1259 xmlnode->tag = __malloc((len - 9 - nn)*sizeof(SXML_CHAR)); /* 'len' - "<!DOCTYPE" and ">" + '\0' */
1260 if (xmlnode->tag == NULL)
1261 return TAG_ERROR;
1262 sx_strncpy(xmlnode->tag, &str[9], len - 10 - nn);
1263 xmlnode->tag[len - 10 - nn] = NULC;
1264 xmlnode->tag_type = TAG_DOCTYPE;
1265
1266 return TAG_DOCTYPE;
1267 }
1268 }
1269
1270 /* Test user tags */
1271 for (nn = 0; nn < _user_tags.n_tags; nn++) {
1272 n = _parse_special_tag(str, len, &_user_tags.tags[nn], xmlnode);
1273 switch (n) {
1274 case TAG_ERROR: return TAG_ERROR; /* Error => exit */
1275 case TAG_NONE: break; /* Not this one */
1276 default: return (TagType)n; /* Tag found => return it */
1277 }
1278 }
1279
/* A '/' right after '<' means an end tag ('</tag>') */
1280 if (str[1] == C2SX('/'))
1281 tag_end = 1;
1282
1283 /* tag starts at index 1 (or 2 if tag end) and ends at the first space or '/>' */
1284 for (n = 1 + tag_end; str[n] != NULC && str[n] != C2SX('>') && str[n] != C2SX('/') && !sx_isspace(str[n]); n++) ;
/* 'n - tag_end' characters hold the 'n - 1 - tag_end' characters of the name plus the terminator */
1285 xmlnode->tag = __malloc((n - tag_end)*sizeof(SXML_CHAR));
1286 if (xmlnode->tag == NULL)
1287 return TAG_ERROR;
1288 sx_strncpy(xmlnode->tag, &str[1 + tag_end], n - 1 - tag_end);
1289 xmlnode->tag[n - 1 - tag_end] = NULC;
/* For an end tag, whatever follows the name is not examined */
1290 if (tag_end) {
1291 xmlnode->tag_type = TAG_END;
1292 return TAG_END;
1293 }
1294
1295 /* Here, 'n' is the position of the first space after tag name */
/* Each iteration either reaches the end of the tag and returns, or parses one attribute */
1296 while (n < len) {
1297 /* Skips spaces */
1298 while (sx_isspace(str[n])) n++;
1299
1300 /* Check for XML end ('>' or '/>') */
1301 if (str[n] == C2SX('>')) { /* Tag with children */
1302 TagType type = (str[n-1] == '/' ? TAG_SELF : TAG_FATHER); /* TODO: Find something better to cope with <tag attr=v/> */
1303 xmlnode->tag_type = type;
1304 return type;
1305 }
1306 if (!sx_strcmp(str+n, C2SX("/>"))) { /* Tag without children */
1307 xmlnode->tag_type = TAG_SELF;
1308 return TAG_SELF;
1309 }
1310
1311 /* New attribute found */
/* 'p' is the first '=' found from 'n' on; everything from 'str[n]' up to the end of the value is handed to 'XML_parse_attribute_to' below */
1312 p = sx_strchr(str+n, C2SX('='));
1313 if (p == NULL) goto parse_err;
/* Grow the attribute array by one; 'xmlnode->attributes' is only overwritten once '__realloc' succeeded */
1314 pt = __realloc(xmlnode->attributes, (xmlnode->n_attributes + 1) * sizeof(XMLAttribute));
1315 if (pt == NULL) goto parse_err;
1316
1317 pt[xmlnode->n_attributes].name = NULL;
1318 pt[xmlnode->n_attributes].value = NULL;
1319 pt[xmlnode->n_attributes].active = false;
1320 xmlnode->n_attributes++;
1321 xmlnode->attributes = pt;
1322 while (*++p != NULC && sx_isspace(*p)) ; /* Skip spaces */
1323 if (isquote(*p)) { /* Attribute value starts with a quote, look for next one, ignoring protected ones with '\' */
1324 for (nn = p-str+1; str[nn] && str[nn] != *p; nn++) { /* CHECK UNICODE "nn = p-str+1" */
1325 /* if (str[nn] == C2SX('\\')) nn++; [bugs:#7]: '\' is valid in values */
1326 }
1327 nn++; /* Skip quote */
1328 } else { /* Attribute value stops at first space or end of XML string */
1329 for (nn = p-str+1; str[nn] != NULC && !sx_isspace(str[nn]) && str[nn] != C2SX('/') && str[nn] != C2SX('>'); nn++) ; /* Go to the end of the attribute value */ /* CHECK UNICODE */
1330 }
1331
1332 /* Here 'str[nn]' is the character after value */
1333 /* the attribute definition ('attrName="attrVal"') is between 'str[n]' and 'str[nn]' */
1334 rc = XML_parse_attribute_to(&str[n], nn - n, &xmlnode->attributes[xmlnode->n_attributes - 1]);
1335 if (!rc) goto parse_err;
1336 if (rc == 2) { /* Probable presence of '>' inside attribute value, which is legal XML. Remove attribute to re-parse it later */
1337 XMLNode_remove_attribute(xmlnode, xmlnode->n_attributes - 1);
/* NOTE: this return does not go through 'parse_err', so 'xmlnode' (its tag and previous attributes) is not freed here */
1338 return TAG_ERROR; /* was TAG_PARTIAL */
1339 }
1340
1341 n = nn + 1; /* Go to next attribute */
1342 if (str[nn] == C2SX('>')) { /* ... or we might have reached the end if no space is between the attribute value and the ">" or "/>" */
1343 TagType type = (str[nn-1] == '/' ? TAG_SELF : TAG_FATHER); /* TODO: Find something better to cope with <tag attr=v/> */
1344 xmlnode->tag_type = type;
1345 return type;
1346 }
1347 }
1348
/* Reached only if the loop above ended without finding the closing '>': report it, then fall through to the error path */
1349 sx_fprintf(stderr, C2SX("\nWE SHOULD NOT BE HERE!\n[%s]\n\n"), str);
1350
1351 parse_err:
1352 (void)XMLNode_free(xmlnode);
1353
1354 return TAG_ERROR;
1355 }
1356
/*
 Core SAX parsing loop, shared by file and buffer data sources.
 Reads 'in' chunk by chunk (each chunk ends at a '>'), reports the text found before each '<'
 through 'sax->new_text' and the parsed tag through 'sax->start_node' / 'sax->end_node';
 'sax->all_event', when set, receives every event as well. 'sd->line_num' is updated along the way.
 Returns 'true' right away, without parsing, when the start-of-document callback returns false.
 Otherwise returns 'false' when an error was met or a callback requested to stop, 'true' otherwise.
 */
1357 static int _parse_data_SAX(void* in, const DataSourceType in_type, const SAX_Callbacks* sax, SAX_Data* sd)
1358 {
1359 SXML_CHAR *line = NULL, *txt_end, *p;
1360 XMLNode node;
1361 int ret, exit, sz, n0, ncr;
1362 TagType tag_type;
/* End-of-source test matching the data source type: '_beob' for a buffer, 'sx_feof' otherwise */
1363 int (*meos)(void* ds) = (in_type == DATA_SOURCE_BUFFER ? (int(*)(void*))_beob : (int(*)(void*))sx_feof);
1364
1365 if (sax->start_doc != NULL && !sax->start_doc(sd))
1366 return true;
1367 if (sax->all_event != NULL && !sax->all_event(XML_EVENT_START_DOC, NULL, (SXML_CHAR*)sd->name, 0, sd))
1368 return true;
1369
/* 'ret' is the value returned by this function; 'exit' is raised when a callback returns false */
1370 ret = true;
1371 exit = false;
1372 sd->line_num = 1; /* Line counter, starts at 1 */
1373 sz = 0; /* 'line' buffer size */
1374 node.init_value = 0;
1375 (void)XMLNode_init(&node);
/* Read up to the next '>' (kept in 'line'); 'n0' is the number of characters read and 'ncr' the number of '\n' met */
1376 while ((n0 = read_line_alloc(in, in_type, &line, &sz, 0, NULC, C2SX('>'), true, C2SX('\n'), &ncr)) != 0) {
/* Release what the previous iteration stored in 'node' */
1377 (void)XMLNode_free(&node);
1378 for (p = line; *p != NULC && sx_isspace(*p) && p - line < n0; p++) ; /* Checks if text is only spaces */
/* Only spaces were read: stop parsing */
1379 if (*p == NULC || p - line >= n0)
1380 break;
1381 sd->line_num += ncr;
1382
1383 /* Get text for 'father' (i.e. what is before '<') */
1384 while ((txt_end = sx_strchr(line, C2SX('<'))) == NULL) { /* '<' was not found, indicating a probable '>' inside text (should have been escaped with '&gt;' but we'll handle that ;) */
1385 int n1 = read_line_alloc(in, in_type, &line, &sz, n0, 0, C2SX('>'), true, C2SX('\n'), &ncr); /* Go on reading the file from current position until next '>' */
1386 sd->line_num += ncr;
/* Nothing more could be read */
1387 if (n1 <= n0) {
1388 ret = false;
1389 if (sax->on_error == NULL && sax->all_event == NULL) {
1390 sx_fprintf(stderr, C2SX("%s:%d: MEMORY ERROR.\n"), sd->name, sd->line_num);
1391 } else {
1392 if (sax->on_error != NULL && !sax->on_error(PARSE_ERR_MEMORY, sd->line_num, sd))
1393 break;
1394 if (sax->all_event != NULL && !sax->all_event(XML_EVENT_ERROR, NULL, (SXML_CHAR*)sd->name, PARSE_ERR_MEMORY, sd))
1395 break;
1396 }
1397 break; /* 'txt_end' is still NULL here so we'll display the syntax error below */
1398 }
1399 n0 = n1;
1400 }
1401 if (txt_end == NULL) { /* Missing tag start */
1402 ret = false;
1403 if (sax->on_error == NULL && sax->all_event == NULL) {
1404 sx_fprintf(stderr, C2SX("%s:%d: ERROR: Unexpected end character '>', without matching '<'!\n"), sd->name, sd->line_num);
1405 } else {
1406 if (sax->on_error != NULL && !sax->on_error(PARSE_ERR_UNEXPECTED_TAG_END, sd->line_num, sd))
1407 break;
1408 if (sax->all_event != NULL && !sax->all_event(XML_EVENT_ERROR, NULL, (SXML_CHAR*)sd->name, PARSE_ERR_UNEXPECTED_TAG_END, sd))
1409 break;
1410 }
1411 break;
1412 }
1413 /* First part of 'line' (before '<') is to be added to 'father->text' */
1414 *txt_end = NULC; /* Have 'line' be the text for 'father' */
1415 if (*line != NULC && (sax->new_text != NULL || sax->all_event != NULL)) {
1416 if (sax->new_text != NULL && (exit = !sax->new_text(line, sd))) /* no str_unescape(line) */
1417 break;
1418 if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_TEXT, NULL, line, sd->line_num, sd)))
1419 break;
1420 }
1421 *txt_end = '<'; /* Restores tag start */
1422
/* Parse the tag itself; note that the 'break's inside this switch only leave the switch, the loop exit is decided after it */
1423 switch (tag_type = XML_parse_1string(txt_end, &node)) {
1424 case TAG_ERROR: /* Memory error (also returned by 'XML_parse_1string' when an attribute cannot be parsed) */
1425 ret = false;
1426 if (sax->on_error == NULL && sax->all_event == NULL) {
1427 sx_fprintf(stderr, C2SX("%s:%d: MEMORY ERROR.\n"), sd->name, sd->line_num);
1428 } else {
1429 if (sax->on_error != NULL && (exit = !sax->on_error(PARSE_ERR_MEMORY, sd->line_num, sd)))
1430 break;
1431 if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_ERROR, NULL, (SXML_CHAR*)sd->name, PARSE_ERR_MEMORY, sd)))
1432 break;
1433 }
1434 break;
1435
1436 case TAG_NONE: /* Syntax error */
1437 ret = false;
/* Cut the tag at its first newline so that the message below stays on one line */
1438 p = sx_strchr(txt_end, C2SX('\n'));
1439 if (p != NULL)
1440 *p = NULC;
1441 if (sax->on_error == NULL && sax->all_event == NULL) {
1442 sx_fprintf(stderr, C2SX("%s:%d: SYNTAX ERROR (%s%s).\n"), sd->name, sd->line_num, txt_end, p == NULL ? C2SX("") : C2SX("..."));
/* The newline is restored in this branch only */
1443 if (p != NULL)
1444 *p = C2SX('\n');
1445 } else {
1446 if (sax->on_error != NULL && (exit = !sax->on_error(PARSE_ERR_SYNTAX, sd->line_num, sd)))
1447 break;
1448 if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_ERROR, NULL, (SXML_CHAR*)sd->name, PARSE_ERR_SYNTAX, sd)))
1449 break;
1450 }
1451 break;
1452
1453 case TAG_END:
1454 if (sax->end_node != NULL || sax->all_event != NULL) {
1455 if (sax->end_node != NULL && (exit = !sax->end_node(&node, sd)))
1456 break;
1457 if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_END_NODE, &node, NULL, sd->line_num, sd)))
1458 break;
1459 }
1460 break;
1461
1462 default: /* Add 'node' to 'father' children */
1463 /* If the line looks like a comment (or CDATA) but is not properly finished, loop until we find the end. */
1464 while (tag_type == TAG_PARTIAL) {
1465 int n1 = read_line_alloc(in, in_type, &line, &sz, n0, NULC, C2SX('>'), true, C2SX('\n'), &ncr); /* Go on reading the file from current position until next '>' */
1466 sd->line_num += ncr;
/* Nothing more could be read: reported as end of file or memory error depending on 'meos' */
1467 if (n1 <= n0) {
1468 ret = false;
1469 if (sax->on_error == NULL && sax->all_event == NULL) {
1470 sx_fprintf(stderr, C2SX("%s:%d: SYNTAX ERROR.\n"), sd->name, sd->line_num);
1471 } else {
1472 if (sax->on_error != NULL && (exit = !sax->on_error(meos(in) ? PARSE_ERR_EOF : PARSE_ERR_MEMORY, sd->line_num, sd)))
1473 break;
1474 if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_ERROR, NULL, (SXML_CHAR*)sd->name, meos(in) ? PARSE_ERR_EOF : PARSE_ERR_MEMORY, sd)))
1475 break;
1476 }
1477 break;
1478 }
1479 n0 = n1;
1480 txt_end = sx_strchr(line, C2SX('<')); /* In case 'line' has been moved by the '__realloc' in 'read_line_alloc' */
/* Parse again, now that 'line' holds more data */
1481 tag_type = XML_parse_1string(txt_end, &node);
1482 if (tag_type == TAG_ERROR) {
1483 ret = false;
1484 if (sax->on_error == NULL && sax->all_event == NULL) {
1485 sx_fprintf(stderr, C2SX("%s:%d: PARSE ERROR.\n"), sd->name, sd->line_num);
1486 } else {
1487 if (sax->on_error != NULL && (exit = !sax->on_error(meos(in) ? PARSE_ERR_EOF : PARSE_ERR_SYNTAX, sd->line_num, sd)))
1488 break;
1489 if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_ERROR, NULL, (SXML_CHAR*)sd->name, meos(in) ? PARSE_ERR_EOF : PARSE_ERR_SYNTAX, sd)))
1490 break;
1491 }
1492 break;
1493 }
1494 }
1495 if (ret == false)
1496 break;
1497 if (sax->start_node != NULL && (exit = !sax->start_node(&node, sd)))
1498 break;
1499 if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_START_NODE, &node, NULL, sd->line_num, sd)))
1500 break;
/* Any tag that is not 'TAG_FATHER' has no separate end tag: its end is notified right after its start */
1501 if (node.tag_type != TAG_FATHER && (sax->end_node != NULL || sax->all_event != NULL)) {
1502 if (sax->end_node != NULL && (exit = !sax->end_node(&node, sd)))
1503 break;
1504 if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_END_NODE, &node, NULL, sd->line_num, sd)))
1505 break;
1506 }
1507 break;
1508 }
1509 if (exit == true) /* Return false when exit is requested */
1510 ret = false;
/* Leave the main loop on error, on requested exit or when the data source is exhausted */
1511 if (ret == false || meos(in))
1512 break;
1513 }
1514 __free(line);
1515 (void)XMLNode_free(&node);
1516
/* When 'end_doc' returns false, the XML_EVENT_END_DOC event below is not sent */
1517 if (sax->end_doc != NULL && !sax->end_doc(sd))
1518 return ret;
1519 if (sax->all_event != NULL)
1520 (void)sax->all_event(XML_EVENT_END_DOC, NULL, (SXML_CHAR*)sd->name, sd->line_num, sd);
1521
1522 return ret;
1523 }
1524
1525 int SAX_Callbacks_init(SAX_Callbacks* sax)
1526 {
1527 if (sax == NULL)
1528 return false;
1529
1530 sax->start_doc = NULL;
1531 sax->start_node = NULL;
1532 sax->end_node = NULL;
1533 sax->new_text = NULL;
1534 sax->on_error = NULL;
1535 sax->end_doc = NULL;
1536 sax->all_event = NULL;
1537
1538 return true;
1539 }
1540
1541 int DOMXMLDoc_doc_start(SAX_Data* sd)
1542 {
1543 DOM_through_SAX* dom = (DOM_through_SAX*)sd->user;
1544
1545 dom->current = NULL;
1546 dom->error = PARSE_ERR_NONE;
1547 dom->line_error = 0;
1548
1549 return true;
1550 }
1551
1552 int DOMXMLDoc_node_start(const XMLNode* node, SAX_Data* sd)
1553 {
1554 DOM_through_SAX* dom = (DOM_through_SAX*)sd->user;
1555 XMLNode* new_node;
1556 int i;
1557
1558 if ((new_node = XMLNode_dup(node, true)) == NULL) goto node_start_err; /* No real need to put 'true' for 'XMLNode_dup', but cleaner */
1559
1560 if (dom->current == NULL) {
1561 if ((i = _add_node(&dom->doc->nodes, &dom->doc->n_nodes, new_node)) < 0) goto node_start_err;
1562
1563 if (dom->doc->i_root < 0 && (node->tag_type == TAG_FATHER || node->tag_type == TAG_SELF))
1564 dom->doc->i_root = i;
1565 } else {
1566 if (_add_node(&dom->current->children, &dom->current->n_children, new_node) < 0) goto node_start_err;
1567 }
1568
1569 new_node->father = dom->current;
1570 dom->current = new_node;
1571
1572 return true;
1573
1574 node_start_err:
1575 dom->error = PARSE_ERR_MEMORY;
1576 dom->line_error = sd->line_num;
1577 (void)XMLNode_free(new_node);
1578 __free(new_node);
1579
1580 return false;
1581 }
1582
1583 int DOMXMLDoc_node_end(const XMLNode* node, SAX_Data* sd)
1584 {
1585 DOM_through_SAX* dom = (DOM_through_SAX*)sd->user;
1586
1587 if (dom->current == NULL || sx_strcmp(dom->current->tag, node->tag)) {
1588 sx_fprintf(stderr, C2SX("%s:%d: ERROR - End tag </%s> was unexpected"), sd->name, sd->line_num, node->tag);
1589 if (dom->current != NULL)
1590 sx_fprintf(stderr, C2SX(" (</%s> was expected)\n"), dom->current->tag);
1591 else
1592 sx_fprintf(stderr, C2SX(" (no node to end)\n"));
1593
1594 dom->error = PARSE_ERR_UNEXPECTED_NODE_END;
1595 dom->line_error = sd->line_num;
1596
1597 return false;
1598 }
1599
1600 dom->current = dom->current->father;
1601
1602 return true;
1603 }
1604
1605 int DOMXMLDoc_node_text(SXML_CHAR* text, SAX_Data* sd)
1606 {
1607 SXML_CHAR* p = text;
1608 DOM_through_SAX* dom = (DOM_through_SAX*)sd->user;
1609
1610 /* Keep text, even if it is only spaces */
1611 #if 0
1612 while(*p != NULC && sx_isspace(*p++)) ;
1613 if (*p == NULC) return true; /* Only spaces */
1614 #endif
1615
1616 /* If there is no current node to add text to, raise an error, except if text is only spaces, in which case it is probably just formatting */
1617 if (dom->current == NULL) {
1618 while(*p != NULC && sx_isspace(*p)) p++;
1619 if (*p == NULC) /* Only spaces => probably pretty-printing */
1620 return true;
1621 dom->error = PARSE_ERR_TEXT_OUTSIDE_NODE;
1622 dom->line_error = sd->line_num;
1623 return false; /* There is some "real" text => raise an error */
1624 }
1625
1626 if (dom->text_as_nodes) {
1627 XMLNode* new_node = XMLNode_allocN(1);
1628 if (new_node == NULL || (new_node->text = sx_strdup(text)) == NULL
1629 || _add_node(&dom->current->children, &dom->current->n_children, new_node) < 0) {
1630 dom->error = PARSE_ERR_MEMORY;
1631 dom->line_error = sd->line_num;
1632 (void)XMLNode_free(new_node);
1633 __free(new_node);
1634 return false;
1635 }
1636 new_node->tag_type = TAG_TEXT;
1637 new_node->father = dom->current;
1638 /*dom->current->tag_type = TAG_FATHER; // OS: should parent field be forced to be TAG_FATHER? now it has at least one TAG_TEXT child. I decided not to enforce this for backward-compatibility related to tag_types*/
1639 return true;
1640 } else { /* Old behaviour: concatenate text to the previous one */
1641 /* 'p' will point at the new text */
1642 if (dom->current->text == NULL) {
1643 p = sx_strdup(text);
1644 } else {
1645 p = __realloc(dom->current->text, (sx_strlen(dom->current->text) + sx_strlen(text) + 1)*sizeof(SXML_CHAR));
1646 if (p != NULL)
1647 sx_strcat(p, text);
1648 }
1649 if (p == NULL) {
1650 dom->error = PARSE_ERR_MEMORY;
1651 dom->line_error = sd->line_num;
1652 return false;
1653 }
1654
1655 dom->current->text = p;
1656 }
1657
1658 return true;
1659 }
1660
1661 int DOMXMLDoc_parse_error(ParseError error_num, int line_number, SAX_Data* sd)
1662 {
1663 DOM_through_SAX* dom = (DOM_through_SAX*)sd->user;
1664
1665 dom->error = error_num;
1666 dom->line_error = line_number;
1667
1668 /* Complete error message will be displayed in 'DOMXMLDoc_doc_end' callback */
1669
1670 return false; /* Stop on error */
1671 }
1672
1673 int DOMXMLDoc_doc_end(SAX_Data* sd)
1674 {
1675 DOM_through_SAX* dom = (DOM_through_SAX*)sd->user;
1676
1677 if (dom->error != PARSE_ERR_NONE) {
1678 SXML_CHAR* msg;
1679
1680 switch (dom->error) {
1681 case PARSE_ERR_MEMORY: msg = C2SX("MEMORY"); break;
1682 case PARSE_ERR_UNEXPECTED_TAG_END: msg = C2SX("UNEXPECTED_TAG_END"); break;
1683 case PARSE_ERR_SYNTAX: msg = C2SX("SYNTAX"); break;
1684 case PARSE_ERR_EOF: msg = C2SX("UNEXPECTED_END_OF_FILE"); break;
1685 case PARSE_ERR_TEXT_OUTSIDE_NODE: msg = C2SX("TEXT_OUTSIDE_NODE"); break;
1686 case PARSE_ERR_UNEXPECTED_NODE_END: msg = C2SX("UNEXPECTED_NODE_END"); break;
1687 default: msg = C2SX("UNKNOWN"); break;
1688 }
1689 sx_fprintf(stderr, C2SX("%s:%d: An error was found (%s(%d)), loading aborted...\n"), sd->name, dom->line_error, msg, dom->error);
1690 dom->current = NULL;
1691 (void)XMLDoc_free(dom->doc);
1692 dom->doc = NULL;
1693 }
1694
1695 return true;
1696 }
1697
1698 int SAX_Callbacks_init_DOM(SAX_Callbacks* sax)
1699 {
1700 if (sax == NULL)
1701 return false;
1702
1703 sax->start_doc = DOMXMLDoc_doc_start;
1704 sax->start_node = DOMXMLDoc_node_start;
1705 sax->end_node = DOMXMLDoc_node_end;
1706 sax->new_text = DOMXMLDoc_node_text;
1707 sax->on_error = DOMXMLDoc_parse_error;
1708 sax->end_doc = DOMXMLDoc_doc_end;
1709 sax->all_event = NULL;
1710
1711 return true;
1712 }
1713
1714 int XMLDoc_parse_file_SAX(const SXML_CHAR* filename, const SAX_Callbacks* sax, void* user)
1715 {
1716 FILE* f;
1717 int ret;
1718 SAX_Data sd;
1719 SXML_CHAR* fmode =
1720 #ifndef SXMLC_UNICODE
1721 C2SX("rt");
1722 #else
1723 C2SX("rb"); /* In Unicode, open the file as binary so that further 'fgetwc' read all bytes */
1724 #endif
1725 BOM_TYPE bom;
1726
1727
1728 if (sax == NULL || filename == NULL || filename[0] == NULC)
1729 return false;
1730
1731 f = sx_fopen(filename, fmode);
1732 if (f == NULL)
1733 return false;
1734 /* Microsoft's 'ftell' returns invalid position for Unicode text files
1735 (see http://connect.microsoft.com/VisualStudio/feedback/details/369265/ftell-ftell-nolock-incorrectly-handling-unicode-text-translation)
1736 However, we're opening the file as binary in Unicode so we don't fall into that case...
1737 */
1738 #if defined(SXMLC_UNICODE) && (defined(WIN32) || defined(WIN64))
1739 /*setvbuf(f, NULL, _IONBF, 0);*/
1740 #endif
1741
1742 sd.name = (SXML_CHAR*)filename;
1743 sd.user = user;
1744 sd.type = DATA_SOURCE_FILE;
1745 sd.src = (void*)f;
1746 bom = freadBOM(f, NULL, NULL); /* Skip BOM, if any */
1747 /* In Unicode, re-open the file in text-mode if there is no BOM (or UTF-8) as we assume that
1748 the file is "plain" text (i.e. 1 byte = 1 character). If opened in binary mode, 'fgetwc'
1749 would read 2 bytes for 1 character, which would not work on "plain" files. */
1750 if (bom == BOM_NONE || bom == BOM_UTF_8) {
1751 sx_fclose(f);
1752 f = sx_fopen(filename, C2SX("rt"));
1753 if (f == NULL)
1754 return false;
1755 if (bom == BOM_UTF_8)
1756 freadBOM(f, NULL, NULL); /* Skip the UTF-8 BOM that was found */
1757 }
1758 #ifndef SXMLC_UNICODE
1759 /* Unicode BOM when Unicode support has not been compiled in. */
1760 else {
1761 sx_fclose(f);
1762 return false;
1763 }
1764 #endif
1765
1766 ret = _parse_data_SAX((void*)f, DATA_SOURCE_FILE, sax, &sd);
1767 (void)sx_fclose(f);
1768
1769 return ret;
1770 }
1771
1772 int XMLDoc_parse_buffer_SAX_len(const SXML_CHAR* buffer, int buffer_len, const SXML_CHAR* name, const SAX_Callbacks* sax, void* user)
1773 {
1774 DataSourceBuffer dsb = { buffer, buffer_len, 0 };
1775 SAX_Data sd;
1776
1777 if (sax == NULL || buffer == NULL)
1778 return false;
1779
1780 sd.name = name;
1781 sd.user = user;
1782 sd.type = DATA_SOURCE_BUFFER;
1783 sd.src = (void*)buffer;
1784 return _parse_data_SAX((void*)&dsb, DATA_SOURCE_BUFFER, sax, &sd);
1785 }
1786
1787 int XMLDoc_parse_file_DOM_text_as_nodes(const SXML_CHAR* filename, XMLDoc* doc, int text_as_nodes)
1788 {
1789 DOM_through_SAX dom;
1790 SAX_Callbacks sax;
1791 int ret;
1792
1793 if (doc == NULL || filename == NULL || filename[0] == NULC || doc->init_value != XML_INIT_DONE)
1794 return false;
1795
1796 sx_strncpy(doc->filename, filename, SXMLC_MAX_PATH - 1);
1797 doc->filename[SXMLC_MAX_PATH - 1] = NULC;
1798
1799 /* Read potential BOM on file */
1800 {
1801 /* In Unicode, open the file as binary so that further 'fgetwc' read all bytes */
1802 FILE* f = sx_fopen(filename, C2SX("rb"));
1803 if (f != NULL) {
1804 #if defined(SXMLC_UNICODE) && (defined(WIN32) || defined(WIN64))
1805 /*setvbuf(f, NULL, _IONBF, 0);*/
1806 #endif
1807 doc->bom_type = freadBOM(f, doc->bom, &doc->sz_bom);
1808 sx_fclose(f);
1809 }
1810 }
1811
1812 dom.doc = doc;
1813 dom.current = NULL;
1814 dom.text_as_nodes = text_as_nodes;
1815 SAX_Callbacks_init_DOM(&sax);
1816
1817 ret = XMLDoc_parse_file_SAX(filename, &sax, &dom);
1818 if (!ret) {
1819 (void)XMLDoc_free(doc);
1820 dom.doc = NULL;
1821 return ret;
1822 }
1823
1824 /* TODO: Check there is no unfinished root nodes */
1825 return ret;
1826 }
1827
1828 int XMLDoc_parse_buffer_DOM_text_as_nodes(const SXML_CHAR* buffer, const SXML_CHAR* name, XMLDoc* doc, int text_as_nodes)
1829 {
1830 DOM_through_SAX dom;
1831 SAX_Callbacks sax;
1832 int ret;
1833
1834 if (doc == NULL || buffer == NULL || doc->init_value != XML_INIT_DONE)
1835 return false;
1836
1837 dom.doc = doc;
1838 dom.current = NULL;
1839 dom.text_as_nodes = text_as_nodes;
1840 SAX_Callbacks_init_DOM(&sax);
1841
1842 ret = XMLDoc_parse_buffer_SAX(buffer, name, &sax, &dom);
1843 if (!ret) {
1844 XMLDoc_free(doc);
1845 return ret;
1846 }
1847
1848 /* TODO: Check there is no unfinished root nodes */
1849 return ret;
1850 }
1851
1852
1853
1854 /* --- Utility functions (ex sxmlutils.c) --- */
1855
1856 #ifdef DBG_MEM
/* Number of successful allocations and number of releases performed through the wrappers below */
static int nb_alloc = 0, nb_free = 0;

/*
 Debug wrapper around 'malloc': counts successful allocations and prints a trace line on stdout.
 Returns what 'malloc' returns.
 */
void* __malloc(size_t sz)
{
	void* p = malloc(sz);
	if (p != NULL)
		nb_alloc++;
	/* '%p' is the only portable conversion for a pointer; the size is cast to match '%lu' */
	printf("%p: MALLOC (%lu) - NA %d - NF %d = %d\n", p, (unsigned long)sz, nb_alloc, nb_free, nb_alloc - nb_free);
	return p;
}
1867
1868 void* __calloc(size_t count, size_t sz)
1869 {
1870 void* p = calloc(count, sz);
1871 if (p != NULL)
1872 nb_alloc++;
1873 printf("0x%x: CALLOC (%d, %d) - NA %d - NF %d = %d\n", p, count, sz, nb_alloc, nb_free, nb_alloc - nb_free);
1874 return p;
1875 }
1876
1877 void* __realloc(void* mem, size_t sz)
1878 {
1879 void* p = realloc(mem, sz);
1880 if (mem == NULL && p != NULL)
1881 nb_alloc++;
1882 else if (mem != NULL && sz == 0)
1883 nb_free++;
1884 printf("0x%x: REALLOC 0x%x (%d)", p, mem, sz);
1885 if (mem == NULL)
1886 printf(" - NA %d - NF %d = %d", nb_alloc, nb_free, nb_alloc - nb_free);
1887 printf("\n");
1888 return p;
1889 }
1890
1891 void __free(void* mem)
1892 {
1893 nb_free++;
1894 printf("0x%x: FREE - NA %d - NF %d = %d\n", mem, nb_alloc, nb_free, nb_alloc - nb_free);
1895 free(mem);
1896 }
1897
1898 char* __sx_strdup(const char* s)
1899 {
1900 /* Mimic the behavior of sx_strdup(), as we can't use it directly here: DBG_MEM is defined
1901 and sx_strdup is this function! (bug #5) */
1902 #ifdef SXMLC_UNICODE
1903 char* p = wcsdup(s);
1904 #else
1905 char* p = strdup(s);
1906 #endif
1907 if (p != NULL)
1908 nb_alloc++;
1909 printf("0x%x: STRDUP (%d) - NA %d - NF %d = %d\n", p, sx_strlen(s), nb_alloc, nb_free, nb_alloc - nb_free);
1910 return p;
1911 }
1912 #endif
1913
1914 /* Dictionary of special characters and their HTML equivalent */
1915 static struct _html_special_dict {
1916 SXML_CHAR chr; /* Original character */
1917 SXML_CHAR* html; /* Equivalent HTML string */
1918 int html_len; /* 'sx_strlen(html)' */
1919 } HTML_SPECIAL_DICT[] = {
1920 { C2SX('<'), C2SX("<"), 4 },
1921 { C2SX('>'), C2SX(">"), 4 },
1922 { C2SX('"'), C2SX("""), 6 },
1923 { C2SX('\''), C2SX("'"), 6 },
1924 { C2SX('&'), C2SX("&"), 5 },
1925 { NULC, NULL, 0 }, /* Terminator */
1926 };
1927
1928 int _beob(DataSourceBuffer* ds)
1929 {
1930 if (ds == NULL || ds->buf[ds->cur_pos] == NULC || ds->cur_pos >= ds->buf_len)
1931 return true;
1932
1933 return false;
1934 }
1935
1936 int _bgetc(DataSourceBuffer* ds)
1937 {
1938 if (_beob(ds))
1939 return EOF;
1940
1941 return (int)(ds->buf[ds->cur_pos++]);
1942 }
1943
1944 /*
1945 * \brief Read a "line" from data source, (re-)allocating the given buffer if needed. A "line" is defined
1946 * as a portion starting with character `from` (usually `<`) ending at character `to` (usually `>`).
1947 *
1948 * Characters read will be stored in `line` starting at `i0` (this allows multiple calls to
1949 * `read_line_alloc()` on the same `line` buffer without overwriting it at each call).
1950 * Searches for character `from` until character `to`. If `from` is 0, starts from
1951 * current position in the data source. If `to` is 0, it is replaced by `\n`.
1952 *
1953 * \param in The data source (either `FILE*` if `in_type` is `DATA_SOURCE_FILE` or `SXML_CHAR*`
1954 * if `in_type` is `DATA_SOURCE_BUFFER`).
1955 * \param in_type specifies the type of data source to be read.
1956 * \param line points to the buffer pointer and must not be `NULL` itself. `*line` can be `NULL`, in which case it will be allocated to `*sz_line` characters (`SXML_CHAR`, not bytes). After the function
1957 * returns, `*sz_line` is the actual buffer size. This allows multiple calls to this function using
1958 * the same buffer (without re-allocating/freeing).
1959 * \param sz_line is the size of the buffer `line` if previously allocated (in `SXML_CHAR`, not byte!).
1960 * If `NULL` or 0, an internal value of `MEM_INCR_RLA` is used.
1961 * \param i0 The position where read characters are stored in `line`.
1962 * \param from The character indicating a start of line.
1963 * \param to The character indicating an end of line.
1964 * \param keep_fromto if 0, removes characters `from` and `to` from the line (stripping).
1965 * \param interest is a special character of interest, usually `'\n'` so we can count line numbers in the
1966 * data source (valid only if `interest_count` is not `NULL`).
1967 * \param interest_count if not `NULL`, will receive the count of `interest` characters while searching.
1968 * \returns the number of characters in the line or 0 if an error occurred.
1969 */
1970 int read_line_alloc(void* in, DataSourceType in_type, SXML_CHAR** line, int* sz_line, int i0, SXML_CHAR from, SXML_CHAR to, int keep_fromto, SXML_CHAR interest, int* interest_count)
1971 {
1972 int init_sz = 0;
1973 SXML_CHAR ch, *pt;
1974 int c;
1975 int n, ret;
1976 int (*mgetc)(void* ds) = (in_type == DATA_SOURCE_BUFFER ? (int(*)(void*))_bgetc : (int(*)(void*))sx_fgetc);
1977 int (*meos)(void* ds) = (in_type == DATA_SOURCE_BUFFER ? (int(*)(void*))_beob : (int(*)(void*))sx_feof);
1978
1979 if (in == NULL || line == NULL)
1980 return 0;
1981
1982 if (to == NULC)
1983 to = C2SX('\n');
1984 /* Search for character 'from' */
1985 if (interest_count != NULL)
1986 *interest_count = 0;
1987 while (true) {
1988 /* Reaching EOF before 'to' char is not an error but should trigger 'line' alloc and init to '' */
1989 c = mgetc(in);
1990 ch = (SXML_CHAR)c;
1991 if (c == EOF)
1992 break;
1993 if (interest_count != NULL && ch == interest)
1994 (*interest_count)++;
1995 /* If 'from' is '\0', we stop here */
1996 if (ch == from || from == NULC)
1997 break;
1998 }
1999
2000 if (sz_line == NULL)
2001 sz_line = &init_sz;
2002
2003 if (*line == NULL || *sz_line == 0) {
2004 if (*sz_line == 0) *sz_line = MEM_INCR_RLA;
2005 *line = __malloc(*sz_line*sizeof(SXML_CHAR));
2006 if (*line == NULL)
2007 return 0;
2008 }
2009 if (i0 < 0)
2010 i0 = 0;
2011 if (i0 >= *sz_line)
2012 return 0;
2013
2014 n = i0;
2015 if (c == CEOF) { /* EOF reached before 'to' char => return the empty string */
2016 (*line)[n] = NULC;
2017 return meos(in) ? n : 0; /* Error if not EOF */
2018 }
2019 if (ch != from || keep_fromto) {
2020 (*line)[n++] = ch;
2021 if (n >= *sz_line) {
2022 *sz_line += MEM_INCR_RLA;
2023 pt = __realloc(*line, *sz_line*sizeof(SXML_CHAR));
2024 if (pt == NULL) {
2025 return 0;
2026 } else
2027 *line = pt;
2028 }
2029 }
2030 (*line)[n] = NULC;
2031 ret = 0;
2032 while (true) {
2033 if ((c = mgetc(in)) == CEOF) { /* EOF or error */
2034 (*line)[n] = NULC;
2035 ret = meos(in) ? n : 0;
2036 break;
2037 }
2038 ch = (SXML_CHAR)c;
2039 if (interest_count != NULL && ch == interest)
2040 (*interest_count)++;
2041 (*line)[n] = ch;
2042 if (ch != to || (keep_fromto && to != NULC && ch == to)) /* If we reached the 'to' character and we keep it, we still need to add the extra '\0' */
2043 n++;
2044 if (n >= *sz_line) { /* Too many characters for our line => realloc some more */
2045 *sz_line += MEM_INCR_RLA;
2046 pt = __realloc(*line, *sz_line*sizeof(SXML_CHAR));
2047 if (pt == NULL) {
2048 ret = 0;
2049 break;
2050 } else
2051 *line = pt;
2052 }
2053 (*line)[n] = NULC; /* If we reached the 'to' character and we want to strip it, 'n' hasn't changed and 'line[n]' (which is 'to') will be replaced by '\0' */
2054 if (ch == to) {
2055 ret = n;
2056 break;
2057 }
2058 }
2059
2060 #if 0 /* Automatic buffer resize is deactivated */
2061 /* Resize line to the exact size */
2062 pt = __realloc(*line, (n+1)*sizeof(SXML_CHAR));
2063 if (pt != NULL)
2064 *line = pt;
2065 #endif
2066
2067 return ret;
2068 }
2069
2070 /* --- */
2071
2072 SXML_CHAR* strcat_alloc(SXML_CHAR** src1, const SXML_CHAR* src2)
2073 {
2074 SXML_CHAR* cat;
2075 int n;
2076
2077 /* Do not concatenate '*src1' with itself */
2078 if (src1 == NULL || *src1 == src2)
2079 return NULL;
2080
2081 /* Concatenate a NULL or empty string */
2082 if (src2 == NULL || *src2 == NULC)
2083 return *src1;
2084
2085 n = (*src1 == NULL ? 0 : sx_strlen(*src1)) + sx_strlen(src2) + 1;
2086 cat = __realloc(*src1, n*sizeof(SXML_CHAR));
2087 if (cat == NULL)
2088 return NULL;
2089 if (*src1 == NULL)
2090 *cat = NULC;
2091 *src1 = cat;
2092 sx_strcat(*src1, src2);
2093
2094 return *src1;
2095 }
2096
2097 SXML_CHAR* strip_spaces(SXML_CHAR* str, SXML_CHAR repl_sq)
2098 {
2099 SXML_CHAR* p;
2100 int i, len;
2101
2102 /* 'p' to the first non-space */
2103 for (p = str; *p != NULC && sx_isspace(*p); p++) ; /* No need to search for 'protect' as it is not a space */
2104 len = sx_strlen(str);
2105 for (i = len-1; i >= 0 && sx_isspace(str[i]); i--) ;
2106 if (i >= 0 && str[i] == C2SX('\\')) /* If last non-space is the protection, keep the last space */
2107 i++;
2108 str[i+1] = NULC; /* New end of string to last non-space */
2109
2110 if (repl_sq == NULC) {
2111 if (p == str && i == len)
2112 return str; /* Nothing to do */
2113 for (i = 0; (str[i] = *p) != NULC; i++, p++) ; /* Copy 'p' to 'str' */
2114 return str;
2115 }
2116
2117 /* Squeeze all spaces with 'repl_sq' */
2118 i = 0;
2119 while (*p != NULC) {
2120 if (sx_isspace(*p)) {
2121 str[i++] = repl_sq;
2122 while (sx_isspace(*++p)) ; /* Skips all next spaces */
2123 } else {
2124 if (*p == C2SX('\\'))
2125 p++;
2126 str[i++] = *p++;
2127 }
2128 }
2129 str[i] = NULC;
2130
2131 return str;
2132 }
2133
2134 SXML_CHAR* str_unescape(SXML_CHAR* str)
2135 {
2136 int i, j;
2137
2138 if (str == NULL)
2139 return NULL;
2140
2141 for (i = j = 0; str[j]; j++) {
2142 if (str[j] == C2SX('\\'))
2143 j++;
2144 str[i++] = str[j];
2145 }
2146
2147 return str;
2148 }
2149
2150 int split_left_right(SXML_CHAR* str, SXML_CHAR sep, int* l0, int* l1, int* i_sep, int* r0, int* r1, int ignore_spaces, int ignore_quotes)
2151 {
2152 int n0, n1, is;
2153 SXML_CHAR quote = '\0';
2154
2155 if (str == NULL)
2156 return false;
2157
2158 if (i_sep != NULL)
2159 *i_sep = -1;
2160
2161 if (!ignore_spaces) /* No sense of ignore quotes if spaces are to be kept */
2162 ignore_quotes = false;
2163
2164 /* Parse left part */
2165
2166 if (ignore_spaces) {
2167 for (n0 = 0; str[n0] != NULC && sx_isspace(str[n0]); n0++) ; /* Skip head spaces, n0 points to first non-space */
2168 if (ignore_quotes && isquote(str[n0])) { /* If quote is found, look for next one */
2169 quote = str[n0++]; /* Quote can be '\'' or '"' */
2170 for (n1 = n0; str[n1] != NULC && str[n1] != quote; n1++) {
2171 if (str[n1] == C2SX('\\') && str[++n1] == NULC)
2172 break; /* Escape character (can be the last) */
2173 }
2174 for (is = n1 + 1; str[is] != NULC && sx_isspace(str[is]); is++) ; /* '--' not to take quote into account */
2175 } else {
2176 for (n1 = n0; str[n1] != NULC && str[n1] != sep && !sx_isspace(str[n1]); n1++) ; /* Search for separator or a space */
2177 for (is = n1; str[is] != NULC && sx_isspace(str[is]); is++) ;
2178 }
2179 } else {
2180 n0 = 0;
2181 for (n1 = 0; str[n1] != NULC && str[n1] != sep; n1++) ; /* Search for separator only */
2182 is = n1;
2183 }
2184
2185 /* Here 'n0' is the start of left member, 'n1' is the character after the end of left member */
2186
2187 if (l0 != NULL)
2188 *l0 = n0;
2189 if (l1 != NULL)
2190 *l1 = n1 - 1;
2191 if (i_sep != NULL)
2192 *i_sep = is;
2193 if (str[is] == NULC || str[is+1] == NULC) { /* No separator => empty right member */
2194 if (r0 != NULL)
2195 *r0 = is;
2196 if (r1 != NULL)
2197 *r1 = is-1;
2198 if (i_sep != NULL)
2199 *i_sep = (str[is] == NULC ? -1 : is);
2200 return true;
2201 }
2202
2203 /* Parse right part */
2204
2205 n0 = is + 1;
2206 if (ignore_spaces) {
2207 for (; str[n0] != NULC && sx_isspace(str[n0]); n0++) ;
2208 if (ignore_quotes && isquote(str[n0]))
2209 quote = str[n0];
2210 }
2211
2212 for (n1 = ++n0; str[n1]; n1++) {
2213 if (ignore_quotes && str[n1] == quote) /* Quote was reached */
2214 break;
2215 if (str[n1] == C2SX('\\') && str[++n1] == NULC) /* Escape character (can be the last) */
2216 break;
2217 }
2218 if (ignore_quotes && str[n1--] != quote) /* Quote is not the same than earlier, '--' is not to take it into account */
2219 return false;
2220 if (!ignore_spaces)
2221 while (str[++n1]) ; /* Jump down the end of the string */
2222
2223 if (r0 != NULL)
2224 *r0 = n0;
2225 if (r1 != NULL)
2226 *r1 = n1;
2227
2228 return true;
2229 }
2230
2231 BOM_TYPE freadBOM(FILE* f, unsigned char* bom, int* sz_bom)
2232 {
2233 unsigned char c1, c2;
2234 long pos;
2235
2236 if (f == NULL)
2237 return BOM_NONE;
2238
2239 /* Save position and try to read and skip BOM if found. If not, go back to saved position. */
2240 pos = ftell(f);
2241 if (pos < 0)
2242 return BOM_NONE;
2243 if (fread(&c1, sizeof(char), 1, f) != 1 || fread(&c2, sizeof(char), 1, f) != 1) {
2244 fseek(f, pos, SEEK_SET);
2245 return BOM_NONE;
2246 }
2247 if (bom != NULL) {
2248 bom[0] = c1;
2249 bom[1] = c2;
2250 bom[2] = '\0';
2251 if (sz_bom != NULL)
2252 *sz_bom = 2;
2253 }
2254 switch ((unsigned short)(c1 << 8) | c2) {
2255 case (unsigned short)0xfeff:
2256 return BOM_UTF_16BE;
2257
2258 case (unsigned short)0xfffe:
2259 pos = ftell(f); /* Save current position to get it back if BOM is not UTF-32LE */
2260 if (pos < 0)
2261 return BOM_UTF_16LE;
2262 if (fread(&c1, sizeof(char), 1, f) != 1 || fread(&c2, sizeof(char), 1, f) != 1) {
2263 fseek(f, pos, SEEK_SET);
2264 return BOM_UTF_16LE;
2265 }
2266 if (c1 == 0x00 && c2 == 0x00) {
2267 if (bom != NULL)
2268 bom[2] = bom[3] = bom[4] = '\0';
2269 if (sz_bom != NULL)
2270 *sz_bom = 4;
2271 return BOM_UTF_32LE;
2272 }
2273 fseek(f, pos, SEEK_SET); /* fseek(f, -2, SEEK_CUR) is not garanteed on Windows (and actually fails in Unicode...) */
2274 return BOM_UTF_16LE;
2275
2276 case (unsigned short)0x0000:
2277 if (fread(&c1, sizeof(char), 1, f) == 1 && fread(&c2, sizeof(char), 1, f) == 1
2278 && c1 == 0xfe && c2 == 0xff) {
2279 bom[2] = c1;
2280 bom[3] = c2;
2281 bom[4] = '\0';
2282 if (sz_bom != NULL)
2283 *sz_bom = 4;
2284 return BOM_UTF_32BE;
2285 }
2286 fseek(f, pos, SEEK_SET);
2287 return BOM_NONE;
2288
2289 case (unsigned short)0xefbb: /* UTF-8? */
2290 if (fread(&c1, sizeof(char), 1, f) != 1 || c1 != 0xbf) { /* Not UTF-8 */
2291 fseek(f, pos, SEEK_SET);
2292 if (bom != NULL)
2293 bom[0] = '\0';
2294 if (sz_bom != NULL)
2295 *sz_bom = 0;
2296 return BOM_NONE;
2297 }
2298 if (bom != NULL) {
2299 bom[2] = c1;
2300 bom[3] = '\0';
2301 }
2302 if (sz_bom != NULL)
2303 *sz_bom = 3;
2304 return BOM_UTF_8;
2305
2306 default: /* No BOM, go back */
2307 fseek(f, pos, SEEK_SET);
2308 if (bom != NULL)
2309 bom[0] = '\0';
2310 if (sz_bom != NULL)
2311 *sz_bom = 0;
2312 return BOM_NONE;
2313 }
2314 }
2315
2316 /* --- */
2317
2318 SXML_CHAR* html2str(SXML_CHAR* html, SXML_CHAR* str)
2319 {
2320 SXML_CHAR *ps, *pd;
2321 int i;
2322
2323 if (html == NULL)
2324 return NULL;
2325
2326 if (str == NULL)
2327 str = html;
2328
2329 /* Look for '&' and matches it to any of the recognized HTML pattern. */
2330 /* If found, replaces the '&' by the corresponding char. */
2331 /* 'p2' is the char to analyze, 'p1' is where to insert it */
2332 for (pd = str, ps = html; *ps; ps++, pd++) {
2333 if (*ps != C2SX('&')) {
2334 if (pd != ps)
2335 *pd = *ps;
2336 continue;
2337 }
2338
2339 for (i = 0; HTML_SPECIAL_DICT[i].chr; i++) {
2340 if (sx_strncmp(ps, HTML_SPECIAL_DICT[i].html, HTML_SPECIAL_DICT[i].html_len))
2341 continue;
2342
2343 *pd = HTML_SPECIAL_DICT[i].chr;
2344 ps += HTML_SPECIAL_DICT[i].html_len-1;
2345 break;
2346 }
2347 /* If no string was found, simply copy the character */
2348 if (HTML_SPECIAL_DICT[i].chr == NULC && pd != ps)
2349 *pd = *ps;
2350 }
2351 *pd = NULC;
2352
2353 return str;
2354 }
2355
2356 /* TODO: Allocate 'html'? */
2357 SXML_CHAR* str2html(SXML_CHAR* str, SXML_CHAR* html)
2358 {
2359 SXML_CHAR *ps, *pd;
2360 int i;
2361
2362 if (str == NULL)
2363 return NULL;
2364
2365 if (html == str) /* Not handled (yet) */
2366 return NULL;
2367
2368 if (html == NULL) { /* Allocate 'html' to the correct size */
2369 html = __malloc(strlen_html(str) * sizeof(SXML_CHAR));
2370 if (html == NULL)
2371 return NULL;
2372 }
2373
2374 for (ps = str, pd = html; *ps; ps++, pd++) {
2375 for (i = 0; HTML_SPECIAL_DICT[i].chr; i++) {
2376 if (*ps == HTML_SPECIAL_DICT[i].chr) {
2377 sx_strcpy(pd, HTML_SPECIAL_DICT[i].html);
2378 pd += HTML_SPECIAL_DICT[i].html_len - 1;
2379 break;
2380 }
2381 }
2382 if (HTML_SPECIAL_DICT[i].chr == NULC && pd != ps)
2383 *pd = *ps;
2384 }
2385 *pd = NULC;
2386
2387 return html;
2388 }
2389
2390 int strlen_html(SXML_CHAR* str)
2391 {
2392 int i, j, n;
2393
2394 if (str == NULL)
2395 return 0;
2396
2397 n = 0;
2398 for (i = 0; str[i] != NULC; i++) {
2399 for (j = 0; HTML_SPECIAL_DICT[j].chr; j++) {
2400 if (str[i] == HTML_SPECIAL_DICT[j].chr) {
2401 n += HTML_SPECIAL_DICT[j].html_len;
2402 break;
2403 }
2404 }
2405 if (HTML_SPECIAL_DICT[j].chr == NULC)
2406 n++;
2407 }
2408
2409 return n;
2410 }
2411
2412 int fprintHTML(FILE* f, SXML_CHAR* str)
2413 {
2414 SXML_CHAR* p;
2415 int i, n;
2416
2417 for (p = str, n = 0; *p != NULC; p++) {
2418 for (i = 0; HTML_SPECIAL_DICT[i].chr; i++) {
2419 if (*p != HTML_SPECIAL_DICT[i].chr)
2420 continue;
2421 if (f != NULL)
2422 sx_fputs(HTML_SPECIAL_DICT[i].html, f);
2423 n += HTML_SPECIAL_DICT[i].html_len;
2424 break;
2425 }
2426 if (HTML_SPECIAL_DICT[i].chr == NULC) {
2427 if (f != NULL)
2428 (void)sx_fputc(*p, f);
2429 n++;
2430 }
2431 }
2432
2433 return n;
2434 }
2435
2436 int regstrcmp(SXML_CHAR* str, SXML_CHAR* pattern)
2437 {
2438 SXML_CHAR *p, *s;
2439
2440 if (str == NULL && pattern == NULL)
2441 return true;
2442
2443 if (str == NULL || pattern == NULL)
2444 return false;
2445
2446 p = pattern;
2447 s = str;
2448 while (true) {
2449 switch (*p) {
2450 /* Any character matches, go to next one */
2451 case C2SX('?'):
2452 p++;
2453 s++;
2454 break;
2455
2456 /* Go to next character in pattern and wait until it is found in 'str' */
2457 case C2SX('*'):
2458 for (; *p != NULC; p++) { /* Squeeze '**?*??**' to '*' */
2459 if (*p != C2SX('*') && *p != C2SX('?'))
2460 break;
2461 }
2462 for (; *s != NULC; s++) {
2463 if (*s == *p)
2464 break;
2465 }
2466 break;
2467
2468 /* NULL character on pattern has to be matched by 'str' */
2469 case 0:
2470 return *s ? false : true;
2471
2472 default:
2473 if (*p == C2SX('\\')) /* Escape character */
2474 p++;
2475 if (*p++ != *s++) /* Characters do not match */
2476 return false;
2477 break;
2478 }
2479 }
2480
2481 return false;
2482 }
2483
Generated by GNU Enscript 1.6.6, and GophHub 1.3.