1 /* 2 Copyright (c) 2010, Matthieu Labas 3 All rights reserved. 4 5 Redistribution and use in source and binary forms, with or without modification, 6 are permitted provided that the following conditions are met: 7 8 1. Redistributions of source code must retain the above copyright notice, 9 this list of conditions and the following disclaimer. 10 11 2. Redistributions in binary form must reproduce the above copyright notice, 12 this list of conditions and the following disclaimer in the documentation 13 and/or other materials provided with the distribution. 14 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY 24 OF SUCH DAMAGE. 25 26 The views and conclusions contained in the software and documentation are those of the 27 authors and should not be interpreted as representing official policies, either expressed 28 or implied, of the FreeBSD Project. 29 */ 30 #if defined(WIN32) || defined(WIN64) 31 #pragma warning(disable : 4996) 32 #else 33 #ifndef strdup 34 #define _GNU_SOURCE 35 #endif 36 #endif 37 38 #include 39 #include 40 #include 41 #include 42 #include "sxmlc.h" 43 44 #define CHECK_NODE(node,ret) if (!XMLNode_is_valid(node)) return (ret) 45 46 /* UTF8 handling for Windows */ 47 #ifndef SXMLC_UNICODE 48 #if defined(WIN32) || defined(WIN64) 49 #include 50 #endif 51 #endif 52 53 /* Determine if character is not ASCII. */ 54 #define sx_isunicode(c) ((int)c < 0 || (int)c > 127) 55 56 #if defined(WIN32) || defined(WIN64) 57 FILE* sx_fopen(const SXML_CHAR* filename, const SXML_CHAR* mode) 58 { 59 FILE* ret = NULL; 60 int is_unicode = false; 61 const char* p; 62 63 for (p = filename; p && *p; p++) { 64 if (sx_isunicode(*p)) { 65 is_unicode = true; 66 break; 67 } 68 } 69 70 if (is_unicode) { 71 wchar_t* wmode = mode[0] == 'w' 72 ? mode[1] == 'b' ? L"wb" : L"wt" 73 : mode[1] == 'b' ? L"rb" : L"rt"; 74 wchar_t* wide = NULL; 75 const int length = MultiByteToWideChar(CP_UTF8, 0, filename, -1, NULL, 0); 76 77 if (length > 1) { 78 wide = (wchar_t*)malloc(length * sizeof(wchar_t)); 79 if (wide) { 80 MultiByteToWideChar(CP_UTF8, 0, filename, -1, wide, length); 81 ret = _wfopen(wide, wmode); 82 free(wide); 83 } 84 } 85 } else 86 ret = fopen(filename, mode); 87 88 return ret; 89 } 90 #endif 91 92 /** 93 * \brief Definition of "special" tags such as "<? ?>" or "<![CDATA[ ]]/>". 94 * 95 * These tags are considered having a start and an end with some data in between that will 96 * be stored in the 'tag' member of an XMLNode. 97 * The `tag_type` member is a constant that is associated to such tag. 98 * All `len_*` members are basically the "sx_strlen()" of 'start' and 'end' members. 99 */ 100 typedef struct _Tag { 101 TagType tag_type; /**< The tag type. */ 102 SXML_CHAR* start; /**< The string representing the tag "opening". *Must start with <*. */ 103 int len_start; /**< The `strlen(start)`. */ 104 SXML_CHAR* end; /**< The string representing the tag "closing". *Must end with >*. */ 105 int len_end; /**< The `strlen(end)`. */ 106 } _TAG; 107 108 /* 109 List of "special" tags handled by sxmlc. 110 NB the "' or ']>'). 112 */ 113 static _TAG _spec[] = { 114 { TAG_INSTR, C2SX(""), 2 }, 115 { TAG_COMMENT, C2SX(""), 3 }, 116 { TAG_CDATA, C2SX(""), 3 } 117 }; 118 static int NB_SPECIAL_TAGS = (int)(sizeof(_spec) / sizeof(_TAG)); /* Auto computation of number of special tags */ 119 120 /* 121 User-registered tags. 122 */ 123 static struct _SpecialTag { 124 _TAG *tags; 125 int n_tags; 126 } _user_tags = { NULL, 0 }; 127 128 int XML_register_user_tag(TagType tag_type, SXML_CHAR* start, SXML_CHAR* end) 129 { 130 _TAG* p; 131 int i, n, le; 132 133 if (tag_type < TAG_USER) 134 return -1; 135 136 if (start == NULL || end == NULL || *start != C2SX('<')) 137 return -1; 138 139 le = sx_strlen(end); 140 if (end[le-1] != C2SX('>')) 141 return -1; 142 143 i = _user_tags.n_tags; 144 n = i + 1; 145 p = __realloc(_user_tags.tags, n * sizeof(_TAG)); 146 if (p == NULL) 147 return -1; 148 149 p[i].tag_type = tag_type; 150 p[i].start = start; 151 p[i].end = end; 152 p[i].len_start = sx_strlen(start); 153 p[i].len_end = le; 154 _user_tags.tags = p; 155 _user_tags.n_tags = n; 156 157 return i; 158 } 159 160 int XML_unregister_user_tag(int i_tag) 161 { 162 _TAG* pt; 163 164 if (i_tag < 0 || i_tag >= _user_tags.n_tags) 165 return -1; 166 167 if (_user_tags.n_tags == 1) 168 pt = NULL; 169 else { 170 pt = __malloc((_user_tags.n_tags - 1) * sizeof(_TAG)); 171 if (pt == NULL) 172 return -1; 173 } 174 175 if (pt != NULL) { 176 memcpy(pt, _user_tags.tags, i_tag * sizeof(_TAG)); 177 memcpy(&pt[i_tag], &_user_tags.tags[i_tag + 1], (_user_tags.n_tags - i_tag - 1) * sizeof(_TAG)); 178 } 179 if (_user_tags.tags != NULL) 180 __free(_user_tags.tags); 181 _user_tags.tags = pt; 182 _user_tags.n_tags--; 183 184 return _user_tags.n_tags; 185 } 186 187 int XML_get_nb_registered_user_tags(void) 188 { 189 return _user_tags.n_tags; 190 } 191 192 int XML_get_registered_user_tag(TagType tag_type) 193 { 194 int i; 195 196 for (i = 0; i < _user_tags.n_tags; i++) 197 if (_user_tags.tags[i].tag_type == tag_type) 198 return i; 199 200 return -1; 201 } 202 203 /* --- XMLNode methods --- */ 204 205 /* 206 Add 'node' to given '*children_array' of '*len_array' elements. 207 '*len_array' is overwritten with the number of elements in '*children_array' after its reallocation. 208 Return the index of the newly added 'node' in '*children_array', or '-1' for memory error. 209 */ 210 static int _add_node(XMLNode*** children_array, int* len_array, XMLNode* node) 211 { 212 XMLNode** pt = __realloc(*children_array, (*len_array+1) * sizeof(XMLNode*)); 213 214 if (pt == NULL) 215 return -1; 216 217 pt[*len_array] = node; 218 *children_array = pt; 219 220 return (*len_array)++; 221 } 222 223 int XMLNode_init(XMLNode* node) 224 { 225 if (node == NULL) 226 return false; 227 228 if (node->init_value == XML_INIT_DONE) 229 return true; /*(void)XMLNode_free(node);*/ 230 231 node->tag = NULL; 232 node->text = NULL; 233 234 node->attributes = NULL; 235 node->n_attributes = 0; 236 237 node->father = NULL; 238 node->children = NULL; 239 node->n_children = 0; 240 241 node->tag_type = TAG_NONE; 242 node->active = true; 243 244 node->init_value = XML_INIT_DONE; 245 246 return true; 247 } 248 249 XMLNode* XMLNode_allocN(int n) 250 { 251 int i; 252 XMLNode* p; 253 254 if (n <= 0) 255 return NULL; 256 257 p = __calloc(n, sizeof(XMLNode)); 258 if (p == NULL) 259 return NULL; 260 261 for (i = 0; i < n; i++) 262 (void)XMLNode_init(&p[i]); 263 264 return p; 265 } 266 267 XMLNode* XMLNode_new(const TagType tag_type, const SXML_CHAR* tag, const SXML_CHAR* text) 268 { 269 XMLNode* node = XMLNode_alloc(); 270 if (node == NULL) 271 return NULL; 272 273 if (!XMLNode_set_tag(node, tag) || (text != NULL && !XMLNode_set_text(node, text))) { 274 __free(node); 275 return NULL; 276 } 277 278 node->tag_type = tag_type; 279 280 return node; 281 } 282 283 XMLNode* XMLNode_dup(const XMLNode* node, int copy_children) 284 { 285 XMLNode* n; 286 287 if (node == NULL) 288 return NULL; 289 290 n = __calloc(1, sizeof(XMLNode)); 291 if (n == NULL) 292 return NULL; 293 294 XMLNode_init(n); 295 if (!XMLNode_copy(n, node, copy_children)) { 296 XMLNode_free(n); 297 298 return NULL; 299 } 300 301 return n; 302 } 303 304 int XMLNode_free(XMLNode* node) 305 { 306 CHECK_NODE(node, false); 307 308 if (node->tag != NULL) { 309 __free(node->tag); 310 node->tag = NULL; 311 } 312 313 XMLNode_remove_text(node); 314 XMLNode_remove_all_attributes(node); 315 XMLNode_remove_children(node); 316 317 node->tag_type = TAG_NONE; 318 319 return true; 320 } 321 322 int XMLNode_copy(XMLNode* dst, const XMLNode* src, int copy_children) 323 { 324 int i; 325 326 if (dst == NULL || (src != NULL && src->init_value != XML_INIT_DONE)) 327 return false; 328 329 (void)XMLNode_free(dst); /* 'dst' is freed first */ 330 331 /* NULL 'src' resets 'dst' */ 332 if (src == NULL) 333 return true; 334 335 /* Tag */ 336 if (src->tag != NULL) { 337 dst->tag = sx_strdup(src->tag); 338 if (dst->tag == NULL) goto copy_err; 339 } 340 341 /* Text */ 342 if (dst->text != NULL) { 343 dst->text = sx_strdup(src->text); 344 if (dst->text == NULL) goto copy_err; 345 } 346 347 /* Attributes */ 348 if (src->n_attributes > 0) { 349 dst->attributes = __calloc(src->n_attributes, sizeof(XMLAttribute)); 350 if (dst->attributes== NULL) goto copy_err; 351 dst->n_attributes = src->n_attributes; 352 for (i = 0; i < src->n_attributes; i++) { 353 dst->attributes[i].name = sx_strdup(src->attributes[i].name); 354 dst->attributes[i].value = sx_strdup(src->attributes[i].value); 355 if (dst->attributes[i].name == NULL || dst->attributes[i].value == NULL) goto copy_err; 356 dst->attributes[i].active = src->attributes[i].active; 357 } 358 } 359 360 dst->tag_type = src->tag_type; 361 dst->father = src->father; 362 dst->user = src->user; 363 dst->active = src->active; 364 365 /* Copy children if required (and there are any) */ 366 if (copy_children && src->n_children > 0) { 367 dst->children = __calloc(src->n_children, sizeof(XMLNode*)); 368 if (dst->children == NULL) goto copy_err; 369 dst->n_children = src->n_children; 370 for (i = 0; i < src->n_children; i++) { 371 if (!XMLNode_copy(dst->children[i], src->children[i], true)) goto copy_err; 372 } 373 } 374 375 return true; 376 377 copy_err: 378 (void)XMLNode_free(dst); 379 380 return false; 381 } 382 383 int XMLNode_set_active(XMLNode* node, int active) 384 { 385 CHECK_NODE(node, false); 386 387 node->active = active; 388 389 return true; 390 } 391 392 int XMLNode_set_tag(XMLNode* node, const SXML_CHAR* tag) 393 { 394 SXML_CHAR* newtag; 395 if (node == NULL || tag == NULL || node->init_value != XML_INIT_DONE) 396 return false; 397 398 newtag = sx_strdup(tag); 399 if (newtag == NULL) 400 return false; 401 if (node->tag != NULL) __free(node->tag); 402 node->tag = newtag; 403 404 return true; 405 } 406 407 int XMLNode_set_type(XMLNode* node, const TagType tag_type) 408 { 409 CHECK_NODE(node, false); 410 411 switch (tag_type) { 412 case TAG_ERROR: 413 case TAG_END: 414 case TAG_PARTIAL: 415 case TAG_NONE: 416 return false; 417 418 default: 419 node->tag_type = tag_type; 420 return true; 421 } 422 } 423 424 int XMLNode_set_attribute(XMLNode* node, const SXML_CHAR* attr_name, const SXML_CHAR* attr_value) 425 { 426 XMLAttribute* pt; 427 int i; 428 429 if (node == NULL || attr_name == NULL || attr_name[0] == NULC || node->init_value != XML_INIT_DONE) 430 return -1; 431 432 i = XMLNode_search_attribute(node, attr_name, 0); 433 if (i >= 0) { /* Attribute found: update it */ 434 SXML_CHAR* value = NULL; 435 if (attr_value != NULL && (value = sx_strdup(attr_value)) == NULL) 436 return -1; 437 pt = node->attributes; 438 if (pt[i].value != NULL) 439 __free(pt[i].value); 440 pt[i].value = value; 441 } else { /* Attribute not found: add it */ 442 SXML_CHAR* name = sx_strdup(attr_name); 443 SXML_CHAR* value = (attr_value == NULL ? NULL : sx_strdup(attr_value)); 444 if (name == NULL || (value == NULL && attr_value != NULL)) { 445 if (value != NULL) 446 __free(value); 447 if (name != NULL) 448 __free(name); 449 return -1; 450 } 451 i = node->n_attributes; 452 pt = __realloc(node->attributes, (i+1) * sizeof(XMLAttribute)); 453 if (pt == NULL) { 454 if (value != NULL) 455 __free(value); 456 __free(name); 457 return -1; 458 } 459 460 pt[i].name = name; 461 pt[i].value = value; 462 pt[i].active = true; 463 node->attributes = pt; 464 node->n_attributes = i + 1; 465 } 466 467 return node->n_attributes; 468 } 469 470 int XMLNode_get_attribute_with_default(XMLNode* node, const SXML_CHAR* attr_name, const SXML_CHAR** attr_value, const SXML_CHAR* default_attr_value) 471 { 472 XMLAttribute* pt; 473 int i; 474 475 if (node == NULL || attr_name == NULL || attr_name[0] == NULC || attr_value == NULL || node->init_value != XML_INIT_DONE) 476 return false; 477 478 i = XMLNode_search_attribute(node, attr_name, 0); 479 if (i >= 0) { 480 pt = node->attributes; 481 if (pt[i].value != NULL) { 482 *attr_value = sx_strdup(pt[i].value); 483 if (*attr_value == NULL) 484 return false; 485 } else 486 *attr_value = NULL; /* NULL but returns 'true' as 'NULL' is the actual attribute value */ 487 } else if (default_attr_value != NULL) { 488 *attr_value = sx_strdup(default_attr_value); 489 if (*attr_value == NULL) 490 return false; 491 } else 492 *attr_value = NULL; 493 494 return true; 495 } 496 497 int XMLNode_get_attribute_count(const XMLNode* node) 498 { 499 int i, n; 500 501 CHECK_NODE(node, -1); 502 503 for (i = n = 0; i < node->n_attributes; i++) 504 if (node->attributes[i].active) n++; 505 506 return n; 507 } 508 509 int XMLNode_search_attribute(const XMLNode* node, const SXML_CHAR* attr_name, int i_search) 510 { 511 int i; 512 513 if (node == NULL || attr_name == NULL || attr_name[0] == NULC || i_search < 0 || i_search >= node->n_attributes) 514 return -1; 515 516 for (i = i_search; i < node->n_attributes; i++) 517 if (node->attributes[i].active && !sx_strcmp(node->attributes[i].name, attr_name)) 518 return i; 519 520 return -1; 521 } 522 523 int XMLNode_remove_attribute(XMLNode* node, int i_attr) 524 { 525 XMLAttribute* pt; 526 if (node == NULL || node->init_value != XML_INIT_DONE || i_attr < 0 || i_attr >= node->n_attributes) 527 return -1; 528 529 /* Before modifying first see if we run out of memory */ 530 if (node->n_attributes == 1) 531 pt = NULL; 532 else { 533 pt = __malloc((node->n_attributes - 1) * sizeof(XMLAttribute)); 534 if (pt == NULL) 535 return -1; 536 } 537 538 /* Can't fail anymore, free item */ 539 if (node->attributes[i_attr].name != NULL) __free(node->attributes[i_attr].name); 540 if (node->attributes[i_attr].value != NULL) __free(node->attributes[i_attr].value); 541 542 if (pt != NULL) { 543 memcpy(pt, node->attributes, i_attr * sizeof(XMLAttribute)); 544 memcpy(&pt[i_attr], &node->attributes[i_attr + 1], (node->n_attributes - i_attr - 1) * sizeof(XMLAttribute)); 545 } 546 if (node->attributes != NULL) 547 __free(node->attributes); 548 node->attributes = pt; 549 node->n_attributes--; 550 551 return node->n_attributes; 552 } 553 554 int XMLNode_remove_all_attributes(XMLNode* node) 555 { 556 int i; 557 558 CHECK_NODE(node, false); 559 560 if (node->attributes != NULL) { 561 for (i = 0; i < node->n_attributes; i++) { 562 if (node->attributes[i].name != NULL) 563 __free(node->attributes[i].name); 564 if (node->attributes[i].value != NULL) 565 __free(node->attributes[i].value); 566 } 567 __free(node->attributes); 568 node->attributes = NULL; 569 } 570 node->n_attributes = 0; 571 572 return true; 573 } 574 575 int XMLNode_set_text(XMLNode* node, const SXML_CHAR* text) 576 { 577 SXML_CHAR* p; 578 CHECK_NODE(node, false); 579 580 if (text == NULL) { /* We want to remove it => free node text */ 581 if (node->text != NULL) { 582 __free(node->text); 583 node->text = NULL; 584 } 585 586 return true; 587 } 588 589 p = sx_strdup(text); 590 if (p == NULL) 591 return false; 592 if (node->text != NULL) 593 __free(node->text); 594 node->text = p; 595 596 return true; 597 } 598 599 int XMLNode_add_child(XMLNode* node, XMLNode* child) 600 { 601 if (node == NULL || child == NULL || node->init_value != XML_INIT_DONE || child->init_value != XML_INIT_DONE) 602 return false; 603 604 if (_add_node(&node->children, &node->n_children, child) >= 0) { 605 node->tag_type = TAG_FATHER; 606 child->father = node; 607 return true; 608 } else 609 return false; 610 } 611 612 int XMLNode_insert_child(XMLNode* node, XMLNode* child, int index) 613 { 614 int i, j; 615 616 CHECK_NODE(node, -1); 617 618 /* We could process cases "first" and "last" in an optimized way, but we prefer readability to (micro-)optimization */ 619 if (index < 0) /* Before first => first */ 620 index = 0; 621 if (index >= node->n_children) /* After last => last */ 622 index = node->n_children - 1; 623 624 for (i = 0; i < node->n_children; i++) { 625 if (!node->children[i]->active || index-- > 0) 626 continue; 627 /* Insert it here, at 'i' */ 628 if (_add_node(&node->children, &node->n_children, child) >= 0) { 629 node->tag_type = TAG_FATHER; 630 child->father = node; 631 /* Erase 'child', which is the last node ('n_children' has been incremented by '_add_node()') */ 632 for (j = node->n_children - 1; j >= i; j--) 633 node->children[j] = node->children[j-1]; 634 node->children[i] = child; /* Set it */ 635 return true; 636 } else 637 return false; 638 } 639 640 return false; /* Oops! */ 641 } 642 643 int XMLNode_move_child(XMLNode* node, int from, int to) 644 { 645 XMLNode* nfrom; 646 647 CHECK_NODE(node, false); 648 if (from < 0 || from >= node->n_children) 649 return false; 650 if (to < 0) /* Before first => first */ 651 to = 0; 652 if (to >= node->n_children) /* After last => last */ 653 to = node->n_children - 1; 654 655 nfrom = node->children[from]; 656 if (to > from) { /* Move forward: bring following nodes (up to 'to') backward one position */ 657 int i; 658 for (i = from; i < to; i++) 659 node->children[i] = node->children[i+1]; 660 } else { /* Move backward: bring previous nodes (up to 'from') forward one position */ 661 int i; 662 for (i = from - 1; i >= to; i--) 663 node->children[i+1] = node->children[i]; 664 } 665 node->children[to] = nfrom; 666 667 return true; 668 } 669 670 671 int XMLNode_get_children_count(const XMLNode* node) 672 { 673 int i, n; 674 675 CHECK_NODE(node, -1); 676 677 for (i = n = 0; i < node->n_children; i++) 678 if (node->children[i]->active) 679 n++; 680 681 return n; 682 } 683 684 int XMLNode_get_index(const XMLNode* node) 685 { 686 int i, i_child; 687 688 CHECK_NODE(node, -1); 689 690 if (node->father == NULL) 691 return 0; 692 693 for (i = i_child = 0; i < node->father->n_children; i++) { 694 if (!node->father->children[i]->active) 695 continue; 696 if (node->father->children[i] == node) 697 return i_child; 698 i_child++; 699 } 700 701 return -2; /* Oops! */ 702 } 703 704 XMLNode* XMLNode_get_child(const XMLNode* node, int i_child) 705 { 706 int i; 707 708 if (node == NULL || node->init_value != XML_INIT_DONE || i_child < 0 || i_child >= node->n_children) 709 return NULL; 710 711 for (i = 0; i < node->n_children; i++) { 712 if (!node->children[i]->active) 713 i_child++; 714 else if (i == i_child) 715 return node->children[i]; 716 } 717 718 return NULL; 719 } 720 721 int XMLNode_remove_child(XMLNode* node, int i_child, int free_child) 722 { 723 int i; 724 XMLNode** pt; 725 726 if (node == NULL || node->init_value != XML_INIT_DONE || i_child < 0 || i_child >= node->n_children) 727 return -1; 728 729 /* Lookup 'i_child'th active child */ 730 for (i = 0; i < node->n_children; i++) { 731 if (!node->children[i]->active) 732 i_child++; 733 else if (i == i_child) 734 break; 735 } 736 if (i >= node->n_children) 737 return -1; /* Children is not found */ 738 739 /* Before modifying first see if we run out of memory */ 740 if (node->n_children == 1) { 741 pt = NULL; 742 } else { 743 pt = __malloc((node->n_children - 1) * sizeof(XMLNode*)); 744 if (pt == NULL) 745 return -1; 746 } 747 748 /* Can't fail anymore, free item */ 749 (void)XMLNode_free(node->children[i_child]); 750 if (free_child) 751 __free(node->children[i_child]); 752 753 if (pt != NULL) { 754 memcpy(pt, node->children, i_child * sizeof(XMLNode*)); 755 memcpy(&pt[i_child], &node->children[i_child + 1], (node->n_children - i_child - 1) * sizeof(XMLNode*)); 756 } 757 if (node->children != NULL) 758 __free(node->children); 759 node->children = pt; 760 node->n_children--; 761 if (node->n_children == 0) 762 node->tag_type = TAG_SELF; 763 764 return node->n_children; 765 } 766 767 int XMLNode_remove_children(XMLNode* node) 768 { 769 int i; 770 771 CHECK_NODE(node, false); 772 773 if (node->children != NULL) { 774 for (i = 0; i < node->n_children; i++) 775 if (node->children[i] != NULL) { 776 (void)XMLNode_free(node->children[i]); 777 __free(node->children[i]); 778 } 779 __free(node->children); 780 node->children = NULL; 781 } 782 node->n_children = 0; 783 784 return true; 785 } 786 787 int XMLNode_equal(const XMLNode* node1, const XMLNode* node2) 788 { 789 int i, j; 790 791 if (node1 == node2) 792 return true; 793 794 if (node1 == NULL || node2 == NULL || node1->init_value != XML_INIT_DONE || node2->init_value != XML_INIT_DONE) 795 return false; 796 797 if (sx_strcmp(node1->tag, node2->tag)) 798 return false; 799 800 /* Test all attributes from 'node1' */ 801 for (i = 0; i < node1->n_attributes; i++) { 802 if (!node1->attributes[i].active) 803 continue; 804 j = XMLNode_search_attribute(node2, node1->attributes[i].name, 0); 805 if (j < 0) 806 return false; 807 if (sx_strcmp(node1->attributes[i].value, node2->attributes[j].value)) 808 return false; 809 } 810 811 /* Test other attributes from 'node2' that might not be in 'node1' */ 812 for (i = 0; i < node2->n_attributes; i++) { 813 if (!node2->attributes[i].active) 814 continue; 815 j = XMLNode_search_attribute(node1, node2->attributes[i].name, 0); 816 if (j < 0) 817 return false; 818 if (sx_strcmp(node2->attributes[i].name, node1->attributes[j].name)) 819 return false; 820 } 821 822 return true; 823 } 824 825 XMLNode* XMLNode_next_sibling(const XMLNode* node) 826 { 827 int i; 828 XMLNode* father; 829 830 if (node == NULL || node->init_value != XML_INIT_DONE || node->father == NULL) 831 return NULL; 832 833 father = node->father; 834 for (i = 0; i < father->n_children && father->children[i] != node; i++) ; 835 i++; /* father->children[i] is now 'node' next sibling */ 836 837 return i < father->n_children ? father->children[i] : NULL; 838 } 839 840 static XMLNode* _XMLNode_next(const XMLNode* node, int in_children) 841 { 842 XMLNode* node2; 843 844 CHECK_NODE(node, NULL); 845 846 /* Check first child */ 847 if (in_children && node->n_children > 0) 848 return node->children[0]; 849 850 /* Check next sibling */ 851 if ((node2 = XMLNode_next_sibling(node)) != NULL) 852 return node2; 853 854 /* Check next uncle */ 855 return _XMLNode_next(node->father, false); 856 } 857 858 XMLNode* XMLNode_next(const XMLNode* node) 859 { 860 return _XMLNode_next(node, true); 861 } 862 863 /* --- XMLDoc methods --- */ 864 865 int XMLDoc_init(XMLDoc* doc) 866 { 867 if (doc == NULL) 868 return false; 869 870 doc->filename[0] = NULC; 871 memset(&doc->bom, 0, sizeof(doc->bom)); 872 doc->nodes = NULL; 873 doc->n_nodes = 0; 874 doc->i_root = -1; 875 doc->init_value = XML_INIT_DONE; 876 877 return true; 878 } 879 880 int XMLDoc_free(XMLDoc* doc) 881 { 882 int i; 883 884 if (doc == NULL || doc->init_value != XML_INIT_DONE) 885 return false; 886 887 for (i = 0; i < doc->n_nodes; i++) { 888 (void)XMLNode_free(doc->nodes[i]); 889 __free(doc->nodes[i]); 890 } 891 __free(doc->nodes); 892 doc->nodes = NULL; 893 doc->n_nodes = 0; 894 doc->i_root = -1; 895 896 return true; 897 } 898 899 int XMLDoc_set_root(XMLDoc* doc, int i_root) 900 { 901 if (doc == NULL || doc->init_value != XML_INIT_DONE || i_root < 0 || i_root >= doc->n_nodes) 902 return false; 903 904 doc->i_root = i_root; 905 906 return true; 907 } 908 909 int XMLDoc_add_node(XMLDoc* doc, XMLNode* node) 910 { 911 if (doc == NULL || node == NULL || doc->init_value != XML_INIT_DONE) 912 return -1; 913 914 if (_add_node(&doc->nodes, &doc->n_nodes, node) < 0) 915 return -1; 916 917 if (node->tag_type == TAG_FATHER) 918 doc->i_root = doc->n_nodes - 1; /* Main root node is the last father node */ 919 920 return doc->n_nodes; 921 } 922 923 int XMLDoc_remove_node(XMLDoc* doc, int i_node, int free_node) 924 { 925 XMLNode** pt; 926 if (doc == NULL || doc->init_value != XML_INIT_DONE || i_node < 0 || i_node > doc->n_nodes) 927 return false; 928 929 /* Before modifying first see if we run out of memory */ 930 if (doc->n_nodes == 1) 931 pt = NULL; 932 else { 933 pt = __malloc((doc->n_nodes - 1) * sizeof(XMLNode*)); 934 if (pt == NULL) 935 return false; 936 } 937 938 /* Can't fail anymore, free item */ 939 (void)XMLNode_free(doc->nodes[i_node]); 940 if (free_node) __free(doc->nodes[i_node]); 941 942 if (pt != NULL) { 943 memcpy(pt, &doc->nodes[i_node], i_node * sizeof(XMLNode*)); 944 memcpy(&pt[i_node], &doc->nodes[i_node + 1], (doc->n_nodes - i_node - 1) * sizeof(XMLNode*)); 945 } 946 947 if (doc->nodes != NULL) 948 __free(doc->nodes); 949 doc->nodes = pt; 950 doc->n_nodes--; 951 952 return true; 953 } 954 955 /* 956 Helper functions to print formatting before a new tag. 957 Returns the new number of characters in the line. 958 */ 959 static int _count_new_char_line(const SXML_CHAR* str, int nb_char_tab, int cur_sz_line) 960 { 961 for (; *str; str++) { 962 if (*str == C2SX('\n')) 963 cur_sz_line = 0; 964 else if (*str == C2SX('\t')) 965 cur_sz_line += nb_char_tab; 966 else 967 cur_sz_line++; 968 } 969 970 return cur_sz_line; 971 } 972 static int _print_formatting(const XMLNode* node, FILE* f, const SXML_CHAR* tag_sep, const SXML_CHAR* child_sep, int nb_char_tab, int cur_sz_line) 973 { 974 if (tag_sep != NULL) { 975 sx_fputs(tag_sep, f); 976 cur_sz_line = _count_new_char_line(tag_sep, nb_char_tab, cur_sz_line); 977 } 978 if (child_sep != NULL) { 979 for (node = node->father; node != NULL; node = node->father) { 980 sx_fputs(child_sep, f); 981 cur_sz_line = _count_new_char_line(child_sep, nb_char_tab, cur_sz_line); 982 } 983 } 984 985 return cur_sz_line; 986 } 987 988 static int _XMLNode_print_header(const XMLNode* node, FILE* f, const SXML_CHAR* tag_sep, const SXML_CHAR* child_sep, const SXML_CHAR* attr_sep, int sz_line, int cur_sz_line, int nb_char_tab) 989 { 990 int i; 991 SXML_CHAR* p; 992 993 if (node == NULL || f == NULL || !node->active || node->tag == NULL || node->tag[0] == NULC) 994 return -1; 995 996 /* Special handling of DOCTYPE */ 997 if (node->tag_type == TAG_DOCTYPE) { 998 /* Search for an unescaped '[' in the DOCTYPE definition, in which case the end delimiter should be ']>' instead of '>' */ 999 for (p = sx_strchr(node->tag, C2SX('[')); p != NULL && *(p-1) == C2SX('\\'); p = sx_strchr(p+1, C2SX('['))) ; 1000 cur_sz_line += sx_fprintf(f, C2SX(""), node->tag, p != NULL ? C2SX("]") : C2SX("")); 1001 return cur_sz_line; 1002 } 1003 1004 /* Check for special tags first */ 1005 for (i = 0; i < NB_SPECIAL_TAGS; i++) { 1006 if (node->tag_type == _spec[i].tag_type) { 1007 sx_fprintf(f, C2SX("%s%s%s"), _spec[i].start, node->tag, _spec[i].end); 1008 cur_sz_line += sx_strlen(_spec[i].start) + sx_strlen(node->tag) + sx_strlen(_spec[i].end); 1009 return cur_sz_line; 1010 } 1011 } 1012 1013 /* Check for user tags */ 1014 for (i = 0; i < _user_tags.n_tags; i++) { 1015 if (node->tag_type == _user_tags.tags[i].tag_type) { 1016 sx_fprintf(f, C2SX("%s%s%s"), _user_tags.tags[i].start, node->tag, _user_tags.tags[i].end); 1017 cur_sz_line += sx_strlen(_user_tags.tags[i].start) + sx_strlen(node->tag) + sx_strlen(_user_tags.tags[i].end); 1018 return cur_sz_line; 1019 } 1020 } 1021 1022 /* Print tag name */ 1023 cur_sz_line += sx_fprintf(f, C2SX("<%s"), node->tag); 1024 1025 /* Print attributes */ 1026 if (attr_sep == NULL) 1027 attr_sep = C2SX(" "); 1028 for (i = 0; i < node->n_attributes; i++) { 1029 if (!node->attributes[i].active) 1030 continue; 1031 cur_sz_line += sx_strlen(node->attributes[i].name) + sx_strlen(node->attributes[i].value) + 3; 1032 if (sz_line > 0 && cur_sz_line > sz_line) { 1033 cur_sz_line = _print_formatting(node, f, tag_sep, child_sep, nb_char_tab, cur_sz_line); 1034 /* Add extra separator, as if new line was a child of the previous one */ 1035 if (child_sep != NULL) { 1036 sx_fputs(child_sep, f); 1037 cur_sz_line = _count_new_char_line(child_sep, nb_char_tab, cur_sz_line); 1038 } 1039 } 1040 /* Attribute name */ 1041 cur_sz_line = _count_new_char_line(attr_sep, nb_char_tab, cur_sz_line); 1042 sx_fprintf(f, C2SX("%s%s="), attr_sep, node->attributes[i].name); 1043 1044 /* Attribute value */ 1045 (void)sx_fputc(XML_DEFAULT_QUOTE, f); 1046 cur_sz_line += fprintHTML(f, node->attributes[i].value) + 2; 1047 (void)sx_fputc(XML_DEFAULT_QUOTE, f); 1048 } 1049 1050 /* End the tag if there are no children and no text */ 1051 if (node->n_children == 0 && (node->text == NULL || node->text[0] == NULC)) { 1052 cur_sz_line += sx_fprintf(f, C2SX("/>")); 1053 } else { 1054 (void)sx_fputc(C2SX('>'), f); 1055 cur_sz_line++; 1056 } 1057 1058 return cur_sz_line; 1059 } 1060 1061 int XMLNode_print_header(const XMLNode* node, FILE* f, int sz_line, int nb_char_tab) 1062 { 1063 return _XMLNode_print_header(node, f, NULL, NULL, NULL, sz_line, 0, nb_char_tab) < 0 ? false : true; 1064 } 1065 1066 static int _XMLNode_print(const XMLNode* node, FILE* f, const SXML_CHAR* tag_sep, const SXML_CHAR* child_sep, const SXML_CHAR* attr_sep, int keep_text_spaces, int sz_line, int cur_sz_line, int nb_char_tab, int depth) 1067 { 1068 int i; 1069 SXML_CHAR* p; 1070 1071 if (node != NULL && node->tag_type==TAG_TEXT) { /* Text has to be printed: check if it is only spaces */ 1072 if (!keep_text_spaces) { 1073 for (p = node->text; p != NULL && *p != NULC && sx_isspace(*p); p++) ; /* 'p' points to first non-space character, or to '\0' if only spaces */ 1074 } else 1075 p = node->text; /* '*p' won't be '\0' */ 1076 if (p != NULL && *p != NULC) 1077 cur_sz_line += fprintHTML(f, node->text); 1078 return cur_sz_line; 1079 } 1080 1081 if (node == NULL || f == NULL || !node->active || node->tag == NULL || node->tag[0] == NULC) 1082 return -1; 1083 1084 if (nb_char_tab <= 0) 1085 nb_char_tab = 1; 1086 1087 /* Print formatting */ 1088 if (depth < 0) /* UGLY HACK: 'depth' forced negative on very first line so we don't print an extra 'tag_sep' (usually "\n" when pretty-printing) */ 1089 depth = 0; 1090 else 1091 cur_sz_line = _print_formatting(node, f, tag_sep, child_sep, nb_char_tab, cur_sz_line); 1092 1093 _XMLNode_print_header(node, f, tag_sep, child_sep, attr_sep, sz_line, cur_sz_line, nb_char_tab); 1094 1095 if (node->text != NULL && node->text[0] != NULC) { 1096 /* Text has to be printed: check if it is only spaces */ 1097 if (!keep_text_spaces) { 1098 for (p = node->text; *p != NULC && sx_isspace(*p); p++) ; /* 'p' points to first non-space character, or to '\0' if only spaces */ 1099 } else 1100 p = node->text; /* '*p' won't be '\0' */ 1101 if (*p != NULC) cur_sz_line += fprintHTML(f, node->text); 1102 } else if (node->n_children <= 0) /* Everything has already been printed */ 1103 return cur_sz_line; 1104 1105 /* Recursively print children */ 1106 for (i = 0; i < node->n_children; i++) 1107 (void)_XMLNode_print(node->children[i], f, tag_sep, child_sep, attr_sep, keep_text_spaces, sz_line, cur_sz_line, nb_char_tab, depth+1); 1108 1109 /* Print tag end after children */ 1110 /* Print formatting */ 1111 if (node->n_children > 0) 1112 cur_sz_line = _print_formatting(node, f, tag_sep, child_sep, nb_char_tab, cur_sz_line); 1113 cur_sz_line += sx_fprintf(f, C2SX(""), node->tag); 1114 1115 return cur_sz_line; 1116 } 1117 1118 int XMLNode_print_attr_sep(const XMLNode* node, FILE* f, const SXML_CHAR* tag_sep, const SXML_CHAR* child_sep, const SXML_CHAR* attr_sep, int keep_text_spaces, int sz_line, int nb_char_tab) 1119 { 1120 return _XMLNode_print(node, f, tag_sep, child_sep, attr_sep, keep_text_spaces, sz_line, 0, nb_char_tab, 0); 1121 } 1122 1123 int XMLDoc_print_attr_sep(const XMLDoc* doc, FILE* f, const SXML_CHAR* tag_sep, const SXML_CHAR* child_sep, const SXML_CHAR* attr_sep, int keep_text_spaces, int sz_line, int nb_char_tab) 1124 { 1125 int i, depth, cur_sz_line; 1126 1127 if (doc == NULL || f == NULL || doc->init_value != XML_INIT_DONE) 1128 return false; 1129 1130 /* Write BOM if it exist */ 1131 if (doc->sz_bom > 0) fwrite(doc->bom, sizeof(unsigned char), doc->sz_bom, f); 1132 1133 depth = -1; /* UGLY HACK: 'depth' forced negative on very first line so we don't print an extra 'tag_sep' (usually "\n") */ 1134 for (i = 0, cur_sz_line = 0; i < doc->n_nodes; i++) { 1135 cur_sz_line = _XMLNode_print(doc->nodes[i], f, tag_sep, child_sep, attr_sep, keep_text_spaces, sz_line, cur_sz_line, nb_char_tab, depth); 1136 depth = 0; 1137 } 1138 /* TODO: Find something more graceful than 'depth=-1', even though everyone knows I probably never will ;) */ 1139 1140 return true; 1141 } 1142 1143 /* --- */ 1144 1145 int XML_parse_attribute_to(const SXML_CHAR* str, int to, XMLAttribute* xmlattr) 1146 { 1147 const SXML_CHAR *p; 1148 int i, n0, n1, remQ = 0; 1149 int ret = 1; 1150 SXML_CHAR quote = '\0'; 1151 1152 if (str == NULL || xmlattr == NULL) 1153 return 0; 1154 1155 if (to < 0) 1156 to = sx_strlen(str) - 1; 1157 1158 /* Search for the '=' */ 1159 /* 'n0' is where the attribute name stops, 'n1' is where the attribute value starts */ 1160 for (n0 = 0; n0 != to && str[n0] != C2SX('=') && !sx_isspace(str[n0]); n0++) ; /* Search for '=' or a space */ 1161 for (n1 = n0; n1 != to && sx_isspace(str[n1]); n1++) ; /* Search for something not a space */ 1162 if (str[n1] != C2SX('=')) 1163 return 0; /* '=' not found: malformed string */ 1164 for (n1++; n1 != to && sx_isspace(str[n1]); n1++) ; /* Search for something not a space */ 1165 if (isquote(str[n1])) { /* Remove quotes */ 1166 quote = str[n1]; 1167 remQ = 1; 1168 } 1169 1170 xmlattr->name = __malloc((n0+1)*sizeof(SXML_CHAR)); 1171 xmlattr->value = __malloc((to+1 - n1 - 2*remQ + 1) * sizeof(SXML_CHAR)); /* 2*remQ because we expect 2 quotes */ 1172 xmlattr->active = true; 1173 if (xmlattr->name != NULL && xmlattr->value != NULL) { 1174 /* Copy name */ 1175 sx_strncpy(xmlattr->name, str, n0); 1176 xmlattr->name[n0] = NULC; 1177 /* (void)str_unescape(xmlattr->name); do not unescape the name */ 1178 /* Copy value (p starts after the quote (if any) and stops at the end of 'str' 1179 (skipping the quote if any, hence the '*(p+remQ)') */ 1180 for (i = 0, p = str + n1 + remQ; i + n1 + 2*remQ < to && *(p+remQ) != NULC; i++, p++) 1181 xmlattr->value[i] = *p; 1182 xmlattr->value[i] = NULC; 1183 (void)html2str(xmlattr->value, NULL); /* Convert HTML escape sequences, do not str_unescape(xmlattr->value) */ 1184 if (remQ && *p != quote) 1185 ret = 2; /* Quote at the beginning but not at the end: probable presence of '>' inside attribute value, so we need to read more data! */ 1186 } else 1187 ret = 0; 1188 1189 if (ret == 0) { 1190 if (xmlattr->name != NULL) { 1191 __free(xmlattr->name); 1192 xmlattr->name = NULL; 1193 } 1194 if (xmlattr->value != NULL) { 1195 __free(xmlattr->value); 1196 xmlattr->value = NULL; 1197 } 1198 } 1199 1200 return ret; 1201 } 1202 1203 static TagType _parse_special_tag(const SXML_CHAR* str, int len, _TAG* tag, XMLNode* node) 1204 { 1205 if (sx_strncmp(str, tag->start, tag->len_start)) 1206 return TAG_NONE; 1207 1208 if (sx_strncmp(str + len - tag->len_end, tag->end, tag->len_end)) /* There probably is a '>' inside the tag */ 1209 return TAG_PARTIAL; 1210 1211 node->tag = __malloc((len - tag->len_start - tag->len_end + 1)*sizeof(SXML_CHAR)); 1212 if (node->tag == NULL) 1213 return TAG_ERROR; 1214 sx_strncpy(node->tag, str + tag->len_start, len - tag->len_start - tag->len_end); 1215 node->tag[len - tag->len_start - tag->len_end] = NULC; 1216 node->tag_type = tag->tag_type; 1217 1218 return node->tag_type; 1219 } 1220 1221 /* 1222 Reads a string that is supposed to be an xml tag like '' or ''. 1223 Fills the 'xmlnode' structure with the tag name and its attributes. 1224 Returns 'TAG_ERROR' if an error occurred (malformed 'str' or memory). 'TAG_*' when string is recognized. 1225 */ 1226 TagType XML_parse_1string(const SXML_CHAR* str, XMLNode* xmlnode) 1227 { 1228 SXML_CHAR *p; 1229 XMLAttribute* pt; 1230 int n, nn, len, rc, tag_end = 0; 1231 1232 if (str == NULL || xmlnode == NULL) 1233 return TAG_ERROR; 1234 len = sx_strlen(str); 1235 1236 /* Check for malformed string */ 1237 if (str[0] != C2SX('<') || str[len-1] != C2SX('>')) 1238 return TAG_NONE; /* Syntax error */ 1239 1240 for (nn = 0; nn < NB_SPECIAL_TAGS; nn++) { 1241 n = (int)_parse_special_tag(str, len, &_spec[nn], xmlnode); 1242 switch (n) { 1243 case TAG_NONE: break; /* Nothing found => do nothing */ 1244 default: return (TagType)n; /* Tag found => return it */ 1245 } 1246 } 1247 1248 /* "" instead of ">" if a '[' is found inside */ 1249 if (str[1] == C2SX('!')) { 1250 /* DOCTYPE */ 1251 if (!sx_strncmp(str, C2SX("" tag end */ 1253 nn = 0; 1254 if (str[n]) { /* '[' was found */ 1255 if (sx_strncmp(str+len-2, C2SX("]>"), 2)) /* There probably is a '>' inside the DOCTYPE */ 1256 return TAG_PARTIAL; 1257 nn = 1; 1258 } 1259 xmlnode->tag = __malloc((len - 9 - nn)*sizeof(SXML_CHAR)); /* 'len' - "" + '\0' */ 1260 if (xmlnode->tag == NULL) 1261 return TAG_ERROR; 1262 sx_strncpy(xmlnode->tag, &str[9], len - 10 - nn); 1263 xmlnode->tag[len - 10 - nn] = NULC; 1264 xmlnode->tag_type = TAG_DOCTYPE; 1265 1266 return TAG_DOCTYPE; 1267 } 1268 } 1269 1270 /* Test user tags */ 1271 for (nn = 0; nn < _user_tags.n_tags; nn++) { 1272 n = _parse_special_tag(str, len, &_user_tags.tags[nn], xmlnode); 1273 switch (n) { 1274 case TAG_ERROR: return TAG_ERROR; /* Error => exit */ 1275 case TAG_NONE: break; /* Not this one */ 1276 default: return (TagType)n; /* Tag found => return it */ 1277 } 1278 } 1279 1280 if (str[1] == C2SX('/')) 1281 tag_end = 1; 1282 1283 /* tag starts at index 1 (or 2 if tag end) and ends at the first space or '/>' */ 1284 for (n = 1 + tag_end; str[n] != NULC && str[n] != C2SX('>') && str[n] != C2SX('/') && !sx_isspace(str[n]); n++) ; 1285 xmlnode->tag = __malloc((n - tag_end)*sizeof(SXML_CHAR)); 1286 if (xmlnode->tag == NULL) 1287 return TAG_ERROR; 1288 sx_strncpy(xmlnode->tag, &str[1 + tag_end], n - 1 - tag_end); 1289 xmlnode->tag[n - 1 - tag_end] = NULC; 1290 if (tag_end) { 1291 xmlnode->tag_type = TAG_END; 1292 return TAG_END; 1293 } 1294 1295 /* Here, 'n' is the position of the first space after tag name */ 1296 while (n < len) { 1297 /* Skips spaces */ 1298 while (sx_isspace(str[n])) n++; 1299 1300 /* Check for XML end ('>' or '/>') */ 1301 if (str[n] == C2SX('>')) { /* Tag with children */ 1302 TagType type = (str[n-1] == '/' ? TAG_SELF : TAG_FATHER); /* TODO: Find something better to cope with */ 1303 xmlnode->tag_type = type; 1304 return type; 1305 } 1306 if (!sx_strcmp(str+n, C2SX("/>"))) { /* Tag without children */ 1307 xmlnode->tag_type = TAG_SELF; 1308 return TAG_SELF; 1309 } 1310 1311 /* New attribute found */ 1312 p = sx_strchr(str+n, C2SX('=')); 1313 if (p == NULL) goto parse_err; 1314 pt = __realloc(xmlnode->attributes, (xmlnode->n_attributes + 1) * sizeof(XMLAttribute)); 1315 if (pt == NULL) goto parse_err; 1316 1317 pt[xmlnode->n_attributes].name = NULL; 1318 pt[xmlnode->n_attributes].value = NULL; 1319 pt[xmlnode->n_attributes].active = false; 1320 xmlnode->n_attributes++; 1321 xmlnode->attributes = pt; 1322 while (*++p != NULC && sx_isspace(*p)) ; /* Skip spaces */ 1323 if (isquote(*p)) { /* Attribute value starts with a quote, look for next one, ignoring protected ones with '\' */ 1324 for (nn = p-str+1; str[nn] && str[nn] != *p; nn++) { /* CHECK UNICODE "nn = p-str+1" */ 1325 /* if (str[nn] == C2SX('\\')) nn++; [bugs:#7]: '\' is valid in values */ 1326 } 1327 nn++; //* Skip quote */ 1328 } else { /* Attribute value stops at first space or end of XML string */ 1329 for (nn = p-str+1; str[nn] != NULC && !sx_isspace(str[nn]) && str[nn] != C2SX('/') && str[nn] != C2SX('>'); nn++) ; /* Go to the end of the attribute value */ /* CHECK UNICODE */ 1330 } 1331 1332 /* Here 'str[nn]' is the character after value */ 1333 /* the attribute definition ('attrName="attrVal"') is between 'str[n]' and 'str[nn]' */ 1334 rc = XML_parse_attribute_to(&str[n], nn - n, &xmlnode->attributes[xmlnode->n_attributes - 1]); 1335 if (!rc) goto parse_err; 1336 if (rc == 2) { /* Probable presence of '>' inside attribute value, which is legal XML. Remove attribute to re-parse it later */ 1337 XMLNode_remove_attribute(xmlnode, xmlnode->n_attributes - 1); 1338 return TAG_ERROR; /* was TAG_PARTIAL */ 1339 } 1340 1341 n = nn + 1; /* Go to next attribute */ 1342 if (str[nn] == C2SX('>')) { /* ... or we migh have reached the end if no space is between the attribute value and the ">" or "/>" */ 1343 TagType type = (str[nn-1] == '/' ? TAG_SELF : TAG_FATHER); /* TODO: Find something better to cope with */ 1344 xmlnode->tag_type = type; 1345 return type; 1346 } 1347 } 1348 1349 sx_fprintf(stderr, C2SX("\nWE SHOULD NOT BE HERE!\n[%s]\n\n"), str); 1350 1351 parse_err: 1352 (void)XMLNode_free(xmlnode); 1353 1354 return TAG_ERROR; 1355 } 1356 1357 static int _parse_data_SAX(void* in, const DataSourceType in_type, const SAX_Callbacks* sax, SAX_Data* sd) 1358 { 1359 SXML_CHAR *line = NULL, *txt_end, *p; 1360 XMLNode node; 1361 int ret, exit, sz, n0, ncr; 1362 TagType tag_type; 1363 int (*meos)(void* ds) = (in_type == DATA_SOURCE_BUFFER ? (int(*)(void*))_beob : (int(*)(void*))sx_feof); 1364 1365 if (sax->start_doc != NULL && !sax->start_doc(sd)) 1366 return true; 1367 if (sax->all_event != NULL && !sax->all_event(XML_EVENT_START_DOC, NULL, (SXML_CHAR*)sd->name, 0, sd)) 1368 return true; 1369 1370 ret = true; 1371 exit = false; 1372 sd->line_num = 1; /* Line counter, starts at 1 */ 1373 sz = 0; /* 'line' buffer size */ 1374 node.init_value = 0; 1375 (void)XMLNode_init(&node); 1376 while ((n0 = read_line_alloc(in, in_type, &line, &sz, 0, NULC, C2SX('>'), true, C2SX('\n'), &ncr)) != 0) { 1377 (void)XMLNode_free(&node); 1378 for (p = line; *p != NULC && sx_isspace(*p) && p - line < n0; p++) ; /* Checks if text is only spaces */ 1379 if (*p == NULC || p - line >= n0) 1380 break; 1381 sd->line_num += ncr; 1382 1383 /* Get text for 'father' (i.e. what is before '<') */ 1384 while ((txt_end = sx_strchr(line, C2SX('<'))) == NULL) { /* '<' was not found, indicating a probable '>' inside text (should have been escaped with '>' but we'll handle that ;) */ 1385 int n1 = read_line_alloc(in, in_type, &line, &sz, n0, 0, C2SX('>'), true, C2SX('\n'), &ncr); /* Go on reading the file from current position until next '>' */ 1386 sd->line_num += ncr; 1387 if (n1 <= n0) { 1388 ret = false; 1389 if (sax->on_error == NULL && sax->all_event == NULL) { 1390 sx_fprintf(stderr, C2SX("%s:%d: MEMORY ERROR.\n"), sd->name, sd->line_num); 1391 } else { 1392 if (sax->on_error != NULL && !sax->on_error(PARSE_ERR_MEMORY, sd->line_num, sd)) 1393 break; 1394 if (sax->all_event != NULL && !sax->all_event(XML_EVENT_ERROR, NULL, (SXML_CHAR*)sd->name, PARSE_ERR_MEMORY, sd)) 1395 break; 1396 } 1397 break; /* 'txt_end' is still NULL here so we'll display the syntax error below */ 1398 } 1399 n0 = n1; 1400 } 1401 if (txt_end == NULL) { /* Missing tag start */ 1402 ret = false; 1403 if (sax->on_error == NULL && sax->all_event == NULL) { 1404 sx_fprintf(stderr, C2SX("%s:%d: ERROR: Unexpected end character '>', without matching '<'!\n"), sd->name, sd->line_num); 1405 } else { 1406 if (sax->on_error != NULL && !sax->on_error(PARSE_ERR_UNEXPECTED_TAG_END, sd->line_num, sd)) 1407 break; 1408 if (sax->all_event != NULL && !sax->all_event(XML_EVENT_ERROR, NULL, (SXML_CHAR*)sd->name, PARSE_ERR_UNEXPECTED_TAG_END, sd)) 1409 break; 1410 } 1411 break; 1412 } 1413 /* First part of 'line' (before '<') is to be added to 'father->text' */ 1414 *txt_end = NULC; /* Have 'line' be the text for 'father' */ 1415 if (*line != NULC && (sax->new_text != NULL || sax->all_event != NULL)) { 1416 if (sax->new_text != NULL && (exit = !sax->new_text(line, sd))) /* no str_unescape(line) */ 1417 break; 1418 if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_TEXT, NULL, line, sd->line_num, sd))) 1419 break; 1420 } 1421 *txt_end = '<'; /* Restores tag start */ 1422 1423 switch (tag_type = XML_parse_1string(txt_end, &node)) { 1424 case TAG_ERROR: /* Memory error */ 1425 ret = false; 1426 if (sax->on_error == NULL && sax->all_event == NULL) { 1427 sx_fprintf(stderr, C2SX("%s:%d: MEMORY ERROR.\n"), sd->name, sd->line_num); 1428 } else { 1429 if (sax->on_error != NULL && (exit = !sax->on_error(PARSE_ERR_MEMORY, sd->line_num, sd))) 1430 break; 1431 if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_ERROR, NULL, (SXML_CHAR*)sd->name, PARSE_ERR_MEMORY, sd))) 1432 break; 1433 } 1434 break; 1435 1436 case TAG_NONE: /* Syntax error */ 1437 ret = false; 1438 p = sx_strchr(txt_end, C2SX('\n')); 1439 if (p != NULL) 1440 *p = NULC; 1441 if (sax->on_error == NULL && sax->all_event == NULL) { 1442 sx_fprintf(stderr, C2SX("%s:%d: SYNTAX ERROR (%s%s).\n"), sd->name, sd->line_num, txt_end, p == NULL ? C2SX("") : C2SX("...")); 1443 if (p != NULL) 1444 *p = C2SX('\n'); 1445 } else { 1446 if (sax->on_error != NULL && (exit = !sax->on_error(PARSE_ERR_SYNTAX, sd->line_num, sd))) 1447 break; 1448 if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_ERROR, NULL, (SXML_CHAR*)sd->name, PARSE_ERR_SYNTAX, sd))) 1449 break; 1450 } 1451 break; 1452 1453 case TAG_END: 1454 if (sax->end_node != NULL || sax->all_event != NULL) { 1455 if (sax->end_node != NULL && (exit = !sax->end_node(&node, sd))) 1456 break; 1457 if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_END_NODE, &node, NULL, sd->line_num, sd))) 1458 break; 1459 } 1460 break; 1461 1462 default: /* Add 'node' to 'father' children */ 1463 /* If the line looks like a comment (or CDATA) but is not properly finished, loop until we find the end. */ 1464 while (tag_type == TAG_PARTIAL) { 1465 int n1 = read_line_alloc(in, in_type, &line, &sz, n0, NULC, C2SX('>'), true, C2SX('\n'), &ncr); /* Go on reading the file from current position until next '>' */ 1466 sd->line_num += ncr; 1467 if (n1 <= n0) { 1468 ret = false; 1469 if (sax->on_error == NULL && sax->all_event == NULL) { 1470 sx_fprintf(stderr, C2SX("%s:%d: SYNTAX ERROR.\n"), sd->name, sd->line_num); 1471 } else { 1472 if (sax->on_error != NULL && (exit = !sax->on_error(meos(in) ? PARSE_ERR_EOF : PARSE_ERR_MEMORY, sd->line_num, sd))) 1473 break; 1474 if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_ERROR, NULL, (SXML_CHAR*)sd->name, meos(in) ? PARSE_ERR_EOF : PARSE_ERR_MEMORY, sd))) 1475 break; 1476 } 1477 break; 1478 } 1479 n0 = n1; 1480 txt_end = sx_strchr(line, C2SX('<')); /* In case 'line' has been moved by the '__realloc' in 'read_line_alloc' */ 1481 tag_type = XML_parse_1string(txt_end, &node); 1482 if (tag_type == TAG_ERROR) { 1483 ret = false; 1484 if (sax->on_error == NULL && sax->all_event == NULL) { 1485 sx_fprintf(stderr, C2SX("%s:%d: PARSE ERROR.\n"), sd->name, sd->line_num); 1486 } else { 1487 if (sax->on_error != NULL && (exit = !sax->on_error(meos(in) ? PARSE_ERR_EOF : PARSE_ERR_SYNTAX, sd->line_num, sd))) 1488 break; 1489 if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_ERROR, NULL, (SXML_CHAR*)sd->name, meos(in) ? PARSE_ERR_EOF : PARSE_ERR_SYNTAX, sd))) 1490 break; 1491 } 1492 break; 1493 } 1494 } 1495 if (ret == false) 1496 break; 1497 if (sax->start_node != NULL && (exit = !sax->start_node(&node, sd))) 1498 break; 1499 if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_START_NODE, &node, NULL, sd->line_num, sd))) 1500 break; 1501 if (node.tag_type != TAG_FATHER && (sax->end_node != NULL || sax->all_event != NULL)) { 1502 if (sax->end_node != NULL && (exit = !sax->end_node(&node, sd))) 1503 break; 1504 if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_END_NODE, &node, NULL, sd->line_num, sd))) 1505 break; 1506 } 1507 break; 1508 } 1509 if (exit == true) /* Return false when exit is requested */ 1510 ret = false; 1511 if (ret == false || meos(in)) 1512 break; 1513 } 1514 __free(line); 1515 (void)XMLNode_free(&node); 1516 1517 if (sax->end_doc != NULL && !sax->end_doc(sd)) 1518 return ret; 1519 if (sax->all_event != NULL) 1520 (void)sax->all_event(XML_EVENT_END_DOC, NULL, (SXML_CHAR*)sd->name, sd->line_num, sd); 1521 1522 return ret; 1523 } 1524 1525 int SAX_Callbacks_init(SAX_Callbacks* sax) 1526 { 1527 if (sax == NULL) 1528 return false; 1529 1530 sax->start_doc = NULL; 1531 sax->start_node = NULL; 1532 sax->end_node = NULL; 1533 sax->new_text = NULL; 1534 sax->on_error = NULL; 1535 sax->end_doc = NULL; 1536 sax->all_event = NULL; 1537 1538 return true; 1539 } 1540 1541 int DOMXMLDoc_doc_start(SAX_Data* sd) 1542 { 1543 DOM_through_SAX* dom = (DOM_through_SAX*)sd->user; 1544 1545 dom->current = NULL; 1546 dom->error = PARSE_ERR_NONE; 1547 dom->line_error = 0; 1548 1549 return true; 1550 } 1551 1552 int DOMXMLDoc_node_start(const XMLNode* node, SAX_Data* sd) 1553 { 1554 DOM_through_SAX* dom = (DOM_through_SAX*)sd->user; 1555 XMLNode* new_node; 1556 int i; 1557 1558 if ((new_node = XMLNode_dup(node, true)) == NULL) goto node_start_err; /* No real need to put 'true' for 'XMLNode_dup', but cleaner */ 1559 1560 if (dom->current == NULL) { 1561 if ((i = _add_node(&dom->doc->nodes, &dom->doc->n_nodes, new_node)) < 0) goto node_start_err; 1562 1563 if (dom->doc->i_root < 0 && (node->tag_type == TAG_FATHER || node->tag_type == TAG_SELF)) 1564 dom->doc->i_root = i; 1565 } else { 1566 if (_add_node(&dom->current->children, &dom->current->n_children, new_node) < 0) goto node_start_err; 1567 } 1568 1569 new_node->father = dom->current; 1570 dom->current = new_node; 1571 1572 return true; 1573 1574 node_start_err: 1575 dom->error = PARSE_ERR_MEMORY; 1576 dom->line_error = sd->line_num; 1577 (void)XMLNode_free(new_node); 1578 __free(new_node); 1579 1580 return false; 1581 } 1582 1583 int DOMXMLDoc_node_end(const XMLNode* node, SAX_Data* sd) 1584 { 1585 DOM_through_SAX* dom = (DOM_through_SAX*)sd->user; 1586 1587 if (dom->current == NULL || sx_strcmp(dom->current->tag, node->tag)) { 1588 sx_fprintf(stderr, C2SX("%s:%d: ERROR - End tag was unexpected"), sd->name, sd->line_num, node->tag); 1589 if (dom->current != NULL) 1590 sx_fprintf(stderr, C2SX(" ( was expected)\n"), dom->current->tag); 1591 else 1592 sx_fprintf(stderr, C2SX(" (no node to end)\n")); 1593 1594 dom->error = PARSE_ERR_UNEXPECTED_NODE_END; 1595 dom->line_error = sd->line_num; 1596 1597 return false; 1598 } 1599 1600 dom->current = dom->current->father; 1601 1602 return true; 1603 } 1604 1605 int DOMXMLDoc_node_text(SXML_CHAR* text, SAX_Data* sd) 1606 { 1607 SXML_CHAR* p = text; 1608 DOM_through_SAX* dom = (DOM_through_SAX*)sd->user; 1609 1610 /* Keep text, even if it is only spaces */ 1611 #if 0 1612 while(*p != NULC && sx_isspace(*p++)) ; 1613 if (*p == NULC) return true; /* Only spaces */ 1614 #endif 1615 1616 /* If there is no current node to add text to, raise an error, except if text is only spaces, in which case it is probably just formatting */ 1617 if (dom->current == NULL) { 1618 while(*p != NULC && sx_isspace(*p)) p++; 1619 if (*p == NULC) /* Only spaces => probably pretty-printing */ 1620 return true; 1621 dom->error = PARSE_ERR_TEXT_OUTSIDE_NODE; 1622 dom->line_error = sd->line_num; 1623 return false; /* There is some "real" text => raise an error */ 1624 } 1625 1626 if (dom->text_as_nodes) { 1627 XMLNode* new_node = XMLNode_allocN(1); 1628 if (new_node == NULL || (new_node->text = sx_strdup(text)) == NULL 1629 || _add_node(&dom->current->children, &dom->current->n_children, new_node) < 0) { 1630 dom->error = PARSE_ERR_MEMORY; 1631 dom->line_error = sd->line_num; 1632 (void)XMLNode_free(new_node); 1633 __free(new_node); 1634 return false; 1635 } 1636 new_node->tag_type = TAG_TEXT; 1637 new_node->father = dom->current; 1638 /*dom->current->tag_type = TAG_FATHER; // OS: should parent field be forced to be TAG_FATHER? now it has at least one TAG_TEXT child. I decided not to enforce this for backward-compatibility related to tag_types*/ 1639 return true; 1640 } else { /* Old behaviour: concatenate text to the previous one */ 1641 /* 'p' will point at the new text */ 1642 if (dom->current->text == NULL) { 1643 p = sx_strdup(text); 1644 } else { 1645 p = __realloc(dom->current->text, (sx_strlen(dom->current->text) + sx_strlen(text) + 1)*sizeof(SXML_CHAR)); 1646 if (p != NULL) 1647 sx_strcat(p, text); 1648 } 1649 if (p == NULL) { 1650 dom->error = PARSE_ERR_MEMORY; 1651 dom->line_error = sd->line_num; 1652 return false; 1653 } 1654 1655 dom->current->text = p; 1656 } 1657 1658 return true; 1659 } 1660 1661 int DOMXMLDoc_parse_error(ParseError error_num, int line_number, SAX_Data* sd) 1662 { 1663 DOM_through_SAX* dom = (DOM_through_SAX*)sd->user; 1664 1665 dom->error = error_num; 1666 dom->line_error = line_number; 1667 1668 /* Complete error message will be displayed in 'DOMXMLDoc_doc_end' callback */ 1669 1670 return false; /* Stop on error */ 1671 } 1672 1673 int DOMXMLDoc_doc_end(SAX_Data* sd) 1674 { 1675 DOM_through_SAX* dom = (DOM_through_SAX*)sd->user; 1676 1677 if (dom->error != PARSE_ERR_NONE) { 1678 SXML_CHAR* msg; 1679 1680 switch (dom->error) { 1681 case PARSE_ERR_MEMORY: msg = C2SX("MEMORY"); break; 1682 case PARSE_ERR_UNEXPECTED_TAG_END: msg = C2SX("UNEXPECTED_TAG_END"); break; 1683 case PARSE_ERR_SYNTAX: msg = C2SX("SYNTAX"); break; 1684 case PARSE_ERR_EOF: msg = C2SX("UNEXPECTED_END_OF_FILE"); break; 1685 case PARSE_ERR_TEXT_OUTSIDE_NODE: msg = C2SX("TEXT_OUTSIDE_NODE"); break; 1686 case PARSE_ERR_UNEXPECTED_NODE_END: msg = C2SX("UNEXPECTED_NODE_END"); break; 1687 default: msg = C2SX("UNKNOWN"); break; 1688 } 1689 sx_fprintf(stderr, C2SX("%s:%d: An error was found (%s(%d)), loading aborted...\n"), sd->name, dom->line_error, msg, dom->error); 1690 dom->current = NULL; 1691 (void)XMLDoc_free(dom->doc); 1692 dom->doc = NULL; 1693 } 1694 1695 return true; 1696 } 1697 1698 int SAX_Callbacks_init_DOM(SAX_Callbacks* sax) 1699 { 1700 if (sax == NULL) 1701 return false; 1702 1703 sax->start_doc = DOMXMLDoc_doc_start; 1704 sax->start_node = DOMXMLDoc_node_start; 1705 sax->end_node = DOMXMLDoc_node_end; 1706 sax->new_text = DOMXMLDoc_node_text; 1707 sax->on_error = DOMXMLDoc_parse_error; 1708 sax->end_doc = DOMXMLDoc_doc_end; 1709 sax->all_event = NULL; 1710 1711 return true; 1712 } 1713 1714 int XMLDoc_parse_file_SAX(const SXML_CHAR* filename, const SAX_Callbacks* sax, void* user) 1715 { 1716 FILE* f; 1717 int ret; 1718 SAX_Data sd; 1719 SXML_CHAR* fmode = 1720 #ifndef SXMLC_UNICODE 1721 C2SX("rt"); 1722 #else 1723 C2SX("rb"); /* In Unicode, open the file as binary so that further 'fgetwc' read all bytes */ 1724 #endif 1725 BOM_TYPE bom; 1726 1727 1728 if (sax == NULL || filename == NULL || filename[0] == NULC) 1729 return false; 1730 1731 f = sx_fopen(filename, fmode); 1732 if (f == NULL) 1733 return false; 1734 /* Microsoft's 'ftell' returns invalid position for Unicode text files 1735 (see http://connect.microsoft.com/VisualStudio/feedback/details/369265/ftell-ftell-nolock-incorrectly-handling-unicode-text-translation) 1736 However, we're opening the file as binary in Unicode so we don't fall into that case... 1737 */ 1738 #if defined(SXMLC_UNICODE) && (defined(WIN32) || defined(WIN64)) 1739 /*setvbuf(f, NULL, _IONBF, 0);*/ 1740 #endif 1741 1742 sd.name = (SXML_CHAR*)filename; 1743 sd.user = user; 1744 sd.type = DATA_SOURCE_FILE; 1745 sd.src = (void*)f; 1746 bom = freadBOM(f, NULL, NULL); /* Skip BOM, if any */ 1747 /* In Unicode, re-open the file in text-mode if there is no BOM (or UTF-8) as we assume that 1748 the file is "plain" text (i.e. 1 byte = 1 character). If opened in binary mode, 'fgetwc' 1749 would read 2 bytes for 1 character, which would not work on "plain" files. */ 1750 if (bom == BOM_NONE || bom == BOM_UTF_8) { 1751 sx_fclose(f); 1752 f = sx_fopen(filename, C2SX("rt")); 1753 if (f == NULL) 1754 return false; 1755 if (bom == BOM_UTF_8) 1756 freadBOM(f, NULL, NULL); /* Skip the UTF-8 BOM that was found */ 1757 } 1758 #ifndef SXMLC_UNICODE 1759 /* Unicode BOM when Unicode support has not been compiled in. */ 1760 else { 1761 sx_fclose(f); 1762 return false; 1763 } 1764 #endif 1765 1766 ret = _parse_data_SAX((void*)f, DATA_SOURCE_FILE, sax, &sd); 1767 (void)sx_fclose(f); 1768 1769 return ret; 1770 } 1771 1772 int XMLDoc_parse_buffer_SAX_len(const SXML_CHAR* buffer, int buffer_len, const SXML_CHAR* name, const SAX_Callbacks* sax, void* user) 1773 { 1774 DataSourceBuffer dsb = { buffer, buffer_len, 0 }; 1775 SAX_Data sd; 1776 1777 if (sax == NULL || buffer == NULL) 1778 return false; 1779 1780 sd.name = name; 1781 sd.user = user; 1782 sd.type = DATA_SOURCE_BUFFER; 1783 sd.src = (void*)buffer; 1784 return _parse_data_SAX((void*)&dsb, DATA_SOURCE_BUFFER, sax, &sd); 1785 } 1786 1787 int XMLDoc_parse_file_DOM_text_as_nodes(const SXML_CHAR* filename, XMLDoc* doc, int text_as_nodes) 1788 { 1789 DOM_through_SAX dom; 1790 SAX_Callbacks sax; 1791 int ret; 1792 1793 if (doc == NULL || filename == NULL || filename[0] == NULC || doc->init_value != XML_INIT_DONE) 1794 return false; 1795 1796 sx_strncpy(doc->filename, filename, SXMLC_MAX_PATH - 1); 1797 doc->filename[SXMLC_MAX_PATH - 1] = NULC; 1798 1799 /* Read potential BOM on file */ 1800 { 1801 /* In Unicode, open the file as binary so that further 'fgetwc' read all bytes */ 1802 FILE* f = sx_fopen(filename, C2SX("rb")); 1803 if (f != NULL) { 1804 #if defined(SXMLC_UNICODE) && (defined(WIN32) || defined(WIN64)) 1805 /*setvbuf(f, NULL, _IONBF, 0);*/ 1806 #endif 1807 doc->bom_type = freadBOM(f, doc->bom, &doc->sz_bom); 1808 sx_fclose(f); 1809 } 1810 } 1811 1812 dom.doc = doc; 1813 dom.current = NULL; 1814 dom.text_as_nodes = text_as_nodes; 1815 SAX_Callbacks_init_DOM(&sax); 1816 1817 ret = XMLDoc_parse_file_SAX(filename, &sax, &dom); 1818 if (!ret) { 1819 (void)XMLDoc_free(doc); 1820 dom.doc = NULL; 1821 return ret; 1822 } 1823 1824 /* TODO: Check there is no unfinished root nodes */ 1825 return ret; 1826 } 1827 1828 int XMLDoc_parse_buffer_DOM_text_as_nodes(const SXML_CHAR* buffer, const SXML_CHAR* name, XMLDoc* doc, int text_as_nodes) 1829 { 1830 DOM_through_SAX dom; 1831 SAX_Callbacks sax; 1832 int ret; 1833 1834 if (doc == NULL || buffer == NULL || doc->init_value != XML_INIT_DONE) 1835 return false; 1836 1837 dom.doc = doc; 1838 dom.current = NULL; 1839 dom.text_as_nodes = text_as_nodes; 1840 SAX_Callbacks_init_DOM(&sax); 1841 1842 ret = XMLDoc_parse_buffer_SAX(buffer, name, &sax, &dom); 1843 if (!ret) { 1844 XMLDoc_free(doc); 1845 return ret; 1846 } 1847 1848 /* TODO: Check there is no unfinished root nodes */ 1849 return ret; 1850 } 1851 1852 1853 1854 /* --- Utility functions (ex sxmlutils.c) --- */ 1855 1856 #ifdef DBG_MEM 1857 static int nb_alloc = 0, nb_free = 0; 1858 1859 void* __malloc(size_t sz) 1860 { 1861 void* p = malloc(sz); 1862 if (p != NULL) 1863 nb_alloc++; 1864 printf("0x%x: MALLOC (%d) - NA %d - NF %d = %d\n", p, sz, nb_alloc, nb_free, nb_alloc - nb_free); 1865 return p; 1866 } 1867 1868 void* __calloc(size_t count, size_t sz) 1869 { 1870 void* p = calloc(count, sz); 1871 if (p != NULL) 1872 nb_alloc++; 1873 printf("0x%x: CALLOC (%d, %d) - NA %d - NF %d = %d\n", p, count, sz, nb_alloc, nb_free, nb_alloc - nb_free); 1874 return p; 1875 } 1876 1877 void* __realloc(void* mem, size_t sz) 1878 { 1879 void* p = realloc(mem, sz); 1880 if (mem == NULL && p != NULL) 1881 nb_alloc++; 1882 else if (mem != NULL && sz == 0) 1883 nb_free++; 1884 printf("0x%x: REALLOC 0x%x (%d)", p, mem, sz); 1885 if (mem == NULL) 1886 printf(" - NA %d - NF %d = %d", nb_alloc, nb_free, nb_alloc - nb_free); 1887 printf("\n"); 1888 return p; 1889 } 1890 1891 void __free(void* mem) 1892 { 1893 nb_free++; 1894 printf("0x%x: FREE - NA %d - NF %d = %d\n", mem, nb_alloc, nb_free, nb_alloc - nb_free); 1895 free(mem); 1896 } 1897 1898 char* __sx_strdup(const char* s) 1899 { 1900 /* Mimic the behavior of sx_strdup(), as we can't use it directly here: DBG_MEM is defined 1901 and sx_strdup is this function! (bug #5) */ 1902 #ifdef SXMLC_UNICODE 1903 char* p = wcsdup(s); 1904 #else 1905 char* p = strdup(s); 1906 #endif 1907 if (p != NULL) 1908 nb_alloc++; 1909 printf("0x%x: STRDUP (%d) - NA %d - NF %d = %d\n", p, sx_strlen(s), nb_alloc, nb_free, nb_alloc - nb_free); 1910 return p; 1911 } 1912 #endif 1913 1914 /* Dictionary of special characters and their HTML equivalent */ 1915 static struct _html_special_dict { 1916 SXML_CHAR chr; /* Original character */ 1917 SXML_CHAR* html; /* Equivalent HTML string */ 1918 int html_len; /* 'sx_strlen(html)' */ 1919 } HTML_SPECIAL_DICT[] = { 1920 { C2SX('<'), C2SX("<"), 4 }, 1921 { C2SX('>'), C2SX(">"), 4 }, 1922 { C2SX('"'), C2SX("""), 6 }, 1923 { C2SX('\''), C2SX("'"), 6 }, 1924 { C2SX('&'), C2SX("&"), 5 }, 1925 { NULC, NULL, 0 }, /* Terminator */ 1926 }; 1927 1928 int _beob(DataSourceBuffer* ds) 1929 { 1930 if (ds == NULL || ds->buf[ds->cur_pos] == NULC || ds->cur_pos >= ds->buf_len) 1931 return true; 1932 1933 return false; 1934 } 1935 1936 int _bgetc(DataSourceBuffer* ds) 1937 { 1938 if (_beob(ds)) 1939 return EOF; 1940 1941 return (int)(ds->buf[ds->cur_pos++]); 1942 } 1943 1944 /* 1945 * \brief Read a "line" from data source, eventually (re-)allocating a given buffer. A "line" is defined 1946 * as a portion starting with character `from` (usually `<`) ending at character `to` (usually `>`). 1947 * 1948 * Characters read will be stored in `line` starting at `i0` (this allows multiple calls to 1949 * `read_line_alloc()` on the same `line` buffer without overwriting it at each call). 1950 * Searches for character `from` until character `to`. If `from` is 0, starts from 1951 * current position in the data source. If `to` is 0, it is replaced by `\n`. 1952 * 1953 * \param in The data source (either `FILE*` if `in_type` is `DATA_SOURCE_FILE` or `SXML_CHAR*` 1954 * if `in_type` is `DATA_SOURCE_BUFFER`). 1955 * \param in_type specifies the type of data source to be read. 1956 * \param line can be `NULL`, in which case it will be allocated to `*sz_line` bytes. After the function 1957 * returns, `*sz_line` is the actual buffer size. This allows multiple calls to this function using 1958 * the same buffer (without re-allocating/freeing). 1959 * \param sz_line is the size of the buffer `line` if previously allocated (in `SXML_CHAR`, not byte!). 1960 * If `NULL` or 0, an internal value of `MEM_INCR_RLA` is used. 1961 * \param i0 The position where read characters are stored in `line`. 1962 * \param from The character indicating a start of line. 1963 * \param to The character indicating an end of line. 1964 * \param keep_fromto if 0, removes characters `from` and `to` from the line (stripping). 1965 * \param interest is a special character of interest, usually `'\n'` so we can count line numbers in the 1966 * data source (valid only if `interest_count` is not `NULL`). 1967 * \param interest_count if not `NULL`, will receive the count of `interest` characters while searching. 1968 * \returns the number of characters in the line or 0 if an error occurred. 1969 */ 1970 int read_line_alloc(void* in, DataSourceType in_type, SXML_CHAR** line, int* sz_line, int i0, SXML_CHAR from, SXML_CHAR to, int keep_fromto, SXML_CHAR interest, int* interest_count) 1971 { 1972 int init_sz = 0; 1973 SXML_CHAR ch, *pt; 1974 int c; 1975 int n, ret; 1976 int (*mgetc)(void* ds) = (in_type == DATA_SOURCE_BUFFER ? (int(*)(void*))_bgetc : (int(*)(void*))sx_fgetc); 1977 int (*meos)(void* ds) = (in_type == DATA_SOURCE_BUFFER ? (int(*)(void*))_beob : (int(*)(void*))sx_feof); 1978 1979 if (in == NULL || line == NULL) 1980 return 0; 1981 1982 if (to == NULC) 1983 to = C2SX('\n'); 1984 /* Search for character 'from' */ 1985 if (interest_count != NULL) 1986 *interest_count = 0; 1987 while (true) { 1988 /* Reaching EOF before 'to' char is not an error but should trigger 'line' alloc and init to '' */ 1989 c = mgetc(in); 1990 ch = (SXML_CHAR)c; 1991 if (c == EOF) 1992 break; 1993 if (interest_count != NULL && ch == interest) 1994 (*interest_count)++; 1995 /* If 'from' is '\0', we stop here */ 1996 if (ch == from || from == NULC) 1997 break; 1998 } 1999 2000 if (sz_line == NULL) 2001 sz_line = &init_sz; 2002 2003 if (*line == NULL || *sz_line == 0) { 2004 if (*sz_line == 0) *sz_line = MEM_INCR_RLA; 2005 *line = __malloc(*sz_line*sizeof(SXML_CHAR)); 2006 if (*line == NULL) 2007 return 0; 2008 } 2009 if (i0 < 0) 2010 i0 = 0; 2011 if (i0 >= *sz_line) 2012 return 0; 2013 2014 n = i0; 2015 if (c == CEOF) { /* EOF reached before 'to' char => return the empty string */ 2016 (*line)[n] = NULC; 2017 return meos(in) ? n : 0; /* Error if not EOF */ 2018 } 2019 if (ch != from || keep_fromto) { 2020 (*line)[n++] = ch; 2021 if (n >= *sz_line) { 2022 *sz_line += MEM_INCR_RLA; 2023 pt = __realloc(*line, *sz_line*sizeof(SXML_CHAR)); 2024 if (pt == NULL) { 2025 return 0; 2026 } else 2027 *line = pt; 2028 } 2029 } 2030 (*line)[n] = NULC; 2031 ret = 0; 2032 while (true) { 2033 if ((c = mgetc(in)) == CEOF) { /* EOF or error */ 2034 (*line)[n] = NULC; 2035 ret = meos(in) ? n : 0; 2036 break; 2037 } 2038 ch = (SXML_CHAR)c; 2039 if (interest_count != NULL && ch == interest) 2040 (*interest_count)++; 2041 (*line)[n] = ch; 2042 if (ch != to || (keep_fromto && to != NULC && ch == to)) /* If we reached the 'to' character and we keep it, we still need to add the extra '\0' */ 2043 n++; 2044 if (n >= *sz_line) { /* Too many characters for our line => realloc some more */ 2045 *sz_line += MEM_INCR_RLA; 2046 pt = __realloc(*line, *sz_line*sizeof(SXML_CHAR)); 2047 if (pt == NULL) { 2048 ret = 0; 2049 break; 2050 } else 2051 *line = pt; 2052 } 2053 (*line)[n] = NULC; /* If we reached the 'to' character and we want to strip it, 'n' hasn't changed and 'line[n]' (which is 'to') will be replaced by '\0' */ 2054 if (ch == to) { 2055 ret = n; 2056 break; 2057 } 2058 } 2059 2060 #if 0 /* Automatic buffer resize is deactivated */ 2061 /* Resize line to the exact size */ 2062 pt = __realloc(*line, (n+1)*sizeof(SXML_CHAR)); 2063 if (pt != NULL) 2064 *line = pt; 2065 #endif 2066 2067 return ret; 2068 } 2069 2070 /* --- */ 2071 2072 SXML_CHAR* strcat_alloc(SXML_CHAR** src1, const SXML_CHAR* src2) 2073 { 2074 SXML_CHAR* cat; 2075 int n; 2076 2077 /* Do not concatenate '*src1' with itself */ 2078 if (src1 == NULL || *src1 == src2) 2079 return NULL; 2080 2081 /* Concatenate a NULL or empty string */ 2082 if (src2 == NULL || *src2 == NULC) 2083 return *src1; 2084 2085 n = (*src1 == NULL ? 0 : sx_strlen(*src1)) + sx_strlen(src2) + 1; 2086 cat = __realloc(*src1, n*sizeof(SXML_CHAR)); 2087 if (cat == NULL) 2088 return NULL; 2089 if (*src1 == NULL) 2090 *cat = NULC; 2091 *src1 = cat; 2092 sx_strcat(*src1, src2); 2093 2094 return *src1; 2095 } 2096 2097 SXML_CHAR* strip_spaces(SXML_CHAR* str, SXML_CHAR repl_sq) 2098 { 2099 SXML_CHAR* p; 2100 int i, len; 2101 2102 /* 'p' to the first non-space */ 2103 for (p = str; *p != NULC && sx_isspace(*p); p++) ; /* No need to search for 'protect' as it is not a space */ 2104 len = sx_strlen(str); 2105 for (i = len-1; i >= 0 && sx_isspace(str[i]); i--) ; 2106 if (i >= 0 && str[i] == C2SX('\\')) /* If last non-space is the protection, keep the last space */ 2107 i++; 2108 str[i+1] = NULC; /* New end of string to last non-space */ 2109 2110 if (repl_sq == NULC) { 2111 if (p == str && i == len) 2112 return str; /* Nothing to do */ 2113 for (i = 0; (str[i] = *p) != NULC; i++, p++) ; /* Copy 'p' to 'str' */ 2114 return str; 2115 } 2116 2117 /* Squeeze all spaces with 'repl_sq' */ 2118 i = 0; 2119 while (*p != NULC) { 2120 if (sx_isspace(*p)) { 2121 str[i++] = repl_sq; 2122 while (sx_isspace(*++p)) ; /* Skips all next spaces */ 2123 } else { 2124 if (*p == C2SX('\\')) 2125 p++; 2126 str[i++] = *p++; 2127 } 2128 } 2129 str[i] = NULC; 2130 2131 return str; 2132 } 2133 2134 SXML_CHAR* str_unescape(SXML_CHAR* str) 2135 { 2136 int i, j; 2137 2138 if (str == NULL) 2139 return NULL; 2140 2141 for (i = j = 0; str[j]; j++) { 2142 if (str[j] == C2SX('\\')) 2143 j++; 2144 str[i++] = str[j]; 2145 } 2146 2147 return str; 2148 } 2149 2150 int split_left_right(SXML_CHAR* str, SXML_CHAR sep, int* l0, int* l1, int* i_sep, int* r0, int* r1, int ignore_spaces, int ignore_quotes) 2151 { 2152 int n0, n1, is; 2153 SXML_CHAR quote = '\0'; 2154 2155 if (str == NULL) 2156 return false; 2157 2158 if (i_sep != NULL) 2159 *i_sep = -1; 2160 2161 if (!ignore_spaces) /* No sense of ignore quotes if spaces are to be kept */ 2162 ignore_quotes = false; 2163 2164 /* Parse left part */ 2165 2166 if (ignore_spaces) { 2167 for (n0 = 0; str[n0] != NULC && sx_isspace(str[n0]); n0++) ; /* Skip head spaces, n0 points to first non-space */ 2168 if (ignore_quotes && isquote(str[n0])) { /* If quote is found, look for next one */ 2169 quote = str[n0++]; /* Quote can be '\'' or '"' */ 2170 for (n1 = n0; str[n1] != NULC && str[n1] != quote; n1++) { 2171 if (str[n1] == C2SX('\\') && str[++n1] == NULC) 2172 break; /* Escape character (can be the last) */ 2173 } 2174 for (is = n1 + 1; str[is] != NULC && sx_isspace(str[is]); is++) ; /* '--' not to take quote into account */ 2175 } else { 2176 for (n1 = n0; str[n1] != NULC && str[n1] != sep && !sx_isspace(str[n1]); n1++) ; /* Search for separator or a space */ 2177 for (is = n1; str[is] != NULC && sx_isspace(str[is]); is++) ; 2178 } 2179 } else { 2180 n0 = 0; 2181 for (n1 = 0; str[n1] != NULC && str[n1] != sep; n1++) ; /* Search for separator only */ 2182 is = n1; 2183 } 2184 2185 /* Here 'n0' is the start of left member, 'n1' is the character after the end of left member */ 2186 2187 if (l0 != NULL) 2188 *l0 = n0; 2189 if (l1 != NULL) 2190 *l1 = n1 - 1; 2191 if (i_sep != NULL) 2192 *i_sep = is; 2193 if (str[is] == NULC || str[is+1] == NULC) { /* No separator => empty right member */ 2194 if (r0 != NULL) 2195 *r0 = is; 2196 if (r1 != NULL) 2197 *r1 = is-1; 2198 if (i_sep != NULL) 2199 *i_sep = (str[is] == NULC ? -1 : is); 2200 return true; 2201 } 2202 2203 /* Parse right part */ 2204 2205 n0 = is + 1; 2206 if (ignore_spaces) { 2207 for (; str[n0] != NULC && sx_isspace(str[n0]); n0++) ; 2208 if (ignore_quotes && isquote(str[n0])) 2209 quote = str[n0]; 2210 } 2211 2212 for (n1 = ++n0; str[n1]; n1++) { 2213 if (ignore_quotes && str[n1] == quote) /* Quote was reached */ 2214 break; 2215 if (str[n1] == C2SX('\\') && str[++n1] == NULC) /* Escape character (can be the last) */ 2216 break; 2217 } 2218 if (ignore_quotes && str[n1--] != quote) /* Quote is not the same than earlier, '--' is not to take it into account */ 2219 return false; 2220 if (!ignore_spaces) 2221 while (str[++n1]) ; /* Jump down the end of the string */ 2222 2223 if (r0 != NULL) 2224 *r0 = n0; 2225 if (r1 != NULL) 2226 *r1 = n1; 2227 2228 return true; 2229 } 2230 2231 BOM_TYPE freadBOM(FILE* f, unsigned char* bom, int* sz_bom) 2232 { 2233 unsigned char c1, c2; 2234 long pos; 2235 2236 if (f == NULL) 2237 return BOM_NONE; 2238 2239 /* Save position and try to read and skip BOM if found. If not, go back to saved position. */ 2240 pos = ftell(f); 2241 if (pos < 0) 2242 return BOM_NONE; 2243 if (fread(&c1, sizeof(char), 1, f) != 1 || fread(&c2, sizeof(char), 1, f) != 1) { 2244 fseek(f, pos, SEEK_SET); 2245 return BOM_NONE; 2246 } 2247 if (bom != NULL) { 2248 bom[0] = c1; 2249 bom[1] = c2; 2250 bom[2] = '\0'; 2251 if (sz_bom != NULL) 2252 *sz_bom = 2; 2253 } 2254 switch ((unsigned short)(c1 << 8) | c2) { 2255 case (unsigned short)0xfeff: 2256 return BOM_UTF_16BE; 2257 2258 case (unsigned short)0xfffe: 2259 pos = ftell(f); /* Save current position to get it back if BOM is not UTF-32LE */ 2260 if (pos < 0) 2261 return BOM_UTF_16LE; 2262 if (fread(&c1, sizeof(char), 1, f) != 1 || fread(&c2, sizeof(char), 1, f) != 1) { 2263 fseek(f, pos, SEEK_SET); 2264 return BOM_UTF_16LE; 2265 } 2266 if (c1 == 0x00 && c2 == 0x00) { 2267 if (bom != NULL) 2268 bom[2] = bom[3] = bom[4] = '\0'; 2269 if (sz_bom != NULL) 2270 *sz_bom = 4; 2271 return BOM_UTF_32LE; 2272 } 2273 fseek(f, pos, SEEK_SET); /* fseek(f, -2, SEEK_CUR) is not garanteed on Windows (and actually fails in Unicode...) */ 2274 return BOM_UTF_16LE; 2275 2276 case (unsigned short)0x0000: 2277 if (fread(&c1, sizeof(char), 1, f) == 1 && fread(&c2, sizeof(char), 1, f) == 1 2278 && c1 == 0xfe && c2 == 0xff) { 2279 bom[2] = c1; 2280 bom[3] = c2; 2281 bom[4] = '\0'; 2282 if (sz_bom != NULL) 2283 *sz_bom = 4; 2284 return BOM_UTF_32BE; 2285 } 2286 fseek(f, pos, SEEK_SET); 2287 return BOM_NONE; 2288 2289 case (unsigned short)0xefbb: /* UTF-8? */ 2290 if (fread(&c1, sizeof(char), 1, f) != 1 || c1 != 0xbf) { /* Not UTF-8 */ 2291 fseek(f, pos, SEEK_SET); 2292 if (bom != NULL) 2293 bom[0] = '\0'; 2294 if (sz_bom != NULL) 2295 *sz_bom = 0; 2296 return BOM_NONE; 2297 } 2298 if (bom != NULL) { 2299 bom[2] = c1; 2300 bom[3] = '\0'; 2301 } 2302 if (sz_bom != NULL) 2303 *sz_bom = 3; 2304 return BOM_UTF_8; 2305 2306 default: /* No BOM, go back */ 2307 fseek(f, pos, SEEK_SET); 2308 if (bom != NULL) 2309 bom[0] = '\0'; 2310 if (sz_bom != NULL) 2311 *sz_bom = 0; 2312 return BOM_NONE; 2313 } 2314 } 2315 2316 /* --- */ 2317 2318 SXML_CHAR* html2str(SXML_CHAR* html, SXML_CHAR* str) 2319 { 2320 SXML_CHAR *ps, *pd; 2321 int i; 2322 2323 if (html == NULL) 2324 return NULL; 2325 2326 if (str == NULL) 2327 str = html; 2328 2329 /* Look for '&' and matches it to any of the recognized HTML pattern. */ 2330 /* If found, replaces the '&' by the corresponding char. */ 2331 /* 'p2' is the char to analyze, 'p1' is where to insert it */ 2332 for (pd = str, ps = html; *ps; ps++, pd++) { 2333 if (*ps != C2SX('&')) { 2334 if (pd != ps) 2335 *pd = *ps; 2336 continue; 2337 } 2338 2339 for (i = 0; HTML_SPECIAL_DICT[i].chr; i++) { 2340 if (sx_strncmp(ps, HTML_SPECIAL_DICT[i].html, HTML_SPECIAL_DICT[i].html_len)) 2341 continue; 2342 2343 *pd = HTML_SPECIAL_DICT[i].chr; 2344 ps += HTML_SPECIAL_DICT[i].html_len-1; 2345 break; 2346 } 2347 /* If no string was found, simply copy the character */ 2348 if (HTML_SPECIAL_DICT[i].chr == NULC && pd != ps) 2349 *pd = *ps; 2350 } 2351 *pd = NULC; 2352 2353 return str; 2354 } 2355 2356 /* TODO: Allocate 'html'? */ 2357 SXML_CHAR* str2html(SXML_CHAR* str, SXML_CHAR* html) 2358 { 2359 SXML_CHAR *ps, *pd; 2360 int i; 2361 2362 if (str == NULL) 2363 return NULL; 2364 2365 if (html == str) /* Not handled (yet) */ 2366 return NULL; 2367 2368 if (html == NULL) { /* Allocate 'html' to the correct size */ 2369 html = __malloc(strlen_html(str) * sizeof(SXML_CHAR)); 2370 if (html == NULL) 2371 return NULL; 2372 } 2373 2374 for (ps = str, pd = html; *ps; ps++, pd++) { 2375 for (i = 0; HTML_SPECIAL_DICT[i].chr; i++) { 2376 if (*ps == HTML_SPECIAL_DICT[i].chr) { 2377 sx_strcpy(pd, HTML_SPECIAL_DICT[i].html); 2378 pd += HTML_SPECIAL_DICT[i].html_len - 1; 2379 break; 2380 } 2381 } 2382 if (HTML_SPECIAL_DICT[i].chr == NULC && pd != ps) 2383 *pd = *ps; 2384 } 2385 *pd = NULC; 2386 2387 return html; 2388 } 2389 2390 int strlen_html(SXML_CHAR* str) 2391 { 2392 int i, j, n; 2393 2394 if (str == NULL) 2395 return 0; 2396 2397 n = 0; 2398 for (i = 0; str[i] != NULC; i++) { 2399 for (j = 0; HTML_SPECIAL_DICT[j].chr; j++) { 2400 if (str[i] == HTML_SPECIAL_DICT[j].chr) { 2401 n += HTML_SPECIAL_DICT[j].html_len; 2402 break; 2403 } 2404 } 2405 if (HTML_SPECIAL_DICT[j].chr == NULC) 2406 n++; 2407 } 2408 2409 return n; 2410 } 2411 2412 int fprintHTML(FILE* f, SXML_CHAR* str) 2413 { 2414 SXML_CHAR* p; 2415 int i, n; 2416 2417 for (p = str, n = 0; *p != NULC; p++) { 2418 for (i = 0; HTML_SPECIAL_DICT[i].chr; i++) { 2419 if (*p != HTML_SPECIAL_DICT[i].chr) 2420 continue; 2421 if (f != NULL) 2422 sx_fputs(HTML_SPECIAL_DICT[i].html, f); 2423 n += HTML_SPECIAL_DICT[i].html_len; 2424 break; 2425 } 2426 if (HTML_SPECIAL_DICT[i].chr == NULC) { 2427 if (f != NULL) 2428 (void)sx_fputc(*p, f); 2429 n++; 2430 } 2431 } 2432 2433 return n; 2434 } 2435 2436 int regstrcmp(SXML_CHAR* str, SXML_CHAR* pattern) 2437 { 2438 SXML_CHAR *p, *s; 2439 2440 if (str == NULL && pattern == NULL) 2441 return true; 2442 2443 if (str == NULL || pattern == NULL) 2444 return false; 2445 2446 p = pattern; 2447 s = str; 2448 while (true) { 2449 switch (*p) { 2450 /* Any character matches, go to next one */ 2451 case C2SX('?'): 2452 p++; 2453 s++; 2454 break; 2455 2456 /* Go to next character in pattern and wait until it is found in 'str' */ 2457 case C2SX('*'): 2458 for (; *p != NULC; p++) { /* Squeeze '**?*??**' to '*' */ 2459 if (*p != C2SX('*') && *p != C2SX('?')) 2460 break; 2461 } 2462 for (; *s != NULC; s++) { 2463 if (*s == *p) 2464 break; 2465 } 2466 break; 2467 2468 /* NULL character on pattern has to be matched by 'str' */ 2469 case 0: 2470 return *s ? false : true; 2471 2472 default: 2473 if (*p == C2SX('\\')) /* Escape character */ 2474 p++; 2475 if (*p++ != *s++) /* Characters do not match */ 2476 return false; 2477 break; 2478 } 2479 } 2480 2481 return false; 2482 }