1 /* 2 Copyright (c) 2010, Matthieu Labas 3 All rights reserved. 4 5 Redistribution and use in source and binary forms, with or without modification, 6 are permitted provided that the following conditions are met: 7 8 1. Redistributions of source code must retain the above copyright notice, 9 this list of conditions and the following disclaimer. 10 11 2. Redistributions in binary form must reproduce the above copyright notice, 12 this list of conditions and the following disclaimer in the documentation 13 and/or other materials provided with the distribution. 14 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY 24 OF SUCH DAMAGE. 25 26 The views and conclusions contained in the software and documentation are those of the 27 authors and should not be interpreted as representing official policies, either expressed 28 or implied, of the FreeBSD Project. 29 */ 30 #if defined(WIN32) || defined(WIN64) 31 #pragma warning(disable : 4996) 32 #endif 33 34 #include 35 #include 36 #include "sxmlc.h" 37 #include "sxmlsearch.h" 38 39 #define INVALID_XMLNODE_POINTER ((XMLNode*)-1) 40 41 /* The function used to compare a string to a pattern */ 42 static REGEXPR_COMPARE regstrcmp_search = regstrcmp; 43 44 REGEXPR_COMPARE XMLSearch_set_regexpr_compare(REGEXPR_COMPARE fct) 45 { 46 REGEXPR_COMPARE previous = regstrcmp_search; 47 48 regstrcmp_search = fct; 49 50 return previous; 51 } 52 53 int XMLSearch_init(XMLSearch* search) 54 { 55 if (search == NULL) 56 return false; 57 58 if (search->init_value == XML_INIT_DONE) 59 XMLSearch_free(search, true); 60 61 search->tag = NULL; 62 search->text = NULL; 63 search->attributes = NULL; 64 search->n_attributes = 0; 65 search->next = NULL; 66 search->prev = NULL; 67 search->stop_at = INVALID_XMLNODE_POINTER; /* Because 'NULL' can be a valid value */ 68 search->init_value = XML_INIT_DONE; 69 70 return true; 71 } 72 73 int XMLSearch_free(XMLSearch* search, int free_next) 74 { 75 int i; 76 77 if (search == NULL || search->init_value != XML_INIT_DONE) 78 return false; 79 80 if (search->tag != NULL) { 81 __free(search->tag); 82 search->tag = NULL; 83 } 84 85 if (search->attributes != NULL) { 86 for (i = 0; i < search->n_attributes; i++) { 87 if (search->attributes[i].name != NULL) 88 __free(search->attributes[i].name); 89 if (search->attributes[i].value != NULL) 90 __free(search->attributes[i].value); 91 } 92 __free(search->attributes); 93 search->n_attributes = 0; 94 search->attributes = NULL; 95 } 96 97 if (free_next && search->next != NULL) { 98 (void)XMLSearch_free(search->next, true); 99 __free(search->next); 100 search->next = NULL; 101 } 102 search->init_value = 0; /* Something not XML_INIT_DONE, otherwise we'll go into 'XMLSearch_free' again */ 103 (void)XMLSearch_init(search); 104 105 return true; 106 } 107 108 int XMLSearch_search_set_tag(XMLSearch* search, const SXML_CHAR* tag) 109 { 110 if (search == NULL) 111 return false; 112 113 if (tag == NULL) { 114 if (search->tag != NULL) { 115 __free(search->tag); 116 search->tag = NULL; 117 } 118 return true; 119 } 120 121 search->tag = sx_strdup(tag); 122 return (search->tag != NULL); 123 } 124 125 int XMLSearch_search_set_text(XMLSearch* search, const SXML_CHAR* text) 126 { 127 if (search == NULL) 128 return false; 129 130 if (text == NULL) { 131 if (search->text != NULL) { 132 __free(search->text); 133 search->text = NULL; 134 } 135 return true; 136 } 137 138 search->text = sx_strdup(text); 139 return (search->text != NULL); 140 } 141 142 int XMLSearch_search_add_attribute(XMLSearch* search, const SXML_CHAR* attr_name, const SXML_CHAR* attr_value, int value_equal) 143 { 144 int i; 145 XMLAttribute* pt; 146 SXML_CHAR* name; 147 SXML_CHAR* value; 148 149 if (search == NULL) 150 return -1; 151 152 if (attr_name == NULL || attr_name[0] == NULC) 153 return -1; 154 155 name = sx_strdup(attr_name); 156 value = (attr_value == NULL ? NULL : sx_strdup(attr_value)); 157 if (name == NULL || (attr_value && value == NULL)) { 158 if (value != NULL) 159 __free(value); 160 if (name != NULL) 161 __free(name); 162 } 163 164 i = search->n_attributes; 165 pt = (XMLAttribute*)__realloc(search->attributes, (i + 1) * sizeof(XMLAttribute)); 166 if (pt == NULL) { 167 if (value) 168 __free(value); 169 __free(name); 170 return -1; 171 } 172 173 pt[i].name = name; 174 pt[i].value = value; 175 pt[i].active = value_equal; 176 177 search->n_attributes = i+1; 178 search->attributes = pt; 179 180 return i; 181 } 182 183 int XMLSearch_search_get_attribute_index(const XMLSearch* search, const SXML_CHAR* attr_name) 184 { 185 int i; 186 187 if (search == NULL || attr_name == NULL || attr_name[0] == NULC) 188 return -1; 189 190 for (i = 0; i < search->n_attributes; i++) { 191 if (!sx_strcmp(search->attributes[i].name, attr_name)) 192 return i; 193 } 194 195 return -1; 196 } 197 198 int XMLSearch_search_remove_attribute(XMLSearch* search, int i_attr) 199 { 200 XMLAttribute* pt; 201 202 if (search == NULL || i_attr < 0 || i_attr >= search->n_attributes) 203 return -1; 204 205 /* Free attribute fields first */ 206 if (search->n_attributes == 1) 207 pt = NULL; 208 else { 209 pt = (XMLAttribute*)__malloc((search->n_attributes - 1) * sizeof(XMLAttribute)); 210 if (pt == NULL) 211 return -1; 212 } 213 if (search->attributes[i_attr].name != NULL) 214 __free(search->attributes[i_attr].name); 215 if (search->attributes[i_attr].value != NULL) 216 __free(search->attributes[i_attr].value); 217 218 if (pt != NULL) { 219 memcpy(pt, search->attributes, i_attr * sizeof(XMLAttribute)); 220 memcpy(&pt[i_attr], &search->attributes[i_attr + 1], (search->n_attributes - i_attr - 1) * sizeof(XMLAttribute)); 221 } 222 if (search->attributes) 223 __free(search->attributes); 224 search->attributes = pt; 225 search->n_attributes--; 226 227 return search->n_attributes; 228 } 229 230 int XMLSearch_search_set_children_search(XMLSearch* search, XMLSearch* children_search) 231 { 232 if (search == NULL) 233 return false; 234 235 if (search->next != NULL) 236 XMLSearch_free(search->next, true); 237 238 search->next = children_search; 239 children_search->prev = search; 240 241 return true; 242 } 243 244 SXML_CHAR* XMLSearch_get_XPath_string(const XMLSearch* search, SXML_CHAR** xpath, SXML_CHAR quote) 245 { 246 const XMLSearch* s; 247 SXML_CHAR squote[] = C2SX("'"); 248 int i, fill; 249 250 if (xpath == NULL) 251 return NULL; 252 253 /* NULL 'search' is an empty string */ 254 if (search == NULL) { 255 *xpath = sx_strdup(C2SX("")); 256 if (*xpath == NULL) 257 return NULL; 258 259 return *xpath; 260 } 261 262 squote[0] = (quote == NULC ? XML_DEFAULT_QUOTE : quote); 263 264 for (s = search; s != NULL; s = s->next) { 265 if (s != search && strcat_alloc(xpath, C2SX("/")) == NULL) goto err; /* No "/" prefix for the first criteria */ 266 if (strcat_alloc(xpath, s->tag == NULL || s->tag[0] == NULC ? C2SX("*"): s->tag) == NULL) goto err; 267 268 if (s->n_attributes > 0 || (s->text != NULL && s->text[0] != NULC)) 269 if (strcat_alloc(xpath, C2SX("[")) == NULL) goto err; 270 271 fill = false; /* '[' has not been filled with text yet, no ", " separator should be added */ 272 if (s->text != NULL && s->text[0] != NULC) { 273 if (strcat_alloc(xpath, C2SX(".=")) == NULL) goto err; 274 if (strcat_alloc(xpath, squote) == NULL) goto err; 275 if (strcat_alloc(xpath, s->text) == NULL) goto err; 276 if (strcat_alloc(xpath, squote) == NULL) goto err; 277 fill = true; 278 } 279 280 for (i = 0; i < s->n_attributes; i++) { 281 if (fill) { 282 if (strcat_alloc(xpath, C2SX(", ")) == NULL) goto err; 283 } else 284 fill = true; /* filling is being performed */ 285 if (strcat_alloc(xpath, C2SX("@")) == NULL) goto err; 286 if (strcat_alloc(xpath, s->attributes[i].name) == NULL) goto err; 287 if (s->attributes[i].value == NULL) continue; 288 289 if (strcat_alloc(xpath, s->attributes[i].active ? C2SX("=") : C2SX("!=")) == NULL) goto err; 290 if (strcat_alloc(xpath, squote) == NULL) goto err; 291 if (strcat_alloc(xpath, s->attributes[i].value) == NULL) goto err; 292 if (strcat_alloc(xpath, squote) == NULL) goto err; 293 } 294 if ((s->text != NULL && s->text[0] != NULC) || s->n_attributes > 0) { 295 if (strcat_alloc(xpath, C2SX("]")) == NULL) goto err; 296 } 297 } 298 299 return *xpath; 300 301 err: 302 __free(*xpath); 303 *xpath = NULL; 304 305 return NULL; 306 } 307 308 /* 309 Extract search information from 'xpath', where 'xpath' represents a single node 310 (i.e. no '/' inside, except escaped ones), stripped from lead and tail '/'. 311 tag[.=text, @attrib="value"] with potential spaces around '=' and ','. 312 Return 'false' if parsing failed, 'true' for success. 313 This is an internal function so we assume that arguments are valid (non-NULL). 314 */ 315 static int _init_search_from_1XPath(SXML_CHAR* xpath, XMLSearch* search) 316 { 317 SXML_CHAR *p, *q; 318 SXML_CHAR c, c1, cc; 319 int l0, l1, is, r0, r1; 320 int ret; 321 322 XMLSearch_init(search); 323 324 /* Look for tag name */ 325 for (p = xpath; *p != NULC && *p != C2SX('['); p++) ; 326 c = *p; /* Either '[' or '\0' */ 327 *p = NULC; 328 ret = XMLSearch_search_set_tag(search, xpath); 329 *p = c; 330 if (!ret) 331 return false; 332 333 if (*p == NULC) 334 return true; 335 336 /* Here, '*p' is '[', we have to parse either text or attribute names/values until ']' */ 337 for (p++; *p && *p != C2SX(']'); p++) { 338 for (q = p; *q && *q != C2SX(',') && *q != C2SX(']'); q++) ; /* Look for potential ',' separator to null it */ 339 cc = *q; 340 if (*q == C2SX(',') || *q == C2SX(']')) 341 *q = NULC; 342 ret = true; 343 switch (*p) { 344 case C2SX('.'): /* '.[ ]=[ ]["']...["']' to search for text */ 345 if (!split_left_right(p, C2SX('='), &l0, &l1, &is, &r0, &r1, true, true)) 346 return false; 347 c = p[r1+1]; 348 p[r1+1] = NULC; 349 ret = XMLSearch_search_set_text(search, &p[r0]); 350 p[r1+1] = c; 351 p += r1+1; 352 break; 353 354 /* Attribute name, possibly '@attrib[[ ]=[ ]"value"]' */ 355 case C2SX('@'): 356 if (!split_left_right(++p, '=', &l0, &l1, &is, &r0, &r1, true, true)) 357 return false; 358 c = p[l1+1]; 359 c1 = p[r1+1]; 360 p[l1+1] = NULC; 361 p[r1+1] = NULC; 362 ret = (XMLSearch_search_add_attribute(search, &p[l0], (is < 0 ? NULL : &p[r0]), true) < 0 ? false : true); /* 'is' < 0 when there is no '=' (i.e. check for attribute presence only */ 363 p[l1+1] = c; 364 p[r1+1] = c1; 365 p += r1-1; /* Jump to next value */ 366 break; 367 368 default: /* Not implemented */ 369 break; 370 } 371 *q = cc; /* Restore ',' separator if any */ 372 if (!ret) 373 return false; 374 } 375 376 return true; 377 } 378 379 int XMLSearch_init_from_XPath(const SXML_CHAR* xpath, XMLSearch* search) 380 { 381 XMLSearch *search1, *search2; 382 SXML_CHAR *p, *tag, *tag0; 383 SXML_CHAR c; 384 385 if (!XMLSearch_init(search)) 386 return false; 387 388 /* NULL or empty xpath is an empty (initialized only) search */ 389 if (xpath == NULL || *xpath == NULC) 390 return true; 391 392 search1 = NULL; /* Search struct to add the xpath portion to */ 393 search2 = search; /* Search struct to be filled from xpath portion */ 394 395 tag = tag0 = sx_strdup(xpath); /* Create a copy of 'xpath' to be able to patch it (or segfault if 'xpath' is const, cnacu6o Sergey@sourceforge!) */ 396 while (*tag != NULC) { 397 if (search2 != search) { /* Allocate a new search when the original one (i.e. 'search') has already been filled */ 398 search2 = (XMLSearch*)__calloc(1, sizeof(XMLSearch)); 399 if (search2 == NULL) { 400 __free(tag0); 401 (void)XMLSearch_free(search, true); 402 return false; 403 } 404 } 405 /* Skip all first '/' */ 406 for (; *tag != NULC && *tag == C2SX('/'); tag++) ; 407 if (*tag == NULC) { 408 __free(tag0); 409 return false; 410 } 411 412 /* Look for the end of tag name: after '/' (to get another tag) or end of string */ 413 for (p = &tag[1]; *p != NULC && *p != C2SX('/'); p++) { 414 if (*p == C2SX('\\') && *++p == NULC) 415 break; /* Escape character, '\' could be the last character... */ 416 } 417 c = *p; /* Backup character before nulling it */ 418 *p = NULC; 419 if (!_init_search_from_1XPath(tag, search2)) { 420 __free(tag0); 421 (void)XMLSearch_free(search, true); 422 return false; 423 } 424 *p = c; 425 426 /* 'search2' is the newly parsed tag, 'search1' is the previous tag (or NULL if 'search2' is the first tag to parse (i.e. 'search2' == 'search') */ 427 428 if (search1 != NULL) search1->next = search2; 429 if (search2 != search) search2->prev = search1; 430 search1 = search2; 431 search2 = NULL; /* Will force allocation during next loop */ 432 tag = p; 433 } 434 435 __free(tag0); 436 return true; 437 } 438 439 static int _attribute_matches(XMLAttribute* to_test, XMLAttribute* pattern) 440 { 441 if (to_test == NULL && pattern == NULL) 442 return true; 443 444 if (to_test == NULL || pattern == NULL) 445 return false; 446 447 /* No test on name => match */ 448 if (pattern->name == NULL || pattern->name[0] == NULC) 449 return true; 450 451 /* Test on name fails => no match */ 452 if (!regstrcmp_search(to_test->name, pattern->name)) 453 return false; 454 455 /* No test on value => match */ 456 if (pattern->value == NULL) 457 return true; 458 459 /* Test on value according to pattern "equal" attribute */ 460 return regstrcmp_search(to_test->value, pattern->value) == pattern->active ? true : false; 461 } 462 463 int XMLSearch_node_matches(const XMLNode* node, const XMLSearch* search) 464 { 465 int i, j; 466 467 if (node == NULL) 468 return false; 469 470 if (search == NULL) 471 return true; 472 473 /* No comments, prolog, or such type of nodes are tested */ 474 if (node->tag_type != TAG_FATHER && node->tag_type != TAG_SELF) 475 return false; 476 477 /* Check tag */ 478 if (search->tag != NULL && !regstrcmp_search(node->tag, search->tag)) 479 return false; 480 481 /* Check text */ 482 if (search->text != NULL && !regstrcmp_search(node->text, search->text)) 483 return false; 484 485 /* Check attributes */ 486 if (search->attributes != NULL) { 487 for (i = 0; i < search->n_attributes; i++) { 488 for (j = 0; j < node->n_attributes; j++) { 489 if (!node->attributes[j].active) 490 continue; 491 if (_attribute_matches(&node->attributes[j], &search->attributes[i])) 492 break; 493 } 494 if (j >= node->n_attributes) /* All attributes where scanned without a successful match */ 495 return false; 496 } 497 } 498 499 /* 'node' matches 'search'. If there is a father search, its father must match it */ 500 if (search->prev != NULL) 501 return XMLSearch_node_matches(node->father, search->prev); 502 503 /* TODO: Should a node match if search has no more 'prev' search and node father is still below the initial search ? 504 Depends if XPath started with "//" (=> yes) or "/" (=> no). 505 if (search->prev == NULL && node->father != search->from) return false; ? */ 506 507 return true; 508 } 509 510 XMLNode* XMLSearch_next(const XMLNode* from, XMLSearch* search) 511 { 512 XMLNode* node; 513 514 if (search == NULL || from == NULL) 515 return NULL; 516 517 /* Go down the last child search as fathers will be tested recursively by the 'XMLSearch_node_matches' function */ 518 for (; search->next != NULL; search = search->next) ; 519 520 /* Initialize the 'stop_at' node on first search, to remember where to stop as there will be multiple calls */ 521 /* 'stop_at' can be NULL when 'from' is a root node, that is why it should be initialized with something else than NULL */ 522 if (search->stop_at == INVALID_XMLNODE_POINTER) 523 search->stop_at = XMLNode_next_sibling(from); 524 525 for (node = XMLNode_next(from); node != search->stop_at; node = XMLNode_next(node)) { /* && node != NULL */ 526 if (!XMLSearch_node_matches(node, search)) 527 continue; 528 529 /* 'node' is a matching node */ 530 531 /* No search to perform on 'node' children => 'node' is returned */ 532 if (search->next == NULL) 533 return node; 534 535 /* Run the search on 'node' children */ 536 return XMLSearch_next(node, search->next); 537 } 538 539 return NULL; 540 } 541 542 static SXML_CHAR* _get_XPath(const XMLNode* node, SXML_CHAR** xpath) 543 { 544 int i, n, brackets, sz_xpath; 545 SXML_CHAR* p; 546 547 brackets = 0; 548 sz_xpath = sx_strlen(node->tag); 549 if (node->text != NULL) { 550 sz_xpath += strlen_html(node->text) + 4; /* 4 = '.=""' */ 551 brackets = 2; /* Text has to be displayed => add '[]' */ 552 } 553 for (i = 0; i < node->n_attributes; i++) { 554 if (!node->attributes[i].active) 555 continue; 556 brackets = 2; /* At least one attribute has to be displayed => add '[]' */ 557 sz_xpath += strlen_html(node->attributes[i].name) + strlen_html(node->attributes[i].value) + 6; /* 6 = ', @=""' */ 558 } 559 sz_xpath += brackets + 1; 560 *xpath = (SXML_CHAR*)__malloc(sz_xpath*sizeof(SXML_CHAR)); 561 562 if (*xpath == NULL) 563 return NULL; 564 565 sx_strcpy(*xpath, node->tag); 566 if (node->text != NULL) { 567 sx_strcat(*xpath, C2SX("[.=\"")); 568 (void)str2html(node->text, &(*xpath[sx_strlen(*xpath)])); 569 sx_strcat(*xpath, C2SX("\"")); 570 n = 1; /* Indicates '[' has been put */ 571 } else 572 n = 0; 573 574 for (i = 0; i < node->n_attributes; i++) { 575 if (!node->attributes[i].active) 576 continue; 577 578 if (n == 0) { 579 sx_strcat(*xpath, C2SX("[")); 580 n = 1; 581 } else 582 sx_strcat(*xpath, C2SX(", ")); 583 p = &(*xpath)[sx_strlen(*xpath)]; 584 585 /* Standard and Unicode versions of 'sprintf' do not have the same signature! :( */ 586 sx_sprintf(p, 587 #ifdef SXMLC_UNICODE 588 sz_xpath, 589 #endif 590 C2SX("@%s=%c"), node->attributes[i].name, XML_DEFAULT_QUOTE); 591 592 (void)str2html(node->attributes[i].value, p); 593 sx_strcat(*xpath, C2SX("\"")); 594 } 595 if (n > 0) 596 sx_strcat(*xpath, C2SX("]")); 597 598 return *xpath; 599 } 600 601 SXML_CHAR* XMLNode_get_XPath(XMLNode* node, SXML_CHAR** xpath, int incl_parents) 602 { 603 SXML_CHAR* xp = NULL; 604 SXML_CHAR* xparent; 605 XMLNode* parent; 606 607 if (node == NULL || node->init_value != XML_INIT_DONE || xpath == NULL) 608 return NULL; 609 610 if (!incl_parents) { 611 if (_get_XPath(node, &xp) == NULL) { 612 *xpath = NULL; 613 return NULL; 614 } 615 return *xpath = xp; 616 } 617 618 /* Go up to root node */ 619 parent = node; 620 do { 621 xparent = NULL; 622 if (_get_XPath(parent, &xparent) == NULL) goto xp_err; 623 if (xp != NULL) { 624 if (strcat_alloc(&xparent, C2SX("/")) == NULL) goto xp_err; 625 if (strcat_alloc(&xparent, xp) == NULL) goto xp_err; 626 } 627 xp = xparent; 628 parent = parent->father; 629 } while (parent != NULL); 630 if ((*xpath = sx_strdup(C2SX("/"))) == NULL || strcat_alloc(xpath, xp) == NULL) goto xp_err; 631 632 return *xpath; 633 634 xp_err: 635 if (xp != NULL) __free(xp); 636 *xpath = NULL; 637 638 return NULL; 639 }