Raw File
1 /*
2 Copyright (c) 2010, Matthieu Labas
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without modification,
6 are permitted provided that the following conditions are met:
7
8 1. Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10
11 2. Redistributions in binary form must reproduce the above copyright notice,
12 this list of conditions and the following disclaimer in the documentation
13 and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
19 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
24 OF SUCH DAMAGE.
25
26 The views and conclusions contained in the software and documentation are those of the
27 authors and should not be interpreted as representing official policies, either expressed
28 or implied, of the FreeBSD Project.
29 */
30 #if defined(WIN32) || defined(WIN64)
31 #pragma warning(disable : 4996)
32 #endif
33
34 #include <string.h>
35 #include <stdlib.h>
36 #include "sxmlc.h"
37 #include "sxmlsearch.h"
38
39 #define INVALID_XMLNODE_POINTER ((XMLNode*)-1)
40
41 /* The function used to compare a string to a pattern */
42 static REGEXPR_COMPARE regstrcmp_search = regstrcmp;
43
44 REGEXPR_COMPARE XMLSearch_set_regexpr_compare(REGEXPR_COMPARE fct)
45 {
46 REGEXPR_COMPARE previous = regstrcmp_search;
47
48 regstrcmp_search = fct;
49
50 return previous;
51 }
52
53 int XMLSearch_init(XMLSearch* search)
54 {
55 if (search == NULL)
56 return false;
57
58 if (search->init_value == XML_INIT_DONE)
59 XMLSearch_free(search, true);
60
61 search->tag = NULL;
62 search->text = NULL;
63 search->attributes = NULL;
64 search->n_attributes = 0;
65 search->next = NULL;
66 search->prev = NULL;
67 search->stop_at = INVALID_XMLNODE_POINTER; /* Because 'NULL' can be a valid value */
68 search->init_value = XML_INIT_DONE;
69
70 return true;
71 }
72
73 int XMLSearch_free(XMLSearch* search, int free_next)
74 {
75 int i;
76
77 if (search == NULL || search->init_value != XML_INIT_DONE)
78 return false;
79
80 if (search->tag != NULL) {
81 __free(search->tag);
82 search->tag = NULL;
83 }
84
85 if (search->attributes != NULL) {
86 for (i = 0; i < search->n_attributes; i++) {
87 if (search->attributes[i].name != NULL)
88 __free(search->attributes[i].name);
89 if (search->attributes[i].value != NULL)
90 __free(search->attributes[i].value);
91 }
92 __free(search->attributes);
93 search->n_attributes = 0;
94 search->attributes = NULL;
95 }
96
97 if (free_next && search->next != NULL) {
98 (void)XMLSearch_free(search->next, true);
99 __free(search->next);
100 search->next = NULL;
101 }
102 search->init_value = 0; /* Something not XML_INIT_DONE, otherwise we'll go into 'XMLSearch_free' again */
103 (void)XMLSearch_init(search);
104
105 return true;
106 }
107
108 int XMLSearch_search_set_tag(XMLSearch* search, const SXML_CHAR* tag)
109 {
110 if (search == NULL)
111 return false;
112
113 if (tag == NULL) {
114 if (search->tag != NULL) {
115 __free(search->tag);
116 search->tag = NULL;
117 }
118 return true;
119 }
120
121 search->tag = sx_strdup(tag);
122 return (search->tag != NULL);
123 }
124
125 int XMLSearch_search_set_text(XMLSearch* search, const SXML_CHAR* text)
126 {
127 if (search == NULL)
128 return false;
129
130 if (text == NULL) {
131 if (search->text != NULL) {
132 __free(search->text);
133 search->text = NULL;
134 }
135 return true;
136 }
137
138 search->text = sx_strdup(text);
139 return (search->text != NULL);
140 }
141
142 int XMLSearch_search_add_attribute(XMLSearch* search, const SXML_CHAR* attr_name, const SXML_CHAR* attr_value, int value_equal)
143 {
144 int i;
145 XMLAttribute* pt;
146 SXML_CHAR* name;
147 SXML_CHAR* value;
148
149 if (search == NULL)
150 return -1;
151
152 if (attr_name == NULL || attr_name[0] == NULC)
153 return -1;
154
155 name = sx_strdup(attr_name);
156 value = (attr_value == NULL ? NULL : sx_strdup(attr_value));
157 if (name == NULL || (attr_value && value == NULL)) {
158 if (value != NULL)
159 __free(value);
160 if (name != NULL)
161 __free(name);
162 }
163
164 i = search->n_attributes;
165 pt = (XMLAttribute*)__realloc(search->attributes, (i + 1) * sizeof(XMLAttribute));
166 if (pt == NULL) {
167 if (value)
168 __free(value);
169 __free(name);
170 return -1;
171 }
172
173 pt[i].name = name;
174 pt[i].value = value;
175 pt[i].active = value_equal;
176
177 search->n_attributes = i+1;
178 search->attributes = pt;
179
180 return i;
181 }
182
183 int XMLSearch_search_get_attribute_index(const XMLSearch* search, const SXML_CHAR* attr_name)
184 {
185 int i;
186
187 if (search == NULL || attr_name == NULL || attr_name[0] == NULC)
188 return -1;
189
190 for (i = 0; i < search->n_attributes; i++) {
191 if (!sx_strcmp(search->attributes[i].name, attr_name))
192 return i;
193 }
194
195 return -1;
196 }
197
198 int XMLSearch_search_remove_attribute(XMLSearch* search, int i_attr)
199 {
200 XMLAttribute* pt;
201
202 if (search == NULL || i_attr < 0 || i_attr >= search->n_attributes)
203 return -1;
204
205 /* Free attribute fields first */
206 if (search->n_attributes == 1)
207 pt = NULL;
208 else {
209 pt = (XMLAttribute*)__malloc((search->n_attributes - 1) * sizeof(XMLAttribute));
210 if (pt == NULL)
211 return -1;
212 }
213 if (search->attributes[i_attr].name != NULL)
214 __free(search->attributes[i_attr].name);
215 if (search->attributes[i_attr].value != NULL)
216 __free(search->attributes[i_attr].value);
217
218 if (pt != NULL) {
219 memcpy(pt, search->attributes, i_attr * sizeof(XMLAttribute));
220 memcpy(&pt[i_attr], &search->attributes[i_attr + 1], (search->n_attributes - i_attr - 1) * sizeof(XMLAttribute));
221 }
222 if (search->attributes)
223 __free(search->attributes);
224 search->attributes = pt;
225 search->n_attributes--;
226
227 return search->n_attributes;
228 }
229
230 int XMLSearch_search_set_children_search(XMLSearch* search, XMLSearch* children_search)
231 {
232 if (search == NULL)
233 return false;
234
235 if (search->next != NULL)
236 XMLSearch_free(search->next, true);
237
238 search->next = children_search;
239 children_search->prev = search;
240
241 return true;
242 }
243
244 SXML_CHAR* XMLSearch_get_XPath_string(const XMLSearch* search, SXML_CHAR** xpath, SXML_CHAR quote)
245 {
246 const XMLSearch* s;
247 SXML_CHAR squote[] = C2SX("'");
248 int i, fill;
249
250 if (xpath == NULL)
251 return NULL;
252
253 /* NULL 'search' is an empty string */
254 if (search == NULL) {
255 *xpath = sx_strdup(C2SX(""));
256 if (*xpath == NULL)
257 return NULL;
258
259 return *xpath;
260 }
261
262 squote[0] = (quote == NULC ? XML_DEFAULT_QUOTE : quote);
263
264 for (s = search; s != NULL; s = s->next) {
265 if (s != search && strcat_alloc(xpath, C2SX("/")) == NULL) goto err; /* No "/" prefix for the first criteria */
266 if (strcat_alloc(xpath, s->tag == NULL || s->tag[0] == NULC ? C2SX("*"): s->tag) == NULL) goto err;
267
268 if (s->n_attributes > 0 || (s->text != NULL && s->text[0] != NULC))
269 if (strcat_alloc(xpath, C2SX("[")) == NULL) goto err;
270
271 fill = false; /* '[' has not been filled with text yet, no ", " separator should be added */
272 if (s->text != NULL && s->text[0] != NULC) {
273 if (strcat_alloc(xpath, C2SX(".=")) == NULL) goto err;
274 if (strcat_alloc(xpath, squote) == NULL) goto err;
275 if (strcat_alloc(xpath, s->text) == NULL) goto err;
276 if (strcat_alloc(xpath, squote) == NULL) goto err;
277 fill = true;
278 }
279
280 for (i = 0; i < s->n_attributes; i++) {
281 if (fill) {
282 if (strcat_alloc(xpath, C2SX(", ")) == NULL) goto err;
283 } else
284 fill = true; /* filling is being performed */
285 if (strcat_alloc(xpath, C2SX("@")) == NULL) goto err;
286 if (strcat_alloc(xpath, s->attributes[i].name) == NULL) goto err;
287 if (s->attributes[i].value == NULL) continue;
288
289 if (strcat_alloc(xpath, s->attributes[i].active ? C2SX("=") : C2SX("!=")) == NULL) goto err;
290 if (strcat_alloc(xpath, squote) == NULL) goto err;
291 if (strcat_alloc(xpath, s->attributes[i].value) == NULL) goto err;
292 if (strcat_alloc(xpath, squote) == NULL) goto err;
293 }
294 if ((s->text != NULL && s->text[0] != NULC) || s->n_attributes > 0) {
295 if (strcat_alloc(xpath, C2SX("]")) == NULL) goto err;
296 }
297 }
298
299 return *xpath;
300
301 err:
302 __free(*xpath);
303 *xpath = NULL;
304
305 return NULL;
306 }
307
308 /*
309 Extract search information from 'xpath', where 'xpath' represents a single node
310 (i.e. no '/' inside, except escaped ones), stripped from lead and tail '/'.
311 tag[.=text, @attrib="value"] with potential spaces around '=' and ','.
312 Return 'false' if parsing failed, 'true' for success.
313 This is an internal function so we assume that arguments are valid (non-NULL).
314 */
315 static int _init_search_from_1XPath(SXML_CHAR* xpath, XMLSearch* search)
316 {
317 SXML_CHAR *p, *q;
318 SXML_CHAR c, c1, cc;
319 int l0, l1, is, r0, r1;
320 int ret;
321
322 XMLSearch_init(search);
323
324 /* Look for tag name */
325 for (p = xpath; *p != NULC && *p != C2SX('['); p++) ;
326 c = *p; /* Either '[' or '\0' */
327 *p = NULC;
328 ret = XMLSearch_search_set_tag(search, xpath);
329 *p = c;
330 if (!ret)
331 return false;
332
333 if (*p == NULC)
334 return true;
335
336 /* Here, '*p' is '[', we have to parse either text or attribute names/values until ']' */
337 for (p++; *p && *p != C2SX(']'); p++) {
338 for (q = p; *q && *q != C2SX(',') && *q != C2SX(']'); q++) ; /* Look for potential ',' separator to null it */
339 cc = *q;
340 if (*q == C2SX(',') || *q == C2SX(']'))
341 *q = NULC;
342 ret = true;
343 switch (*p) {
344 case C2SX('.'): /* '.[ ]=[ ]["']...["']' to search for text */
345 if (!split_left_right(p, C2SX('='), &l0, &l1, &is, &r0, &r1, true, true))
346 return false;
347 c = p[r1+1];
348 p[r1+1] = NULC;
349 ret = XMLSearch_search_set_text(search, &p[r0]);
350 p[r1+1] = c;
351 p += r1+1;
352 break;
353
354 /* Attribute name, possibly '@attrib[[ ]=[ ]"value"]' */
355 case C2SX('@'):
356 if (!split_left_right(++p, '=', &l0, &l1, &is, &r0, &r1, true, true))
357 return false;
358 c = p[l1+1];
359 c1 = p[r1+1];
360 p[l1+1] = NULC;
361 p[r1+1] = NULC;
362 ret = (XMLSearch_search_add_attribute(search, &p[l0], (is < 0 ? NULL : &p[r0]), true) < 0 ? false : true); /* 'is' < 0 when there is no '=' (i.e. check for attribute presence only */
363 p[l1+1] = c;
364 p[r1+1] = c1;
365 p += r1-1; /* Jump to next value */
366 break;
367
368 default: /* Not implemented */
369 break;
370 }
371 *q = cc; /* Restore ',' separator if any */
372 if (!ret)
373 return false;
374 }
375
376 return true;
377 }
378
379 int XMLSearch_init_from_XPath(const SXML_CHAR* xpath, XMLSearch* search)
380 {
381 XMLSearch *search1, *search2;
382 SXML_CHAR *p, *tag, *tag0;
383 SXML_CHAR c;
384
385 if (!XMLSearch_init(search))
386 return false;
387
388 /* NULL or empty xpath is an empty (initialized only) search */
389 if (xpath == NULL || *xpath == NULC)
390 return true;
391
392 search1 = NULL; /* Search struct to add the xpath portion to */
393 search2 = search; /* Search struct to be filled from xpath portion */
394
395 tag = tag0 = sx_strdup(xpath); /* Create a copy of 'xpath' to be able to patch it (or segfault if 'xpath' is const, cnacu6o Sergey@sourceforge!) */
396 while (*tag != NULC) {
397 if (search2 != search) { /* Allocate a new search when the original one (i.e. 'search') has already been filled */
398 search2 = (XMLSearch*)__calloc(1, sizeof(XMLSearch));
399 if (search2 == NULL) {
400 __free(tag0);
401 (void)XMLSearch_free(search, true);
402 return false;
403 }
404 }
405 /* Skip all first '/' */
406 for (; *tag != NULC && *tag == C2SX('/'); tag++) ;
407 if (*tag == NULC) {
408 __free(tag0);
409 return false;
410 }
411
412 /* Look for the end of tag name: after '/' (to get another tag) or end of string */
413 for (p = &tag[1]; *p != NULC && *p != C2SX('/'); p++) {
414 if (*p == C2SX('\\') && *++p == NULC)
415 break; /* Escape character, '\' could be the last character... */
416 }
417 c = *p; /* Backup character before nulling it */
418 *p = NULC;
419 if (!_init_search_from_1XPath(tag, search2)) {
420 __free(tag0);
421 (void)XMLSearch_free(search, true);
422 return false;
423 }
424 *p = c;
425
426 /* 'search2' is the newly parsed tag, 'search1' is the previous tag (or NULL if 'search2' is the first tag to parse (i.e. 'search2' == 'search') */
427
428 if (search1 != NULL) search1->next = search2;
429 if (search2 != search) search2->prev = search1;
430 search1 = search2;
431 search2 = NULL; /* Will force allocation during next loop */
432 tag = p;
433 }
434
435 __free(tag0);
436 return true;
437 }
438
439 static int _attribute_matches(XMLAttribute* to_test, XMLAttribute* pattern)
440 {
441 if (to_test == NULL && pattern == NULL)
442 return true;
443
444 if (to_test == NULL || pattern == NULL)
445 return false;
446
447 /* No test on name => match */
448 if (pattern->name == NULL || pattern->name[0] == NULC)
449 return true;
450
451 /* Test on name fails => no match */
452 if (!regstrcmp_search(to_test->name, pattern->name))
453 return false;
454
455 /* No test on value => match */
456 if (pattern->value == NULL)
457 return true;
458
459 /* Test on value according to pattern "equal" attribute */
460 return regstrcmp_search(to_test->value, pattern->value) == pattern->active ? true : false;
461 }
462
463 int XMLSearch_node_matches(const XMLNode* node, const XMLSearch* search)
464 {
465 int i, j;
466
467 if (node == NULL)
468 return false;
469
470 if (search == NULL)
471 return true;
472
473 /* No comments, prolog, or such type of nodes are tested */
474 if (node->tag_type != TAG_FATHER && node->tag_type != TAG_SELF)
475 return false;
476
477 /* Check tag */
478 if (search->tag != NULL && !regstrcmp_search(node->tag, search->tag))
479 return false;
480
481 /* Check text */
482 if (search->text != NULL && !regstrcmp_search(node->text, search->text))
483 return false;
484
485 /* Check attributes */
486 if (search->attributes != NULL) {
487 for (i = 0; i < search->n_attributes; i++) {
488 for (j = 0; j < node->n_attributes; j++) {
489 if (!node->attributes[j].active)
490 continue;
491 if (_attribute_matches(&node->attributes[j], &search->attributes[i]))
492 break;
493 }
494 if (j >= node->n_attributes) /* All attributes where scanned without a successful match */
495 return false;
496 }
497 }
498
499 /* 'node' matches 'search'. If there is a father search, its father must match it */
500 if (search->prev != NULL)
501 return XMLSearch_node_matches(node->father, search->prev);
502
503 /* TODO: Should a node match if search has no more 'prev' search and node father is still below the initial search ?
504 Depends if XPath started with "//" (=> yes) or "/" (=> no).
505 if (search->prev == NULL && node->father != search->from) return false; ? */
506
507 return true;
508 }
509
510 XMLNode* XMLSearch_next(const XMLNode* from, XMLSearch* search)
511 {
512 XMLNode* node;
513
514 if (search == NULL || from == NULL)
515 return NULL;
516
517 /* Go down the last child search as fathers will be tested recursively by the 'XMLSearch_node_matches' function */
518 for (; search->next != NULL; search = search->next) ;
519
520 /* Initialize the 'stop_at' node on first search, to remember where to stop as there will be multiple calls */
521 /* 'stop_at' can be NULL when 'from' is a root node, that is why it should be initialized with something else than NULL */
522 if (search->stop_at == INVALID_XMLNODE_POINTER)
523 search->stop_at = XMLNode_next_sibling(from);
524
525 for (node = XMLNode_next(from); node != search->stop_at; node = XMLNode_next(node)) { /* && node != NULL */
526 if (!XMLSearch_node_matches(node, search))
527 continue;
528
529 /* 'node' is a matching node */
530
531 /* No search to perform on 'node' children => 'node' is returned */
532 if (search->next == NULL)
533 return node;
534
535 /* Run the search on 'node' children */
536 return XMLSearch_next(node, search->next);
537 }
538
539 return NULL;
540 }
541
542 static SXML_CHAR* _get_XPath(const XMLNode* node, SXML_CHAR** xpath)
543 {
544 int i, n, brackets, sz_xpath;
545 SXML_CHAR* p;
546
547 brackets = 0;
548 sz_xpath = sx_strlen(node->tag);
549 if (node->text != NULL) {
550 sz_xpath += strlen_html(node->text) + 4; /* 4 = '.=""' */
551 brackets = 2; /* Text has to be displayed => add '[]' */
552 }
553 for (i = 0; i < node->n_attributes; i++) {
554 if (!node->attributes[i].active)
555 continue;
556 brackets = 2; /* At least one attribute has to be displayed => add '[]' */
557 sz_xpath += strlen_html(node->attributes[i].name) + strlen_html(node->attributes[i].value) + 6; /* 6 = ', @=""' */
558 }
559 sz_xpath += brackets + 1;
560 *xpath = (SXML_CHAR*)__malloc(sz_xpath*sizeof(SXML_CHAR));
561
562 if (*xpath == NULL)
563 return NULL;
564
565 sx_strcpy(*xpath, node->tag);
566 if (node->text != NULL) {
567 sx_strcat(*xpath, C2SX("[.=\""));
568 (void)str2html(node->text, &(*xpath[sx_strlen(*xpath)]));
569 sx_strcat(*xpath, C2SX("\""));
570 n = 1; /* Indicates '[' has been put */
571 } else
572 n = 0;
573
574 for (i = 0; i < node->n_attributes; i++) {
575 if (!node->attributes[i].active)
576 continue;
577
578 if (n == 0) {
579 sx_strcat(*xpath, C2SX("["));
580 n = 1;
581 } else
582 sx_strcat(*xpath, C2SX(", "));
583 p = &(*xpath)[sx_strlen(*xpath)];
584
585 /* Standard and Unicode versions of 'sprintf' do not have the same signature! :( */
586 sx_sprintf(p,
587 #ifdef SXMLC_UNICODE
588 sz_xpath,
589 #endif
590 C2SX("@%s=%c"), node->attributes[i].name, XML_DEFAULT_QUOTE);
591
592 (void)str2html(node->attributes[i].value, p);
593 sx_strcat(*xpath, C2SX("\""));
594 }
595 if (n > 0)
596 sx_strcat(*xpath, C2SX("]"));
597
598 return *xpath;
599 }
600
601 SXML_CHAR* XMLNode_get_XPath(XMLNode* node, SXML_CHAR** xpath, int incl_parents)
602 {
603 SXML_CHAR* xp = NULL;
604 SXML_CHAR* xparent;
605 XMLNode* parent;
606
607 if (node == NULL || node->init_value != XML_INIT_DONE || xpath == NULL)
608 return NULL;
609
610 if (!incl_parents) {
611 if (_get_XPath(node, &xp) == NULL) {
612 *xpath = NULL;
613 return NULL;
614 }
615 return *xpath = xp;
616 }
617
618 /* Go up to root node */
619 parent = node;
620 do {
621 xparent = NULL;
622 if (_get_XPath(parent, &xparent) == NULL) goto xp_err;
623 if (xp != NULL) {
624 if (strcat_alloc(&xparent, C2SX("/")) == NULL) goto xp_err;
625 if (strcat_alloc(&xparent, xp) == NULL) goto xp_err;
626 }
627 xp = xparent;
628 parent = parent->father;
629 } while (parent != NULL);
630 if ((*xpath = sx_strdup(C2SX("/"))) == NULL || strcat_alloc(xpath, xp) == NULL) goto xp_err;
631
632 return *xpath;
633
634 xp_err:
635 if (xp != NULL) __free(xp);
636 *xpath = NULL;
637
638 return NULL;
639 }
640
Generated by GNU Enscript 1.6.6, and GophHub 1.3.