1 /**
2 Copyright (c) 2010, Matthieu Labas
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without modification,
6 are permitted provided that the following conditions are met:
7
8 1. Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10
11 2. Redistributions in binary form must reproduce the above copyright notice,
12 this list of conditions and the following disclaimer in the documentation
13 and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
19 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
24 OF SUCH DAMAGE.
25
26 The views and conclusions contained in the software and documentation are those of the
27 authors and should not be interpreted as representing official policies, either expressed
28 or implied, of the FreeBSD Project.
29 */
30 #ifndef _SXMLCSEARCH_H_
31 #define _SXMLCSEARCH_H_
32
33 #ifdef __cplusplus
34 extern "C" {
35 #endif
36
37 #include "sxmlc.h"
38
39 /**
40 * \brief XML search parameters (tag, attributes, text and chained child search). Can be initialized from an XPath string.
41 */
42 typedef struct _XMLSearch {
43
44 SXML_CHAR* tag; /**< Search for nodes whose tag matches this `tag` field. */
45 /**< If NULL or an empty string, all nodes will match. */
46
47 XMLAttribute* attributes; /**< Search for nodes whose attributes match all the ones described. */
48 /**< If NULL, all nodes will match. */
49 /**< The `attribute->name` should not be NULL. If the corresponding `attribute->value` */
50 /**< is NULL or an empty string, search will return the first node with an attribute */
51 /**< `attribute->name`, no matter what its value is. */
52 /**< If `attribute->value` is not NULL, a matching node should have an attribute */
53 /**< `attribute->name` with the corresponding value `attribute->value`. */
54 /**< When `attribute->value` is not NULL, the `attribute->active` should be `true` */
55 /**< to specify that values should be equal, or `false` to specify that values should */
56 /**< be different. */
57 int n_attributes; /**< The size of the `attributes` array. */
58
59 SXML_CHAR* text; /**< Search for nodes whose text matches this `text` field. */
60 /**< If NULL or an empty string, all nodes will match (i.e. the field is not used). */
61
62 struct _XMLSearch* next; /**< Next search to perform on children of a node matching the current struct. */
63 /**< Used to search for nodes that are children of specific nodes (used in XPath queries). */
64 struct _XMLSearch* prev; /**< Father search, if any; NULL for the initial search struct of a chain. */
65
66 XMLNode* stop_at; /**< Internal use only. Must be initialized to 'INVALID_XMLNODE_POINTER' prior to first search. */
67
68 /* Keep 'init_value' as the last member */
69 int init_value; /**< Initialized to 'XML_INIT_DONE' to indicate proper initialization. NOTE(review): the original text said "document"; presumably this search struct is meant - confirm. */
70 } XMLSearch;
71
72 /**
73 * \brief The prototype used by the regular expression handler.
74 * The default regex function can be overridden by user code through `XMLSearch_set_regexpr_compare()`.
75 * \param str The string to match against `pattern`.
76 * \param pattern The pattern that `str` is matched against.
77 * \return `true` if `str` matches `pattern`.
78 */
79 typedef int (*REGEXPR_COMPARE)(SXML_CHAR* str, SXML_CHAR* pattern);
80
81 /**
82 * \brief Set a new comparison function to evaluate whether a string matches a given pattern.
83 *
84 * The default one is `regstrcmp()`, which handles limited regular expressions ('?'
85 * and '*' wildcards).
86 * \param fct The new comparison function to use for matching.
87 * \return The previous function used for matching.
88 */
89 REGEXPR_COMPARE XMLSearch_set_regexpr_compare(REGEXPR_COMPARE fct);
90
91 /**
92 * \brief Initialize an empty search. No memory freeing is performed on the members of `search`.
93 * \param search The search parameters to initialize.
94 * \return `false` when `search` is NULL (presumably `true` otherwise - confirm against the implementation).
95 */
96 int XMLSearch_init(XMLSearch* search);
97
98 /**
99 * \brief Free all search members except for the `search->next` member, which should be freed
100 * by its creator unless `free_next` is `true`.
101 *
102 * It is recommended to set `free_next` to `true` only when the creator did not
103 * handle the whole memory allocation chain, e.g. when using `XMLSearch_init_from_XPath()`,
104 * which allocates all search structs.
105 *
106 * \param search The search parameters.
107 * \param free_next `false` in order *not* to free the `search->next` structures.
108 *
109 * \return `false` when `search` is NULL.
110 */
111 int XMLSearch_free(XMLSearch* search, int free_next);
112
113 /**
114 * \brief Set the search based on tag.
115 * \param search The search parameters.
116 * \param tag The tag to search for. Should be NULL or empty to search for any node (e.g. search
117 * based on attributes only). In this case, the previous tag is freed.
118 * \return `true` upon successful completion, `false` for memory error.
119 */
120 int XMLSearch_search_set_tag(XMLSearch* search, const SXML_CHAR* tag);
121
122 /**
123 * \brief Add an attribute search criterion.
124 * \param search The search parameters.
125 * \param attr_name The attribute name to search for. Mandatory.
126 * \param attr_value should be NULL to test for attribute presence only
127 * (no test on value). An empty string means the attribute should exist
128 * with an empty value. NOTE(review): the `attributes` field comment in `XMLSearch` says an empty value matches any value - confirm which is correct.
129 * \param value_equal should be specified to test for attribute value equality (`true`) or
130 * difference (`false`).
131 * \return the index of the new attribute, or -1 for memory error.
132 */
133 int XMLSearch_search_add_attribute(XMLSearch* search, const SXML_CHAR* attr_name, const SXML_CHAR* attr_value, int value_equal);
134
135 /**
136 * \brief Retrieve the index of the attribute search parameters for attribute `attr_name`.
137 * \param search The search parameters.
138 * \param attr_name The attribute name to look for.
139 * \return The attribute search index, or -1 if not found.
140 */
141 int XMLSearch_search_get_attribute_index(const XMLSearch* search, const SXML_CHAR* attr_name);
142
143 /**
144 * \brief Remove the attribute search parameters by index.
145 * \param search The search parameters.
146 * \param i_attr The index of the search attribute to remove.
147 * \return the number of search attribute parameters left.
148 */
149 int XMLSearch_search_remove_attribute(XMLSearch* search, int i_attr);
150
151 /**
152 * \brief Set the search based on text content.
153 * \param search The search parameters.
154 * \param text The text to search for. Should be NULL or empty to search for any node (e.g. search
155 * based on attributes only). In this case, the previous text is freed.
156 *
157 * \return `true` upon successful completion, `false` for memory error.
158 */
159 int XMLSearch_search_set_text(XMLSearch* search, const SXML_CHAR* text);
160
161 /**
162 * \brief Set an additional search on children nodes of a previously matching node.
163 *
164 * Search structs are chained to finally return the node matching the last search struct,
165 * whose father node matches the previous search struct, and so on.
166 * This allows describing more complex search queries like XPath
167 * `"//FatherTag[@attrib=val]/ChildTag/"`.
168 *
169 * In this case, a first search struct would have `search->tag = "FatherTag"` and
170 * `search->attributes[0] = { "attrib", "val" }`, and a second search struct would have
171 * `search->tag = "ChildTag"`.
172 * If `children_search` is NULL, the next search is removed. Freeing the previous search is to be
173 * performed by its owner.
174 * In any case, if the next search of `search` is not NULL, it is freed.
175 *
176 * \param search The search parameters.
177 * \param children_search The search parameters to be applied to children of nodes
178 * matching `search`.
179 *
180 * \return `true` when association has been made, `false` when an error occurred.
181 */
182 int XMLSearch_search_set_children_search(XMLSearch* search, XMLSearch* children_search);
183
184 /**
185 * \brief Compute an XPath-equivalent string of the search criteria.
186 *
187 * \param search The search parameters. NULL will return an empty string.
188 * \param xpath is a pointer to a string that will be allocated by the function and should
189 * be freed after use.
190 * \param quote is the quote character to be used (e.g. `"` or `'`). If '\0',
191 * `XML_DEFAULT_QUOTE` will be used.
192 *
193 * \return NOTE(review): the original text said "`false` for a memory problem, `true` otherwise", but the function returns a `SXML_CHAR*`; presumably the allocated string on success and NULL for a memory problem - confirm against the implementation.
194 */
195 SXML_CHAR* XMLSearch_get_XPath_string(const XMLSearch* search, SXML_CHAR** xpath, SXML_CHAR quote);
196
197 /**
198 * \brief Initialize a search struct from an XPath-like query. "XPath-like" means that
199 * it does not fully comply with the XPath standard.
200 *
201 * \param xpath should be like "tag[.=text, @attrib="value", @attrib!='value', ...]/tag...".
202 * *Warning*: an XPath query on node text like `father[child="text"]` should be
203 * rewritten as `father/child[.="text"]` instead (which should be XPath-compliant as well).
204 * \param search The search parameters to initialize.
205 *
206 *
207 * \return `true` when `search` was correctly initialized, `false` in case of memory
208 * problem or malformed `xpath`.
209 */
210 int XMLSearch_init_from_XPath(const SXML_CHAR* xpath, XMLSearch* search);
211
212 /**
213 * \brief Check whether a node matches the search criteria.
214 *
215 * If `search->prev` is not NULL (i.e. `search` has a father search), `node->father` is also
216 * tested, recursively (i.e. grand-father and so on).
217 *
218 * \param node The node to test. Its `tag_type` should be `TAG_FATHER` or `TAG_SELF` only.
219 * \param search The search parameters.
220 *
221 * \return `false` when `node` does not match or for invalid arguments, `true`
222 * if `node` is a match.
223 */
224 int XMLSearch_node_matches(const XMLNode* node, const XMLSearch* search);
225
226 /**
227 * \brief Search for the next matching node, according to search parameters.
228 *
229 * Search starts from node `from` by scanning all its children, and going up to siblings,
230 * uncles and so on.
231 *
232 * Searching for the next matching node is performed by running the search again on the last
233 * matching node. So `search` has to be initialized by `XMLSearch_init()` prior to the first
234 * call, to memorize the initial `from` node and know where to stop the search.
235 * `from` ITSELF IS NOT CHECKED! A direct call to `XMLSearch_node_matches(from, search)` should
236 * be made if necessary.
237 *
238 * If the document has several root nodes, a complete search in the document should be performed
239 * by manually calling `XMLSearch_next()` on each root node in a for loop.
240 * Note that `search` should be the initial search struct (i.e. `search->prev` should be NULL). This
241 * cannot be checked correctly by the function itself as it is partly recursive.
242 *
243 * \param from The node to start searching from.
244 * \param search The search parameters.
245 *
246 * \return the next matching node according to `search` criteria, or NULL when no more nodes match
247 * or when an error occurred.
248 */
249 XMLNode* XMLSearch_next(const XMLNode* from, XMLSearch* search);
250
251 /**
252 * \brief Get node XPath-like equivalent: `tag[.="text", @attribute="value", ...]`, potentially
253 * including father nodes XPaths.
254 *
255 * The computed XPath is stored in a dynamically-allocated string.
256 * NOTE(review): `xpath` presumably receives that string and `incl_parents` presumably selects whether father nodes are included - confirm against the implementation.
257 * \return the XPath, or NULL if `node` is invalid or on memory error.
258 */
259 SXML_CHAR* XMLNode_get_XPath(XMLNode* node, SXML_CHAR** xpath, int incl_parents);
260
261 /**
262 * \brief Check whether a string corresponds to a pattern.
263 * \param str The string to check.
264 * \param pattern can use wildcards such as `*` (any potentially empty string) or
265 * `?` (any character) and use `\` as an escape character.
266 * \return `true` when `str` matches `pattern`, `false` otherwise.
267 */
268 int regstrcmp(SXML_CHAR* str, SXML_CHAR* pattern);
269
270 #ifdef __cplusplus
271 }
272 #endif
273
274 #endif