GophHub - kevinboone/epub2txt2/src/sxmlsearch.h


Raw File

    1	/**
    2		Copyright (c) 2010, Matthieu Labas
    3		All rights reserved.
    4	
    5		Redistribution and use in source and binary forms, with or without modification,
    6		are permitted provided that the following conditions are met:
    7	
    8		1. Redistributions of source code must retain the above copyright notice,
    9		   this list of conditions and the following disclaimer.
   10	
   11		2. Redistributions in binary form must reproduce the above copyright notice,
   12		   this list of conditions and the following disclaimer in the documentation
   13		   and/or other materials provided with the distribution.
   14	
   15		THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
   16		ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
   17		WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   18		IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
   19		INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   20		NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   21		PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
   22		WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   23		ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
   24		OF SUCH DAMAGE.
   25	
   26		The views and conclusions contained in the software and documentation are those of the
   27		authors and should not be interpreted as representing official policies, either expressed
   28		or implied, of the FreeBSD Project.
   29	*/
   30	#ifndef _SXMLCSEARCH_H_
   31	#define _SXMLCSEARCH_H_
   32	
   33	#ifdef __cplusplus
   34	extern "C" {
   35	#endif

   36	
   37	#include "sxmlc.h"
   38	
   39	/**
   40	 * \brief XML search parameters. Can be initialized from an XPath string.
   41	 */
   42	typedef struct _XMLSearch {
   43	
   44		SXML_CHAR* tag; /**< Search for nodes whose tag matches this `tag` field. */
   45						/**< If NULL or an empty string, all nodes will be matching. */
   46	
   47		XMLAttribute* attributes;	/**< Search for nodes whose attributes match all the ones described. */
   48									/**< If NULL, all nodes will be matching. */
   49									/**< The `attribute->name` should not be NULL. If corresponding `attribute->value` */
   50									/**< is NULL or an empty-string, search will return the first node with an attribute */
   51									/**< `attribute->name`, no matter what its value is. NOTE(review): the doc of `XMLSearch_search_add_attribute()` says an empty string means "exists with an empty value" -- confirm which applies. */
   52									/**< If `attribute->value` is not NULL, a matching node should have an attribute */
   53									/**< `attribute->name` with the corresponding value `attribute->value`. */
   54									/**< When `attribute->value` is not NULL, the `attribute->active` should be `true` */
   55									/**< to specify that values should be equal, or `false` to specify that values should */
   56									/**< be different. */
   57		int n_attributes;	/**< The size of the `attributes` array. */
   58	
   59		SXML_CHAR* text;	/**< Search for nodes whose text matches this `text` field. */
   60							/**< If NULL or an empty string, all nodes will be matching (i.e. not used). */
   61	
   62		struct _XMLSearch* next;	/**< Next search to perform on children of a node matching current struct. */
   63									/**< Used to search for children of specific nodes (used in XPath queries). */
   64		struct _XMLSearch* prev;	/**< Father search in the chain; NULL for the initial search struct. */
   65	
   66		XMLNode* stop_at;	/**< Internal use only. Must be initialized to 'INVALID_XMLNODE_POINTER' prior to first search. */
   67	
   68		/* Keep 'init_value' as the last member */
   69		int init_value;	/**< Initialized to 'XML_INIT_DONE' to indicate that the search struct has been initialized properly */
   70	} XMLSearch;
   71	
   72	/**
   73	 * \brief The prototype used by the regular expression handler.
   74	 * The default regex function can be overridden by user code through `XMLSearch_set_regexpr_compare()`.
   75	 * \param str The string to match on `pattern`.
   76	 * \param pattern The pattern to match `str` to.
   77	 * \return `true` if `str` matches `pattern`.
   78	 */
   79	typedef int (*REGEXPR_COMPARE)(SXML_CHAR* str, SXML_CHAR* pattern);
   80	
   81	/**
   82	 * \brief Set a new comparison function to evaluate whether a string matches a given pattern.
   83	 *
   84	 * The default one is `regstrcmp()` which handles limited regular expressions (<code>'?'</code>
   85	 * and <code>'*'</code> wildcards).
   86	 *
   87	 * \return The previous function used for matching.
   88	 */
   89	REGEXPR_COMPARE XMLSearch_set_regexpr_compare(REGEXPR_COMPARE fct);
   90	
   91	/**
   92	 * \brief Initialize an empty search. No memory freeing is performed.
   93	 * \param search The search parameters.
   94	 * \return `false` when `search` is NULL.
   95	 */
   96	int XMLSearch_init(XMLSearch* search);
   97	
   98	/**
   99	 * \brief Free all search members except for the `search->next` member that should be freed
  100	 * by its creator, unless `free_next` is `true`.
  101	 *
  102	 * It is recommended that `free_next` is positioned to `true` only when the creator did not
  103	 * handle the whole memory allocation chain, e.g. when using `XMLSearch_init_from_XPath()`
  104	 * that allocates all search structs.
  105	 *
  106	 * \param search The search parameters.
  107	 * \param free_next `false` in order *not* to free the `search->next` structures.
  108	 *
  109	 * \return `false` when `search` is NULL.
  110	 */
  111	int XMLSearch_free(XMLSearch* search, int free_next);
  112	
  113	/**
  114	 * \brief Set the search based on tag.
  115	 * \param search The search parameters.
  116	 * \param tag should be NULL or empty to search for any node (e.g. search based on attributes
  117	 * only). In this case, the previous tag is freed.
  118	 * \return `true` upon successful completion, `false` for memory error.
  119	 */
  120	int XMLSearch_search_set_tag(XMLSearch* search, const SXML_CHAR* tag);
  121	
  122	/**
  123	 * \brief Add an attribute search criteria.
  124	 * \param search The search parameters.
  125	 * \param attr_name is the attribute name to search. Mandatory.
  126	 * \param attr_value should be NULL to test for attribute presence only
  127	 * 		(no test on value). An empty string means the attribute should exist
  128	 * 		with an empty value.
  129	 * \param value_equal should be specified to test for attribute value equality (`true`) or
  130	 *		difference (`false`).
  131	 * \return the index of the new attribute, or -1 for memory error.
  132	 */
  133	int XMLSearch_search_add_attribute(XMLSearch* search, const SXML_CHAR* attr_name, const SXML_CHAR* attr_value, int value_equal);
  134	
  135	/**
  136	 * \brief Retrieve attribute search parameters on attribute `attr_name`.
  137	 * \param search The search parameters.
  138	 * \param attr_name The attribute name to look for.
  139	 * \return The attribute search index or -1 if not found.
  140	 */
  141	int XMLSearch_search_get_attribute_index(const XMLSearch* search, const SXML_CHAR* attr_name);
  142	
  143	/**
  144	 * \brief Remove the attribute search parameters by index.
  145	 * \param search The search parameters.
  146	 * \param i_attr The search attribute index.
  147	 * \return the number of search attributes parameters left.
  148	 */
  149	int XMLSearch_search_remove_attribute(XMLSearch* search, int i_attr);
  150	
  151	/**
  152	 * \brief Set the search based on text content.
  153	 * \param search The search parameters.
  154	 * \param text should be NULL or empty to search for any node (e.g. search based on attributes
  155	 * 		only). In this case, the previous text is freed.
  156	 *
  157	 * \return `true` upon successful completion, `false` for memory error.
  158	 */
  159	int XMLSearch_search_set_text(XMLSearch* search, const SXML_CHAR* text);
  160	
  161	/**
  162	 * \brief Set an additional search on children nodes of a previously matching node.
  163	 *
  164	 * Search structs are chained to finally return the node matching the last search struct,
  165	 * whose father node matches the previous search struct, and so on.
  166	 * This allows describing more complex search queries like XPath
  167	 * `"//FatherTag[@attrib=val]/ChildTag/"`.
  168	 *
  169	 * In this case, a first search struct would have `search->tag = "FatherTag"` and
  170	 * `search->attributes[0] = { "attrib", "val" }` and a second search struct with
  171	 * `search->tag = "ChildTag"`.
  172	 * If `children_search` is NULL, next search is removed. Freeing previous search is to be
  173	 * performed by its owner.
  174	 * In any case, if `search` next search is not NULL, it is freed.
  175	 *
  176	 * \param search The search parameters.
  177	 * \param children_search The search parameters to be applied to children of nodes
  178	 * 		matching `search`.
  179	 *
  180	 * \return `true` when association has been made, `false` when an error occurred.
  181	 */
  182	int XMLSearch_search_set_children_search(XMLSearch* search, XMLSearch* children_search);
  183	
  184	/**
  185	 * \brief Compute an XPath-equivalent string of the search criteria.
  186	 *
  187	 * \param search The search parameters. NULL will return an empty string.
  188	 * \param xpath is a pointer to a string that will be allocated by the function and should
  189	 *		be freed after use.
  190	 * \param quote is the quote character to be used (e.g. `"` or `'`). If <code>'\0'</code>,
  191	 * 		`XML_DEFAULT_QUOTE` will be used.
  192	 *
  193	 * \return NULL for a memory problem, the computed XPath string otherwise (expected to be `*xpath`; confirm in the implementation).
  194	 */
  195	SXML_CHAR* XMLSearch_get_XPath_string(const XMLSearch* search, SXML_CHAR** xpath, SXML_CHAR quote);
  196	
  197	/**
  198	 * \brief Initialize a search struct from an XPath-like query. "XPath-like" means that
  199	 * it does not fully comply to XPath standard.
  200	 *
  201	 * \param xpath should be like <code>"tag[.=text, @attrib="value", @attrib!='value', ...]/tag..."</code>.
  202	 * 		*Warning*: the XPath query on node text like `father[child="text"]` should be
  203	 * 		re-written `father/child[.="text"]` instead (which should be XPath-compliant as well).
  204	 * \param search The search parameters.
  205	 *
  206	 *
  207	 * \return `true` when `search` was correctly initialized, `false` in case of memory
  208	 * 		problem or malformed `xpath`.
  209	 */
  210	int XMLSearch_init_from_XPath(const SXML_CHAR* xpath, XMLSearch* search);
  211	
  212	/**
  213	 * \brief Check whether a node matches a search criteria.
  214	 *
  215	 * If `search->prev` is not NULL (i.e. has a father search), `node->father` is also
  216	 * tested, recursively (i.e. grand-father and so on).
  217	 *
  218	 * \param node The node to test. `tag_type` should be `TAG_FATHER` or `TAG_SELF` only.
  219	 * \param search The search parameters.
  220	 *
  221	 * \return `false` when `node` does not match or for invalid arguments, `true`
  222	 * 		if `node` is a match.
  223	 */
  224	int XMLSearch_node_matches(const XMLNode* node, const XMLSearch* search);
  225	
  226	/**
  227	 * \brief Search next matching node, according to search parameters.
  228	 *
  229	 * Search starts from node `from` by scanning all its children, and going up to siblings,
  230	 * uncles and so on.
  231	 *
  232	 * Searching for the next matching node is performed by running the search again on the last
  233	 * matching node. So `search` has to be initialized by `XMLSearch_init()` prior to the first
  234	 * call, to memorize the initial `from` node and know where to stop search.
  235	 * `from` ITSELF IS NOT CHECKED! Direct call to `XMLSearch_node_matches(from, search)` should
  236	 * be made if necessary.
  237	 *
  238	 * If the document has several root nodes, a complete search in the document should be performed
  239	 * by manually calling `XMLSearch_next()` on each root node in a for loop.
  240	 * Note that `search` should be the initial search struct (i.e. `search->prev` should be NULL). This
  241	 * cannot be checked correctly by the function itself as it is partly recursive.
  242	 *
  243	 * \param from The node to start searching from.
  244	 * \param search The search parameters.
  245	 *
  246	 * \return the next matching node according to `search` criteria, or NULL when no more nodes match
  247	 * 		or when an error occurred.
  248	 */
  249	XMLNode* XMLSearch_next(const XMLNode* from, XMLSearch* search);
  250	
  251	/**
  252	 * \brief Get node XPath-like equivalent: `tag[.="text", @attribute="value", ...]`, potentially
  253	 * including father nodes XPaths.
  254	 *
  255	 * The computed XPath is stored in a dynamically-allocated string.
  256	 *
  257	 * \return the XPath, or NULL if `node` is invalid or on memory error.
  258	 */
  259	SXML_CHAR* XMLNode_get_XPath(XMLNode* node, SXML_CHAR** xpath, int incl_parents);
  260	
  261	/**
  262	 * Checks whether a string corresponds to a pattern.
  263	 * \param str The string to check.
  264	 * \param pattern can use wildcards such as `*` (any potentially empty string) or
  265	 * 		`?` (any character) and use `\` as an escape character.
  266	 * \returns `true` when `str` matches `pattern`, `false` otherwise.
  267	 */
  268	int regstrcmp(SXML_CHAR* str, SXML_CHAR* pattern);
  269	
  270	#ifdef __cplusplus
  271	}
  272	#endif

  273	
  274	#endif

  275	

Generated by GNU Enscript 1.6.6, and GophHub 1.3.