use binary search (bsearch) for named entity lookup on the sorted list - webdump - [FORK] git://git.codemadness.org/webdump
(HTM) git clone git://git.z3bra.org/webdump.git
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 0aaf37618319738faf0c0014c9b8c80abf39e3f6
(DIR) parent fb8467d36699d79fa9678342993180ddd7590c29
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Sun, 8 Dec 2019 16:41:00 +0100
use binary search (bsearch) for named entity lookup on the sorted list
using the big list namedentities.all.h this is much faster (of course).
Diffstat:
M xml.c | 33 +++++++++++++++++++++----------
1 file changed, 23 insertions(+), 10 deletions(-)
---
(DIR) diff --git a/xml.c b/xml.c
@@ -243,27 +243,40 @@ codepointtoutf8(long r, char *s)
}
}
+struct namedentity {
+ const char *entity;
+ long cp;
+};
+
+int
+namedentitycmp(const void *v1, const void *v2)
+{
+ struct namedentity *n1 = (struct namedentity *)v1;
+ struct namedentity *n2 = (struct namedentity *)v2;
+
+ return strcmp(n1->entity, n2->entity);
+}
+
static int
namedentitytostr(const char *e, char *buf, size_t bufsiz)
{
- static const struct {
- const char *entity;
- long cp;
- } entities[] = {
+ static const struct namedentity entities[] = {
#include "namedentities.h"
};
+ struct namedentity find, *found;
size_t i;
/* buffer is too small */
if (bufsiz < 5)
return -1;
- for (i = 0; i < sizeof(entities) / sizeof(*entities); i++) {
- if (!strcmp(e, entities[i].entity)) {
- i = codepointtoutf8(entities[i].cp, buf);
- buf[i] = '\0';
- return i;
- }
+ find.entity = e;
+ found = bsearch(&find, entities, sizeof(entities) / sizeof(*entities),
+ sizeof(*entities), namedentitycmp);
+ if (found) {
+ i = codepointtoutf8(found->cp, buf);
+ buf[i] = '\0';
+ return i;
}
return 0;
}