z3bra.org

       use binary search (bsearch) for named entity lookup on the sorted list - webdump - [FORK] git://git.codemadness.org/webdump
 (HTM) git clone git://git.z3bra.org/webdump.git
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 0aaf37618319738faf0c0014c9b8c80abf39e3f6
 (DIR) parent fb8467d36699d79fa9678342993180ddd7590c29
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Sun,  8 Dec 2019 16:41:00 +0100
       
       use binary search (bsearch) for named entity lookup on the sorted list
       
       using the big list namedentities.all.h this is much faster (of course).
       
       Diffstat:
         M xml.c                               |      33 +++++++++++++++++++++----------
       
       1 file changed, 23 insertions(+), 10 deletions(-)
       ---
 (DIR) diff --git a/xml.c b/xml.c
       @@ -243,27 +243,40 @@ codepointtoutf8(long r, char *s)
                }
        }
        
       +struct namedentity {
       +        const char *entity;
       +        long cp;
       +};
       +
       +int
       +namedentitycmp(const void *v1, const void *v2)
       +{
       +        struct namedentity *n1 = (struct namedentity *)v1;
       +        struct namedentity *n2 = (struct namedentity *)v2;
       +
       +        return strcmp(n1->entity, n2->entity);
       +}
       +
        static int
        namedentitytostr(const char *e, char *buf, size_t bufsiz)
        {
       -        static const struct {
       -                const char *entity;
       -                long cp;
       -        } entities[] = {
       +        static const struct namedentity entities[] = {
        #include "namedentities.h"
                };
       +        struct namedentity find, *found;
                size_t i;
        
                /* buffer is too small */
                if (bufsiz < 5)
                        return -1;
        
       -        for (i = 0; i < sizeof(entities) / sizeof(*entities); i++) {
       -                if (!strcmp(e, entities[i].entity)) {
       -                        i = codepointtoutf8(entities[i].cp, buf);
       -                        buf[i] = '\0';
       -                        return i;
       -                }
       +        find.entity = e;
       +        found = bsearch(&find, entities, sizeof(entities) / sizeof(*entities),
       +                sizeof(*entities), namedentitycmp);
       +        if (found) {
       +                i = codepointtoutf8(found->cp, buf);
       +                buf[i] = '\0';
       +                return i;
                }
                return 0;
        }