tMore demangling. - plan9port - [fork] Plan 9 from user space
 (HTM) git clone git://src.adamsgaard.dk/plan9port
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 2eb305240f559fa69a08969184ce9585d16f8b02
 (DIR) parent 2d658c0d185051b0bea78c8273a2933a586e617b
 (HTM) Author: rsc <devnull@localhost>
       Date:   Tue, 29 Nov 2005 04:05:36 +0000
       
       More demangling.
       
       Diffstat:
         M src/libmach/manglegcc2.c            |     587 ++++++++++++++++++++++---------
         M src/libmach/mkfile                  |       3 +++
       
       2 files changed, 423 insertions(+), 167 deletions(-)
       ---
 (DIR) diff --git a/src/libmach/manglegcc2.c b/src/libmach/manglegcc2.c
       t@@ -1,6 +1,20 @@
        /*
         * gcc2 name demangler.
       + *
       + * gcc2 follows the C++ Annotated Reference Manual section 7.2.1
       + * name mangling description with a few changes.
       + * See gpcompare.texi, gxxint_15.html in this directory for the changes.
       + *
       + * Not implemented:
       + *        unicode mangling
         */
       +/*
       +RULES TO ADD:
       +
       +_10CycleTimer.cycles_per_ms_ => CycleTimer::cycles_per_ms_
       +
       +
       +*/
        #include <u.h>
        #include <libc.h>
        #include <bio.h>
       t@@ -24,114 +38,310 @@ chartabsearch(Chartab *ct, int c)
                return nil;
        }
        
       -typedef struct Gccstate Gccstate;
       -struct Gccstate
       +static Chartab typetab[] =
       +{
       +        'b',        "bool",
       +        'c',        "char",
       +        'd',        "double",
       +        'e',        "...",
       +        'f',        "float",
       +        'i',        "int",
       +        'J',        "complex",
       +        'l',        "long",
       +        'r',        "long double",
       +        's',        "short",
       +        'v',        "void",
       +        'w',        "wchar_t",
       +        'x',        "long long",
       +        0, 0
       +};
       +
       +static Chartab modifiertab[] =
        {
       -        char *name[128];
       -        int nname;
       +        'C',        "const",
       +        'S',        "signed",                /* means static for member functions */
       +        'U',        "unsigned",
       +        'V',        "volatile",
       +        
       +        'G',        "garbage",        /* no idea what this is */
       +        0, 0
        };
       -static int gccname(char**, char**, Gccstate*);
       +
       +static char constructor[] = "constructor";
       +static char destructor[] = "destructor";
       +static char gconstructor[] = "$gconstructor";        /* global destructor */
       +static char gdestructor[] = "$gdestructor";        /* global destructor */
       +
       +static char manglestarts[] = "123456789CFHQSUVt";
       +
       +static int gccname(char**, char**);
       +static char *demanglegcc2a(char*, char*);
       +static char *demanglegcc2b(char*, char*);
       +static char *demanglegcc2c(char*, char*);
       +static int gccnumber(char**, int*, int);
       +
        char*
        demanglegcc2(char *s, char *buf)
        {
       -        char *p, *os, *name, *t;
       -        int namelen;
       -        Gccstate state;
       +        char *name, *os, *p, *t;
       +        int isfn, namelen;
                
       -        state.nname = 0;
       -        os = s;
       -        p = buf;
        
       -        if(memcmp(os, "_._", 3) == 0){
       -                name = "destructor";
       +        /*
       +         * Pick off some cases that seem not to fit the pattern.
       +         */
       +        if((t = demanglegcc2a(s, buf)) != nil)
       +                return t;
       +        if((t = demanglegcc2b(s, buf)) != nil)
       +                return t;
       +        if((t = demanglegcc2c(s, buf)) != nil)
       +                return t;
       +
       +        /*
       +         * First, figure out whether this is a mangled name.
       +         * The name begins with a short version of the name, then __.
       +         * Of course, some C names begin with __ too, so the ultimate
       +         * test is whether what follows __ looks reasonable.
       +         * We use a test on the first letter instead.
       +         *
       +         * Constructors have no name - they begin __ (double underscore).
       +         * Destructors break the rule - they begin _._ (underscore, dot, underscore).
       +         */
       +        os = s;
       +        isfn = 0;
       +        if(memcmp(s, "_._", 3) == 0){
       +                isfn = 1;
       +                name = destructor;
       +                namelen = strlen(name);
       +                s += 3;
       +        }else if(memcmp(s, "_GLOBAL_.D.__", 13) == 0){
       +                isfn = 1;
       +                name = gdestructor;
       +                namelen = strlen(name);
       +                s += 13;                
       +        }else if(memcmp(s, "_GLOBAL_.D._", 12) == 0){
       +                isfn = 0;
       +                name = gdestructor;
                        namelen = strlen(name);
       -                s = os+3;
       +                s += 12;                
       +        }else if(memcmp(s, "_GLOBAL_.I.__", 13) == 0){
       +                isfn = 1;
       +                name = gconstructor;
       +                namelen = strlen(name);
       +                s += 13;                
       +        }else if(memcmp(s, "_GLOBAL_.I._", 12) == 0){
       +                isfn = 0;
       +                name = gconstructor;
       +                namelen = strlen(name);
       +                s += 12;                
                }else{
       -                /* the mangled part begins with the final __ */
       -                if((s = strstr(os, "__")) == nil)
       +                t = strstr(os, "__");
       +                if(t == nil)
                                return os;
                        do{
       -                        t = s;
       -                        if(strchr("123456789FHQt", s[2]))
       +                        s = t;
       +                        if(strchr(manglestarts, *(s+2)))
                                        break;
       -                }while((s = strstr(t+1, "__")) != nil);
       -                
       -                s = t;
       +                }while((t = strstr(s+1, "__")) != nil);
       +        
                        name = os;
       -                namelen = t - os;
       +                namelen = s - os;
                        if(namelen == 0){
       -                        name = "constructor";
       +                        isfn = 1;
       +                        name = constructor;
                                namelen = strlen(name);
                        }
                        s += 2;
                }
       -
       -        switch(*s){
       -        default:
       +        
       +        /*
       +         * Now s points at the mangled crap (maybe).
       +         * and name is the final element of the name.
       +         */
       +        if(strchr(manglestarts, *s) == nil)
                        return os;
       -
       -        case 'F':        /* plain function */
       -                s++;
       -                break;
                
       -        case 'Q':
       -        case 'H':
       -        case 't':
       -        case '1': case '2': case '3': case '4':
       -        case '5': case '6': case '7': case '8': case '9':
       -                if(!gccname(&s, &p, &state)){
       -                        if(debug) fprint(2, "bad name: %s\n", os);
       +        p = buf;
       +        if(*s == 'F'){
       +                /* global function, no extra name pieces, just types */
       +                isfn = 1;
       +        }else{
       +                /* parse extra name pieces */
       +                if(!gccname(&s, &p)){
       +                        if(debug)
       +                                fprint(2, "parsename %s: %r\n", s);
                                return os;
                        }
       +                
       +                /* if we have a constructor or destructor, try to use the C++ name */
       +                t = nil;
       +                if(name == constructor || name == destructor){
       +                        *p = 0;
       +                        t = strrchr(buf, ':');
       +                        if(t == nil)
       +                                t = buf;
       +                }
                        strcpy(p, "::");
                        p += 2;
       -                break;
       +                if(t){
       +                        namelen = strlen(t)-2;
       +                        if(name == destructor)
       +                                *p++ = '~';
       +                        name = t;
       +                }
                }
       -
                memmove(p, name, namelen);
                p += namelen;
       +        
       +        if(*s == 'F'){
       +                /* might be from above, or might follow name pieces */
       +                s++;
       +                isfn = 1;
       +        }
        
       -        if(*s && *s != '_'){
       -                /* the rest of the name is the argument types */
       +        /* the rest of the name is argument types - could skip this */
       +        if(*s || isfn){
                        *p++ = '(';
       -                while(*s != 0 && *s != '_' && gccname(&s, &p, &state))
       +                while(*s != 0 && *s != '_'){
       +                        if(!gccname(&s, &p))
       +                                break;
                                *p++ = ',';
       +                }
                        if(*(p-1) == ',')
                                p--;
                        *p++ = ')';
                }
       -        
       -        if(*s == '_'){
       -                /* the remainder is the type of the return value */
       -        }
        
       +        if(*s == '_'){
       +                /* return type (left over from H) */
       +        }        
       +        
                *p = 0;
                return buf;
        }
        
       -static Chartab typetab[] =
       +/*
       + * _10CycleTimer.cycles_per_ms_ => CycleTimer::cycles_per_ms_
       + * _t12basic_string3ZcZt11char_traits1ZcZt9allocator1Zc.npos
       + * (maybe the funny syntax means they are private)
       + */
       +static char*
       +demanglegcc2a(char *s, char *buf)
        {
       -        'b',        "bool",
       -        'c',        "char",
       -        'd',        "double",
       -        'i',        "int",
       -        'l',        "long",
       -        'v',        "void",
       -        0, 0
       -};
       +        char *p;
       +        
       +        if(*s != '_' || strchr(manglestarts, *(s+1)) == nil)
       +                return nil;
       +        p = buf;
       +        s++;
       +        if(!gccname(&s, &p))
       +                return nil;
       +        if(*s != '.')
       +                return nil;
       +        s++;
       +        strcpy(p, "::");
       +        p += 2;
       +        strcpy(p, s);
       +        return buf;
       +}
       +
       +/*
       + * _tfb => type info for bool
       + * __vt_7ostream => vtbl for ostream
       + */
       +static char*
       +demanglegcc2b(char *s, char *buf)
       +{
       +        char *p;
       +        char *t;
       +        
       +        if(memcmp(s, "__ti", 4) == 0){
       +                t = "$typeinfo";
       +                s += 4;
       +        }else if(memcmp(s, "__tf", 4) == 0){
       +                t = "$typeinfofn";
       +                s += 4;
       +        }else if(memcmp(s, "__vt_", 5) == 0){
       +                t = "$vtbl";
       +                s += 5;
       +        }else
       +                return nil;
        
       +        p = buf;
       +        for(;;){
       +                if(*s == 0 || !gccname(&s, &p))
       +                        return nil;
       +                if(*s == 0)
       +                        break;
       +                if(*s != '.' && *s != '$')
       +                        return nil;
       +                strcpy(p, "::");
       +                p += 2;
       +                s++;
       +        }
       +        strcpy(p, "::");
       +        p += 2;
       +        strcpy(p, t);
       +        return buf;
       +}
       +
       +/*
       + * __thunk_176__._Q210LogMessage9LogStream => thunk (offset -176) for LogMessage::LogStream
       + */
       +static char*
       +demanglegcc2c(char *s, char *buf)
       +{
       +        int n;
       +        char *p;
       +
       +        if(memcmp(s, "__thunk_", 8) != 0)
       +                return nil;
       +        s += 8;
       +        if(!gccnumber(&s, &n, 1))
       +                return nil;
       +        if(memcmp(s, "__._", 4) != 0)        /* might as well be morse code */
       +                return nil;
       +        s += 4;
       +        p = buf;
       +        if(!gccname(&s, &p))
       +                return nil;
       +        strcpy(p, "::$thunk");
       +        return buf;
       +}
       +
       +/*
       + * Parse a number, a non-empty run of digits.
       + * If many==0, then only one digit is used, even
       + * if it is followed by more.  When we need a big
       + * number in a one-digit slot, it gets bracketed by underscores.
       + */
        static int
       -gccnumber(char **ps, int *pn)
       +gccnumber(char **ps, int *pn, int many)
        {
                char *s;
       -        int n;
       +        int n, eatunderscore;
                
                s = *ps;
       -        if(!isdigit((uchar)*s))
       +        eatunderscore = 0;
       +        if(!many && *s == '_'){
       +                many = 1;
       +                s++;
       +                eatunderscore = 1;
       +        }
       +        if(!isdigit((uchar)*s)){
       +        bad:
       +                werrstr("bad number %.20s", *ps);
                        return 0;
       -        n = strtol(s, &s, 10);
       -        if(*s == '_')
       +        }
       +        if(many)
       +                n = strtol(s, &s, 10);
       +        else
       +                n = *s++ - '0';
       +        if(eatunderscore){
       +                if(*s != '_')
       +                        goto bad;
                        s++;
       +        }
                *ps = s;
                *pn = n;
                return 1;
       t@@ -143,10 +353,10 @@ gccnumber(char **ps, int *pn)
         * Let's see how far we can go before that becomes a problem.
         */
        static int
       -gccname(char **ps, char **pp, Gccstate *state)
       +gccname(char **ps, char **pp)
        {
                int i, n, m, val;
       -        char c, *os, *s, *t, *p;
       +        char *os, *s, *t, *p, *p0, *p1;
                
                s = *ps;
                os = s;
       t@@ -154,96 +364,159 @@ gccname(char **ps, char **pp, Gccstate *state)
        
        /*        print("\tgccname: %s\n", s); */
        
       -#if 0
       -        /* overloaded operators */
       -        for(i=0; operators[i].shrt; i++){
       -                if(memcmp(operators[i].shrt, s, 2) == 0){
       -                        strcpy(p, "operator$");
       -                        strcat(p, operators[i].lng);
       -                        p += strlen(p);
       -                        s += 2;
       -                        goto suffix;
       -                }
       -        }
       -#endif
                /* basic types */
                if((t = chartabsearch(typetab, *s)) != nil){
                        s++;
                        strcpy(p, t);
                        p += strlen(t);
       -                goto suffix;
       +                goto out;
                }
       -
       +        
       +        /* modifiers */
       +        if((t = chartabsearch(modifiertab, *s)) != nil){
       +                s++;
       +                if(!gccname(&s, &p))
       +                        return 0;
       +                /*
       +                 * These don't end up in the right place
       +                 * and i don't care anyway
       +                 * (AssertHeld__C17ReaderWriterMutex)
       +                 */
       +                /*
       +                *p++ = ' ';
       +                strcpy(p, t);
       +                p += strlen(p);
       +                */
       +                goto out;
       +        }
       +        
                switch(*s){
                default:
                bad:
       -                if(debug) fprint(2, "gccname: %s (%s)\n", os, s);
       +                if(debug)
       +                        fprint(2, "gccname: %s (%s)\n", os, s);
       +                werrstr("bad name %.20s", s);
                        return 0;
        
       -        case '1': case '2': case '3': case '4':        /* name length */
       +        case '1': case '2': case '3': case '4':        /* length-prefixed string */
                case '5': case '6': case '7': case '8': case '9':
       -                n = strtol(s, &s, 10);
       +                if(!gccnumber(&s, &n, 1))
       +                        return 0;
                        memmove(p, s, n);
                        p += n;
                        s += n;
                        break;
        
       -        case 'C':        /* const */
       +        case 'A':        /* array */
       +                t = s;
                        s++;
       -                strcpy(p, "const ");
       -                p += strlen(p);
       -                if(!gccname(&s, &p, state))
       +                if(!gccnumber(&s, &n, 1))
       +                        return 0;
       +                if(*s != '_'){
       +                        werrstr("bad array %.20s", t);
                                return 0;
       +                }
       +                s++;
       +                sprint(p, "array[%d] ", n);
       +                p += strlen(p);
                        break;
        
       -        case 'U':        /* unsigned */
       +        case 'F':        /* function */
       +                t = s;
                        s++;
       -                strcpy(p, "unsigned ");
       -                p += strlen(p);
       -                if(!gccname(&s, &p, state))
       +                strcpy(p, "fn(");
       +                p += 3;
       +                /* arguments */
       +                while(*s && *s != '_')
       +                        if(!gccname(&s, &p))
       +                                return 0;
       +                if(*s != '_'){
       +                        werrstr("unexpected end in function: %s", t);
                                return 0;
       +                }
       +                s++;
       +                strcpy(p, " => ");
       +                p += 4;
       +                /* return type */
       +                if(!gccname(&s, &p))
       +                        return 0;
       +                *p++ = ')';
                        break;
        
       -#if 0
       -        case 'L':        /* default value */
       +        case 'H':        /* template specialization */
                        t = s;
                        s++;
       -                if(!gccname(&s, &p, state))
       -                        return 0;
       -                if(!isdigit((uchar)*s)){
       -                        fprint(2, "bad value: %s\n", t);
       +                if(!gccnumber(&s, &n, 0))
                                return 0;
       +                p0 = p;
       +                /* template arguments */
       +                *p++ = '<';
       +                for(i=0; i<n; i++){
       +                        val = 1;
       +                        if(*s == 'Z'){        /* argument is a type, not value */
       +                                val = 0;
       +                                s++;
       +                        }
       +                        if(!gccname(&s, &p))
       +                                return 0;
       +                        if(val){
       +                                if(!gccnumber(&s, &m, 1))        /* gccnumber: 1 or 0? */
       +                                        return 0;
       +                                sprint(p, "=%d", m);
       +                                p += strlen(p);
       +                        }
       +                        if(i+1<n)
       +                                *p++ = ',';
                        }
       -                n = strtol(s, &s, 10);
       -                if(*s != 'E'){
       -                        fprint(2, "bad value2: %s\n", t);
       +                *p++ = '>';
       +                if(*s != '_'){
       +                        werrstr("bad template %s", t);
                                return 0;
                        }
       -                sprint(p, "=%d", n);
       -                p += strlen(p);
                        s++;
       +                p1 = p;
       +                /* name */
       +                if(!gccname(&s, &p))
       +                        return 0;
       +                /* XXX 
       +__adjust_heap__H3ZPt4pair2Zt12basic_string3ZcZt11char_traits1ZcZt9allocator1ZcZt12basic_string3ZcZt11char_traits1ZcZt9allocator1ZcZiZt4pair2Zt12basic_string3ZcZt11char_traits1ZcZt9allocator1ZcZt12basic_string3ZcZt11char_traits1ZcZt9allocator1Zc_X01X11X11X21_v
       +                */
       +                /* XXX swap p0, p1, p - maybe defer to main */
                        break;
       -#endif
        
       -        case 'N':        /* repeated name/type */
       -        case 'X':
       -                c = *s++;
       -                if(!isdigit((uchar)*s) || !isdigit((uchar)*(s+1)))
       +        case 'M':        /* M1S: pointer to member */
       +                if(*(s+1) != '1' || *(s+2) != 'S')
                                goto bad;
       -                n = *s++ - '0';
       -                m = *s++ - '0';
       -                sprint(p, "%c%d/%d", c, n, m);
       +                s += 3;
       +                strcpy(p, "mptr ");
       +                p += 5;
       +                if(!gccname(&s, &p))
       +                        return 0;
       +                break;
       +
       +        case 'N':        /* multiply-repeated type */
       +                s++;
       +                if(!gccnumber(&s, &n, 0) || !gccnumber(&s, &m, 0))
       +                        return 0;
       +                sprint(p, "T%dx%d", m, n);
                        p += strlen(p);
                        break;
        
       -        case 'Q':        /* hierarchical name */
       +        case 'P':        /* pointer */
                        s++;
       -                if(!isdigit((uchar)*s))
       -                        goto bad;
       -                n = *s++ - '0';
       +                strcpy(p, "ptr ");
       +                p += 4;
       +                if(!gccname(&s, &p))
       +                        return 0;
       +                break;
       +        
       +        case 'Q':        /* qualified name */
       +                s++;
       +                if(!gccnumber(&s, &n, 0))
       +                        return 0;
                        for(i=0; i<n; i++){
       -                        if(!gccname(&s, &p, state)){
       -                                if(debug) fprint(2, "bad name in hierarchy: %s in %s\n", s, os);
       +                        if(!gccname(&s, &p)){
       +                                werrstr("in hierarchy: %r");
                                        return 0;
                                }
                                if(i+1 < n){
       t@@ -252,84 +525,64 @@ gccname(char **ps, char **pp, Gccstate *state)
                                }
                        }
                        break;
       -        
       -        case 'P':        /* pointer to */
       -                s++;
       -                if(!gccname(&s, &p, state))
       -                        return 0;
       -                *p++ = '*';
       -                break;
        
       -        case 'R':        /* reference to */
       +        case 'R':        /* reference */
                        s++;
       -                if(!gccname(&s, &p, state))
       +                strcpy(p, "ref ");
       +                p += 4;
       +                if(!gccname(&s, &p))
                                return 0;
       -                *p++ = '&';
                        break;
        
       -        case 'S':        /* standard or previously-seen name */
       +        case 't':        /* class template instantiation */
       +                /* should share code with case 'H' */
       +                t = s;
                        s++;
       -                if('0' <= *s && *s <= '9'){
       -                        /* previously seen */
       -                        t = s-1;
       -                        n = strtol(s, &s, 10);
       -                        if(*s != '_'){
       -                                fprint(2, "bad S: %s\n", t);
       -                                return 0;
       -                        }
       -                        s++;
       -                        sprint(p, "S%d_", n);
       -                        p += strlen(p);
       -                        break;
       -                }
       -                goto bad;
       -
       -        case 't':        /* named template */
       -                c = *s++;
       -                if(!gccname(&s, &p, state))
       +                if(!gccname(&s, &p))
                                return 0;
       -                goto template;
       -        case 'H':        /* nameless template */
       -                c = *s++;
       -        template:
       -                if(!gccnumber(&s, &n))
       -                        goto bad;
       +                if(!gccnumber(&s, &n, 0))
       +                        return 0;
       +                p0 = p;
       +                /* template arguments */
                        *p++ = '<';
                        for(i=0; i<n; i++){
                                val = 1;
       -                        if(*s == 'Z'){
       +                        if(*s == 'Z'){        /* argument is a type, not value */
                                        val = 0;
                                        s++;
                                }
       -                        if(!gccname(&s, &p, state))
       -                                goto bad;
       +                        if(!gccname(&s, &p))
       +                                return 0;
                                if(val){
       -                                if(!gccnumber(&s, &m))
       -                                        goto bad;
       +                                if(!gccnumber(&s, &m, 1))        /* gccnumber: 1 or 0? */
       +                                        return 0;
                                        sprint(p, "=%d", m);
                                        p += strlen(p);
                                }
       -                        if(i+1 < n)
       +                        if(i+1<n)
                                        *p++ = ',';
                        }
                        *p++ = '>';
       -                if(c == 'H'){
       -                        if(*s != '_')
       -                                goto bad;
       -                        s++;
       -                }
                        break;
       -
       -        case 'T':        /* previously-seen type??? e.g., T2 */
       -                t = s;
       -                for(s++; isdigit((uchar)*s); s++)
       -                        ;
       -                memmove(p, t, s-t);
       -                p += s-t;
       -                break;                
       +                
       +        case 'T':        /* once-repeated type */
       +                s++;
       +                if(!gccnumber(&s, &n, 0))
       +                        return 0;
       +                sprint(p, "T%d", n);
       +                p += strlen(p);
       +                break;
       +        
       +        case 'X':        /* type parameter in 'H' */
       +                if(!isdigit((uchar)*(s+1)) || !isdigit((uchar)*(s+2)))
       +                        goto bad;
       +                memmove(p, s, 3);
       +                p += 3;
       +                s += 3;
       +                break;
                }
        
       -suffix:        
       +out:        
                *ps = s;
                *pp = p;
                return 1;
 (DIR) diff --git a/src/libmach/mkfile b/src/libmach/mkfile
       t@@ -69,6 +69,9 @@ t: t.o $LIBDIR/$LIB
        elfnm: elfnm.o $LIBDIR/$LIB
                $LD -o $target $prereq -l9
        
       +demangler: demangler.o $LIBDIR/$LIB
       +        $LD -o $target $prereq -l9
       +
        
        SunOS.$O: nosys.c
        Darwin.$O: nosys.c