9vx: introduce multiple user address spaces - vx32 - Local 9vx git repository for patches.
git clone git://r-36.net/vx32
       ---
        commit 0687238f4aa7b3150cc097cb757f16e762f4cfc6
        parent 09a0ae1864ae46cd1220469a1104bca6df770805
        Author: Russ Cox <rsc@swtch.com>
       Date:   Fri,  4 Jul 2008 02:21:42 -0400
       
       9vx: introduce multiple user address spaces
       
       Diffstat:
         M src/9vx/a/dat.h                     |      18 ++++++++++++++----
         M src/9vx/a/fault.c                   |       4 ++--
         M src/9vx/a/sysproc.c                 |       2 ++
         M src/9vx/main.c                      |       8 ++++++++
         M src/9vx/mmu.c                       |     195 +++++++++++++++++++++++--------
         M src/9vx/trap.c                      |      14 +++++++++-----
         M src/9vx/vx32.c                      |     114 ++++++++++++++++---------------
       
       7 files changed, 238 insertions(+), 117 deletions(-)
       ---
        diff --git a/src/9vx/a/dat.h b/src/9vx/a/dat.h
       @@ -1,4 +1,5 @@
        #include <ucontext.h>
       +#include "libvx32/vx32.h"
        
        typedef struct BIOS32si        BIOS32si;
        typedef struct Conf        Conf;
       @@ -114,12 +115,14 @@ struct Conf
         *  MMU stuff in proc
         */
        #define NCOLOR 1
       +typedef struct Uspace Uspace;
        struct PMMU
        {
       -        ulong lo;        // Plan 9 VX
       -        ulong hi;        // Plan 9 VX
                struct vxproc *vxproc;        // Plan 9 VX
       -        struct vxmmap *vxmm;        // Plan 9 VX
       +        struct vxmem vxmem;
       +        struct vxmmap vxmm;        // Plan 9 VX
       +        Uspace *us;
       +        uchar *uzero;
        };
        
        /*
       @@ -348,7 +351,6 @@ struct DevConf
        // Plan 9 VX
        extern int traceprocs;
        extern int tracesyscalls;
       -extern uchar *uzero;
        extern int doabort;
        
        /* Pthreads-based sleep and wakeup. */
       @@ -361,3 +363,11 @@ struct Psleep
                Pwaiter *waiter;
        };
        
       +struct Uspace
       +{
       +        Proc *p;        // proc currently mapped
       +        uchar *uzero;
       +        ulong lo;
       +        ulong hi;
       +};
       +
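
        The dat.h hunk above turns the user mapping into per-process state:
        the single global uzero and the per-proc lo/hi range are replaced by
        a PMMU that embeds its own vxmem/vxmmap, caches uzero, and points at
        one Uspace slot, with lo/hi moving into Uspace.  A minimal sketch of
        the resulting address translation, assuming the 9vx kernel headers;
        the helper names are illustrative only, and the commit itself
        open-codes these expressions (e.g. up->pmmu.uzero+addr0 in okaddr
        below):

        /* user virtual address -> host pointer, for a given proc */
        static void*
        uaddr2host(Proc *p, ulong va)
        {
                return p->pmmu.uzero + va;
        }

        /* host pointer -> user virtual address */
        static ulong
        host2uaddr(Proc *p, void *v)
        {
                return (ulong)((uchar*)v - p->pmmu.uzero);
        }
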
        diff --git a/src/9vx/a/fault.c b/src/9vx/a/fault.c
       @@ -343,7 +343,7 @@ okaddr(ulong addr, ulong len, int write)
                                        continue;
                                }
                                qunlock(&s->lk);
       -                        return uzero+addr0;
       +                        return up->pmmu.uzero+addr0;
                        }
                }
                pprint("suicide: invalid address 0x%lux/%lud in sys call pc=0x%lux\n", addr, len, userpc());
       @@ -400,7 +400,7 @@ vmemchr(void *s, int c, int n)
                        a += m_;
                        n -= m_;
                        if(isuaddr(a))
       -                        uvalidaddr(a-uzero, 1, 0);
       +                        uvalidaddr(a-up->pmmu.uzero, 1, 0);
                }
        
                /* fits in one page */
        diff --git a/src/9vx/a/sysproc.c b/src/9vx/a/sysproc.c
       @@ -379,6 +379,8 @@ sysexec(ulong *arg)
                /*
                 * Top-of-stack structure.
                 */
       +        uchar *uzero;
       +        uzero = up->pmmu.uzero;
                Tos *tos;
                ulong utos;
                utos = USTKTOP - sizeof(Tos);
        diff --git a/src/9vx/main.c b/src/9vx/main.c
       @@ -109,6 +109,9 @@ main(int argc, char **argv)
                case 'S':
                        tracesyscalls++;
                        break;
       +        case 'U':
       +                nuspace = atoi(EARGF(usage()));
       +                break;
                case 'X':
                        vx32_debugxlate++;
                        break;
       @@ -419,7 +422,9 @@ showexec(ulong sp)
        {
                ulong *a, *argv;
                int i, n;
       +        uchar *uzero;
                
       +        uzero = up->pmmu.uzero;
                iprint("showexec %p\n", sp);
                if(sp >= USTKTOP || sp < USTKTOP-USTKSIZE)
                        panic("showexec: bad sp");
       @@ -510,6 +515,7 @@ sigsegv(int signo, siginfo_t *info, void *v)
                int read;
                ulong addr, eip, esp;
                ucontext_t *uc;
       +        uchar *uzero;
        
                if(m == nil)
                        panic("sigsegv: m == nil");
       @@ -518,6 +524,8 @@ sigsegv(int signo, siginfo_t *info, void *v)
                if(up == nil)
                        panic("sigsegv: up == nil");
        
       +        uzero = up->pmmu.uzero;
       +
                uc = v;
        #if defined(__APPLE__)
                mcontext_t mc;
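
        The main.c hunk above adds a -U command-line option that sets
        nuspace, the number of reserved user address spaces; mmuinit (in the
        mmu.c diff below) clamps the value to the 1..16 range of the uspace
        array.  A hedged usage sketch, other 9vx flags omitted:

        $ 9vx -U 4        # reserve four user address spaces instead of the default one
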
        diff --git a/src/9vx/mmu.c b/src/9vx/mmu.c
       @@ -30,14 +30,19 @@ int tracemmu;
        
        static int pagefile;
        static char* pagebase;
       -uchar *uzero;
       +
       +static Uspace uspace[16];
       +static Uspace *ulist[nelem(uspace)];
       +int nuspace = 1;
        
        int
        isuaddr(void *v)
        {
                uchar *p;
       +        uchar *uzero;
                
                p = v;
       +        uzero = up->pmmu.uzero;
                return uzero <= p && p < uzero+USTKTOP;
        }
        
       @@ -46,7 +51,7 @@ isuaddr(void *v)
         * The point is to reserve the space so that
         * nothing else ends up there later.
         */
       -static void
       +static void*
        mapzero(void)
        {
                int fd;
       @@ -55,20 +60,16 @@ mapzero(void)
                /* First try mmaping /dev/zero.  Some OS'es don't allow this. */
                if((fd = open("/dev/zero", O_RDONLY)) >= 0){
                        v = mmap(nil, USTKTOP, PROT_NONE, MAP_PRIVATE, fd, 0);
       -                if(v != MAP_FAILED){
       -                        uzero = v;
       -                        return;
       -                }
       +                if(v != MAP_FAILED)
       +                        return v;
                }
                
                /* Next try an anonymous map. */
                v = mmap(nil, USTKTOP, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
       -        if(v != MAP_FAILED){
       -                uzero = v;
       -                return;
       -        }
       -        
       -        panic("mapzero: cannot reserve process address space");
       +        if(v != MAP_FAILED)
       +                return v;
       +
       +        return nil;
        }
        
        void
       @@ -76,8 +77,8 @@ mmuinit(void)
        {
                char tmp[] = "/var/tmp/9vx.pages.XXXXXX";
                void *v;
       -
       -        mapzero();
       +        int i;
       +        
                if((pagefile = mkstemp(tmp)) < 0)
                        panic("mkstemp: %r");
                if(ftruncate(pagefile, MEMSIZE) < 0)
       @@ -92,6 +93,17 @@ mmuinit(void)
                        panic("mmap pagefile: %r");
                pagebase = v;
        
       +        if(nuspace <= 0)
       +                nuspace = 1;
       +        if(nuspace > nelem(uspace))
       +                nuspace = nelem(uspace);
       +        for(i=0; i<nuspace; i++){
       +                uspace[i].uzero = mapzero();
       +                if(uspace[i].uzero == nil)
       +                        panic("mmap address space %d", i);
       +                ulist[i] = &uspace[i];
       +        }
       +
                conf.mem[0].base = 0;
                conf.mem[0].npage = MEMSIZE / BY2PG;
                
       @@ -128,23 +140,15 @@ kunmap(KMap *k)
        }
        
        /*
       - * The process whose address space we've got mapped.
       - * We cache our own copy so that entering the scheduler
       - * and coming back out running the same process doesn't
       - * cause unnecessary unmapping and remapping.
       - */
       -static Proc *mmup;
       -
       -/*
         * Flush the current address space.
         */
        static void
       -mmapflush(void)
       +mmapflush(Uspace *us)
        {
                m->flushmmu = 0;
        
                /* Nothing mapped? */
       -        if(mmup == nil || mmup->pmmu.lo > mmup->pmmu.hi)
       +        if(us == nil || us->lo > us->hi || us->uzero == nil)
                        return;
        
        #ifdef __FreeBSD__
       @@ -154,20 +158,20 @@ mmapflush(void)
                         * tell whether a page is mapped, so we have to remap
                         * something with no pages here. 
                         */
       -                if(mmap(uzero, mmup->pmmu.hi+BY2PG, PROT_NONE, 
       +                if(mmap(us->uzero, us->hi+BY2PG, PROT_NONE, 
                                        MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) == MAP_FAILED)
                                panic("mmapflush mmap: %r");
       -                mmup->pmmu.lo = 0x80000000UL;
       -                mmup->pmmu.hi = 0;
       +                us->lo = 0x80000000UL;
       +                us->hi = 0;
                        return;
                }
        #endif
        
                /* Clear only as much as got mapped. */
       -        if(mprotect(uzero, mmup->pmmu.hi+BY2PG, PROT_NONE) < 0)
       +        if(mprotect(us->uzero, us->hi+BY2PG, PROT_NONE) < 0)
                        panic("mmapflush mprotect: %r");
       -        mmup->pmmu.lo = 0x80000000UL;
       -        mmup->pmmu.hi = 0;
       +        us->lo = 0x80000000UL;
       +        us->hi = 0;
        }
        
        /*
       @@ -178,13 +182,15 @@ void
        putmmu(ulong va, ulong pa, Page *p)
        {
                int prot;
       -        PMMU *pmmu;
       +        Uspace *us;
        
                if(tracemmu || (pa&~(PTEWRITE|PTEVALID)) != p->pa)
                        print("putmmu va %lux pa %lux p->pa %lux\n", va, pa, p->pa);
        
                assert(p->pa < MEMSIZE && pa < MEMSIZE);
                assert(up);
       +        us = up->pmmu.us;
       +        assert(us);
        
                /* Map the page */
                prot = PROT_READ;
       @@ -192,21 +198,20 @@ putmmu(ulong va, ulong pa, Page *p)
                        prot |= PROT_WRITE;
                pa &= ~(BY2PG-1);
                va  &= ~(BY2PG-1);
       -        if(mmap(uzero+va, BY2PG, prot, MAP_FIXED|MAP_SHARED,
       +        if(mmap(us->uzero+va, BY2PG, prot, MAP_FIXED|MAP_SHARED,
                                pagefile, pa) == MAP_FAILED)
                        panic("putmmu");
                
                /* Record high and low address range for quick unmap. */
       -        pmmu = &up->pmmu;
       -        if(pmmu->lo > va)
       -                pmmu->lo = va;
       -        if(pmmu->hi < va)
       -                pmmu->hi = va;
       +        if(us->lo > va)
       +                us->lo = va;
       +        if(us->hi < va)
       +                us->hi = va;
        //        printlinuxmaps();
        }
        
        /*
       - * The memory maps have changed.  Flush all cached state.
       + * The memory maps have changed for up.  Flush all cached state.
         */
        void
        flushmmu(void)
       @@ -214,9 +219,78 @@ flushmmu(void)
                if(tracemmu)
                        print("flushmmu\n");
        
       -        if(up)
       +        if(up){
                        vxproc_flush(up->pmmu.vxproc);
       -        mmapflush();
       +                mmapflush(up->pmmu.us);
       +        }
       +}
       +
       +void
       +usespace(Uspace *us)
       +{
       +        int i;
       +        
       +        for(i=0; i<nuspace; i++)
       +                if(ulist[i] == us){
       +                        while(i > 0){
       +                                ulist[i] = ulist[i-1];
       +                                i--;
       +                        }
       +                        ulist[0] = us;
       +                        break;
       +                }
       +}
       +
       +Uspace*
       +getspace(Proc *p)
       +{
       +        Uspace *us;
       +        
       +        us = ulist[nuspace-1];
       +        if(us->p){
       +                if(tracemmu)
       +                        print("^^^^^^^^^^ %ld %s [evict %d]\n", us->p->pid, us->p->text, us - uspace);
       +                mmapflush(us);
       +        }
       +        us->p = p;
       +        p->pmmu.vxmm.base = us->uzero;
       +        p->pmmu.uzero = us->uzero;
       +        p->pmmu.us = us;
       +        usespace(us);
       +        return us;
       +}
       +
       +void
       +takespace(Proc *p, Uspace *us)
       +{
       +        usespace(us);
       +        if(us->p == p)
       +                return;
       +        if(tracemmu){
       +                if(us->p)
       +                        print("^^^^^^^^^^ %ld %s [steal %d]\n", us->p->pid, us->p->text, us - uspace);
       +        }
       +        us->p = p;
       +        mmapflush(us);
       +}
       +
       +void
       +putspace(Uspace *us)
       +{
       +        int i;
       +
       +        mmapflush(us);
       +        us->p->pmmu.us = nil;
       +        us->p->pmmu.uzero = nil;
       +        us->p->pmmu.vxmm.base = nil;
       +        us->p = nil;
       +        for(i=0; i<nuspace; i++)
       +                if(ulist[i] == us){
       +                        while(++i < nuspace)
       +                                ulist[i-1] = ulist[i];
       +                        ulist[i-1] = us;
       +                        break;
       +                }
        }
        
        /*
       @@ -231,15 +305,31 @@ mmuswitch(Proc *p)
                 * one we were just in.  Also, kprocs don't count --
                 * only the guys on cpu0 do.
                 */
       -        if(!p->kp && (mmup != p || p->newtlb || m->flushmmu)){
       -                if(0) print("^^^^^^^^^^ %ld %s\n========== %ld %s\n",
       -                        mmup ? mmup->pid : 0, mmup? mmup->text : "",
       -                        p->pid, p->text);
       -                /* No vxproc_flush - vxproc cache is okay */
       -                mmapflush();
       +        if(p->kp)
       +                return;
       +        
       +        if(tracemmu)
       +                print("mmuswitch %ld %s\n", p->pid, p->text);
       +
       +        if(p->pmmu.us && p->pmmu.us->p == p){
       +                if(tracemmu) print("---------- %ld %s [%d]\n",
       +                        p->pid, p->text, p->pmmu.us - uspace);
       +                usespace(p->pmmu.us);
       +                if(!p->newtlb && !m->flushmmu){
       +                        usespace(p->pmmu.us);
       +                        return;
       +                }
       +                mmapflush(p->pmmu.us);
                        p->newtlb = 0;
       -                mmup = p;
       +                return;
                }
       +
       +        if(p->pmmu.us == nil)
       +                getspace(p);
       +        else
       +                takespace(p, p->pmmu.us);
       +        if(tracemmu) print("========== %ld %s [%d]\n",
       +                p->pid, p->text, p->pmmu.us - uspace);
        }
        
        /*
       @@ -250,11 +340,16 @@ mmurelease(Proc *p)
        {
                if(p->kp)
                        return;
       +        if(tracemmu)
       +                print("mmurelease %ld %s\n", p->pid, p->text);
                if(p->pmmu.vxproc)
                        vxproc_flush(p->pmmu.vxproc);
       -        if(p == mmup || m->flushmmu){
       -                mmapflush();
       -                mmup = nil;
       +        if(p->pmmu.us){
       +                if(tracemmu)
       +                        print("^^^^^^^^^^ %ld %s [release %d]\n", p->pid, p->text, p->pmmu.us - uspace);
       +                putspace(p->pmmu.us);
       +                if(m->flushmmu)
       +                        mmapflush(p->pmmu.us);
                }
        }
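
        The mmu.c changes above replace the single cached mapping (the old
        static mmup) with a pool of reserved address spaces, uspace[16],
        kept in most-recently-used order in ulist: usespace moves a slot to
        the front, getspace reuses (and if necessary evicts) the slot at the
        tail, and putspace flushes a released slot and moves it to the tail.
        A standalone sketch of that ordering, not part of the commit; slots
        are plain ints here and the pool size is fixed at 3:

        #include <stdio.h>

        enum { NUS = 3 };
        static int ulist[NUS] = { 0, 1, 2 };        /* slot numbers, most recently used first */

        /* move slot s to the front, as usespace does with Uspace pointers */
        static void
        use(int s)
        {
                int i;

                for(i = 0; i < NUS; i++)
                        if(ulist[i] == s){
                                while(i > 0){
                                        ulist[i] = ulist[i-1];
                                        i--;
                                }
                                ulist[0] = s;
                                break;
                        }
        }

        int
        main(void)
        {
                use(1);        /* a proc is switched into slot 1 */
                use(2);        /* another proc is switched into slot 2 */
                printf("evict slot %d\n", ulist[NUS-1]);        /* prints 0: least recently used */
                return 0;
        }

        With more runnable user procs than address spaces, getspace therefore
        evicts the mapping of the proc that ran longest ago, which appears to
        be the point of keeping ulist ordered.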
        
        diff --git a/src/9vx/trap.c b/src/9vx/trap.c
       @@ -30,7 +30,7 @@ kexit(Ureg *ureg)
                Tos *tos;
        
                /* precise time accounting, kernel exit */
       -        tos = (Tos*)(uzero+USTKTOP-sizeof(Tos));
       +        tos = (Tos*)(up->pmmu.uzero+USTKTOP-sizeof(Tos));
                cycles(&t);
                tos->kcycles += t - up->kentry;
                tos->pcycles = up->pcycles;
       @@ -90,7 +90,7 @@ trap(Ureg *ureg)
                
                case VXTRAP_SOFT+0x40:        /* int $0x40 - system call */
                        if(tracesyscalls){
       -                        ulong *sp = (ulong*)(uzero + ureg->usp);
       +                        ulong *sp = (ulong*)(up->pmmu.uzero + ureg->usp);
                                print("%d [%s] %s %#lux %08lux %08lux %08lux %08lux\n",
                                        up->pid, up->text,
                                        sysctab[ureg->ax], sp[0], sp[1], sp[2], sp[3]);
       @@ -262,7 +262,7 @@ syscall(Ureg *ureg)
                up->psstate = 0;
        
                if(scallnr == NOTED)
       -                noted(ureg, *(ulong*)(uzero + sp+BY2WD));
       +                noted(ureg, *(ulong*)(up->pmmu.uzero + sp+BY2WD));
        
                if(scallnr!=RFORK && (up->procctl || up->nnote)){
                        splhi();
       @@ -335,6 +335,8 @@ notify(Ureg* ureg)
                        pexit("Suicide", 0);
                }
        
       +        uchar *uzero;
       +        uzero = up->pmmu.uzero;
                upureg = (void*)(uzero + sp);
                memmove(upureg, ureg, sizeof(Ureg));
                *(ulong*)(uzero + sp-BY2WD) = up->ureg;        /* word under Ureg is old up->ureg */
       @@ -383,6 +385,8 @@ noted(Ureg* ureg, ulong arg0)
                        pexit("Suicide", 0);
                }
                
       +        uchar *uzero;
       +        uzero = up->pmmu.uzero;
                oureg = up->ureg;
                nureg = (Ureg*)(uzero + up->ureg);
        
       @@ -442,11 +446,11 @@ execregs(ulong entry, ulong ssize, ulong nargs)
                up->fpstate = FPinit;
                fpoff();
        
       -        sp = (ulong*)(uzero + USTKTOP - ssize);
       +        sp = (ulong*)(up->pmmu.uzero + USTKTOP - ssize);
                *--sp = nargs;
        
                ureg = up->dbgreg;
       -        ureg->usp = (uchar*)sp - uzero;
       +        ureg->usp = (uchar*)sp - up->pmmu.uzero;
        //showexec(ureg->usp);
                ureg->pc = entry;
                return USTKTOP-sizeof(Tos);                /* address of kernel/user shared data */
        diff --git a/src/9vx/vx32.c b/src/9vx/vx32.c
       @@ -17,7 +17,6 @@
        #include "u.h"
        #include <pthread.h>
        #include <sys/mman.h>
       -#include "libvx32/vx32.h"
        #include "lib.h"
        #include "mem.h"
        #include "dat.h"
       @@ -50,47 +49,6 @@ vx32sysr1(void)
        }
        
        /*
       - * Vxnewproc is called at the end of newproc
       - * to fill in vx32-specific entries in the Proc struct
       - * before it gets used.
       - */
       -void
       -vxnewproc(Proc *p)
       -{
       -        PMMU *pm;
       -        
       -        pm = &p->pmmu;
       -
       -        /*
       -         * Kernel procs don't need vxprocs; if this proc
       -         * already has one, take it away.  Also, give
       -         * kernel procs very large stacks so they can call
       -         * into non-thread-friendly routines like x11 
       -         * and getgrgid.
       -         */
       -        if(p->kp){
       -                if(pm->vxproc){
       -                        pm->vxproc->mem = nil;
       -                        vxproc_free(pm->vxproc);
       -                        pm->vxproc = nil;
       -                }
       -                free(p->kstack);
       -                p->kstack = nil;
       -                p->kstack = smalloc(512*1024);
       -                return;
       -        }
       -
       -        pm->lo = 0x80000000UL;
       -        pm->hi = 0;
       -        if(pm->vxproc == nil){
       -                pm->vxproc = vxproc_alloc();
       -                if(pm->vxproc == nil)
       -                        panic("vxproc_alloc");
       -                pm->vxproc->mem = &thevxmem;
       -        }
       -}
       -
       -/*
         * Vx32 hooks to read, write, map, unmap, and check permissions
         * on user memory.  Normally these are more involved, but we're
         * using the processor to do everything.
       @@ -98,29 +56,21 @@ vxnewproc(Proc *p)
        static ssize_t
        vmread(vxmem *vm, void *data, uint32_t addr, uint32_t len)
        {
       -        memmove(data, uzero+addr, len);
       +        memmove(data, vm->mapped->base+addr, len);
                return len;
        }
        
        static ssize_t
        vmwrite(vxmem *vm, const void *data, uint32_t addr, uint32_t len)
        {
       -        memmove(uzero+addr, data, len);
       +        memmove(vm->mapped->base+addr, data, len);
                return len;
        }
        
       -static vxmmap thevxmmap =
       -{
       -        1,
       -        (void*)-1,        /* to be filled in with user0 */
       -        USTKTOP,
       -};
       -
        static vxmmap*
        vmmap(vxmem *vm, uint32_t flags)
        {
       -        thevxmmap.base = uzero;
       -        return &thevxmmap;
       +        return vm->mapped;
        }
        
        static void
       @@ -131,6 +81,14 @@ vmunmap(vxmem *vm, vxmmap *mm)
        static int
        vmcheckperm(vxmem *vm, uint32_t addr, uint32_t len, uint32_t perm, uint32_t *out_faultva)
        {
       +        if(addr >= USTKTOP){
       +                *out_faultva = addr;
       +                return 0;
       +        }
       +        if(addr+len < addr || addr +len > USTKTOP){
       +                *out_faultva = USTKTOP;
       +                return 0;
       +        }
                /* All is allowed - handle faults as they happen. */
                return 1;
        }
       @@ -164,6 +122,50 @@ static vxmem thevxmem =
                vmfree,
        };
        
       +/*
       + * Vxnewproc is called at the end of newproc
       + * to fill in vx32-specific entries in the Proc struct
       + * before it gets used.
       + */
       +void
       +vxnewproc(Proc *p)
       +{
       +        PMMU *pm;
       +        
       +        pm = &p->pmmu;
       +
       +        /*
       +         * Kernel procs don't need vxprocs; if this proc
       +         * already has one, take it away.  Also, give
       +         * kernel procs very large stacks so they can call
       +         * into non-thread-friendly routines like x11 
       +         * and getgrgid.
       +         */
       +        if(p->kp){
       +                if(pm->vxproc){
       +                //        vxunmap(p);
       +                        assert(pm->uzero == nil);
       +                        pm->vxproc->mem = nil;
       +                        vxproc_free(pm->vxproc);
       +                        pm->vxproc = nil;
       +                }
       +                free(p->kstack);
       +                p->kstack = nil;
       +                p->kstack = smalloc(512*1024);
       +                return;
       +        }
       +
       +        if(pm->vxproc == nil){
       +                pm->vxproc = vxproc_alloc();
       +                if(pm->vxproc == nil)
       +                        panic("vxproc_alloc");
       +                pm->vxproc->mem = &pm->vxmem;
       +                pm->vxmem = thevxmem;
       +                pm->vxmem.mapped = &pm->vxmm;
       +                memset(&pm->vxmm, 0, sizeof pm->vxmm);
       +        }
       +}
       +
        static void
        setclock(int start)
        {
       @@ -224,7 +226,7 @@ touser(void *initsp)
                         * Optimization: try to fault in code page and stack
                         * page right now, since we're likely to need them.
                         */
       -                if(up->pmmu.hi == 0){
       +                if(up->pmmu.us->hi == 0){
                                fault(vp->cpu->eip, 1);
                                fault(vp->cpu->reg[ESP], 0);
                        }
       @@ -267,11 +269,11 @@ touser(void *initsp)
                                addr = (uchar*)vp->cpu->trapva;
                                if(traceprocs)
                                        print("fault %p read=%d\n", addr, read);
       -                        if(isuaddr(addr) && fault(addr - uzero, read) >= 0)
       +                        if(isuaddr(addr) && fault(addr - up->pmmu.uzero, read) >= 0)
                                        continue;
                                print("%ld %s: unhandled fault va=%lux [%lux] eip=%lux\n",
                                        up->pid, up->text,
       -                                addr - uzero, vp->cpu->trapva, vp->cpu->eip);
       +                                addr - up->pmmu.uzero, vp->cpu->trapva, vp->cpu->eip);
                                proc2ureg(vp, &u);
                                dumpregs(&u);
                                if(doabort)