9vx: add pager from Plan 9 to flush memory - vx32 - Local 9vx git repository for patches.
       
 (DIR) Log
 (DIR) Files
 (DIR) Refs
       ---
 (DIR) commit 42b0c4ecc2ef6e78f0b2057d3f01cc658d0c6df0
 (DIR) parent 783915cde9c6a7bf0f7c9c259e336f381ec8545e
 (HTM) Author: Russ Cox <rsc@swtch.com>
       Date:   Tue,  1 Jul 2008 16:53:53 -0400
       
       9vx: add pager from Plan 9 to flush memory
       
       Diffstat:
         src/9vx/Makefrag                    |       1 +
         src/9vx/a/dat.h                     |       1 +
         src/9vx/a/page.c                    |       2 +-
         src/9vx/a/proc.c                    |       4 ++--
         src/9vx/a/swap.c                    |     408 +++++++++++++++++++++++++++++++
         src/9vx/a/swap.ed                   |       9 +++++++++
         src/9vx/main.c                      |       1 +
         src/9vx/mmu.c                       |       7 +++++--
         src/9vx/sched.c                     |      24 +++++++++++++++++++++++-
         src/9vx/stub.c                      |      48 ++-----------------------------
         src/9vx/vx32.c                      |       2 +-
       
       11 files changed, 455 insertions(+), 52 deletions(-)
       ---
 (DIR) diff --git a/src/9vx/Makefrag b/src/9vx/Makefrag
       @@ -101,6 +101,7 @@ PLAN9_A_OBJS = \
                        sdscsi.o \
                        segment.o \
                        strecpy.o \
       +                swap.o \
                        sysfile.o \
                        sysproc.o \
                        thwack.o \
 (DIR) diff --git a/src/9vx/a/dat.h b/src/9vx/a/dat.h
       @@ -193,6 +193,7 @@ struct Mach
                int        tlbfault;
                int        tlbpurge;
                int        pfault;
       +        int        new;
                int        cs;
                int        syscall;
                int        load;
 (DIR) diff --git a/src/9vx/a/page.c b/src/9vx/a/page.c
       @@ -129,7 +129,7 @@ newpage(int clear, Segment **s, ulong va)
                color = getpgcolor(va);
                hw = swapalloc.highwater;
                for(;;) {
       -                if(palloc.freecount > hw)
       +                if(palloc.freecount >= hw)
                                break;
                        if(up->kp && palloc.freecount > 0)
                                break;
 (DIR) diff --git a/src/9vx/a/proc.c b/src/9vx/a/proc.c
       @@ -1310,9 +1310,9 @@ procflushseg(Segment *s)
                 *  wait for all processors to take a clock interrupt
                 *  and flush their mmu's
                 */
       -        for(nm = 0; nm < conf.nmach; nm++)
       +        for(nm = 0; nm < conf.nmach && nm < 1; nm++)
                        if(MACHP(nm) != m)
       -                        while(MACHP(nm)->flushmmu)
       +                        while(MACHP(nm)->flushmmu && MACHP(nm)->proc != nil)
                                        sched();
        }
        
 (DIR) diff --git a/src/9vx/a/swap.c b/src/9vx/a/swap.c
       @@ -0,0 +1,408 @@
       +#include        "u.h"
       +#include        "lib.h"
       +#include        "mem.h"
       +#include        "dat.h"
       +#include        "fns.h"
       +#include        "error.h"
       +
       +static int        canflush(Proc*, Segment*);
       +static void        executeio(void);
       +static int        needpages(void *v);
       +static void        pageout(Proc*, Segment*);
       +static void        pagepte(int, Page**);
       +static void        pager(void *v);
       +
       +        Image         swapimage;
       +static        Page        **iolist;
       +static        int        ioptr;
       +
       +void
       +swapinit(void)        /* allocate and initialise the swap map and the pageout I/O list */
       +{
       +        swapalloc.swmap = xalloc(conf.nswap);        /* one byte (a reference count) per swap page */
       +        swapalloc.top = &swapalloc.swmap[conf.nswap];        /* one past the last map entry */
       +        swapalloc.alloc = swapalloc.swmap;
       +        swapalloc.last = swapalloc.swmap;        /* newswap starts its search here */
       +        swapalloc.free = conf.nswap;        /* every slot starts out free */
       +        iolist = xalloc(conf.nswppo*sizeof(Page*));        /* room for conf.nswppo queued pages */
       +        if(swapalloc.swmap == 0 || iolist == 0)
       +                panic("swapinit: not enough memory");
       +
       +        swapimage.notext = 1;
       +}
       +
       +ulong
       +newswap(void)        /* claim a free swap slot; returns its byte offset, or ~0 if none is free */
       +{
       +        uchar *look;
       +
       +        lock(&swapalloc.lk);
       +
       +        if(swapalloc.free == 0){
       +                unlock(&swapalloc.lk);
       +                return ~0;        /* callers compare against ~0 to detect exhaustion */
       +        }
       +
       +        look = memchr(swapalloc.last, 0, swapalloc.top-swapalloc.last);        /* first zero count at or after last */
       +        if(look == 0)
       +                panic("inconsistent swap");        /* free != 0 but no zero entry was found */
       +
       +        *look = 1;        /* slot now holds one reference */
       +        swapalloc.last = look;
       +        swapalloc.free--;
       +        unlock(&swapalloc.lk);
       +        return (look-swapalloc.swmap) * BY2PG;        /* slot index scaled to a byte offset */
       +}
       +
       +void
       +putswap(Page *p)        /* drop one reference on the swap slot whose address is carried in p */
       +{
       +        uchar *idx;
       +
       +        lock(&swapalloc.lk);
       +        idx = &swapalloc.swmap[((ulong)p)/BY2PG];        /* p is used as an address value, not dereferenced */
       +        if(--(*idx) == 0) {
       +                swapalloc.free++;        /* last reference gone: the slot is free again */
       +                if(idx < swapalloc.last)
       +                        swapalloc.last = idx;        /* keep last at or below the lowest freed slot */
       +        }
       +        if(*idx >= 254)        /* presumably catches a count that wrapped below zero -- TODO confirm */
       +                panic("putswap %lux == %ud", p, *idx);
       +        unlock(&swapalloc.lk);
       +}
       +
       +void
       +dupswap(Page *p)        /* add one reference to the swap slot whose address is carried in p */
       +{
       +        lock(&swapalloc.lk);
       +        if(++swapalloc.swmap[((ulong)p)/BY2PG] == 0)        /* uchar count wrapped around to zero */
       +                panic("dupswap");
       +        unlock(&swapalloc.lk);
       +}
       +
       +int
       +swapcount(ulong daddr)        /* reference count of the swap slot at byte offset daddr */
       +{
       +        return swapalloc.swmap[daddr/BY2PG];        /* read without taking swapalloc.lk */
       +}
       +
       +void
       +kickpager(void)        /* first call creates the pager kproc; later calls wake it */
       +{
       +        static int started;        /* NOTE(review): read and set with no lock visible here */
       +
       +        if(started)
       +                wakeup(&swapalloc.r);        /* the pager sleeps on swapalloc.r */
       +        else {
       +                kproc("pager", pager, 0);
       +                started = 1;
       +        }
       +}
       +
       +static void
       +pager(void *junk)        /* kproc body: reclaims pages while needpages() holds; never returns */
       +{
       +        int i;
       +        Segment *s;
       +        Proc *p, *ep;
       +
       +        if(waserror())
       +                panic("pager: os error\n");
       +
       +        p = proctab(0);
       +        ep = &p[conf.nproc];        /* one past the last proc table entry */
       +
       +loop:
       +        up->psstate = "Idle";
       +        sleep(&swapalloc.r, needpages, 0);        /* kickpager does the matching wakeup */
       +print("uh oh.  someone woke the pager\n");        /* debugging trace */
       +
       +        while(needpages(junk)) {
       +
       +                if(swapimage.c) {        /* a swap channel has been set */
       +                        p++;        /* continue the round-robin walk of the proc table */
       +                        if(p >= ep)
       +                                p = proctab(0);
       +        
       +                        if(p->state == Dead || p->noswap)
       +                                continue;
       +
       +                        if(!canqlock(&p->seglock))
       +                                continue;                /* process changing its segments */
       +
       +                        for(i = 0; i < NSEG; i++) {
       +                                if(!needpages(junk)){        /* enough memory now: go back to sleep */
       +                                        qunlock(&p->seglock);
       +                                        goto loop;
       +                                }
       +
       +                                if((s = p->seg[i])) {
       +                                        switch(s->type&SG_TYPE) {
       +                                        default:
       +                                                break;
       +                                        case SG_TEXT:
       +                                                pageout(p, s);        /* pagepte queues no I/O for text pages */
       +                                                break;
       +                                        case SG_DATA:
       +                                        case SG_BSS:
       +                                        case SG_STACK:
       +                                        case SG_SHARED:
       +                                                up->psstate = "Pageout";
       +                                                pageout(p, s);
       +                                                if(ioptr != 0) {        /* pageout queued pages on iolist */
       +                                                        up->psstate = "I/O";
       +                                                        executeio();
       +                                                }
       +                                                break;
       +                                        }
       +                                }
       +                        }
       +                        qunlock(&p->seglock);
       +                }
       +                else {
       +                        print("out of physical memory; no swap configured\n");
       +                        if(!cpuserver || freebroken() == 0)
       +                                killbig("out of memory");
       +
       +                        /* Emulate the old system if no swap channel */
       +                        tsleep(&up->sleep, return0, 0, 5000);
       +                        wakeup(&palloc.r);
       +                }
       +        }
       +        goto loop;
       +}
       +
       +static void
       +pageout(Proc *p, Segment *s)        /* pass unreferenced resident pages of s to pagepte */
       +{
       +        int type, i, size;
       +        Pte *l;
       +        Page **pg, *entry;
       +
       +        if(!canqlock(&s->lk))        /* We cannot afford to wait, we will surely deadlock */
       +                return;
       +
       +        if(s->steal) {                /* Protected by /dev/proc */
       +                qunlock(&s->lk);
       +                return;
       +        }
       +
       +        if(!canflush(p, s)) {        /* Able to invalidate all tlbs with references */
       +                qunlock(&s->lk);
       +                putseg(s);        /* canflush incremented s->ref.ref even though it failed */
       +                return;
       +        }
       +
       +        if(waserror()) {
       +                qunlock(&s->lk);
       +                putseg(s);
       +                return;
       +        }
       +
       +        /* Pass through the pte tables looking for memory pages to swap out */
       +        type = s->type&SG_TYPE;
       +        size = s->mapsize;
       +        for(i = 0; i < size; i++) {
       +                l = s->map[i];
       +                if(l == 0)
       +                        continue;
       +                for(pg = l->first; pg < l->last; pg++) {
       +                        entry = *pg;
       +                        if(pagedout(entry))
       +                                continue;
       +
       +                        if(entry->modref & PG_REF) {        /* referenced: clear the bit and spare it this pass */
       +                                entry->modref &= ~PG_REF;
       +                                continue;
       +                        }
       +
       +                        pagepte(type, pg);
       +
       +                        if(ioptr >= conf.nswppo)        /* iolist is full */
       +                                goto out;
       +                }
       +        }
       +out:
       +        poperror();
       +        qunlock(&s->lk);
       +        putseg(s);        /* presumably releases the reference canflush took -- TODO confirm */
       +}
       +
       +static int
       +canflush(Proc *p, Segment *s)        /* nonzero if every user of s passes canpage; always increments s->ref.ref */
       +{
       +        int i;
       +        Proc *ep;
       +
       +        lock(&s->ref.lk);
       +        if(s->ref.ref == 1) {                /* Easy if we are the only user */
       +                s->ref.ref++;
       +                unlock(&s->ref.lk);
       +                return canpage(p);
       +        }
       +        s->ref.ref++;
       +        unlock(&s->ref.lk);
       +
       +        /* Now we must do hardwork to ensure all processes which have tlb
       +         * entries for this segment will be flushed if we succeed in paging it out
       +         */
       +        p = proctab(0);        /* the p argument is discarded; scan every proc */
       +        ep = &p[conf.nproc];
       +        while(p < ep) {
       +                if(p->state != Dead) {
       +                        for(i = 0; i < NSEG; i++)
       +                                if(p->seg[i] == s)
       +                                        if(!canpage(p))
       +                                                return 0;        /* the extra reference is left for the caller to drop */
       +                }
       +                p++;
       +        }
       +        return 1;
       +}
       +
       +static void
       +pagepte(int type, Page **pg)        /* evict the page at *pg: drop text pages, queue others for swap */
       +{
       +        ulong daddr;
       +        Page *outp;
       +
       +        outp = *pg;
       +        switch(type) {        /* any other segment type falls through untouched */
       +        case SG_TEXT:                                /* Revert to demand load */
       +                putpage(outp);
       +                *pg = 0;
       +                break;
       +
       +        case SG_DATA:
       +        case SG_BSS:
       +        case SG_STACK:
       +        case SG_SHARED:
       +                /*
       +                 *  get a new swap address and clear any pages
       +                 *  referring to it from the cache
       +                 */
       +                daddr = newswap();
       +                if(daddr == ~0)        /* no swap slot free: leave the page resident */
       +                        break;
       +                cachedel(&swapimage, daddr);
       +
       +                lock(&outp->lk);
       +
       +                /* forget anything that it used to cache */
       +                uncachepage(outp);
       +
       +                /*
       +                 *  incr the reference count to make sure it sticks around while
       +                 *  being written
       +                 */
       +                outp->ref++;
       +
       +                /*
       +                 *  enter it into the cache so that a fault happening
       +                 *  during the write will grab the page from the cache
       +                 *  rather than one partially written to the disk
       +                 */
       +                outp->daddr = daddr;
       +                cachepage(outp, &swapimage);
       +                *pg = (Page*)(daddr|PG_ONSWAP);        /* pte entry now holds the swap address plus the on-swap flag */
       +                unlock(&outp->lk);
       +
       +                /* Add page to IO transaction list */
       +                iolist[ioptr++] = outp;
       +                break;
       +        }
       +}
       +
       +void
       +pagersummary(void)        /* print used/total user memory, used/total swap, and the iolist fill level */
       +{
       +        print("%lud/%lud memory %lud/%lud swap %d iolist\n",
       +                palloc.user-palloc.freecount,
       +                palloc.user, conf.nswap-swapalloc.free, conf.nswap,
       +                ioptr);
       +}
       +
       +static void
       +executeio(void)        /* write every page queued on iolist to the swap channel, then empty the list */
       +{
       +        Page *out;
       +        int i, n;
       +        Chan *c;
       +        char *kaddr;
       +        KMap *k;
       +
       +        c = swapimage.c;
       +
       +        for(i = 0; i < ioptr; i++) {
       +                if(ioptr > conf.nswppo)        /* sanity check: iolist holds only conf.nswppo entries */
       +                        panic("executeio: ioptr %d > %d\n", ioptr, conf.nswppo);
       +                out = iolist[i];
       +                k = kmap(out);
       +                kaddr = (char*)VA(k);
       +
       +                if(waserror())
       +                        panic("executeio: page out I/O error");
       +
       +                n = devtab[c->type]->write(c, kaddr, BY2PG, out->daddr);        /* one page, at its swap offset */
       +                if(n != BY2PG)
       +                        nexterror();        /* short write: presumably re-enters the waserror branch above and panics */
       +
       +                kunmap(k);
       +                poperror();
       +
       +                /* Free up the page after I/O */
       +                lock(&out->lk);
       +                out->ref--;        /* undo the extra reference pagepte took for the write */
       +                unlock(&out->lk);
       +                putpage(out);
       +        }
       +        ioptr = 0;
       +}
       +
       +static int
       +needpages(void *v)        /* sleep condition for the pager; v is unused */
       +{
       +        return palloc.freecount < swapalloc.headroom;        /* true while free pages are below the headroom mark */
       +}
       +
       +void
       +setswapchan(Chan *c)        /* make c the swap channel, replacing an existing one only if no swap is in use */
       +{
       +        uchar dirbuf[sizeof(Dir)+100];
       +        Dir d;
       +        int n;
       +
       +        if(swapimage.c) {
       +                if(swapalloc.free != conf.nswap){        /* some swap slot is still allocated */
       +                        cclose(c);
       +                        error(Einuse);
       +                }
       +                cclose(swapimage.c);        /* old channel is idle: close it before replacing */
       +        }
       +
       +        /*
       +         *  if this isn't a file, set the swap space
       +         *  to be at most the size of the partition
       +         */
       +        if(devtab[c->type]->dc != L'M'){
       +                n = devtab[c->type]->stat(c, dirbuf, sizeof dirbuf);
       +                if(n <= 0){
       +                        cclose(c);
       +                        error("stat failed in setswapchan");
       +                }
       +                convM2D(dirbuf, n, &d, nil);
       +                if(d.length < conf.nswap*BY2PG){        /* device is smaller than the configured swap */
       +                        conf.nswap = d.length/BY2PG;
       +                        swapalloc.top = &swapalloc.swmap[conf.nswap];        /* shrink the searchable map to match */
       +                        swapalloc.free = conf.nswap;
       +                }
       +        }
       +
       +        swapimage.c = c;
       +}
       +
       +int
       +swapfull(void)        /* nonzero when fewer than a tenth of the swap slots are free */
       +{
       +        return swapalloc.free < conf.nswap/10;        /* integer division; read without swapalloc.lk */
       +}
 (DIR) diff --git a/src/9vx/a/swap.ed b/src/9vx/a/swap.ed
       @@ -0,0 +1,9 @@
       +,s;(void\*);(void *v);g
       +,s;lock(\&swapalloc);lock(\&swapalloc.lk);g
       +,s;s->ref ==;s->ref.ref ==;g
       +,s;s->ref++;s->ref.ref++;g
       +,s;(s = p->seg\[i\]);(&);g
       +,s;lock(s);lock(\&s->ref.lk);g
       +,s;lock(out);lock(\&out->lk);g
       +,s;lock(outp);lock(\&outp->lk);g
       +g/swopen/d
 (DIR) diff --git a/src/9vx/main.c b/src/9vx/main.c
       @@ -742,6 +742,7 @@ newmach(void)
                        panic("out of processors");
                mm = mallocz(sizeof *mm, 1);
                mm->machno = i;
       +        mm->new = 1;
                machp[i] = mm;
                conf.nmach++;
                
 (DIR) diff --git a/src/9vx/mmu.c b/src/9vx/mmu.c
       @@ -141,6 +141,8 @@ static Proc *mmup;
        static void
        mmapflush(void)
        {
       +        m->flushmmu = 0;
       +
                /* Nothing mapped? */
                if(mmup == nil || mmup->pmmu.lo > mmup->pmmu.hi)
                        return;
       @@ -229,12 +231,13 @@ mmuswitch(Proc *p)
                 * one we were just in.  Also, kprocs don't count --
                 * only the guys on cpu0 do.
                 */
       -        if(!p->kp && mmup != p){
       +        if(!p->kp && (mmup != p || p->newtlb || m->flushmmu)){
                        if(0) print("^^^^^^^^^^ %ld %s\n========== %ld %s\n",
                                mmup ? mmup->pid : 0, mmup? mmup->text : "",
                                p->pid, p->text);
                        /* No vxproc_flush - vxproc cache is okay */
                        mmapflush();
       +                p->newtlb = 0;
                        mmup = p;
                }
        }
       @@ -249,7 +252,7 @@ mmurelease(Proc *p)
                        return;
                if(p->pmmu.vxproc)
                        vxproc_flush(p->pmmu.vxproc);
       -        if(p == mmup){
       +        if(p == mmup || m->flushmmu){
                        mmapflush();
                        mmup = nil;
                }
 (DIR) diff --git a/src/9vx/sched.c b/src/9vx/sched.c
       @@ -41,11 +41,17 @@ idlehands(void)
                plock(&idling);
                nbad = 0;
                while(!idlewakeup){
       +                if(traceprocs)
       +                        iprint("cpu%d: idlehands\n", m->machno);
                        psleep(&idling);
       +                if(traceprocs)
       +                        iprint("cpu%d: busy hands\n", m->machno);
                        if(!idlewakeup && ++nbad%1000 == 0)
                                iprint("idlehands spurious wakeup\n");
                }
                idlewakeup = 0;
       +        if(traceprocs)
       +                iprint("cpu%d: idlehands returning\n", m->machno);
                punlock(&idling);
        }
        
       @@ -96,8 +102,14 @@ ready(Proc *p)
                 * kick off a new one.
                 */
                kprocq.n++;
       -        if(kprocq.n > nrunproc)
       +        if(kprocq.n > nrunproc){
       +                if(traceprocs)
       +                        iprint("create new cpu: kprocq.n=%d nrunproc=%d\n", kprocq.n, nrunproc);
       +                nrunproc++;
                        newmach();
       +        }
       +        if(traceprocs)
       +                iprint("cpu%d: ready %ld %s; wakeup kproc cpus\n", m->machno, p->pid, p->text);
                pwakeup(&run);
                unlock(&kprocq.lk);
                punlock(&run);
       @@ -120,19 +132,29 @@ runproc(void)
                nbad = 0;
                plock(&run);
                lock(&kprocq.lk);        /* redundant but fine */
       +        if(m->new){
       +                nrunproc--;
       +                m->new = 0;
       +        }
                while((p = kprocq.head) == nil){
                        nrunproc++;
                        unlock(&kprocq.lk);
       +                if(traceprocs)
       +                        iprint("cpu%d: runproc psleep %d %d\n", m->machno, kprocq.n, nrunproc);
                        psleep(&run);
                        lock(&kprocq.lk);
                        if(kprocq.head == nil && ++nbad%1000 == 0)
                                iprint("cpu%d: runproc spurious wakeup\n", m->machno);        
       +                if(traceprocs)
       +                        iprint("cpu%d: runproc awake\n", m->machno);
                        nrunproc--;
                }
                kprocq.head = p->rnext;
                if(kprocq.head == 0)
                        kprocq.tail = nil;
                kprocq.n--;
       +        if(traceprocs)
       +                iprint("cpu%d: runproc %ld %s [%d %d]\n", m->machno, p->pid, p->text, kprocq.n, nrunproc);
                unlock(&kprocq.lk);
                punlock(&run);
                return p;
 (DIR) diff --git a/src/9vx/stub.c b/src/9vx/stub.c
       @@ -134,51 +134,6 @@ splx(int s)
        
        
        /*
       - * Swap
       - */
       -Image swapimage;
       -
       -int
       -swapfull(void)
       -{
       -        return 0;
       -}
       -
       -void
       -kickpager(void)
       -{
       -}
       -
       -void
       -setswapchan(Chan *c)
       -{
       -}
       -
       -void
       -pagersummary(void)
       -{
       -}
       -
       -void
       -putswap(Page *p)
       -{
       -        panic("putswap");
       -}
       -
       -int
       -swapcount(ulong daddr)
       -{
       -        return 0;
       -}
       -
       -void
       -dupswap(Page *p)
       -{
       -        panic("dupswap");
       -}
       -
       -
       -/*
         * Floating point.
         */
        void
       @@ -542,6 +497,9 @@ panic(char *fmt, ...)
                buf[n] = '\n';
                write(2, buf, n+1);
                if(doabort){
       +#ifndef __APPLE__
       +                abort();
       +#endif
                        for(;;)
                                microdelay(1000000);
                }
 (DIR) diff --git a/src/9vx/vx32.c b/src/9vx/vx32.c
       @@ -44,7 +44,7 @@ static vxmem thevxmem;
        void
        vx32sysr1(void)
        {
       -//        traceprocs = !traceprocs;
       +        traceprocs = !traceprocs;
        //        vx32_debugxlate = traceprocs;
                tracesyscalls = !tracesyscalls;
        }