tassorted changes from Plan 9 - plan9port - [fork] Plan 9 from user space
 (HTM) git clone git://src.adamsgaard.dk/plan9port
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 28b49df3542a635cca788f3de213385f3fcb6334
 (DIR) parent 686bd37d9d8db5e3b969a3aa2d5b455e0976b262
 (HTM) Author: rsc <devnull@localhost>
       Date:   Tue, 18 Jul 2006 15:26:33 +0000
       
       assorted changes from Plan 9
       
       Diffstat:
         M src/cmd/venti/srv/arena.c           |      40 +++++++++++++++++++++++--------
         M src/cmd/venti/srv/arenas.c          |       6 +++---
         M src/cmd/venti/srv/bloom.c           |      62 ++++++++++++++++++++++++-------
         M src/cmd/venti/srv/buildbuck.c       |       6 +++---
         M src/cmd/venti/srv/buildindex.c      |    1018 +++++++++++++++++++++++++++----
         M src/cmd/venti/srv/checkindex.c      |       4 +++-
         M src/cmd/venti/srv/clump.c           |       7 ++++++-
         M src/cmd/venti/srv/conv.c            |      66 +++++++++++++++++++++++++++++--
         M src/cmd/venti/srv/dat.h             |      42 +++++++++++++++++++------------
         M src/cmd/venti/srv/dcache.c          |      64 +++++++++++++++++++++----------
         A src/cmd/venti/srv/disksched.c       |      88 +++++++++++++++++++++++++++++++
         M src/cmd/venti/srv/findscore.c       |       2 +-
         A src/cmd/venti/srv/fixarenas.c       |    1894 +++++++++++++++++++++++++++++++
         M src/cmd/venti/srv/fns.h             |       9 +++++++++
         M src/cmd/venti/srv/graph.c           |      16 ++++++++++------
         M src/cmd/venti/srv/httpd.c           |     219 ++++++++++++++++++++++++++++---
         M src/cmd/venti/srv/icache.c          |      50 ++++++++++++++++++++++++++-----
         M src/cmd/venti/srv/icachewrite.c     |      36 +++++++++++++++++++++----------
         M src/cmd/venti/srv/index.c           |      22 ++++++++++------------
         M src/cmd/venti/srv/lump.c            |      27 ++++++++++++++++++++++++---
         M src/cmd/venti/srv/lumpcache.c       |      13 +++++++++++--
         M src/cmd/venti/srv/lumpqueue.c       |      16 ----------------
         A src/cmd/venti/srv/mirrorarenas.c    |     464 ++++++++++++++++++++++++++++++
         M src/cmd/venti/srv/mkfile            |       3 +++
         M src/cmd/venti/srv/part.c            |     260 ++++++++++++++++++++++++++++++-
         A src/cmd/venti/srv/printarenapart.c  |     160 +++++++++++++++++++++++++++++++
         M src/cmd/venti/srv/printarenas.c     |       2 +-
         M src/cmd/venti/srv/sortientry.c      |      17 +++++------------
         M src/cmd/venti/srv/stats.c           |       2 +-
         M src/cmd/venti/srv/syncarena.c       |      21 +++++++++++----------
         M src/cmd/venti/srv/syncindex.c       |       2 ++
         M src/cmd/venti/srv/syncindex0.c      |      16 +++++++++++++++-
         M src/cmd/venti/srv/unwhack.c         |       2 +-
         M src/cmd/venti/srv/utils.c           |       5 +++++
         M src/cmd/venti/srv/venti.c           |       6 ++++--
         M src/cmd/venti/srv/verifyarena.c     |     220 +++++++++++++++++++++++++------
         M src/cmd/venti/srv/wrarena.c         |       4 ++--
         M src/cmd/venti/srv/zblock.c          |       6 ++++--
         M src/cmd/venti/srv/zeropart.c        |       4 ----
       
       39 files changed, 4540 insertions(+), 361 deletions(-)
       ---
 (DIR) diff --git a/src/cmd/venti/srv/arena.c b/src/cmd/venti/srv/arena.c
       t@@ -20,6 +20,7 @@ static void        sumproc(void *);
        static QLock        sumlock;
        static Rendez        sumwait;
        static ASum        *sumq;
       +static ASum        *sumqtail;
        static uchar zero[8192];
        
        int        arenasumsleeptime;
       t@@ -257,7 +258,6 @@ writearena(Arena *arena, u64int aa, u8int *clbuf, u32int n)
                        if(m > n - nn)
                                m = n - nn;
                        memmove(&b->data[off], &clbuf[nn], m);
       -                /* ok = writepart(arena->part, a, b->data, blocksize); */
                        ok = 0;
                        putdblock(b);
                        if(ok < 0){
       t@@ -329,7 +329,6 @@ writeaclump(Arena *arena, Clump *c, u8int *clbuf, u64int start, u64int *pa)
                        if(m > n - nn)
                                m = n - nn;
                        memmove(&b->data[off], &clbuf[nn], m);
       -        /*        ok = writepart(arena->part, a, b->data, blocksize); */
                        ok = 0;
                        putdblock(b);
                        if(ok < 0){
       t@@ -356,6 +355,7 @@ writeaclump(Arena *arena, Clump *c, u8int *clbuf, u64int start, u64int *pa)
                        arena->ctime = arena->wtime;
        
                writeclumpinfo(arena, clump, &c->info);
       +        wbarena(arena);
        
                /* set up for call to setdcachestate */
                as.arena = arena;
       t@@ -410,6 +410,9 @@ setatailstate(AState *as)
        
                trace(0, "setatailstate %s 0x%llux clumps %d", as->arena->name, as->aa, as->stats.clumps);
        
       +        /*
       +         * Look up as->arena to find index.
       +         */
                ix = mainindex;
                for(i=0; i<ix->narenas; i++)
                        if(ix->arenas[i] == as->arena)
       t@@ -419,6 +422,9 @@ setatailstate(AState *as)
                        return;
                }
        
       +        /*
       +          * Walk backward until we find the last time these were in sync.
       +         */
                for(j=i; --j>=0; ){
                        a = ix->arenas[j];
                        if(atailcmp(&a->diskstats, &a->memstats) == 0)
       t@@ -464,8 +470,12 @@ backsumarena(Arena *arena)
                        return;
                qlock(&sumlock);
                as->arena = arena;
       -        as->next = sumq;
       -        sumq = as;
       +        as->next = nil;
       +        if(sumq)
       +                sumqtail->next = as;
       +        else
       +                sumq = as;
       +        sumqtail = as;
                rwakeup(&sumwait);
                qunlock(&sumlock);
        }
       t@@ -499,6 +509,7 @@ sumarena(Arena *arena)
                DigestState s;
                u64int a, e;
                u32int bs;
       +        int t;
                u8int score[VtScoreSize];
        
                bs = MaxIoSize;
       t@@ -512,7 +523,12 @@ sumarena(Arena *arena)
                b = alloczblock(bs, 0, arena->part->blocksize);
                e = arena->base + arena->size;
                for(a = arena->base - arena->blocksize; a + arena->blocksize <= e; a += bs){
       -                sleep(arenasumsleeptime);
       +                disksched();
       +                while((t=arenasumsleeptime) == SleepForever){
       +                        sleep(1000);
       +                        disksched();
       +                }
       +                sleep(t);
                        if(a + bs > e)
                                bs = arena->blocksize;
                        if(readpart(arena->part, a, b->data, bs) < 0)
       t@@ -595,7 +611,7 @@ wbarenahead(Arena *arena)
                b = alloczblock(arena->blocksize, 1, arena->part->blocksize);
                if(b == nil){
                        logerr(EAdmin, "can't write arena header: %r");
       -/*/ZZZ add error message? */
       +/* ZZZ add error message? */
                        return -1;
                }
                /*
       t@@ -681,18 +697,22 @@ okarena(Arena *arena)
                ok = 0;
                dsize = arenadirsize(arena, arena->diskstats.clumps);
                if(arena->diskstats.used + dsize > arena->size){
       -                seterr(ECorrupt, "arena used > size");
       +                seterr(ECorrupt, "arena %s used > size", arena->name);
                        ok = -1;
                }
        
                if(arena->diskstats.cclumps > arena->diskstats.clumps)
       -                logerr(ECorrupt, "arena has more compressed clumps than total clumps");
       +                logerr(ECorrupt, "arena %s has more compressed clumps than total clumps", arena->name);
        
       +        /*
       +         * This need not be true if some of the disk is corrupted.
       +         *
                if(arena->diskstats.uncsize + arena->diskstats.clumps * ClumpSize + arena->blocksize < arena->diskstats.used)
       -                logerr(ECorrupt, "arena uncompressed size inconsistent with used space %lld %d %lld", arena->diskstats.uncsize, arena->diskstats.clumps, arena->diskstats.used);
       +                logerr(ECorrupt, "arena %s uncompressed size inconsistent with used space %lld %d %lld", arena->name, arena->diskstats.uncsize, arena->diskstats.clumps, arena->diskstats.used);
       +         */
        
                if(arena->ctime > arena->wtime)
       -                logerr(ECorrupt, "arena creation time after last write time");
       +                logerr(ECorrupt, "arena %s creation time after last write time", arena->name);
        
                return ok;
        }
 (DIR) diff --git a/src/cmd/venti/srv/arenas.c b/src/cmd/venti/srv/arenas.c
       t@@ -214,7 +214,7 @@ wbarenapart(ArenaPart *ap)
                        return -1;
                b = alloczblock(HeadSize, 1, 0);
                if(b == nil)
       -/*ZZZ set error message? */
       +/* ZZZ set error message? */
                        return -1;
        
                if(packarenapart(ap, b->data) < 0){
       t@@ -337,8 +337,8 @@ wbarenamap(AMap *am, int n, Part *part, u64int base, u64int size)
        /*
         * amap: n '\n' amapelem * n
         * n: u32int
       - * amapelem: name '\t' astart '\t' asize '\n'
       - * astart, asize: u64int
       + * amapelem: name '\t' astart '\t' astop '\n'
       + * astart, astop: u64int
         */
        int
        parseamap(IFile *f, AMapN *amn)
 (DIR) diff --git a/src/cmd/venti/srv/bloom.c b/src/cmd/venti/srv/bloom.c
       t@@ -7,6 +7,8 @@
        #include "dat.h"
        #include "fns.h"
        
       +int ignorebloom;
       +
        int
        bloominit(Bloom *b, vlong vsize, u8int *data)
        {
       t@@ -24,6 +26,7 @@ bloominit(Bloom *b, vlong vsize, u8int *data)
                        if(unpackbloomhead(b, data) < 0)
                                return -1;
                
       +fprint(2, "bloom size %lud nhash %d\n", b->size, b->nhash);
                b->mask = b->size-1;
                b->data = data;
                return 0;
       t@@ -38,11 +41,7 @@ wbbloomhead(Bloom *b)
        Bloom*
        readbloom(Part *p)
        {
       -        int i, n;
       -        uint ones;
                uchar buf[512];
       -        uchar *data;
       -        u32int *a;
                Bloom *b;
                
                b = vtmallocz(sizeof *b);
       t@@ -52,14 +51,40 @@ readbloom(Part *p)
                        vtfree(b);
                        return nil;
                }
       +        b->part = p;
       +        return b;
       +}
       +
       +int
       +resetbloom(Bloom *b)
       +{
       +        uchar *data;
       +        
                data = vtmallocz(b->size);
       -        if(readpart(p, 0, data, b->size) < 0){
       +fprint(2, "bloom data %lud\n", b->size);
       +        b->data = data;
       +        if(b->size == MaxBloomSize)        /* 2^32 overflows ulong */
       +                addstat(StatBloomBits, b->size*8-1);
       +        else
       +                addstat(StatBloomBits, b->size*8);
       +        return 0;
       +}
       +
       +int
       +loadbloom(Bloom *b)
       +{
       +        int i, n;
       +        uint ones;
       +        uchar *data;
       +        u32int *a;
       +        
       +        data = vtmallocz(b->size);
       +        if(readpart(b->part, 0, data, b->size) < 0){
                        vtfree(b);
                        vtfree(data);
       -                return nil;
       +                return -1;
                }
                b->data = data;
       -        b->part = p;
        
                a = (u32int*)b->data;
                n = b->size/4;
       t@@ -73,7 +98,7 @@ readbloom(Part *p)
                else
                        addstat(StatBloomBits, b->size*8);
                        
       -        return b;
       +        return 0;
        }
        
        int
       t@@ -101,6 +126,8 @@ gethashes(u8int *score, ulong *h)
                        a ^= *(u32int*)(score+i);
                        b ^= *(u32int*)(score+i+4);
                }
       +        if(i+4 <= VtScoreSize)        /* 20 is not 4-aligned */
       +                a ^= *(u32int*)(score+i);
                for(i=0; i<BloomMaxHash; i++, a+=b)
                        h[i] = a < BloomHeadSize*8 ? BloomHeadSize*8 : a;
        }
       t@@ -154,14 +181,17 @@ inbloomfilter(Bloom *b, u8int *score)
                int r;
                uint ms;
        
       -        if(b == nil)
       +        if(b == nil || b->data == nil)
                        return 1;
        
       +        if(ignorebloom)
       +                return 1;
       +        
                ms = msec();
                rlock(&b->lk);
                r = _inbloomfilter(b, score);
                runlock(&b->lk);
       -        ms = msec() - ms;
       +        ms = ms - msec();
                addstat2(StatBloomLookup, 1, StatBloomLookupTime, ms);
                if(r)
                        addstat(StatBloomMiss, 1);
       t@@ -173,7 +203,7 @@ inbloomfilter(Bloom *b, u8int *score)
        void
        markbloomfilter(Bloom *b, u8int *score)
        {
       -        if(b == nil)
       +        if(b == nil || b->data == nil)
                        return;
        
                rlock(&b->lk);
       t@@ -186,14 +216,18 @@ markbloomfilter(Bloom *b, u8int *score)
        static void
        bloomwriteproc(void *v)
        {
       +        int ret;
                Bloom *b;
       -        
       +
       +        threadsetname("bloomwriteproc");        
                b = v;
                for(;;){
                        recv(b->writechan, 0);
       -                if(writebloom(b) < 0)
       +                if((ret=writebloom(b)) < 0)
                                fprint(2, "oops! writing bloom: %r\n");
       -                send(b->writedonechan, 0);
       +                else
       +                        ret = 0;
       +                sendul(b->writedonechan, ret);
                }
        }
        
 (DIR) diff --git a/src/cmd/venti/srv/buildbuck.c b/src/cmd/venti/srv/buildbuck.c
       t@@ -21,7 +21,7 @@ initiestream(Part *part, u64int off, u64int clumps, u32int size)
        {
                IEStream *ies;
        
       -/*ZZZ out of memory? */
       +/* out of memory? */
                ies = MKZ(IEStream);
                ies->buf = MKN(u8int, size);
                ies->epos = ies->buf;
       t@@ -61,7 +61,7 @@ peekientry(IEStream *ies)
                        nn -= n;
                        if(nn == 0)
                                return nil;
       -/*fprint(2, "peek %d from %llud into %p\n", nn, ies->off, ies->epos); */
       +//fprint(2, "peek %d from %llud into %p\n", nn, ies->off, ies->epos);
                        if(readpart(ies->part, ies->off, ies->epos, nn) < 0){
                                seterr(EOk, "can't read sorted index entries: %r");
                                return nil;
       t@@ -101,7 +101,7 @@ buildbucket(Index *ix, IEStream *ies, IBucket *ib, uint maxdata)
                        b = peekientry(ies);
                        if(b == nil)
                                return TWID32;
       -/*fprint(2, "b=%p ies->n=%lld ib.n=%d buck=%d score=%V\n", b, ies->n, ib->n, iebuck(ix, b, ib, ies), b); */
       +/* fprint(2, "b=%p ies->n=%lld ib.n=%d buck=%d score=%V\n", b, ies->n, ib->n, iebuck(ix, b, ib, ies), b); */
                        if(ib->n == 0)
                                buck = iebuck(ix, b, ib, ies);
                        else{
 (DIR) diff --git a/src/cmd/venti/srv/buildindex.c b/src/cmd/venti/srv/buildindex.c
       t@@ -1,164 +1,936 @@
        /*
       - * Rebuild the Venti index from scratch.
       + * Rebuild the index from scratch, in place.
         */
       -
        #include "stdinc.h"
        #include "dat.h"
        #include "fns.h"
        
       -/*
       - * Write a single bucket.  Could profit from a big buffer here
       - * so that we can absorb sporadic runs of blocks into one write,
       - * avoiding disk seeks.
       - */
       -static int
       -writebucket(Index *ix, u32int buck, IBucket *ib, ZBlock *b)
       +enum
        {
       -        ISect *is;
       +        MinBufSize = 64*1024,
       +        MaxBufSize = 4*1024*1024,
       +};
        
       -        is = ix->sects[indexsect0(ix, buck)];
       -        if(buck < is->start || buck >= is->stop){
       -                seterr(EAdmin, "cannot find index section for bucket %lud\n", (ulong)buck);
       -                return -1;
       -        }
       -        buck -= is->start;
       +int                dumb;
       +int                errors;
       +char                **isect;
       +int                nisect;
       +int                bloom;
       +int                zero;
        
       -/*
       -        qlock(&stats.lock);
       -        stats.indexwrites++;
       -        qunlock(&stats.lock);
       -*/
       -        packibucket(ib, b->data, is->bucketmagic);
       -        return writepart(is->part, is->blockbase + ((u64int)buck << is->blocklog), b->data, is->blocksize);
       -}
       +u32int        isectmem;
       +u64int        totalbuckets;
       +u64int        totalclumps;
       +Channel        *arenadonechan;
       +Channel        *isectdonechan;
       +Index        *ix;
        
       -static int
       -buildindex(Index *ix, Part *part, u64int off, u64int clumps, int zero)
       -{
       -        IEStream *ies;
       -        IBucket ib, zib;
       -        ZBlock *z, *b;
       -        u32int next, buck;
       -        int ok;
       -        uint nbuck;
       -        u64int found = 0;
       -
       -/*ZZZ make buffer size configurable */
       -        b = alloczblock(ix->blocksize, 0, ix->blocksize);
       -        z = alloczblock(ix->blocksize, 1, ix->blocksize);
       -        ies = initiestream(part, off, clumps, 64*1024);
       -        if(b == nil || z == nil || ies == nil){
       -                ok = 0;
       -                goto breakout;
       -                return -1;
       -        }
       -        ok = 0;
       -        next = 0;
       -        memset(&ib, 0, sizeof ib);
       -        ib.data = b->data + IBucketSize;
       -        zib.data = z->data + IBucketSize;
       -        zib.n = 0;
       -        nbuck = 0;
       -        for(;;){
       -                buck = buildbucket(ix, ies, &ib, ix->blocksize-IBucketSize);
       -                found += ib.n;
       -                if(zero){
       -                        for(; next != buck; next++){
       -                                if(next == ix->buckets){
       -                                        if(buck != TWID32){
       -                                                fprint(2, "bucket out of range\n");
       -                                                ok = -1;
       -                                        }
       -                                        goto breakout;
       -                                }
       -                                if(writebucket(ix, next, &zib, z) < 0){
       -                                        fprint(2, "can't write zero bucket to buck=%d: %r", next);
       -                                        ok = -1;
       -                                }
       -                        }
       -                }
       -                if(buck >= ix->buckets){
       -                        if(buck == TWID32)
       -                                break;
       -                        fprint(2, "bucket out of range\n");
       -                        ok = -1;
       -                        goto breakout;
       -                }
       -                if(writebucket(ix, buck, &ib, b) < 0){
       -                        fprint(2, "bad bucket found=%lld: %r\n", found);
       -                        ok = -1;
       -                }
       -                next = buck + 1;
       -                if(++nbuck%10000 == 0)
       -                        fprint(2, "\t%,d buckets written...\n", nbuck);
       -        }
       -breakout:;
       -        fprint(2, "wrote index with %lld entries\n", found);
       -        freeiestream(ies);
       -        freezblock(z);
       -        freezblock(b);
       -        return ok;
       -}
       +u64int        arenaentries;
       +u64int        skipentries;
       +u64int        indexentries;
       +
       +static int shouldprocess(ISect*);
       +static void        isectproc(void*);
       +static void        arenapartproc(void*);
        
        void
        usage(void)
        {
       -        fprint(2, "usage: buildindex [-Z] [-B blockcachesize] config tmppart\n");
       -        threadexitsall(0);
       +        fprint(2, "usage: buildindex [-b] [-i isect]... [-M imem] venti.conf\n");
       +        threadexitsall("usage");
        }
        
       -Config conf;
       -
        void
        threadmain(int argc, char *argv[])
        {
       -        Part *part;
       -        u64int clumps, base;
       -        u32int bcmem;
       -        int zero;
       -
       -        zero = 1;
       -        bcmem = 0;
       +        int fd, i, napart;
       +        u32int bcmem, imem;
       +        Config conf;
       +        Part *p;
       +        
                ventifmtinstall();
       +        imem = 256*1024*1024;
                ARGBEGIN{
       -        case 'B':
       -                bcmem = unittoull(ARGF());
       +        case 'b':
       +                bloom = 1;
       +                break;
       +        case 'i':
       +                isect = vtrealloc(isect, (nisect+1)*sizeof(isect[0]));
       +                isect[nisect++] = EARGF(usage());
                        break;
       -        case 'Z':
       -                zero = 0;
       +        case 'd':        /* debugging - make sure to run all 3 passes */
       +                dumb = 1;
       +                break;
       +        case 'M':
       +                imem = unittoull(EARGF(usage()));
                        break;
                default:
                        usage();
                        break;
                }ARGEND
       -
       -        if(argc != 2)
       +        
       +        if(argc != 1)
                        usage();
        
                if(initventi(argv[0], &conf) < 0)
                        sysfatal("can't init venti: %r");
       +        ix = mainindex;
       +        if(nisect == 0 && ix->bloom)
       +                bloom = 1;
       +        if(bloom && ix->bloom && resetbloom(ix->bloom) < 0)
       +                sysfatal("loadbloom: %r");
       +        if(bloom && !ix->bloom)
       +                sysfatal("-b specified but no bloom filter");
       +        if(!bloom)
       +                ix->bloom = nil;
       +        isectmem = imem/ix->nsects;
        
       -        if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))
       -                bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
       +        /*
       +         * safety first - only need read access to arenas
       +         */
       +        p = nil;
       +        for(i=0; i<ix->narenas; i++){
       +                if(ix->arenas[i]->part != p){
       +                        p = ix->arenas[i]->part;
       +                        if((fd = open(p->filename, OREAD)) < 0)
       +                                sysfatal("cannot reopen %s: %r", p->filename);
       +                        dup(fd, p->fd);
       +                        close(fd);
       +                }
       +        }
       +        
       +        /*
       +         * need a block for every arena
       +         */
       +        bcmem = maxblocksize * (mainindex->narenas + 16);
                if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
                initdcache(bcmem);
       +        
       +        totalclumps = 0;
       +        for(i=0; i<ix->narenas; i++)
       +                totalclumps += ix->arenas[i]->diskstats.clumps;
       +        
       +        totalbuckets = 0;
       +        for(i=0; i<ix->nsects; i++)
       +                totalbuckets += ix->sects[i]->blocks;
       +        fprint(2, "%,lld clumps, %,lld buckets\n", totalclumps, totalbuckets);
       +
       +        /* start index procs */
       +        fprint(2, "%T read index\n");
       +        isectdonechan = chancreate(sizeof(void*), 0);
       +        for(i=0; i<ix->nsects; i++){
       +                if(shouldprocess(ix->sects[i]))
       +                        ix->sects[i]->writechan = chancreate(sizeof(IEntry), 0);
       +                vtproc(isectproc, ix->sects[i]);
       +        }
       +        
       +        for(i=0; i<nisect; i++)
       +                if(isect[i])
       +                        fprint(2, "warning: did not find index section %s\n", isect[i]);
       +
       +        /* start arena procs */
       +        p = nil;
       +        napart = 0;
       +        arenadonechan = chancreate(sizeof(void*), 0);
       +        for(i=0; i<ix->narenas; i++){
       +                if(ix->arenas[i]->part != p){
       +                        p = ix->arenas[i]->part;
       +                        vtproc(arenapartproc, p);
       +                        napart++;
       +                }
       +        }
       +
       +        /* wait for arena procs to finish */
       +        for(i=0; i<napart; i++)
       +                recvp(arenadonechan);
       +
       +        /* tell index procs to finish */
       +        for(i=0; i<ix->nsects; i++)
       +                if(ix->sects[i]->writechan)
       +                        send(ix->sects[i]->writechan, nil);
       +
       +        /* wait for index procs to finish */
       +        for(i=0; i<ix->nsects; i++)
       +                if(ix->sects[i]->writechan)
       +                        recvp(isectdonechan);
       +
       +        if(ix->bloom && writebloom(ix->bloom) < 0)
       +                fprint(2, "writing bloom filter: %r\n");
       +
       +        fprint(2, "%T done arenaentries=%,lld indexed=%,lld (nskip=%,lld)\n", 
       +                arenaentries, indexentries, skipentries);
       +        threadexitsall(nil);
       +}
       +
       +static int
       +shouldprocess(ISect *is)
       +{
       +        int i;
       +        
       +        if(nisect == 0)
       +                return 1;
       +
       +        for(i=0; i<nisect; i++)
       +                if(isect[i] && strcmp(isect[i], is->name) == 0){
       +                        isect[i] = nil;
       +                        return 1;
       +                }
       +        return 0;
       +}
       +
       +static void
       +add(u64int *a, u64int n)
       +{
       +        static Lock l;
       +        
       +        lock(&l);
       +        *a += n;
       +        unlock(&l);
       +}
       +
       +/*
       + * Read through an arena partition and send each of its IEntries
       + * to the appropriate index section.  When finished, send on
       + * arenadonechan.
       + */
       +enum
       +{
       +        ClumpChunks = 32*1024,
       +};
       +static void
       +arenapartproc(void *v)
       +{
       +        int i, j, n, nskip, x;
       +        u32int clump;
       +        u64int addr, tot;
       +        Arena *a;
       +        ClumpInfo *ci, *cis;
       +        IEntry ie;
       +        Part *p;
       +        
       +        p = v;
       +        threadsetname("arenaproc %s", p->name);
       +
       +        nskip = 0;
       +        tot = 0;
       +        cis = MKN(ClumpInfo, ClumpChunks);
       +        for(i=0; i<ix->narenas; i++){
       +                a = ix->arenas[i];
       +                if(a->part != p)
       +                        continue;
       +                if(a->memstats.clumps)
       +                        fprint(2, "%T arena %s: %d entries\n", 
       +                                a->name, a->memstats.clumps);
       +                addr = ix->amap[i].start;
       +                for(clump=0; clump<a->memstats.clumps; clump+=n){
       +                        n = ClumpChunks;
       +                        if(n > a->memstats.clumps - clump)
       +                                n = a->memstats.clumps - clump;
       +                        if(readclumpinfos(a, clump, cis, n) != n){
       +                                fprint(2, "%T arena %s: directory read: %r\n", a->name);
       +                                errors = 1;
       +                                break;
       +                        }
       +                        for(j=0; j<n; j++){
       +                                ci = &cis[j];
       +                                ie.ia.type = ci->type;
       +                                ie.ia.size = ci->uncsize;
       +                                ie.ia.addr = addr;
       +                                addr += ci->size + ClumpSize;
       +                                ie.ia.blocks = (ci->size + ClumpSize + (1<<ABlockLog)-1) >> ABlockLog;
       +                                scorecp(ie.score, ci->score);
       +                                if(ci->type == VtCorruptType)
       +                                        nskip++;
       +                                else{
       +                                        tot++;
       +                                        x = indexsect(ix, ie.score);
       +                                        assert(0 <= x && x < ix->nsects);
       +                                        if(ix->sects[x]->writechan)
       +                                                send(ix->sects[x]->writechan, &ie);
       +                                        if(ix->bloom)
       +                                                markbloomfilter(ix->bloom, ie.score);
       +                                }
       +                        }
       +                }
       +        }
       +        add(&arenaentries, tot);
       +        add(&skipentries, nskip);
       +        sendp(arenadonechan, p);
       +}
       +
       +/*
       + * Convert score into relative bucket number in isect.
       + * Can pass a packed ientry instead of score - score is first.
       + */
       +static u32int
       +score2bucket(ISect *is, uchar *score)
       +{
       +        u32int b;
       +        
       +        b = hashbits(score, 32)/ix->div;
       +        assert(is->start <= b && b < is->stop);
       +        return b - is->start;
       +}
       +
       +/*
       + * Convert offset in index section to bucket number.
       + */
       +static u32int
       +offset2bucket(ISect *is, u64int offset)
       +{
       +        u32int b;
       +        
       +        assert(is->blockbase <= offset);
       +        offset -= is->blockbase;
       +        b = offset/is->blocksize;
       +        assert(b < is->stop-is->start);
       +        return b;
       +}
       +
       +/*
       + * Convert bucket number to offset.
       + */
       +static u64int
       +bucket2offset(ISect *is, u32int b)
       +{
       +        assert(b <= is->stop-is->start);
       +        return is->blockbase + (u64int)b*is->blocksize;
       +}
       +
       +/* 
       + * IEntry buffers to hold initial round of spraying.
       + */
       +typedef struct Buf Buf;
       +struct Buf
       +{
       +        Part *part;                        /* partition being written */
       +        uchar *bp;                /* current block */
       +        uchar *ep;                /* end of block */
       +        uchar *wp;                /* write position in block */
       +        u64int boffset;                /* start offset */
       +        u64int woffset;                /* next write offset */
       +        u64int eoffset;                /* end offset */
       +        u32int nentry;                /* number of entries written */
       +};
       +
       +static void
       +bflush(Buf *buf)
       +{
       +        u32int bufsize;
       +        
       +        if(buf->woffset >= buf->eoffset)
       +                sysfatal("buf index chunk overflow - need bufger index");
       +        bufsize = buf->ep - buf->bp;
       +        if(writepart(buf->part, buf->woffset, buf->bp, bufsize) < 0){
       +                fprint(2, "write %s: %r\n", buf->part->name);
       +                errors = 1;
       +        }
       +        buf->woffset += bufsize;
       +        memset(buf->bp, 0, bufsize);
       +        buf->wp = buf->bp;
       +}
       +
       +static void
       +bwrite(Buf *buf, IEntry *ie)
       +{
       +        if(buf->wp+IEntrySize > buf->ep)
       +                bflush(buf);
       +        assert(buf->bp <= buf->wp && buf->wp < buf->ep);
       +        packientry(ie, buf->wp);
       +        buf->wp += IEntrySize;
       +        assert(buf->bp <= buf->wp && buf->wp <= buf->ep);
       +        buf->nentry++;
       +}
       +
       +/*
       + * Minibuffer.  In-memory data structure holds our place
       + * in the buffer but has no block data.  We are writing and
       + * reading the minibuffers at the same time.  (Careful!)
       + */
       +typedef struct Minibuf Minibuf;
       +struct Minibuf
       +{
       +        u64int boffset;                /* start offset */
       +        u64int roffset;                /* read offset */
       +        u64int woffset;                /* write offset */
       +        u64int eoffset;                /* end offset */
       +        u32int nentry;                /* # entries left to read */
       +        u32int nwentry;        /* # entries written */
       +};
       +
       +/*
       + * Index entry pool.  Used when trying to shuffle around 
       + * the entries in a big buffer into the corresponding M minibuffers.
       + * Sized to hold M*EntriesPerBlock entries, so that there will always
       + * either be room in the pool for another block worth of entries
       + * or there will be an entire block worth of sorted entries to 
       + * write out.
       + */
       +typedef struct IEntryLink IEntryLink;
       +typedef struct IPool IPool;
       +
       +struct IEntryLink
       +{
       +        uchar ie[IEntrySize];                /* raw IEntry */
       +        IEntryLink *next;                /* next in chain */
       +};
       +
       +struct IPool
       +{
       +        ISect *isect;
       +        u32int buck0;                        /* first bucket in pool */
       +        u32int mbufbuckets;        /* buckets per minibuf */
       +        IEntryLink *entry;                /* all IEntryLinks */
       +        u32int nentry;                        /* # of IEntryLinks */
       +        IEntryLink *free;                /* free list */
       +        u32int nfree;                        /* # on free list */
       +        Minibuf *mbuf;                        /* all minibufs */
       +        u32int nmbuf;                        /* # of minibufs */
       +        IEntryLink **mlist;                /* lists for each minibuf */
       +        u32int *mcount;                /* # on each mlist[i] */
       +        u32int bufsize;                        /* block buffer size */
       +        uchar *rbuf;                        /* read buffer */
       +        uchar *wbuf;                        /* write buffer */
       +        u32int epbuf;                        /* entries per block buffer */
       +};
       +
       +/*
       +static int
       +countsokay(IPool *p)
       +{
       +        int i;
       +        u64int n;
       +        
       +        n = 0;
       +        for(i=0; i<p->nmbuf; i++)
       +                n += p->mcount[i];
       +        n += p->nfree;
       +        if(n != p->nentry){
       +                print("free %ud:", p->nfree);
       +                for(i=0; i<p->nmbuf; i++)
       +                        print(" %ud", p->mcount[i]);
       +                print(" = %lld nentry: %ud\n", n, p->nentry);
       +        }
       +        return n == p->nentry;
       +}
       +*/
        
       -        fprint(2, "building a new index %s using %s for temporary storage\n", mainindex->name, argv[1]);
       +static IPool*
       +mkipool(ISect *isect, Minibuf *mbuf, u32int nmbuf, 
       +        u32int mbufbuckets, u32int bufsize)
       +{
       +        u32int i, nentry;
       +        uchar *data;
       +        IPool *p;
       +        IEntryLink *l;
       +        
       +        nentry = (nmbuf+1)*bufsize / IEntrySize;
       +        p = ezmalloc(sizeof(IPool)
       +                +nentry*sizeof(IEntry)
       +                +nmbuf*sizeof(IEntryLink*)
       +                +nmbuf*sizeof(u32int)
       +                +3*bufsize);
       +        
       +        p->isect = isect;
       +        p->mbufbuckets = mbufbuckets;
       +        p->bufsize = bufsize;
       +        p->entry = (IEntryLink*)(p+1);
       +        p->nentry = nentry;
       +        p->mlist = (IEntryLink**)(p->entry+nentry);
       +        p->mcount = (u32int*)(p->mlist+nmbuf);
       +        p->nmbuf = nmbuf;
       +        p->mbuf = mbuf;
       +        data = (uchar*)(p->mcount+nmbuf);
       +        data += bufsize - (u32int)data%bufsize;
       +        p->rbuf = data;
       +        p->wbuf = data+bufsize;
       +        p->epbuf = bufsize/IEntrySize;
        
       -        part = initpart(argv[1], ORDWR|ODIRECT);
       -        if(part == nil)
       -                sysfatal("can't initialize temporary partition: %r");
       +        for(i=0; i<p->nentry; i++){
       +                l = &p->entry[i];
       +                l->next = p->free;
       +                p->free = l;
       +                p->nfree++;
       +        }
       +        return p;
       +}
        
       -        clumps = sortrawientries(mainindex, part, &base, mainindex->bloom);
       -        if(clumps == TWID64)
       -                sysfatal("can't build sorted index: %r");
       -        fprint(2, "found and sorted index entries for clumps=%lld at %lld\n", clumps, base);
       +/* 
       + * Add the index entry ie to the pool p.
       + * Caller must know there is room.
       + */
       +static void
       +ipoolinsert(IPool *p, uchar *ie)
       +{
       +        u32int buck, x;
       +        IEntryLink *l;
       +
       +        assert(p->free != nil);
       +
       +        buck = score2bucket(p->isect, ie);
       +        x = (buck-p->buck0) / p->mbufbuckets;
       +        if(x >= p->nmbuf){
       +                fprint(2, "buck=%ud mbufbucket=%ud x=%ud\n",
       +                        buck, p->mbufbuckets, x);
       +        }
       +        assert(x < p->nmbuf);
        
       -        if(buildindex(mainindex, part, base, clumps, zero) < 0)
       -                sysfatal("can't build new index: %r");
       +        l = p->free;
       +        p->free = l->next;
       +        p->nfree--;
       +        memmove(l->ie, ie, IEntrySize);
       +        l->next = p->mlist[x];
       +        p->mlist[x] = l;
       +        p->mcount[x]++;
       +}        
       +
       +/*
       + * Pull out a block containing as many
       + * entries as possible for minibuffer x.
       + */
       +static u32int
       +ipoolgetbuf(IPool *p, u32int x)
       +{
       +        uchar *bp, *ep, *wp;
       +        IEntryLink *l;
       +        u32int n;
       +        
       +        bp = p->wbuf;
       +        ep = p->wbuf + p->bufsize;
       +        n = 0;
       +        assert(x < p->nmbuf);
       +        for(wp=bp; wp+IEntrySize<=ep && p->mlist[x]; wp+=IEntrySize){
       +                l = p->mlist[x];
       +                p->mlist[x] = l->next;
       +                p->mcount[x]--;
       +                memmove(wp, l->ie, IEntrySize);
       +                l->next = p->free;
       +                p->free = l;
       +                p->nfree++;
       +                n++;
       +        }
       +        memset(wp, 0, ep-wp);
       +        return n;
       +}
       +
       +/*
       + * Read a block worth of entries from the minibuf
       + * into the pool.  Caller must know there is room.
       + */
       +static void
       +ipoolloadblock(IPool *p, Minibuf *mb)
       +{
       +        u32int i, n;
                
       -        if(mainindex->bloom)
       -                writebloom(mainindex->bloom);
       +        assert(mb->nentry > 0);
       +        assert(mb->roffset >= mb->woffset);
       +        assert(mb->roffset < mb->eoffset);
        
       -        threadexitsall(0);
       +        n = p->bufsize/IEntrySize;
       +        if(n > mb->nentry)
       +                n = mb->nentry;
       +        if(readpart(p->isect->part, mb->roffset, p->rbuf, p->bufsize) < 0)
       +                fprint(2, "readpart %s: %r\n", p->isect->part->name);
       +        else{
       +                for(i=0; i<n; i++)
       +                        ipoolinsert(p, p->rbuf+i*IEntrySize);
       +        }
       +        mb->nentry -= n;
       +        mb->roffset += p->bufsize;
        }
       +
       +/*
       + * Write out a block worth of entries to minibuffer x.
       + * If necessary, pick up the data there before overwriting it.
       + */
       +static void
       +ipoolflush0(IPool *pool, u32int x)
       +{
       +        u32int bufsize;
       +        Minibuf *mb;
       +        
       +        mb = pool->mbuf+x;
       +        bufsize = pool->bufsize;
       +        mb->nwentry += ipoolgetbuf(pool, x);
       +        if(mb->nentry > 0 && mb->roffset == mb->woffset){
       +                assert(pool->nfree >= pool->bufsize/IEntrySize);
       +                /*
       +                 * There will be room in the pool -- we just 
       +                 * removed a block worth.
       +                 */
       +                ipoolloadblock(pool, mb);
       +        }
       +        if(writepart(pool->isect->part, mb->woffset, pool->wbuf, bufsize) < 0)
       +                fprint(2, "writepart %s: %r\n", pool->isect->part->name);
       +        mb->woffset += bufsize;
       +}
       +
       +/*
       + * Write out some full block of entries.
       + * (There must be one -- the pool is almost full!)
       + */
       +static void
       +ipoolflush1(IPool *pool)
       +{
       +        u32int i;
       +
       +        assert(pool->nfree <= pool->epbuf);
       +
       +        for(i=0; i<pool->nmbuf; i++){
       +                if(pool->mcount[i] >= pool->epbuf){
       +                        ipoolflush0(pool, i);
       +                        return;
       +                }
       +        }
       +        /* can't be reached - someone must be full */
       +        sysfatal("ipoolflush1");
       +}
       +
       +/*
       + * Flush all the entries in the pool out to disk.
       + * Nothing more to read from disk.
       + */
       +static void
       +ipoolflush(IPool *pool)
       +{
       +        u32int i;
       +        
       +        for(i=0; i<pool->nmbuf; i++)
       +                while(pool->mlist[i])
       +                        ipoolflush0(pool, i);
       +        assert(pool->nfree == pool->nentry);
       +}
       +
       +/*
       + * Third pass.  Pick up each minibuffer from disk into
       + * memory and then write out the buckets.
       + */
       +
       +/*
       + * Compare two packed index entries.  
       + * Usual ordering except break ties by putting higher
       + * index addresses first (assumes have duplicates
       + * due to corruption in the lower addresses).
       + */
       +static int
       +ientrycmpaddr(const void *va, const void *vb)
       +{
       +        int i;
       +        uchar *a, *b;
       +        
       +        a = (uchar*)va;
       +        b = (uchar*)vb;
       +        i = ientrycmp(a, b);
       +        if(i)
       +                return i;
       +        return -memcmp(a+IEntryAddrOff, b+IEntryAddrOff, 8);
       +}
       +
       +static void
       +zerorange(Part *p, u64int o, u64int e)
       +{
       +        static uchar zero[MaxIoSize];
       +        u32int n;
       +        
       +        for(; o<e; o+=n){
       +                n = sizeof zero;
       +                if(o+n > e)
       +                        n = e-o;
       +                if(writepart(p, o, zero, n) < 0)
       +                        fprint(2, "writepart %s: %r\n", p->name);
       +        }
       +}
       +
       +/*
       + * Load a minibuffer into memory and write out the 
       + * corresponding buckets.
       + */
       +static void
       +sortminibuffer(ISect *is, Minibuf *mb, uchar *buf, u32int nbuf, u32int bufsize)
       +{
       +        uchar *buckdata, *p, *q, *ep;
       +        u32int b, lastb, memsize, n;
       +        u64int o;
       +        IBucket ib;
       +        Part *part;
       +        
       +        part = is->part;
       +        buckdata = emalloc(is->blocksize);
       +        
       +        if(mb->nwentry == 0)
       +                return;
       +
       +        /*
       +         * read entire buffer.
       +         */
       +        assert(mb->nwentry*IEntrySize <= mb->woffset-mb->boffset);
       +        assert(mb->woffset-mb->boffset <= nbuf);
       +        if(readpart(part, mb->boffset, buf, mb->woffset-mb->boffset) < 0){
       +                fprint(2, "readpart %s: %r\n", part->name);
       +                errors = 1;
       +                return;
       +        }
       +        assert(*(uint*)buf != 0xa5a5a5a5);
       +        
       +        /*
       +         * remove fragmentation due to IEntrySize
       +         * not evenly dividing Bufsize
       +         */
       +        memsize = (bufsize/IEntrySize)*IEntrySize;
       +        for(o=mb->boffset, p=q=buf; o<mb->woffset; o+=bufsize){
       +                memmove(p, q, memsize);
       +                p += memsize;
       +                q += bufsize;
       +        }
       +        ep = buf + mb->nwentry*IEntrySize;
       +        assert(ep <= buf+nbuf);
       +
       +        /* 
       +         * sort entries
       +         */
       +        qsort(buf, mb->nwentry, IEntrySize, ientrycmpaddr);
       +
       +        /*
       +         * write buckets out
       +         */
       +        n = 0;
       +        lastb = offset2bucket(is, mb->boffset);
       +        for(p=buf; p<ep; p=q){
       +                b = score2bucket(is, p);
       +                for(q=p; q<ep && score2bucket(is, q)==b; q+=IEntrySize)
       +                        ;
       +                if(lastb+1 < b && zero)
       +                        zerorange(part, bucket2offset(is, lastb+1), bucket2offset(is, b));
       +                if(IBucketSize+(q-p) > is->blocksize)
       +                        sysfatal("bucket overflow - make index bigger");
       +                memmove(buckdata+IBucketSize, p, q-p);
       +                ib.n = (q-p)/IEntrySize;
       +                n += ib.n;
       +                packibucket(&ib, buckdata, is->bucketmagic);
       +                if(writepart(part, bucket2offset(is, b), buckdata, is->blocksize) < 0)
       +                        fprint(2, "write %s: %r\n", part->name);
       +                lastb = b;
       +        }
       +        if(lastb+1 < is->stop-is->start && zero)
       +                zerorange(part, bucket2offset(is, lastb+1), bucket2offset(is, is->stop - is->start));
       +
       +        if(n != mb->nwentry)
       +                fprint(2, "sortminibuffer bug: n=%ud nwentry=%ud have=%ld\n", n, mb->nwentry, (ep-buf)/IEntrySize);
       +
       +        free(buckdata);
       +}
       +
       +static void
       +isectproc(void *v)
       +{
       +        u32int buck, bufbuckets, bufsize, epbuf, i, j;
       +        u32int mbufbuckets, n, nbucket, nn, space;
       +        u32int nbuf, nminibuf, xminiclump, prod;
       +        u64int blocksize, offset, xclump;
       +        uchar *data, *p;
       +        Buf *buf;
       +        IEntry ie;
       +        IPool *ipool;
       +        ISect *is;
       +        Minibuf *mbuf, *mb;
       +        
       +        is = v;
       +        blocksize = is->blocksize;
       +        nbucket = is->stop - is->start;
       +
       +        /*
       +         * Three passes:
       +         *        pass 1 - write index entries from arenas into 
       +         *                large sequential sections on index disk.
       +         *                requires nbuf * bufsize memory.
       +         *
       +         *        pass 2 - split each section into minibufs.
       +         *                requires nminibuf * bufsize memory.
       +         *
       +         *        pass 3 - read each minibuf into memory and
       +         *                write buckets out. 
       +         *                requires entries/minibuf * IEntrySize memory.
       +         * 
       +         * The larger we set bufsize the less seeking hurts us.
       +         * 
       +         * The fewer sections and minibufs we have, the less
       +         * seeking hurts us.
       +         * 
       +         * The fewer sections and minibufs we have, the 
       +         * more entries we end up with in each minibuf
       +         * at the end.  
       +         *
       +         * Shoot for using half our memory to hold each
       +         * minibuf.  The chance of a random distribution 
       +         * getting off by 2x is quite low.  
       +         *
       +         * Once that is decided, figure out the smallest 
       +         * nminibuf and nsection/biggest bufsize we can use
       +         * and still fit in the memory constraints.
       +         */
       +        
       +        /* expected number of clump index entries we'll see */
       +        xclump = nbucket * (double)totalclumps/totalbuckets;
       +        
       +        /* number of clumps we want to see in a minibuf */
       +        xminiclump = isectmem/2/IEntrySize;
       +        
       +        /* total number of minibufs we need */
       +        prod = xclump / xminiclump;
       +        
       +        /* if possible, skip second pass */
       +        if(!dumb && prod*MinBufSize < isectmem){
       +                nbuf = prod;
       +                nminibuf = 1;
       +        }else{
       +                /* otherwise use nsection = sqrt(nmini) */
       +                for(nbuf=1; nbuf*nbuf<prod; nbuf++)
       +                        ;
       +                if(nbuf*MinBufSize > isectmem)
       +                        sysfatal("not enough memory");
       +                nminibuf = nbuf;
       +        }
       +        /* size buffer to use extra memory */
       +        bufsize = MinBufSize;
       +        while(bufsize*2*nbuf <= isectmem && bufsize < MaxBufSize)
       +                bufsize *= 2;
       +        data = emalloc(nbuf*bufsize);
       +        epbuf = bufsize/IEntrySize;
       +
       +        fprint(2, "%T %s: %,ud buckets, %,ud groups, %,ud minigroups, %,ud buffer\n",
       +                is->part->name, nbucket, nbuf, nminibuf, bufsize);
       +        /*
       +         * Accept index entries from arena procs.
       +         */
       +        buf = MKNZ(Buf, nbuf);
       +        p = data;
       +        offset = is->blockbase;
       +        bufbuckets = (nbucket+nbuf-1)/nbuf;
       +        for(i=0; i<nbuf; i++){
       +                buf[i].part = is->part;
       +                buf[i].bp = p;
       +                buf[i].wp = p;
       +                p += bufsize;
       +                buf[i].ep = p;
       +                buf[i].boffset = offset;
       +                buf[i].woffset = offset;
       +                if(i < nbuf-1){
       +                        offset += bufbuckets*blocksize;
       +                        buf[i].eoffset = offset;
       +                }else{
       +                        offset = is->blockbase + nbucket*blocksize;
       +                        buf[i].eoffset = offset;
       +                }
       +        }
       +        assert(p == data+nbuf*bufsize);
       +
       +        n = 0;
       +        while(recv(is->writechan, &ie) == 1){
       +                if(ie.ia.addr == 0)
       +                        break;
       +                buck = score2bucket(is, ie.score);
       +                i = buck/bufbuckets;
       +                assert(i < nbuf);
       +                bwrite(&buf[i], &ie);
       +                n++;
       +        }
       +        add(&indexentries, n);
       +        
       +        nn = 0;
       +        for(i=0; i<nbuf; i++){
       +                bflush(&buf[i]);
       +                buf[i].bp = nil;
       +                buf[i].ep = nil;
       +                buf[i].wp = nil;
       +                nn += buf[i].nentry;
       +        }
       +        if(n != nn)
       +                fprint(2, "isectproc bug: n=%ud nn=%ud\n", n, nn);
       +                
       +        free(data);
       +
       +        fprint(2, "%T %s: reordering\n", is->part->name);
       +        
       +        /*
       +         * Rearrange entries into minibuffers and then
       +         * split each minibuffer into buckets.
       +         */
       +        mbuf = MKN(Minibuf, nminibuf);
       +        mbufbuckets = (bufbuckets+nminibuf-1)/nminibuf;
       +        for(i=0; i<nbuf; i++){
       +                /*
       +                 * Set up descriptors.
       +                 */
       +                n = buf[i].nentry;
       +                nn = 0;
       +                offset = buf[i].boffset;
       +                memset(mbuf, 0, nminibuf*sizeof(mbuf[0]));
       +                for(j=0; j<nminibuf; j++){
       +                        mb = &mbuf[j];
       +                        mb->boffset = offset;
       +                        if(j < nminibuf-1){
       +                                offset += mbufbuckets*blocksize;
       +                                mb->eoffset = offset;
       +                        }else
       +                                mb->eoffset = buf[i].eoffset;
       +                        mb->roffset = mb->boffset;
       +                        mb->woffset = mb->boffset;
       +                        mb->nentry = epbuf * (mb->eoffset - mb->boffset)/bufsize;
       +                        if(mb->nentry > buf[i].nentry)
       +                                mb->nentry = buf[i].nentry;
       +                        buf[i].nentry -= mb->nentry;
       +                        nn += mb->nentry;
       +                }
       +                if(n != nn)
       +                        fprint(2, "isectproc bug2: n=%ud nn=%ud (i=%d)\n", n, nn, i);;
       +                /*
       +                 * Rearrange.
       +                 */
       +                if(!dumb && nminibuf == 1){
       +                        mbuf[0].nwentry = mbuf[0].nentry;
       +                        mbuf[0].woffset = buf[i].woffset;
       +                }else{
       +                        ipool = mkipool(is, mbuf, nminibuf, mbufbuckets, bufsize);
       +                        ipool->buck0 = bufbuckets*i;
       +                        for(j=0; j<nminibuf; j++){
       +                                mb = &mbuf[j];
       +                                while(mb->nentry > 0){
       +                                        if(ipool->nfree < epbuf){
       +                                                ipoolflush1(ipool);
       +                                                /* ipoolflush1 might change mb->nentry */        
       +                                                continue;
       +                                        }
       +                                        assert(ipool->nfree >= epbuf);
       +                                        ipoolloadblock(ipool, mb);
       +                                }
       +                        }
       +                        ipoolflush(ipool);
       +                        nn = 0;
       +                        for(j=0; j<nminibuf; j++)
       +                                nn += mbuf[j].nwentry;
       +                        if(n != nn)
       +                                fprint(2, "isectproc bug3: n=%ud nn=%ud (i=%d)\n", n, nn, i);
       +                        free(ipool);
       +                }
       +
       +                /*
       +                 * Make buckets.
       +                 */
       +                space = 0;
       +                for(j=0; j<nminibuf; j++)
       +                        if(space < mbuf[j].woffset - mbuf[j].boffset)
       +                                space = mbuf[j].woffset - mbuf[j].boffset;
       +
       +                data = emalloc(space);
       +                for(j=0; j<nminibuf; j++){
       +                        mb = &mbuf[j];
       +                        sortminibuffer(is, mb, data, space, bufsize);
       +                }
       +                free(data);
       +        }
       +                
       +        sendp(isectdonechan, is);
       +}
       +
       +
       +
 (DIR) diff --git a/src/cmd/venti/srv/checkindex.c b/src/cmd/venti/srv/checkindex.c
       t@@ -109,7 +109,7 @@ checkindex(Index *ix, Part *part, u64int off, u64int clumps, int zero)
                int ok, bok;
        u64int found = 0;
        
       -/*ZZZ make buffer size configurable */
       +/* ZZZ make buffer size configurable */
                b = alloczblock(ix->blocksize, 0, ix->blocksize);
                z = alloczblock(ix->blocksize, 1, ix->blocksize);
                ies = initiestream(part, off, clumps, 64*1024);
       t@@ -260,6 +260,8 @@ threadmain(int argc, char *argv[])
        
                if(initventi(argv[0], &conf) < 0)
                        sysfatal("can't init venti: %r");
       +        if(mainindex->bloom && loadbloom(mainindex->bloom) < 0)
       +                sysfatal("can't load bloom filter: %r");
                oldbloom = mainindex->bloom;
                newbloom = nil;
                if(oldbloom){
 (DIR) diff --git a/src/cmd/venti/srv/clump.c b/src/cmd/venti/srv/clump.c
       t@@ -91,7 +91,7 @@ clumpmagic(Arena *arena, u64int aa)
        {
                u8int buf[U32Size];
        
       -        if(readarena(arena, aa, buf, U32Size) < 0)
       +        if(readarena(arena, aa, buf, U32Size) == TWID32)
                        return TWID32;
                return unpackmagic(buf);
        }
       t@@ -138,6 +138,11 @@ loadclump(Arena *arena, u64int aa, int blocks, Clump *cl, u8int *score, int veri
                        freezblock(cb);
                        return nil;
                }
       +        if(cl->info.type == VtCorruptType){
       +                seterr(EOk, "clump is marked corrupt");
       +                freezblock(cb);
       +                return nil;
       +        }
                n -= ClumpSize;
                if(n < cl->info.size){
                        freezblock(cb);
 (DIR) diff --git a/src/cmd/venti/srv/conv.c b/src/cmd/venti/srv/conv.c
       t@@ -23,7 +23,7 @@ static struct {
                ArenaHeadMagic, "ArenaHeadMagic",
                ArenaMagic, "ArenaMagic",
                ISectMagic, "ISectMagic",
       -        BloomMagic, "BloomMagic"
       +        BloomMagic, "BloomMagic",
        };
        
        static char*
       t@@ -138,9 +138,6 @@ unpackarena(Arena *arena, u8int *buf)
                p += U64Size;
                arena->diskstats.sealed = U8GET(p);
                p += U8Size;
       -
       -        arena->memstats = arena->diskstats;
       -
                switch(arena->version){
                case ArenaVersion4:
                        sz = ArenaSize4;
       t@@ -153,6 +150,35 @@ unpackarena(Arena *arena, u8int *buf)
                        seterr(ECorrupt, "arena has bad version number %d", arena->version);
                        return -1;
                }
       +        /*
       +         * Additional fields for the memstats version of the stats.
       +         * Diskstats reflects what is committed to the index.
       +         * Memstats reflects what is in the arena.  Originally intended
       +         * this to be a version 5 extension, but might as well use for
       +         * all the existing version 4 arenas too.
       +         *
       +         * To maintain backwards compatibility with existing venti
       +         * installations using the older format, we define that if 
       +         * memstats == diskstats, then the extension fields are not
       +         * included (see packarena below).  That is, only partially
       +         * indexed arenas have these fields.  Fully indexed arenas
       +         * (in particular, sealed arenas) do not.
       +         */
       +        if(U8GET(p) == 1){
       +                sz += ArenaSize5a-ArenaSize5;
       +                p += U8Size;
       +                arena->memstats.clumps = U32GET(p);
       +                p += U32Size;
       +                arena->memstats.cclumps = U32GET(p);
       +                p += U32Size;
       +                arena->memstats.used = U64GET(p);
       +                p += U64Size;
       +                arena->memstats.uncsize = U64GET(p);
       +                p += U64Size;
       +                arena->memstats.sealed = U8GET(p);
       +                p += U8Size;
       +        }else
       +                arena->memstats = arena->diskstats;
                if(buf + sz != p)
                        sysfatal("unpackarena unpacked wrong amount");
        
       t@@ -162,6 +188,12 @@ unpackarena(Arena *arena, u8int *buf)
        int
        packarena(Arena *arena, u8int *buf)
        {
       +        return _packarena(arena, buf, 0);
       +}
       +
       +int
       +_packarena(Arena *arena, u8int *buf, int forceext)
       +{
                int sz;
                u8int *p;
                u32int t32;
       t@@ -207,6 +239,30 @@ packarena(Arena *arena, u8int *buf)
                p += U64Size;
                U8PUT(p, arena->diskstats.sealed);
                p += U8Size;
       +        
       +        /*
       +         * Extension fields; see above.
       +         */
       +        if(forceext
       +        || arena->memstats.clumps != arena->diskstats.clumps
       +        || arena->memstats.cclumps != arena->diskstats.cclumps
       +        || arena->memstats.used != arena->diskstats.used
       +        || arena->memstats.uncsize != arena->diskstats.uncsize
       +        || arena->memstats.sealed != arena->diskstats.sealed){
       +                sz += ArenaSize5a - ArenaSize5;
       +                U8PUT(p, 1);
       +                p += U8Size;
       +                U32PUT(p, arena->memstats.clumps);
       +                p += U32Size;
       +                U32PUT(p, arena->memstats.cclumps);
       +                p += U32Size;
       +                U64PUT(p, arena->memstats.used, t32);        
       +                p += U64Size;
       +                U64PUT(p, arena->memstats.uncsize, t32);
       +                p += U64Size;
       +                U8PUT(p, arena->memstats.sealed);
       +                p += U8Size;
       +        }
        
                if(buf + sz != p)
                        sysfatal("packarena packed wrong amount");
       t@@ -525,6 +581,8 @@ unpackientry(IEntry *ie, u8int *buf)
                p += U32Size;
                ie->train = U16GET(p);
                p += U16Size;
       +        if(p - buf != IEntryAddrOff)
       +                sysfatal("unpackentry bad IEntryAddrOff amount");
                ie->ia.addr = U64GET(p);
        if(ie->ia.addr>>56) print("%.8H => %llux\n", p, ie->ia.addr);
                p += U64Size;
 (DIR) diff --git a/src/cmd/venti/srv/dat.h b/src/cmd/venti/srv/dat.h
       t@@ -75,23 +75,17 @@ enum
                /*
                 * magic numbers on disk
                 */
       -/*        _ClumpMagic                = 0xd15cb10cU,        / * clump header, deprecated */
       -#define        _ClumpMagic        0xd15cb10cU
       +        _ClumpMagic                = 0xd15cb10cU,        /* clump header, deprecated */
                ClumpFreeMagic                = 0,                /* free clump; terminates active clump log */
        
       -/*        ArenaPartMagic                = 0xa9e4a5e7U,        / * arena partition header */
       -/*        ArenaMagic                = 0xf2a14eadU,        / * arena trailer */
       -/*        ArenaHeadMagic                = 0xd15c4eadU,        / * arena header */
       -#define        ArenaPartMagic                0xa9e4a5e7U
       -#define        ArenaMagic                0xf2a14eadU
       -#define        ArenaHeadMagic                0xd15c4eadU
       -
       -/*        BloomMagic                = 0xb1004eadU,        / * bloom filter header */
       -#define        BloomMagic                0xb1004eadU
       +        ArenaPartMagic                = 0xa9e4a5e7U,        /* arena partition header */
       +        ArenaMagic                = 0xf2a14eadU,        /* arena trailer */
       +        ArenaHeadMagic                = 0xd15c4eadU,        /* arena header */
       +        
       +        BloomMagic                = 0xb1004eadU,        /* bloom filter header */
                BloomMaxHash        = 32,
        
       -/*        ISectMagic                = 0xd15c5ec7U,        / * index header */
       -#define        ISectMagic                0xd15c5ec7U
       +        ISectMagic                = 0xd15c5ec7U,        /* index header */
        
                ArenaPartVersion        = 3,
                ArenaVersion4                = 4,
       t@@ -120,6 +114,7 @@ enum
                ArenaPartSize                = 4 * U32Size,
                ArenaSize4                = 2 * U64Size + 6 * U32Size + ANameSize + U8Size,
                ArenaSize5                        = ArenaSize4 + U32Size,
       +        ArenaSize5a                = ArenaSize5 + 2 * U8Size + 2 * U32Size + 2 * U64Size,
                ArenaHeadSize4                = U64Size + 3 * U32Size + ANameSize,
                ArenaHeadSize5                = ArenaHeadSize4 + U32Size,
                BloomHeadSize        = 4 * U32Size,
       t@@ -137,10 +132,14 @@ enum
                 */
                IBucketSize                = U32Size + U16Size,
                IEntrySize                = U64Size + U32Size + 2*U16Size + 2*U8Size + VtScoreSize,
       -        IEntryTypeOff                = VtScoreSize + U64Size + U32Size + 2 * U16Size,
       +        IEntryTypeOff                = VtScoreSize + U32Size + U16Size + U64Size + U16Size,
       +        IEntryAddrOff                = VtScoreSize + U32Size + U16Size,
        
                MaxClumpBlocks                =  (VtMaxLumpSize + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog,
       +        
       +        IcacheFrac                = 1000000,        /* denominator */
        
       +        SleepForever                = 1000000000,        /* magic value for sleep time */
                /*
                 * dirty flags - order controls disk write order
                 */
       t@@ -356,13 +355,11 @@ struct Arena
                int                blocksize;                /* size of block to read or write */
                u64int                base;                        /* base address on disk */
                u64int                size;                        /* total space in the arena */
       -        u64int                limit;                        /* storage limit for clumps */
                u8int                score[VtScoreSize];        /* score of the entire sealed & summed arena */
        
                int                clumpmax;                /* ClumpInfos per block */
                AState                mem;
                int                inqueue;
       -        DigestState        sha1;
        
                /*
                 * fields stored on disk
       t@@ -477,6 +474,8 @@ struct ISect
                u32int                tabsize;                /* max. bytes in index config */
                Channel        *writechan;
                Channel        *writedonechan;
       +        void                *ig;                /* used by buildindex only */
       +        int                ng;
        
                /*
                 * fields stored on disk
       t@@ -716,7 +715,18 @@ extern        int                writestodevnull;        /* dangerous - for performance debugging */
        extern        int                collectstats;
        extern        QLock        memdrawlock;
        extern        int                icachesleeptime;
       +extern        int                minicachesleeptime;
        extern        int                arenasumsleeptime;
       +extern        int                manualscheduling;
       +extern        int                l0quantum;
       +extern        int                l1quantum;
       +extern        int                ignorebloom;
       +extern        int                icacheprefetch;
       +extern        int                syncwrites;
       +
       +extern        Stats        *stathist;
       +extern        int        nstathist;
       +extern        ulong        stattime;
        
        #ifndef PLAN9PORT
        #pragma varargck type "V" uchar*
 (DIR) diff --git a/src/cmd/venti/srv/dcache.c b/src/cmd/venti/srv/dcache.c
       t@@ -34,7 +34,7 @@ enum
        {
                HashLog                = 9,
                HashSize        = 1<<HashLog,
       -        HashMask        = HashSize - 1
       +        HashMask        = HashSize - 1,
        };
        
        struct DCache
       t@@ -212,8 +212,6 @@ return;
                        lastmiss.part = part;
                        lastmiss.addr = addr;
                }
       -
       -/*        fprint(2, "%s %llx %s\n", part->name, addr, miss ? "miss" : "hit"); */
        }
        
        int
       t@@ -230,6 +228,7 @@ rareadpart(Part *part, u64int addr, u8int *buf, uint n, int load)
                }
                if(load != 2 || addr >= part->size){        /* addr >= part->size: let readpart do the error */        
                        runlock(&ralock);
       +                diskaccess(0);
                        return readpart(part, addr, buf, n);
                }
        
       t@@ -239,6 +238,7 @@ fprint(2, "raread %s %llx\n", part->name, addr);
                nn = dcache.ramax;
                if(addr+nn > part->size)
                        nn = part->size - addr;
       +        diskaccess(0);
                if(readpart(part, addr, dcache.rabuf, nn) < 0){
                        wunlock(&ralock);
                        return -1;
       t@@ -297,7 +297,6 @@ _getdblock(Part *part, u64int addr, int mode, int load)
                /*
                 * look for the block in the cache
                 */
       -/*checkdcache(); */
                qlock(&dcache.lock);
        again:
                for(b = dcache.heads[h]; b != nil; b = b->next){
       t@@ -367,7 +366,6 @@ found:
                        fixheap(b->heap, b);
        
                qunlock(&dcache.lock);
       -/*checkdcache(); */
        
                trace(TraceBlock, "getdblock lock");
                addstat(StatDblockStall, 1);
       t@@ -427,7 +425,6 @@ putdblock(DBlock *b)
                else
                        wunlock(&b->lock);
        
       -/*checkdcache(); */
                qlock(&dcache.lock);
                if(--b->ref == 0 && !b->dirty){
                        if(b->heap == TWID32)
       t@@ -435,7 +432,6 @@ putdblock(DBlock *b)
                        rwakeupall(&dcache.full);
                }
                qunlock(&dcache.lock);
       -/*checkdcache(); */
        }
        
        void
       t@@ -474,6 +470,25 @@ dirtydblock(DBlock *b, int dirty)
                qunlock(&dcache.lock);
        }
        
       +static void
       +unchain(DBlock *b)
       +{
       +        ulong h;
       +        
       +        /*
       +         * unchain the block
       +         */
       +        if(b->prev == nil){
       +                h = pbhash(b->addr);
       +                if(dcache.heads[h] != b)
       +                        sysfatal("bad hash chains in disk cache");
       +                dcache.heads[h] = b->next;
       +        }else
       +                b->prev->next = b->next;
       +        if(b->next != nil)
       +                b->next->prev = b->prev;
       +}
       +
        /*
         * remove some block from use and update the free list and counters
         */
       t@@ -481,7 +496,6 @@ static DBlock*
        bumpdblock(void)
        {
                DBlock *b;
       -        ulong h;
        
                trace(TraceBlock, "bumpdblock enter");
                b = dcache.free;
       t@@ -512,22 +526,28 @@ bumpdblock(void)
        
                trace(TraceBlock, "bumpdblock bumping %s 0x%llux", b->part->name, b->addr);
        
       -        /*
       -         * unchain the block
       -         */
       -        if(b->prev == nil){
       -                h = pbhash(b->addr);
       -                if(dcache.heads[h] != b)
       -                        sysfatal("bad hash chains in disk cache");
       -                dcache.heads[h] = b->next;
       -        }else
       -                b->prev->next = b->next;
       -        if(b->next != nil)
       -                b->next->prev = b->prev;
       -
       +        unchain(b);
                return b;
        }
        
       +void
       +emptydcache(void)
       +{
       +        DBlock *b;
       +        
       +        qlock(&dcache.lock);
       +        while(dcache.nheap > 0){
       +                b = dcache.heap[0];
       +                delheap(b);
       +                if(!b->ref && !b->dirty){
       +                        unchain(b);
       +                        b->next = dcache.free;
       +                        dcache.free = b;
       +                }
       +        }
       +        qunlock(&dcache.lock);
       +}
       +
        /*
         * delete an arbitrary block from the heap
         */
       t@@ -683,6 +703,7 @@ static int
        parallelwrites(DBlock **b, DBlock **eb, int dirty)
        {
                DBlock **p, **q;
       +
                for(p=b; p<eb && (*p)->dirty == dirty; p++){
                        assert(b<=p && p<eb);
                        sendp((*p)->part->writechan, *p);
       t@@ -803,6 +824,7 @@ writeproc(void *v)
                        trace(TraceProc, "wlock %s 0x%llux", p->name, b->addr);
                        wlock(&b->lock);
                        trace(TraceProc, "writepart %s 0x%llux", p->name, b->addr);
       +                diskaccess(0);
                        if(writepart(p, b->addr, b->data, b->size) < 0)
                                fprint(2, "write error: %r\n"); /* XXX details! */
                        addstat(StatApartWrite, 1);
 (DIR) diff --git a/src/cmd/venti/srv/disksched.c b/src/cmd/venti/srv/disksched.c
       t@@ -0,0 +1,88 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +ulong lasttime[2];
       +int manualscheduling;
       +int l0quantum = 120;
       +int l1quantum = 120;
       +ulong lasticachechange;
       +
       +void
       +disksched(void)
       +{
       +        int p, nwrite, nflush, ndirty, tdirty, toflush;
       +        ulong t;
       +        vlong cflush;
       +        Stats *prev;
       +        
       +        /*
       +         * no locks because all the data accesses are atomic.
       +         */
       +        t = time(0);
       +        if(manualscheduling){
       +                lasticachechange = t;
       +                return;
       +        }
       +
       +        if(t-lasttime[0] < l0quantum){
       +                /* level-0 disk access going on */
       +                p = icachedirtyfrac();
       +                if(p < IcacheFrac*5/10){        /* can wait */
       +                        icachesleeptime = SleepForever;
       +                        lasticachechange = t;
       +                }else if(p > IcacheFrac*9/10){        /* can't wait */
       +                        icachesleeptime = 0;
       +                        lasticachechange = t;
       +                }else if(t-lasticachechange > 60){
       +                        /* have minute worth of data for current rate */
       +                        prev = &stathist[(stattime-60+nstathist)%nstathist];
       +
       +                        /* # entries written to index cache */
       +                        nwrite = stats.n[StatIcacheWrite] - prev->n[StatIcacheWrite];
       +                        
       +                        /* # dirty entries in index cache */
       +                        ndirty = stats.n[StatIcacheDirty] - prev->n[StatIcacheDirty];
       +                        
       +                        /* # entries flushed to disk */
       +                        nflush = nwrite - ndirty;
       +                        
       +                        /* want to stay around 70% dirty */
       +                        tdirty = (vlong)stats.n[StatIcacheSize]*700/1000;
       +                        
       +                        /* assume nflush*icachesleeptime is a constant */
       +                        cflush = (vlong)nflush*(icachesleeptime+1);
       +                        
       +                        /* computer number entries to write in next minute */
       +                        toflush = nwrite + (stats.n[StatIcacheDirty] - tdirty);
       +                        
       +                        /* schedule for  that many */
       +                        if(toflush <= 0 || cflush/toflush > 100000)
       +                                icachesleeptime = SleepForever;
       +                        else
       +                                icachesleeptime = cflush/toflush;
       +                }
       +                arenasumsleeptime = SleepForever;
       +                return;
       +        }
       +        if(t-lasttime[1] < l1quantum){
       +                /* level-1 disk access (icache flush) going on */
       +                icachesleeptime = 0;
       +                arenasumsleeptime = SleepForever;
       +                return;
       +        }
       +        /* no disk access going on - no holds barred*/
       +        icachesleeptime = 0;
       +        arenasumsleeptime = 0;
       +}
       +
       +void
       +diskaccess(int level)
       +{
       +        if(level < 0 || level >= nelem(lasttime)){
       +                fprint(2, "bad level in diskaccess; caller=%lux\n", getcallerpc(&level));
       +                return;
       +        }
       +        lasttime[level] = time(0);
       +}
       +
 (DIR) diff --git a/src/cmd/venti/srv/findscore.c b/src/cmd/venti/srv/findscore.c
       t@@ -27,7 +27,7 @@ findscore(Arena *arena, uchar *score)
                u32int clump;
                int i, n, found;
        
       -/*ZZZ remove fprint? */
       +//ZZZ remove fprint?
                if(arena->memstats.clumps)
                        fprint(2, "reading directory for arena=%s with %d entries\n", arena->name, arena->memstats.clumps);
        
 (DIR) diff --git a/src/cmd/venti/srv/fixarenas.c b/src/cmd/venti/srv/fixarenas.c
       t@@ -0,0 +1,1894 @@
       +/*
       + * Check and fix an arena partition.
       + *
       + * This is a lot grittier than the rest of Venti because
       + * it can't just give up if a byte here or there is wrong.
       + *
       + * The rule here (hopefully followed!) is that block corruption
       + * only ever has a local effect -- there are no blocks that you
       + * can wipe out that will cause large portions of 
       + * uncorrupted data blocks to be useless.
       + */
       +
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include "whack.h"
       +
       +#pragma varargck type "z" uvlong
       +#pragma varargck type "z" vlong
       +#pragma varargck type "t" uint
       +
       +enum
       +{
       +        K = 1024,
       +        M = 1024*1024,
       +        G = 1024*1024*1024,
       +        
       +        Block = 4096,
       +};
       +
       +int debugsha1;
       +
       +int verbose;
       +Part *part;
       +char *file;
       +char *basename;
       +char *dumpbase;
       +int fix;
       +int badreads;
       +int unseal;
       +uchar zero[MaxDiskBlock];
       +
       +Arena lastarena;
       +ArenaPart ap;
       +uvlong arenasize;
       +int nbadread;
       +int nbad;
       +uvlong partend;
       +void checkarena(vlong, int);
       +
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: fixarenas [-fv] [-a arenasize] [-b blocksize] file [ranges]\n");
       +        threadexitsall(0);
       +}
       +
       +/*
       + * Format number in simplest way that is okay with unittoull.
       + */
       +static int
       +zfmt(Fmt *fmt)
       +{
       +        vlong x;
       +        
       +        x = va_arg(fmt->args, vlong);
       +        if(x == 0)
       +                return fmtstrcpy(fmt, "0");
       +        if(x%G == 0)
       +                return fmtprint(fmt, "%lldG", x/G);
       +        if(x%M == 0)
       +                return fmtprint(fmt, "%lldM", x/M);
       +        if(x%K == 0)
       +                return fmtprint(fmt, "%lldK", x/K);
       +        return fmtprint(fmt, "%lld", x);
       +}
       +
       +/*
       + * Format time like ctime without newline.
       + */
       +static int
       +tfmt(Fmt *fmt)
       +{
       +        uint t;
       +        char buf[30];
       +        
       +        t = va_arg(fmt->args, uint);
       +        strcpy(buf, ctime(t));
       +        buf[28] = 0;
       +        return fmtstrcpy(fmt, buf);
       +}
       +
       +/*
       + * Coalesce messages about unreadable sectors into larger ranges.
       + * bad(0, 0) flushes the buffer.
       + */
       +static void
       +bad(char *msg, vlong o, int len)
       +{
       +        static vlong lb0, lb1;
       +        static char *lmsg;
       +
       +        if(msg == nil)
       +                msg = lmsg;
       +        if(o == -1){
       +                lmsg = nil;
       +                lb0 = 0;
       +                lb1 = 0;
       +                return;
       +        }
       +        if(lb1 != o || (msg && lmsg && strcmp(msg, lmsg) != 0)){
       +                if(lb0 != lb1)
       +                        print("%s %#llux+%#llux (%,lld+%,lld)\n",
       +                                lmsg, lb0, lb1-lb0, lb0, lb1-lb0);
       +                lb0 = o;
       +        }
       +        lmsg = msg;
       +        lb1 = o+len;
       +}
       +
       +/*
       + * Read in the len bytes of data at the offset.  If can't for whatever reason,
       + * fill it with garbage but print an error.
       + */
       +static uchar*
       +readdisk(uchar *buf, vlong offset, int len)
       +{
       +        int i, j, k, n;
       +
       +        if(offset >= partend){
       +                memset(buf, 0xFB, sizeof buf);
       +                return buf;
       +        }
       +        
       +        if(offset+len > partend){
       +                memset(buf, 0xFB, sizeof buf);
       +                len = partend - offset;
       +        }
       +
       +        if(readpart(part, offset, buf, len) >= 0)
       +                return buf;
       +        
       +        /*
       +         * The read failed.  Clear the buffer to nonsense, and
       +         * then try reading in smaller pieces.  If that fails,
       +         * read in even smaller pieces.  And so on down to sectors.
       +         */
       +        memset(buf, 0xFD, len);
       +        for(i=0; i<len; i+=64*K){
       +                n = 64*K;
       +                if(i+n > len)
       +                        n = len-i;
       +                if(readpart(part, offset+i, buf+i, n) >= 0)
       +                        continue;
       +                for(j=i; j<len && j<i+64*K; j+=4*K){
       +                        n = 4*K;
       +                        if(j+n > len)
       +                                n = len-j;
       +                        if(readpart(part, offset+j, buf+j, n) >= 0)
       +                                continue;
       +                        for(k=j; k<len && k<j+4*K; k+=512){
       +                                if(readpart(part, offset+k, buf+k, 512) >= 0)
       +                                        continue;
       +                                bad("disk read failed at", k, 512);
       +                                badreads++;
       +                        }
       +                }
       +        }
       +        bad(nil, 0, 0);
       +        return buf;
       +}
       +
       +/*
       + * Buffer to support running SHA1 hash of the disk.
       + */
       +typedef struct Shabuf Shabuf;
       +struct Shabuf
       +{
       +        int fd;
       +        vlong offset;
       +        DigestState state;
       +        int rollback;
       +        vlong r0;
       +        DigestState *hist;
       +        int nhist;
       +};
       +
       +void
       +sbdebug(Shabuf *sb, char *file)
       +{
       +        int fd;
       +        
       +        if(sb->fd > 0){
       +                close(sb->fd);
       +                sb->fd = 0;
       +        }
       +        if((fd = create(file, OWRITE, 0666)) < 0)
       +                return;
       +        if(fd == 0){
       +                fd = dup(fd, -1);
       +                close(0);
       +        }
       +        sb->fd = fd;
       +}
       +
       +void
       +sbupdate(Shabuf *sb, uchar *p, vlong offset, int len)
       +{
       +        int n, x;
       +        vlong o;
       +
       +        if(sb->rollback && !sb->hist){
       +                sb->r0 = offset;
       +                sb->nhist = 1;
       +                sb->hist = vtmalloc(sb->nhist*sizeof *sb->hist);
       +                memset(sb->hist, 0, sizeof sb->hist[0]);
       +        }
       +        if(sb->r0 == 0)
       +                sb->r0 = offset;
       +
       +        if(sb->offset < offset || sb->offset >= offset+len){
       +                if(0) print("sbupdate %p %#llux+%d but offset=%#llux\n",
       +                        p, offset, len, sb->offset);
       +                return;
       +        }
       +        x = sb->offset - offset;
       +        if(0) print("sbupdate %p %#llux+%d skip %d\n",
       +                sb, offset, len, x);
       +        if(x){
       +                p += x;
       +                offset += x;
       +                len -= x;
       +        }
       +        assert(sb->offset == offset);
       +        
       +        if(sb->fd > 0)
       +                pwrite(sb->fd, p, len, offset - sb->r0);
       +
       +        if(!sb->rollback){
       +                sha1(p, len, nil, &sb->state);
       +                sb->offset += len;
       +                return;
       +        }
       +        
       +        /* save state every 4M so we can roll back quickly */
       +        o = offset - sb->r0;
       +        while(len > 0){
       +                n = 4*M - o%(4*M);
       +                if(n > len)
       +                        n = len;
       +                sha1(p, n, nil, &sb->state);
       +                sb->offset += n;
       +                o += n;
       +                p += n;
       +                len -= n;
       +                if(o%(4*M) == 0){
       +                        x = o/(4*M);
       +                        if(x >= sb->nhist){
       +                                if(x != sb->nhist)
       +                                        print("oops! x=%d nhist=%d\n", x, sb->nhist);
       +                                sb->nhist += 32;
       +                                sb->hist = vtrealloc(sb->hist, sb->nhist*sizeof *sb->hist);
       +                        }
       +                        sb->hist[x] = sb->state;
       +                }
       +        }                
       +}
       +
       +void
       +sbdiskhash(Shabuf *sb, vlong eoffset)
       +{
       +        static uchar dbuf[4*M];
       +        int n;
       +        
       +        while(sb->offset < eoffset){
       +                n = sizeof dbuf;
       +                if(sb->offset+n > eoffset)
       +                        n = eoffset - sb->offset;
       +                readdisk(dbuf, sb->offset, n);
       +                sbupdate(sb, dbuf, sb->offset, n);
       +        }
       +}
       +
       +void
       +sbrollback(Shabuf *sb, vlong offset)
       +{
       +        int x;
       +        vlong o;
       +        Dir d;
       +        
       +        if(!sb->rollback || !sb->r0){
       +                print("cannot rollback sha\n");
       +                return;
       +        }
       +        if(offset >= sb->offset)
       +                return;
       +        o = offset - sb->r0;
       +        x = o/(4*M);
       +        if(x >= sb->nhist){
       +                print("cannot rollback sha\n");
       +                return;
       +        }
       +        sb->state = sb->hist[x];
       +        sb->offset = sb->r0 + x*4*M;
       +        assert(sb->offset <= offset);
       +        
       +        if(sb->fd > 0){
       +                nulldir(&d);
       +                d.length = sb->offset - sb->r0;
       +                dirfwstat(sb->fd, &d);
       +        }
       +}
       +
       +void
       +sbscore(Shabuf *sb, uchar *score)
       +{
       +        if(sb->hist){
       +                free(sb->hist);
       +                sb->hist = nil;
       +        }
       +        sha1(nil, 0, score, &sb->state);
       +}
       +
       +/*
       + * If we're fixing arenas, then editing this memory edits the disk!
       + * It will be written back out as new data is paged in. 
       + */
       +uchar buf[4*M];
       +uchar sbuf[4*M];
       +vlong bufoffset;
       +int buflen;
       +
       +static void pageout(void);
       +static uchar*
       +pagein(vlong offset, int len)
       +{
       +        pageout();
       +        if(offset >= partend){
       +                memset(buf, 0xFB, sizeof buf);
       +                return buf;
       +        }
       +        
       +        if(offset+len > partend){
       +                memset(buf, 0xFB, sizeof buf);
       +                len = partend - offset;
       +        }
       +        bufoffset = offset;
       +        buflen = len;
       +        readdisk(buf, offset, len);
       +        memmove(sbuf, buf, len);
       +        return buf;
       +}
       +
       +static void
       +pageout(void)
       +{
       +        if(buflen==0 || !fix || memcmp(buf, sbuf, buflen) == 0){
       +                buflen = 0;
       +                return;
       +        }
       +        if(writepart(part, bufoffset, buf, buflen) < 0)
       +                print("disk write failed at %#llux+%#ux (%,lld+%,d)\n",
       +                        bufoffset, buflen, bufoffset, buflen);
       +        buflen = 0;
       +}
       +
       +static void
       +zerorange(vlong offset, int len)
       +{
       +        int i;
       +        vlong ooff;
       +        int olen;
       +        enum { MinBlock = 4*K, MaxBlock = 8*K };
       +        
       +        if(0)
       +        if(bufoffset <= offset && offset+len <= bufoffset+buflen){
       +                memset(buf+(offset-bufoffset), 0, len);
       +                return;
       +        }
       +        
       +        ooff = bufoffset;
       +        olen = buflen;
       +        
       +        i = offset%MinBlock;
       +        if(i+len < MaxBlock){
       +                pagein(offset-i, (len+MinBlock-1)&~(MinBlock-1));
       +                memset(buf+i, 0, len);
       +        }else{
       +                pagein(offset-i, MaxBlock);
       +                memset(buf+i, 0, MaxBlock-i);
       +                offset += MaxBlock-i;
       +                len -= MaxBlock-i;
       +                while(len >= MaxBlock){
       +                        pagein(offset, MaxBlock);
       +                        memset(buf, 0, MaxBlock);
       +                        offset += MaxBlock;
       +                        len -= MaxBlock;
       +                }
       +                pagein(offset, (len+MinBlock-1)&~(MinBlock-1));
       +                memset(buf, 0, len);
       +        }
       +        pagein(ooff, olen);
       +}
       +
       +/*
       + * read/write integers
       + *
       +static void
       +p16(uchar *p, u16int u)
       +{
       +        p[0] = (u>>8) & 0xFF;
       +        p[1] = u & 0xFF;
       +}
       +*/
       +
       +static u16int
       +u16(uchar *p)
       +{
       +        return (p[0]<<8)|p[1];
       +}
       +
       +static void
       +p32(uchar *p, u32int u)
       +{
       +        p[0] = (u>>24) & 0xFF;
       +        p[1] = (u>>16) & 0xFF;
       +        p[2] = (u>>8) & 0xFF;
       +        p[3] = u & 0xFF;
       +}
       +
       +static u32int
       +u32(uchar *p)
       +{
       +        return (p[0]<<24)|(p[1]<<16)|(p[2]<<8)|p[3];
       +}
       +
       +/*
       +static void
       +p64(uchar *p, u64int u)
       +{
       +        p32(p, u>>32);
       +        p32(p, u);
       +}
       +*/
       +
       +static u64int
       +u64(uchar *p)
       +{
       +        return ((u64int)u32(p)<<32) | u32(p+4);
       +}
       +
       +static int
       +vlongcmp(const void *va, const void *vb)
       +{
       +        vlong a, b;
       +        
       +        a = *(vlong*)va;
       +        b = *(vlong*)vb;
       +        if(a < b)
       +                return -1;
       +        if(b > a)
       +                return 1;
       +        return 0;
       +}
       +
       +/* D and S are in draw.h */
       +#define D VD
       +#define S VS
       +
       +enum
       +{
       +        D = 0x10000,
       +        Z = 0x20000,
       +        S = 0x30000,
       +        T = 0x40000,
       +        N = 0xFFFF
       +};
       +typedef struct Info Info;
       +struct Info
       +{
       +        int len;
       +        char *name;
       +};
       +
       +Info partinfo[] = {
       +        4,        "magic",
       +        D|4,        "version",
       +        Z|4,        "blocksize",
       +        4,        "arenabase",
       +        0
       +};
       +
       +Info headinfo4[] = {
       +        4,        "magic",
       +        D|4,        "version",
       +        S|ANameSize,        "name",
       +        Z|4,        "blocksize",
       +        Z|8,        "size",
       +        0
       +};
       +
       +Info headinfo5[] = {
       +        4,        "magic",
       +        D|4,        "version",
       +        S|ANameSize,        "name",
       +        Z|4,        "blocksize",
       +        Z|8,        "size",
       +        4,        "clumpmagic",
       +        0
       +};
       +
       +Info tailinfo4[] = {
       +        4,        "magic",
       +        D|4,        "version",
       +        S|ANameSize,        "name",
       +        D|4,        "clumps",
       +        D|4,        "cclumps",
       +        T|4,        "ctime",
       +        T|4,        "wtime",
       +        D|8,        "used",
       +        D|8,        "uncsize",
       +        1,        "sealed",
       +        0
       +};
       +        
       +Info tailinfo4a[] = {
       +        /* tailinfo 4 */
       +        4,        "magic",
       +        D|4,        "version",
       +        S|ANameSize,        "name",
       +        D|4,        "clumps",
       +        D|4,        "cclumps",
       +        T|4,        "ctime",
       +        T|4,        "wtime",
       +        D|8,        "used",
       +        D|8,        "uncsize",
       +        1,        "sealed",
       +
       +        /* mem stats */
       +        1,        "extension",
       +        D|4,        "mem.clumps",
       +        D|4,        "mem.cclumps",
       +        D|8,        "mem.used",
       +        D|8,        "mem.uncsize",
       +        1,        "mem.sealed",
       +        0
       +};
       +        
       +Info tailinfo5[] = {
       +        4,        "magic",
       +        D|4,        "version",
       +        S|ANameSize,        "name",
       +        D|4,        "clumps",
       +        D|4,        "cclumps",
       +        T|4,        "ctime",
       +        T|4,        "wtime",
       +        4,        "clumpmagic",
       +        D|8,        "used",
       +        D|8,        "uncsize",
       +        1,        "sealed",
       +        0
       +};
       +
       +Info tailinfo5a[] = {
       +        /* tailinfo 5 */
       +        4,        "magic",
       +        D|4,        "version",
       +        S|ANameSize,        "name",
       +        D|4,        "clumps",
       +        D|4,        "cclumps",
       +        T|4,        "ctime",
       +        T|4,        "wtime",
       +        4,        "clumpmagic",
       +        D|8,        "used",
       +        D|8,        "uncsize",
       +        1,        "sealed",
       +
       +        /* mem stats */
       +        1,        "extension",
       +        D|4,        "mem.clumps",
       +        D|4,        "mem.cclumps",
       +        D|8,        "mem.used",
       +        D|8,        "mem.uncsize",
       +        1,        "mem.sealed",
       +        0
       +};
       +        
       +void
       +showdiffs(uchar *want, uchar *have, int len, Info *info)
       +{
       +        int n;
       +        
       +        while(len > 0 && (n=info->len&N) > 0){
       +                if(memcmp(have, want, n) != 0){
       +                        switch(info->len){
       +                        case 1:
       +                                print("\t%s: correct=%d disk=%d\n",
       +                                        info->name, *want, *have);
       +                                break;
       +                        case 4:
       +                                print("\t%s: correct=%#ux disk=%#ux\n",
       +                                        info->name, u32(want), u32(have));
       +                                break;
       +                        case D|4:
       +                                print("\t%s: correct=%,ud disk=%,ud\n",
       +                                        info->name, u32(want), u32(have));
       +                                break;
       +                        case T|4:
       +                                print("\t%s: correct=%t\n\t\tdisk=%t\n",
       +                                        info->name, u32(want), u32(have));
       +                                break;
       +                        case Z|4:
       +                                print("\t%s: correct=%z disk=%z\n",
       +                                        info->name, (uvlong)u32(want), (uvlong)u32(have));
       +                                break;
       +                        case D|8:
       +                                print("\t%s: correct=%,lld disk=%,lld\n",
       +                                        info->name, u64(want), u64(have));
       +                                break;
       +                        case Z|8:
       +                                print("\t%s: correct=%z disk=%z\n",
       +                                        info->name, u64(want), u64(have));
       +                                break;
       +                        case S|ANameSize:
       +                                print("\t%s: correct=%s disk=%.*s\n",
       +                                        info->name, (char*)want, 
       +                                        utfnlen((char*)have, ANameSize-1),
       +                                        (char*)have);
       +                                break;
       +                        default:
       +                                print("\t%s: correct=%.*H disk=%.*H\n",
       +                                        info->name, n, want, n, have);
       +                                break;
       +                        }
       +                }
       +                have += n;
       +                want += n;
       +                len -= n;
       +                info++;
       +        }
       +        if(len > 0 && memcmp(have, want, len) != 0){
       +                if(memcmp(want, zero, len) != 0)
       +                        print("!!\textra want data in showdiffs (bug in fixarenas)\n");
       +                else
       +                        print("\tnon-zero data on disk after structure\n");
       +                if(verbose > 1){
       +                        print("want: %.*H\n", len, want);
       +                        print("have: %.*H\n", len, have);
       +                }
       +        }
       +}
       +
       +static int tabsizes[] = { 64*1024, 512*1024, };
       +/*
       + * Poke around on the disk to guess what the ArenaPart numbers are.
       + */
       +void
       +guessgeometry(void)
       +{
       +        int i, j, n, bestn, ndiff, nhead, ntail;
       +        uchar *p, *ep, *sp;
       +        u64int diff[100], head[20], tail[20];
       +        u64int offset, bestdiff;
       +        
       +        ap.version = ArenaPartVersion;
       +
       +        if(arenasize == 0 || ap.blocksize == 0){
       +                /*
       +                 * The ArenaPart block at offset PartBlank may be corrupt or just wrong.
       +                 * Instead, look for the individual arena headers and tails, which there
       +                 * are many of, and once we've seen enough, infer the spacing.
       +                 *
       +                 * Of course, nothing in the file format requires that arenas be evenly
       +                 * spaced, but fmtarenas always does that for us.
       +                 */
       +                nhead = 0;
       +                ntail = 0;
       +                for(offset=PartBlank; offset<partend; offset+=4*M){
       +                        p = pagein(offset, 4*M);
       +                        for(sp=p, ep=p+4*M; p<ep; p+=K){
       +                                if(u32(p) == ArenaHeadMagic && nhead < nelem(head)){
       +                                        if(verbose)
       +                                                print("arena head at %#llx\n", offset+(p-sp));
       +                                        head[nhead++] = offset+(p-sp);
       +                                }
       +                                if(u32(p) == ArenaMagic && ntail < nelem(tail)){
       +                                        tail[ntail++] = offset+(p-sp);
       +                                        if(verbose)
       +                                                print("arena tail at %#llx\n", offset+(p-sp));
       +                                }
       +                        }
       +                        if(nhead == nelem(head) && ntail == nelem(tail))
       +                                break;
       +                }
       +                if(nhead < 3 && ntail < 3)
       +                        sysfatal("too few intact arenas: %d heads, %d tails", nhead, ntail);
       +        
       +                /* 
       +                 * Arena size is likely the most common
       +                 * inter-head or inter-tail spacing.
       +                 */
       +                ndiff = 0;
       +                for(i=1; i<nhead; i++)
       +                        diff[ndiff++] = head[i] - head[i-1];
       +                for(i=1; i<ntail; i++)
       +                        diff[ndiff++] = tail[i] - tail[i-1];
       +                qsort(diff, ndiff, sizeof diff[0], vlongcmp);
       +                bestn = 0;
       +                bestdiff = 0;
       +                for(i=1, n=1; i<=ndiff; i++, n++){
       +                        if(i==ndiff || diff[i] != diff[i-1]){
       +                                if(n > bestn){
       +                                        bestn = n;
       +                                        bestdiff = diff[i-1];
       +                                }
       +                                n = 0;
       +                        }
       +                }
       +                print("arena size likely %z (%d of %d)\n", bestdiff, bestn, ndiff);
       +                if(arenasize != 0 && arenasize != bestdiff)
       +                        print("using user-specified size %z instead\n", arenasize);
       +                else
       +                        arenasize = bestdiff;
       +
       +                /*
       +                 * The arena tail for an arena is arenasize-blocksize from the head.
       +                 */
       +                ndiff = 0;
       +                for(i=j=0; i<nhead && j<ntail; ){
       +                        if(tail[j] < head[i]){
       +                                j++;
       +                                continue;
       +                        }
       +                        if(tail[j] < head[i]+arenasize){
       +                                diff[ndiff++] = head[i]+arenasize - tail[j];
       +                                j++;
       +                                continue;
       +                        }
       +                        i++;
       +                }
       +                if(ndiff < 3)
       +                        sysfatal("too few intact arenas: %d head, tail pairs", ndiff);
       +                qsort(diff, ndiff, sizeof diff[0], vlongcmp);
       +                bestn = 0;
       +                bestdiff = 0;
       +                for(i=1, n=1; i<=ndiff; i++, n++){
       +                        if(i==ndiff || diff[i] != diff[i-1]){
       +                                if(n > bestn){
       +                                        bestn = n;
       +                                        bestdiff = diff[i-1];
       +                                }
       +                                n = 0;
       +                        }
       +                }
       +                print("block size likely %z (%d of %d)\n", bestdiff, bestn, ndiff);
       +                if(ap.blocksize != 0 && ap.blocksize != bestdiff)
       +                        print("using user-specified size %z instead\n", (vlong)ap.blocksize);
       +                else
       +                        ap.blocksize = bestdiff;
       +                if(ap.blocksize == 0 || ap.blocksize&(ap.blocksize-1))
       +                        sysfatal("block size not a power of two");
       +                if(ap.blocksize > MaxDiskBlock)
       +                        sysfatal("block size too big (max=%d)", MaxDiskBlock);
       +
       +                /*
       +                 * Use head/tail information to deduce arena base.
       +                 */
       +                ndiff = 0;
       +                for(i=0; i<nhead; i++)
       +                        diff[ndiff++] = head[i]%arenasize;
       +                for(i=0; i<ntail; i++)
       +                        diff[ndiff++] = (tail[i]+ap.blocksize)%arenasize;
       +                qsort(diff, ndiff, sizeof diff[0], vlongcmp);
       +                bestn = 0;
       +                bestdiff = 0;
       +                for(i=1, n=1; i<=ndiff; i++, n++){
       +                        if(i==ndiff || diff[i] != diff[i-1]){
       +                                if(n > bestn){
       +                                        bestn = n;
       +                                        bestdiff = diff[i-1];
       +                                }
       +                                n = 0;
       +                        }
       +                }
       +                ap.arenabase = bestdiff;
       +        }
       +        
       +        ap.tabbase = (PartBlank+HeadSize+ap.blocksize-1)&~(ap.blocksize-1);
       +        /*
       +         * XXX pick up table, check arenabase.
       +         * XXX pick up table, record base name.
       +         */
       +
       +        /*
       +         * Somewhat standard computation.
       +         * Fmtarenas used to use 64k tab, now uses 512k tab.
       +         */
       +        if(ap.arenabase == 0){
       +                for(i=0; i<nelem(tabsizes); i++){
       +                        ap.arenabase = (PartBlank+HeadSize+tabsizes[i]+ap.blocksize-1)&~(ap.blocksize-1);
       +                        p = pagein(ap.arenabase, Block);
       +                        if(u32(p) == ArenaHeadMagic)
       +                                break;
       +                }
       +        }
       +        p = pagein(ap.arenabase, Block);
       +        print("arena base likely %z%s\n", (vlong)ap.arenabase, 
       +                u32(p)!=ArenaHeadMagic ? " (but no arena head there)" : "");
       +
       +        ap.tabsize = ap.arenabase - ap.tabbase;
       +        
       +}
       +
       +/*
       + * Check the arena partition blocks and then the arenas listed in range.
       + */
       +void
       +checkarenas(char *range)
       +{
       +        char *s, *t;
       +        int i, lo, hi, narena;
       +        uchar dbuf[HeadSize];
       +        uchar *p;
       +
       +        guessgeometry();
       +
       +        partend -= partend%ap.blocksize;
       +
       +        memset(dbuf, 0, sizeof dbuf);
       +        packarenapart(&ap, dbuf);
       +        p = pagein(PartBlank, Block);
       +        if(memcmp(p, dbuf, HeadSize) != 0){
       +                print("on-disk arena part superblock incorrect\n");
       +                showdiffs(dbuf, p, HeadSize, partinfo);
       +        }
       +        memmove(p, dbuf, HeadSize);
       +
       +        narena = (partend-ap.arenabase + arenasize-1)/arenasize;
       +        if(range == nil){
       +                for(i=0; i<narena; i++)
       +                        checkarena(ap.arenabase+(vlong)i*arenasize, i);
       +        }else if(strcmp(range, "none") == 0){
       +                /* nothing */
       +        }else{
       +                /* parse, e.g., -4,8-9,10- */
       +                for(s=range; *s; s=t){
       +                        t = strchr(s, ',');
       +                        if(t)
       +                                *t++ = 0;
       +                        else
       +                                t = s+strlen(s);
       +                        if(*s == '-')
       +                                lo = 0;
       +                        else
       +                                lo = strtol(s, &s, 0);
       +                        hi = lo;
       +                        if(*s == '-'){
       +                                s++;
       +                                if(*s == 0)
       +                                        hi = narena-1;
       +                                else
       +                                        hi = strtol(s, &s, 0);
       +                        }
       +                        if(*s != 0){
       +                                print("bad arena range: %s\n", s);
       +                                continue;
       +                        }
       +                        for(i=lo; i<=hi; i++)
       +                                checkarena(ap.arenabase+(vlong)i*arenasize, i);
       +                }
       +        }
       +}
       +
       +/*
       + * Is there a clump here at p?
       + */
       +static int
       +isclump(uchar *p, Clump *cl, u32int *pmagic)
       +{
       +        int n;
       +        u32int magic;
       +        uchar score[VtScoreSize], *bp;
       +        Unwhack uw;
       +        uchar ubuf[70*1024];
       +        
       +        bp = p;
       +        magic = u32(p);
       +        if(magic == 0)
       +                return 0;
       +        p += U32Size;
       +
       +        cl->info.type = vtfromdisktype(*p);
       +        if(cl->info.type == 0xFF)
       +                return 0;
       +        p++;
       +        cl->info.size = u16(p);
       +        p += U16Size;
       +        cl->info.uncsize = u16(p);
       +        if(cl->info.size > cl->info.uncsize)
       +                return 0;
       +        p += U16Size;
       +        scorecp(cl->info.score, p);
       +        p += VtScoreSize;
       +        cl->encoding = *p;
       +        p++;
       +        cl->creator = u32(p);
       +        p += U32Size;
       +        cl->time = u32(p);
       +        p += U32Size;
       +
       +        switch(cl->encoding){
       +        case ClumpENone:
       +                if(cl->info.size != cl->info.uncsize)
       +                        return 0;
       +                scoremem(score, p, cl->info.size);
       +                if(scorecmp(score, cl->info.score) != 0)
       +                        return 0;
       +                break;
       +        case ClumpECompress:
       +                if(cl->info.size >= cl->info.uncsize)
       +                        return 0;
       +                unwhackinit(&uw);
       +                n = unwhack(&uw, ubuf, cl->info.uncsize, p, cl->info.size);
       +                if(n != cl->info.uncsize)
       +                        return 0;
       +                scoremem(score, ubuf, cl->info.uncsize);
       +                if(scorecmp(score, cl->info.score) != 0)
       +                        return 0;
       +                break;
       +        default:
       +                return 0;
       +        }
       +        p += cl->info.size;
       +        
       +        /* it all worked out in the end */
       +        *pmagic = magic;
       +        return p - bp;
       +}
       +
       +/*
       + * All ClumpInfos seen in this arena.
       + * Kept in binary tree so we can look up by score.
       + */
       +typedef struct Cit Cit;
       +struct Cit
       +{
       +        int left;
       +        int right;
       +        vlong corrupt;
       +        ClumpInfo ci;
       +};
       +Cit *cibuf;
       +int ciroot;
       +int ncibuf, mcibuf;
       +
       +void
       +resetcibuf(void)
       +{
       +        ncibuf = 0;
       +        ciroot = -1;
       +}
       +
       +int*
       +ltreewalk(int *p, uchar *score)
       +{
       +        int i;
       +        
       +        for(;;){
       +                if(*p == -1)
       +                        return p;
       +                i = scorecmp(cibuf[*p].ci.score, score);
       +                if(i == 0)
       +                        return p;
       +                if(i < 0)
       +                        p = &cibuf[*p].right;
       +                else
       +                        p = &cibuf[*p].left;
       +        }
       +        return nil;         /* stupid 8c */
       +}
       +
       +void
       +addcibuf(ClumpInfo *ci, vlong corrupt)
       +{
       +        Cit *cit;
       +        
       +        if(ncibuf == mcibuf){
       +                mcibuf += 131072;
       +                cibuf = vtrealloc(cibuf, mcibuf*sizeof cibuf[0]);
       +        }
       +        cit = &cibuf[ncibuf];
       +        cit->ci = *ci;
       +        cit->left = -1;
       +        cit->right = -1;
       +        cit->corrupt = corrupt;
       +        if(!corrupt)
       +                *ltreewalk(&ciroot, ci->score) = ncibuf;
       +        ncibuf++;
       +}
       +
       +void
       +addcicorrupt(vlong len)
       +{
       +        static ClumpInfo zci;
       +        
       +        addcibuf(&zci, len);
       +}
       +
       +int
       +haveclump(uchar *score)
       +{
       +        int i;
       +        int p;
       +        
       +        p = ciroot;
       +        for(;;){
       +                if(p == -1)
       +                        return 0;
       +                i = scorecmp(cibuf[p].ci.score, score);
       +                if(i == 0)
       +                        return 1;
       +                if(i < 0)
       +                        p = cibuf[p].right;
       +                else
       +                        p = cibuf[p].left;
       +        }
       +        return 0;        /* stupid 8c */
       +}
       +
       +int
       +matchci(ClumpInfo *ci, uchar *p)
       +{
       +        if(ci->type != vtfromdisktype(p[0]))
       +                return 0;
       +        if(ci->size != u16(p+1))
       +                return 0;
       +        if(ci->uncsize != u16(p+3))
       +                return 0;
       +        if(scorecmp(ci->score, p+5) != 0)
       +                return 0;
       +        return 1;
       +}
       +
       +int
       +sealedarena(uchar *p, int blocksize)
       +{
       +        int v, n;
       +        
       +        v = u32(p+4);
       +        switch(v){
       +        default:
       +                return 0;
       +        case ArenaVersion4:
       +                n = ArenaSize4;
       +                break;
       +        case ArenaVersion5:
       +                n = ArenaSize5;
       +                break;
       +        }
       +        if(p[n-1] != 1){
       +                print("arena tail says not sealed\n");
       +                return 0;
       +        }
       +        if(memcmp(p+n, zero, blocksize-VtScoreSize-n) != 0){
       +                print("arena tail followed by non-zero data\n");
       +                return 0;
       +        }
       +        if(memcmp(p+blocksize-VtScoreSize, zero, VtScoreSize) == 0){
       +                print("arena score zero\n");
       +                return 0;
       +        }
       +        return 1;
       +}
       +
       +int
       +okayname(char *name, int n)
       +{
       +        char buf[20];
       +        
       +        if(nameok(name) < 0)
       +                return 0;
       +        sprint(buf, "%d", n);
       +        if(strlen(name) < strlen(buf) 
       +        || strcmp(name+strlen(name)-strlen(buf), buf) != 0)
       +                return 0;
       +        return 1;
       +}
       +
       +int
       +clumpinfocmp(ClumpInfo *a, ClumpInfo *b)
       +{
       +        if(a->type != b->type)
       +                return a->type - b->type;
       +        if(a->size != b->size)
       +                return a->size - b->size;
       +        if(a->uncsize != b->uncsize)
       +                return a->uncsize - b->uncsize;
       +        return scorecmp(a->score, b->score);
       +}
       +
       +ClumpInfo*
       +loadci(vlong offset, Arena *arena, int nci)
       +{
       +        int i, j, per;
       +        uchar *p, *sp;
       +        ClumpInfo *bci, *ci;
       +        
       +        per = arena->blocksize/ClumpInfoSize;
       +        bci = vtmalloc(nci*sizeof bci[0]);
       +        ci = bci;
       +        offset += arena->size - arena->blocksize;
       +        p = sp = nil;
       +        for(i=0; i<nci; i+=per){
       +                if(p == sp){
       +                        sp = pagein(offset-4*M, 4*M);
       +                        p = sp+4*M;
       +                }
       +                p -= arena->blocksize;
       +                offset -= arena->blocksize;
       +                for(j=0; j<per && i+j<nci; j++)
       +                        unpackclumpinfo(ci++, p+j*ClumpInfoSize);
       +        }
       +        return bci;
       +}
       +
       +vlong
       +writeci(vlong offset, Arena *arena, ClumpInfo *ci, int nci)
       +{
       +        int i, j, per;
       +        uchar *p, *sp;
       +        
       +        per = arena->blocksize/ClumpInfoSize;
       +        offset += arena->size - arena->blocksize;
       +        p = sp = nil;
       +        for(i=0; i<nci; i+=per){
       +                if(p == sp){
       +                        sp = pagein(offset-4*M, 4*M);
       +                        p = sp+4*M;
       +                }
       +                p -= arena->blocksize;
       +                offset -= arena->blocksize;
       +                memset(p, 0, arena->blocksize);
       +                for(j=0; j<per && i+j<nci; j++)
       +                        packclumpinfo(ci++, p+j*ClumpInfoSize);
       +        }
       +        pageout();
       +        return offset;
       +}
       +
       +void
       +loadarenabasics(vlong offset0, int anum, ArenaHead *head, Arena *arena)
       +{
       +        char dname[ANameSize];
       +        static char lastbase[ANameSize];
       +        uchar *p;
       +        Arena oarena;
       +        ArenaHead ohead;
       +
       +        /*
       +         * Fmtarenas makes all arenas the same size
       +         * except the last, which may be smaller.
       +         * It uses the same block size for arenas as for
       +         * the arena partition blocks.
       +         */
       +        arena->size = arenasize;
       +        if(offset0+arena->size > partend)
       +                arena->size = partend - offset0;
       +        head->size = arena->size;
       +        
       +        arena->blocksize = ap.blocksize;
       +        head->blocksize = arena->blocksize;
       +        
       +        /* 
       +         * Look for clump magic and name in head/tail blocks.
       +         * All the other info we will reconstruct just in case.
       +         */
       +        p = pagein(offset0, arena->blocksize);
       +        memset(&ohead, 0, sizeof ohead);
       +        if(unpackarenahead(&ohead, p) >= 0){
       +                head->version = ohead.version;
       +                head->clumpmagic = ohead.clumpmagic;
       +                if(okayname(ohead.name, anum))
       +                        strcpy(head->name, ohead.name);
       +        }
       +
       +        p = pagein(offset0+arena->size-arena->blocksize, 
       +                arena->blocksize);
       +        memset(&oarena, 0, sizeof oarena);
       +        if(unpackarena(&oarena, p) >= 0){
       +                arena->version = oarena.version;
       +                arena->clumpmagic = oarena.clumpmagic;
       +                if(okayname(oarena.name, anum))
       +                        strcpy(arena->name, oarena.name);
       +                arena->diskstats.clumps = oarena.diskstats.clumps;
       +print("old arena: sealed=%d\n", oarena.diskstats.sealed);
       +                arena->diskstats.sealed = oarena.diskstats.sealed;
       +        }
       +
       +        /* Head trumps arena. */
       +        if(head->version){
       +                arena->version = head->version;
       +                arena->clumpmagic = head->clumpmagic;
       +        }
       +        if(arena->version == 0)
       +                arena->version = ArenaVersion5;
       +        if(basename)
       +                snprint(arena->name, ANameSize, "%s%d", basename, anum);
       +        else if(lastbase[0])
       +                snprint(arena->name, ANameSize, "%s%d", lastbase, anum);
       +        else if(head->name[0])
       +                strcpy(arena->name, head->name);
       +        else if(arena->name[0] == 0)
       +                sysfatal("cannot determine base name for arena; use -n");
       +        strcpy(lastbase, arena->name);
       +        sprint(dname, "%d", anum);
       +        lastbase[strlen(lastbase)-strlen(dname)] = 0;
       +        
       +        /* Was working in arena, now copy to head. */
       +        head->version = arena->version;
       +        memmove(head->name, arena->name, sizeof head->name);
       +        head->blocksize = arena->blocksize;
       +        head->size = arena->size;
       +}
       +
       +void
       +shahead(Shabuf *sb, vlong offset0, ArenaHead *head)
       +{
       +        uchar headbuf[MaxDiskBlock];
       +        
       +        sb->offset = offset0;
       +        memset(headbuf, 0, sizeof headbuf);
       +        packarenahead(head, headbuf);
       +        sbupdate(sb, headbuf, offset0, head->blocksize);
       +}
       +
       +u32int
       +newclumpmagic(int version)
       +{
       +        u32int m;
       +        
       +        if(version == ArenaVersion4)
       +                return _ClumpMagic;
       +        do{
       +                m = fastrand();
       +        }while(m==0 || m == _ClumpMagic);
       +        return m;
       +}
       +
       +/*
       + * Poke around in the arena to find the clump data
       + * and compute the relevant statistics.
       + */
       +void
       +guessarena(vlong offset0, int anum, ArenaHead *head, Arena *arena,
       +        uchar *oldscore, uchar *score)
       +{
       +        uchar dbuf[MaxDiskBlock];
       +        int needtozero, clumps, nb1, nb2, minclumps;
       +        int inbad, n, ncib, printed, sealing, smart;
       +        u32int magic;
       +        uchar *sp, *ep, *p;
       +        vlong boffset, eoffset, lastclumpend, leaked;
       +        vlong offset, toffset, totalcorrupt, v;
       +        Clump cl;
       +        ClumpInfo *bci, *ci, *eci, *xci;
       +        Cit *bcit, *cit, *ecit;
       +        Shabuf oldsha, newsha;
       +        
       +        /*
       +         * We expect to find an arena, with data, between offset
       +         * and offset+arenasize.  With any luck, the data starts at
       +         * offset+ap.blocksize.  The blocks have variable size and
       +         * aren't padded at all, which doesn't give us any alignment
       +         * constraints.  The blocks are compressed or high entropy,
       +         * but the headers are pretty low entropy (except the score):
       +         *
       +         *        type[1] (range 0 thru 9, 13)
       +         *        size[2]
       +         *        uncsize[2] (<= size)
       +         *
       +         * so we can look for these.  We check the scores as we go,
       +         * so we can't make any wrong turns.  If we find ourselves
       +         * in a dead end, scan forward looking for a new start.
       +         */
       +
       +        resetcibuf();
       +        memset(head, 0, sizeof *head);
       +        memset(arena, 0, sizeof *arena);
       +        memset(oldscore, 0, VtScoreSize);
       +        memset(score, 0, VtScoreSize);
       +        memset(&oldsha, 0, sizeof oldsha);
       +        memset(&newsha, 0, sizeof newsha);
       +        newsha.rollback = 1;
       +
       +        if(0){
       +                sbdebug(&oldsha, "old.sha");
       +                sbdebug(&newsha, "new.sha");
       +        }
       +
       +        loadarenabasics(offset0, anum, head, arena);
       +
       +        /* start the clump hunt */
       +        
       +        clumps = 0;
       +        totalcorrupt = 0;
       +        sealing = 1;
       +        boffset = offset0 + arena->blocksize;
       +        offset = boffset;
       +        eoffset = offset0+arena->size - arena->blocksize;
       +        toffset = eoffset;
       +        sp = pagein(offset0, 4*M);
       +
       +        if(arena->diskstats.sealed){
       +                oldsha.offset = offset0;
       +                sbupdate(&oldsha, sp, offset0, 4*M);
       +        }
       +        ep = sp+4*M;
       +        p = sp + (boffset - offset0);
       +        ncib = arena->blocksize / ClumpInfoSize;        /* ci per block in index */
       +        lastclumpend = offset;
       +        nbad = 0;
       +        inbad = 0;
       +        needtozero = 0;
       +        minclumps = 0;
       +        while(offset < eoffset){
       +                /*
       +                 * Shift buffer if we're running out of room.
       +                 */
       +                if(p+70*K >= ep){
       +                        /*
       +                         * Start the post SHA1 buffer.   By now we should know the
       +                         * clumpmagic and arena version, so we can create a
       +                         * correct head block to get things going.
       +                         */
       +                        if(sealing && fix && newsha.offset == 0){
       +                                newsha.offset = offset0;
       +                                if(arena->clumpmagic == 0){
       +                                        if(arena->version == 0)
       +                                                arena->version = ArenaVersion5;
       +                                        arena->clumpmagic = newclumpmagic(arena->version);
       +                                }
       +                                head->clumpmagic = arena->clumpmagic;
       +                                shahead(&newsha, offset0, head);
       +                        }
       +                        n = 4*M-256*K;
       +                        if(sealing && fix){
       +                                sbdiskhash(&newsha, bufoffset);
       +                                sbupdate(&newsha, buf, bufoffset, 4*M-256*K);
       +                        }
       +                        pagein(bufoffset+n, 4*M);
       +                        p -= n;
       +                        if(arena->diskstats.sealed)
       +                                sbupdate(&oldsha, buf, bufoffset, 4*M);
       +                }
       +
       +                /*
       +                 * Check for a clump at p, which is at offset in the disk.
       +                 * Duplicate clumps happen in corrupted disks
       +                 * (the same pattern gets written many times in a row)
       +                 * and should never happen during regular use.
       +                 */
       +                if((n = isclump(p, &cl, &magic)) > 0){
       +                        /*
       +                         * If we were in the middle of some corrupted data,
       +                         * flush a warning about it and then add any clump
       +                         * info blocks as necessary.
       +                         */
       +                        if(inbad){
       +                                inbad = 0;
       +                                v = offset-lastclumpend;
       +                                if(needtozero){
       +                                        zerorange(lastclumpend, v);
       +                                        sbrollback(&newsha, lastclumpend);
       +                                        print("corrupt clump data - %#llux+%#llux (%,llud bytes)\n",
       +                                                lastclumpend, v, v);
       +                                }
       +                                addcicorrupt(v);
       +                                totalcorrupt += v;
       +                                nb1 = (minclumps+ncib-1)/ncib;
       +                                minclumps += (v+ClumpSize+VtMaxLumpSize-1)/(ClumpSize+VtMaxLumpSize);
       +                                nb2 = (minclumps+ncib-1)/ncib;
       +                                eoffset -= (nb2-nb1)*arena->blocksize;
       +                        }
       +
       +                        if(haveclump(cl.info.score))
       +                                print("warning: duplicate clump %d %V\n", cl.info.type, cl.info.score);
       +
       +                        /*
       +                         * If clumps use different magic numbers, we don't care.
       +                         * We'll just use the first one we find and make the others
       +                         * follow suit.
       +                         */
       +                        if(arena->clumpmagic == 0){
       +                                print("clump type %d size %d score %V magic %x\n",
       +                                        cl.info.type, cl.info.size, cl.info.score, magic);
       +                                arena->clumpmagic = magic;
       +                                if(magic == _ClumpMagic)
       +                                        arena->version = ArenaVersion4;
       +                                else
       +                                        arena->version = ArenaVersion5;
       +                        }
       +                        if(magic != arena->clumpmagic)
       +                                p32(p, arena->clumpmagic);
       +                        if(clumps == 0)
       +                                arena->ctime = cl.time;
       +
       +                        /*
       +                         * Record the clump, update arena stats,
       +                         * grow clump info blocks if needed.
       +                         */
       +                        if(verbose > 1)
       +                                print("\tclump %d: %d %V at %#llux+%#ux (%d)\n", 
       +                                        clumps, cl.info.type, cl.info.score, offset, n, n);
       +                        addcibuf(&cl.info, 0);
       +                        if(minclumps%ncib == 0)
       +                                eoffset -= arena->blocksize;
       +                        minclumps++;
       +                        clumps++;
       +                        if(cl.encoding != ClumpENone)
       +                                arena->diskstats.cclumps++;
       +                        arena->diskstats.uncsize += cl.info.uncsize;
       +                        arena->wtime = cl.time;
       +                        
       +                        /*
       +                         * Move to next clump.
       +                         */
       +                        offset += n;
       +                        p += n;
       +                        lastclumpend = offset;
       +                }else{
       +                        /*
       +                         * Overwrite malformed clump data with zeros later.
       +                         * For now, just record whether it needs to be overwritten.
       +                         * Bad regions must be of size at least ClumpSize.
       +                         * Postponing the overwriting keeps us from writing past
       +                         * the end of the arena data (which might be directory data)
       +                         * with zeros.
       +                         */
       +                        if(!inbad){
       +                                inbad = 1;
       +                                needtozero = 0;
       +                                if(memcmp(p, zero, ClumpSize) != 0)
       +                                        needtozero = 1;
       +                                p += ClumpSize;
       +                                offset += ClumpSize;
       +                                nbad++;
       +                        }else{
       +                                if(*p != 0)
       +                                        needtozero = 1;
       +                                p++;
       +                                offset++;
       +                        }
       +                }
       +        }
       +        pageout();
       +
       +        if(verbose)
       +                print("readable clumps: %d; min. directory entries: %d\n", 
       +                        clumps, minclumps);
       +        arena->diskstats.used = lastclumpend - boffset;
       +        leaked = eoffset - lastclumpend;
       +        if(verbose)
       +                print("used from %#llux to %#llux = %,lld (%,lld unused)\n",
       +                        boffset, lastclumpend, arena->diskstats.used, leaked);
       +
       +        /*
       +         * Finish the SHA1 of the old data.
       +         */
       +        if(arena->diskstats.sealed){
       +                sbdiskhash(&oldsha, toffset);
       +                readdisk(dbuf, toffset, arena->blocksize);
       +                scorecp(dbuf+arena->blocksize-VtScoreSize, zero);
       +                sbupdate(&oldsha, dbuf, toffset, arena->blocksize);
       +                sbscore(&oldsha, oldscore);
       +        }
       +        
       +        /*
       +         * If we still don't know the clump magic, the arena
       +         * must be empty.  It still needs a value, so make 
       +         * something up.
       +         */
       +        if(arena->version == 0)
       +                arena->version = ArenaVersion5;
       +        if(arena->clumpmagic == 0){
       +                if(arena->version == ArenaVersion4)
       +                        arena->clumpmagic = _ClumpMagic;
       +                else{
       +                        do
       +                                arena->clumpmagic = fastrand();
       +                        while(arena->clumpmagic==_ClumpMagic
       +                                ||arena->clumpmagic==0);
       +                }
       +                head->clumpmagic = arena->clumpmagic;
       +        }
       +
       +        /*
       +         * Guess at number of clumpinfo blocks to load.
       +         * If we guess high, it's no big deal.  If we guess low,
       +         * we'll be forced into rewriting the whole directory.
       +         * Still not such a big deal.
       +         */
       +        if(clumps == 0 || arena->diskstats.used == totalcorrupt)
       +                goto Nocib;
       +        if(clumps < arena->diskstats.clumps)
       +                clumps = arena->diskstats.clumps;
       +        if(clumps < ncibuf)
       +                clumps = ncibuf;
       +        clumps += totalcorrupt/
       +                ((arena->diskstats.used - totalcorrupt)/clumps);
       +        clumps += totalcorrupt/2000;
       +        if(clumps < minclumps)
       +                clumps = minclumps;
       +        clumps += ncib-1;
       +        clumps -= clumps%ncib;
       +
       +        /*
       +         * Can't write into the actual data.
       +         */
       +        v = offset0 + arena->size - arena->blocksize;
       +        v -= (clumps+ncib-1)/ncib * arena->blocksize;
       +        if(v < lastclumpend){
       +                v = offset0 + arena->size - arena->blocksize;
       +                clumps = (v-lastclumpend)/arena->blocksize * ncib;
       +        }
       +        
       +        if(clumps < minclumps)
       +                print("cannot happen?\n");
       +
       +        /*
       +         * Check clumpinfo blocks against directory we created.
       +         * The tricky part is handling the corrupt sections of arena.
       +         * If possible, we remark just the affected directory entries
       +         * rather than slide everything down.
       +         * 
       +         * Allocate clumps+1 blocks and check that we don't need
       +         * the last one at the end.
       +         */
       +        bci = loadci(offset0, arena, clumps+1);
       +        eci = bci+clumps+1;
       +        bcit = cibuf;
       +        ecit = cibuf+ncibuf;
       +        smart = 1;
       +Again:
       +        nbad = 0;
       +        ci = bci;
       +        for(cit=bcit; cit<ecit && ci<eci; cit++){
       +                if(cit->corrupt){
       +                        vlong n, m;
       +                        if(smart){
       +                                /*
       +                                 * If we can, just mark existing entries as corrupt.
       +                                 */
       +                                n = cit->corrupt;
       +                                for(xci=ci; n>0 && xci<eci; xci++)
       +                                        n -= ClumpSize+xci->size;
       +                                if(n > 0 || xci >= eci)
       +                                        goto Dumb;
       +                                printed = 0;
       +                                for(; ci<xci; ci++){
       +                                        if(verbose && ci->type != VtCorruptType){
       +                                                if(!printed){
       +                                                        print("marking directory %d-%d as corrupt\n",
       +                                                                (int)(ci-bci), (int)(xci-bci));
       +                                                        printed = 1;
       +                                                }
       +                                                print("\ttype=%d size=%d uncsize=%d score=%V\n",
       +                                                        ci->type, ci->size, ci->uncsize, ci->score);
       +                                        }
       +                                        ci->type = VtCorruptType;
       +                                }
       +                        }else{
       +                        Dumb:
       +                                print("\trewriting clump directory\n");
       +                                /*
       +                                 * Otherwise, blaze a new trail.
       +                                 */
       +                                n = cit->corrupt;
       +                                while(n > 0 && ci < eci){
       +                                        if(n < ClumpSize)
       +                                                sysfatal("bad math in clump corrupt");
       +                                        if(n <= VtMaxLumpSize+ClumpSize)
       +                                                m = n;
       +                                        else{
       +                                                m = VtMaxLumpSize+ClumpSize;
       +                                                if(n-m < ClumpSize)
       +                                                        m -= ClumpSize;
       +                                        }
       +                                        ci->type = VtCorruptType;
       +                                        ci->size = m-ClumpSize;
       +                                        ci->uncsize = m-ClumpSize;
       +                                        memset(ci->score, 0, VtScoreSize);
       +                                        ci++;
       +                                        n -= m;
       +                                }
       +                        }
       +                        continue;
       +                }
       +                if(clumpinfocmp(&cit->ci, ci) != 0){
       +                        if(verbose && (smart || verbose>1)){
       +                                print("clumpinfo %d\n", (int)(ci-bci));
       +                                print("\twant: %d %d %d %V\n", 
       +                                        cit->ci.type, cit->ci.size,
       +                                        cit->ci.uncsize, cit->ci.score);
       +                                print("\thave: %d %d %d %V\n", 
       +                                        ci->type, ci->size, 
       +                                        ci->uncsize, ci->score);
       +                        }
       +                        *ci = cit->ci;
       +                        nbad++;
       +                }
       +                ci++;
       +        }
       +        if(ci >= eci || cit < ecit){
       +                print("ran out of space editing existing directory; rewriting\n");
       +                print("# eci %ld ci %ld ecit %ld cit %ld\n", eci-bci, ci-bci, ecit-bcit, cit-bcit);
       +                assert(smart);        /* can't happen second time thru */
       +                smart = 0;
       +                goto Again;
       +        }
       +        
       +        assert(ci <= eci);
       +        arena->diskstats.clumps = ci-bci;
       +        eoffset = writeci(offset0, arena, bci, ci-bci);
       +        if(sealing && fix)
       +                sbrollback(&newsha, v);
       +print("eoffset=%lld lastclumpend=%lld diff=%lld unseal=%d\n", eoffset, lastclumpend, eoffset-lastclumpend, unseal);
       +        if(lastclumpend > eoffset)
       +                print("arena directory overwrote blocks!  cannot happen!\n");
       +        free(bci);
       +        if(smart && nbad)
       +                print("arena directory has %d bad or missing entries\n", nbad);
       +Nocib:
       +        if(eoffset - lastclumpend > 64*1024 && (!arena->diskstats.sealed || unseal)){
       +                if(arena->diskstats.sealed)
       +                        print("unsealing arena\n");
       +                sealing = 0;
       +                memset(oldscore, 0, VtScoreSize);
       +        }
       +
       +        /*
       +         * Finish the SHA1 of the new data - only meaningful
       +         * if we've been writing to disk (`fix').
       +         */
       +        arena->diskstats.sealed = sealing;
       +        arena->memstats = arena->diskstats;
       +        if(sealing && fix){
       +                uchar tbuf[MaxDiskBlock];
       +                
       +                sbdiskhash(&newsha, toffset);
       +                memset(tbuf, 0, sizeof tbuf);
       +                packarena(arena, tbuf);
       +                sbupdate(&newsha, tbuf, toffset, arena->blocksize);
       +                sbscore(&newsha, score);
       +        }
       +}
       +
       +void
       +dumparena(vlong offset, int anum, Arena *arena)
       +{
       +        char buf[1000];
       +        vlong o, e;
       +        int fd, n;
       +        
       +        snprint(buf, sizeof buf, "%s.%d", dumpbase, anum);
       +        if((fd = create(buf, OWRITE, 0666)) < 0){
       +                fprint(2, "create %s: %r\n", buf);
       +                return;
       +        }
       +        e = offset+arena->size;
       +        for(o=offset; o<e; o+=n){
       +                n = 4*M;
       +                if(o+n > e)
       +                        n = e-o;
       +                if(pwrite(fd, pagein(o, n), n, o-offset) != n){
       +                        fprint(2, "write %s at %#llux: %r\n", buf, o-offset);
       +                        return;
       +                }
       +        }
       +}
       +
       +void
       +checkarena(vlong offset, int anum)
       +{
       +        uchar dbuf[MaxDiskBlock];
       +        uchar *p, oldscore[VtScoreSize], score[VtScoreSize];
       +        Arena arena, oarena;
       +        ArenaHead head;
       +        Info *fmt, *fmta;
       +        int sz;
       +        
       +        print("# arena %d: offset %#llux\n", anum, offset);
       +
       +        if(offset >= partend){
       +                print("arena offset out of bounds\n");
       +                return;
       +        }
       +
       +        guessarena(offset, anum, &head, &arena, oldscore, score);
       +
       +        if(verbose){
       +                print("#\tversion=%d name=%s blocksize=%d size=%z",
       +                        head.version, head.name, head.blocksize, head.size);
       +                if(head.clumpmagic)
       +                        print(" clumpmagic=%#.8ux", head.clumpmagic);
       +                print("\n#\tclumps=%d cclumps=%d used=%,lld uncsize=%,lld\n",
       +                        arena.diskstats.clumps, arena.diskstats.cclumps,
       +                        arena.diskstats.used, arena.diskstats.uncsize);
       +                print("#\tctime=%t\n", arena.ctime);
       +                print("#\twtime=%t\n", arena.wtime);
       +                if(arena.diskstats.sealed)
       +                        print("#\tsealed score=%V\n", score);
       +        }
       +
       +        if(dumpbase){
       +                dumparena(offset, anum, &arena);
       +                return;
       +        }
       +
       +        memset(dbuf, 0, sizeof dbuf);
       +        packarenahead(&head, dbuf);
       +        p = pagein(offset, arena.blocksize);
       +        if(memcmp(dbuf, p, arena.blocksize) != 0){
       +                print("on-disk arena header incorrect\n");
       +                showdiffs(dbuf, p, arena.blocksize, 
       +                        arena.version==ArenaVersion4 ? headinfo4 : headinfo5);
       +        }
       +        memmove(p, dbuf, arena.blocksize);
       +        
       +        memset(dbuf, 0, sizeof dbuf);
       +        packarena(&arena, dbuf);
       +        if(arena.diskstats.sealed)
       +                scorecp(dbuf+arena.blocksize-VtScoreSize, score);
       +        p = pagein(offset+arena.size-arena.blocksize, arena.blocksize);
       +        memset(&oarena, 0, sizeof oarena);
       +        unpackarena(&oarena, p);
       +        if(arena.version == ArenaVersion4){
       +                sz = ArenaSize4;
       +                fmt = tailinfo4;
       +                fmta = tailinfo4a;
       +        }else{
       +                sz = ArenaSize5;
       +                fmt = tailinfo5;
       +                fmta = tailinfo5a;
       +        }
       +        if(p[sz] == 1){
       +                fmt = fmta;
       +                if(oarena.diskstats.sealed){
       +                        /*
       +                         * some arenas were sealed with the extension
       +                         * before we adopted the convention that if it didn't
       +                         * add new information it gets dropped.
       +                         */
       +                        _packarena(&arena, dbuf, 1);
       +                }
       +        }
       +        if(memcmp(dbuf, p, arena.blocksize-VtScoreSize) != 0){
       +                print("on-disk arena tail incorrect\n");
       +                showdiffs(dbuf, p, arena.blocksize-VtScoreSize, fmt);
       +        }
       +        if(arena.diskstats.sealed){
       +                if(oarena.diskstats.sealed)
       +                if(scorecmp(p+arena.blocksize-VtScoreSize, oldscore) != 0){
       +                        print("on-disk arena seal score incorrect\n");
       +                        print("\tcorrect=%V\n", oldscore);
       +                        print("\t   disk=%V\n", p+arena.blocksize-VtScoreSize);
       +                }
       +                if(fix && scorecmp(p+arena.blocksize-VtScoreSize, score) != 0){
       +                        print("%ssealing arena%s: %V\n", 
       +                                oarena.diskstats.sealed ? "re" : "",
       +                                scorecmp(oldscore, score) == 0 ? 
       +                                        "" : " after changes", score);
       +                }
       +        }
       +        memmove(p, dbuf, arena.blocksize);
       +        
       +        pageout();
       +}
       +
       +AMapN*
       +buildamap(void)
       +{
       +        uchar *p;
       +        vlong o;
       +        ArenaHead h;
       +        AMapN *an;
       +        AMap *m;
       +        
       +        an = vtmallocz(sizeof *an);
       +        for(o=ap.arenabase; o<partend; o+=arenasize){
       +                p = pagein(o, Block);
       +                if(unpackarenahead(&h, p) >= 0){
       +                        an->map = vtrealloc(an->map, (an->n+1)*sizeof an->map[0]);
       +                        m = &an->map[an->n++];
       +                        m->start = o;
       +                        m->stop = o+h.size;
       +                        strcpy(m->name, h.name);
       +                }
       +        }
       +        return an;        
       +}
       +
       +void
       +checkmap(void)
       +{
       +        char *s;
       +        uchar *p;
       +        int i, len;
       +        AMapN *an;
       +        Fmt fmt;
       +        
       +        an = buildamap();
       +        fmtstrinit(&fmt);
       +        fmtprint(&fmt, "%ud\n", an->n);
       +        for(i=0; i<an->n; i++)
       +                fmtprint(&fmt, "%s\t%lld\t%lld\n",
       +                        an->map[i].name, an->map[i].start, an->map[i].stop);
       +        s = fmtstrflush(&fmt);
       +        len = strlen(s);
       +        if(len > ap.tabsize){
       +                print("arena partition map too long: need %z bytes have %z\n",
       +                        (vlong)len, (vlong)ap.tabsize);
       +                len = ap.tabsize;
       +        }
       +        
       +        if(ap.tabsize >= 4*M){        /* can't happen - max arenas is 2000 */
       +                print("arena partition map *way* too long\n");
       +                return;
       +        }
       +
       +        p = pagein(ap.tabbase, ap.tabsize);
       +        if(memcmp(p, s, len) != 0){
       +                print("arena partition map incorrect; rewriting.\n");
       +                memmove(p, s, len);
       +        }
       +        pageout();
       +}
       +
       +int mainstacksize = 512*1024;
       +
       +void
       +threadmain(int argc, char **argv)
       +{
       +        int mode;
       +        
       +        mode = OREAD;
       +        readonly = 1;        
       +        ARGBEGIN{
       +        case 'U':
       +                unseal = 1;
       +                break;
       +        case 'a':
       +                arenasize = unittoull(EARGF(usage()));
       +                break;
       +        case 'b':
       +                ap.blocksize = unittoull(EARGF(usage()));
       +                break;
       +        case 'f':
       +                fix = 1;
       +                mode = ORDWR;
       +                readonly = 0;
       +                break;
       +        case 'n':
       +                basename = EARGF(usage());
       +                break;
       +        case 'v':
       +                verbose++;
       +                break;
       +        case 'x':
       +                dumpbase = EARGF(usage());
       +                break;
       +        default:
       +                usage();
       +        }ARGEND
       +        
       +        if(argc != 1 && argc != 2)
       +                usage();
       +
       +        file = argv[0];
       +        
       +        ventifmtinstall();
       +        fmtinstall('z', zfmt);
       +        fmtinstall('t', tfmt);
       +        quotefmtinstall();
       +        
       +        part = initpart(file, mode|ODIRECT);
       +        if(part == nil)
       +                sysfatal("can't open %s: %r", file);
       +        partend = part->size;
       +        
       +        checkarenas(argc > 1 ? argv[1] : nil);
       +        checkmap();
       +        threadexitsall(nil);
       +}
       +
 (DIR) diff --git a/src/cmd/venti/srv/fns.h b/src/cmd/venti/srv/fns.h
       t@@ -24,8 +24,13 @@ void                delaykickicache(void);
        void                delaykickround(Round*);
        void                delaykickroundproc(void*);
        void                dirtydblock(DBlock*, int);
       +void                diskaccess(int);
       +void                disksched(void);
        AState        diskstate(void);
        void                *emalloc(ulong);
       +void                emptydcache(void);
       +void                emptyicache(void);
       +void                emptylumpcache(void);
        void                *erealloc(void *, ulong);
        char                *estrdup(char*);
        void                *ezmalloc(ulong);
       t@@ -49,6 +54,7 @@ u32int                hashbits(u8int *score, int nbits);
        int                httpdinit(char *address, char *webroot);
        int                iaddrcmp(IAddr *ia1, IAddr *ia2);
        IEntry*        icachedirty(u32int, u32int, u64int);
       +ulong        icachedirtyfrac(void);
        void                icacheclean(IEntry*);
        int                ientrycmp(const void *vie1, const void *vie2);
        char                *ifileline(IFile *f);
       t@@ -77,6 +83,7 @@ int                insertscore(u8int *score, IAddr *ia, int write);
        void                kickdcache(void);
        void                kickicache(void);
        void                kickround(Round*, int wait);
       +int                loadbloom(Bloom*);
        ZBlock                *loadclump(Arena *arena, u64int aa, int blocks, Clump *cl, u8int *score, int verify);
        DBlock        *loadibucket(Index *index, u8int *score, ISect **is, u32int *buck, IBucket *ib);
        int                loadientry(Index *index, u8int *score, int type, IEntry *ie);
       t@@ -98,6 +105,7 @@ int                okamap(AMap *am, int n, u64int start, u64int stop, char *what);
        int                okibucket(IBucket*, ISect*);
        int                outputamap(Fmt *f, AMap *am, int n);
        int                outputindex(Fmt *f, Index *ix);
       +int                _packarena(Arena *arena, u8int *buf, int);
        int                packarena(Arena *arena, u8int *buf);
        int                packarenahead(ArenaHead *head, u8int *buf);
        int                packarenapart(ArenaPart *as, u8int *buf);
       t@@ -129,6 +137,7 @@ ZBlock                *readfile(char *name);
        int                readifile(IFile *f, char *name);
        Packet                *readlump(u8int *score, int type, u32int size, int *cached);
        int                readpart(Part *part, u64int addr, u8int *buf, u32int n);
       +int                resetbloom(Bloom*);
        int                runconfig(char *config, Config*);
        int                scorecmp(u8int *, u8int *);
        void                scoremem(u8int *score, u8int *buf, int size);
 (DIR) diff --git a/src/cmd/venti/srv/graph.c b/src/cmd/venti/srv/graph.c
       t@@ -55,7 +55,11 @@ ginit(void)
                        
                first = 0;
                memimageinit();
       +#ifdef PLAN9PORT
                smallfont = openmemsubfont(unsharp("#9/font/lucsans/lstr.10"));
       +#else
       +        smallfont = openmemsubfont("/lib/font/bit/lucidasans/lstr.10");
       +#endif
                black = memblack;
                blue = allocrepl(DBlue);
                red = allocrepl(DRed);
       t@@ -121,7 +125,7 @@ statgraph(Graph *g)
                if(g->wid > nelem(bin))
                        g->wid = nelem(bin);
                if(g->fill < 0)
       -                g->fill = ((uint)(uintptr)g->arg>>8)%nelem(lofill);
       +                g->fill = ((uint)g->arg>>8)%nelem(lofill);
                if(g->fill > nelem(lofill))
                        g->fill %= nelem(lofill);
                
       t@@ -151,7 +155,7 @@ statgraph(Graph *g)
                qlock(&memdrawlock);
                ginit();
                if(smallfont==nil || black==nil || blue==nil || red==nil || hifill==nil || lofill==nil){
       -                werrstr("graphics initialization failed");
       +                werrstr("graphics initialization failed: %r");
                        qunlock(&memdrawlock);
                        return nil;
                }
       t@@ -186,12 +190,12 @@ statgraph(Graph *g)
                        if(0)
                        if(lastlo != -1){
                                if(lastlo < lo)
       -                                memimagedraw(m, Rect(x-1, lastlo, x, lo), hifill[g->fill], ZP, memopaque, ZP, S);
       +                                memimagedraw(m, Rect(x-1, lastlo, x, lo), hifill[g->fill%nelem(hifill)], ZP, memopaque, ZP, S);
                                else if(lastlo > lo)
       -                                memimagedraw(m, Rect(x-1, lo, x, lastlo), hifill[g->fill], ZP, memopaque, ZP, S);
       +                                memimagedraw(m, Rect(x-1, lo, x, lastlo), hifill[g->fill%nelem(hifill)], ZP, memopaque, ZP, S);
                        }
       -                memimagedraw(m, Rect(x, hi, x+1,lo), hifill[g->fill], ZP, memopaque, ZP, S);
       -                memimagedraw(m, Rect(x, lo, x+1, r.max.y), lofill[g->fill], ZP, memopaque, ZP, S);
       +                memimagedraw(m, Rect(x, hi, x+1,lo), hifill[g->fill%nelem(hifill)], ZP, memopaque, ZP, S);
       +                memimagedraw(m, Rect(x, lo, x+1, r.max.y), lofill[g->fill%nelem(lofill)], ZP, memopaque, ZP, S);
                        lastlo = lo;
                }
        
 (DIR) diff --git a/src/cmd/venti/srv/httpd.c b/src/cmd/venti/srv/httpd.c
       t@@ -9,7 +9,7 @@ extern QLock memdrawlock;
        enum
        {
                ObjNameSize        = 64,
       -        MaxObjs                = 16
       +        MaxObjs                = 64
        };
        
        struct HttpObj
       t@@ -28,6 +28,12 @@ static        int                dindex(HConnect *c);
        static        int                xindex(HConnect *c);
        static        int                xlog(HConnect *c);
        static        int                sindex(HConnect *c);
       +static        int                hempty(HConnect *c);
       +static        int                hlcacheempty(HConnect *c);
       +static        int                hdcacheempty(HConnect *c);
       +static        int                hicacheempty(HConnect *c);
       +static        int                hicachekick(HConnect *c);
       +static        int                hdcachekick(HConnect *c);
        static        int                hicacheflush(HConnect *c);
        static        int                hdcacheflush(HConnect *c);
        static        int                notfound(HConnect *c);
       t@@ -53,10 +59,17 @@ httpdinit(char *address, char *dir)
                httpdobj("/xindex", xindex);
                httpdobj("/flushicache", hicacheflush);
                httpdobj("/flushdcache", hdcacheflush);
       +        httpdobj("/kickicache", hicachekick);
       +        httpdobj("/kickdcache", hdcachekick);
                httpdobj("/graph/", xgraph);
       +        httpdobj("/set", xset);
                httpdobj("/set/", xset);
                httpdobj("/log", xlog);
                httpdobj("/log/", xlog);
       +        httpdobj("/empty", hempty);
       +        httpdobj("/emptyicache", hicacheempty);
       +        httpdobj("/emptylumpcache", hlcacheempty);
       +        httpdobj("/emptydcache", hdcacheempty);
        
                if(vtproc(listenproc, address) < 0)
                        return -1;
       t@@ -105,8 +118,6 @@ listenproc(void *vaddress)
                char *address, ndir[NETPATHLEN], dir[NETPATHLEN];
                int ctl, nctl, data;
        
       -/*sleep(1000);        // let strace find us */
       -
                address = vaddress;
                ctl = announce(address, dir);
                if(ctl < 0){
       t@@ -148,7 +159,6 @@ httpproc(void *v)
                HConnect *c;
                int ok, i, n;
        
       -/*sleep(1000);        // let strace find us */
                c = v;
        
                for(;;){
       t@@ -182,7 +192,7 @@ httpproc(void *v)
        }
        
        static int
       -percent(long v, long total)
       +percent(ulong v, ulong total)
        {
                if(total == 0)
                        total = 1;
       t@@ -240,6 +250,31 @@ preqtext(HConnect *c)
        }
        
        static int
       +herror(HConnect *c)
       +{
       +        int n;
       +        Hio *hout;
       +        
       +        hout = &c->hout;
       +        n = snprint(c->xferbuf, HBufSize, "<html><head><title>Error</title></head>\n<body><h1>Error</h1>\n<pre>%r</pre>\n</body></html>");
       +        hprint(hout, "%s %s\r\n", hversion, "400 Bad Request");
       +        hprint(hout, "Date: %D\r\n", time(nil));
       +        hprint(hout, "Server: Venti\r\n");
       +        hprint(hout, "Content-Type: text/html\r\n");
       +        hprint(hout, "Content-Length: %d\r\n", n);
       +        if(c->head.closeit)
       +                hprint(hout, "Connection: close\r\n");
       +        else if(!http11(c))
       +                hprint(hout, "Connection: Keep-Alive\r\n");
       +        hprint(hout, "\r\n");
       +
       +        if(c->req.meth == nil || strcmp(c->req.meth, "HEAD") != 0)
       +                hwrite(hout, c->xferbuf, n);
       +
       +        return hflush(hout);
       +}
       +        
       +static int
        notfound(HConnect *c)
        {
                int r;
       t@@ -325,21 +360,53 @@ static struct
                "logging",        &ventilogging,
                "stats",        &collectstats,
                "icachesleeptime",        &icachesleeptime,
       +        "minicachesleeptime",        &minicachesleeptime,
                "arenasumsleeptime",        &arenasumsleeptime,
       +        "l0quantum",        &l0quantum,
       +        "l1quantum",        &l1quantum,
       +        "manualscheduling",        &manualscheduling,
       +        "ignorebloom",        &ignorebloom,
       +        "syncwrites",        &syncwrites,
       +        "icacheprefetch",        &icacheprefetch,
                0
        };
        
        static int
       +xsetlist(HConnect *c)
       +{
       +        int i;
       +        
       +        if(preqtype(c, "text/plain") < 0)
       +                return -1;
       +        for(i=0; namedints[i].name; i++)
       +                print("%s = %d\n", namedints[i].name, *namedints[i].p);
       +        hflush(&c->hout);
       +        return 0;
       +}
       +
       +
       +
       +static int
        xset(HConnect *c)
        {
                int i, nf, r;
                char *f[10], *s;
        
       +        if(strcmp(c->req.uri, "/set") == 0 || strcmp(c->req.uri, "/set/") == 0)
       +                return xsetlist(c);
       +
                s = estrdup(c->req.uri);
                nf = getfields(s+strlen("/set/"), f, nelem(f), 1, "/");
        
       -        if(nf < 1)
       -                return notfound(c);
       +        if(nf < 1){
       +                r = preqtext(c);
       +                if(r < 0)
       +                        return r;
       +                for(i=0; namedints[i].name; i++)
       +                        hprint(&c->hout, "%s = %d\n", namedints[i].name, *namedints[i].p);
       +                hflush(&c->hout);
       +                return 0;
       +        }
                for(i=0; namedints[i].name; i++){
                        if(strcmp(f[0], namedints[i].name) == 0){
                                if(nf >= 2)
       t@@ -495,6 +562,108 @@ darena(Hio *hout, Arena *arena)
        }
        
        static int
       +hempty(HConnect *c)
       +{
       +        Hio *hout;
       +        int r;
       +
       +        r = preqtext(c);
       +        if(r < 0)
       +                return r;
       +        hout = &c->hout;
       +
       +        emptylumpcache();
       +        emptydcache();
       +        emptyicache();
       +        hprint(hout, "emptied all caches\n");
       +        hflush(hout);
       +        return 0;
       +}
       +
       +static int
       +hlcacheempty(HConnect *c)
       +{
       +        Hio *hout;
       +        int r;
       +
       +        r = preqtext(c);
       +        if(r < 0)
       +                return r;
       +        hout = &c->hout;
       +
       +        emptylumpcache();
       +        hprint(hout, "emptied lumpcache\n");
       +        hflush(hout);
       +        return 0;
       +}
       +
       +static int
       +hicacheempty(HConnect *c)
       +{
       +        Hio *hout;
       +        int r;
       +
       +        r = preqtext(c);
       +        if(r < 0)
       +                return r;
       +        hout = &c->hout;
       +
       +        emptyicache();
       +        hprint(hout, "emptied icache\n");
       +        hflush(hout);
       +        return 0;
       +}
       +
       +static int
       +hdcacheempty(HConnect *c)
       +{
       +        Hio *hout;
       +        int r;
       +
       +        r = preqtext(c);
       +        if(r < 0)
       +                return r;
       +        hout = &c->hout;
       +
       +        emptydcache();
       +        hprint(hout, "emptied dcache\n");
       +        hflush(hout);
       +        return 0;
       +}
       +static int
       +hicachekick(HConnect *c)
       +{
       +        Hio *hout;
       +        int r;
       +
       +        r = preqtext(c);
       +        if(r < 0)
       +                return r;
       +        hout = &c->hout;
       +
       +        kickicache();
       +        hprint(hout, "kicked icache\n");
       +        hflush(hout);
       +        return 0;
       +}
       +
       +static int
       +hdcachekick(HConnect *c)
       +{
       +        Hio *hout;
       +        int r;
       +
       +        r = preqtext(c);
       +        if(r < 0)
       +                return r;
       +        hout = &c->hout;
       +
       +        kickdcache();
       +        hprint(hout, "kicked dcache\n");
       +        hflush(hout);
       +        return 0;
       +}
       +static int
        hicacheflush(HConnect *c)
        {
                Hio *hout;
       t@@ -569,6 +738,7 @@ rawgraph(Stats *s, Stats *t, void *va)
        {
                Arg *a;
        
       +        USED(s);
                a = va;
                return t->n[a->index];
        }
       t@@ -587,6 +757,7 @@ pctgraph(Stats *s, Stats *t, void *va)
        {
                Arg *a;
        
       +        USED(s);
                a = va;
                return percent(t->n[a->index], t->n[a->index2]);
        }
       t@@ -722,7 +893,7 @@ static char* graphname[] =
                "isectwritebyte",
        
                "sumread",
       -        "sumreadbyte"
       +        "sumreadbyte",
        };
        
        static int
       t@@ -733,7 +904,6 @@ findname(char *s)
                for(i=0; i<nelem(graphname); i++)
                        if(strcmp(graphname[i], s) == 0)
                                return i;
       -fprint(2, "no name '%s'\n", s);
                return -1;
        }
        
       t@@ -769,10 +939,14 @@ xgraph(HConnect *c)
        if(0) fprint(2, "graph %s\n" ,s);
                memset(&g, 0, sizeof g);
                nf = getfields(s+strlen("/graph/"), f, nelem(f), 1, "/");
       -        if(nf < 1)
       -                goto notfound;
       -        if((arg.index = findname(f[0])) == -1 && strcmp(f[0], "*") != 0)
       -                goto notfound;
       +        if(nf < 1){
       +                werrstr("bad syntax -- not enough fields");
       +                goto error;
       +        }
       +        if((arg.index = findname(f[0])) == -1 && strcmp(f[0], "*") != 0){
       +                werrstr("unknown name %s", f[0]);
       +                goto error;
       +        }
                g.arg = &arg;
                g.t0 = -120;
                g.t1 = 0;
       t@@ -793,14 +967,18 @@ if(0) fprint(2, "graph %s\n" ,s);
                        else if(strncmp(f[i], "max=", 4) == 0)
                                g.max = atoi(f[i]+4);
                        else if(strncmp(f[i], "pct=", 4) == 0){
       -                        if((arg.index2 = findname(f[i]+4)) == -1)
       -                                goto notfound;
       +                        if((arg.index2 = findname(f[i]+4)) == -1){
       +                                werrstr("unknown name %s", f[i]+4);
       +                                goto error;
       +                        }
                                g.fn = pctgraph;
                                g.min = 0;
                                g.max = 100;
                        }else if(strncmp(f[i], "pctdiff=", 8) == 0){
       -                        if((arg.index2 = findname(f[i]+8)) == -1)
       -                                goto notfound;
       +                        if((arg.index2 = findname(f[i]+8)) == -1){
       +                                werrstr("unknown name %s", f[i]+8);
       +                                goto error;
       +                        }
                                g.fn = pctdiffgraph;
                                g.min = 0;
                                g.max = 100;
       t@@ -830,7 +1008,7 @@ if(0) fprint(2, "graph %s\n" ,s);
        
                m = statgraph(&g);
                if(m == nil)
       -                goto notfound;
       +                goto error;
        
                if(preqtype(c, "image/png") < 0)
                        return -1;
       t@@ -843,9 +1021,9 @@ if(0) fprint(2, "graph %s\n" ,s);
                free(s);
                return 0;
        
       -notfound:
       +error:
                free(s);
       -        return notfound(c);
       +        return herror(c);
        }
        
        static int
       t@@ -944,7 +1122,6 @@ vtloghdump(Hio *h, VtLog *l)
                
                name = l ? l->name : "&lt;nil&gt;";
        
       -fprint(2, "hdump xfer %d\n", h->xferenc);
                hprint(h, "<html><head>\n");
                hprint(h, "<title>Venti Server Log: %s</title>\n", name);
                hprint(h, "</head><body>\n");
 (DIR) diff --git a/src/cmd/venti/srv/icache.c b/src/cmd/venti/srv/icache.c
       t@@ -11,6 +11,7 @@ struct ICache
                int        bits;                        /* bits to use for indexing heads */
                u32int        size;                        /* number of heads; == 1 << bits, should be < entries */
                IEntry        *base;                        /* all allocated hash table entries */
       +        IEntry        *free;
                u32int        entries;                /* elements in base */
                IEntry        *dirty;                /* chain of dirty elements */
                u32int        ndirty;
       t@@ -23,6 +24,8 @@ struct ICache
                int                nlast;
        };
        
       +int icacheprefetch = 1;
       +
        static ICache icache;
        
        static IEntry        *icachealloc(IAddr *ia, u8int *score);
       t@@ -45,6 +48,12 @@ initicache(int bits, int depth)
                setstat(StatIcacheSize, icache.entries);
        }
        
       +ulong
       +icachedirtyfrac(void)
       +{
       +        return (vlong)icache.ndirty*IcacheFrac / icache.entries;
       +}
       +
        u32int
        hashbits(u8int *sc, int bits)
        {
       t@@ -141,14 +150,16 @@ lookupscore(u8int *score, int type, IAddr *ia, int *rac)
                 * load the table of contents for that arena into the cache.
                 */
                ie = icachealloc(&d.ia, score);
       -        icache.last[icache.nlast++%nelem(icache.last)] = amapitoa(mainindex, ie->ia.addr, &aa);
       -        aa = ie->ia.addr - aa;        /* compute base addr of arena */
       -        for(i=0; i<nelem(icache.last); i++)
       -                if(icache.last[i] != icache.last[0])
       -                        break;
       -        if(i==nelem(icache.last) && icache.lastload != icache.last[0]){
       -                load = icache.last[0];
       -                icache.lastload = load;
       +        if(icacheprefetch){
       +                icache.last[icache.nlast++%nelem(icache.last)] = amapitoa(mainindex, ie->ia.addr, &aa);
       +                aa = ie->ia.addr - aa;        /* compute base addr of arena */
       +                for(i=0; i<nelem(icache.last); i++)
       +                        if(icache.last[i] != icache.last[0])
       +                                break;
       +                if(i==nelem(icache.last) && icache.lastload != icache.last[0]){
       +                        load = icache.last[0];
       +                        icache.lastload = load;
       +                }
                }
        
        found:
       t@@ -249,6 +260,11 @@ icachealloc(IAddr *ia, u8int *score)
                        trace(TraceLump, "icachealloc unused");
                        goto Found;
                }
       +        
       +        if((ie = icache.free) != nil){
       +                icache.free = ie->next;
       +                goto Found;
       +        }
        
                h = icache.stolen;
                for(i=0;; i++){
       t@@ -346,3 +362,21 @@ icacheclean(IEntry *ie)
                trace(TraceProc, "icachedirty exit");
        }
        
       +void
       +emptyicache(void)
       +{
       +        int i;
       +        IEntry *ie, **lie;
       +        
       +        qlock(&icache.lock);
       +        for(i=0; i<icache.size; i++)
       +        for(lie=&icache.heads[i]; (ie=*lie); ){
       +                if(ie->dirty == 0){
       +                        *lie = ie->next;
       +                        ie->next = icache.free;
       +                        icache.free = ie;
       +                }else
       +                        lie = &ie->next;
       +        }
       +        qunlock(&icache.lock);
       +}
 (DIR) diff --git a/src/cmd/venti/srv/icachewrite.c b/src/cmd/venti/srv/icachewrite.c
       t@@ -12,6 +12,7 @@ static void icachewritecoord(void*);
        static IEntry *iesort(IEntry*);
        
        int icachesleeptime = 1000;        /* milliseconds */
       +int minicachesleeptime = 50;
        
        enum
        {
       t@@ -74,7 +75,7 @@ nextchunk(Index *ix, ISect *is, IEntry **pie, u64int *paddr, uint *pnbuf)
        static int
        icachewritesect(Index *ix, ISect *is, u8int *buf)
        {
       -        int err, h, bsize;
       +        int err, h, bsize, t;
                u32int lo, hi;
                u64int addr, naddr;
                uint nbuf, off;
       t@@ -96,7 +97,14 @@ icachewritesect(Index *ix, ISect *is, u8int *buf)
                err = 0;
        
                while(iedirty){
       -                sleep(icachesleeptime);
       +                disksched();
       +                while((t=icachesleeptime) == SleepForever){
       +                        sleep(1000);
       +                        disksched();
       +                }
       +                if(t < minicachesleeptime)
       +                        t = minicachesleeptime;
       +                sleep(t);
                        trace(TraceProc, "icachewritesect nextchunk");
                        chunk = nextchunk(ix, is, &iedirty, &addr, &nbuf);
        
       t@@ -146,12 +154,15 @@ icachewritesect(Index *ix, ISect *is, u8int *buf)
                                                break;
                                }
                                packibucket(&ib, buf+off, is->bucketmagic);
       +                        /* XXX not right - must update cache after writepart */
                                if((b = _getdblock(is->part, naddr, ORDWR, 0)) != nil){
                                        memmove(b->data, buf+off, bsize);
                                        putdblock(b);
                                }
                        }
        
       +                diskaccess(1);
       +
                        trace(TraceProc, "icachewritesect writepart", addr, nbuf);
                        if(writepart(is->part, addr, buf, nbuf) < 0){
                                /* XXX */
       t@@ -171,6 +182,7 @@ icachewritesect(Index *ix, ISect *is, u8int *buf)
        static void
        icachewriteproc(void *v)
        {
       +        int ret;
                uint bsize;
                ISect *is;
                Index *ix;
       t@@ -188,17 +200,17 @@ icachewriteproc(void *v)
                        trace(TraceProc, "icachewriteproc recv");
                        recv(is->writechan, 0);
                        trace(TraceWork, "start");
       -                icachewritesect(ix, is, buf);
       +                ret = icachewritesect(ix, is, buf);
                        trace(TraceProc, "icachewriteproc send");
                        trace(TraceWork, "finish");
       -                send(is->writedonechan, 0);
       +                sendul(is->writedonechan, ret);
                }
        }
        
        static void
        icachewritecoord(void *v)
        {
       -        int i;
       +        int i, err;
                Index *ix;
                AState as;
        
       t@@ -216,9 +228,9 @@ icachewritecoord(void *v)
                        as = diskstate();
                        if(as.arena==iwrite.as.arena && as.aa==iwrite.as.aa){
                                /* will not be able to do anything more than last flush - kick disk */
       -                        trace(TraceProc, "icachewritecoord flush dcache");
       +                        trace(TraceProc, "icachewritecoord kick dcache");
                                kickdcache();
       -                        trace(TraceProc, "icachewritecoord flushed dcache");
       +                        trace(TraceProc, "icachewritecoord kicked dcache");
                        }
                        iwrite.as = as;
        
       t@@ -229,13 +241,15 @@ icachewritecoord(void *v)
                                if(ix->bloom)
                                        send(ix->bloom->writechan, 0);
                        
       +                        err = 0;
                                for(i=0; i<ix->nsects; i++)
       -                                recv(ix->sects[i]->writedonechan, 0);
       +                                err |= recvul(ix->sects[i]->writedonechan);
                                if(ix->bloom)
       -                                recv(ix->bloom->writedonechan, 0);
       +                                err |= recvul(ix->bloom->writedonechan);
        
       -                        trace(TraceProc, "icachewritecoord donewrite");
       -                        setatailstate(&iwrite.as);
       +                        trace(TraceProc, "icachewritecoord donewrite err=%d", err);
       +                        if(err == 0)
       +                                setatailstate(&iwrite.as);
                        }
                        icacheclean(nil);        /* wake up anyone waiting */
                        trace(TraceWork, "finish");
 (DIR) diff --git a/src/cmd/venti/srv/index.c b/src/cmd/venti/srv/index.c
       t@@ -23,17 +23,11 @@
        #include "dat.h"
        #include "fns.h"
        
       -/*static int        bucklook(u8int *score, int type, u8int *data, int n); */
       -/*static int        writebucket(ISect *is, u32int buck, IBucket *ib, DBlock *b); */
       -/*static int        okibucket(IBucket *ib, ISect *is); */
        static int        initindex1(Index*);
        static ISect        *initisect1(ISect *is);
       -/*static int        splitiblock(Index *ix, DBlock *b, ISect *is, u32int buck, IBucket *ib); */
        
        #define KEY(k,d)        ((d) ? (k)>>(32-(d)) : 0)
        
       -/*static QLock        indexlock;        //ZZZ */
       -
        static char IndexMagic[] = "venti index configuration";
        
        Index*
       t@@ -375,6 +369,8 @@ initisect(Part *part)
                        seterr(EAdmin, "can't read index section header: %r");
                        return nil;
                }
       +print("read %s at %d: %.2ux %.2ux %.2ux %.2ux\n",
       +        part->name, PartBlank, b->data[0], b->data[1], b->data[2], b->data[3]);
        
                is = MKZ(ISect);
                if(is == nil){
       t@@ -457,9 +453,10 @@ initisect1(ISect *is)
                v = is->part->size & ~(u64int)(is->blocksize - 1);
                if(is->blockbase + (u64int)is->blocks * is->blocksize != v){
                        seterr(ECorrupt, "invalid blocks in index section %s", is->name);
       -/*ZZZZZZZZZ */
       -/*                freeisect(is); */
       -/*                return nil; */
       +                /* ZZZ what to do? 
       +                freeisect(is);
       +                return nil;
       +                */
                }
        
                if(is->stop - is->start > is->blocks){
       t@@ -482,9 +479,10 @@ wbisect(ISect *is)
                ZBlock *b;
        
                b = alloczblock(HeadSize, 1, 0);
       -        if(b == nil)
       -/*ZZZ set error? */
       +        if(b == nil){
       +                /* ZZZ set error? */
                        return -1;
       +        }
        
                if(packisect(is, b->data) < 0){
                        seterr(ECorrupt, "can't make index section header: %r");
       t@@ -789,7 +787,7 @@ loadibucket0(Index *ix, u32int buck, ISect **pis, u32int *pbuck, IBucket *ib, in
        /*
         * find the number of the index section holding score
         */
       -static int
       +int
        indexsect1(Index *ix, u8int *score)
        {
                return indexsect0(ix, hashbits(score, 32) / ix->div);
 (DIR) diff --git a/src/cmd/venti/srv/lump.c b/src/cmd/venti/srv/lump.c
       t@@ -2,6 +2,7 @@
        #include "dat.h"
        #include "fns.h"
        
       +int                        syncwrites = 0;
        int                        queuewrites = 0;
        int                        writestodevnull = 0;
        
       t@@ -45,7 +46,7 @@ readlump(u8int *score, int type, u32int size, int *cached)
                        *cached = 0;
        
                if(lookupscore(score, type, &ia, &rac) < 0){
       -                /*ZZZ place to check for someone trying to guess scores */
       +                /* ZZZ place to check for someone trying to guess scores */
                        seterr(EOk, "no block with score %V/%d exists", score, type);
        
                        putlump(u);
       t@@ -92,7 +93,15 @@ writelump(Packet *p, u8int *score, int type, u32int creator, uint ms)
                if(u->data != nil){
                        ok = 0;
                        if(packetcmp(p, u->data) != 0){
       -                        seterr(EStrange, "score collision");
       +                        uchar nscore[VtScoreSize];
       +
       +                        packetsha1(u->data, nscore);
       +                        if(scorecmp(u->score, score) != 0)
       +                                seterr(EStrange, "lookuplump returned bad score %V not %V", u->score, score);
       +                        else if(scorecmp(u->score, nscore) != 0)
       +                                seterr(EStrange, "lookuplump returned bad data %V not %V", nscore, u->score);
       +                        else
       +                                seterr(EStrange, "score collision %V", score);
                                ok = -1;
                        }
                        packetfree(p);
       t@@ -138,7 +147,13 @@ writeqlump(Lump *u, Packet *p, int creator, uint ms)
                        if(old != nil){
                                ok = 0;
                                if(packetcmp(p, old) != 0){
       -                                seterr(EStrange, "score collision");
       +                                uchar nscore[VtScoreSize];
       +
       +                                packetsha1(old, nscore);
       +                                if(scorecmp(u->score, nscore) != 0)
       +                                        seterr(EStrange, "readilump returned bad data %V not %V", nscore, u->score);
       +                                else
       +                                        seterr(EStrange, "score collision %V", u->score);
                                        ok = -1;
                                }
                                packetfree(p);
       t@@ -160,6 +175,12 @@ writeqlump(Lump *u, Packet *p, int creator, uint ms)
                        insertlump(u, p);
                else
                        packetfree(p);
       +        
       +        if(syncwrites){
       +                flushdcache();
       +                flushicache();
       +                flushdcache();
       +        }
        
                ms = msec() - ms;
                addstat2(StatRpcWriteNew, 1, StatRpcWriteNewTime, ms);
 (DIR) diff --git a/src/cmd/venti/srv/lumpcache.c b/src/cmd/venti/srv/lumpcache.c
       t@@ -11,7 +11,7 @@ enum
        {
                HashLog                = 9,
                HashSize        = 1<<HashLog,
       -        HashMask        = HashSize - 1
       +        HashMask        = HashSize - 1,
        };
        
        struct LumpCache
       t@@ -175,7 +175,6 @@ again:
                 * remove it from the heap, and fix up the heap.
                 */
                size = packetasize(p);
       -/*ZZZ */
                while(lumpcache.avail < size){
                        trace(TraceLump, "insertlump bump");
                        CHECK(checklumpcache());
       t@@ -277,6 +276,15 @@ bumplump(void)
                return b;
        }
        
       +void
       +emptylumpcache(void)
       +{
       +        qlock(&lumpcache.lock);
       +        while(bumplump())
       +                ;
       +        qunlock(&lumpcache.lock);
       +}
       +
        /*
         * delete an arbitrary block from the heap
         */
       t@@ -415,3 +423,4 @@ checklumpcache(void)
                if(lumpcache.nheap + nfree + refed != lumpcache.nblocks)
                        sysfatal("lc: missing blocks: %d %d %d %d", lumpcache.nheap, refed, nfree, lumpcache.nblocks);
        }
       +
 (DIR) diff --git a/src/cmd/venti/srv/lumpqueue.c b/src/cmd/venti/srv/lumpqueue.c
       t@@ -58,22 +58,6 @@ initlumpqueues(int nq)
                                seterr(EOk, "can't start write queue slave: %r");
                                return -1;
                        }
       -                if(vtproc(queueproc, q) < 0){
       -                        seterr(EOk, "can't start write queue slave: %r");
       -                        return -1;
       -                }
       -                if(vtproc(queueproc, q) < 0){
       -                        seterr(EOk, "can't start write queue slave: %r");
       -                        return -1;
       -                }
       -                if(vtproc(queueproc, q) < 0){
       -                        seterr(EOk, "can't start write queue slave: %r");
       -                        return -1;
       -                }
       -                if(vtproc(queueproc, q) < 0){
       -                        seterr(EOk, "can't start write queue slave: %r");
       -                        return -1;
       -                }
                }
        
                return 0;
 (DIR) diff --git a/src/cmd/venti/srv/mirrorarenas.c b/src/cmd/venti/srv/mirrorarenas.c
       t@@ -0,0 +1,464 @@
       +/*
       + * Mirror one arena partition onto another.  
       + * Be careful to copy only new data.
       + */
       +
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +Channel *writechan;
       +
       +typedef struct Write Write;
       +struct Write
       +{
       +        uchar *p;
       +        int n;
       +        uvlong o;
       +        int error;
       +};
       +
       +Part *src;
       +Part *dst;
       +int force;
       +int verbose;
       +char *status;
       +uvlong astart, aend;
       +
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: mirrorarenas [-v] src dst [ranges]\n");
       +        threadexitsall("usage");
       +}
       +
       +int
       +ereadpart(Part *p, u64int offset, u8int *buf, u32int count)
       +{
       +        if(readpart(p, offset, buf, count) != count){
       +                print("%T readpart %s at %#llux+%ud: %r\n", p->name, offset, count);
       +                return -1;
       +        }
       +        return 0;
       +}
       +                
       +int
       +ewritepart(Part *p, u64int offset, u8int *buf, u32int count)
       +{
       +        if(writepart(p, offset, buf, count) != count){
       +                print("%T writepart %s at %#llux+%ud: %r\n", p->name, offset, count);
       +                return -1;
       +        }
       +        return 0;
       +}
       +
       +/*
       + * Extra proc to do writes to dst, so that we can overlap reading
       + * src with writing dst during copy.  This is an easy factor of two
       + * (almost) in performance.
       + */
       +static void
       +writeproc(void *v)
       +{
       +        Write *w;
       +        
       +        USED(v);
       +        while((w = recvp(writechan)) != nil){
       +                if(w->n == 0)
       +                        continue;
       +                if(ewritepart(dst, w->o, w->p, w->n) < 0)
       +                        w->error = 1;
       +        }
       +}
       +
       +int
       +copy(uvlong start, uvlong end, char *what, DigestState *ds)
       +{
       +        int i, n;
       +        uvlong o;
       +        static uchar tmp[2][1024*1024];
       +        Write w[2];
       +        
       +        assert(start <= end);
       +        assert(astart <= start && start < aend);
       +        assert(astart <= end && end <= aend);
       +
       +        if(verbose && start != end)
       +                print("%T   copy %,llud-%,llud %s\n", start, end, what);
       +
       +        i = 0;
       +        memset(w, 0, sizeof w);
       +        for(o=start; o<end; o+=n){
       +                if(w[i].error)
       +                        goto error;
       +                n = sizeof tmp[i];
       +                if(o+n > end)
       +                        n = end - o;
       +                if(ereadpart(src, o, tmp[i], n) < 0)
       +                        goto error;
       +                w[i].p = tmp[i];
       +                w[i].o = o;
       +                w[i].n = n;
       +                w[i].error = 0;
       +                sendp(writechan, &w[i]);
       +                if(ds)
       +                        sha1(tmp[i], n, nil, ds);
       +                i = 1-i;
       +        }
       +        if(w[i].error)
       +                goto error;
       +
       +        /*
       +         * wait for queued write to finish
       +         */
       +        w[i].p = nil;
       +        w[i].o = 0;
       +        w[i].n = 0;
       +        w[i].error = 0;
       +        sendp(writechan, &w[i]);
       +        i = 1-i;
       +        if(w[i].error)
       +                return -1;
       +        return 0;
       +
       +error:
       +        /*
       +         * sync with write proc
       +         */
       +        w[i].p = nil;
       +        w[i].o = 0;
       +        w[i].n = 0;
       +        w[i].error = 0;
       +        sendp(writechan, &w[i]);
       +        return -1;
       +}
       +
       +/* single-threaded, for reference */
       +int
       +copy1(uvlong start, uvlong end, char *what, DigestState *ds)
       +{
       +        int n;
       +        uvlong o;
       +        static uchar tmp[1024*1024];
       +        
       +        assert(start <= end);
       +        assert(astart <= start && start < aend);
       +        assert(astart <= end && end <= aend);
       +
       +        if(verbose && start != end)
       +                print("%T   copy %,llud-%,llud %s\n", start, end, what);
       +
       +        for(o=start; o<end; o+=n){
       +                n = sizeof tmp;
       +                if(o+n > end)
       +                        n = end - o;
       +                if(ereadpart(src, o, tmp, n) < 0)
       +                        return -1;
       +                if(ds)
       +                        sha1(tmp, n, nil, ds);
       +                if(ewritepart(dst, o, tmp, n) < 0)
       +                        return -1;
       +        }
       +        return 0;
       +}
       +
       +int
       +asha1(Part *p, uvlong start, uvlong end, DigestState *ds)
       +{
       +        int n;
       +        uvlong o;
       +        static uchar tmp[1024*1024];
       +
       +        if(start == end)
       +                return 0;
       +        assert(start < end);
       +
       +        if(verbose)
       +                print("%T   sha1 %,llud-%,llud\n", start, end);
       +
       +        for(o=start; o<end; o+=n){
       +                n = sizeof tmp;
       +                if(o+n > end)
       +                        n = end - o;
       +                if(ereadpart(p, o, tmp, n) < 0)
       +                        return -1;
       +                sha1(tmp, n, nil, ds);
       +        }
       +        return 0;
       +}
       +
       +uvlong
       +rdown(uvlong a, int b)
       +{
       +        return a-a%b;
       +}
       +
       +uvlong
       +rup(uvlong a, int b)
       +{
       +        if(a%b == 0)
       +                return a;
       +        return a+b-a%b;
       +}
       +
       +void
       +mirror(Arena *sa, Arena *da)
       +{
       +        vlong v, si, di, end;
       +        int clumpmax, blocksize;
       +        static uchar buf[MaxIoSize];
       +        ArenaHead h;
       +        DigestState xds, *ds;
       +        vlong shaoff, base;
       +        
       +        base = sa->base;
       +        blocksize = sa->blocksize;
       +        end = sa->base + sa->size;
       +        
       +        astart = base - blocksize;
       +        aend = end + blocksize;
       +        
       +        shaoff = 0;
       +
       +        if(force){
       +                copy(astart, aend, "all", nil);
       +                return;
       +        }
       +
       +        if(verbose)
       +                print("%T %s (%,llud-%,llud)\n", sa->name, astart, aend);
       +
       +        if(sa->diskstats.sealed && da->diskstats.sealed && scorecmp(da->score, zeroscore) != 0){
       +                if(scorecmp(sa->score, da->score) == 0)
       +                        return;
       +                print("%T arena %s: sealed score mismatch %V vs %V\n", sa->name, sa->score, da->score);
       +                status = "errors";
       +                return;
       +        }
       +        if(da->diskstats.sealed && scorecmp(da->score, zeroscore) != 0){
       +                print("%T arena %s: dst is sealed, src is not\n", sa->name);
       +                status = "errors";
       +                return;
       +        }
       +        if(sa->diskstats.used < da->diskstats.used){
       +                print("%T arena %s: src used %,lld < dst used %,lld\n", sa->name, sa->diskstats.used, da->diskstats.used);
       +                status = "errors";
       +                return;
       +        }
       +
       +        if(da->clumpmagic != sa->clumpmagic){
       +                /*
       +                 * Write this now to reduce the window in which
       +                 * the head and tail disagree about clumpmagic.
       +                 */
       +                da->clumpmagic = sa->clumpmagic;
       +                memset(buf, 0, sizeof buf);
       +                packarena(da, buf);
       +                if(ewritepart(dst, end, buf, blocksize) < 0)
       +                        return;
       +        }
       +        
       +        memset(&h, 0, sizeof h);
       +        h.version = da->version;
       +        strcpy(h.name, da->name);
       +        h.blocksize = da->blocksize;
       +        h.size = da->size + 2*da->blocksize;
       +        h.clumpmagic = da->clumpmagic;
       +        memset(buf, 0, sizeof buf);
       +        packarenahead(&h, buf);
       +        if(ewritepart(dst, base - blocksize, buf, blocksize) < 0)
       +                return;
       +
       +        ds = nil;
       +        if(sa->diskstats.sealed && scorecmp(sa->score, zeroscore) != 0){
       +                /* start sha1 state with header */
       +                memset(&xds, 0, sizeof xds);
       +                ds = &xds;
       +                sha1(buf, blocksize, nil, ds);
       +                shaoff = base;
       +        }
       +        
       +        if(sa->diskstats.used != da->diskstats.used){
       +                di = base+rdown(da->diskstats.used, blocksize);
       +                si = base+rup(sa->diskstats.used, blocksize);
       +                if(ds && asha1(dst, shaoff, di, ds) < 0)
       +                        return;
       +                if(copy(di, si, "data", ds) < 0)
       +                        return;
       +                shaoff = si;
       +        }
       +        
       +        clumpmax = sa->clumpmax;
       +        di = end - da->diskstats.clumps/clumpmax * blocksize;
       +        si = end - (sa->diskstats.clumps+clumpmax-1)/clumpmax * blocksize;
       +
       +        if(sa->diskstats.sealed){
       +                /*
       +                 * might be a small hole between the end of the 
       +                 * data and the beginning of the directory.
       +                 */
       +                v = base+rup(sa->diskstats.used, blocksize);
       +                if(ds && asha1(dst, shaoff, v, ds) < 0)
       +                        return;
       +                if(copy(v, si, "hole", ds) < 0)
       +                        return;
       +                shaoff = si;
       +        }
       +
       +        if(da->diskstats.clumps != sa->diskstats.clumps){
       +                if(ds && asha1(dst, shaoff, si, ds) < 0)
       +                        return;
       +                if(copy(si, di, "directory", ds) < 0)        /* si < di  because clumpinfo blocks grow down */
       +                        return;
       +                shaoff = di;
       +        }
       +
       +        da->ctime = sa->ctime;
       +        da->wtime = sa->wtime;
       +        da->diskstats = sa->diskstats;
       +        da->diskstats.sealed = 0;
       +        
       +        memset(buf, 0, sizeof buf);
       +        packarena(da, buf);
       +        if(ewritepart(dst, end, buf, blocksize) < 0)
       +                return;
       +
       +        if(ds){
       +                asha1(dst, shaoff, end, ds);
       +                da->diskstats.sealed = 1;
       +                memset(buf, 0, sizeof buf);
       +                packarena(da, buf);
       +                sha1(buf, blocksize, da->score, ds);
       +                if(scorecmp(sa->score, da->score) == 0){
       +                        if(verbose)
       +                                print("%T arena %s: %V\n", sa->name, da->score);
       +                        scorecp(buf+blocksize-VtScoreSize, da->score);
       +                        if(ewritepart(dst, end, buf, blocksize) < 0)
       +                                return;
       +                }else{
       +                        print("%T arena %s: sealing dst: score mismatch: %V vs %V\n", sa->name, sa->score, da->score);
       +                        memset(&xds, 0, sizeof xds);
       +                        asha1(dst, base-blocksize, end, &xds);
       +                        sha1(buf, blocksize, da->score, &xds);
       +                        print("%T   reseal: %V\n", da->score);
       +                        status = "errors";
       +                }
       +        }
       +}
       +
       +void
       +mirrormany(ArenaPart *sp, ArenaPart *dp, char *range)
       +{
       +        int i, lo, hi;
       +        char *s, *t;
       +        Arena *sa, *da;
       +
       +        if(range == nil){
       +                for(i=0; i<sp->narenas; i++){
       +                        sa = sp->arenas[i];
       +                        da = dp->arenas[i];
       +                        mirror(sa, da);
       +                }
       +                return;
       +        }
       +        if(strcmp(range, "none") == 0)
       +                return;
       +
       +        for(s=range; *s; s=t){
       +                t = strchr(s, ',');
       +                if(t)
       +                        *t++ = 0;
       +                else
       +                        t = s+strlen(s);
       +                if(*s == '-')
       +                        lo = 0;
       +                else
       +                        lo = strtol(s, &s, 0);
       +                hi = lo;
       +                if(*s == '-'){
       +                        s++;
       +                        if(*s == 0)
       +                                hi = sp->narenas-1;
       +                        else
       +                                hi = strtol(s, &s, 0);
       +                }
       +                if(*s != 0){
       +                        print("%T bad arena range: %s\n", s);
       +                        continue;
       +                }
       +                for(i=lo; i<=hi; i++){
       +                        sa = sp->arenas[i];
       +                        da = dp->arenas[i];
       +                        mirror(sa, da);
       +                }
       +        }        
       +}
       +
       +
       +void
       +threadmain(int argc, char **argv)
       +{
       +        int i;
       +        Arena *sa, *da;
       +        ArenaPart *s, *d;
       +        char *ranges;
       +        
       +        ventifmtinstall();
       +
       +        ARGBEGIN{
       +        case 'F':
       +                force = 1;
       +                break;
       +        case 'v':
       +                verbose++;
       +                break;
       +        default:
       +                usage();
       +        }ARGEND
       +        
       +        if(argc != 2 && argc != 3)
       +                usage();
       +        ranges = nil;
       +        if(argc == 3)
       +                ranges = argv[2];
       +
       +        if((src = initpart(argv[0], OREAD)) == nil)
       +                sysfatal("initpart %s: %r", argv[0]);
       +        if((dst = initpart(argv[1], ORDWR)) == nil)
       +                sysfatal("initpart %s: %r", argv[1]);
       +        if((s = initarenapart(src)) == nil)
       +                sysfatal("initarenapart %s: %r", argv[0]);
       +        for(i=0; i<s->narenas; i++)
       +                delarena(s->arenas[i]);
       +        if((d = initarenapart(dst)) == nil)
       +                sysfatal("loadarenapart %s: %r", argv[1]);
       +        for(i=0; i<d->narenas; i++)
       +                delarena(d->arenas[i]);
       +        
       +        /*
       +         * The arena geometries must match or all bets are off.
       +         */
       +        if(s->narenas != d->narenas)
       +                sysfatal("arena count mismatch: %d vs %d", s->narenas, d->narenas);
       +        for(i=0; i<s->narenas; i++){
       +                sa = s->arenas[i];
       +                da = d->arenas[i];
       +                if(sa->version != da->version)
       +                        sysfatal("arena %d: version mismatch: %d vs %d", i, sa->version, da->version);
       +                if(sa->blocksize != da->blocksize)
       +                        sysfatal("arena %d: blocksize mismatch: %d vs %d", i, sa->blocksize, da->blocksize);
       +                if(sa->size != da->size)
       +                        sysfatal("arena %d: size mismatch: %,lld vs %,lld", i, sa->size, da->size);
       +                if(strcmp(sa->name, da->name) != 0)
       +                        sysfatal("arena %d: name mismatch: %s vs %s", i, sa->name, da->name);
       +        }
       +        
       +        /*
       +         * Mirror one arena at a time.
       +         */
       +        writechan = chancreate(sizeof(void*), 0);
       +        vtproc(writeproc, nil);
       +        mirrormany(s, d, ranges);
       +        sendp(writechan, nil);
       +        threadexitsall(status);
       +}
 (DIR) diff --git a/src/cmd/venti/srv/mkfile b/src/cmd/venti/srv/mkfile
       t@@ -11,6 +11,7 @@ LIBOFILES=\
                config.$O\
                conv.$O\
                dcache.$O\
       +        disksched.$O\
                dump.$O\
                graph.$O\
                httpd.$O\
       t@@ -52,11 +53,13 @@ TARG=\
                fmtbloom\
                fmtisect\
                fmtindex\
       +        fixarenas\
                buildindex\
                checkarenas\
                checkindex\
                clumpstats\
                findscore\
       +        mirrorarenas\
                rdarena\
                wrarena\
                syncindex\
 (DIR) diff --git a/src/cmd/venti/srv/part.c b/src/cmd/venti/srv/part.c
       t@@ -145,8 +145,6 @@ initpart(char *name, int mode)
                if(hi == 0)
                        hi = dir->length;
                part->size = hi - part->offset;
       -fprint(2, "part %s: file %s offset %,lld size %,lld\n", 
       -        name, file, part->offset, part->size);
        #ifdef CANBLOCKSIZE
                {
                        struct statfs sfs;
       t@@ -203,10 +201,32 @@ prwb(char *name, int fd, int isread, u64int offset, void *vbuf, u32int count, u3
                u32int c, delta, icount, opsize;
                int r;
        
       +        icount = count;
                buf = vbuf;
       +
       +#ifndef PLAN9PORT
       +        op = isread ? "read" : "write";
       +        dst = buf;
       +        freetmp = nil;
       +        while(count > 0){
       +                opsize = min(count, 131072 /* blocksize */);
       +                if(isread)
       +                        r = pread(fd, dst, opsize, offset);
       +                else
       +                        r = pwrite(fd, dst, opsize, offset);
       +                if(r <= 0)
       +                        goto Error;
       +                offset += r;
       +                count -= r;
       +                dst += r;
       +                if(r != opsize)
       +                        goto Error;
       +        }
       +        return icount;
       +#endif
       +
                tmp = nil;
                freetmp = nil;
       -        icount = count;
                opsize = blocksize;
        
                if(count == 0){
       t@@ -313,7 +333,7 @@ print("FAILED isread=%d r=%d count=%d blocksize=%d\n", isread, r, count, blocksi
                                memmove(buf, tmp, count);
                        else{
                                memmove(tmp, buf, count);
       -                        if(pwrite(fd, tmp, blocksize, offset) != blocksize){
       +                        if(pwrite(fd, tmp, opsize, offset) != blocksize){
                                        dst = tmp;
                                        op = "write";
                                        goto Error;
       t@@ -332,9 +352,16 @@ Error:
                return -1;
        }
        
       +#ifndef PLAN9PORT
       +static int sdreset(Part*);
       +static int reopen(Part*);
       +static int threadspawnl(int[3], char*, char*, ...);
       +#endif
       +
        int
        rwpart(Part *part, int isread, u64int offset, u8int *buf, u32int count)
        {
       +        int n, try;
                u32int blocksize;
        
                trace(TraceDisk, "%s %s %ud at 0x%llx", 
       t@@ -351,9 +378,33 @@ rwpart(Part *part, int isread, u64int offset, u8int *buf, u32int count)
                if(blocksize == 0)
                        blocksize = 4096;
        
       -        return prwb(part->filename, part->fd, isread, part->offset+offset, buf, count, blocksize);
       -}
       +        for(try=0;; try++){
       +                n = prwb(part->filename, part->fd, isread, part->offset+offset, buf, count, blocksize);
       +                if(n >= 0 || try > 10)
       +                        break;
        
       +#ifndef PLAN9PORT
       +            {
       +                char err[ERRMAX];
       +                /*
       +                 * This happens with the sdmv disks frustratingly often.
       +                 * Try to fix things up and continue.
       +                 */
       +                rerrstr(err, sizeof err);
       +                if(strstr(err, "i/o timeout") || strstr(err, "i/o error")){
       +                        if(sdreset(part) >= 0)
       +                                reopen(part);
       +                        continue;
       +                }else if(strstr(err, "partition has changed")){
       +                        reopen(part);
       +                        continue;
       +                }
       +            }
       +#endif
       +                break;
       +        }
       +        return n;
       +}
        int
        readpart(Part *part, u64int offset, u8int *buf, u32int count)
        {
       t@@ -391,3 +442,200 @@ readfile(char *name)
                return b;
        }
        
       +
       +
       +
       +
       +
       +
       +
       +#ifndef PLAN9PORT
       +static int
       +sdreset(Part *part)
       +{
       +        char *name, *p;
       +        int i, fd, xfd[3], rv;
       +        static QLock resetlk;
       +        Dir *d, *dd;
       +        
       +        fprint(2, "sdreset %s\n", part->name);
       +        name = emalloc(strlen(part->filename)+20);
       +        strcpy(name, part->filename);
       +        p = strrchr(name, '/');
       +        if(p)
       +                p++;
       +        else
       +                p = name;
       +        
       +        strcpy(p, "ctl");
       +        d = dirstat(name);
       +        if(d == nil){
       +                free(name);
       +                return -1;
       +        }
       +
       +        /*
       +         * We don't need multiple people resetting the disk.
       +         */
       +        qlock(&resetlk);
       +        if((fd = open(name, OWRITE)) < 0)
       +                goto error;
       +        dd = dirfstat(fd);
       +        if(d && dd && d->qid.vers != dd->qid.vers){
       +                fprint(2, "sdreset %s: got scooped\n", part->name);
       +                /* Someone else got here first. */
       +                if(access(part->filename, AEXIST) >= 0)
       +                        goto ok;
       +                goto error;
       +        }
       +
       +        /*
       +         * Write "reset" to the ctl file to cause the chipset
       +         * to reinitialize itself (specific to sdmv driver).
       +         * Ignore error in case using other disk.
       +         */
       +        fprint(2, "sdreset %s: reset ctl\n", part->name);
       +        write(fd, "reset", 5);
       +
       +        if(access(part->filename, AEXIST) >= 0)
       +                goto ok;
       +
       +        /*
       +         * Re-run fdisk and prep.  Don't use threadwaitchan
       +         * to avoid coordinating for it.  Reopen ctl because 
       +         * we reset the disk.
       +         */
       +        strcpy(p, "ctl");
       +        close(fd);
       +        if((fd = open(name, OWRITE)) < 0)
       +                goto error;
       +        strcpy(p, "data");
       +        xfd[0] = open("/dev/null", OREAD);
       +        xfd[1] = dup(fd, -1);
       +        xfd[2] = dup(2, -1);
       +        fprint(2, "sdreset %s: run fdisk %s\n", part->name, name);
       +        if(threadspawnl(xfd, "/bin/disk/fdisk", "disk/fdisk", "-p", name, nil) < 0){
       +                close(xfd[0]);
       +                close(xfd[1]);
       +                close(xfd[2]);
       +                goto error;
       +        }
       +        strcpy(p, "plan9");
       +        for(i=0; i<=20; i++){
       +                sleep(i*100);
       +                if(access(part->filename, AEXIST) >= 0)
       +                        goto ok;
       +                if(access(name, AEXIST) >= 0)
       +                        goto prep;
       +        }
       +        goto error;
       +        
       +prep:
       +        strcpy(p, "ctl");
       +        close(fd);
       +        if((fd = open(name, OWRITE)) < 0)
       +                goto error;
       +        strcpy(p, "plan9");
       +        xfd[0] = open("/dev/null", OREAD);
       +        xfd[1] = dup(fd, -1);
       +        xfd[2] = dup(2, -1);
       +        fprint(2, "sdreset %s: run prep\n", part->name);
       +        if(threadspawnl(xfd, "/bin/disk/prep", "disk/prep", "-p", name, nil) < 0){
       +                close(xfd[0]);
       +                close(xfd[1]);
       +                close(xfd[2]);
       +                goto error;
       +        }
       +        for(i=0; i<=20; i++){
       +                sleep(i*100);
       +                if(access(part->filename, AEXIST) >= 0)
       +                        goto ok;
       +        }
       +
       +error:
       +        fprint(2, "sdreset %s: error: %r\n", part->name);
       +        rv = -1;
       +        if(fd >= 0)
       +                close(fd);
       +        goto out;
       +
       +ok:
       +        fprint(2, "sdreset %s: all okay\n", part->name);
       +        rv = 0;
       +        goto out;
       +
       +out:
       +        free(name);
       +        qunlock(&resetlk);
       +        return rv;
       +}
       +
       +static int
       +reopen(Part *part)
       +{
       +        int fd;
       +        
       +        fprint(2, "reopen %s\n", part->filename);
       +        if((fd = open(part->filename, ORDWR)) < 0){
       +                fprint(2, "reopen %s: %r\n", part->filename);
       +                return -1;
       +        }        
       +        if(fd != part->fd){
       +                dup(fd, part->fd);
       +                close(fd);
       +        }
       +        return 0;
       +}
       +
       +typedef struct Spawn Spawn;
       +struct Spawn
       +{
       +        Channel *c;
       +        int fd[3];
       +        char *file;
       +        char **argv;
       +};
       +
       +static void
       +spawnproc(void *v)
       +{
       +        int i, *fd;
       +        Spawn *s;
       +        
       +        rfork(RFFDG);
       +        s = v;
       +        fd = s->fd;
       +        for(i=0; i<3; i++)
       +                dup(fd[i], i);
       +        if(fd[0] > 2)
       +                close(fd[0]);
       +        if(fd[1] > 2 && fd[1] != fd[0])
       +                close(fd[1]);
       +        if(fd[2] > 2 && fd[2] != fd[1] && fd[2] != fd[0])
       +                close(fd[2]);
       +        procexec(s->c, s->file, s->argv);
       +}
       +
       +static int
       +threadspawnl(int fd[3], char *file, char *argv0, ...)
       +{
       +        int pid;
       +        Spawn s;
       +        
       +        s.c = chancreate(sizeof(void*), 0);
       +        memmove(s.fd, fd, sizeof(s.fd));
       +        s.file = file;
       +        s.argv = &argv0;
       +        vtproc(spawnproc, &s);
       +        pid = recvul(s.c);
       +        if(pid < 0)
       +                return -1;
       +        close(fd[0]);
       +        if(fd[1] != fd[0])
       +                close(fd[1]);
       +        if(fd[2] != fd[1] && fd[2] != fd[0])
       +                close(fd[2]);
       +        return pid;
       +}
       +
       +#endif
 (DIR) diff --git a/src/cmd/venti/srv/printarenapart.c b/src/cmd/venti/srv/printarenapart.c
       t@@ -0,0 +1,160 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +uchar buf[64*1024];
       +
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: printarenapart arenafile [offset]\n");
       +        threadexitsall("usage");
       +}
       +
       +static void
       +rdarena(Arena *arena, u64int offset)
       +{
       +        u64int a, aa, e;
       +        u32int magic;
       +        Clump cl;
       +        uchar score[VtScoreSize];
       +        ZBlock *lump;
       +
       +        printarena(2, arena);
       +
       +        a = arena->base;
       +        e = arena->base + arena->size;
       +        if(offset != ~(u64int)0) {
       +                if(offset >= e-a)
       +                        sysfatal("bad offset %llud >= %llud\n",
       +                                offset, e-a);
       +                aa = offset;
       +        } else
       +                aa = 0;
       +
       +        for(; aa < e; aa += ClumpSize+cl.info.size) {
       +                magic = clumpmagic(arena, aa);
       +                if(magic == ClumpFreeMagic)
       +                        break;
       +                if(magic != arena->clumpmagic) {
       +                        fprint(2, "illegal clump magic number %#8.8ux offset %llud\n",
       +                                magic, aa);
       +                        break;
       +                }
       +                lump = loadclump(arena, aa, 0, &cl, score, 0);
       +                if(lump == nil) {
       +                        fprint(2, "clump %llud failed to read: %r\n", aa);
       +                        break;
       +                }
       +                if(cl.info.type != VtCorruptType) {
       +                        scoremem(score, lump->data, cl.info.uncsize);
       +                        if(scorecmp(cl.info.score, score) != 0) {
       +                                fprint(2, "clump %llud has mismatched score\n", aa);
       +                                break;
       +                        }
       +                        if(vttypevalid(cl.info.type) < 0) {
       +                                fprint(2, "clump %llud has bad type %d\n", aa, cl.info.type);
       +                                break;
       +                        }
       +                }
       +                print("%22llud %V %3d %5d\n", aa, score, cl.info.type, cl.info.uncsize);
       +                freezblock(lump);
       +        }
       +        print("end offset %llud\n", aa);
       +}
       +
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        char *file, *p, *name;
       +        char *table;
       +        u64int offset;
       +        Part *part;
       +        ArenaPart ap;
       +        ArenaHead head;
       +        Arena tail;
       +        char ct[40], mt[40];
       +
       +        readonly = 1;        /* for part.c */
       +        ARGBEGIN{
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        switch(argc) {
       +        default:
       +                usage();
       +        case 1:
       +                file = argv[0];
       +        }
       +
       +        ventifmtinstall();
       +        statsinit();
       +
       +        part = initpart(file, OREAD|ODIRECT);
       +        if(part == nil)
       +                sysfatal("can't open file %s: %r", file);
       +        if(readpart(part, PartBlank, buf, sizeof buf) < 0)
       +                sysfatal("can't read file %s: %r", file);
       +
       +        if(unpackarenapart(&ap, buf) < 0)
       +                sysfatal("corrupted arena part header: %r");
       +
       +        print("# arena part version=%d blocksize=%d arenabase=%d\n",
       +                ap.version, ap.blocksize, ap.arenabase);
       +        ap.tabbase = (PartBlank+HeadSize+ap.blocksize-1)&~(ap.blocksize-1);
       +        ap.tabsize = ap.arenabase - ap.tabbase;
       +
       +print("A");
       +        table = malloc(ap.tabsize+1);
       +        if(readpart(part, ap.tabbase, (uchar*)table, ap.tabsize) < 0)
       +                sysfatal("read %s: %r", file);
       +        table[ap.tabsize] = 0;
       +
       +print("A");
       +        partblocksize(part, ap.blocksize);
       +        initdcache(8 * MaxDiskBlock);
       +
       +print("A");
       +/* XXX - read the number of arenas from the first line */
       +        for(p=table; p && *p; p=strchr(p, '\n')){
       +                if(*p == '\n')
       +                        p++;
       +                name = p;
       +                p = strpbrk(p, " \t");
       +                if(p == nil){
       +                        fprint(2, "bad line: %s\n", name);
       +                        break;
       +                }
       +print("%p\n", p);
       +                offset = strtoull(p, nil, 0);
       +                if(readpart(part, offset, buf, sizeof buf) < 0){
       +                        fprint(2, "%s: read %s: %r\n", argv0, file);
       +                        continue;
       +                }
       +                if(unpackarenahead(&head, buf) < 0){
       +                        fprint(2, "%s: unpackarenahead: %r\n", argv0);
       +                        continue;
       +                }
       +                if(readpart(part, offset+head.size-head.blocksize, buf, head.blocksize) < 0){
       +                        fprint(2, "%s: read %s: %r\n", argv0, file);
       +                        continue;
       +                }
       +                if(unpackarena(&tail, buf) < 0){
       +                        fprint(2, "%s: unpackarena: %r\n", argv0);
       +                        continue;
       +                }
       +                print("arena %s %lld clumps=%,d cclumps=%,d used=%,lld uncsize=%,lld%s\n",
       +                        tail.name, offset,
       +                        tail.diskstats.clumps, tail.diskstats.cclumps,
       +                        tail.diskstats.used, tail.diskstats.uncsize,
       +                        tail.diskstats.sealed ? " sealed" : "");
       +                strcpy(ct, ctime(tail.ctime));
       +                ct[28] = 0;
       +                strcpy(mt, ctime(tail.wtime));
       +                mt[28] = 0;
       +                print("\tctime=%s\n\tmtime=%s\n", ct, mt);
       +        }
       +        threadexitsall(0);
       +}
 (DIR) diff --git a/src/cmd/venti/srv/printarenas.c b/src/cmd/venti/srv/printarenas.c
       t@@ -36,7 +36,7 @@ shoulddump(char *name, int argc, char **argv)
        
        enum
        {
       -        ClumpChunks = 32*1024
       +        ClumpChunks = 32*1024,
        };
        
        void
 (DIR) diff --git a/src/cmd/venti/srv/sortientry.c b/src/cmd/venti/srv/sortientry.c
       t@@ -61,7 +61,7 @@ sortrawientries(Index *ix, Part *tmp, u64int *base, Bloom *bloom)
                u32int n;
                int i, ok;
        
       -/*ZZZ should allow configuration of bits, bucket size */
       +/* ZZZ should allow configuration of bits, bucket size */
                ib = initiebucks(tmp, 8, 64*1024);
                if(ib == nil){
                        seterr(EOk, "can't create sorting buckets: %r");
       t@@ -116,10 +116,7 @@ readarenainfo(IEBucks *ib, Arena *arena, u64int a, Bloom *b)
                ClumpInfo *ci, *cis;
                u32int clump;
                int i, n, ok, nskip;
       -/*        static Biobuf bout; */
        
       -/*ZZZ remove fprint? */
       -/*fprint(2, "ra %s %d %d\n", arena->name, arena->memstats.clumps, arena->diskstats.clumps); */
                if(arena->memstats.clumps)
                        fprint(2, "\tarena %s: %d entries\n", arena->name, arena->memstats.clumps);
                else
       t@@ -129,7 +126,6 @@ readarenainfo(IEBucks *ib, Arena *arena, u64int a, Bloom *b)
                ok = 0;
                nskip = 0;
                memset(&ie, 0, sizeof(IEntry));
       -/*        Binit(&bout, 1, OWRITE); */
                for(clump = 0; clump < arena->memstats.clumps; clump += n){
                        n = ClumpChunks;
                        if(n > arena->memstats.clumps - clump)
       t@@ -148,18 +144,15 @@ readarenainfo(IEBucks *ib, Arena *arena, u64int a, Bloom *b)
                                a += ci->size + ClumpSize;
                                ie.ia.blocks = (ci->size + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;
                                scorecp(ie.score, ci->score);
       -                /*        Bprint(&bout, "%22lld %V %3d %5d\n", */
       -                /*                ie.ia.addr, ie.score, ie.ia.type, ie.ia.size); */
                                if(ci->type == VtCorruptType){
       -                        /*        print("! %V %22lld %3d %5d %3d\n", */
       -                        /*                ie.score, ie.ia.addr, ie.ia.type, ie.ia.size, ie.ia.blocks); */
       +                                if(0) print("! %V %22lld %3d %5d %3d\n",
       +                                        ie.score, ie.ia.addr, ie.ia.type, ie.ia.size, ie.ia.blocks);
                                        nskip++;
                                }else
                                        sprayientry(ib, &ie);
                                markbloomfilter(b, ie.score);
                        }
                }
       -/*        Bterm(&bout); */
                free(cis);
                if(ok < 0)
                        return TWID32;
       t@@ -358,8 +351,8 @@ readiebuck(IEBucks *ib, int b)
                m = ib->bucks[b].used;
                if(m == 0)
                        m = ib->usable;
       -/*        if(ib->bucks[b].total) */
       -/*                fprint(2, "\tbucket %d: %d entries\n", b, ib->bucks[b].total/IEntrySize); */
       +        if(0) if(ib->bucks[b].total)
       +                fprint(2, "\tbucket %d: %d entries\n", b, ib->bucks[b].total/IEntrySize);
                while(head != TWID32){
                        if(readpart(ib->part, (u64int)head * ib->size, &ib->buf[n], m+U32Size) < 0){
                                seterr(EOk, "can't read index sort bucket: %r");
 (DIR) diff --git a/src/cmd/venti/srv/stats.c b/src/cmd/venti/srv/stats.c
       t@@ -80,7 +80,7 @@ Statdesc statdesc[NStat] =
                { "isect block write bytes", },
        
                { "sum reads", },
       -        { "sum read bytes", }
       +        { "sum read bytes", },
        };
        
        QLock statslock;
 (DIR) diff --git a/src/cmd/venti/srv/syncarena.c b/src/cmd/venti/srv/syncarena.c
       t@@ -30,12 +30,11 @@ syncarena(Arena *arena, u64int start, u32int n, int zok, int fix)
                ZBlock *lump;
                Clump cl;
                ClumpInfo ci;
       -        static ClumpInfo zci = { -1 };
       +        static ClumpInfo zci = { .type = -1 };
                u8int score[VtScoreSize];
                u64int uncsize, used, aa;
                u32int clump, clumps, cclumps, magic;
                int err, flush, broken;
       -        AState as;
        
                used = arena->memstats.used;
                clumps = arena->memstats.clumps;
       t@@ -133,19 +132,21 @@ syncarena(Arena *arena, u64int start, u32int n, int zok, int fix)
                        flushdcache();
                }
        
       +fprint(2, "arena %s: start=%lld fix=%d flush=%d %lld->%lld %ud->%ud %ud->%ud %lld->%lld\n",
       +        arena->name,
       +        start,
       +        fix,
       +        flush,
       +        used, arena->memstats.used,
       +        clumps, arena->memstats.clumps,
       +        cclumps, arena->memstats.cclumps,
       +        uncsize, arena->memstats.uncsize);
       +
                if(used != arena->memstats.used
                || clumps != arena->memstats.clumps
                || cclumps != arena->memstats.cclumps
                || uncsize != arena->memstats.uncsize)
                        err |= SyncHeader;
       -        if(start && (err&SyncHeader)){
       -                trace(TraceProc, "syncarena setdcachestate");
       -                as.arena = arena;
       -                as.aa = start+arena->memstats.used;
       -                as.stats = arena->memstats;
       -                setdcachestate(&as);
       -        }
       -
                return err;
        }
        
 (DIR) diff --git a/src/cmd/venti/srv/syncindex.c b/src/cmd/venti/srv/syncindex.c
       t@@ -48,6 +48,8 @@ threadmain(int argc, char *argv[])
                ventifmtinstall();
                if(initventi(argv[0], &conf) < 0)
                        sysfatal("can't init venti: %r");
       +        if(mainindex->bloom && loadbloom(mainindex->bloom) < 0)
       +                sysfatal("can't load bloom filter: %r");
        
                if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))
                        bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
 (DIR) diff --git a/src/cmd/venti/srv/syncindex0.c b/src/cmd/venti/srv/syncindex0.c
       t@@ -121,6 +121,7 @@ int
        syncindex(Index *ix, int fix, int mustflush, int check)
        {
                Arena *arena;
       +        AState as;
                u64int a;
                u32int clump;
                int i, e, e1, ok, ok1, flush;
       t@@ -130,7 +131,12 @@ syncindex(Index *ix, int fix, int mustflush, int check)
                for(i = 0; i < ix->narenas; i++){
                        trace(TraceProc, "syncindex start %d", i);
                        arena = ix->arenas[i];
       -                clump = arena->memstats.clumps;
       +                /*
       +                 * Syncarena will scan through the arena looking for blocks
       +                 * that have been forgotten.  It will update arena->memstats.used,
       +                 * so save the currenct copy as the place to start the 
       +                 * syncarenaindex scan.
       +                 */
                        a = arena->memstats.used;
                        e = syncarena(arena, ix->amap[i].start, TWID32, fix, fix);
                        e1 = e;
       t@@ -138,15 +144,23 @@ syncindex(Index *ix, int fix, int mustflush, int check)
                                e1 &= ~(SyncHeader|SyncCIZero|SyncCIErr);
                        if(e1 == SyncHeader)
                                fprint(2, "arena %s: header is out-of-date\n", arena->name);
       +                clump = arena->diskstats.clumps;
                        if(e1)
                                ok = -1;
                        else{
                                ok1 = syncarenaindex(ix, arena, clump, a + ix->amap[i].start, fix, &flush, check);
                                if(ok1 < 0)
                                        fprint(2, "syncarenaindex: %r\n");
       +fprint(2, "arena %s: wbarena in syncindex\n", arena->name);
                                if(fix && ok1==0 && (e & SyncHeader) && wbarena(arena) < 0)
                                        fprint(2, "arena=%s header write failed: %r\n", arena->name);
                                ok |= ok1;
       +
       +fprint(2, "arena %s: setdcachestate\n", arena->name);
       +                        as.arena = arena;
       +                        as.aa = ix->amap[i].start + arena->memstats.used;
       +                        as.stats = arena->memstats;
       +                        setdcachestate(&as);
                        }
                }
                if(missing || wrong)
 (DIR) diff --git a/src/cmd/venti/srv/unwhack.c b/src/cmd/venti/srv/unwhack.c
       t@@ -23,7 +23,7 @@ static uchar lenval[1 << (DBigLenBits - 1)] =
        static uchar lenbits[] =
        {
                0, 0, 0,
       -        2, 3, 5, 5
       +        2, 3, 5, 5,
        };
        
        static uchar offbits[16] =
 (DIR) diff --git a/src/cmd/venti/srv/utils.c b/src/cmd/venti/srv/utils.c
       t@@ -148,6 +148,7 @@ emalloc(ulong n)
                        sysfatal("out of memory allocating %lud", n);
                }
                memset(p, 0xa5, n);
       +        setmalloctag(p, getcallerpc(&n));
        if(0)print("emalloc %p-%p by %lux\n", p, (char*)p+n, getcallerpc(&n));
                return p;
        }
       t@@ -164,6 +165,7 @@ ezmalloc(ulong n)
                        sysfatal("out of memory allocating %lud", n);
                }
                memset(p, 0, n);
       +        setmalloctag(p, getcallerpc(&n));
        if(0)print("ezmalloc %p-%p by %lux\n", p, (char*)p+n, getcallerpc(&n));
                return p;
        }
       t@@ -177,6 +179,7 @@ erealloc(void *p, ulong n)
                                abort();
                        sysfatal("out of memory allocating %lud", n);
                }
       +        setrealloctag(p, getcallerpc(&p));
        if(0)print("erealloc %p-%p by %lux\n", p, (char*)p+n, getcallerpc(&p));
                return p;
        }
       t@@ -190,6 +193,7 @@ estrdup(char *s)
                n = strlen(s) + 1;
                t = emalloc(n);
                memmove(t, s, n);
       +        setmalloctag(t, getcallerpc(&s));
        if(0)print("estrdup %p-%p by %lux\n", t, (char*)t+n, getcallerpc(&s));
                return t;
        }
       t@@ -231,6 +235,7 @@ ventifmtinstall(void)
                fmtinstall('F', vtfcallfmt);
                fmtinstall('H', encodefmt);
                fmtinstall('I', ientryfmt);
       +        fmtinstall('T', vttimefmt);
                fmtinstall('V', vtscorefmt);
        }
        
 (DIR) diff --git a/src/cmd/venti/srv/venti.c b/src/cmd/venti/srv/venti.c
       t@@ -105,6 +105,8 @@ threadmain(int argc, char *argv[])
                fprint(2, "conf...");
                if(initventi(configfile, &config) < 0)
                        sysfatal("can't init server: %r");
       +        if(mainindex->bloom && loadbloom(mainindex->bloom) < 0)
       +                sysfatal("can't load bloom filter: %r");
        
                if(mem == 0)
                        mem = config.mem;
       t@@ -210,8 +212,8 @@ ventiserver(void *v)
                        trace(TraceRpc, "<- %F", &r->tx);
                        r->rx.msgtype = r->tx.msgtype+1;
                        addstat(StatRpcTotal, 1);
       -        /*        print("req (arenas[0]=%p sects[0]=%p) %F\n", */
       -        /*                mainindex->arenas[0], mainindex->sects[0], &r->tx); */
       +                if(0) print("req (arenas[0]=%p sects[0]=%p) %F\n",
       +                        mainindex->arenas[0], mainindex->sects[0], &r->tx);
                        switch(r->tx.msgtype){
                        default:
                                vtrerror(r, "unknown request");
 (DIR) diff --git a/src/cmd/venti/srv/verifyarena.c b/src/cmd/venti/srv/verifyarena.c
       t@@ -3,65 +3,102 @@
        #include "fns.h"
        
        static int        verbose;
       +static int        fd;
       +static uchar        *data;
       +static int        blocksize;
       +static int        sleepms;
        
        void
        usage(void)
        {
       -        fprint(2, "usage: verifyarena [-v]\n");
       +        fprint(2, "usage: verifyarena [-b blocksize] [-s ms] [-v] [arenapart [name...]]\n");
                threadexitsall(0);
        }
        
       -static void
       +static int
       +preadblock(uchar *buf, int n, vlong off)
       +{
       +        int nr, m;
       +
       +        for(nr = 0; nr < n; nr += m){
       +                m = n - nr;
       +                m = pread(fd, &buf[nr], m, off+nr);
       +                if(m <= 0){
       +                        if(m == 0)
       +                                werrstr("early eof");
       +                        return -1;
       +                }
       +        }
       +        return 0;
       +}
       +
       +static int
        readblock(uchar *buf, int n)
        {
                int nr, m;
        
                for(nr = 0; nr < n; nr += m){
                        m = n - nr;
       -                m = read(0, &buf[nr], m);
       -                if(m <= 0)
       -                        sysfatal("can't read arena from standard input: %r");
       +                m = read(fd, &buf[nr], m);
       +                if(m <= 0){
       +                        if(m == 0)
       +                                werrstr("early eof");
       +                        return -1;
       +                }
                }
       +        return 0;
        }
        
        static void
       -verifyarena(void)
       +verifyarena(char *name, vlong len)
        {
                Arena arena;
                ArenaHead head;
       -        ZBlock *b;
                DigestState s;
                u64int n, e;
                u32int bs;
                u8int score[VtScoreSize];
        
       -        fprint(2, "verify arena from standard input\n");
       +        fprint(2, "verify %s\n", name);
        
                memset(&arena, 0, sizeof arena);
                memset(&s, 0, sizeof s);
        
                /*
       -         * read the little bit, which will included the header
       +         * read a little bit, which will include the header
                 */
       -        bs = MaxIoSize;
       -        b = alloczblock(bs, 0, 0);
       -        readblock(b->data, HeadSize);
       -        sha1(b->data, HeadSize, nil, &s);
       -        if(unpackarenahead(&head, b->data) < 0)
       -                sysfatal("corrupted arena header: %r");
       +        if(readblock(data, HeadSize) < 0){
       +                fprint(2, "%s: reading header: %r\n", name);
       +                return;
       +        }
       +        sha1(data, HeadSize, nil, &s);
       +        if(unpackarenahead(&head, data) < 0){
       +                fprint(2, "%s: corrupt arena header: %r\n", name);
       +                return;
       +        }
                if(head.version != ArenaVersion4 && head.version != ArenaVersion5)
       -                fprint(2, "warning: unknown arena version %d\n", head.version);
       +                fprint(2, "%s: warning: unknown arena version %d\n", name, head.version);
       +        if(len != 0 && len != head.size)
       +                fprint(2, "%s: warning: unexpected length %lld != %lld\n", name, head.size, len);
       +        if(strcmp(name, "<stdin>") != 0 && strcmp(head.name, name) != 0)
       +                fprint(2, "%s: warning: unexpected name %s\n", name, head.name);
        
                /*
                 * now we know how much to read
                 * read everything but the last block, which is special
                 */
                e = head.size - head.blocksize;
       +        bs = blocksize;
                for(n = HeadSize; n < e; n += bs){
                        if(n + bs > e)
                                bs = e - n;
       -                readblock(b->data, bs);
       -                sha1(b->data, bs, nil, &s);
       +                if(readblock(data, bs) < 0){
       +                        fprint(2, "%s: read data: %r\n", name);
       +                        return;
       +                }
       +                sha1(data, bs, nil, &s);
       +                if(sleepms)
       +                        sleep(sleepms);
                }
        
                /*
       t@@ -69,8 +106,11 @@ verifyarena(void)
                 * the sum is calculated assuming the slot for the sum is zero.
                 */
                bs = head.blocksize;
       -        readblock(b->data, bs);
       -        sha1(b->data, bs-VtScoreSize, nil, &s);
       +        if(readblock(data, bs) < 0){
       +                fprint(2, "%s: read last block: %r\n", name);
       +                return;
       +        }
       +        sha1(data, bs-VtScoreSize, nil, &s);
                sha1(zeroscore, VtScoreSize, nil, &s);
                sha1(nil, 0, score, &s);
        
       t@@ -78,37 +118,73 @@ verifyarena(void)
                 * validity check on the trailer
                 */
                arena.blocksize = head.blocksize;
       -        if(unpackarena(&arena, b->data) < 0)
       -                sysfatal("corrupted arena trailer: %r");
       -        scorecp(arena.score, &b->data[arena.blocksize - VtScoreSize]);
       -
       -        if(namecmp(arena.name, head.name) != 0)
       -                sysfatal("arena header and trailer names clash: %s vs. %s\n", head.name, arena.name);
       -        if(arena.version != head.version)
       -                sysfatal("arena header and trailer versions clash: %d vs. %d\n", head.version, arena.version);
       +        if(unpackarena(&arena, data) < 0){
       +                fprint(2, "%s: corrupt arena trailer: %r\n", name);
       +                return;
       +        }
       +        scorecp(arena.score, &data[arena.blocksize - VtScoreSize]);
       +
       +        if(namecmp(arena.name, head.name) != 0){
       +                fprint(2, "%s: wrong name in trailer: %s vs. %s\n", 
       +                        name, head.name, arena.name);
       +                return;
       +        }
       +        if(arena.version != head.version){
       +                fprint(2, "%s: wrong version in trailer: %d vs. %d\n", 
       +                        name, head.version, arena.version);
       +                return;
       +        }
                arena.size = head.size - 2 * head.blocksize;
        
                /*
                 * check for no checksum or the same
                 */
       -        if(scorecmp(score, arena.score) != 0){
       -                if(scorecmp(zeroscore, arena.score) != 0)
       -                        fprint(2, "warning: mismatched checksums for arena=%s, found=%V calculated=%V",
       -                                arena.name, arena.score, score);
       -                scorecp(arena.score, score);
       -        }else
       -                fprint(2, "matched score\n");
       -
       +        if(scorecmp(score, arena.score) == 0)
       +                fprint(2, "%s: verified score\n", name);
       +        else if(scorecmp(zeroscore, arena.score) == 0)
       +                fprint(2, "%s: unsealed\n", name);
       +        else{
       +                fprint(2, "%s: mismatch checksum - found=%V calculated=%V\n",
       +                        name, arena.score, score);
       +                return;
       +        }
                printarena(2, &arena);
        }
        
       +static int
       +shouldcheck(char *name, char **s, int n)
       +{
       +        int i;
       +        
       +        if(n == 0)
       +                return 1;
       +
       +        for(i=0; i<n; i++){
       +                if(s[i] && strcmp(name, s[i]) == 0){
       +                        s[i] = nil;
       +                        return 1;
       +                }
       +        }
       +        return 0;
       +}
       +
        void
        threadmain(int argc, char *argv[])
        {
       +        int i, nline;
       +        char *p, *q, *table, *f[10], line[256];
       +        vlong start, stop;
       +        ArenaPart ap;
       +        
                ventifmtinstall();
       -        statsinit();
       -
       +        blocksize = MaxIoSize;
                ARGBEGIN{
       +        case 'b':
       +                blocksize = unittoull(EARGF(usage()));
       +                break;
       +        case 's':
       +                sleepms = atoi(EARGF(usage()));
       +                break;
                case 'v':
                        verbose++;
                        break;
       t@@ -117,11 +193,69 @@ threadmain(int argc, char *argv[])
                        break;
                }ARGEND
        
       -        readonly = 1;
       +        data = vtmalloc(blocksize);
       +        if(argc == 0){
       +                fd = 0;
       +                verifyarena("<stdin>", 0);
       +                threadexitsall(nil);
       +        }
       +        
       +        if((fd = open(argv[0], OREAD)) < 0)
       +                sysfatal("open %s: %r", argv[0]);
        
       -        if(argc != 0)
       -                usage();
       +        if(preadblock(data, 8192, PartBlank) < 0)
       +                sysfatal("read arena part header: %r");
       +        if(unpackarenapart(&ap, data) < 0)
       +                sysfatal("corrupted arena part header: %r");
       +        fprint(2, "# arena part version=%d blocksize=%d arenabase=%d\n",
       +                ap.version, ap.blocksize, ap.arenabase);
       +        ap.tabbase = (PartBlank+HeadSize+ap.blocksize-1)&~(ap.blocksize-1);
       +        ap.tabsize = ap.arenabase - ap.tabbase;
       +        table = malloc(ap.tabsize+1);
       +        if(preadblock((uchar*)table, ap.tabsize, ap.tabbase) < 0)
       +                sysfatal("reading arena part directory: %r");
       +        table[ap.tabsize] = 0;
       +        
       +        nline = atoi(table);
       +        p = strchr(table, '\n');
       +        if(p)
       +                p++;
       +        for(i=0; i<nline; i++){
       +                if(p == nil){
       +                        fprint(2, "warning: unexpected arena table end\n");
       +                        break;
       +                }
       +                q = strchr(p, '\n');
       +                if(q)
       +                        *q++ = 0;
       +                if(strlen(p) >= sizeof line){
       +                        fprint(2, "warning: long arena table line: %s\n", p);
       +                        p = q;
       +                        continue;
       +                }
       +                strcpy(line, p);
       +                memset(f, 0, sizeof f);
       +                if(tokenize(line, f, nelem(f)) < 3){
       +                        fprint(2, "warning: bad arena table line: %s\n", p);
       +                        p = q;
       +                        continue;
       +                }
       +                p = q;
       +                if(shouldcheck(f[0], argv+1, argc-1)){
       +                        start = strtoull(f[1], 0, 0);
       +                        stop = strtoull(f[2], 0, 0);
       +                        if(stop <= start){
       +                                fprint(2, "%s: bad start,stop %lld,%lld\n", f[0], stop, start);
       +                                continue;
       +                        }
       +                        if(seek(fd, start, 0) < 0)
       +                                fprint(2, "%s: seek to start: %r\n", f[0]);
       +                        verifyarena(f[0], stop - start);
       +                }
       +        }
       +        for(i=1; i<argc; i++)
       +                if(argv[i] != 0)
       +                        fprint(2, "%s: did not find arena\n", argv[i]);
        
       -        verifyarena();
       -        threadexitsall(0);
       +        threadexitsall(nil);
        }
 (DIR) diff --git a/src/cmd/venti/srv/wrarena.c b/src/cmd/venti/srv/wrarena.c
       t@@ -83,8 +83,8 @@ rdarena(Arena *arena, u64int offset)
                        if(magic == ClumpFreeMagic)
                                break;
                        if(magic != arena->clumpmagic) {
       -                /*        fprint(2, "illegal clump magic number %#8.8ux offset %llud\n", */
       -                /*                magic, aa); */
       +                        if(0) fprint(2, "illegal clump magic number %#8.8ux offset %llud\n",
       +                                magic, aa);
                                break;
                        }
                        lump = loadclump(arena, aa, 0, &cl, score, 0);
 (DIR) diff --git a/src/cmd/venti/srv/zblock.c b/src/cmd/venti/srv/zblock.c
       t@@ -5,11 +5,13 @@
        void
        fmtzbinit(Fmt *f, ZBlock *b)
        {
       -        memset(f, 0, sizeof *f);
       -        fmtlocaleinit(f, nil, nil, nil);
       +        f->runes = 0;
                f->start = b->data;
                f->to = f->start;
                f->stop = (char*)f->start + b->len;
       +        f->flush = nil;
       +        f->farg = nil;
       +        f->nfmt = 0;
        }
        
        #define ROUNDUP(p, n) ((void*)(((uintptr)(p)+(n)-1)&~(uintptr)((n)-1)))
 (DIR) diff --git a/src/cmd/venti/srv/zeropart.c b/src/cmd/venti/srv/zeropart.c
       t@@ -10,10 +10,6 @@ zeropart(Part *part, int blocksize)
                int w;
        
                fprint(2, "clearing the partition\n");
       -/*fprint(2, "NOT!\n"); */
       -/*return; */
       -/*b=alloczblock(MaxIoSize, 1, blocksize); */
       -/*freezblock(b); */
                b = alloczblock(MaxIoSize, 1, blocksize);
        
                w = 0;