sysproc.c - vx32 - Local 9vx git repository for patches.
 (HTM) git clone git://r-36.net/vx32
 (DIR) Log
 (DIR) Files
 (DIR) Refs
       ---
       sysproc.c (23448B)
       ---
            1 #define        WANT_M
            2 #include        "u.h"
            3 #include        "tos.h"
            4 #include        "lib.h"
            5 #include        "mem.h"
            6 #include        "dat.h"
            7 #include        "fns.h"
            8 #include        "error.h"
            9 
           10 #include        "a.out.h"
           11 
           12 int        shargs(char*, int, char**);
           13 
           14 extern void checkpages(void);
           15 extern void checkpagerefs(void);
           16 
           17 long
           18 sysr1(uint32 *x)
           19 {
           20         vx32sysr1();
           21         return 0;
           22 }
           23 
           24 long
           25 sysrfork(uint32 *arg)
           26 {
           27         Proc *p;
           28         int n, i;
           29         Fgrp *ofg;
           30         Pgrp *opg;
           31         Rgrp *org;
           32         Egrp *oeg;
           33         ulong pid, flag;
           34         Mach *wm;
           35 
           36         flag = arg[0];
           37         /* Check flags before we commit */
           38         if((flag & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
           39                 error(Ebadarg);
           40         if((flag & (RFNAMEG|RFCNAMEG)) == (RFNAMEG|RFCNAMEG))
           41                 error(Ebadarg);
           42         if((flag & (RFENVG|RFCENVG)) == (RFENVG|RFCENVG))
           43                 error(Ebadarg);
           44 
           45         if((flag&RFPROC) == 0) {
           46                 if(flag & (RFMEM|RFNOWAIT))
           47                         error(Ebadarg);
           48                 if(flag & (RFFDG|RFCFDG)) {
           49                         ofg = up->fgrp;
           50                         if(flag & RFFDG)
           51                                 up->fgrp = dupfgrp(ofg);
           52                         else
           53                                 up->fgrp = dupfgrp(nil);
           54                         closefgrp(ofg);
           55                 }
           56                 if(flag & (RFNAMEG|RFCNAMEG)) {
           57                         opg = up->pgrp;
           58                         up->pgrp = newpgrp();
           59                         if(flag & RFNAMEG)
           60                                 pgrpcpy(up->pgrp, opg);
           61                         /* inherit noattach */
           62                         up->pgrp->noattach = opg->noattach;
           63                         closepgrp(opg);
           64                 }
           65                 if(flag & RFNOMNT)
           66                         up->pgrp->noattach = 1;
           67                 if(flag & RFREND) {
           68                         org = up->rgrp;
           69                         up->rgrp = newrgrp();
           70                         closergrp(org);
           71                 }
           72                 if(flag & (RFENVG|RFCENVG)) {
           73                         oeg = up->egrp;
           74                         up->egrp = smalloc(sizeof(Egrp));
           75                         up->egrp->ref.ref = 1;
           76                         if(flag & RFENVG)
           77                                 envcpy(up->egrp, oeg);
           78                         closeegrp(oeg);
           79                 }
           80                 if(flag & RFNOTEG)
           81                         up->noteid = incref(&noteidalloc);
           82                 return 0;
           83         }
           84 
           85         p = newproc();
           86 
           87         p->fpsave = up->fpsave;
           88         p->scallnr = up->scallnr;
           89         p->s = up->s;
           90         p->nerrlab = 0;
           91         p->slash = up->slash;
           92         p->dot = up->dot;
           93         incref(&p->dot->ref);
           94 
           95         memmove(p->note, up->note, sizeof(p->note));
           96         p->privatemem = up->privatemem;
           97         p->noswap = up->noswap;
           98         p->nnote = up->nnote;
           99         p->notified = 0;
          100         p->lastnote = up->lastnote;
          101         p->notify = up->notify;
          102         p->ureg = up->ureg;
          103         p->dbgreg = 0;
          104 
          105         /* Make a new set of memory segments */
          106         n = flag & RFMEM;
          107         qlock(&p->seglock);
          108         if(waserror()){
          109                 qunlock(&p->seglock);
          110                 nexterror();
          111         }
          112         for(i = 0; i < NSEG; i++)
          113                 if(up->seg[i])
          114                         p->seg[i] = dupseg(up->seg, i, n);
          115         qunlock(&p->seglock);
          116         poperror();
          117 
          118         /* File descriptors */
          119         if(flag & (RFFDG|RFCFDG)) {
          120                 if(flag & RFFDG)
          121                         p->fgrp = dupfgrp(up->fgrp);
          122                 else
          123                         p->fgrp = dupfgrp(nil);
          124         }
          125         else {
          126                 p->fgrp = up->fgrp;
          127                 incref(&p->fgrp->ref);
          128         }
          129 
          130         /* Process groups */
          131         if(flag & (RFNAMEG|RFCNAMEG)) {
          132                 p->pgrp = newpgrp();
          133                 if(flag & RFNAMEG)
          134                         pgrpcpy(p->pgrp, up->pgrp);
          135                 /* inherit noattach */
          136                 p->pgrp->noattach = up->pgrp->noattach;
          137         }
          138         else {
          139                 p->pgrp = up->pgrp;
          140                 incref(&p->pgrp->ref);
          141         }
          142         if(flag & RFNOMNT)
          143                 up->pgrp->noattach = 1;
          144 
          145         if(flag & RFREND)
          146                 p->rgrp = newrgrp();
          147         else {
          148                 incref(&up->rgrp->ref);
          149                 p->rgrp = up->rgrp;
          150         }
          151 
          152         /* Environment group */
          153         if(flag & (RFENVG|RFCENVG)) {
          154                 p->egrp = smalloc(sizeof(Egrp));
          155                 p->egrp->ref.ref = 1;
          156                 if(flag & RFENVG)
          157                         envcpy(p->egrp, up->egrp);
          158         }
          159         else {
          160                 p->egrp = up->egrp;
          161                 incref(&p->egrp->ref);
          162         }
          163         p->hang = up->hang;
          164         p->procmode = up->procmode;
          165 
          166         /* Craft a return frame which will cause the child to pop out of
          167          * the scheduler in user mode with the return register zero
          168          */
          169         forkchild(p, up->dbgreg);
          170 
          171         p->parent = up;
          172         p->parentpid = up->pid;
          173         if(flag&RFNOWAIT)
          174                 p->parentpid = 0;
          175         else {
          176                 lock(&up->exl);
          177                 up->nchild++;
          178                 unlock(&up->exl);
          179         }
          180         if((flag&RFNOTEG) == 0)
          181                 p->noteid = up->noteid;
          182 
          183         p->fpstate = up->fpstate;
          184         pid = p->pid;
          185         memset(p->time, 0, sizeof(p->time));
          186         p->time[TReal] = msec();
          187 
          188         kstrdup(&p->text, up->text);
          189         kstrdup(&p->user, up->user);
          190         /*
          191          *  since the bss/data segments are now shareable,
          192          *  any mmu info about this process is now stale
          193          *  (i.e. has bad properties) and has to be discarded.
          194          */
          195         flushmmu();
          196         p->basepri = up->basepri;
          197         p->priority = up->basepri;
          198         p->fixedpri = up->fixedpri;
          199         p->mp = up->mp;
          200         wm = up->wired;
          201         if(wm)
          202                 procwired(p, wm->machno);
          203         ready(p);
          204         sched();
          205         return pid;
          206 }
          207 
          /*
           * Interpret the four bytes of l, in memory order, as a big-endian
           * 32-bit quantity and return its value.
           *
           * Each byte is widened to uint32 before shifting: shifting the
           * int-promoted byte left by 24 would overflow a signed int whenever
           * the top bit of the first byte is set.
           */
          static uint32
          l2be(uint32 l)
          {
                  uchar *cp;

                  cp = (uchar*)&l;
                  return ((uint32)cp[0]<<24) | ((uint32)cp[1]<<16)
                          | ((uint32)cp[2]<<8) | (uint32)cp[3];
          }
          216 
          217 static char Echanged[] = "exec arguments changed underfoot";
          218 
          219 long
          220 sysexec(uint32 *arg)
          221 {
          222         char *volatile elem, *volatile file, *ufile;
          223         Chan *volatile tc;
          224 
          225         /*
          226          * Open the file, remembering the final element and the full name.
          227          */
          228         file = nil;
          229         elem = nil;
          230         tc = nil;
          231         if(waserror()){
          232                 if(file)
          233                         free(file);
          234                 if(elem)
          235                         free(elem);
          236                 if(tc)
          237                         cclose(tc);
          238                 nexterror();
          239         }
          240 
          241         ufile = uvalidaddr(arg[0], 1, 0);
          242         file = validnamedup(ufile, 1);
          243         tc = namec(file, Aopen, OEXEC, 0);
          244         kstrdup((char**)&elem, up->genbuf);
          245 
          246         /*
          247          * Read the header.  If it's a #!, fill in progarg[] with info and repeat.
          248          */
          249         int i, n, nprogarg;
          250         char *progarg[sizeof(Exec)/2+1];
          251         char *prog, *p;
          252         char line[sizeof(Exec)+1];
          253         Exec exec;
          254 
          255         nprogarg = 0;
          256         n = devtab[tc->type]->read(tc, &exec, sizeof(Exec), 0);
          257         if(n < 2)
          258                 error(Ebadexec);
          259         p = (char*)&exec;
          260         if(p[0] == '#' && p[1] == '!'){
          261                 memmove(line, p, n);
          262                 nprogarg = shargs(line, n, progarg);
          263                 if(nprogarg == 0)
          264                         error(Ebadexec);
          265                 
          266                 /* The original file becomes an extra arg after #! line */
          267                 progarg[nprogarg++] = file;
          268                 
          269                 /*
          270                  * Take the #! $0 as a file to open, and replace
          271                  * $0 with the original path's name.
          272                  */
          273                 prog = progarg[0];
          274                 progarg[0] = elem;
          275                 cclose(tc);
          276                 tc = nil;        /* in case namec errors out */
          277                 tc = namec(prog, Aopen, OEXEC, 0);
          278                 n = devtab[tc->type]->read(tc, &exec, sizeof(Exec), 0);
          279                 if(n < 2)
          280                         error(Ebadexec);
          281         }
          282 
          283         /* 
          284          * #! has had its chance, now we need a real binary
          285          */
          286         uint32 magic, entry, text, etext, data, edata, bss, ebss;
          287 
          288         magic = l2be(exec.magic);
          289         if(n != sizeof(Exec) || l2be(exec.magic) != AOUT_MAGIC)
          290                 error(Ebadexec);
          291 
          292         entry = l2be(exec.entry);
          293         text = l2be(exec.text);
          294         data = l2be(exec.data);
          295         bss = l2be(exec.bss);
          296         etext = ROUND(UTZERO+sizeof(Exec)+text, BY2PG);
          297         edata = ROUND(etext + data, BY2PG);
          298         ebss = ROUND(etext + data + bss, BY2PG);
          299         
          300 //iprint("entry %#lux text %#lux data %#lux bss %#lux\n", entry, text, data, bss);
          301 //iprint("etext %#lux edata %#lux ebss %#lux\n", etext, edata, ebss);
          302 
          303         if(entry < UTZERO+sizeof(Exec) || entry >= UTZERO+sizeof(Exec)+text)
          304                 error(Ebadexec);
          305         
          306         /* many overflow possibilities */
          307         if(text >= USTKTOP || data >= USTKTOP || bss >= USTKTOP
          308         || etext >= USTKTOP || edata >= USTKTOP || ebss >= USTKTOP
          309         || etext >= USTKTOP || edata < etext || ebss < edata)
          310                 error(Ebadexec);
          311 
          312         /*
          313          * Copy argv into new stack segment temporarily mapped elsewhere.
          314          * Be careful: multithreaded program could be changing argv during this.
          315          * Pass 1: count number of arguments, string bytes.
          316          */
          317         int nargv, strbytes;
          318         uint32 argp, ssize, spage;
          319 
          320         strbytes = 0;
          321         for(i=0; i<nprogarg; i++)
          322                 strbytes += strlen(progarg[i]) + 1;
          323 
          324         argp = arg[1];
          325         for(nargv=0;; nargv++, argp += BY2WD){
          326                 uint32 a;
          327                 char *str;
          328 
          329                 a = *(uint32*)uvalidaddr(argp, BY2WD, 0);
          330                 if(a == 0)
          331                         break;
          332                 str = uvalidaddr(a, 1, 0);
          333                 n = ((char*)vmemchr(str, 0, 0x7FFFFFFF) - str) + 1;
          334                 if(nprogarg > 0 && nargv == 0)
          335                         continue;        /* going to skip argv[0] on #! */
          336                 strbytes += n;
          337         }
          338         if(nargv == 0)
          339                 error("exec missing argv");
          340 
          341         /* 
          342          * Skip over argv[0] if using #!.  Waited until now so that
          343          * string would still be checked for validity during loop.
          344          */
          345         if(nprogarg > 0){
          346                 nargv--;
          347                 arg[1] += BY2WD;
          348         }
          349 
          350         ssize = BY2WD*((nprogarg+nargv)+1) + ROUND(strbytes, BY2WD) + sizeof(Tos);
          351 
          352         /*
          353          * 8-byte align SP for those (e.g. sparc) that need it.
          354          * execregs() will subtract another 4 bytes for argc.
          355          */
          356         if((ssize+4) & 7)
          357                 ssize += 4;
          358         spage = (ssize+(BY2PG-1)) >> PGSHIFT;
          359 
          360         /*
          361          * Pass 2: build the stack segment, being careful not to assume
          362          * that the counts from pass 1 are still valid.
          363          */
          364         if(spage > TSTKSIZ)
          365                 error(Enovmem);
          366 
          367         qlock(&up->seglock);
          368         if(waserror()){
          369                 if(up->seg[ESEG]){
          370                         putseg(up->seg[ESEG]);
          371                         up->seg[ESEG] = nil;
          372                 }
          373                 qunlock(&up->seglock);
          374                 nexterror();
          375         }
          376         up->seg[ESEG] = newseg(SG_STACK, TSTKTOP-USTKSIZE, USTKSIZE/BY2PG);
          377         flushmmu();        // Needed for Plan 9 VX  XXX really?
          378 
          379         /*
          380          * Top-of-stack structure.
          381          */
          382         uchar *uzero;
          383         uzero = up->pmmu.uzero;
          384         Tos *tos;
          385         uint32 utos;
          386         utos = USTKTOP - sizeof(Tos);
          387         tos = (Tos*)(uzero + utos + TSTKTOP - USTKTOP);
          388         tos->cyclefreq = m->cyclefreq;
          389         cycles((uvlong*)&tos->pcycles);
          390         tos->pcycles = -tos->pcycles;
          391         tos->kcycles = tos->pcycles;
          392         tos->clock = 0;
          393 
          394         /*
          395          * Argument pointers and strings, together.
          396          */
          397         char *bp, *ep;
          398         uint32 *targp;
          399         uint32 ustrp, uargp;
          400 
          401         ustrp = utos - ROUND(strbytes, BY2WD);
          402         uargp = ustrp - BY2WD*((nprogarg+nargv)+1);
          403         bp = (char*)(uzero + ustrp + TSTKTOP - USTKTOP);
          404         ep = bp + strbytes;
          405         p = bp;
          406         targp = (uint32*)(uzero + uargp + TSTKTOP - USTKTOP);
          407         
          408         /* #! args are trusted */
          409         for(i=0; i<nprogarg; i++){
          410                 n = strlen(progarg[i]) + 1;
          411                 if(n  > ep - p)
          412                         error(Echanged);
          413                 memmove(p, progarg[i], n);
          414                 p += n;
          415                 *targp++ = ustrp;
          416                 ustrp += n;
          417         }
          418         
          419         /* the rest are not */
          420         argp = arg[1];
          421         for(i=0; i<nargv; i++){
          422                 uint32 a;
          423                 char *str;
          424                 
          425                 a = *(uint32*)uvalidaddr(argp, BY2WD, 0);
          426                 argp += BY2WD;
          427                 
          428                 str = uvalidaddr(a, 1, 0);
          429                 n = ((char*)vmemchr(str, 0, 0x7FFFFFFF) - str) + 1;
          430                 if(n  > ep - p)
          431                         error(Echanged);
          432                 memmove(p, str, n);
          433                 p += n;
          434                 *targp++ = ustrp;
          435                 ustrp += n;
          436         }
          437 
          438         if(*(uint32*)uvalidaddr(argp, BY2WD, 0) != 0)
          439                 error(Echanged);        
          440         *targp = 0;
          441 
          442         /*
          443          * But wait, there's more: prepare an arg copy for up->args
          444          * using the copy we just made in the temporary segment.
          445          */
          446         char *args;
          447         int nargs;
          448 
          449         n = p - bp;        /* includes NUL on last arg, so must be > 0 */
          450         if(n <= 0)        /* nprogarg+nargv > 0; checked above */
          451                 error(Egreg);
          452         if(n > 128)
          453                 n = 128;
          454         args = smalloc(n);
          455         if(waserror()){
          456                 free(args);
          457                 nexterror();
          458         }
          459         memmove(args, bp, n);
          460         /* find beginning of UTF character boundary to place final NUL */
          461         while(n > 0 && (args[n-1]&0xC0) == 0x80)
          462                 n--;
          463         args[n-1] = '\0';
          464         nargs = n;
          465 
          466         /*
          467          * Now we're ready to commit.
          468          */
          469         free(up->text);
          470         up->text = elem;
          471         free(up->args);
          472         up->args = args;
          473         up->nargs = n;
          474         elem = nil;
          475         poperror();        /* args */
          476 
          477         /*
          478          * Free old memory.  Special segments maintained across exec.
          479          */
          480         Segment *s;
          481         for(i = SSEG; i <= BSEG; i++) {
          482                 putseg(up->seg[i]);
          483                 up->seg[i] = nil;        /* in case of error */
          484         }
          485         for(i = BSEG+1; i< NSEG; i++) {
          486                 s = up->seg[i];
          487                 if(s && (s->type&SG_CEXEC)) {
          488                         putseg(s);
          489                         up->seg[i] = nil;
          490                 }
          491         }
          492         
          493         /*
          494          * Close on exec
          495          */
          496         Fgrp *f;
          497         f = up->fgrp;
          498         for(i=0; i<=f->maxfd; i++)
          499                 fdclose(i, CCEXEC);
          500 
          501         /* Text.  Shared. Attaches to cache image if possible */
          502         /* attachimage returns a locked cache image */
          503         Image *img;
          504         Segment *ts;
          505         img = attachimage(SG_TEXT|SG_RONLY, tc, UTZERO, (etext-UTZERO)>>PGSHIFT);
          506         ts = img->s;
          507         up->seg[TSEG] = ts;
          508         ts->flushme = 1;
          509         ts->fstart = 0;
          510         ts->flen = sizeof(Exec)+text;
          511         unlock(&img->ref.lk);
          512 
          513         /* Data. Shared. */
          514         s = newseg(SG_DATA, etext, (edata-etext)>>PGSHIFT);
          515         up->seg[DSEG] = s;
          516 
          517         /* Attached by hand */
          518         incref(&img->ref);
          519         s->image = img;
          520         s->fstart = ts->fstart+ts->flen;
          521         s->flen = data;
          522 
          523         /* BSS. Zero fill on demand */
          524         up->seg[BSEG] = newseg(SG_BSS, edata, (ebss-edata)>>PGSHIFT);
          525 
          526         /*
          527          * Move the stack
          528          */
          529         s = up->seg[ESEG];
          530         up->seg[ESEG] = 0;
          531         up->seg[SSEG] = s;
          532         qunlock(&up->seglock);
          533         poperror();        /* seglock */
          534 
          535         s->base = USTKTOP-USTKSIZE;
          536         s->top = USTKTOP;
          537         relocateseg(s, USTKTOP-TSTKTOP);
          538 
          539         /*
          540          *  '/' processes are higher priority (hack to make /ip more responsive).
          541          */
          542         if(devtab[tc->type]->dc == L'/')
          543                 up->basepri = PriRoot;
          544         up->priority = up->basepri;
          545         poperror();        /* tc, elem, file */
          546         cclose(tc);
          547         free(file);
          548         // elem is now up->text
          549 
          550         /*
          551          *  At this point, the mmu contains info about the old address
          552          *  space and needs to be flushed
          553          */
          554         flushmmu();
          555         qlock(&up->debug);
          556         up->nnote = 0;
          557         up->notify = 0;
          558         up->notified = 0;
          559         up->privatemem = 0;
          560         procsetup(up);
          561         qunlock(&up->debug);
          562         if(up->hang)
          563                 up->procctl = Proc_stopme;
          564 
          565         return execregs(entry, USTKTOP - uargp, nprogarg+nargv);
          566 }
          567 
          /*
           * Split the first line of a "#!" header into arguments.
           *
           * s points at the n header bytes, starting with "#!".  The first
           * line is cut at its newline and broken in place at runs of spaces
           * and tabs; pointers to the words are stored in ap, followed by a
           * nil entry.
           *
           * Returns the number of words, or 0 if no newline occurs within the
           * n bytes.  That includes n <= 2, where there is nothing after "#!"
           * to scan.
           */
          int
          shargs(char *s, int n, char **ap)
          {
                  int i;

                  s += 2;
                  n -= 2;                /* skip #! */
                  /*
                   * Bound the newline search by n.  Testing only i == n-1 never
                   * fires when n is 0, letting the scan run off the buffer.
                   */
                  for(i=0; ; i++){
                          if(i >= n)
                                  return 0;
                          if(s[i] == '\n')
                                  break;
                  }
                  s[i] = 0;
                  *ap = 0;
                  i = 0;
                  for(;;) {
                          while(*s==' ' || *s=='\t')
                                  s++;
                          if(*s == 0)
                                  break;
                          i++;
                          *ap++ = s;
                          *ap = 0;
                          while(*s && *s!=' ' && *s!='\t')
                                  s++;
                          if(*s == 0)
                                  break;
                          else
                                  *s++ = 0;
                  }
                  return i;
          }
          598 
          /*
           * Condition function that is never satisfied: ignores v and
           * returns 0.  syssleep passes it to tsleep().
           */
          int
          return0(void *v)
          {
                  enum { Never = 0 };

                  return Never;
          }
          604 
          605 long
          606 syssleep(uint32 *arg)
          607 {
          608 
          609         int n;
          610 
          611         n = arg[0];
          612         if(n <= 0) {
          613                 yield();
          614                 return 0;
          615         }
          616         if(n < TK2MS(1))
          617                 n = TK2MS(1);
          618         tsleep(&up->sleep, return0, 0, n);
          619         return 0;
          620 }
          621 
          622 long
          623 sysalarm(uint32 *arg)
          624 {
          625         return procalarm(arg[0]);
          626 }
          627 
          628 long
          629 sysexits(uint32 *arg)
          630 {
          631         char *status;
          632         char *inval = "invalid exit string";
          633         char buf[ERRMAX];
          634 
          635         if(arg[0]){
          636                 if(waserror())
          637                         status = inval;
          638                 else{
          639                         status = uvalidaddr(arg[0], 1, 0);
          640                         if(vmemchr(status, 0, ERRMAX) == 0){
          641                                 memmove(buf, status, ERRMAX);
          642                                 buf[ERRMAX-1] = 0;
          643                                 status = buf;
          644                         }
          645                         poperror();
          646                 }
          647 
          648         }else
          649                 status = nil;
          650         pexit(status, 1);
          651         return 0;                /* not reached */
          652 }
          653 
          654 long
          655 sys_wait(uint32 *arg)
          656 {
          657         int pid;
          658         Waitmsg w;
          659         OWaitmsg *ow;
          660 
          661         if(arg[0] == 0)
          662                 return pwait(nil);
          663 
          664         ow = uvalidaddr(arg[0], sizeof(OWaitmsg), 1);
          665         evenaddr(arg[0]);
          666         pid = pwait(&w);
          667         if(pid >= 0){
          668                 readnum(0, ow->pid, NUMSIZE, w.pid, NUMSIZE);
          669                 readnum(0, ow->time+TUser*NUMSIZE, NUMSIZE, w.time[TUser], NUMSIZE);
          670                 readnum(0, ow->time+TSys*NUMSIZE, NUMSIZE, w.time[TSys], NUMSIZE);
          671                 readnum(0, ow->time+TReal*NUMSIZE, NUMSIZE, w.time[TReal], NUMSIZE);
          672                 strncpy(ow->msg, w.msg, sizeof(ow->msg));
          673                 ow->msg[sizeof(ow->msg)-1] = '\0';
          674         }
          675         return pid;
          676 }
          677 
          678 long
          679 sysawait(uint32 *arg)
          680 {
          681         int i;
          682         int pid;
          683         Waitmsg w;
          684         uint32 n;
          685         char *buf;
          686 
          687         n = arg[1];
          688         buf = uvalidaddr(arg[0], n, 1);
          689         pid = pwait(&w);
          690         if(pid < 0)
          691                 return -1;
          692         i = snprint(buf, n, "%d %lud %lud %lud %q",
          693                 w.pid,
          694                 w.time[TUser], w.time[TSys], w.time[TReal],
          695                 w.msg);
          696 
          697         return i;
          698 }
          699 
          700 void
          701 werrstr(char *fmt, ...)
          702 {
          703         va_list va;
          704 
          705         if(up == nil)
          706                 return;
          707 
          708         va_start(va, fmt);
          709         vseprint(up->syserrstr, up->syserrstr+ERRMAX, fmt, va);
          710         va_end(va);
          711 }
          712 
          713 static long
          714 generrstr(uint32 addr, uint nbuf)
          715 {
          716         char tmp[ERRMAX];
          717         char *buf;
          718 
          719         if(nbuf == 0)
          720                 error(Ebadarg);
          721         buf = uvalidaddr(addr, nbuf, 1);
          722         if(nbuf > sizeof tmp)
          723                 nbuf = sizeof tmp;
          724         memmove(tmp, buf, nbuf);
          725 
          726         /* make sure it's NUL-terminated */
          727         tmp[nbuf-1] = '\0';
          728         memmove(buf, up->syserrstr, nbuf);
          729         buf[nbuf-1] = '\0';
          730         memmove(up->syserrstr, tmp, nbuf);
          731         return 0;
          732 }
          733 
          734 long
          735 syserrstr(uint32 *arg)
          736 {
          737         return generrstr(arg[0], arg[1]);
          738 }
          739 
          740 /* compatibility for old binaries */
          741 long
          742 sys_errstr(uint32 *arg)
          743 {
          744         return generrstr(arg[0], 64);
          745 }
          746 
          747 long
          748 sysnotify(uint32 *arg)
          749 {
          750         if(arg[0] != 0)
          751                 uvalidaddr(arg[0], 1, 0);
          752         up->notify = arg[0];        /* checked again when used */
          753         return 0;
          754 }
          755 
          756 long
          757 sysnoted(uint32 *arg)
          758 {
          759         if(arg[0]!=NRSTR && !up->notified)
          760                 error(Egreg);
          761         return 0;
          762 }
          763 
          764 long
          765 syssegbrk(uint32 *arg)
          766 {
          767         int i;
          768         uint32 addr;
          769         Segment *s;
          770 
          771         addr = arg[0];
          772         for(i = 0; i < NSEG; i++) {
          773                 s = up->seg[i];
          774                 if(s == 0 || addr < s->base || addr >= s->top)
          775                         continue;
          776                 switch(s->type&SG_TYPE) {
          777                 case SG_TEXT:
          778                 case SG_DATA:
          779                 case SG_STACK:
          780                         error(Ebadarg);
          781                 default:
          782                         return ibrk(arg[1], i);
          783                 }
          784         }
          785 
          786         error(Ebadarg);
          787         return 0;                /* not reached */
          788 }
          789 
          790 long
          791 syssegattach(uint32 *arg)
          792 {
          793         return segattach(up, arg[0], uvalidaddr(arg[1], 1, 0), arg[2], arg[3]);
          794 }
          795 
          796 long
          797 syssegdetach(uint32 *arg)
          798 {
          799         int i;
          800         uint32 addr;
          801         Segment *s;
          802 
          803         qlock(&up->seglock);
          804         if(waserror()){
          805                 qunlock(&up->seglock);
          806                 nexterror();
          807         }
          808 
          809         s = 0;
          810         addr = arg[0];
          811         for(i = 0; i < NSEG; i++)
          812                 if((s = up->seg[i])) {
          813                         qlock(&s->lk);
          814                         if((addr >= s->base && addr < s->top) ||
          815                            (s->top == s->base && addr == s->base))
          816                                 goto found;
          817                         qunlock(&s->lk);
          818                 }
          819 
          820         error(Ebadarg);
          821 
          822 found:
          823         /*
          824          * Check we are not detaching the initial stack segment.
          825          */
          826         if(s == up->seg[SSEG]){
          827                 qunlock(&s->lk);
          828                 error(Ebadarg);
          829         }
          830         up->seg[i] = 0;
          831         qunlock(&s->lk);
          832         putseg(s);
          833         qunlock(&up->seglock);
          834         poperror();
          835 
          836         /* Ensure we flush any entries from the lost segment */
          837         flushmmu();
          838         return 0;
          839 }
          840 
          841 long
          842 syssegfree(uint32 *arg)
          843 {
          844         Segment *s;
          845         uint32 from, to;
          846 
          847         from = arg[0];
          848         s = seg(up, from, 1);
          849         if(s == nil)
          850                 error(Ebadarg);
          851         to = (from + arg[1]) & ~(BY2PG-1);
          852         from = PGROUND(from);
          853 
          854         if(to > s->top) {
          855                 qunlock(&s->lk);
          856                 error(Ebadarg);
          857         }
          858 
          859         mfreeseg(s, from, (to - from) / BY2PG);
          860         qunlock(&s->lk);
          861         flushmmu();
          862 
          863         return 0;
          864 }
          865 
          866 /* For binary compatibility */
          867 long
          868 sysbrk_(uint32 *arg)
          869 {
          870         return ibrk(arg[0], BSEG);
          871 }
          872 
          873 long
          874 sysrendezvous(uint32 *arg)
          875 {
          876         uintptr tag, val;
          877         Proc *p, **l;
          878 
          879         tag = arg[0];
          880         l = &REND(up->rgrp, tag);
          881         up->rendval = ~(uintptr)0;
          882 
          883         lock(&up->rgrp->ref.lk);
          884         for(p = *l; p; p = p->rendhash) {
          885                 if(p->rendtag == tag) {
          886                         *l = p->rendhash;
          887                         val = p->rendval;
          888                         p->rendval = arg[1];
          889 
          890                         while(p->mach != 0)
          891                                 ;
          892                         ready(p);
          893                         unlock(&up->rgrp->ref.lk);
          894                         return val;
          895                 }
          896                 l = &p->rendhash;
          897         }
          898 
          899         /* Going to sleep here */
          900         up->rendtag = tag;
          901         up->rendval = arg[1];
          902         up->rendhash = *l;
          903         *l = up;
          904         up->state = Rendezvous;
          905         unlock(&up->rgrp->ref.lk);
          906 
          907         sched();
          908 
          909         return up->rendval;
          910 }
          911 
          912 /*
          913  * The implementation of semaphores is complicated by needing
          914  * to avoid rescheduling in syssemrelease, so that it is safe
          915  * to call from real-time processes.  This means syssemrelease
          916  * cannot acquire any qlocks, only spin locks.
          917  * 
          918  * Semacquire and semrelease must both manipulate the semaphore
          919  * wait list.  Lock-free linked lists only exist in theory, not
          920  * in practice, so the wait list is protected by a spin lock.
          921  * 
          922  * The semaphore value *addr is stored in user memory, so it
          923  * cannot be read or written while holding spin locks.
          924  * 
          925  * Thus, we can access the list only when holding the lock, and
          926  * we can access the semaphore only when not holding the lock.
          927  * This makes things interesting.  Note that sleep's condition function
          928  * is called while holding two locks - r and up->rlock - so it cannot
          929  * access the semaphore value either.
          930  * 
          931  * An acquirer announces its intention to try for the semaphore
          932  * by putting a Sema structure onto the wait list and then
          933  * setting Sema.waiting.  After one last check of semaphore,
          934  * the acquirer sleeps until Sema.waiting==0.  A releaser of n
          935  * must wake up n acquirers who have Sema.waiting set.  It does
          936  * this by clearing Sema.waiting and then calling wakeup.
          937  * 
          938  * There are three interesting races here.  
          939  
          940  * The first is that in this particular sleep/wakeup usage, a single
          941  * wakeup can rouse a process from two consecutive sleeps!  
          942  * The ordering is:
          943  * 
          944  *         (a) set Sema.waiting = 1
          945  *         (a) call sleep
          946  *         (b) set Sema.waiting = 0
          947  *         (a) check Sema.waiting inside sleep, return w/o sleeping
          948  *         (a) try for semaphore, fail
          949  *         (a) set Sema.waiting = 1
          950  *         (a) call sleep
          951  *         (b) call wakeup(a)
          952  *         (a) wake up again
          953  * 
          954  * This is okay - semacquire will just go around the loop
          955  * again.  It does mean that at the top of the for(;;) loop in
          956  * semacquire, phore.waiting might already be set to 1.
          957  * 
          958  * The second is that a releaser might wake an acquirer who is
          959  * interrupted before he can acquire the lock.  Since
          960  * release(n) issues only n wakeup calls -- only n can be used
          961  * anyway -- if the interrupted process is not going to use his
          962  * wakeup call he must pass it on to another acquirer.
          963  * 
          964  * The third race is similar to the second but more subtle.  An
          965  * acquirer sets waiting=1 and then does a final canacquire()
          966  * before going to sleep.  The opposite order would result in
          967  * missing wakeups that happen between canacquire and
          968  * waiting=1.  (In fact, the whole point of Sema.waiting is to
          969  * avoid missing wakeups between canacquire() and sleep().) But
          970  * there can be spurious wakeups between a successful
          971  * canacquire() and the following semdequeue().  This wakeup is
          972  * not useful to the acquirer, since he has already acquired
          973  * the semaphore.  Like in the previous case, though, the
          974  * acquirer must pass the wakeup call along.
          975  * 
          976  * This is all rather subtle.  The code below has been verified
          977  * with the spin model /sys/src/9/port/semaphore.p.  The
          978  * original code anticipated the second race but not the first
          979  * or third, which were caught only with spin.  The first race
          980  * is mentioned in /sys/doc/sleep.ps, but I'd forgotten about it.
          981  * It was lucky that my abstract model of sleep/wakeup still managed
          982  * to preserve that behavior.
          983  *
          984  * I remain slightly concerned about memory coherence
          985  * outside of locks.  The spin model does not take 
          986  * queued processor writes into account so we have to
          987  * think hard.  The only variables accessed outside locks
          988  * are the semaphore value itself and the boolean flag
          989  * Sema.waiting.  The value is only accessed with cmpswap,
          990  * whose job description includes doing the right thing as
          991  * far as memory coherence across processors.  That leaves
          992  * Sema.waiting.  To handle it, we call coherence() before each
          993  * read and after each write.                - rsc
          994  */
          995 
          996 /* Add semaphore p with addr a to list in seg. */
          997 static void
          998 semqueue(Segment *s, long *a, Sema *p)
          999 {
         1000         memset(p, 0, sizeof *p);
         1001         p->addr = a;
         1002         lock(&s->sema.rendez.lk);        /* uses s->sema.Rendez.Lock, but no one else is */
         1003         p->next = &s->sema;
         1004         p->prev = s->sema.prev;
         1005         p->next->prev = p;
         1006         p->prev->next = p;
         1007         unlock(&s->sema.rendez.lk);
         1008 }
         1009 
         1010 /* Remove semaphore p from list in seg. */
         1011 static void
         1012 semdequeue(Segment *s, Sema *p)
         1013 {
         1014         lock(&s->sema.rendez.lk);
         1015         p->next->prev = p->prev;
         1016         p->prev->next = p->next;
         1017         unlock(&s->sema.rendez.lk);
         1018 }
         1019 
         1020 /* Wake up n waiters with addr a on list in seg. */
         1021 static void
         1022 semwakeup(Segment *s, long *a, long n)
         1023 {
         1024         Sema *p;
         1025         
         1026         lock(&s->sema.rendez.lk);
         1027         for(p=s->sema.next; p!=&s->sema && n>0; p=p->next){
         1028                 if(p->addr == a && p->waiting){
         1029                         p->waiting = 0;
         1030                         coherence();
         1031                         wakeup(&p->rendez);
         1032                         n--;
         1033                 }
         1034         }
         1035         unlock(&s->sema.rendez.lk);
         1036 }
         1037 
         1038 /* Add delta to semaphore and wake up waiters as appropriate. */
         1039 static long
         1040 semrelease(Segment *s, long *addr, long delta)
         1041 {
         1042         long value;
         1043 
         1044         do
         1045                 value = *addr;
         1046         while(!cmpswap(addr, value, value+delta));
         1047         semwakeup(s, addr, delta);
         1048         return value+delta;
         1049 }
         1050 
         1051 /* Try to acquire semaphore using compare-and-swap */
         1052 static int
         1053 canacquire(long *addr)
         1054 {
         1055         long value;
         1056         
         1057         while((value=*addr) > 0)
         1058                 if(cmpswap(addr, value, value-1))
         1059                         return 1;
         1060         return 0;
         1061 }                
         1062 
         1063 /* Should we wake up? */
         1064 static int
         1065 semawoke(void *p)
         1066 {
         1067         coherence();
         1068         return !((Sema*)p)->waiting;
         1069 }
         1070 
         1071 /* Acquire semaphore (subtract 1). */
         1072 static int
         1073 semacquire(Segment *s, long *addr, int block)
         1074 {
         1075         int acquired;
         1076         Sema phore;
         1077 
         1078         if(canacquire(addr))
         1079                 return 1;
         1080         if(!block)
         1081                 return 0;
         1082 
         1083         acquired = 0;
         1084         semqueue(s, addr, &phore);
         1085         for(;;){
         1086                 phore.waiting = 1;
         1087                 coherence();
         1088                 if(canacquire(addr)){
         1089                         acquired = 1;
         1090                         break;
         1091                 }
         1092                 if(waserror())
         1093                         break;
         1094                 sleep(&phore.rendez, semawoke, &phore);
         1095                 poperror();
         1096         }
         1097         semdequeue(s, &phore);
         1098         coherence();        /* not strictly necessary due to lock in semdequeue */
         1099         if(!phore.waiting)
         1100                 semwakeup(s, addr, 1);
         1101         if(!acquired)
         1102                 nexterror();
         1103         return 1;
         1104 }
         1105 
         1106 long
         1107 syssemacquire(uint32 *arg)
         1108 {
         1109         int block;
         1110         long *addr;
         1111         Segment *s;
         1112 
         1113         addr = uvalidaddr(arg[0], sizeof(long), 1);
         1114         evenaddr(arg[0]);
         1115         block = arg[1];
         1116         
         1117         if((s = seg(up, arg[0], 0)) == nil)
         1118                 error(Ebadarg);
         1119         if(*addr < 0)
         1120                 error(Ebadarg);
         1121         return semacquire(s, addr, block);
         1122 }
         1123 
         1124 long
         1125 syssemrelease(uint32 *arg)
         1126 {
         1127         long *addr, delta;
         1128         Segment *s;
         1129 
         1130         addr = uvalidaddr(arg[0], sizeof(long), 1);
         1131         evenaddr(arg[0]);
         1132         delta = arg[1];
         1133 
         1134         if((s = seg(up, arg[0], 0)) == nil)
         1135                 error(Ebadarg);
         1136         if(delta < 0 || *addr < 0)
         1137                 error(Ebadarg);
         1138         return semrelease(s, addr, arg[1]);
         1139 }