sysproc.c - vx32 - Local 9vx git repository for patches.
(HTM) git clone git://r-36.net/vx32
(DIR) Log
(DIR) Files
(DIR) Refs
---
sysproc.c (23448B)
---
1 #define WANT_M
2 #include "u.h"
3 #include "tos.h"
4 #include "lib.h"
5 #include "mem.h"
6 #include "dat.h"
7 #include "fns.h"
8 #include "error.h"
9
10 #include "a.out.h"
11
12 int shargs(char*, int, char**);
13
14 extern void checkpages(void);
15 extern void checkpagerefs(void);
16
17 long
18 sysr1(uint32 *x)
19 {
20 vx32sysr1();
21 return 0;
22 }
23
24 long
25 sysrfork(uint32 *arg)
26 {
27 Proc *p;
28 int n, i;
29 Fgrp *ofg;
30 Pgrp *opg;
31 Rgrp *org;
32 Egrp *oeg;
33 ulong pid, flag;
34 Mach *wm;
35
36 flag = arg[0];
37 /* Check flags before we commit */
38 if((flag & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
39 error(Ebadarg);
40 if((flag & (RFNAMEG|RFCNAMEG)) == (RFNAMEG|RFCNAMEG))
41 error(Ebadarg);
42 if((flag & (RFENVG|RFCENVG)) == (RFENVG|RFCENVG))
43 error(Ebadarg);
44
45 if((flag&RFPROC) == 0) {
46 if(flag & (RFMEM|RFNOWAIT))
47 error(Ebadarg);
48 if(flag & (RFFDG|RFCFDG)) {
49 ofg = up->fgrp;
50 if(flag & RFFDG)
51 up->fgrp = dupfgrp(ofg);
52 else
53 up->fgrp = dupfgrp(nil);
54 closefgrp(ofg);
55 }
56 if(flag & (RFNAMEG|RFCNAMEG)) {
57 opg = up->pgrp;
58 up->pgrp = newpgrp();
59 if(flag & RFNAMEG)
60 pgrpcpy(up->pgrp, opg);
61 /* inherit noattach */
62 up->pgrp->noattach = opg->noattach;
63 closepgrp(opg);
64 }
65 if(flag & RFNOMNT)
66 up->pgrp->noattach = 1;
67 if(flag & RFREND) {
68 org = up->rgrp;
69 up->rgrp = newrgrp();
70 closergrp(org);
71 }
72 if(flag & (RFENVG|RFCENVG)) {
73 oeg = up->egrp;
74 up->egrp = smalloc(sizeof(Egrp));
75 up->egrp->ref.ref = 1;
76 if(flag & RFENVG)
77 envcpy(up->egrp, oeg);
78 closeegrp(oeg);
79 }
80 if(flag & RFNOTEG)
81 up->noteid = incref(¬eidalloc);
82 return 0;
83 }
84
85 p = newproc();
86
87 p->fpsave = up->fpsave;
88 p->scallnr = up->scallnr;
89 p->s = up->s;
90 p->nerrlab = 0;
91 p->slash = up->slash;
92 p->dot = up->dot;
93 incref(&p->dot->ref);
94
95 memmove(p->note, up->note, sizeof(p->note));
96 p->privatemem = up->privatemem;
97 p->noswap = up->noswap;
98 p->nnote = up->nnote;
99 p->notified = 0;
100 p->lastnote = up->lastnote;
101 p->notify = up->notify;
102 p->ureg = up->ureg;
103 p->dbgreg = 0;
104
105 /* Make a new set of memory segments */
106 n = flag & RFMEM;
107 qlock(&p->seglock);
108 if(waserror()){
109 qunlock(&p->seglock);
110 nexterror();
111 }
112 for(i = 0; i < NSEG; i++)
113 if(up->seg[i])
114 p->seg[i] = dupseg(up->seg, i, n);
115 qunlock(&p->seglock);
116 poperror();
117
118 /* File descriptors */
119 if(flag & (RFFDG|RFCFDG)) {
120 if(flag & RFFDG)
121 p->fgrp = dupfgrp(up->fgrp);
122 else
123 p->fgrp = dupfgrp(nil);
124 }
125 else {
126 p->fgrp = up->fgrp;
127 incref(&p->fgrp->ref);
128 }
129
130 /* Process groups */
131 if(flag & (RFNAMEG|RFCNAMEG)) {
132 p->pgrp = newpgrp();
133 if(flag & RFNAMEG)
134 pgrpcpy(p->pgrp, up->pgrp);
135 /* inherit noattach */
136 p->pgrp->noattach = up->pgrp->noattach;
137 }
138 else {
139 p->pgrp = up->pgrp;
140 incref(&p->pgrp->ref);
141 }
142 if(flag & RFNOMNT)
143 up->pgrp->noattach = 1;
144
145 if(flag & RFREND)
146 p->rgrp = newrgrp();
147 else {
148 incref(&up->rgrp->ref);
149 p->rgrp = up->rgrp;
150 }
151
152 /* Environment group */
153 if(flag & (RFENVG|RFCENVG)) {
154 p->egrp = smalloc(sizeof(Egrp));
155 p->egrp->ref.ref = 1;
156 if(flag & RFENVG)
157 envcpy(p->egrp, up->egrp);
158 }
159 else {
160 p->egrp = up->egrp;
161 incref(&p->egrp->ref);
162 }
163 p->hang = up->hang;
164 p->procmode = up->procmode;
165
166 /* Craft a return frame which will cause the child to pop out of
167 * the scheduler in user mode with the return register zero
168 */
169 forkchild(p, up->dbgreg);
170
171 p->parent = up;
172 p->parentpid = up->pid;
173 if(flag&RFNOWAIT)
174 p->parentpid = 0;
175 else {
176 lock(&up->exl);
177 up->nchild++;
178 unlock(&up->exl);
179 }
180 if((flag&RFNOTEG) == 0)
181 p->noteid = up->noteid;
182
183 p->fpstate = up->fpstate;
184 pid = p->pid;
185 memset(p->time, 0, sizeof(p->time));
186 p->time[TReal] = msec();
187
188 kstrdup(&p->text, up->text);
189 kstrdup(&p->user, up->user);
190 /*
191 * since the bss/data segments are now shareable,
192 * any mmu info about this process is now stale
193 * (i.e. has bad properties) and has to be discarded.
194 */
195 flushmmu();
196 p->basepri = up->basepri;
197 p->priority = up->basepri;
198 p->fixedpri = up->fixedpri;
199 p->mp = up->mp;
200 wm = up->wired;
201 if(wm)
202 procwired(p, wm->machno);
203 ready(p);
204 sched();
205 return pid;
206 }
207
/*
 * Interpret the four bytes of l, in memory order, as a big-endian
 * 32-bit number and return its value.
 *
 * Each byte is widened to uint32 before shifting: a plain uchar would
 * be promoted to signed int, and shifting a byte >= 0x80 left by 24
 * would overflow into the sign bit.
 */
static uint32
l2be(uint32 l)
{
	uchar *cp;

	cp = (uchar*)&l;
	return ((uint32)cp[0]<<24) | ((uint32)cp[1]<<16) |
		((uint32)cp[2]<<8) | (uint32)cp[3];
}
216
217 static char Echanged[] = "exec arguments changed underfoot";
218
219 long
220 sysexec(uint32 *arg)
221 {
222 char *volatile elem, *volatile file, *ufile;
223 Chan *volatile tc;
224
225 /*
226 * Open the file, remembering the final element and the full name.
227 */
228 file = nil;
229 elem = nil;
230 tc = nil;
231 if(waserror()){
232 if(file)
233 free(file);
234 if(elem)
235 free(elem);
236 if(tc)
237 cclose(tc);
238 nexterror();
239 }
240
241 ufile = uvalidaddr(arg[0], 1, 0);
242 file = validnamedup(ufile, 1);
243 tc = namec(file, Aopen, OEXEC, 0);
244 kstrdup((char**)&elem, up->genbuf);
245
246 /*
247 * Read the header. If it's a #!, fill in progarg[] with info and repeat.
248 */
249 int i, n, nprogarg;
250 char *progarg[sizeof(Exec)/2+1];
251 char *prog, *p;
252 char line[sizeof(Exec)+1];
253 Exec exec;
254
255 nprogarg = 0;
256 n = devtab[tc->type]->read(tc, &exec, sizeof(Exec), 0);
257 if(n < 2)
258 error(Ebadexec);
259 p = (char*)&exec;
260 if(p[0] == '#' && p[1] == '!'){
261 memmove(line, p, n);
262 nprogarg = shargs(line, n, progarg);
263 if(nprogarg == 0)
264 error(Ebadexec);
265
266 /* The original file becomes an extra arg after #! line */
267 progarg[nprogarg++] = file;
268
269 /*
270 * Take the #! $0 as a file to open, and replace
271 * $0 with the original path's name.
272 */
273 prog = progarg[0];
274 progarg[0] = elem;
275 cclose(tc);
276 tc = nil; /* in case namec errors out */
277 tc = namec(prog, Aopen, OEXEC, 0);
278 n = devtab[tc->type]->read(tc, &exec, sizeof(Exec), 0);
279 if(n < 2)
280 error(Ebadexec);
281 }
282
283 /*
284 * #! has had its chance, now we need a real binary
285 */
286 uint32 magic, entry, text, etext, data, edata, bss, ebss;
287
288 magic = l2be(exec.magic);
289 if(n != sizeof(Exec) || l2be(exec.magic) != AOUT_MAGIC)
290 error(Ebadexec);
291
292 entry = l2be(exec.entry);
293 text = l2be(exec.text);
294 data = l2be(exec.data);
295 bss = l2be(exec.bss);
296 etext = ROUND(UTZERO+sizeof(Exec)+text, BY2PG);
297 edata = ROUND(etext + data, BY2PG);
298 ebss = ROUND(etext + data + bss, BY2PG);
299
300 //iprint("entry %#lux text %#lux data %#lux bss %#lux\n", entry, text, data, bss);
301 //iprint("etext %#lux edata %#lux ebss %#lux\n", etext, edata, ebss);
302
303 if(entry < UTZERO+sizeof(Exec) || entry >= UTZERO+sizeof(Exec)+text)
304 error(Ebadexec);
305
306 /* many overflow possibilities */
307 if(text >= USTKTOP || data >= USTKTOP || bss >= USTKTOP
308 || etext >= USTKTOP || edata >= USTKTOP || ebss >= USTKTOP
309 || etext >= USTKTOP || edata < etext || ebss < edata)
310 error(Ebadexec);
311
312 /*
313 * Copy argv into new stack segment temporarily mapped elsewhere.
314 * Be careful: multithreaded program could be changing argv during this.
315 * Pass 1: count number of arguments, string bytes.
316 */
317 int nargv, strbytes;
318 uint32 argp, ssize, spage;
319
320 strbytes = 0;
321 for(i=0; i<nprogarg; i++)
322 strbytes += strlen(progarg[i]) + 1;
323
324 argp = arg[1];
325 for(nargv=0;; nargv++, argp += BY2WD){
326 uint32 a;
327 char *str;
328
329 a = *(uint32*)uvalidaddr(argp, BY2WD, 0);
330 if(a == 0)
331 break;
332 str = uvalidaddr(a, 1, 0);
333 n = ((char*)vmemchr(str, 0, 0x7FFFFFFF) - str) + 1;
334 if(nprogarg > 0 && nargv == 0)
335 continue; /* going to skip argv[0] on #! */
336 strbytes += n;
337 }
338 if(nargv == 0)
339 error("exec missing argv");
340
341 /*
342 * Skip over argv[0] if using #!. Waited until now so that
343 * string would still be checked for validity during loop.
344 */
345 if(nprogarg > 0){
346 nargv--;
347 arg[1] += BY2WD;
348 }
349
350 ssize = BY2WD*((nprogarg+nargv)+1) + ROUND(strbytes, BY2WD) + sizeof(Tos);
351
352 /*
353 * 8-byte align SP for those (e.g. sparc) that need it.
354 * execregs() will subtract another 4 bytes for argc.
355 */
356 if((ssize+4) & 7)
357 ssize += 4;
358 spage = (ssize+(BY2PG-1)) >> PGSHIFT;
359
360 /*
361 * Pass 2: build the stack segment, being careful not to assume
362 * that the counts from pass 1 are still valid.
363 */
364 if(spage > TSTKSIZ)
365 error(Enovmem);
366
367 qlock(&up->seglock);
368 if(waserror()){
369 if(up->seg[ESEG]){
370 putseg(up->seg[ESEG]);
371 up->seg[ESEG] = nil;
372 }
373 qunlock(&up->seglock);
374 nexterror();
375 }
376 up->seg[ESEG] = newseg(SG_STACK, TSTKTOP-USTKSIZE, USTKSIZE/BY2PG);
377 flushmmu(); // Needed for Plan 9 VX XXX really?
378
379 /*
380 * Top-of-stack structure.
381 */
382 uchar *uzero;
383 uzero = up->pmmu.uzero;
384 Tos *tos;
385 uint32 utos;
386 utos = USTKTOP - sizeof(Tos);
387 tos = (Tos*)(uzero + utos + TSTKTOP - USTKTOP);
388 tos->cyclefreq = m->cyclefreq;
389 cycles((uvlong*)&tos->pcycles);
390 tos->pcycles = -tos->pcycles;
391 tos->kcycles = tos->pcycles;
392 tos->clock = 0;
393
394 /*
395 * Argument pointers and strings, together.
396 */
397 char *bp, *ep;
398 uint32 *targp;
399 uint32 ustrp, uargp;
400
401 ustrp = utos - ROUND(strbytes, BY2WD);
402 uargp = ustrp - BY2WD*((nprogarg+nargv)+1);
403 bp = (char*)(uzero + ustrp + TSTKTOP - USTKTOP);
404 ep = bp + strbytes;
405 p = bp;
406 targp = (uint32*)(uzero + uargp + TSTKTOP - USTKTOP);
407
408 /* #! args are trusted */
409 for(i=0; i<nprogarg; i++){
410 n = strlen(progarg[i]) + 1;
411 if(n > ep - p)
412 error(Echanged);
413 memmove(p, progarg[i], n);
414 p += n;
415 *targp++ = ustrp;
416 ustrp += n;
417 }
418
419 /* the rest are not */
420 argp = arg[1];
421 for(i=0; i<nargv; i++){
422 uint32 a;
423 char *str;
424
425 a = *(uint32*)uvalidaddr(argp, BY2WD, 0);
426 argp += BY2WD;
427
428 str = uvalidaddr(a, 1, 0);
429 n = ((char*)vmemchr(str, 0, 0x7FFFFFFF) - str) + 1;
430 if(n > ep - p)
431 error(Echanged);
432 memmove(p, str, n);
433 p += n;
434 *targp++ = ustrp;
435 ustrp += n;
436 }
437
438 if(*(uint32*)uvalidaddr(argp, BY2WD, 0) != 0)
439 error(Echanged);
440 *targp = 0;
441
442 /*
443 * But wait, there's more: prepare an arg copy for up->args
444 * using the copy we just made in the temporary segment.
445 */
446 char *args;
447 int nargs;
448
449 n = p - bp; /* includes NUL on last arg, so must be > 0 */
450 if(n <= 0) /* nprogarg+nargv > 0; checked above */
451 error(Egreg);
452 if(n > 128)
453 n = 128;
454 args = smalloc(n);
455 if(waserror()){
456 free(args);
457 nexterror();
458 }
459 memmove(args, bp, n);
460 /* find beginning of UTF character boundary to place final NUL */
461 while(n > 0 && (args[n-1]&0xC0) == 0x80)
462 n--;
463 args[n-1] = '\0';
464 nargs = n;
465
466 /*
467 * Now we're ready to commit.
468 */
469 free(up->text);
470 up->text = elem;
471 free(up->args);
472 up->args = args;
473 up->nargs = n;
474 elem = nil;
475 poperror(); /* args */
476
477 /*
478 * Free old memory. Special segments maintained across exec.
479 */
480 Segment *s;
481 for(i = SSEG; i <= BSEG; i++) {
482 putseg(up->seg[i]);
483 up->seg[i] = nil; /* in case of error */
484 }
485 for(i = BSEG+1; i< NSEG; i++) {
486 s = up->seg[i];
487 if(s && (s->type&SG_CEXEC)) {
488 putseg(s);
489 up->seg[i] = nil;
490 }
491 }
492
493 /*
494 * Close on exec
495 */
496 Fgrp *f;
497 f = up->fgrp;
498 for(i=0; i<=f->maxfd; i++)
499 fdclose(i, CCEXEC);
500
501 /* Text. Shared. Attaches to cache image if possible */
502 /* attachimage returns a locked cache image */
503 Image *img;
504 Segment *ts;
505 img = attachimage(SG_TEXT|SG_RONLY, tc, UTZERO, (etext-UTZERO)>>PGSHIFT);
506 ts = img->s;
507 up->seg[TSEG] = ts;
508 ts->flushme = 1;
509 ts->fstart = 0;
510 ts->flen = sizeof(Exec)+text;
511 unlock(&img->ref.lk);
512
513 /* Data. Shared. */
514 s = newseg(SG_DATA, etext, (edata-etext)>>PGSHIFT);
515 up->seg[DSEG] = s;
516
517 /* Attached by hand */
518 incref(&img->ref);
519 s->image = img;
520 s->fstart = ts->fstart+ts->flen;
521 s->flen = data;
522
523 /* BSS. Zero fill on demand */
524 up->seg[BSEG] = newseg(SG_BSS, edata, (ebss-edata)>>PGSHIFT);
525
526 /*
527 * Move the stack
528 */
529 s = up->seg[ESEG];
530 up->seg[ESEG] = 0;
531 up->seg[SSEG] = s;
532 qunlock(&up->seglock);
533 poperror(); /* seglock */
534
535 s->base = USTKTOP-USTKSIZE;
536 s->top = USTKTOP;
537 relocateseg(s, USTKTOP-TSTKTOP);
538
539 /*
540 * '/' processes are higher priority (hack to make /ip more responsive).
541 */
542 if(devtab[tc->type]->dc == L'/')
543 up->basepri = PriRoot;
544 up->priority = up->basepri;
545 poperror(); /* tc, elem, file */
546 cclose(tc);
547 free(file);
548 // elem is now up->text
549
550 /*
551 * At this point, the mmu contains info about the old address
552 * space and needs to be flushed
553 */
554 flushmmu();
555 qlock(&up->debug);
556 up->nnote = 0;
557 up->notify = 0;
558 up->notified = 0;
559 up->privatemem = 0;
560 procsetup(up);
561 qunlock(&up->debug);
562 if(up->hang)
563 up->procctl = Proc_stopme;
564
565 return execregs(entry, USTKTOP - uargp, nprogarg+nargv);
566 }
567
/*
 * Split the "#!" line held in the first n bytes of s into
 * white-space separated words.
 *
 * The two-byte "#!" prefix is skipped.  The rest must contain a
 * newline within the n bytes; otherwise 0 is returned and nothing is
 * written.  The newline and each blank or tab that ends a word are
 * overwritten with NUL.  Pointers to the words are stored in ap[],
 * followed by a nil entry.
 *
 * Returns the number of words found (0 for a blank line).
 */
int
shargs(char *s, int n, char **ap)
{
	int i;

	s += 2;
	n -= 2;		/* skip #! */
	/*
	 * Find the terminating newline without looking past the n
	 * bytes we were given; this also covers n <= 0.
	 */
	for(i=0; ; i++){
		if(i >= n)
			return 0;
		if(s[i] == '\n')
			break;
	}
	s[i] = 0;
	*ap = 0;
	i = 0;
	for(;;) {
		while(*s==' ' || *s=='\t')
			s++;
		if(*s == 0)
			break;
		i++;
		*ap++ = s;
		*ap = 0;
		while(*s && *s!=' ' && *s!='\t')
			s++;
		if(*s == 0)
			break;
		else
			*s++ = 0;
	}
	return i;
}
598
/*
 * Condition function that is never satisfied: ignores its argument
 * and always yields 0.
 */
int
return0(void *v)
{
	(void)v;

	return 0;
}
604
605 long
606 syssleep(uint32 *arg)
607 {
608
609 int n;
610
611 n = arg[0];
612 if(n <= 0) {
613 yield();
614 return 0;
615 }
616 if(n < TK2MS(1))
617 n = TK2MS(1);
618 tsleep(&up->sleep, return0, 0, n);
619 return 0;
620 }
621
622 long
623 sysalarm(uint32 *arg)
624 {
625 return procalarm(arg[0]);
626 }
627
628 long
629 sysexits(uint32 *arg)
630 {
631 char *status;
632 char *inval = "invalid exit string";
633 char buf[ERRMAX];
634
635 if(arg[0]){
636 if(waserror())
637 status = inval;
638 else{
639 status = uvalidaddr(arg[0], 1, 0);
640 if(vmemchr(status, 0, ERRMAX) == 0){
641 memmove(buf, status, ERRMAX);
642 buf[ERRMAX-1] = 0;
643 status = buf;
644 }
645 poperror();
646 }
647
648 }else
649 status = nil;
650 pexit(status, 1);
651 return 0; /* not reached */
652 }
653
654 long
655 sys_wait(uint32 *arg)
656 {
657 int pid;
658 Waitmsg w;
659 OWaitmsg *ow;
660
661 if(arg[0] == 0)
662 return pwait(nil);
663
664 ow = uvalidaddr(arg[0], sizeof(OWaitmsg), 1);
665 evenaddr(arg[0]);
666 pid = pwait(&w);
667 if(pid >= 0){
668 readnum(0, ow->pid, NUMSIZE, w.pid, NUMSIZE);
669 readnum(0, ow->time+TUser*NUMSIZE, NUMSIZE, w.time[TUser], NUMSIZE);
670 readnum(0, ow->time+TSys*NUMSIZE, NUMSIZE, w.time[TSys], NUMSIZE);
671 readnum(0, ow->time+TReal*NUMSIZE, NUMSIZE, w.time[TReal], NUMSIZE);
672 strncpy(ow->msg, w.msg, sizeof(ow->msg));
673 ow->msg[sizeof(ow->msg)-1] = '\0';
674 }
675 return pid;
676 }
677
678 long
679 sysawait(uint32 *arg)
680 {
681 int i;
682 int pid;
683 Waitmsg w;
684 uint32 n;
685 char *buf;
686
687 n = arg[1];
688 buf = uvalidaddr(arg[0], n, 1);
689 pid = pwait(&w);
690 if(pid < 0)
691 return -1;
692 i = snprint(buf, n, "%d %lud %lud %lud %q",
693 w.pid,
694 w.time[TUser], w.time[TSys], w.time[TReal],
695 w.msg);
696
697 return i;
698 }
699
700 void
701 werrstr(char *fmt, ...)
702 {
703 va_list va;
704
705 if(up == nil)
706 return;
707
708 va_start(va, fmt);
709 vseprint(up->syserrstr, up->syserrstr+ERRMAX, fmt, va);
710 va_end(va);
711 }
712
713 static long
714 generrstr(uint32 addr, uint nbuf)
715 {
716 char tmp[ERRMAX];
717 char *buf;
718
719 if(nbuf == 0)
720 error(Ebadarg);
721 buf = uvalidaddr(addr, nbuf, 1);
722 if(nbuf > sizeof tmp)
723 nbuf = sizeof tmp;
724 memmove(tmp, buf, nbuf);
725
726 /* make sure it's NUL-terminated */
727 tmp[nbuf-1] = '\0';
728 memmove(buf, up->syserrstr, nbuf);
729 buf[nbuf-1] = '\0';
730 memmove(up->syserrstr, tmp, nbuf);
731 return 0;
732 }
733
734 long
735 syserrstr(uint32 *arg)
736 {
737 return generrstr(arg[0], arg[1]);
738 }
739
740 /* compatibility for old binaries */
741 long
742 sys_errstr(uint32 *arg)
743 {
744 return generrstr(arg[0], 64);
745 }
746
747 long
748 sysnotify(uint32 *arg)
749 {
750 if(arg[0] != 0)
751 uvalidaddr(arg[0], 1, 0);
752 up->notify = arg[0]; /* checked again when used */
753 return 0;
754 }
755
756 long
757 sysnoted(uint32 *arg)
758 {
759 if(arg[0]!=NRSTR && !up->notified)
760 error(Egreg);
761 return 0;
762 }
763
764 long
765 syssegbrk(uint32 *arg)
766 {
767 int i;
768 uint32 addr;
769 Segment *s;
770
771 addr = arg[0];
772 for(i = 0; i < NSEG; i++) {
773 s = up->seg[i];
774 if(s == 0 || addr < s->base || addr >= s->top)
775 continue;
776 switch(s->type&SG_TYPE) {
777 case SG_TEXT:
778 case SG_DATA:
779 case SG_STACK:
780 error(Ebadarg);
781 default:
782 return ibrk(arg[1], i);
783 }
784 }
785
786 error(Ebadarg);
787 return 0; /* not reached */
788 }
789
790 long
791 syssegattach(uint32 *arg)
792 {
793 return segattach(up, arg[0], uvalidaddr(arg[1], 1, 0), arg[2], arg[3]);
794 }
795
796 long
797 syssegdetach(uint32 *arg)
798 {
799 int i;
800 uint32 addr;
801 Segment *s;
802
803 qlock(&up->seglock);
804 if(waserror()){
805 qunlock(&up->seglock);
806 nexterror();
807 }
808
809 s = 0;
810 addr = arg[0];
811 for(i = 0; i < NSEG; i++)
812 if((s = up->seg[i])) {
813 qlock(&s->lk);
814 if((addr >= s->base && addr < s->top) ||
815 (s->top == s->base && addr == s->base))
816 goto found;
817 qunlock(&s->lk);
818 }
819
820 error(Ebadarg);
821
822 found:
823 /*
824 * Check we are not detaching the initial stack segment.
825 */
826 if(s == up->seg[SSEG]){
827 qunlock(&s->lk);
828 error(Ebadarg);
829 }
830 up->seg[i] = 0;
831 qunlock(&s->lk);
832 putseg(s);
833 qunlock(&up->seglock);
834 poperror();
835
836 /* Ensure we flush any entries from the lost segment */
837 flushmmu();
838 return 0;
839 }
840
841 long
842 syssegfree(uint32 *arg)
843 {
844 Segment *s;
845 uint32 from, to;
846
847 from = arg[0];
848 s = seg(up, from, 1);
849 if(s == nil)
850 error(Ebadarg);
851 to = (from + arg[1]) & ~(BY2PG-1);
852 from = PGROUND(from);
853
854 if(to > s->top) {
855 qunlock(&s->lk);
856 error(Ebadarg);
857 }
858
859 mfreeseg(s, from, (to - from) / BY2PG);
860 qunlock(&s->lk);
861 flushmmu();
862
863 return 0;
864 }
865
866 /* For binary compatibility */
867 long
868 sysbrk_(uint32 *arg)
869 {
870 return ibrk(arg[0], BSEG);
871 }
872
873 long
874 sysrendezvous(uint32 *arg)
875 {
876 uintptr tag, val;
877 Proc *p, **l;
878
879 tag = arg[0];
880 l = &REND(up->rgrp, tag);
881 up->rendval = ~(uintptr)0;
882
883 lock(&up->rgrp->ref.lk);
884 for(p = *l; p; p = p->rendhash) {
885 if(p->rendtag == tag) {
886 *l = p->rendhash;
887 val = p->rendval;
888 p->rendval = arg[1];
889
890 while(p->mach != 0)
891 ;
892 ready(p);
893 unlock(&up->rgrp->ref.lk);
894 return val;
895 }
896 l = &p->rendhash;
897 }
898
899 /* Going to sleep here */
900 up->rendtag = tag;
901 up->rendval = arg[1];
902 up->rendhash = *l;
903 *l = up;
904 up->state = Rendezvous;
905 unlock(&up->rgrp->ref.lk);
906
907 sched();
908
909 return up->rendval;
910 }
911
912 /*
913 * The implementation of semaphores is complicated by needing
914 * to avoid rescheduling in syssemrelease, so that it is safe
915 * to call from real-time processes. This means syssemrelease
916 * cannot acquire any qlocks, only spin locks.
917 *
918 * Semacquire and semrelease must both manipulate the semaphore
919 * wait list. Lock-free linked lists only exist in theory, not
920 * in practice, so the wait list is protected by a spin lock.
921 *
922 * The semaphore value *addr is stored in user memory, so it
923 * cannot be read or written while holding spin locks.
924 *
925 * Thus, we can access the list only when holding the lock, and
926 * we can access the semaphore only when not holding the lock.
927 * This makes things interesting. Note that sleep's condition function
928 * is called while holding two locks - r and up->rlock - so it cannot
929 * access the semaphore value either.
930 *
931 * An acquirer announces its intention to try for the semaphore
932 * by putting a Sema structure onto the wait list and then
933 * setting Sema.waiting. After one last check of semaphore,
934 * the acquirer sleeps until Sema.waiting==0. A releaser of n
935 * must wake up n acquirers who have Sema.waiting set. It does
936 * this by clearing Sema.waiting and then calling wakeup.
937 *
938 * There are three interesting races here.
939
940 * The first is that in this particular sleep/wakeup usage, a single
941 * wakeup can rouse a process from two consecutive sleeps!
942 * The ordering is:
943 *
944 * (a) set Sema.waiting = 1
945 * (a) call sleep
946 * (b) set Sema.waiting = 0
947 * (a) check Sema.waiting inside sleep, return w/o sleeping
948 * (a) try for semaphore, fail
949 * (a) set Sema.waiting = 1
950 * (a) call sleep
951 * (b) call wakeup(a)
952 * (a) wake up again
953 *
954 * This is okay - semacquire will just go around the loop
955 * again. It does mean that at the top of the for(;;) loop in
956 * semacquire, phore.waiting might already be set to 1.
957 *
958 * The second is that a releaser might wake an acquirer who is
959 * interrupted before he can acquire the lock. Since
960 * release(n) issues only n wakeup calls -- only n can be used
961 * anyway -- if the interrupted process is not going to use his
962 * wakeup call he must pass it on to another acquirer.
963 *
964 * The third race is similar to the second but more subtle. An
965 * acquirer sets waiting=1 and then does a final canacquire()
966 * before going to sleep. The opposite order would result in
967 * missing wakeups that happen between canacquire and
968 * waiting=1. (In fact, the whole point of Sema.waiting is to
969 * avoid missing wakeups between canacquire() and sleep().) But
970 * there can be spurious wakeups between a successful
971 * canacquire() and the following semdequeue(). This wakeup is
972 * not useful to the acquirer, since he has already acquired
973 * the semaphore. Like in the previous case, though, the
974 * acquirer must pass the wakeup call along.
975 *
976 * This is all rather subtle. The code below has been verified
977 * with the spin model /sys/src/9/port/semaphore.p. The
978 * original code anticipated the second race but not the first
979 * or third, which were caught only with spin. The first race
980 * is mentioned in /sys/doc/sleep.ps, but I'd forgotten about it.
981 * It was lucky that my abstract model of sleep/wakeup still managed
982 * to preserve that behavior.
983 *
984 * I remain slightly concerned about memory coherence
985 * outside of locks. The spin model does not take
986 * queued processor writes into account so we have to
987 * think hard. The only variables accessed outside locks
988 * are the semaphore value itself and the boolean flag
989 * Sema.waiting. The value is only accessed with cmpswap,
990 * whose job description includes doing the right thing as
991 * far as memory coherence across processors. That leaves
992 * Sema.waiting. To handle it, we call coherence() before each
993 * read and after each write. - rsc
994 */
995
996 /* Add semaphore p with addr a to list in seg. */
997 static void
998 semqueue(Segment *s, long *a, Sema *p)
999 {
1000 memset(p, 0, sizeof *p);
1001 p->addr = a;
1002 lock(&s->sema.rendez.lk); /* uses s->sema.Rendez.Lock, but no one else is */
1003 p->next = &s->sema;
1004 p->prev = s->sema.prev;
1005 p->next->prev = p;
1006 p->prev->next = p;
1007 unlock(&s->sema.rendez.lk);
1008 }
1009
1010 /* Remove semaphore p from list in seg. */
1011 static void
1012 semdequeue(Segment *s, Sema *p)
1013 {
1014 lock(&s->sema.rendez.lk);
1015 p->next->prev = p->prev;
1016 p->prev->next = p->next;
1017 unlock(&s->sema.rendez.lk);
1018 }
1019
1020 /* Wake up n waiters with addr a on list in seg. */
1021 static void
1022 semwakeup(Segment *s, long *a, long n)
1023 {
1024 Sema *p;
1025
1026 lock(&s->sema.rendez.lk);
1027 for(p=s->sema.next; p!=&s->sema && n>0; p=p->next){
1028 if(p->addr == a && p->waiting){
1029 p->waiting = 0;
1030 coherence();
1031 wakeup(&p->rendez);
1032 n--;
1033 }
1034 }
1035 unlock(&s->sema.rendez.lk);
1036 }
1037
1038 /* Add delta to semaphore and wake up waiters as appropriate. */
1039 static long
1040 semrelease(Segment *s, long *addr, long delta)
1041 {
1042 long value;
1043
1044 do
1045 value = *addr;
1046 while(!cmpswap(addr, value, value+delta));
1047 semwakeup(s, addr, delta);
1048 return value+delta;
1049 }
1050
/*
 * Try to acquire semaphore using compare-and-swap.
 * Returns 1 after decrementing a positive value, or 0 as soon as the
 * value is seen to be <= 0.  A failed cmpswap just rereads the value.
 */
static int
canacquire(long *addr)
{
	long seen;

	for(;;){
		seen = *addr;
		if(seen <= 0)
			return 0;
		if(cmpswap(addr, seen, seen-1))
			return 1;
	}
}
1062
1063 /* Should we wake up? */
1064 static int
1065 semawoke(void *p)
1066 {
1067 coherence();
1068 return !((Sema*)p)->waiting;
1069 }
1070
1071 /* Acquire semaphore (subtract 1). */
1072 static int
1073 semacquire(Segment *s, long *addr, int block)
1074 {
1075 int acquired;
1076 Sema phore;
1077
1078 if(canacquire(addr))
1079 return 1;
1080 if(!block)
1081 return 0;
1082
1083 acquired = 0;
1084 semqueue(s, addr, &phore);
1085 for(;;){
1086 phore.waiting = 1;
1087 coherence();
1088 if(canacquire(addr)){
1089 acquired = 1;
1090 break;
1091 }
1092 if(waserror())
1093 break;
1094 sleep(&phore.rendez, semawoke, &phore);
1095 poperror();
1096 }
1097 semdequeue(s, &phore);
1098 coherence(); /* not strictly necessary due to lock in semdequeue */
1099 if(!phore.waiting)
1100 semwakeup(s, addr, 1);
1101 if(!acquired)
1102 nexterror();
1103 return 1;
1104 }
1105
1106 long
1107 syssemacquire(uint32 *arg)
1108 {
1109 int block;
1110 long *addr;
1111 Segment *s;
1112
1113 addr = uvalidaddr(arg[0], sizeof(long), 1);
1114 evenaddr(arg[0]);
1115 block = arg[1];
1116
1117 if((s = seg(up, arg[0], 0)) == nil)
1118 error(Ebadarg);
1119 if(*addr < 0)
1120 error(Ebadarg);
1121 return semacquire(s, addr, block);
1122 }
1123
1124 long
1125 syssemrelease(uint32 *arg)
1126 {
1127 long *addr, delta;
1128 Segment *s;
1129
1130 addr = uvalidaddr(arg[0], sizeof(long), 1);
1131 evenaddr(arg[0]);
1132 delta = arg[1];
1133
1134 if((s = seg(up, arg[0], 0)) == nil)
1135 error(Ebadarg);
1136 if(delta < 0 || *addr < 0)
1137 error(Ebadarg);
1138 return semrelease(s, addr, arg[1]);
1139 }