proc.c - vx32 - Local 9vx git repository for patches.
git clone git://r-36.net/vx32
---
proc.c (29466B)
---
1 #define WANT_M
2 #include "u.h"
3 #include "lib.h"
4 #include "mem.h"
5 #include "dat.h"
6 #include "fns.h"
7 #include "error.h"
8 #include "trace.h"
9
10 int schedgain = 30; /* units in seconds */
11 int nrdy;
12 Ref noteidalloc;
13
14 void updatecpu(Proc*);
15 int reprioritize(Proc*);
16
17 ulong delayedscheds; /* statistics */
18 long skipscheds;
19 long preempts;
20 ulong load;
21
22 static Ref pidalloc;
23
24 static struct Procalloc
25 {
26 Lock lk;
27 Proc* ht[128];
28 Proc* arena;
29 Proc* free;
30 } procalloc;
31
32 enum
33 {
34 Q=10,
35 DQ=4,
36 Scaling=2,
37 };
38
39 Schedq runq[Nrq];
40 ulong runvec;
41
42 char *statename[] =
43 { /* BUG: generate automatically */
44 "Dead",
45 "Moribund",
46 "Ready",
47 "Scheding",
48 "Running",
49 "Queueing",
50 "QueueingR",
51 "QueueingW",
52 "Wakeme",
53 "Broken",
54 "Stopped",
55 "Rendez",
56 "Waitrelease",
57 };
58
59 static void pidhash(Proc*);
60 static void pidunhash(Proc*);
61 static void rebalance(void);
62
63 /*
64 * Always splhi()'ed.
65 */
66 void
67 schedinit(void) /* never returns */
68 {
69
70 setlabel(&m->sched);
71 if(traceprocs) // Plan 9 VX
72 print("schedinit %p %p %s\n", m, up, up ? up->text : "");
73 if(up) {
74 m->proc = 0;
75 switch(up->state) {
76 case Running:
77 ready(up);
78 break;
79 case Moribund:
80 up->state = Dead;
81
82 /*
83 * Holding locks from pexit:
84 * procalloc
85 * palloc
86 */
87 mmurelease(up);
88
89 up->qnext = procalloc.free;
90 procalloc.free = up;
91
92 unlock(&palloc.lk);
93 unlock(&procalloc.lk);
94 break;
95 }
96 up->mach = nil;
97 updatecpu(up);
98 up = nil;
99 }
100 sched();
101 }
102
103 /*
104 * If changing this routine, look also at sleep(). It
105 * contains a copy of the guts of sched().
106 */
107 void
108 sched(void)
109 {
110 Proc *p;
111
112 if(traceprocs) // Plan 9 VX
113 print("sched %p %p [%s]\n", m, up, up ? up->text : "");
114 if(m->ilockdepth)
115 panic("cpu%d: ilockdepth %d, last lock %#p at %#p, sched called from %#p",
116 m->machno,
117 m->ilockdepth,
118 up? up->lastilock: nil,
119 (up && up->lastilock)? up->lastilock->pc: 0,
120 getcallerpc(&p+2));
121 if(up){
122 /*
123 * Delay the sched until the process gives up the locks
124 * it is holding. This avoids dumb lock loops.
125 * Don't delay if the process is Moribund.
126 * It called sched to die.
127 * But do sched eventually. This avoids a missing unlock
128 * from hanging the entire kernel.
129 * But don't reschedule procs holding palloc or procalloc.
130 * Those are far too important to be holding while asleep.
131 *
132 * This test is not exact. There can still be a few instructions
133 * in the middle of taslock when a process holds a lock
134 * but Lock.p has not yet been initialized.
135 */
136 if(up->nlocks.ref)
137 if(up->state != Moribund)
138 if(up->delaysched < 20
139 || palloc.lk.p == up
140 || procalloc.lk.p == up){
141 up->delaysched++;
142 delayedscheds++;
143 return;
144 }
145 up->delaysched = 0;
146
147 splhi();
148
149 /* statistics */
150 m->cs++;
151
152 procsave(up);
153 if(setlabel(&up->sched)){
154 if(traceprocs)
155 print("sched %p %p: awake\n", m, up);
156 procrestore(up);
157 spllo();
158 return;
159 }
160 if(traceprocs)
161 print("sched %p %p: entering scheduler\n", m, up);
162 gotolabel(&m->sched);
163 }
164 if(traceprocs)
165         print("sched %p %p: runproc\n", m, up);
166 p = runproc();
167 if(1){
168 updatecpu(p);
169 p->priority = reprioritize(p);
170 }
171 if(p != m->readied)
172 m->schedticks = msec() + HZ/10;
173 m->readied = 0;
174 up = p;
175 up->state = Running;
176 up->mach = MACHP(m->machno);
177 m->proc = up;
178 if(traceprocs)
179 print("run %p %p [%s]\n", m, up, up->text);
180 mmuswitch(up);
181 gotolabel(&up->sched);
182 }
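
/*
 * The switch above rests on the setlabel/gotolabel pair: setlabel
 * saves PC and SP into a Label and returns 0, and a later gotolabel
 * on that Label resumes there with setlabel appearing to return 1,
 * in the manner of setjmp/longjmp. A minimal sketch of that shape;
 * switchdemo is illustrative, not part of this kernel.
 */
void
switchdemo(Label *self, Label *other)
{
	if(setlabel(self) == 0)
		gotolabel(other);	/* context saved; run the other side */
	/* someone ran gotolabel(self): we resume here, setlabel "returned" 1 */
}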
183
184 int
185 anyready(void)
186 {
187 return runvec;
188 }
189
190 int
191 anyhigher(void)
192 {
193 return runvec & ~((1<<(up->priority+1))-1);
194 }
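
/*
 * Worked example of the runvec mask: bit i of runvec is set while
 * runq[i] is non-empty. With up->priority == 3, (1<<(3+1))-1 is 0xf,
 * so the complement keeps bits 4 and above, and anyhigher() is
 * non-zero exactly when a strictly higher-priority proc is ready.
 */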
195
196 /*
197 * here once per clock tick to see if we should resched
198 */
199 void
200 hzsched(void)
201 {
202 /* once a second, rebalance will reprioritize ready procs */
203 if(m->machno == 0)
204 rebalance();
205
206 /* unless preempted, get to run for at least 100ms */
207 if(anyhigher()
208 || (!up->fixedpri && msec() > m->schedticks && anyready())){
209 m->readied = nil; /* avoid cooperative scheduling */
210 up->delaysched++;
211 }
212 }
213
214 /*
215 * here at the end of non-clock interrupts to see if we should preempt the
216 * current process. Returns 1 if preempted, 0 otherwise.
217 */
218 int
219 preempted(void)
220 {
221 if(up && up->state == Running)
222 if(up->preempted == 0)
223 if(anyhigher())
224 if(!active.exiting){
225 m->readied = nil; /* avoid cooperative scheduling */
226 up->preempted = 1;
227 sched();
228 splhi();
229 up->preempted = 0;
230 return 1;
231 }
232 return 0;
233 }
234
235 /*
236 * Update the cpu time average for this particular process,
237 * which is about to change from up -> not up or vice versa.
238 * p->lastupdate is the last time an updatecpu happened.
239 *
240 * The cpu time average is a decaying average that lasts
241 * about D clock ticks. D is chosen to be approximately
242 * the cpu time of a cpu-intensive "quick job". A job has to run
243 * for approximately D clock ticks before we home in on its
244 * actual cpu usage. Thus if you manage to get in and get out
245 * quickly, you won't be penalized during your burst. Once you
246 * start using your share of the cpu for more than about D
247 * clock ticks though, your p->cpu hits 1000 (1.0) and you end up
248 * below all the other quick jobs. Interactive tasks, because
249 * they basically always use less than their fair share of cpu,
250 * will be rewarded.
251 *
252 * If the process has not been running, then we want to
253 * apply the filter
254 *
255 * cpu = cpu * (D-1)/D
256 *
257 * n times, yielding
258 *
259 * cpu = cpu * ((D-1)/D)^n
260 *
261 * but D is big enough that this is approximately
262 *
263 * cpu = cpu * (D-n)/D
264 *
265 * so we use that instead.
266 *
267 * If the process has been running, we apply the filter to
268 * 1 - cpu, yielding a similar equation. Note that cpu is
269 * stored in fixed point (* 1000).
270 *
271 * Updatecpu must be called before changing up, in order
272 * to maintain accurate cpu usage statistics. It can be called
273 * at any time to bring the stats for a given proc up-to-date.
274 */
275 void
276 updatecpu(Proc *p)
277 {
278 int n, t, ocpu;
279 int D = schedgain*HZ*Scaling;
280
281 if(p->edf)
282 return;
283
284 t = msec()*Scaling + Scaling/2;
285 n = t - p->lastupdate;
286 p->lastupdate = t;
287
288 if(n == 0)
289 return;
290 if(n > D)
291 n = D;
292
293 ocpu = p->cpu;
294 if(p != up)
295 p->cpu = (ocpu*(D-n))/D;
296 else{
297 t = 1000 - ocpu;
298 t = (t*(D-n))/D;
299 p->cpu = 1000 - t;
300 }
301
302 //iprint("pid %d %s for %d cpu %d -> %d\n", p->pid,p==up?"active":"inactive",n, ocpu,p->cpu);
303 }
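
/*
 * A stand-alone sketch of the decay filter above, runnable as plain C
 * outside the kernel. HZ=100, schedgain=30 and Scaling=2 mirror this
 * file; decay() is hypothetical and only shows p->cpu converging
 * toward 1000 while a proc runs and back toward 0 while it sleeps.
 */
#include <stdio.h>

enum { HZ = 100, Schedgain = 30, Scaling = 2, D = Schedgain*HZ*Scaling };

static int
decay(int cpu, int n, int running)
{
	if(n > D)
		n = D;
	if(running)
		return 1000 - ((1000-cpu)*(D-n))/D;	/* filter applied to 1-cpu */
	return (cpu*(D-n))/D;			/* filter applied to cpu */
}

int
main(void)
{
	int t, cpu;

	cpu = 0;
	for(t = 0; t < 5; t++)
		printf("running: cpu=%d\n", cpu = decay(cpu, D/5, 1));
	for(t = 0; t < 5; t++)
		printf("idle:    cpu=%d\n", cpu = decay(cpu, D/5, 0));
	return 0;
}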
304
305 /*
306 * On average, p has used p->cpu of a cpu recently.
307 * Its fair share is conf.nmach/m->load of a cpu. If it has been getting
308 * too much, penalize it. If it has been getting not enough, reward it.
309 * I don't think you can get much more than your fair share that
310 * often, so most of the queues are for using less. Having a priority
311 * of 3 means you're just right. Having a higher priority (up to p->basepri)
312 * means you're not using as much as you could.
313 */
314 int
315 reprioritize(Proc *p)
316 {
317 int fairshare, n, load, ratio;
318
319 load = MACHP(0)->load;
320 if(load == 0)
321 return p->basepri;
322
323 /*
 324          * fairshare = 1.000 * conf.nmach * 1.000/load,
325 * except the decimal point is moved three places
326 * on both load and fairshare.
327 */
328 fairshare = (conf.nmach*1000*1000)/load;
329 n = p->cpu;
330 if(n == 0)
331 n = 1;
332 ratio = (fairshare+n/2) / n;
333 if(ratio > p->basepri)
334 ratio = p->basepri;
335 if(ratio < 0)
336 panic("reprioritize");
337 //iprint("pid %d cpu %d load %d fair %d pri %d\n", p->pid, p->cpu, load, fairshare, ratio);
338 return ratio;
339 }
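
/*
 * Worked numbers for the arithmetic above, assuming one machine
 * (conf.nmach == 1) and load == 1000, i.e. one cpu's worth of ready
 * work: fairshare = (1*1000*1000)/1000 = 1000. A proc with
 * p->cpu == 250 gets ratio = (1000+125)/250 = 4; one with
 * p->cpu == 900 gets (1000+450)/900 = 1; a light user with
 * p->cpu == 100 computes 10 and is clamped to p->basepri.
 * Heavier recent usage therefore lands in a lower run queue.
 */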
340
341 /*
342 * add a process to a scheduling queue
343 */
344 void
345 queueproc(Schedq *rq, Proc *p)
346 {
347 int pri;
348
349 pri = rq - runq;
350 lock(&runq->lk);
351 p->priority = pri;
352 p->rnext = 0;
353 if(rq->tail)
354 rq->tail->rnext = p;
355 else
356 rq->head = p;
357 rq->tail = p;
358 rq->n++;
359 nrdy++;
360 runvec |= 1<<pri;
361 unlock(&runq->lk);
362 }
363
364 /*
365 * try to remove a process from a scheduling queue (called splhi)
366 */
367 Proc*
368 dequeueproc(Schedq *rq, Proc *tp)
369 {
370 Proc *l, *p;
371
372 if(!canlock(&runq->lk))
373 return nil;
374
375 /*
376 * the queue may have changed before we locked runq,
377 * refind the target process.
378 */
379 l = 0;
380 for(p = rq->head; p; p = p->rnext){
381 if(p == tp)
382 break;
383 l = p;
384 }
385
386 /*
387 * p->mach==0 only when process state is saved
388 */
389 if(p == 0 || p->mach){
390 unlock(&runq->lk);
391 return nil;
392 }
393 if(p->rnext == 0)
394 rq->tail = l;
395 if(l)
396 l->rnext = p->rnext;
397 else
398 rq->head = p->rnext;
399 if(rq->head == nil)
400 runvec &= ~(1<<(rq-runq));
401 rq->n--;
402 nrdy--;
403 if(p->state != Ready)
404 print("dequeueproc %s %lud %s\n", p->text, p->pid, statename[p->state]);
405
406 unlock(&runq->lk);
407 return p;
408 }
409
410 /*
411 * ready(p) picks a new priority for a process and sticks it in the
412 * runq for that priority.
413 */
414 void
415 _ready(Proc *p)
416 {
417 int s, pri;
418 Schedq *rq;
419 void (*pt)(Proc*, int, vlong);
420
421 s = splhi();
422 if(0){
423 splx(s);
424 return;
425 }
426
427 if(up != p)
428 m->readied = p; /* group scheduling */
429
430 updatecpu(p);
431 pri = reprioritize(p);
432 p->priority = pri;
433 rq = &runq[pri];
434 p->state = Ready;
435 queueproc(rq, p);
436 pt = proctrace;
437 if(pt)
438 pt(p, SReady, 0);
439 splx(s);
440 }
441
442 /*
443 * yield the processor and drop our priority
444 */
445 void
446 yield(void)
447 {
448 if(anyready()){
449 /* pretend we just used 1/2 tick */
450 up->lastupdate -= Scaling/2;
451 sched();
452 }
453 }
454
455 /*
456 * recalculate priorities once a second. We need to do this
457 * since priorities will otherwise only be recalculated when
458 * the running process blocks.
459 */
460 ulong balancetime;
461
462 static void
463 rebalance(void)
464 {
465 int pri, npri, t, x;
466 Schedq *rq;
467 Proc *p;
468
469 t = msec();
470 if(t - balancetime < HZ)
471 return;
472 balancetime = t;
473
474 for(pri=0, rq=runq; pri<Npriq; pri++, rq++){
475 another:
476 p = rq->head;
477 if(p == nil)
478 continue;
479 if(p->mp != MACHP(m->machno))
480 continue;
481 if(pri == p->basepri)
482 continue;
483 updatecpu(p);
484 npri = reprioritize(p);
485 if(npri != pri){
486 x = splhi();
487 p = dequeueproc(rq, p);
488 if(p)
489 queueproc(&runq[npri], p);
490 splx(x);
491 goto another;
492 }
493 }
494 }
495
496
497 /*
498 * pick a process to run
499 */
500 Proc*
501 _runproc(void)
502 {
503 Schedq *rq;
504 Proc *p;
505 ulong start, now;
506 int i;
507 void (*pt)(Proc*, int, vlong);
508
509 start = perfticks();
510
511 /* cooperative scheduling until the clock ticks */
512 if((p=m->readied) && p->mach==0 && p->state==Ready
513 && runq[Nrq-1].head == nil && runq[Nrq-2].head == nil){
514 skipscheds++;
515 rq = &runq[p->priority];
516 goto found;
517 }
518
519 preempts++;
520
521 loop:
522 /*
523 * find a process that last ran on this processor (affinity),
524 * or one that hasn't moved in a while (load balancing). Every
525 * time around the loop affinity goes down.
526 */
527 spllo();
528 for(i = 0;; i++){
529 /*
530 * find the highest priority target process that this
531 * processor can run given affinity constraints.
532 *
533 */
534 for(rq = &runq[Nrq-1]; rq >= runq; rq--){
535 for(p = rq->head; p; p = p->rnext){
536 if(p->mp == nil || p->mp == MACHP(m->machno)
537 || (!p->wired && i > 0))
538 goto found;
539 }
540 }
541
542 /* waste time or halt the CPU */
543 idlehands();
544
 545                 /* remember how much time we spend here */
546 now = perfticks();
547 m->perf.inidle += now-start;
548 start = now;
549 }
550
551 found:
552 splhi();
553 p = dequeueproc(rq, p);
554 if(p == nil)
555 goto loop;
556
557 p->state = Scheding;
558 p->mp = MACHP(m->machno);
559
560 pt = proctrace;
561 if(pt)
562 pt(p, SRun, 0);
563 return p;
564 }
565
566 int
567 canpage(Proc *p)
568 {
569 int ok = 0;
570
571 splhi();
572 lock(&runq->lk);
573 /* Only reliable way to see if we are Running */
574 if(p->mach == 0) {
575 p->newtlb = 1;
576 ok = 1;
577 }
578 unlock(&runq->lk);
579 spllo();
580
581 return ok;
582 }
583
584 Proc*
585 newproc(void)
586 {
587 char msg[64];
588 Proc *p;
589
590 lock(&procalloc.lk);
591 for(;;) {
592 if((p = procalloc.free))
593 break;
594
595 snprint(msg, sizeof msg, "no procs; %s forking",
596 up? up->text: "kernel");
597 unlock(&procalloc.lk);
598 resrcwait(msg);
599 lock(&procalloc.lk);
600 }
601 procalloc.free = p->qnext;
602 unlock(&procalloc.lk);
603
604 p->state = Scheding;
605 p->psstate = "New";
606 p->mach = 0;
607 p->qnext = 0;
608 p->nchild = 0;
609 p->nwait = 0;
610 p->waitq = 0;
611 p->parent = 0;
612 p->pgrp = 0;
613 p->egrp = 0;
614 p->fgrp = 0;
615 p->rgrp = 0;
616 p->pdbg = 0;
617 p->fpstate = FPinit;
618 p->kp = 0;
619 if(up && up->procctl == Proc_tracesyscall)
620 p->procctl = Proc_tracesyscall;
621 else
622 p->procctl = 0;
623 p->syscalltrace = 0;
624 p->notepending = 0;
625 p->ureg = 0;
626 p->privatemem = 0;
627 p->noswap = 0;
628 p->errstr = p->errbuf0;
629 p->syserrstr = p->errbuf1;
630 p->errbuf0[0] = '\0';
631 p->errbuf1[0] = '\0';
632 p->nlocks.ref = 0;
633 p->delaysched = 0;
634 p->trace = 0;
635 kstrdup(&p->user, "*nouser");
636 kstrdup(&p->text, "*notext");
637 kstrdup(&p->args, "");
638 p->nargs = 0;
639 p->setargs = 0;
640 memset(p->seg, 0, sizeof p->seg);
641 p->pid = incref(&pidalloc);
642 pidhash(p);
643 p->noteid = incref(¬eidalloc);
644 if(p->pid==0 || p->noteid==0)
645 panic("pidalloc");
646 if(p->kstack == 0)
647 p->kstack = smalloc(KSTACK);
648
649 /* sched params */
650 p->mp = 0;
651 p->wired = 0;
652 procpriority(p, PriNormal, 0);
653 p->cpu = 0;
654 p->lastupdate = msec()*Scaling;
655 p->edf = nil;
656
657 vxnewproc(p);
658 return p;
659 }
660
661 /*
662 * wire this proc to a machine
663 */
664 void
665 procwired(Proc *p, int bm)
666 {
667 Proc *pp;
668 int i;
669 char nwired[MAXMACH];
670 Mach *wm;
671
672 if(bm < 0){
673 /* pick a machine to wire to */
674 memset(nwired, 0, sizeof(nwired));
675 p->wired = 0;
676 pp = proctab(0);
677 for(i=0; i<conf.nproc; i++, pp++){
678 wm = pp->wired;
679 if(wm && pp->pid)
680 nwired[wm->machno]++;
681 }
682 bm = 0;
683 for(i=0; i<conf.nmach; i++)
684 if(nwired[i] < nwired[bm])
685 bm = i;
686 } else {
687 /* use the virtual machine requested */
688 bm = bm % conf.nmach;
689 }
690
691 p->wired = MACHP(bm);
692 p->mp = p->wired;
693 }
694
695 void
696 procpriority(Proc *p, int pri, int fixed)
697 {
698 if(pri >= Npriq)
699 pri = Npriq - 1;
700 else if(pri < 0)
701 pri = 0;
702 p->basepri = pri;
703 p->priority = pri;
704 if(fixed){
705 p->fixedpri = 1;
706 } else {
707 p->fixedpri = 0;
708 }
709 }
710
711 void
712 procinit0(void) /* bad planning - clashes with devproc.c */
713 {
714 Proc *p;
715 int i;
716
717 procalloc.free = xalloc(conf.nproc*sizeof(Proc));
718 if(procalloc.free == nil){
719 xsummary();
720 panic("cannot allocate %lud procs (%ludMB)\n", conf.nproc, conf.nproc*sizeof(Proc)/(1024*1024));
721 }
722 procalloc.arena = procalloc.free;
723
724 p = procalloc.free;
725 for(i=0; i<conf.nproc-1; i++,p++)
726 p->qnext = p+1;
727 p->qnext = 0;
728 }
729
730 /*
731 * sleep if a condition is not true. Another process will
732 * awaken us after it sets the condition. When we awaken
733 * the condition may no longer be true.
734 *
735 * we lock both the process and the rendezvous to keep r->p
736 * and p->r synchronized.
737 */
738 void
739 sleep(Rendez *r, int (*f)(void*), void *arg)
740 {
741 int s;
742 void (*pt)(Proc*, int, vlong);
743
744 s = splhi();
745
746 if(up->nlocks.ref)
747 print("process %lud sleeps with %lud locks held, last lock %#p locked at pc %#lux, sleep called from %#p\n",
748 up->pid, up->nlocks.ref, up->lastlock, up->lastlock->pc, getcallerpc(&r));
749 lock(&r->lk);
750 lock(&up->rlock);
751 if(r->p){
752 print("double sleep called from %#p, %lud %lud\n", getcallerpc(&r), r->p->pid, up->pid);
753 dumpstack();
754 }
755
756 /*
757 * Wakeup only knows there may be something to do by testing
758 * r->p in order to get something to lock on.
759 * Flush that information out to memory in case the sleep is
760 * committed.
761 */
762 r->p = up;
763
764 if((*f)(arg) || up->notepending){
765 /*
766 * if condition happened or a note is pending
767 * never mind
768 */
769 if(traceprocs)
770 print("sleep %p %p: already happened\n", m, up);
771 r->p = nil;
772 unlock(&up->rlock);
773 unlock(&r->lk);
774 } else {
775 /*
776 * now we are committed to
777 * change state and call scheduler
778 */
779 pt = proctrace;
780 if(pt)
781 pt(up, SSleep, 0);
782 up->state = Wakeme;
783 up->r = r;
784
785 /* statistics */
786 m->cs++;
787
788 procsave(up);
789 if(setlabel(&up->sched)) {
790 if(traceprocs)
791 print("sleep %p %p: awake\n", m, up);
792 /*
793 * here when the process is awakened
794 */
795 procrestore(up);
796 spllo();
797 } else {
798 if(traceprocs)
799 print("sleep %p %p: sleeping\n", m, up);
800 /*
801 * here to go to sleep (i.e. stop Running)
802 */
803 unlock(&up->rlock);
804 unlock(&r->lk);
805 gotolabel(&m->sched);
806 }
807 }
808
809 if(up->notepending) {
810 up->notepending = 0;
811 splx(s);
812 if(up->procctl == Proc_exitme && up->closingfgrp)
813 forceclosefgrp();
814 error(Eintr);
815 }
816
817 splx(s);
818 }
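
/*
 * Typical use of the sleep/wakeup protocol above, as a hypothetical
 * producer/consumer (Xq, xqread and xqintr are illustrative names):
 * the condition function answers from shared state alone, sleep
 * re-evaluates it under r->lk to close the race, and the producer
 * side needs nothing but wakeup().
 */
typedef struct Xq Xq;
struct Xq
{
	Rendez	r;
	int	ndata;
};

static int
havedata(void *a)
{
	return ((Xq*)a)->ndata > 0;
}

void
xqread(Xq *q)
{
	while(!havedata(q))
		sleep(&q->r, havedata, q);	/* may error(Eintr) on a note */
	q->ndata--;
}

void
xqintr(Xq *q)
{
	q->ndata++;
	wakeup(&q->r);	/* no lost wakeup: r->p is tested under r->lk */
}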
819
820 static int
821 tfn(void *arg)
822 {
823 return up->trend == nil || up->tfn(arg);
824 }
825
826 void
827 twakeup(Ureg *ureg, Timer *t)
828 {
829 Proc *p;
830 Rendez *trend;
831
832 p = t->ta;
833 trend = p->trend;
834 p->trend = 0;
835 if(trend)
836 wakeup(trend);
837 }
838
839 void
840 tsleep(Rendez *r, int (*fn)(void*), void *arg, ulong ms)
841 {
842 if (up->timer.tt){
843 print("tsleep: timer active: mode %d, tf %#p\n", up->timer.tmode, up->timer.tf);
844 timerdel(&up->timer);
845 }
846 up->timer.tns = MS2NS(ms);
847 up->timer.tf = twakeup;
848 up->timer.tmode = Trelative;
849 up->timer.ta = up;
850 up->trend = r;
851 up->tfn = fn;
852 timeradd(&up->timer);
853
854 if(waserror()){
855 timerdel(&up->timer);
856 nexterror();
857 }
858 sleep(r, tfn, arg);
859 if (up->timer.tt)
860 timerdel(&up->timer);
861 up->timer.twhen = 0;
862 poperror();
863 }
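
/*
 * The common idiom built on tsleep is an interruptible timed wait;
 * return0 is the port layer's always-false condition. napms is a
 * hypothetical wrapper, shown to contrast with delay(), which spins
 * and cannot be broken by a note.
 */
void
napms(ulong ms)
{
	tsleep(&up->sleep, return0, nil, ms);	/* returns on timeout or note */
}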
864
865 /*
866 * Expects that only one process can call wakeup for any given Rendez.
867 * We hold both locks to ensure that r->p and p->r remain consistent.
868 * Richard Miller has a better solution that doesn't require both to
869 * be held simultaneously, but I'm a paranoid - presotto.
870 */
871 Proc*
872 wakeup(Rendez *r)
873 {
874 Proc *p;
875 int s;
876
877 s = splhi();
878
879 lock(&r->lk);
880 p = r->p;
881
882 if(p != nil){
883 lock(&p->rlock);
884 if(p->state != Wakeme || p->r != r){
885 iprint("%p %p %d\n", p->r, r, p->state);
886 panic("wakeup: state");
887 }
888 r->p = nil;
889 p->r = nil;
890 ready(p);
891 unlock(&p->rlock);
892 }
893 unlock(&r->lk);
894
895 splx(s);
896
897 return p;
898 }
899
900 /*
901 * if waking a sleeping process, this routine must hold both
 902  * p->rlock and r->lk. However, it can't acquire them in
 903  * the same order as wakeup does, which risks a lock ordering
 904  * deadlock. We break the deadlock by giving up p->rlock
 905  * when we can't get r->lk, and retrying.
906 */
907 int
908 postnote(Proc *p, int dolock, char *n, int flag)
909 {
910 int s, ret;
911 Rendez *r;
912 Proc *d, **l;
913
914 if(dolock)
915 qlock(&p->debug);
916
917 if(flag != NUser && (p->notify == 0 || p->notified))
918 p->nnote = 0;
919
920 ret = 0;
921 if(p->nnote < NNOTE) {
922 strcpy(p->note[p->nnote].msg, n);
923 p->note[p->nnote++].flag = flag;
924 ret = 1;
925 }
926 p->notepending = 1;
927 if(dolock)
928 qunlock(&p->debug);
929
930 /* this loop is to avoid lock ordering problems. */
931 for(;;){
932 s = splhi();
933 lock(&p->rlock);
934 r = p->r;
935
936 /* waiting for a wakeup? */
937 if(r == nil)
938 break; /* no */
939
940 /* try for the second lock */
941 if(canlock(&r->lk)){
942 if(p->state != Wakeme || r->p != p)
943 panic("postnote: state %d %d %d", r->p != p, p->r != r, p->state);
944 p->r = nil;
945 r->p = nil;
946 ready(p);
947 unlock(&r->lk);
948 break;
949 }
950
951 /* give other process time to get out of critical section and try again */
952 unlock(&p->rlock);
953 splx(s);
954 sched();
955 }
956 unlock(&p->rlock);
957 splx(s);
958
959 if(p->state != Rendezvous)
960 return ret;
961
962 /* Try and pull out of a rendezvous */
963 lock(&p->rgrp->ref.lk);
964 if(p->state == Rendezvous) {
965 p->rendval = ~0;
966 l = &REND(p->rgrp, p->rendtag);
967 for(d = *l; d; d = d->rendhash) {
968 if(d == p) {
969 *l = p->rendhash;
970 break;
971 }
972 l = &d->rendhash;
973 }
974 ready(p);
975 }
976 unlock(&p->rgrp->ref.lk);
977 return ret;
978 }
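
/*
 * The retry pattern above in isolation, for any two Locks that another
 * code path takes in the opposite order; lockboth is a sketch, not a
 * kernel primitive. Dropping the first lock and rescheduling, rather
 * than spinning with it held, lets the opposing path complete.
 */
static void
lockboth(Lock *a, Lock *b)
{
	for(;;){
		lock(a);
		if(canlock(b))
			return;		/* holds both a and b */
		unlock(a);
		sched();		/* let the b-then-a path run */
	}
}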
979
980 /*
981 * weird thing: keep at most NBROKEN around
982 */
983 #define NBROKEN 4
984 struct
985 {
986 QLock lk;
987 int n;
988 Proc *p[NBROKEN];
989 }broken;
990
991 void
992 addbroken(Proc *p)
993 {
994 qlock(&broken.lk);
995 if(broken.n == NBROKEN) {
996 ready(broken.p[0]);
997 memmove(&broken.p[0], &broken.p[1], sizeof(Proc*)*(NBROKEN-1));
998 --broken.n;
999 }
1000 broken.p[broken.n++] = p;
1001 qunlock(&broken.lk);
1002
1003 p->state = Broken;
1004 p->psstate = 0;
1005 sched();
1006 }
1007
1008 void
1009 unbreak(Proc *p)
1010 {
1011 int b;
1012
1013 qlock(&broken.lk);
1014 for(b=0; b < broken.n; b++)
1015 if(broken.p[b] == p) {
1016 broken.n--;
1017 memmove(&broken.p[b], &broken.p[b+1],
1018 sizeof(Proc*)*(NBROKEN-(b+1)));
1019 ready(p);
1020 break;
1021 }
1022 qunlock(&broken.lk);
1023 }
1024
1025 int
1026 freebroken(void)
1027 {
1028 int i, n;
1029
1030 qlock(&broken.lk);
1031 n = broken.n;
1032 for(i=0; i<n; i++) {
1033 ready(broken.p[i]);
1034 broken.p[i] = 0;
1035 }
1036 broken.n = 0;
1037 qunlock(&broken.lk);
1038 return n;
1039 }
1040
1041 void
1042 pexit(char *exitstr, int freemem)
1043 {
1044 Proc *p;
1045 Segment **s, **es;
1046 long utime, stime;
1047 Waitq *wq, *f, *next;
1048 Fgrp *fgrp;
1049 Egrp *egrp;
1050 Rgrp *rgrp;
1051 Pgrp *pgrp;
1052 Chan *dot;
1053 void (*pt)(Proc*, int, vlong);
1054
1055 if(up->syscalltrace)
1056 free(up->syscalltrace);
1057 up->alarm = 0;
1058 if (up->timer.tt)
1059 timerdel(&up->timer);
1060 pt = proctrace;
1061 if(pt)
1062 pt(up, SDead, 0);
1063
1064 /* nil out all the resources under lock (free later) */
1065 qlock(&up->debug);
1066 fgrp = up->fgrp;
1067 up->fgrp = nil;
1068 egrp = up->egrp;
1069 up->egrp = nil;
1070 rgrp = up->rgrp;
1071 up->rgrp = nil;
1072 pgrp = up->pgrp;
1073 up->pgrp = nil;
1074 dot = up->dot;
1075 up->dot = nil;
1076 qunlock(&up->debug);
1077
1078 if(fgrp)
1079 closefgrp(fgrp);
1080 if(egrp)
1081 closeegrp(egrp);
1082 if(rgrp)
1083 closergrp(rgrp);
1084 if(dot)
1085 cclose(dot);
1086 if(pgrp)
1087 closepgrp(pgrp);
1088
1089 /*
1090 * if not a kernel process and have a parent,
1091 * do some housekeeping.
1092 */
1093 if(up->kp == 0) {
1094 p = up->parent;
1095 if(p == 0) {
1096 if(exitstr == 0)
1097 exitstr = "unknown";
1098 panic("boot process died: %s", exitstr);
1099 }
1100
1101 while(waserror())
1102 ;
1103
1104 wq = smalloc(sizeof(Waitq));
1105 poperror();
1106
1107 wq->w.pid = up->pid;
1108 utime = up->time[TUser] + up->time[TCUser];
1109 stime = up->time[TSys] + up->time[TCSys];
1110 wq->w.time[TUser] = tk2ms(utime);
1111 wq->w.time[TSys] = tk2ms(stime);
1112 wq->w.time[TReal] = tk2ms(msec() - up->time[TReal]);
1113 if(exitstr && exitstr[0])
1114 snprint(wq->w.msg, sizeof(wq->w.msg), "%s %lud: %s", up->text, up->pid, exitstr);
1115 else
1116 wq->w.msg[0] = '\0';
1117
1118 lock(&p->exl);
1119 /*
1120 * Check that parent is still alive.
1121 */
1122 if(p->pid == up->parentpid && p->state != Broken) {
1123 p->nchild--;
1124 p->time[TCUser] += utime;
1125 p->time[TCSys] += stime;
1126 /*
1127                          * If there would be more than 128 wait records
1128                          * for my parent, then don't leave a wait
1129 * record behind. This helps prevent badly written
1130 * daemon processes from accumulating lots of wait
1131 * records.
1132 */
1133 if(p->nwait < 128) {
1134 wq->next = p->waitq;
1135 p->waitq = wq;
1136 p->nwait++;
1137 wq = nil;
1138 wakeup(&p->waitr);
1139 }
1140 }
1141 unlock(&p->exl);
1142 if(wq)
1143 free(wq);
1144 }
1145
1146 if(!freemem)
1147 addbroken(up);
1148
1149 qlock(&up->seglock);
1150 es = &up->seg[NSEG];
1151 for(s = up->seg; s < es; s++) {
1152 if(*s) {
1153 putseg(*s);
1154 *s = 0;
1155 }
1156 }
1157 qunlock(&up->seglock);
1158
1159 lock(&up->exl); /* Prevent my children from leaving waits */
1160 pidunhash(up);
1161 up->pid = 0;
1162 wakeup(&up->waitr);
1163 unlock(&up->exl);
1164
1165 for(f = up->waitq; f; f = next) {
1166 next = f->next;
1167 free(f);
1168 }
1169
1170 /* release debuggers */
1171 qlock(&up->debug);
1172 if(up->pdbg) {
1173 wakeup(&up->pdbg->sleep);
1174 up->pdbg = 0;
1175 }
1176 qunlock(&up->debug);
1177
1178 /* Sched must not loop for these locks */
1179 lock(&procalloc.lk);
1180 lock(&palloc.lk);
1181
1182 up->state = Moribund;
1183 sched();
1184 panic("pexit");
1185 }
1186
1187 int
1188 haswaitq(void *x)
1189 {
1190 Proc *p;
1191
1192 p = (Proc *)x;
1193 return p->waitq != 0;
1194 }
1195
1196 ulong
1197 pwait(Waitmsg *w)
1198 {
1199 ulong cpid;
1200 Waitq *wq;
1201
1202 if(!canqlock(&up->qwaitr))
1203 error(Einuse);
1204
1205 if(waserror()) {
1206 qunlock(&up->qwaitr);
1207 nexterror();
1208 }
1209
1210 lock(&up->exl);
1211 if(up->nchild == 0 && up->waitq == 0) {
1212 unlock(&up->exl);
1213 error(Enochild);
1214 }
1215 unlock(&up->exl);
1216
1217 sleep(&up->waitr, haswaitq, up);
1218
1219 lock(&up->exl);
1220 wq = up->waitq;
1221 up->waitq = wq->next;
1222 up->nwait--;
1223 unlock(&up->exl);
1224
1225 qunlock(&up->qwaitr);
1226 poperror();
1227
1228 if(w)
1229 memmove(w, &wq->w, sizeof(Waitmsg));
1230 cpid = wq->w.pid;
1231 free(wq);
1232 return cpid;
1233 }
1234
1235 Proc*
1236 proctab(int i)
1237 {
1238 return &procalloc.arena[i];
1239 }
1240
1241 void
1242 dumpaproc(Proc *p)
1243 {
1244 ulong bss;
1245 char *s;
1246
1247 if(p == 0)
1248 return;
1249
1250 bss = 0;
1251 if(p->seg[BSEG])
1252 bss = p->seg[BSEG]->top;
1253
1254 s = p->psstate;
1255 if(s == 0)
1256 s = statename[p->state];
1257 print("%3lud:%10s pc %8lux dbgpc %8lux %8s (%s) ut %ld st %ld bss %lux qpc %lux nl %lud nd %lud lpc %lux pri %lud\n",
1258 p->pid, p->text, p->pc, dbgpc(p), s, statename[p->state],
1259 p->time[0], p->time[1], bss, p->qpc, p->nlocks.ref, p->delaysched, p->lastlock ? p->lastlock->pc : 0, p->priority);
1260 }
1261
1262 void
1263 procdump(void)
1264 {
1265 int i;
1266 Proc *p;
1267
1268 if(up)
1269 print("up %lud\n", up->pid);
1270 else
1271 print("no current process\n");
1272 for(i=0; i<conf.nproc; i++) {
1273 p = &procalloc.arena[i];
1274 if(p->state == Dead)
1275 continue;
1276
1277 dumpaproc(p);
1278 }
1279 }
1280
1281 /*
1282 * wait till all processes have flushed their mmu
1283  * state about segment s
1284 */
1285 void
1286 procflushseg(Segment *s)
1287 {
1288 int i, ns, nm, nwait;
1289 Proc *p;
1290
1291 /*
1292 * tell all processes with this
1293 * segment to flush their mmu's
1294 */
1295 nwait = 0;
1296 for(i=0; i<conf.nproc; i++) {
1297 p = &procalloc.arena[i];
1298 if(p->state == Dead)
1299 continue;
1300 for(ns = 0; ns < NSEG; ns++)
1301 if(p->seg[ns] == s){
1302 p->newtlb = 1;
1303 for(nm = 0; nm < conf.nmach; nm++){
1304 if(MACHP(nm)->proc == p){
1305 MACHP(nm)->flushmmu = 1;
1306 nwait++;
1307 }
1308 }
1309 break;
1310 }
1311 }
1312
1313 if(nwait == 0)
1314 return;
1315
1316 /*
1317 * wait for all processors to take a clock interrupt
1318 * and flush their mmu's
1319 */
1320 for(nm = 0; nm < conf.nmach; nm++)
1321 if(MACHP(nm) != m)
1322 while(MACHP(nm)->flushmmu)
1323 sched();
1324 }
1325
1326 void
1327 scheddump(void)
1328 {
1329 Proc *p;
1330 Schedq *rq;
1331
1332 for(rq = &runq[Nrq-1]; rq >= runq; rq--){
1333 if(rq->head == 0)
1334 continue;
1335 print("rq%ld:", rq-runq);
1336 for(p = rq->head; p; p = p->rnext)
1337 print(" %lud(%lud)", p->pid, msec() - p->readytime);
1338 print("\n");
1339 delay(150);
1340 }
1341 print("nrdy %d\n", nrdy);
1342 }
1343
1344 void
1345 kproc(char *name, void (*func)(void *), void *arg)
1346 {
1347 Proc *p;
1348 static Pgrp *kpgrp;
1349
1350 extern int tracekdev;
1351 if(tracekdev)
1352 iprint("kproc %s\n", name);
1353
1354 p = newproc();
1355 p->psstate = 0;
1356 p->procmode = 0640;
1357 p->kp = 1;
1358 p->noswap = 1;
1359
1360 if(up){
1361 p->fpsave = up->fpsave;
1362 p->scallnr = up->scallnr;
1363 p->s = up->s;
1364 p->slash = up->slash;
1365 p->dot = up->dot;
1366 if(p->dot)
1367 incref(&p->dot->ref);
1368
1369 memmove(p->note, up->note, sizeof(p->note));
1370 p->nnote = up->nnote;
1371 p->lastnote = up->lastnote;
1372 p->notify = up->notify;
1373 }
1374
1375 p->notified = 0;
1376 p->ureg = 0;
1377 p->dbgreg = 0;
1378 p->nerrlab = 0;
1379
1380 procpriority(p, PriKproc, 0);
1381
1382 kprocchild(p, func, arg);
1383
1384 kstrdup(&p->user, eve);
1385 kstrdup(&p->text, name);
1386 if(kpgrp == 0)
1387 kpgrp = newpgrp();
1388 p->pgrp = kpgrp;
1389 incref(&kpgrp->ref);
1390
1391 memset(p->time, 0, sizeof(p->time));
1392 p->time[TReal] = msec();
1393 ready(p);
1394 }
1395
1396 /*
1397 * called splhi() by notify(). See comment in notify for the
1398 * reasoning.
1399 */
1400 void
1401 procctl(Proc *p)
1402 {
1403 char *state;
1404 ulong s;
1405
1406 switch(p->procctl) {
1407 case Proc_exitbig:
1408 spllo();
1409 pexit("Killed: Insufficient physical memory", 1);
1410
1411 case Proc_exitme:
1412 spllo(); /* pexit has locks in it */
1413 pexit("Killed", 1);
1414
1415 case Proc_traceme:
1416 if(p->nnote == 0)
1417 return;
1418 /* No break */
1419
1420 case Proc_stopme:
1421 p->procctl = 0;
1422 state = p->psstate;
1423 p->psstate = "Stopped";
1424 /* free a waiting debugger */
1425 s = spllo();
1426 qlock(&p->debug);
1427 if(p->pdbg) {
1428 wakeup(&p->pdbg->sleep);
1429 p->pdbg = 0;
1430 }
1431 qunlock(&p->debug);
1432 splhi();
1433 p->state = Stopped;
1434 sched();
1435 p->psstate = state;
1436 splx(s);
1437 return;
1438 }
1439 }
1440
1441 #include "errstr.h"
1442
1443 void
1444 error(char *err)
1445 {
1446 spllo();
1447
1448 assert(up->nerrlab < NERR);
1449 kstrcpy(up->errstr, err, ERRMAX);
1450 setlabel(&up->errlab[NERR-1]);
1451 nexterror();
1452 }
1453
1454 void
1455 nexterror(void)
1456 {
1457 gotolabel(&up->errlab[--up->nerrlab]);
1458 }
1459
1460 void
1461 exhausted(char *resource)
1462 {
1463 char buf[ERRMAX];
1464
1465 sprint(buf, "no free %s", resource);
1466 iprint("%s\n", buf);
1467 error(buf);
1468 }
1469
1470 void
1471 killbig(char *why)
1472 {
1473 int i;
1474 Segment *s;
1475 ulong l, max;
1476 Proc *p, *ep, *kp;
1477
1478 max = 0;
1479 kp = 0;
1480 ep = procalloc.arena+conf.nproc;
1481 for(p = procalloc.arena; p < ep; p++) {
1482 if(p->state == Dead || p->kp)
1483 continue;
1484 l = 0;
1485 for(i=1; i<NSEG; i++) {
1486 s = p->seg[i];
1487 if(s != 0)
1488 l += s->top - s->base;
1489 }
1490 if(l > max && ((p->procmode&0222) || strcmp(eve, p->user)!=0)) {
1491 kp = p;
1492 max = l;
1493 }
1494 }
1495
1496 print("%lud: %s killed: %s\n", kp->pid, kp->text, why);
1497 for(p = procalloc.arena; p < ep; p++) {
1498 if(p->state == Dead || p->kp)
1499 continue;
1500 if(p != kp && p->seg[BSEG] && p->seg[BSEG] == kp->seg[BSEG])
1501 p->procctl = Proc_exitbig;
1502 }
1503 kp->procctl = Proc_exitbig;
1504 for(i = 0; i < NSEG; i++) {
1505 s = kp->seg[i];
1506 if(s != 0 && canqlock(&s->lk)) {
1507 mfreeseg(s, s->base, (s->top - s->base)/BY2PG);
1508 qunlock(&s->lk);
1509 }
1510 }
1511 }
1512
1513 /*
1514 * change ownership to 'new' of all processes owned by 'old'. Used when
1515 * eve changes.
1516 */
1517 void
1518 renameuser(char *old, char *new)
1519 {
1520 Proc *p, *ep;
1521
1522 ep = procalloc.arena+conf.nproc;
1523 for(p = procalloc.arena; p < ep; p++)
1524 if(p->user!=nil && strcmp(old, p->user)==0)
1525 kstrdup(&p->user, new);
1526 }
1527
1528 /*
1529 * time accounting called by clock() splhi'd
1530 */
1531 void
1532 accounttime(void)
1533 {
1534 Proc *p;
1535 ulong n, per;
1536 static ulong nrun;
1537
1538 p = m->proc;
1539 if(p) {
1540 nrun++;
1541 p->time[p->insyscall]++;
1542 }
1543
1544 /* calculate decaying duty cycles */
1545 n = perfticks();
1546 per = n - m->perf.last;
1547 m->perf.last = n;
1548 per = (m->perf.period*(HZ-1) + per)/HZ;
1549 if(per != 0)
1550 m->perf.period = per;
1551
1552 m->perf.avg_inidle = (m->perf.avg_inidle*(HZ-1)+m->perf.inidle)/HZ;
1553 m->perf.inidle = 0;
1554
1555 m->perf.avg_inintr = (m->perf.avg_inintr*(HZ-1)+m->perf.inintr)/HZ;
1556 m->perf.inintr = 0;
1557
1558 /* only one processor gets to compute system load averages */
1559 if(m->machno != 0)
1560 return;
1561
1562 /*
1563 * calculate decaying load average.
1564 * if we decay by (n-1)/n then it takes
1565 * n clock ticks to go from load L to .36 L once
1566 * things quiet down. it takes about 5 n clock
1567 * ticks to go to zero. so using HZ means this is
1568 * approximately the load over the last second,
1569 * with a tail lasting about 5 seconds.
1570 */
1571 n = nrun;
1572 nrun = 0;
1573 n = (nrdy+n)*1000;
1574 m->load = (m->load*(HZ-1)+n)/HZ;
1575 }
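
/*
 * Steady-state check of the filter above: with one proc continuously
 * ready, n is 1000 on every tick and m->load settles at the fixed
 * point L = (L*(HZ-1)+1000)/HZ, i.e. L = 1000. So load is measured
 * in thousandths of a cpu, matching its use in reprioritize().
 */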
1576
1577 static void
1578 pidhash(Proc *p)
1579 {
1580 int h;
1581
1582 h = p->pid % nelem(procalloc.ht);
1583 lock(&procalloc.lk);
1584 p->pidhash = procalloc.ht[h];
1585 procalloc.ht[h] = p;
1586 unlock(&procalloc.lk);
1587 }
1588
1589 static void
1590 pidunhash(Proc *p)
1591 {
1592 int h;
1593 Proc **l;
1594
1595 h = p->pid % nelem(procalloc.ht);
1596 lock(&procalloc.lk);
1597 for(l = &procalloc.ht[h]; *l != nil; l = &(*l)->pidhash)
1598 if(*l == p){
1599 *l = p->pidhash;
1600 break;
1601 }
1602 unlock(&procalloc.lk);
1603 }
1604
1605 int
1606 procindex(ulong pid)
1607 {
1608 Proc *p;
1609 int h;
1610 int s;
1611
1612 s = -1;
1613 h = pid % nelem(procalloc.ht);
1614 lock(&procalloc.lk);
1615 for(p = procalloc.ht[h]; p != nil; p = p->pidhash)
1616 if(p->pid == pid){
1617 s = p - procalloc.arena;
1618 break;
1619 }
1620 unlock(&procalloc.lk);
1621 return s;
1622 }