mmu.c - vx32 - Local 9vx git repository for patches.
git clone git://r-36.net/vx32
---
mmu.c (7541B)
---
#define WANT_M
#include "u.h"
#include <pthread.h>
#include "libvx32/vx32.h"
#include <sys/mman.h>
#include "lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "error.h"
#include "ureg.h"

int tracemmu;
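
/*
 * Some BSD-derived systems spell MAP_ANONYMOUS as MAP_ANON;
 * MAP_EMPTY bundles the flags needed to replace a region with
 * fresh anonymous pages.
 */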
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
#define MAP_EMPTY (MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE)

/*
 * We allocate a 256MB page file on disk to hold the "physical memory".
 * We'll mmap individual file pages where we need them to simulate
 * the page translation of a real MMU.  To make the simulation more
 * faithful, we map the vx32 sandboxed address space starting at 0,
 * so that kernel address 0 equals user address 0 and pointers can
 * be shared directly.  Plan 9 assumes this, and while it's not a ton
 * of work to break that assumption, it was easier not to.
 *
 * This value may be changed with the -m switch.
 */
int memsize = (256<<20);
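
/*
 * Illustrative sketch (not part of the original file): the heart of
 * the scheme above is that one page-file offset can be mapped at two
 * host addresses at once, so a store through the kernel's direct
 * mapping is visible through the user mapping, exactly as if an MMU
 * had translated both virtual pages to one physical page.  The fd
 * and the helper itself are hypothetical.
 */
#ifdef notdef
static void
aliasdemo(int fd)	/* fd: an open scratch file at least one page long */
{
	char *a, *b;

	/* Map the same file page at two different addresses. */
	a = mmap(nil, BY2PG, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	b = mmap(nil, BY2PG, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	if(a == MAP_FAILED || b == MAP_FAILED)
		panic("aliasdemo mmap: %r");
	a[0] = 'x';
	assert(b[0] == 'x');	/* both views alias one "physical" page */
	munmap(a, BY2PG);
	munmap(b, BY2PG);
}
#endif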

static int pagefile;
static char* pagebase;
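
/*
 * Each Uspace is a reserved host address range big enough to hold
 * one simulated user address space; ulist keeps the spaces in
 * most-recently-used order, front to back.  nuspace may be raised
 * at startup to cache more than one space.
 */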
static Uspace uspace[16];
static Uspace *ulist[nelem(uspace)];
int nuspace = 1;

#ifdef __i386__
#define BIT32 0
#define HINT nil
#elif defined(__amd64__)
#ifdef linux
#define BIT32 MAP_32BIT
#define HINT nil
#elif defined(__FreeBSD__)
#define BIT32 MAP_FIXED
#define HINT (caddr_t)0x40000000
#endif
#endif
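
/*
 * BIT32 and HINT (above) keep the user-space reservation below 4GB
 * on 64-bit hosts, since vx32 guest pointers are 32 bits wide: Linux
 * offers MAP_32BIT for this, while FreeBSD has no such flag, so
 * there we ask for a fixed mapping at a low hint address instead.
 */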

/*
 * Is v a pointer into the current process's user address space?
 */
int
isuaddr(void *v)
{
	uchar *p;
	uchar *uzero;

	p = v;
	uzero = up->pmmu.uzero;
	return uzero <= p && p < uzero+USTKTOP;
}

/*
 * Allocate a process-sized mapping with nothing there.
 * The point is to reserve the space so that
 * nothing else ends up there later.
 */
static void*
mapzero(void)
{
	int fd, bit32;
	void *v;
	void *hint;

	bit32 = BIT32;
	hint = HINT;

	/* First try mmaping /dev/zero.  Some OSes don't allow this. */
	if((fd = open("/dev/zero", O_RDONLY)) >= 0){
		v = mmap(hint, USTKTOP, PROT_NONE, bit32|MAP_PRIVATE, fd, 0);
		close(fd);	/* the mapping, if any, keeps its own reference */
		if(v != MAP_FAILED) {
			if((uint32_t)(uintptr)v != (uintptr)v) {
				iprint("mmap returned 64-bit pointer %p\n", v);
				panic("mmap");
			}
			return v;
		}
	}

	/* Next try an anonymous map. */
	v = mmap(hint, USTKTOP, PROT_NONE, bit32|MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	if(v != MAP_FAILED) {
		if((uint32_t)(uintptr)v != (uintptr)v) {
			iprint("mmap returned 64-bit pointer %p\n", v);
			panic("mmap");
		}
		return v;
	}

	return nil;
}
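
/*
 * The PROT_NONE reservation makes every user page inaccessible until
 * putmmu maps a real page-file page over it with MAP_FIXED; stray
 * references trap instead of touching unrelated host memory.
 */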

/*
 * Set up the simulated MMU: create the (unlinked) page file, map it
 * read/write at pagebase for the kernel's direct access, reserve
 * nuspace user address spaces, and tell the allocator how many
 * pages of "physical" memory exist.
 */
void
mmuinit(void)
{
	char tmp[] = "/var/tmp/9vx.pages.XXXXXX";
	void *v;
	int i;

	if((pagefile = mkstemp(tmp)) < 0)
		panic("mkstemp: %r");
	if(ftruncate(pagefile, memsize) < 0)
		panic("ftruncate pagefile: %r");
	unlink(tmp);	/* "remove on close" */

	/* Map pages for direct access at pagebase, wherever that is. */
	/* MAP_SHARED means write the changes back to the file. */
	v = mmap(nil, memsize, PROT_READ|PROT_WRITE,
		MAP_SHARED, pagefile, 0);
	if(v == MAP_FAILED)
		panic("mmap pagefile: %r");
	pagebase = v;

	if(nuspace <= 0)
		nuspace = 1;
	if(nuspace > nelem(uspace))
		nuspace = nelem(uspace);
	for(i=0; i<nuspace; i++){
		uspace[i].uzero = mapzero();
		if(uspace[i].uzero == nil)
			panic("mmap address space %d", i);
		ulist[i] = &uspace[i];
	}

	conf.mem[0].base = 0;
	conf.mem[0].npage = memsize / BY2PG;

	palloc.mem[0].base = 0;
	palloc.mem[0].npage = memsize / BY2PG;
}
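
/*
 * Because the page file is unlinked while still open, the host OS
 * reclaims its disk space automatically when 9vx exits, even on a
 * crash; no stale /var/tmp/9vx.pages.* files are left behind.
 */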

/*
 * Temporary page mappings are easy again:
 * everything is permanently mapped at pagebase.
 */
void*
tmpmap(Page *pg)
{
	assert(pg->pa < memsize);
	return pagebase + pg->pa;
}

void
tmpunmap(void *v)
{
	/* Nothing to undo; just check that v was a direct-mapped address. */
	assert(pagebase <= (char*)v && (char*)v < pagebase + memsize);
}
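
/*
 * kmap and kunmap reduce to the same permanent direct mapping;
 * there is no scarce kernel map to manage, so kunmap has nothing
 * to do.
 */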
KMap*
kmap(Page *p)
{
	return (KMap*)tmpmap(p);
}

void
kunmap(KMap *k)
{
}

/*
 * Flush the current address space.
 */
static void
mmapflush(Uspace *us)
{
	m->flushmmu = 0;

	/* Nothing mapped? */
	if(us == nil || us->lo > us->hi || us->uzero == nil)
		return;

#ifdef __FreeBSD__
	if(__FreeBSD__ < 7){
		/*
		 * On FreeBSD, we need to be able to use mincore to
		 * tell whether a page is mapped, so we have to remap
		 * something with no pages here.
		 */
		if(mmap(us->uzero, us->hi+BY2PG, PROT_NONE,
				MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) == MAP_FAILED)
			panic("mmapflush mmap: %r");
		us->lo = 0x80000000UL;	/* mark the range empty: lo > hi */
		us->hi = 0;
		return;
	}
#endif

	/* Clear only as much as got mapped. */
	if(mprotect(us->uzero, us->hi+BY2PG, PROT_NONE) < 0)
		panic("mmapflush mprotect: %r");
	us->lo = 0x80000000UL;	/* mark the range empty: lo > hi */
	us->hi = 0;
}
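
/*
 * us->lo and us->hi bracket the pages putmmu has actually mapped, so
 * a flush need only cover [0, hi+BY2PG) rather than the whole space;
 * lo = 0x80000000UL together with hi = 0 encodes "nothing mapped".
 */

#ifdef notdef
/*
 * Sketch of the mincore-based check the FreeBSD comment above
 * alludes to (a hypothetical helper, not used by 9vx itself):
 * returns nonzero if the page containing va is currently mapped
 * in us.  mincore fails with ENOMEM on an unmapped range.
 */
static int
ismapped(Uspace *us, ulong va)
{
	char vec[1];

	return mincore(us->uzero + (va & ~(BY2PG-1)), BY2PG, vec) == 0;
}
#endif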

/*
 * Update the "MMU" in response to a user fault.
 * pa may have PTEWRITE set.
 */
void
putmmu(ulong va, ulong pa, Page *p)
{
	int prot;
	Uspace *us;

	if(tracemmu || (pa&~(PTEWRITE|PTEVALID)) != p->pa)
		iprint("putmmu va %lux pa %lux p->pa %lux\n", va, pa, p->pa);

	assert(p->pa < memsize && pa < memsize);
	assert(up);
	us = up->pmmu.us;
	assert(us);

	/* Map the page */
	prot = PROT_READ;
	if(pa&PTEWRITE)
		prot |= PROT_WRITE;
	pa &= ~(BY2PG-1);
	va &= ~(BY2PG-1);
	if(mmap(us->uzero+va, BY2PG, prot, MAP_FIXED|MAP_SHARED,
			pagefile, pa) == MAP_FAILED)
		panic("putmmu: %r");

	/* Record high and low address range for quick unmap. */
	if(us->lo > va)
		us->lo = va;
	if(us->hi < va)
		us->hi = va;
//	printlinuxmaps();
}
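
/*
 * In the portable Plan 9 code, fixfault() (port/fault.c) installs a
 * translation by calling putmmu once it has resolved the faulting
 * page; here that "TLB load" becomes a MAP_FIXED mmap of the page
 * file at the faulting user address.
 */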

/*
 * The memory maps have changed for up.  Flush all cached state.
 */
void
flushmmu(void)
{
	if(tracemmu)
		iprint("flushmmu\n");

	if(up){
		vxproc_flush(up->pmmu.vxproc);
		mmapflush(up->pmmu.us);
	}
}

/*
 * Mark us most recently used by moving it to the front of ulist.
 */
void
usespace(Uspace *us)
{
	int i;

	for(i=0; i<nuspace; i++)
		if(ulist[i] == us){
			while(i > 0){
				ulist[i] = ulist[i-1];
				i--;
			}
			ulist[0] = us;
			break;
		}
}

/*
 * Give p the least recently used address space,
 * evicting the current owner if there is one.
 */
Uspace*
getspace(Proc *p)
{
	Uspace *us;

	us = ulist[nuspace-1];
	if(us->p){
		if(tracemmu)
			iprint("^^^^^^^^^^ %ld %s [evict %d]\n",
				us->p->pid, us->p->text, (int)(us - uspace));
		mmapflush(us);
	}
	us->p = p;
	p->pmmu.vxmm.base = us->uzero;
	p->pmmu.uzero = us->uzero;
	p->pmmu.us = us;
	usespace(us);
	return us;
}

/*
 * Reattach p to an address space it used before,
 * flushing it if another process has used it since.
 */
void
takespace(Proc *p, Uspace *us)
{
	usespace(us);
	if(us->p == p)
		return;
	if(tracemmu){
		if(us->p)
			iprint("^^^^^^^^^^ %ld %s [steal %d]\n",
				us->p->pid, us->p->text, (int)(us - uspace));
	}
	us->p = p;
	mmapflush(us);
}

/*
 * Detach us from its process and move it to the back
 * of ulist, making it the next space to be reused.
 */
void
putspace(Uspace *us)
{
	int i;

	mmapflush(us);
	us->p->pmmu.us = nil;
	us->p->pmmu.uzero = nil;
	us->p->pmmu.vxmm.base = nil;
	us->p = nil;
	for(i=0; i<nuspace; i++)
		if(ulist[i] == us){
			while(++i < nuspace)
				ulist[i-1] = ulist[i];
			ulist[i-1] = us;
			break;
		}
}
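
/*
 * Together, getspace/takespace/putspace and usespace manage the
 * small pool of reserved spaces as an LRU cache: a running process
 * keeps its space hot at the front of ulist, and a dying process
 * parks its space at the back for reuse.
 */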

/*
 * Called when the scheduler has decided to run proc p.
 * Prepare to run proc p.
 */
void
mmuswitch(Proc *p)
{
	/*
	 * Switch the address space, but only if it's not the
	 * one we were just in.  Also, kprocs don't count --
	 * only the guys on cpu0 do.
	 */
	if(p->kp)
		return;

	if(tracemmu)
		iprint("mmuswitch %ld %s\n", p->pid, p->text);

	if(p->pmmu.us && p->pmmu.us->p == p){
		if(tracemmu)
			iprint("---------- %ld %s [%d]\n",
				p->pid, p->text, (int)(p->pmmu.us - uspace));
		usespace(p->pmmu.us);
		if(!p->newtlb && !m->flushmmu)
			return;
		mmapflush(p->pmmu.us);
		p->newtlb = 0;
		return;
	}

	if(p->pmmu.us == nil)
		getspace(p);
	else
		takespace(p, p->pmmu.us);
	if(tracemmu)
		iprint("========== %ld %s [%d]\n",
			p->pid, p->text, (int)(p->pmmu.us - uspace));
}

/*
 * Called when proc p is dying.
 */
void
mmurelease(Proc *p)
{
	if(p->kp)
		return;
	if(tracemmu)
		iprint("mmurelease %ld %s\n", p->pid, p->text);
	if(p->pmmu.vxproc)
		vxproc_flush(p->pmmu.vxproc);
	if(p->pmmu.us){
		if(tracemmu)
			iprint("^^^^^^^^^^ %ld %s [release %d]\n",
				p->pid, p->text, (int)(p->pmmu.us - uspace));
		putspace(p->pmmu.us);
		if(m->flushmmu)
			mmapflush(p->pmmu.us);
	}
}

/*
 * Debugging aid: print the host kernel's view of our memory map.
 * (Linux-specific: reads /proc/<pid>/maps.)
 */
void
printlinuxmaps(void)
{
	char buf[100];

	snprint(buf, sizeof buf, "cat /proc/%d/maps", getpid());
	system(buf);
}

/*
 * Set the simulated physical memory size to size MB (the -m switch).
 * Only the first nonzero call takes effect.
 */
void
mmusize(int size)
{
	static int set = 0;

	if(!set && size){
		set = 1;
		memsize = (size << 20);
	}
}
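
/*
 * Example: 9vx -m 512 ends up calling mmusize(512), so memsize
 * becomes 512<<20 = 536870912 bytes, i.e. 131072 4K pages of
 * simulated physical memory.  (The -m flag is per the comment at
 * the top of this file; the exact call site lives outside mmu.c.)
 */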