mmu.c - vx32 - Local 9vx git repository for patches.
(HTM) git clone git://r-36.net/vx32
(DIR) Log
(DIR) Files
(DIR) Refs
---
mmu.c (7541B)
---
1 #define WANT_M
2 #include "u.h"
3 #include <pthread.h>
4 #include "libvx32/vx32.h"
5 #include <sys/mman.h>
6 #include "lib.h"
7 #include "mem.h"
8 #include "dat.h"
9 #include "fns.h"
10 #include "error.h"
11 #include "ureg.h"
12
13 int tracemmu;
14
15 #ifndef MAP_ANONYMOUS
16 #define MAP_ANONYMOUS MAP_ANON
17 #endif
18 #define MAP_EMPTY (MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE)
19
/*
 * We allocate a 256MB page file on disk to hold the "physical memory".
 * We'll mmap individual file pages where we need them to simulate
 * the page translation of a real MMU. To make the simulation more
 * faithful, we map the vx32 sandboxed address space starting at 0,
 * so that kernel 0 = user 0, so that pointers can be shared.
 * Plan 9 assumes this, and while it's not a ton of work to break that
 * assumption, it was easier not to.
 *
 * This value may be changed with the -m switch.
 */
int memsize = (256<<20);

static int pagefile;	/* fd of the (unlinked) file backing "physical memory" */
static char* pagebase;	/* kernel's own direct mapping of the whole page file */

static Uspace uspace[16];		/* simulated user address spaces */
static Uspace *ulist[nelem(uspace)];	/* uspace entries, most-recently-used first */
int nuspace = 1;			/* how many uspace slots are in use (clamped in mmuinit) */

/*
 * Per-platform mmap flags and address hint used to keep the
 * user address space within the low 32 bits on 64-bit hosts,
 * so user pointers fit in vx32's 32-bit world.
 */
#ifdef __i386__
#define BIT32 0
#define HINT nil
#elif defined(__amd64__)
#ifdef linux
#define BIT32 MAP_32BIT
#define HINT nil
#elif defined(__FreeBSD__)
#define BIT32 MAP_FIXED
#define HINT (caddr_t)0x40000000
#endif
#endif
52
53 int
54 isuaddr(void *v)
55 {
56 uchar *p;
57 uchar *uzero;
58
59 p = v;
60 uzero = up->pmmu.uzero;
61 return uzero <= p && p < uzero+USTKTOP;
62 }
63
64 /*
65 * Allocate a process-sized mapping with nothing there.
66 * The point is to reserve the space so that
67 * nothing else ends up there later.
68 */
69 static void*
70 mapzero(void)
71 {
72 int fd, bit32;
73 void *v;
74 void *hint;
75
76 bit32 = BIT32;
77 hint = HINT;
78
79 /* First try mmaping /dev/zero. Some OS'es don't allow this. */
80 if((fd = open("/dev/zero", O_RDONLY)) >= 0){
81 v = mmap(hint, USTKTOP, PROT_NONE, bit32|MAP_PRIVATE, fd, 0);
82 if(v != MAP_FAILED) {
83 if((uint32_t)(uintptr)v != (uintptr)v) {
84 iprint("mmap returned 64-bit pointer %p\n", v);
85 panic("mmap");
86 }
87 return v;
88 }
89 }
90
91 /* Next try an anonymous map. */
92 v = mmap(hint, USTKTOP, PROT_NONE, bit32|MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
93 if(v != MAP_FAILED) {
94 if((uint32_t)(uintptr)v != (uintptr)v) {
95 iprint("mmap returned 64-bit pointer %p\n", v);
96 panic("mmap");
97 }
98 return v;
99 }
100
101 return nil;
102 }
103
/*
 * Initialize the simulated MMU: create the on-disk page file that
 * serves as "physical memory", map it directly at pagebase for the
 * kernel, and reserve nuspace user address-space regions.
 */
void
mmuinit(void)
{
	char tmp[] = "/var/tmp/9vx.pages.XXXXXX";
	void *v;
	int i;

	/* Create and size the backing store for "physical memory". */
	if((pagefile = mkstemp(tmp)) < 0)
		panic("mkstemp: %r");
	if(ftruncate(pagefile, memsize) < 0)
		panic("ftruncate pagefile: %r");
	unlink(tmp);	/* "remove on close" */

	/* Map pages for direct access at pagebase, wherever that is */
	/* MAP_SHARED means write the changes back to the file */
	v = mmap(nil, memsize, PROT_READ|PROT_WRITE,
		MAP_SHARED, pagefile, 0);
	if(v == MAP_FAILED)
		panic("mmap pagefile: %r");
	pagebase = v;

	/* Clamp nuspace (possibly set by command-line parsing) to [1, nelem(uspace)]. */
	if(nuspace <= 0)
		nuspace = 1;
	if(nuspace > nelem(uspace))
		nuspace = nelem(uspace);
	/* Reserve one low-32-bit address range per simulated user space. */
	for(i=0; i<nuspace; i++){
		uspace[i].uzero = mapzero();
		if(uspace[i].uzero == nil)
			panic("mmap address space %d", i);
		ulist[i] = &uspace[i];	/* ulist is maintained in MRU order */
	}

	/* Advertise the simulated RAM to the rest of the kernel. */
	conf.mem[0].base = 0;
	conf.mem[0].npage = memsize / BY2PG;

	palloc.mem[0].base = 0;
	palloc.mem[0].npage = memsize / BY2PG;
}
142
143 /*
144 * Temporary page mappings are easy again:
145 * everything is mapped at PAGEBASE.
146 */
147 void*
148 tmpmap(Page *pg)
149 {
150 assert(pg->pa < memsize);
151 return pagebase + pg->pa;
152 }
153
154 void
155 tmpunmap(void *v)
156 {
157 assert(pagebase <= (char*)v && (char*)v < pagebase + memsize);
158 }
159
160 KMap*
161 kmap(Page *p)
162 {
163 return (KMap*)tmpmap(p);
164 }
165
void
kunmap(KMap *k)
{
	/* Nothing to do: kmap returns a pointer into the permanent pagebase mapping. */
}
170
/*
 * Flush the current address space: revoke access to every user
 * page mapped into us, and reset the [lo, hi] dirty-range tracking
 * to the empty sentinel (lo above any user va, hi at 0).
 */
static void
mmapflush(Uspace *us)
{
	m->flushmmu = 0;

	/* Nothing mapped?  (lo > hi is the empty-range sentinel.) */
	if(us == nil || us->lo > us->hi || us->uzero == nil)
		return;

#ifdef __FreeBSD__
	if(__FreeBSD__ < 7){
		/*
		 * On FreeBSD, we need to be able to use mincore to
		 * tell whether a page is mapped, so we have to remap
		 * something with no pages here.
		 */
		if(mmap(us->uzero, us->hi+BY2PG, PROT_NONE,
			MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) == MAP_FAILED)
			panic("mmapflush mmap: %r");
		us->lo = 0x80000000UL;	/* empty range: lo > any valid user va */
		us->hi = 0;
		return;
	}
#endif

	/* Clear only as much as got mapped. */
	/* mprotect leaves the pages in place but removes all access. */
	if(mprotect(us->uzero, us->hi+BY2PG, PROT_NONE) < 0)
		panic("mmapflush mprotect: %r");
	us->lo = 0x80000000UL;
	us->hi = 0;
}
205
/*
 * Update the "MMU" in response to a user fault.
 * pa may have PTEWRITE set.
 *
 * Maps the page-file page at physical address pa into the current
 * process's user space at va, with write access iff PTEWRITE is set.
 */
void
putmmu(ulong va, ulong pa, Page *p)
{
	int prot;
	Uspace *us;

	/* Trace, and always complain if pa disagrees with p->pa. */
	if(tracemmu || (pa&~(PTEWRITE|PTEVALID)) != p->pa)
		iprint("putmmu va %lux pa %lux p->pa %lux\n", va, pa, p->pa);

	assert(p->pa < memsize && pa < memsize);
	assert(up);
	us = up->pmmu.us;
	assert(us);

	/* Map the page */
	prot = PROT_READ;
	if(pa&PTEWRITE)
		prot |= PROT_WRITE;
	pa &= ~(BY2PG-1);	/* strip PTE bits, leaving the page address */
	va &= ~(BY2PG-1);
	/* MAP_SHARED onto pagefile: the user page IS the "physical" page. */
	if(mmap(us->uzero+va, BY2PG, prot, MAP_FIXED|MAP_SHARED,
			pagefile, pa) == MAP_FAILED)
		panic("putmmu");

	/* Record high and low address range for quick unmap. */
	if(us->lo > va)
		us->lo = va;
	if(us->hi < va)
		us->hi = va;
	// printlinuxmaps();
}
241
242 /*
243 * The memory maps have changed for up. Flush all cached state.
244 */
245 void
246 flushmmu(void)
247 {
248 if(tracemmu)
249 iprint("flushmmu\n");
250
251 if(up){
252 vxproc_flush(up->pmmu.vxproc);
253 mmapflush(up->pmmu.us);
254 }
255 }
256
257 void
258 usespace(Uspace *us)
259 {
260 int i;
261
262 for(i=0; i<nuspace; i++)
263 if(ulist[i] == us){
264 while(i > 0){
265 ulist[i] = ulist[i-1];
266 i--;
267 }
268 ulist[0] = us;
269 break;
270 }
271 }
272
273 Uspace*
274 getspace(Proc *p)
275 {
276 Uspace *us;
277
278 us = ulist[nuspace-1];
279 if(us->p){
280 if(tracemmu)
281 iprint("^^^^^^^^^^ %ld %s [evict %d]\n", us->p->pid, us->p->text, us - uspace);
282 mmapflush(us);
283 }
284 us->p = p;
285 p->pmmu.vxmm.base = us->uzero;
286 p->pmmu.uzero = us->uzero;
287 p->pmmu.us = us;
288 usespace(us);
289 return us;
290 }
291
292 void
293 takespace(Proc *p, Uspace *us)
294 {
295 usespace(us);
296 if(us->p == p)
297 return;
298 if(tracemmu){
299 if(us->p)
300 iprint("^^^^^^^^^^ %ld %s [steal %d]\n", us->p->pid, us->p->text, us - uspace);
301 }
302 us->p = p;
303 mmapflush(us);
304 }
305
306 void
307 putspace(Uspace *us)
308 {
309 int i;
310
311 mmapflush(us);
312 us->p->pmmu.us = nil;
313 us->p->pmmu.uzero = nil;
314 us->p->pmmu.vxmm.base = nil;
315 us->p = nil;
316 for(i=0; i<nuspace; i++)
317 if(ulist[i] == us){
318 while(++i < nuspace)
319 ulist[i-1] = ulist[i];
320 ulist[i-1] = us;
321 break;
322 }
323 }
324
325 /*
326 * Called when scheduler has decided to run proc p.
327 * Prepare to run proc p.
328 */
329 void
330 mmuswitch(Proc *p)
331 {
332 /*
333 * Switch the address space, but only if it's not the
334 * one we were just in. Also, kprocs don't count --
335 * only the guys on cpu0 do.
336 */
337 if(p->kp)
338 return;
339
340 if(tracemmu)
341 iprint("mmuswitch %ld %s\n", p->pid, p->text);
342
343 if(p->pmmu.us && p->pmmu.us->p == p){
344 if(tracemmu) iprint("---------- %ld %s [%d]\n",
345 p->pid, p->text, p->pmmu.us - uspace);
346 usespace(p->pmmu.us);
347 if(!p->newtlb && !m->flushmmu){
348 usespace(p->pmmu.us);
349 return;
350 }
351 mmapflush(p->pmmu.us);
352 p->newtlb = 0;
353 return;
354 }
355
356 if(p->pmmu.us == nil)
357 getspace(p);
358 else
359 takespace(p, p->pmmu.us);
360 if(tracemmu) iprint("========== %ld %s [%d]\n",
361 p->pid, p->text, p->pmmu.us - uspace);
362 }
363
364 /*
365 * Called when proc p is dying.
366 */
367 void
368 mmurelease(Proc *p)
369 {
370 if(p->kp)
371 return;
372 if(tracemmu)
373 iprint("mmurelease %ld %s\n", p->pid, p->text);
374 if(p->pmmu.vxproc)
375 vxproc_flush(p->pmmu.vxproc);
376 if(p->pmmu.us){
377 if(tracemmu)
378 iprint("^^^^^^^^^^ %ld %s [release %d]\n", p->pid, p->text, p->pmmu.us - uspace);
379 putspace(p->pmmu.us);
380 if(m->flushmmu)
381 mmapflush(p->pmmu.us);
382 }
383 }
384
/*
 * Debugging aid: dump this process's memory map (Linux /proc format).
 */
void
printlinuxmaps(void)
{
	char buf[100];

	/* snprint bounds the write; sprint could overflow buf. */
	snprint(buf, sizeof buf, "cat /proc/%d/maps", getpid());
	system(buf);
}
392
393 void
394 mmusize(int size)
395 {
396 static int set = 0;
397 if(!set && size){
398 memsize = (size << 20);
399 }
400 }