linux.c - vx32 - Local 9vx git repository for patches.
git clone git://r-36.net/vx32
---
linux.c (10562B)
---
// Code specific to x86 hosts running Linux.

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <assert.h>
#include <ucontext.h>
#include <sys/ucontext.h>
#include <asm/ldt.h>
#include <errno.h>

#include "vx32.h"
#include "vx32impl.h"
#include "os.h"

extern int modify_ldt(int, void*, unsigned long);

int vxemu_map(vxemu *emu, vxmmap *mm)
{
	struct vxproc *vxp;
	struct user_desc desc;
	uint ldt[2];
#ifdef __x86_64
	static int didflat;
#endif

	vxp = emu->proc;
	emu->datasel = vxp->vxpno * 16 + 16 + 4 + 3;	// 4=LDT, 3=RPL
	emu->emusel = emu->datasel + 8;
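	// For reference: an x86 segment selector is (index << 3) | TI | RPL,
	// where TI=4 selects the LDT and RPL=3 is user privilege, so the
	// datasel/8 and emusel/8 computations below recover the LDT slot
	// index that modify_ldt expects in desc.entry_number.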

	if (emu->ldt_base != (uintptr_t)mm->base || emu->ldt_size != mm->size) {
		// Set up the process's data segment selector (for DS,ES,SS).
		memset(&desc, 0, sizeof(desc));
		desc.seg_32bit = 1;
		desc.read_exec_only = 0;
		desc.limit_in_pages = 1;
		desc.seg_not_present = 0;
		desc.useable = 1;

		desc.entry_number = emu->datasel / 8;
		desc.base_addr = (uintptr_t)mm->base;
		desc.limit = (mm->size - 1) >> VXPAGESHIFT;
		desc.contents = MODIFY_LDT_CONTENTS_DATA;
		if (modify_ldt(1, &desc, sizeof(desc)) < 0)
			return -1;

		// Set up the process's vxemu segment selector (for FS).
		desc.entry_number = emu->emusel / 8;
		desc.base_addr = (uintptr_t)emu;
		desc.limit = (VXCODEBUFSIZE - 1) >> VXPAGESHIFT;
		desc.contents = MODIFY_LDT_CONTENTS_DATA;
		if (modify_ldt(1, &desc, sizeof(desc)) < 0)
			return -1;

		emu->ldt_base = (uintptr_t)mm->base;
		emu->ldt_size = mm->size;
	}

#ifdef __x86_64
	// Set up 32-bit mode code and data segments (not vxproc-specific),
	// giving access to the full low 32 bits of linear address space.
	// The code segment is necessary to get into 32-bit compatibility mode;
	// the data segment is needed because Linux for x86-64
	// doesn't give 64-bit processes a "real" data segment by default
	// but instead just loads zero into the data segment selectors!
	emu->runptr.sel = FLATCODE;

	if (!didflat) {
		didflat = 1;
		memset(&desc, 0, sizeof(desc));
		desc.seg_32bit = 1;
		desc.read_exec_only = 0;
		desc.limit_in_pages = 1;
		desc.seg_not_present = 0;
		desc.useable = 1;

		desc.entry_number = FLATCODE / 8;
		desc.base_addr = 0;
		desc.limit = 0xfffff;
		desc.contents = MODIFY_LDT_CONTENTS_CODE;
		if (modify_ldt(1, &desc, sizeof(desc)) < 0)
			return -1;

		/*
		 * Linux 2.6.27 has a bug: it does not load the L (long mode)
		 * bit from desc.lm when copying desc into its own
		 * copy of the LDT entry on the kernel stack.
		 * Instead, it leaves L uninitialized, picking up whatever
		 * random bit was left on the kernel stack by the
		 * previous call sequence. We need L to be 0.
		 * If it ends up 1, the *ljmpq in run64.S will GP fault.
		 * Luckily, we can look for this by asking to read
		 * back the raw LDT bytes. If we observe this problem,
		 * try to fix it by doing a modify_ldt with base = limit = 0,
		 * which clears the entire stack ldt structure, and then
		 * quickly do another modify_ldt with desc, hoping that
		 * the bit will still be zero when we get there for the
		 * second modify_ldt. I wish I were making this up.
		 * This is fixed in Linus's git repository, but the Ubuntu
		 * git repositories are still out of date. See for example
		 * http://swtch.com/go/ubuntu-ldt
		 * http://swtch.com/go/linus-ldt
		 *
		 * Remember, folks, Free Software is only free if your
		 * time has no value.
		 */
		if(modify_ldt(0, ldt, sizeof ldt) < 0)
			return -1;
		if(ldt[1] & 0x00200000) {
			if (vx32_debugxlate)
				vxprint("FLATCODE LDT=%08x %08x; working around\n", ldt[0], ldt[1]);
			desc.limit = 0;
			modify_ldt(1, &desc, sizeof desc);
			desc.limit = 0xfffff;
			modify_ldt(1, &desc, sizeof desc);
			modify_ldt(0, ldt, sizeof ldt);
			if(ldt[1] & 0x00200000) {
				vxprint("cannot work around Linux FLATCODE bug\n");
				errno = EBADE;
				return -1;
			}
			if (vx32_debugxlate)
				vxprint("FLATCODE LDT=%08x %08x\n", ldt[0], ldt[1]);
		}

		desc.entry_number = FLATDATA / 8;
		desc.base_addr = 0;
		desc.limit = 0xfffff;
		desc.contents = MODIFY_LDT_CONTENTS_DATA;
		if (modify_ldt(1, &desc, sizeof(desc)) < 0)
			return -1;
	}

	// Set up a far return vector in emu->retptr
	// for getting back into 64-bit long mode.
	extern void vxrun_return();
	asm volatile("movw %%cs,%0" : "=r" (emu->retptr.sel));
	emu->retptr.ofs = (uint32_t)(intptr_t)vxrun_return;
#endif

	return 0;
}

static void dumpsigcontext(struct sigcontext *ctx)
{
#ifdef i386
	printf(
		"eax %08lx ebx %08lx\necx %08lx edx %08lx "
		"esi %08lx edi %08lx\nebp %08lx esp %08lx\n"
		"eip %08lx efl %08lx cs %04x\n"
		"err %08lx trapno %08lx cr2 %08lx\n",
		ctx->eax, ctx->ebx, ctx->ecx, ctx->edx,
		ctx->esi, ctx->edi, ctx->ebp, ctx->esp,
		ctx->eip, ctx->eflags, ctx->cs,
		ctx->err, ctx->trapno, ctx->cr2);
#else
	printf(
		"rax %016lx rbx %016lx\nrcx %016lx rdx %016lx\n"
		"rsi %016lx rdi %016lx\nrbp %016lx rsp %016lx\n"
		"r8 %016lx r9 %016lx\nr10 %016lx r11 %016lx\n"
		"r12 %016lx r13 %016lx\nr14 %016lx r15 %016lx\n"
		"rip %016lx efl %016lx cs %04x ss %04x\n"
		"err %016lx trapno %016lx cr2 %016lx\n",
		ctx->rax, ctx->rbx, ctx->rcx, ctx->rdx,
		ctx->rsi, ctx->rdi, ctx->rbp, ctx->rsp,
		ctx->r8, ctx->r9, ctx->r10, ctx->r11,
		ctx->r12, ctx->r13, ctx->r14, ctx->r15,
		ctx->rip, ctx->eflags, ctx->cs, ctx->__pad0,
		ctx->err, ctx->trapno, ctx->cr2);
#endif
}

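// VX32_BELIEVE_EIP is true when the interrupted context was executing
// vx32-translated guest code (on x86-32 the guest data selector is still
// loaded in DS; on x86-64 we are still in the 32-bit FLATCODE segment),
// so ctx->eip points into the translation and is meaningful as a trap eip.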
#ifdef i386
#define VX32_BELIEVE_EIP (ctx->ds == vs - 8)
#define ctxeip eip
#else
#define VX32_BELIEVE_EIP (ctx->cs == FLATCODE)

// On x86-64, make x86 names work for ctx->xxx.
#define eax rax
#define ebx rbx
#define ecx rcx
#define edx rdx
#define esi rsi
#define edi rdi
#define esp rsp
#define ebp rbp
#define ctxeip rip
#endif

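// Reload x87 floating-point state from a saved _fpstate image.
// frstor restores the full FPU environment and register stack; the
// trailing fwait forces any pending FP exception to be taken now.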
static void
fprestore(struct _fpstate *s)
{
	asm volatile("frstor 0(%%eax); fwait\n" : : "a" (s) : "memory");
}

int vx32_sighandler(int signo, siginfo_t *si, void *v)
{
	uint32_t trapeip;
	uint32_t magic;
	uint16_t vs;
	vxproc *vxp;
	vxemu *emu;
	struct sigcontext *ctx;
	ucontext_t *uc;
	mcontext_t *mc;
	int r;

	uc = v;
	mc = &uc->uc_mcontext;

	// same layout, and sigcontext is more convenient...
	ctx = (struct sigcontext*)mc;

	// We can't be sure that vxemu is running,
	// and thus that %VSEG is actually mapped to a
	// valid vxemu. The only way to tell is to look at %VSEG.

	// First sanity check vxproc segment number.
	asm("movw %"VSEGSTR",%0"
		: "=r" (vs));

	if(vx32_debugxlate) vxprint("vx32_sighandler signo=%d eip=%#x esp=%#x vs=%#x\n",
		signo, ctx->ctxeip, ctx->esp, vs);
	if(vx32_debugxlate) dumpsigcontext(ctx);

	if ((vs & 15) != 15)	// 8 (emu), LDT, RPL=3
		return 0;

	// Okay, assume mapped; check for vxemu.
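	// Note: the (vxemu*)0 member reference below is never dereferenced
	// as a flat pointer; it only supplies the displacement
	// offsetof(vxemu, magic), which the mov reads relative to %VSEG.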
	asm("movl %"VSEGSTR":%1,%0"
		: "=r" (magic)
		: "m" (((vxemu*)0)->magic));
	if (magic != VXEMU_MAGIC)
		return 0;

	// Okay, we're convinced.

	// Find current vxproc and vxemu.
	asm("mov %"VSEGSTR":%1,%0"
		: "=r" (vxp)
		: "m" (((vxemu*)0)->proc));
	emu = vxp->emu;

	// Get back our regular host segment register state,
	// so that thread-local storage and such works.
	vxrun_cleanup(emu);

	// dumpsigcontext(ctx);

	if (VX32_BELIEVE_EIP)
		trapeip = ctx->ctxeip;
	else
		trapeip = 0xffffffff;

	int newtrap;
	switch(signo){
	case SIGSEGV:
	case SIGBUS:
		newtrap = VXTRAP_PAGEFAULT;
		break;

	case SIGFPE:
		newtrap = VXTRAP_FLOAT;
		break;

	case SIGVTALRM:
		newtrap = VXTRAP_IRQ + VXIRQ_TIMER;
		break;

	case SIGTRAP:
		// Linux sends SIGTRAP when it gets a processor
		// debug exception, which is caused by single-stepping
		// with the TF bit, among other things. The processor
		// turns off the TF bit before generating the trap, but
		// it appears that Linux turns it back on for us.
		// Let's use it to confirm that this is a single-step trap.
		if (ctx->eflags & EFLAGS_TF){
			newtrap = VXTRAP_SINGLESTEP;
			ctx->eflags &= ~EFLAGS_TF;
		}else{
			vxprint("Unexpected sigtrap eflags=%#x\n", ctx->eflags);
			newtrap = VXTRAP_SIGNAL + signo;
		}
		break;

	default:
		newtrap = VXTRAP_SIGNAL + signo;
		break;
	}

	int replaced_trap = 0;
	if (emu->cpu_trap) {
		// There's already a pending trap!
		// Handle the new trap, and assume that when it
		// finishes, restarting the code at cpu.eip will trigger
		// the old trap again.
		// Have to fix up eip for int 0x30 and syscall instructions.
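		// (Both "int $0x30" and "syscall" encode as two-byte
		// instructions, so stepping eip back by 2 re-points it at
		// the trapping instruction.)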
		if (emu->cpu_trap == VXTRAP_SYSCALL ||
		    (emu->cpu_trap&VXTRAP_CATEGORY) == VXTRAP_SOFT)
			emu->cpu.eip -= 2;
		replaced_trap = emu->cpu_trap;
	}
	emu->cpu_trap = newtrap;

	r = vxemu_sighandler(emu, trapeip);

	if (r == VXSIG_SINGLESTEP){
		// Vxemu_sighandler wants us to single-step.
		// Execution state is in an intermediate state - don't touch it.
		ctx->eflags |= EFLAGS_TF;	// x86 TF (single-step) bit
		vxrun_setup(emu);
		return 1;
	}

	// Copy execution state into emu.
	if ((r & VXSIG_SAVE_ALL) == VXSIG_SAVE_ALL) {
		emu->cpu.reg[EAX] = ctx->eax;
		emu->cpu.reg[EBX] = ctx->ebx;
		emu->cpu.reg[ECX] = ctx->ecx;
		emu->cpu.reg[EDX] = ctx->edx;
		emu->cpu.reg[ESI] = ctx->esi;
		emu->cpu.reg[EDI] = ctx->edi;
		emu->cpu.reg[ESP] = ctx->esp;	// or esp_at_signal ???
		emu->cpu.reg[EBP] = ctx->ebp;
		emu->cpu.eflags = ctx->eflags;
	} else if (r & VXSIG_SAVE_ALL) {
		if (r & VXSIG_SAVE_EAX)
			emu->cpu.reg[EAX] = ctx->eax;
		if (r & VXSIG_SAVE_EBX)
			emu->cpu.reg[EBX] = ctx->ebx;
		if (r & VXSIG_SAVE_ECX)
			emu->cpu.reg[ECX] = ctx->ecx;
		if (r & VXSIG_SAVE_EDX)
			emu->cpu.reg[EDX] = ctx->edx;
		if (r & VXSIG_SAVE_ESI)
			emu->cpu.reg[ESI] = ctx->esi;
		if (r & VXSIG_SAVE_EDI)
			emu->cpu.reg[EDI] = ctx->edi;
		if (r & VXSIG_SAVE_ESP)
			emu->cpu.reg[ESP] = ctx->esp;	// or esp_at_signal ???
		if (r & VXSIG_SAVE_EBP)
			emu->cpu.reg[EBP] = ctx->ebp;
		if (r & VXSIG_SAVE_EFLAGS)
			emu->cpu.eflags = ctx->eflags;
	}
	r &= ~VXSIG_SAVE_ALL;

	if (r & VXSIG_SAVE_EBX_AS_EIP)
		emu->cpu.eip = ctx->ebx;
	r &= ~VXSIG_SAVE_EBX_AS_EIP;

	if (r & VXSIG_ADD_COUNT_TO_ESP) {
		emu->cpu.reg[ESP] += (uint16_t)(r >> VXSIG_COUNT_SHIFT);
		r &= ~VXSIG_ADD_COUNT_TO_ESP;
		r &= ~(0xFFFF << VXSIG_COUNT_SHIFT);
	}

	if (r & VXSIG_INC_ECX) {
		emu->cpu.reg[ECX]++;
		r &= ~VXSIG_INC_ECX;
	}

	if (r == VXSIG_TRAP) {
		if (emu->trapenv == NULL)
			return 0;
		emu->cpu.traperr = ctx->err;
		// Usually, ctx->cr2 == si->si_addr.
		// But on a segmentation fault (as opposed to a paging fault),
		// cr2 is not updated and the kernel sends an si_addr == 0.
		// Be sure to use si_addr, not cr2.
		emu->cpu.trapva = (uint32_t)(uintptr_t)si->si_addr;
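		// Redirect the interrupted context to the register state saved
		// in trapenv, so that returning from this signal handler
		// resumes the host code that will report the trap instead of
		// re-entering the faulting translated instruction.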
		memmove(mc->gregs, emu->trapenv->gregs, sizeof emu->trapenv->gregs);

		return 1;
	}

	// The signal handler is confused; so are we.
	return 0;
}