emu.c - vx32 - Local 9vx git repository for patches.
(HTM) git clone git://r-36.net/vx32
(DIR) Log
(DIR) Files
(DIR) Refs
---
emu.c (49656B)
---
1 /*
2 * Simple instruction scanning and rewriting
3 * for implementing vx32 on x86-32 hosts.
4 */
5
6 #ifdef __APPLE__
7 #define __DARWIN_UNIX03 0
8 #endif
9
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <stddef.h>
13 #include <string.h>
14 #include <setjmp.h>
15 #include <assert.h>
16 #include <errno.h>
17 #include <sys/stat.h> // XX FreeBSD 4.9 header bug?
18 #include <sys/mman.h>
19 #include <stdarg.h>
20 #include <unistd.h>
21
22 #include "vx32.h"
23 #include "vx32impl.h"
24 #include "os.h"
25 #include "x86dis.h"
26
// Special values for unused entries in entrypoint hash table
#define NULLSRCEIP ((uint32_t)-1)
// BUGFIX: the expansion previously ended with a stray ';', which is
// harmless in statement position ("x = NULLDSTEIP;;") but breaks the
// macro in any expression context such as "if (d == NULLDSTEIP)".
#define NULLDSTEIP ((uint32_t)(uintptr_t)vxrun_nullfrag)
30
31 int vx32_debugxlate = 0;
32
33 static uint64_t nflush;
34
35 static void disassemble(uint8_t *addr0, uint8_t*, uint8_t*);
36
37 // Create the emulation state for a new process
38 int vxemu_init(struct vxproc *vxp)
39 {
40 // Initial emulation hash table size (must be a power of two)
41 int etablen = 4096;
42
43 // Allocate the vxemu state area in 32-bit memory,
44 // because it must be accessible to our translated code
45 // via the special fs segment register setup.
46 vxemu *e = mmap(NULL, VXCODEBUFSIZE,
47 PROT_READ | PROT_WRITE | PROT_EXEC,
48 MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
49 if (e == MAP_FAILED){
50 vxprint("vxemu_init: mmap failed\n");
51 return -1;
52 }
53
54 // Basic initialization
55 memset(e, 0, sizeof(vxemu));
56 e->magic = VXEMU_MAGIC;
57 e->proc = vxp;
58 vxp->cpu = &e->cpu;
59 e->emuptr = (uint32_t)(intptr_t)e;
60 e->etablen = etablen;
61 e->etabmask = etablen - 1;
62
63 // Initialize the entrypoint table and translation buffer pointers
64 vxemu_flush(e);
65
66 vxp->emu = e;
67 return 0;
68 }
69
70 void vxemu_free(vxemu *e)
71 {
72 assert(e->proc->emu == e);
73 e->proc->emu = NULL;
74
75 // Free the vxemu state area
76 munmap(e, VXCODEBUFSIZE);
77 }
78
79 // Reset a vxproc's translation code buffer and entrypoint table.
80 void vxemu_flush(vxemu *e)
81 {
82 uint32_t i;
83
84 // Clear the entrypoint table.
85 uint32_t etablen = e->etablen;
86 for (i = 0; i < etablen; i++) {
87 e->etab[i].srceip = NULLSRCEIP;
88 e->etab[i].dsteip = NULLDSTEIP;
89 }
90 e->etabcnt = 0;
91
92 // The translated code buffer immediately follows the etab.
93 e->codebuf = &e->etab[etablen];
94 e->codefree = &e->etab[etablen];
95 e->codetab = (void*)e + VXCODEBUFSIZE;
96 e->codetop = (void*)e + VXCODEBUFSIZE;
97
98 nflush++;
99 }
100
101 void vxemu_growetab(struct vxemu *e)
102 {
103 // Increase the size of the entrypoint table,
104 // which effectively just reserves more memory
105 // from the code translation buffer.
106 e->etablen *= 2;
107 e->etabmask = e->etablen - 1;
108
109 // Re-initialize the entrypoint table and translation buffer.
110 vxemu_flush(e);
111 }
112
113 // Each translated frag starts with a one-instruction prolog...
114 #define PROLOG_LEN 7 // Length of 'mov VSEG:VXEMU_EBX,%ebx'
115
116
117 // Translate a block of code starting at the current vx32 EIP.
118 // The basic procedure works in four stages.
119 //
120 // 1: We first scan the instruction stream to build up a
121 // tentative vxinsn table for the instructions we plan to translate,
122 // with output code offsets computed for worst-case instruction lengths.
123 // This pass handles checking execute permissions on instruction pages,
124 // and decides exactly how many instructions we'll translate in this block.
125 // The final instruction in a fragment is always either
126 // an unconditional flow control instruction (JMP, CALL, RET, INT, etc.),
127 // or the special "pseudo-instruction" VXI_ENDFRAG,
128 // which ends the fragment with a jump to the appropriate subsequent EIP.
129 //
130 // 2: Next we do a reverse scan through the vxinsn table
131 // to identify instructions we can simplify:
132 // particularly instructions with condition code fixups
133 // whose condition codes are not actually used before they are killed.
134 // We also identify branches that can be rewritten with 8-bit displacements.
135 // In the process we adjust the target instruction length (dstlen) fields
136 // for all simplified instructions accordingly.
137 //
138 // 3: We now perform a forward scan through the vxinsn table
139 // to compute the final offsets for all target instructions in the block.
140 //
141 // 4: Finally, we scan the instruction stream again
142 // and emit the target instructions for the block.
143 //
144
145 // Macros to extract fields in a Mod-Reg-R/M byte
146 #define EA_MOD(b) ((uint8_t)(b) >> 6)
147 #define EA_REG(b) (((uint8_t)(b) >> 3) & 7)
148 #define EA_RM(b) ((uint8_t)(b) & 7)
149
// Scan a Mod-Reg-R/M byte and the rest of the effective address,
// returning a pointer just past the effective-address encoding
// (ModRM byte, optional SIB byte, optional displacement).
uint8_t *xscan_rm(uint8_t *inp)
{
	uint8_t modrm = *inp++;
	uint8_t mod = modrm >> 6;
	uint8_t rm = modrm & 7;

	switch (mod) {
	case 0:
		if (rm == 4) {
			// SIB byte follows; base == 5 means an
			// absolute disp32 follows the SIB byte too.
			uint8_t sib = *inp++;
			return (sib & 7) == 5 ? inp + 4 : inp;
		}
		if (rm == 5)
			return inp + 4;		// absolute disp32, no base
		return inp;			// plain [reg]

	case 1:
		// disp8, preceded by a SIB byte when rm == 4.
		return inp + (rm == 4 ? 1 : 0) + 1;

	case 2:
		// disp32, preceded by a SIB byte when rm == 4.
		return inp + (rm == 4 ? 1 : 0) + 4;

	case 3:
		// Register operand: nothing after the ModRM byte.
		return inp;

	default:
		assert(0);
		return 0;
	}
}
193
194 // Translation pass 1:
195 // scan instruction stream, build preliminary vxinsn table,
196 // and decide how many instructions to translate in this fragment.
197 static int xscan(struct vxproc *p)
198 {
199 uint32_t faultva;
200 uint32_t eip;
201 uint8_t *instart, *inmax;
202 struct vxemu *emu = p->emu;
203
204 // Make sure there's enough space in the translated code buffer;
205 // if not, then first clear the code buffer and entrypoint table.
206 if (((uint8_t*)emu->codetab - (uint8_t*)emu->codefree) < 1024)
207 vxemu_flush(emu);
208
209 // Grow the entrypoint hash table if it gets too crowded.
210 // This also in effect flushes the translated code buffer.
211 if (emu->etabcnt > emu->etablen/2)
212 vxemu_growetab(emu);
213
214 // Find and check permissions on the input instruction stream,
215 // and determine how far ahead we can scan (up to one full page)
216 // before hitting a non-executable page.
217 eip = emu->cpu.eip;
218 instart = (uint8_t*)emu->mem->base + eip;
219 emu->guestfrag = instart;
220 if (!vxmem_checkperm(p->mem, eip, 2*VXPAGESIZE, VXPERM_EXEC, &faultva)) {
221 if(faultva == eip) {
222 noexec:
223 emu->cpu_trap = VXTRAP_PAGEFAULT;
224 emu->cpu.traperr = 0x10;
225 emu->cpu.trapva = faultva;
226 return emu->cpu_trap;
227 }
228 } else
229 faultva = VXPAGETRUNC(eip) + 2*VXPAGESIZE;
230 inmax = instart + faultva - eip;
231
232 // Create a new fragment header in the code translation buffer
233 struct vxfrag *f = (struct vxfrag*)(((intptr_t)emu->codefree + 3) & ~3);
234 emu->txfrag = f;
235 f->eip = eip;
236
237 unsigned ino = 0; // instruction number
238 unsigned dstofs = PROLOG_LEN;
239 uint8_t *inp = instart;
240 emu->ininst = inp; // save instruction currently being translated
241 int fin = 0;
242 do {
243 uint8_t itype = 0;
244 uint8_t dstlen;
245 uint8_t ea;
246
247 if(*inp == 0xF0) // LOCK
248 inp++;
249
250 // Begin instruction decode.
251 // We might take a fault on any of these instruction reads
252 // if we run off the end of a mapped code page.
253 // In that case our exception handler
254 // notices that emu->ininst != NULL and initiates recovery.
255 // Or we might _not_ take a fault
256 // on a page marked read-only but not executable;
257 // that's why we check against inmax after each insn.
258 switch (*inp++) {
259
260 // OP Eb,Gb; OP Ev,Gv; OP Gb,Eb; OP Gv,Ev
261 case 0x00: case 0x01: case 0x02: case 0x03: // ADD
262 case 0x08: case 0x09: case 0x0a: case 0x0b: // OR
263 case 0x10: case 0x11: case 0x12: case 0x13: // ADC
264 case 0x18: case 0x19: case 0x1a: case 0x1b: // SBB
265 case 0x20: case 0x21: case 0x22: case 0x23: // AND
266 case 0x28: case 0x29: case 0x2a: case 0x2b: // SUB
267 case 0x30: case 0x31: case 0x32: case 0x33: // XOR
268 case 0x38: case 0x39: case 0x3a: case 0x3b: // CMP
269 case 0x84: case 0x85: // TEST
270 case 0x86: case 0x87: // XCHG
271 case 0x88: case 0x89: case 0x8a: case 0x8b: // MOV
272 inp = xscan_rm(inp);
273 goto notrans;
274
275 // OP AL,Ib; PUSH Ib
276 case 0x04: case 0x0c: case 0x14: case 0x1c: // ADD etc.
277 case 0x24: case 0x2c: case 0x34: case 0x3c: // AND etc.
278 case 0x6a: // PUSH Ib
279 case 0xa8: // TEST AL,Ib
280 case 0xb0: case 0xb1: case 0xb2: case 0xb3: // MOV Gb,Ib
281 case 0xb4: case 0xb5: case 0xb6: case 0xb7:
282 inp += 1;
283 goto notrans;
284
285 // OP EAX,Iv; PUSH Iv; MOV moffs
286 case 0x05: case 0x0d: case 0x15: case 0x1d: // OP EAX,Iv
287 case 0x25: case 0x2d: case 0x35: case 0x3d:
288 case 0x68: // PUSH Iv
289 case 0xa0: case 0xa1: case 0xa2: case 0xa3: // MOV moffs
290 case 0xa9: // TEST eAX,Iv
291 case 0xb8: case 0xb9: case 0xba: case 0xbb: // MOV Gv,Iv
292 case 0xbc: case 0xbd: case 0xbe: case 0xbf:
293 inp += 4;
294 goto notrans;
295
296 // CS and DS segment overrides, only valid for branch hints
297 case 0x2e: // CS/"not taken"
298 case 0x3e: // DS/"taken"
299 switch (*inp++) {
300
301 // Jcc (8-bit displacement)
302 case 0x70: case 0x71: case 0x72: case 0x73:
303 case 0x74: case 0x75: case 0x76: case 0x77:
304 case 0x78: case 0x79: case 0x7a: case 0x7b:
305 case 0x7c: case 0x7d: case 0x7e: case 0x7f:
306 inp += 1;
307 itype = VXI_JUMP;
308 dstlen = 7; // 32-bit branch w/hint
309 goto done;
310
311 // Two-byte opcode
312 case 0x0f:
313 switch (*inp++) {
314
315 // Jcc - conditional branch with disp32
316 case 0x80: case 0x81: case 0x82: case 0x83:
317 case 0x84: case 0x85: case 0x86: case 0x87:
318 case 0x88: case 0x89: case 0x8a: case 0x8b:
319 case 0x8c: case 0x8d: case 0x8e: case 0x8f:
320 inp += 4;
321 itype = VXI_JUMP;
322 dstlen = 7; // 32-bit branch w/hint
323 goto done;
324
325 }
326 goto invalid;
327 }
328 goto invalid;
329
330 // INC reg; DEC reg; PUSH reg; POP reg; XCHG eAX,reg
331 case 0x40: case 0x41: case 0x42: case 0x43: // INC
332 case 0x44: case 0x45: case 0x46: case 0x47:
333 case 0x48: case 0x49: case 0x4a: case 0x4b: // DEC
334 case 0x4c: case 0x4d: case 0x4e: case 0x4f:
335 case 0x50: case 0x51: case 0x52: case 0x53: // PUSH
336 case 0x54: case 0x55: case 0x56: case 0x57:
337 case 0x58: case 0x59: case 0x5a: case 0x5b: // POP
338 case 0x5c: case 0x5d: case 0x5e: case 0x5f:
339 case 0x90: case 0x91: case 0x92: case 0x93: // XCHG
340 case 0x94: case 0x95: case 0x96: case 0x97:
341 case 0x98: case 0x99: // CWDE, CDQ
342 case 0xa4: case 0xa5: case 0xa6: case 0xa7: // MOVS, CMPS
343 case 0xaa: case 0xab: // STOS
344 case 0xac: case 0xad: case 0xae: case 0xaf: // LODS, SCAS
345 case 0xc9: // LEAVE
346 case 0xfc: case 0xfd: // CLD, STD
347 goto notrans;
348
349 // OP Eb,Ib; OP Ev,Ib; IMUL Gv,Ev,Ib
350 case 0x80: // OP Eb,Ib
351 case 0x83: // OP Ev,Ib
352 case 0x6b: // IMUL Gv,Ev,Ib
353 inp = xscan_rm(inp);
354 inp += 1;
355 goto notrans;
356
357 // OP Ev,Iv; IMUL Gv,Ev,Iv
358 case 0x81: // OP Ev,Iv
359 case 0x69: // IMUL Gv,Ev,Iv
360 inp = xscan_rm(inp);
361 inp += 4;
362 goto notrans;
363
364 // Jcc (8-bit displacement)
365 case 0x70: case 0x71: case 0x72: case 0x73:
366 case 0x74: case 0x75: case 0x76: case 0x77:
367 case 0x78: case 0x79: case 0x7a: case 0x7b:
368 case 0x7c: case 0x7d: case 0x7e: case 0x7f:
369 inp += 1;
370 itype = VXI_JUMP;
371 dstlen = 6; // Size of worst-case 32-bit branch
372 goto done;
373
374 // LEA Gv,M
375 case 0x8d:
376 if (EA_MOD(*inp) == 3) // Mem-only
377 goto invalid;
378 inp = xscan_rm(inp);
379 goto notrans;
380
381 // Group 1a - POP Ev
382 case 0x8f:
383 if (EA_REG(*inp) != 0)
384 goto invalid;
385 inp = xscan_rm(inp);
386 goto notrans;
387
388 // FWAIT
389 case 0x9b:
390 if (p->allowfp == 0) {
391 badfp:
392 if (ino > 0)
393 goto endfrag;
394 emu->cpu_trap = VXTRAP_FPOFF;
395 return emu->cpu_trap;
396 }
397 goto notrans;
398
399 // PUSHF; POPF
400 case 0x9c: case 0x9d:
401 goto notrans;
402
403 // SAHF; LAHF
404 case 0x9f: case 0x9e:
405 goto notrans;
406
407 // Shift Eb,Ib; Shift Ev,Ib
408 case 0xc0: case 0xc1:
409 inp = xscan_rm(inp);
410 inp += 1;
411 // XXX fix CCs
412 goto notrans;
413
414 // Shift Eb,1; Shift Ev,1
415 case 0xd0: case 0xd1:
416 inp = xscan_rm(inp);
417 // XXX fix CCs
418 goto notrans;
419
420 // Shift Eb,CL; Shift Ev,CL
421 case 0xd2: case 0xd3:
422 inp = xscan_rm(inp);
423 // XXX fix CCs
424 goto notrans;
425
426 // RET Iw
427 case 0xc2:
428 inp += 2;
429 itype = VXI_RETURN_IMM;
430 dstlen = 7+1+6+5; // movl %ebx,VSEG:VXEMU_EBX
431 // popl %ebx
432 // addl $Iw,%esp
433 // jmp vxrun_lookup_indirect
434 fin = 1;
435 goto done;
436
437 // RET
438 case 0xc3:
439 itype = VXI_RETURN;
440 dstlen = 7+1+5; // movl %ebx,VSEG:VXEMU_EBX
441 // popl %ebx
442 // jmp vxrun_lookup_indirect
443 fin = 1;
444 goto done;
445
446 // Group 11 - MOV Eb,Ib
447 case 0xc6:
448 if (EA_REG(*inp) != 0)
449 goto invalid;
450 inp = xscan_rm(inp);
451 inp += 1;
452 goto notrans;
453
454 // Group 11 - MOV Ev,Iv
455 case 0xc7:
456 if (EA_REG(*inp) != 0)
457 goto invalid;
458 inp = xscan_rm(inp);
459 inp += 4;
460 goto notrans;
461
462 // ENTER
463 case 0xc8:
464 inp += 2+1; // imm16,imm8
465 goto notrans;
466
467 case 0xcd: // INT n (software interrupt)
468 inp++;
469 case 0xcc: // INT3 (breakpoint)
470 goto gentrap;
471
472 // 387 escapes - modrm with opcode field
473 case 0xd8: case 0xd9: case 0xda: case 0xdb:
474 case 0xdc: case 0xdd: case 0xde: case 0xdf:
475 if (!p->allowfp)
476 goto badfp;
477 if ((*inp>>6) == 3)
478 inp++;
479 else
480 inp = xscan_rm(inp);
481 goto notrans;
482
483 // Loops
484 case 0xe0: // LOOPNZ cb
485 inp++;
486 itype = VXI_LOOPNZ;
487 dstlen = 3+2+2+5; // leal -1(ecx), ecx
488 // jz .+7
489 // jecxz .+5
490 // jmp cb
491 goto done;
492
493 case 0xe1: // LOOPZ cb
494 inp++;
495 itype = VXI_LOOPZ;
496 dstlen = 3+2+2+5; // leal -1(ecx), ecx
497 // jnz .+7
498 // jecxz .+5
499 // jmp cb
500 goto done;
501
502 case 0xe2: // LOOP cb
503 inp++;
504 itype = VXI_LOOP;
505 dstlen = 3+2+5; // leal -1(ecx), ecx
506 // jecxz .+5
507 // jmp cb
508 goto done;
509
510 // CALL
511 case 0xe8: // CALL Jv
512 inp += 4;
513 itype = VXI_CALL;
514 dstlen = 5+5; // pushl $nexteip
515 // jmp trampoline
516 fin = 1;
517 goto done;
518
519 // JMP
520 case 0xe9: // JMP Jv
521 inp += 4;
522 itype = VXI_JUMP;
523 dstlen = 5; // Size of worst-case 32-bit JMP
524 fin = 1;
525 goto done;
526
527 // JMP short
528 case 0xeb: // JMP Jb
529 inp += 1;
530 itype = VXI_JUMP;
531 dstlen = 5; // Size of worst-case 32-bit JMP
532 fin = 1;
533 goto done;
534
535 // Group 3 - unary ops
536 case 0xf6:
537 ea = *inp;
538 inp = xscan_rm(inp);
539 switch (EA_REG(ea)) {
540 case 0: case 1: // TEST Eb,Ib
541 inp += 1;
542 default: // NOT, NEG, ...
543 ; // XXX MUL/DIV require fixcc!
544 }
545 goto notrans;
546
547 case 0xf7:
548 ea = *inp;
549 inp = xscan_rm(inp);
550 switch (EA_REG(ea)) {
551 case 0: case 1: // TEST Ev,Iv
552 inp += 4;
553 default: // NOT, NEG, ...
554 ; // XXX MUL/DIV require fixcc!
555 }
556 goto notrans;
557
558 // Group 4 - INC, DEC
559 case 0xfe:
560 ea = *inp;
561 inp = xscan_rm(inp);
562 switch (EA_REG(ea)) {
563 case 0: case 1: // INC Eb, DEC Eb
564 goto notrans;
565 }
566 goto invalid;
567
568 // Group 5 - INC, DEC, CALL, JMP, PUSH
569 case 0xff:
570 ea = *inp;
571 inp = xscan_rm(inp);
572 switch (EA_REG(ea)) {
573 case 0: case 1: // INC Ev, DEC Ev
574 case 6: // PUSH Ev
575 goto notrans;
576 case 2: // CALL Ev
577 itype = VXI_CALLIND;
578 dstlen = 7+(inp-emu->ininst)+5+5;
579 // movl %ebx,VSEG:VXEMU_EBX
580 // movl <indirect_ea>,%ebx
581 // (same length as CALL inst)
582 // pushl $<return_eip>
583 // jmp vxrun_lookup_indirect
584 fin = 1;
585 goto done;
586 case 4: // JMP Ev
587 itype = VXI_JUMPIND;
588 dstlen = 7+(inp-emu->ininst)+5;
589 // movl %ebx,VSEG:VXEMU_EBX
590 // movl <indirect_ea>,%ebx
591 // (same length as CALL inst)
592 // jmp vxrun_lookup_indirect
593 fin = 1;
594 goto done;
595 }
596 goto invalid;
597
598 // I/O
599 case 0xed:
600 goto gentrap;
601
602 // Prefixes
603 case 0x0f: // 2-byte opcode escape
604 goto twobyte;
605 case 0x66: // Operand size prefix
606 goto opsize;
607 case 0xf3: // REP/REPE prefix
608 goto rep;
609 case 0xf2: // REPNE prefix
610 goto repne;
611 }
612 goto invalid;
613
614 // Operand size prefix (0x66) seen
615 opsize:
616 switch (*inp++) {
617
618 // OP Ev,Gv; OP Gv,Ev
619 case 0x01: case 0x03: // ADD
620 case 0x09: case 0x0b: // OR
621 case 0x11: case 0x13: // ADC
622 case 0x19: case 0x1b: // SBB
623 case 0x21: case 0x23: // AND
624 case 0x29: case 0x2b: // SUB
625 case 0x31: case 0x33: // XOR
626 case 0x39: case 0x3b: // CMP
627 case 0x85: // TEST
628 case 0x87: // XCHG
629 case 0x89: case 0x8b: // MOV
630 inp = xscan_rm(inp);
631 goto notrans;
632
633 // OP EAX,Iv; PUSH Iv
634 case 0x05: case 0x0d: case 0x15: case 0x1d: // OP EAX,Iv
635 case 0x25: case 0x2d: case 0x35: case 0x3d:
636 case 0x68: // PUSH Iv
637 case 0xa9: // TEST eAX,Iv
638 case 0xb8: case 0xb9: case 0xba: case 0xbb: // MOV Gv,Iv
639 case 0xbc: case 0xbd: case 0xbe: case 0xbf:
640 inp += 2;
641 goto notrans;
642
643 // INC reg; DEC reg; PUSH reg; POP reg; XCHG eAX,reg
644 case 0x40: case 0x41: case 0x42: case 0x43: // INC
645 case 0x44: case 0x45: case 0x46: case 0x47:
646 case 0x48: case 0x49: case 0x4a: case 0x4b: // DEC
647 case 0x4c: case 0x4d: case 0x4e: case 0x4f:
648 case 0x50: case 0x51: case 0x52: case 0x53: // PUSH
649 case 0x54: case 0x55: case 0x56: case 0x57:
650 case 0x58: case 0x59: case 0x5a: case 0x5b: // POP
651 case 0x5c: case 0x5d: case 0x5e: case 0x5f:
652 case 0x90: case 0x91: case 0x92: case 0x93: // XCHG
653 case 0x94: case 0x95: case 0x96: case 0x97:
654 case 0x98: case 0x99: // CWDE, CDQ
655 case 0xa4: case 0xa5: case 0xa6: case 0xa7: // MOVS, CMPS
656 case 0xaa: case 0xab: // STOS
657 case 0xac: case 0xad: case 0xae: case 0xaf: // LODS, SCAS
658 case 0xc9: // LEAVE
659 case 0xfc: case 0xfd: // CLD, STD
660 goto notrans;
661
662 // OP Ev,Iv; IMUL Gv,Ev,Iv
663 case 0x81: // OP Ev,Iv
664 case 0x69: // IMUL Gv,Ev,Iv
665 inp = xscan_rm(inp);
666 inp += 2;
667 goto notrans;
668
669 // OP Ev,Ib; IMUL Gv,Ev,Ib
670 case 0x83: // OP Ev,Ib
671 case 0x6b: // IMUL Gv,Ev,Ib
672 inp = xscan_rm(inp);
673 inp += 1;
674 goto notrans;
675
676 // MOV moffs
677 case 0xa1: case 0xa3:
678 inp += 4; // always 32-bit offset
679 goto notrans;
680
681 // Shift Ev,Ib
682 case 0xc1:
683 inp = xscan_rm(inp);
684 inp += 1;
685 // XXX fix CCs
686 goto notrans;
687
688 // Shift Ev,1
689 case 0xd1:
690 inp = xscan_rm(inp);
691 // XXX fix CCs
692 goto notrans;
693
694 // Shift Ev,CL
695 case 0xd3:
696 inp = xscan_rm(inp);
697 // XXX fix CCs
698 goto notrans;
699
700 // Group 11 - MOV Ev,Iv
701 case 0xc7:
702 if (EA_REG(*inp) != 0)
703 goto invalid;
704 inp = xscan_rm(inp);
705 inp += 2;
706 goto notrans;
707
708 // Group 3 - unary ops
709 case 0xf7:
710 ea = *inp;
711 inp = xscan_rm(inp);
712 switch (EA_REG(ea)) {
713 case 0: case 1: // TEST Ev,Iv
714 inp += 2;
715 default: // NOT, NEG, ...
716 ; // XXX MUL/DIV require fixcc!
717 }
718 goto notrans;
719
720 // Group 5 - INC, DEC, CALL, JMP, PUSH
721 case 0xff:
722 ea = *inp;
723 inp = xscan_rm(inp);
724 switch (EA_REG(ea)) {
725 case 0: case 1: // INC Ev, DEC Ev
726 goto notrans;
727 }
728 goto invalid;
729
730 // Prefixes
731 case 0x0f: // 2-byte opcode escape
732 goto twobyte_opsize;
733 case 0x66: // Operand size prefix (redundant)
734 goto invalid;
735 case 0xf3: // REP/REPE prefix
736 goto opsize_rep;
737 case 0xf2: // REPNE prefix
738 goto opsize_repne;
739 }
740 goto invalid;
741
742 // REP/REPE prefix (0xf3) seen
743 rep:
744 switch (*inp++) {
745
746 // No-operand insns
747 case 0xa4: case 0xa5: case 0xa6: case 0xa7: // MOVS, CMPS
748 case 0xaa: case 0xab: // STOS
749 case 0xac: case 0xad: case 0xae: case 0xaf: // LODS, SCAS
750 goto notrans;
751
752 // Prefixes
753 case 0x0f: // 2-byte opcode escape
754 goto twobyte_rep;
755 case 0x66: // Operand size prefix
756 goto opsize_rep;
757 case 0xf3: // REP/REPE prefix (redundant)
758 goto invalid;
759 case 0xf2: // REPNE prefix (conflicting)
760 goto invalid;
761 }
762 goto invalid;
763
764 // REPNE prefix (0xf2) seen
765 repne:
766 switch (*inp++) {
767
768 // No-operand insns
769 case 0xa6: case 0xa7: // CMPS
770 case 0xae: case 0xaf: // SCAS
771 goto notrans;
772
773 // Prefixes
774 case 0x0f: // 2-byte opcode escape
775 goto twobyte_repne;
776 case 0x66: // Operand size prefix
777 goto opsize_repne;
778 case 0xf3: // REP/REPE prefix (conflicting)
779 goto invalid;
780 case 0xf2: // REPNE prefix (redundant)
781 goto invalid;
782 }
783 goto invalid;
784
785
786 // Operand size prefix (0x66) and REP/REPE prefix (0xf3) seen
787 opsize_rep:
788 switch (*inp++) {
789 case 0xa5: case 0xa7: // MOVS, CMPS
790 case 0xab: // STOS
791 case 0xad: case 0xaf: // LODS, SCAS
792 goto notrans;
793 }
794 goto invalid;
795
796 // Operand size prefix (0x66) and REPNE prefix (0xf2) seen
797 opsize_repne:
798 switch (*inp++) {
799 case 0xa7: // CMPS
800 case 0xaf: // SCAS
801 goto notrans;
802 }
803 goto invalid;
804
805
806 twobyte:
807 switch (*inp++) {
808
809 // SYSCALL instruction for fast system calls
810 case 0x05:
811 goto gentrap;
812
813 // No additional operand
814 case 0xc8: case 0xc9: case 0xca: case 0xcb: // BSWAP
815 case 0xcc: case 0xcd: case 0xce: case 0xcf:
816 goto notrans;
817
818 // General EA operands
819 case 0x10: case 0x11: // MOVUPS
820 case 0x12: // MOVLPS Vps,Mq/MOVHLPS
821 case 0x14: case 0x15: // UNPCKLPS/UNPCKHPS
822 case 0x16: // MOVHPS Vps,Mq/MOVLHPS
823 case 0x28: case 0x29: // MOVAPS
824 case 0x2e: case 0x2f: // UCOMISS/COMISS
825 case 0x40: case 0x41: case 0x42: case 0x43: // CMOVcc
826 case 0x44: case 0x45: case 0x46: case 0x47:
827 case 0x48: case 0x49: case 0x4a: case 0x4b:
828 case 0x4c: case 0x4d: case 0x4e: case 0x4f:
829 case 0x51: // SQRTPS
830 case 0x54: case 0x55: case 0x56: case 0x57: // ANDPS etc.
831 case 0x58: case 0x59: case 0x5a: case 0x5b: // ADDPS etc.
832 case 0x5c: case 0x5d: case 0x5e: case 0x5f: // SUBPS etc.
833 case 0xa3: // BT Ev,Gv
834 case 0xab: // BTS Ev,Gv
835 case 0xaf: // IMUL Gv,Ev
836 case 0xb0: // CMPXCHG Eb,Gb
837 case 0xb1: // CMPXCHG Ev,Gv
838 case 0xb3: // BTR Ev,Gv
839 case 0xb6: case 0xb7: // MOVZX
840 case 0xbb: // BTC Ev,Gv
841 case 0xbc: case 0xbd: // BSF, BSR
842 case 0xbe: case 0xbf: // MOVSX
843 case 0xc0: // XADD Eb,Gb
844 case 0xc1: // XADD Ev,Gv
845 inp = xscan_rm(inp);
846 goto notrans;
847
848 // General EA operands plus immediate byte
849 case 0xc2: // CMPPS Vps,Wps,Ib
850 case 0xc6: // SHUFPS Vps,Wps,Ib
851 inp = xscan_rm(inp);
852 inp += 1;
853 goto notrans;
854
855 // Memory-only EA operand
856 case 0x13: // MOVLPS Mq,Vps
857 case 0x17: // MOVHPS Mq,Vps
858 case 0x2b: // MOVNTPS
859 case 0xc3: // MOVNTI Md,Gd
860 if (EA_MOD(*inp) == 3) // Mem-only
861 goto invalid;
862 inp = xscan_rm(inp);
863 goto notrans;
864
865 // Register-only EA operand
866 case 0x50: // MOVMSKPS
867 if (EA_MOD(*inp) != 3) // Reg-only
868 goto invalid;
869 inp = xscan_rm(inp);
870 goto notrans;
871
872 // Jcc - conditional branch with disp32
873 case 0x80: case 0x81: case 0x82: case 0x83:
874 case 0x84: case 0x85: case 0x86: case 0x87:
875 case 0x88: case 0x89: case 0x8a: case 0x8b:
876 case 0x8c: case 0x8d: case 0x8e: case 0x8f:
877 inp += 4;
878 itype = VXI_JUMP;
879 dstlen = 6; // Size of worst-case 32-bit branch
880 goto done;
881
882 // SETcc - set byte based on condition
883 case 0x90: case 0x91: case 0x92: case 0x93:
884 case 0x94: case 0x95: case 0x96: case 0x97:
885 case 0x98: case 0x99: case 0x9a: case 0x9b:
886 case 0x9c: case 0x9d: case 0x9e: case 0x9f:
887 if (EA_REG(*inp) != 0)
888 goto invalid;
889 inp = xscan_rm(inp);
890 goto notrans;
891
892 // Shift instructions
893 case 0xa4: // SHLD Ev,Gv,Ib
894 case 0xac: // SHRD Ev,Gv,Ib
895 inp = xscan_rm(inp);
896 inp += 1;
897 // XXX fix cc
898 goto notrans;
899 case 0xa5: // SHLD Ev,Gv,CL
900 case 0xad: // SHRD Ev,Gv,CL
901 inp = xscan_rm(inp);
902 // XXX fix cc
903 goto notrans;
904
905 // Group 8 - Bit test/modify with immediate
906 case 0xba:
907 if (!(EA_REG(*inp) & 4))
908 goto invalid;
909 inp = xscan_rm(inp);
910 inp += 1;
911 goto invalid;
912
913 // Group 15 - SSE control
914 case 0xae:
915 ea = *inp;
916 inp = xscan_rm(inp);
917 switch (EA_REG(ea)) {
918 case 2: // LDMXCSR
919 case 3: // STMXCSR
920 if (EA_MOD(ea) == 3) // Mem-only
921 goto invalid;
922 goto notrans;
923 // XX LFENCE, SFENCE, MFENCE?
924 }
925 goto invalid;
926
927 // Group 16 - PREFETCH
928 case 0x18:
929 if (EA_MOD(*inp) == 3) // Mem-only
930 goto invalid;
931 // XX Squash to NOP if EA_REG(*inp) > 3?
932 inp = xscan_rm(inp);
933 goto notrans;
934
935 }
936 goto invalid;
937
938 twobyte_opsize:
939 switch (*inp++) {
940
941 // General EA operands
942 case 0x10: case 0x11: // MOVUPD
943 case 0x14: case 0x15: // UNPCKLPD/UNPCKHPD
944 case 0x28: case 0x29: // MOVAPD
945 case 0x2e: case 0x2f: // UCOMISD/COMISD
946 case 0x40: case 0x41: case 0x42: case 0x43: // CMOVcc
947 case 0x44: case 0x45: case 0x46: case 0x47:
948 case 0x48: case 0x49: case 0x4a: case 0x4b:
949 case 0x4c: case 0x4d: case 0x4e: case 0x4f:
950 case 0x51: // SQRTPD
951 case 0x54: case 0x55: case 0x56: case 0x57: // ANDPD etc.
952 case 0x58: case 0x59: case 0x5a: case 0x5b: // ADDPD etc.
953 case 0x5c: case 0x5d: case 0x5e: case 0x5f: // SUBPD etc.
954 case 0x60: case 0x61: case 0x62: case 0x63: // PUNPCK...
955 case 0x64: case 0x65: case 0x66: case 0x67: // PCMPGT...
956 case 0x68: case 0x69: case 0x6a: case 0x6b: // PUNPCK...
957 case 0x6c: case 0x6d: case 0x6e: case 0x6f: // PUNPCK...
958 case 0x74: case 0x75: case 0x76: // PCMPEQ...
959 case 0x7e: case 0x7f: // MOVD/MOVDQA
960 case 0xa3: // BT Ev,Gv
961 case 0xab: // BTS Ev,Gv
962 case 0xb3: // BTR Ev,Gv
963 case 0xbb: // BTC Ev,Gv
964 case 0xbc: case 0xbd: // BSF, BSR
965 case 0xaf: // IMUL Gv,Ev
966 case 0xb6: // MOVZX Gv,Eb
967 case 0xbe: // MOVSX Gv,Eb
968 case 0xd1: case 0xd2: case 0xd3: // PSRLx
969 case 0xd4: case 0xd5: case 0xd6: // PADDQ...
970 case 0xd8: case 0xd9: case 0xda: case 0xdb: // PSUBUSB...
971 case 0xdc: case 0xdd: case 0xde: case 0xdf: // PADDUSB...
972 case 0xe0: case 0xe1: case 0xe2: case 0xe3: // PAVGB...
973 case 0xe4: case 0xe5: case 0xe6: // PMULHUW...
974 case 0xe8: case 0xe9: case 0xea: case 0xeb: // PSUBSB...
975 case 0xec: case 0xed: case 0xee: case 0xef: // PADDSB...
976 case 0xf1: case 0xf2: case 0xf3: // PSLLx
977 case 0xf4: case 0xf5: case 0xf6: // PMULUDQ...
978 case 0xf8: case 0xf9: case 0xfa: case 0xfb: // PSUBB...
979 case 0xfc: case 0xfd: case 0xfe: // PADDB...
980 inp = xscan_rm(inp);
981 goto notrans;
982
983 // General EA operands plus immediate byte
984 case 0xc5: // PEXTRW Gd,VRdq,Ib
985 if (EA_MOD(*inp) != 3)
986 goto invalid; // Reg-only
987 case 0x70: // PSHUFD Vdq,Wdq,Ib
988 case 0xc2: // CMPPD Vps,Wps,Ib
989 case 0xc4: // PINSRW Vdq,Ew,Ib
990 case 0xc6: // SHUFPD Vps,Wps,Ib
991 inp = xscan_rm(inp);
992 inp += 1;
993 goto notrans;
994
995 // Memory-only EA operand
996 case 0x12: case 0x13: // MOVLPD
997 case 0x16: case 0x17: // MOVHPD
998 case 0x2b: // MOVNTPD
999 case 0xe7: // MOVNTDQ Mdq,Vdq
1000 if (EA_MOD(*inp) == 3) // Mem-only
1001 goto invalid;
1002 inp = xscan_rm(inp);
1003 goto notrans;
1004
1005 // Register-only EA operand
1006 case 0x50: // MOVMSKPD
1007 case 0xd7: // PMOVMSKB Gd,VRdq
1008 case 0xf7: // MASKMOVQ Vdq,Wdq
1009 if (EA_MOD(*inp) != 3) // Reg-only
1010 goto invalid;
1011 inp = xscan_rm(inp);
1012 goto notrans;
1013
1014 // Shift instructions
1015 case 0xa4: // SHLD Ev,Gv,Ib
1016 case 0xac: // SHRD Ev,Gv,Ib
1017 inp = xscan_rm(inp);
1018 inp += 1;
1019 // XXX fix cc
1020 goto notrans;
1021 case 0xa5: // SHLD Ev,Gv,CL
1022 case 0xad: // SHRD Ev,Gv,CL
1023 inp = xscan_rm(inp);
1024 // XXX fix cc
1025 goto notrans;
1026
1027 // Group 8 - Bit test/modify with immediate
1028 case 0xba:
1029 if (!(EA_REG(*inp) & 4))
1030 goto invalid;
1031 inp = xscan_rm(inp);
1032 inp += 1;
1033 goto invalid;
1034
1035 // Group 12, 13, 14 - SSE vector shift w/ immediate
1036 case 0x71: case 0x72: case 0x73:
1037 ea = *inp;
1038 inp = xscan_rm(inp);
1039 switch (EA_REG(ea)) {
1040 case 2: case 4: case 6:
1041 inp += 1;
1042 goto notrans;
1043 }
1044 goto invalid;
1045 }
1046 goto invalid;
1047
1048 twobyte_rep:
1049 switch (*inp++) {
1050
1051 // General EA operands
1052 case 0x10: case 0x11: // MOVSS
1053 case 0x2a: case 0x2c: case 0x2d: // CVT...
1054 case 0x51: // SQRTSS
1055 case 0x58: case 0x59: case 0x5a: case 0x5b: // ADDSS etc.
1056 case 0x5c: case 0x5d: case 0x5e: case 0x5f: // SUBSS etc.
1057 case 0x6f: // MOVDQU
1058 case 0x7e: case 0x7f: // MOVQ/MOVDQU
1059 case 0xe6: // CVTDQ2PD
1060 inp = xscan_rm(inp);
1061 goto notrans;
1062
1063 // General EA operands plus immediate byte
1064 case 0x70: // PSHUFHW Vq,Wq,Ib
1065 case 0xc2: // CMPSS Vss,Wss,Ib
1066 inp = xscan_rm(inp);
1067 inp += 1;
1068 goto notrans;
1069 }
1070 goto invalid;
1071
1072 twobyte_repne:
1073 switch (*inp++) {
1074
1075 // General EA operands
1076 case 0x10: case 0x11: // MOVSD
1077 case 0x2a: case 0x2c: case 0x2d: // CVT...
1078 case 0x51: // SQRTSD
1079 case 0x58: case 0x59: case 0x5a: // ADDSD etc.
1080 case 0x5c: case 0x5d: case 0x5e: case 0x5f: // SUBSD etc.
1081 case 0xe6: // CVTPD2DQ
1082 inp = xscan_rm(inp);
1083 goto notrans;
1084
1085 // General EA operands plus immediate byte
1086 case 0x70: // PSHUFLW Vq,Wq,Ib
1087 case 0xc2: // CMPSD Vss,Wss,Ib
1088 inp = xscan_rm(inp);
1089 inp += 1;
1090 goto notrans;
1091 }
1092 goto invalid;
1093
1094
1095 invalid:
1096 vxrun_cleanup(emu);
1097 vxprint("invalid opcode %02x %02x %02x at eip %08x\n",
1098 emu->ininst[0], emu->ininst[1], emu->ininst[2],
1099 emu->cpu.eip + (emu->ininst - instart));
1100 vxrun_setup(emu);
1101 gentrap:
1102 fin = 1;
1103 itype = VXI_TRAP;
1104 dstlen = 6+5+11+5; // movl %eax,VSEG:VXEMU_EAX
1105 // movl $fin,%eax
1106 // movl $eip,VSEG:VXEMU_EIP
1107 // jmp vxrun_gentrap
1108 goto done;
1109
1110
1111 notrans:
1112 // No translation of this instruction is required -
1113 // dstlen is the same as srclen.
1114 dstlen = inp - emu->ininst;
1115
1116 done:
1117 // Make sure this whole instruction was actually executable
1118 if (inp > inmax) {
1119 // If the whole first instruction isn't executable,
1120 // then just generate the trap immediately,
1121 // since we know it'll be required.
1122 if (ino == 0)
1123 goto noexec;
1124
1125 // Otherwise, just roll back
1126 // and stop translating before this instruction,
1127 // and let the exception (if any)
1128 // happen next time into the translator.
1129 goto endfrag;
1130 }
1131
1132 // Make sure there's actually room for the resulting code
1133 if (dstofs + dstlen > VXDSTOFS_MAX) {
1134
1135 // Roll back and end the frag before this instruction
1136 endfrag:
1137 fin = 1;
1138 itype = VXI_ENDFRAG;
1139 inp = emu->ininst; // no source consumed
1140 dstlen = 5; // jmp to next frag
1141 }
1142
1143 // Record the instruction record
1144 f->insn[ino].itype = itype;
1145 f->insn[ino].srcofs = emu->ininst - instart;
1146 f->insn[ino].dstofs = dstofs;
1147 f->insn[ino].dstlen = dstlen;
1148
1149 // Move on to next instruction
1150 ino++;
1151 emu->ininst = inp;
1152 dstofs += dstlen;
1153
1154 } while (!fin);
1155
1156 // Record the total number of instructions for this frag
1157 f->ninsn = ino;
1158
1159 // vxprint("%d ins - to %x\n", ino, emu->ininst - instart + eip);
1160 // Clear the special instruction-scanning exception state flag
1161 emu->guestfragend = emu->ininst;
1162 emu->ininst = NULL;
1163
1164 return 0;
1165 }
1166
// Try to optimize jump instructions whose target
// is in the same fragment we're building:
// if the translated target lands within 8-bit displacement range,
// pass 2 rewrites the instruction as a short jump (VXI_JUMP8).
static inline void xsimp_jump(struct vxproc *p, unsigned ino)
{
	struct vxemu *emu = p->emu;
	struct vxfrag *f = emu->txfrag;
	unsigned ninsn = f->ninsn;
	unsigned srcofs = f->insn[ino].srcofs;
	// Address of the original guest instruction in host memory.
	uint8_t *inp = (uint8_t*)emu->mem->base + emu->cpu.eip + srcofs;

	// Skip any branch prediction hint prefix (CS/DS override byte).
	uint8_t opcode = *inp++;
	int dstlen = 2;		// short form: opcode + disp8
	uint32_t targofs = srcofs;
	if (opcode == 0x2e || opcode == 0x3e) {
		opcode = *inp++;
		dstlen = 3;	// short form keeps the hint prefix byte
		targofs++;
	}

	// Determine the jump target as an offset from the fragment start.
	// The constant added is the source instruction's own length,
	// since x86 displacements are relative to the next instruction.
	if (opcode == 0xe9) {
		// 32-bit JMP
		targofs += 5 + *(int32_t*)inp;
	} else if (opcode == 0x0f) {
		// 32-bit Jcc
		targofs += 6 + *(int32_t*)inp;
	} else {
		// 8-bit JMP or Jcc or LOOP
		targofs += 2 + (int32_t)(int8_t)*inp;
	}
	if (targofs > f->insn[ninsn-1].srcofs)
		return;	// Target is not in this fragment

	// Find the target in the insn table:
	// binary search for the greatest srcofs <= targofs.
	unsigned lo = 0;
	unsigned hi = ninsn-1;
	while (hi > lo) {
		unsigned mid = (lo + hi + 1) / 2;
		unsigned midofs = f->insn[mid].srcofs;
		if (targofs >= midofs)
			lo = mid;
		else
			hi = mid - 1;
	}
	if (targofs != f->insn[lo].srcofs)
		return;	// Jump target is _between_ instructions!

	// Make sure target is still in range after translation.
	// NOTE(review): dstofs values here are still the worst-case pass-1
	// offsets; the +127/-128 margins appear chosen so the target stays
	// reachable even after later shrinking -- confirm against pass 3/4.
	if (lo > ino) {
		if ((int)f->insn[lo].dstofs >
		    (int)f->insn[ino+1].dstofs+127)
			return;	// too far ahead
	} else {
		if ((int)f->insn[lo].dstofs <
		    (int)f->insn[ino].dstofs+3-128)
			return;	// too far behind
	}

	// In range - convert it to an 8-bit jump!
	f->insn[ino].itype = VXI_JUMP8;
	f->insn[ino].dstlen = dstlen;
}
1230
1231 // Translation pass 2:
1232 // Reverse scan through the instruction table trying to simplify instructions.
1233 static void xsimp(struct vxproc *p)
1234 {
1235 int i;
1236 struct vxemu *emu = p->emu;
1237 struct vxfrag *f = emu->txfrag;
1238 unsigned ninsn = f->ninsn;
1239
1240 for (i = ninsn-1; i >= 0; i--) {
1241 unsigned itype = f->insn[i].itype;
1242
1243 switch (itype) {
1244 case VXI_LOOP:
1245 case VXI_LOOPZ:
1246 case VXI_LOOPNZ:
1247 case VXI_JUMP:
1248 xsimp_jump(p, i);
1249 break;
1250 default:
1251 break; // no simplifications
1252 }
1253
1254 }
1255 }
1256
1257 // Translation pass 3:
1258 // Compute final instruction offsets.
1259 static void xplace(struct vxproc *p)
1260 {
1261 int i;
1262 struct vxemu *emu = p->emu;
1263 struct vxfrag *f = emu->txfrag;
1264 unsigned ninsn = f->ninsn;
1265
1266 size_t outofs = PROLOG_LEN;
1267 for (i = 0; i < ninsn; i++) {
1268 f->insn[i].dstofs = outofs;
1269 outofs += f->insn[i].dstlen;
1270 }
1271 }
1272
// Emit a direct 32-bit jump/branch/call/endfrag instruction.
// The original jump might have been either short or long.
// The emitted jump initially targets a per-jump trampoline placed in the
// *extrap area; the trampoline records a patch record and tail-jumps to
// vxrun_lookup_backpatch, which later rewrites the jump to point directly
// at the translated target fragment.
// NB. vxemu_sighandler (sig.c) knows that jumps don't trash registers.
// NB. vxemu_sighandler knows that calls push the return address
// onto the stack as the first instruction, and that the target address
// can be found at offset 26 of the translation.
static inline void xemit_jump(
	struct vxproc *p, uint8_t itype, unsigned ino,
	uint8_t **extrap)
{
	extern void vxrun_lookup_backpatch();

	struct vxemu *emu = p->emu;
	struct vxfrag *f = emu->txfrag;

	// Determine the jump target EIP
	// and emit the appropriate call/jump/branch instruction,
	// with its target pointing to a temporary jump trampoline.
	uint8_t *tramp = *extrap;
	unsigned srcofs = f->insn[ino].srcofs;
	uint8_t *inp = (uint8_t*)emu->mem->base + emu->cpu.eip + srcofs;
	uint8_t *outp = FRAGCODE(f) + f->insn[ino].dstofs;
	uint32_t targeip = emu->cpu.eip + srcofs;
	if (itype == VXI_JUMP) {

		uint8_t opcode = *inp;

		// Copy any branch taken/not taken hint prefix
		if (opcode == 0x2e || opcode == 0x3e) {
			*outp++ = opcode;
			opcode = *++inp;
			targeip++;
		}

		// Emit the branch/jump/branch instruction.
		// Guest displacements are relative to the end of the guest
		// instruction (+5/+2/+6 below); emitted displacements are
		// relative to the end of the emitted instruction.
		switch (opcode) {

		case 0xe9: // was a 32-bit JMP
			targeip += 5 + *(int32_t*)&inp[1];
			goto emitjmp;

		case 0xeb: // was an 8-bit JMP
			targeip += 2 + (int32_t)(int8_t)inp[1];
		emitjmp:
			// NB: the VXI_CALL, LOOP* and ENDFRAG paths below
			// also goto this label from outside the switch.
			outp[0] = 0xe9; // always emit 32-bit JMP
			*(int32_t*)&outp[1] = (int32_t)(tramp - (outp+5));
			outp += 5;
			break;

		case 0x0f: // was a 32-bit Jcc
			opcode = inp[1];
			targeip += 6 + *(int32_t*)&inp[2];
			goto emitjcc;

		default: // was an 8-bit Jcc
			// 8-bit Jcc opcodes 0x7x map to 0x0f 0x8x long forms.
			opcode = inp[0] + 0x10;
			targeip += 2 + (int32_t)(int8_t)inp[1];
		emitjcc:
			outp[0] = 0x0f; // always emit 32-bit Jcc
			outp[1] = opcode;
			*(int32_t*)&outp[2] = (int32_t)(tramp - (outp+6));
			outp += 6;
			break;
		}
	} else if (itype == VXI_CALL) {
		assert(*inp == 0xe8); // 32-bit CALL

		// Translate CALL into an explicit push of the guest return
		// address followed by a JMP to the call target.
		outp[0] = 0x68; // pushl $<return_eip>
		*(uint32_t*)&outp[1] = targeip + 5;
		outp += 5;
		targeip += 5 + *(int32_t*)&inp[1];
		goto emitjmp;
	} else if (itype == VXI_LOOP || itype == VXI_LOOPZ || itype == VXI_LOOPNZ) {
		// LOOP decrements ecx without touching flags; emulate with
		// lea, then the optional Z-flag test, then jecxz to skip the
		// taken-branch JMP when ecx reaches zero.
		*outp++ = 0x8d; // leal -1(ecx) -> ecx
		*outp++ = 0x49;
		*outp++ = 0xff;
		if (itype == VXI_LOOPZ) {
			*outp++ = 0x75; // jnz .+7
			*outp++ = 0x07;
		} else if (itype == VXI_LOOPNZ) {
			*outp++ = 0x74; // jz .+7
			*outp++ = 0x07;
		}
		*outp++ = 0xe3; // jecxz .+5
		*outp++ = 0x05;
		targeip += 2 + (int32_t)(int8_t)inp[1];
		goto emitjmp;
	} else {
		// End-of-fragment pseudo-instruction.
		// targeip already points to the eip we wish to "jump" to.
		assert(itype == VXI_ENDFRAG);
		goto emitjmp;
	}

	// Emit the trampoline code.
	// Layout: 11 bytes (seg-prefixed movl $imm32 to abs32),
	// 5 bytes (jmp rel32), then 8 bytes of patch data.
	tramp[0] = VSEGPREFIX; // movl $patchrec,VSEG:VXEMU_JMPINFO
	tramp[1] = 0xc7;
	tramp[2] = 0x05;
	*(uint32_t*)&tramp[3] = offsetof(vxemu,jmpinfo);
	// The patch record address is stored relative to emu,
	// since VSEG-based addressing is emu-relative.
	*(uint32_t*)&tramp[7] = (uint32_t)((intptr_t)tramp+11+5 -
				(intptr_t)emu);

	tramp[11+0] = 0xe9; // jmp vxrun_lookup_backpatch
	*(uint32_t*)&tramp[11+1] = (uint32_t)((intptr_t)vxrun_lookup_backpatch
				- (intptr_t)&tramp[11+5]);

	// Patch record consumed by vxrun_lookup_backpatch:
	*(uint32_t*)&tramp[11+5] = targeip; // .long targeip
	*(uint32_t*)&tramp[11+5+4] = (uint32_t)(intptr_t)outp; // .long jmpend
	*extrap = &tramp[11+5+4+4];
}
1383
// Emit a short (8-bit) jump/branch instruction.
// The original branch might have been either short or long.
// Only called for insns that pass 2 marked VXI_JUMP8, i.e. whose target
// is an instruction boundary inside this same fragment and within disp8
// range of the final (pass-3) offsets.
// NB. vxemu_sighandler (sig.c) knows that jump8s don't
// trash registers.
static inline void xemit_jump8(struct vxproc *p, unsigned ino)
{
	struct vxemu *emu = p->emu;
	struct vxfrag *f = emu->txfrag;
	unsigned srcofs = f->insn[ino].srcofs;
	uint8_t *inp = (uint8_t*)emu->mem->base + emu->cpu.eip + srcofs;
	uint8_t *outp = FRAGCODE(f) + f->insn[ino].dstofs;

	// Copy any branch taken/not taken hint prefix
	uint8_t opcode = *inp;
	int outlen = 2;
	uint32_t targofs = srcofs;
	if (opcode == 0x2e || opcode == 0x3e) {
		*outp++ = opcode;
		opcode = *++inp;
		outlen = 3;
		targofs++;
	}

	// Determine the jump target and output opcode.
	// Guest displacements are relative to the end of the
	// guest instruction (+5/+6/+2 instruction lengths).
	switch (opcode) {
	case 0xe9: // 32-bit JMP
		opcode = 0xeb;
		targofs += 5 + *(int32_t*)&inp[1];
		break;
	case 0x0f: // 32-bit Jcc
		// Long Jcc opcodes 0x0f 0x8x map back to short 0x7x forms.
		opcode = inp[1] - 0x10;
		targofs += 6 + *(int32_t*)&inp[2];
		break;
	case 0xeb: // 8-bit JMP
	case 0xe0: // 8-bit LOOP
	case 0xe1:
	case 0xe2:
	default: // 8-bit Jcc
		targofs += 2 + (int32_t)(int8_t)inp[1];
		break;
	}
	assert(targofs <= f->insn[f->ninsn-1].srcofs);

	// Find the target in the insn table
	// (same binary search as in xsimp_jump: last insn
	// with srcofs <= targofs).
	unsigned lo = 0;
	unsigned hi = f->ninsn-1;
	while (hi > lo) {
		unsigned mid = (lo + hi + 1) / 2;
		unsigned midofs = f->insn[mid].srcofs;
		if (targofs >= midofs)
			lo = mid;
		else
			hi = mid - 1;
	}
	assert(targofs == f->insn[lo].srcofs);

	// Emit the 2-byte jump instruction (3 bytes with prediction hint);
	// the disp8 is relative to the end of the emitted instruction.
	outp[0] = opcode;
	outp[1] = (int)f->insn[lo].dstofs - ((int)f->insn[ino].dstofs+outlen);
}
1444
// Emit an indirect jump/call/ret instruction.
// All four variants follow the same shape: save the guest's %ebx, load the
// indirect target eip into %ebx, then tail-jump to vxrun_lookup_indirect,
// which maps that eip to translated code.
// NB. vxemu_sighandler (sig.c) knows that ebx is saved as
// the first instruction and then trashed.
// NB. vxemu_sighandler knows that the immediate count
// in a return immediate instruction is at offset 10.
// NB. vxemu_sighandler knows that in an indirect call:
//	* the stack is unchanged until offset -5 (from the end)
//	* at offset -5, the return address has been pushed
//	  and the target eip is in ebx.
static inline void xemit_indir(struct vxproc *p, int itype, unsigned ino)
{
	unsigned i;
	extern void vxrun_lookup_indirect();

	struct vxemu *emu = p->emu;
	struct vxfrag *f = emu->txfrag;
	unsigned srcofs = f->insn[ino].srcofs;
	uint8_t *inp = (uint8_t*)emu->mem->base + emu->cpu.eip + srcofs;
	uint8_t *outp = FRAGCODE(f) + f->insn[ino].dstofs;
	uint8_t *outp0 = outp;	// start of translation, for the dstlen check

	// Common: movl %ebx,VSEG:VXEMU_EBX
	outp[0] = VSEGPREFIX; // Appropriate segment override
	outp[1] = 0x89;
	outp[2] = 0x1d;
	*(uint32_t*)&outp[3] = offsetof(vxemu, cpu.reg[EBX]);
	outp += 7;

	// Instruction-specific code
	switch (itype) {
	default:
		assert(0);

	case VXI_CALLIND:
		assert(inp[0] == 0xff);
		assert(EA_REG(inp[1]) == 2);	// ff /2 = indirect call
		goto Common;

	case VXI_JUMPIND:
		assert(inp[0] == 0xff);
		assert(EA_REG(inp[1]) == 4);	// ff /4 = indirect jump
	Common:;
		// Rewrite "call/jmp *<ea>" as "movl <ea>,%ebx": keep the
		// guest's mod and r/m bits, substitute EBX in the reg field,
		// and copy any SIB/displacement bytes unchanged.
		unsigned srclen = xscan_rm(inp+1) - inp;
		outp[0] = 0x8b; // movl <indirect_ea>,%ebx
		outp[1] = (inp[1] & 0xc7) | (EBX << 3);
		for (i = 2; i < srclen; i++)
			outp[i] = inp[i];
		outp += srclen;

		if(itype == VXI_CALLIND) {
			// For calls, also push the guest return address
			// (the eip just past the original instruction).
			outp[0] = 0x68; // pushl $<return_eip>
			*(uint32_t*)&outp[1] = emu->cpu.eip + srcofs + srclen;
			outp += 5;
		}
		break;

	case VXI_RETURN:
		assert(inp[0] == 0xc3);
		*outp++ = 0x5b; // popl %ebx
		break;

	case VXI_RETURN_IMM:
		assert(inp[0] == 0xc2);	// ret $imm16
		outp[0] = 0x5b; // popl %ebx
		outp[1] = 0x81; // add $<spc>,%esp
		outp[2] = 0xc4;
		// Zero-extend the 16-bit pop count into the add's imm32.
		*(uint32_t*)&outp[3] = *(uint16_t*)&inp[1];
		outp += 1+6;
		break;
	}

	// Common: jmp vxrun_lookup_indirect
	// (dispatches on the target eip now held in %ebx)
	outp[0] = 0xe9;
	*(uint32_t*)&outp[1] = (uint32_t)(intptr_t)vxrun_lookup_indirect -
		(uint32_t)(intptr_t)&outp[5];
	outp += 5;
	assert(outp - outp0 == f->insn[ino].dstlen);
}
1523
// Emit the translation of a trapping instruction (VXI_TRAP): decode the
// trap type from the guest bytes, then emit a fixed 27-byte stub that
// saves %eax, loads the trap number and guest eip into the emu state,
// and jumps to the generic trap handler vxrun_gentrap.
// NB. vxemu_sighandler (sig.c) knows that eax is saved as
// the first instruction and then trashed.
static void xemit_trap(struct vxproc *p, int ino)
{
	extern void vxrun_gentrap();

	struct vxemu *emu = p->emu;
	struct vxfrag *f = emu->txfrag;

	// Trapping instruction. Determine the trap type.
	uint32_t trapno;
	uint32_t trapeip = emu->cpu.eip + f->insn[ino].srcofs;
	uint8_t *inp = (uint8_t*)emu->mem->base + trapeip;
	switch (inp[0]) {
	case 0xcc: // Breakpoint
		trapno = VXTRAP_BREAKPOINT;
		trapeip++; // EIP points after insn
		break;
	case 0xcd: // INT $n
		trapno = VXTRAP_SOFT + inp[1];
		trapeip += 2; // EIP points after insn
		break;
	case 0x0f:
		if (inp[1] == 0x05) { // SYSCALL instruction
			trapno = VXTRAP_SYSCALL;
			trapeip += 2; // EIP points after insn
			break;
		}
		// fall thru...
	default: // Invalid instruction
		// Note: eip is left pointing AT the invalid insn,
		// not past it.
		trapno = VXTRAP_INVALID;
		break;
	}

	// Emit the output code sequence.
	// Sizes: 6 (save eax) + 5 (load trapno) + 11 (store eip) + 5 (jmp).
	uint8_t *outp = FRAGCODE(f) + f->insn[ino].dstofs;

	// movl %eax,VSEG:VXEMU_EAX
	outp[0] = VSEGPREFIX;
	outp[1] = 0xa3;
	*(uint32_t*)&outp[2] = offsetof(vxemu, cpu.reg[EAX]);

	// movl $trapno,%eax
	outp[6+0] = 0xb8;
	*(uint32_t*)&outp[6+1] = trapno;

	// movl $trapeip,VSEG:VXEMU_EIP
	outp[6+5+0] = VSEGPREFIX;
	outp[6+5+1] = 0xc7;
	outp[6+5+2] = 0x05;
	*(uint32_t*)&outp[6+5+3] = offsetof(vxemu, cpu.eip);
	*(uint32_t*)&outp[6+5+7] = trapeip;

	// jmp vxrun_gentrap
	outp[6+5+11+0] = 0xe9;
	*(uint32_t*)&outp[6+5+11+1] = (uint32_t)(intptr_t)vxrun_gentrap -
		(uint32_t)(intptr_t)&outp[6+5+11+5];

	// xscan reserved exactly this much room (see the VXI_TRAP case).
	assert(f->insn[ino].dstlen == 6+5+11+5);
}
1584
1585 // Translation pass 4:
1586 // Emit the translated instruction stream.
1587 static void xemit(struct vxproc *p)
1588 {
1589 unsigned i, j;
1590 struct vxemu *emu = p->emu;
1591 struct vxfrag *f = emu->txfrag;
1592 unsigned ninsn = f->ninsn;
1593
1594 // Writing the instruction stream immediately after the insn table.
1595 uint8_t *outstart = FRAGCODE(f);
1596
1597 // Write extra trampoline code after the already-arranged code.
1598 uint8_t *extra = outstart + (unsigned)f->insn[ninsn-1].dstofs
1599 + (unsigned)f->insn[ninsn-1].dstlen;
1600
1601 // First emit the prolog
1602 outstart[0] = VSEGPREFIX; // Segment override
1603 outstart[1] = 0x8b; outstart[2] = 0x1d; // movl <abs32>,%ebx
1604 *(uint32_t*)&outstart[3] = offsetof(vxemu, cpu.reg[EBX]);
1605
1606 // Now emit the instructions
1607 asm volatile("cld");
1608 uint8_t *instart = (uint8_t*)emu->mem->base + emu->cpu.eip;
1609 for (i = 0; i < ninsn; ) {
1610 unsigned itype = f->insn[i].itype;
1611
1612 switch (itype) {
1613
1614 case VXI_NOTRANS:
1615 // Just copy strings of untranslated instructions.
1616 for (j = i+1; j < ninsn; j++)
1617 if (f->insn[j].itype != VXI_NOTRANS)
1618 break;
1619
1620 unsigned srcofs = f->insn[i].srcofs;
1621 unsigned dstofs = f->insn[i].dstofs;
1622 uint8_t *inp = instart + f->insn[i].srcofs;
1623 uint8_t *outp = outstart + f->insn[i].dstofs;
1624 unsigned cnt = f->insn[j].dstofs - dstofs;
1625 assert(cnt == f->insn[j].srcofs - srcofs);
1626 asm volatile("rep movsb"
1627 : : "c" (cnt), "S" (inp), "D" (outp));
1628
1629 i = j;
1630 break;
1631
1632 case VXI_CALL:
1633 case VXI_JUMP:
1634 case VXI_ENDFRAG:
1635 case VXI_LOOP:
1636 case VXI_LOOPZ:
1637 case VXI_LOOPNZ:
1638 xemit_jump(p, itype, i++, &extra);
1639 break;
1640
1641 case VXI_JUMP8:
1642 xemit_jump8(p, i++);
1643 break;
1644
1645 case VXI_RETURN:
1646 case VXI_JUMPIND:
1647 case VXI_CALLIND:
1648 xemit_indir(p, itype, i++);
1649 break;
1650
1651 case VXI_TRAP:
1652 xemit_trap(p, i++);
1653 break;
1654
1655 default:
1656 assert(0);
1657 }
1658 }
1659
1660 // Record the final amount of code table space we've consumed.
1661 emu->codefree = extra;
1662
1663 // Add an entry to the code pointer table to the new fragment
1664 uint32_t *codetab = emu->codetab;
1665 *--codetab = (uint32_t)(intptr_t)f;
1666 emu->codetab = codetab;
1667
1668 assert((void*)extra < (void*)codetab);
1669
1670 // Insert the new entrypoint into the hash table
1671 uint32_t idx = etabhash(emu->cpu.eip) & emu->etabmask;
1672 while (emu->etab[idx].srceip != NULLSRCEIP) {
1673 assert(emu->etab[idx].srceip != emu->cpu.eip);
1674 idx = (idx+1) & emu->etabmask;
1675 }
1676 emu->etab[idx].srceip = emu->cpu.eip;
1677 emu->etab[idx].dsteip = (uint32_t)(intptr_t)outstart;
1678 emu->etabcnt++;
1679
1680 if (vx32_debugxlate) {
1681 vxrun_cleanup(emu);
1682 vxprint("====== xlate\n");
1683 vxprint("-- guest\n");
1684 disassemble(emu->mem->base, emu->guestfrag, emu->guestfragend);
1685 vxprint("-- translation\n");
1686 disassemble(NULL, outstart, extra);
1687 vxprint("======\n");
1688 vxrun_setup(emu);
1689 }
1690 }
1691
// Translate the fragment starting at the current guest eip by running
// the four translation passes in order. Returns 0 on success, or the
// nonzero result of the scan pass on failure.
static int xlate(struct vxproc *vxp)
{
	// Pass 1: scan instruction stream, build preliminary vxinsn table.
	int rc = xscan(vxp);
	if (rc)
		return rc;

	xsimp(vxp);	// Pass 2: simplify vxinsns wherever possible
	xplace(vxp);	// Pass 3: compute final placement and sizes
	xemit(vxp);	// Pass 4: emit translated instructions

	return 0;
}
1710
1711 #if 0
1712 #include <asm/prctl.h>
1713 #include <sys/prctl.h>
1714 #endif
1715
1716 void dumpsegs(const char *prefix)
1717 {
1718 uint16_t ds, es, fs, gs, ss;
1719 asm( "movw %%ds,%0; movw %%es,%1; "
1720 "movw %%fs,%2; movw %%gs,%3; "
1721 "movw %%ss,%4"
1722 : "=rm"(ds), "=rm" (es), "=rm" (fs), "=rm" (gs), "=rm" (ss));
1723 vxprint("%s: ds=%04x es=%04x fs=%04x gs=%04x ss=%04x\n",
1724 prefix, ds, es, fs, gs, ss);
1725 #if 0
1726 unsigned long fsofs, gsofs;
1727 arch_prctl(ARCH_GET_FS, (unsigned long)&fsofs);
1728 arch_prctl(ARCH_GET_GS, (unsigned long)&gsofs);
1729 vxprint("fsofs=%016lx gsofs=%016lx\n", fsofs, gsofs);
1730 #endif
1731 }
1732
// Run the vx32 guest process until it takes a trap.
// Maps the guest memory into the host, establishes a saved context for
// trap recovery, then loops: look up (or translate) the fragment for the
// current guest eip and execute it via vxrun.
// Returns the trap number on a trap, or -1 if the guest memory or emu
// state could not be mapped.
int vxproc_run(struct vxproc *vxp)
{
	vxemu *emu = vxp->emu;
	vxmmap *mm;

	// Make sure the process is mapped into our host memory
	if ((mm = vxmem_map(vxp->mem, 0)) == NULL)
		return -1;
	if (vxemu_map(emu, mm) < 0) {
		vxmem_unmap(vxp->mem, mm);
		return -1;
	}
	emu->mem = mm;

	// Pending trap?
	if(emu->cpu_trap){
		assert(0); // Can this even happen?
		int trap = emu->cpu_trap;
		emu->cpu_trap = 0;
		return trap;
	}

	uint16_t vs;
	// Registers can't be already loaded or we will smash
	// the "host segment registers" part of emu.
	asm("movw %"VSEGSTR",%0"
		: "=r" (vs));

	assert(vs != emu->emusel);

	// Save our stack environment for exception-handling.
	// This only saves the integer registers. If the signal handler
	// happens in the middle of a translation involving floating-point
	// code, we need to make sure that when we jump back here in the
	// handler, we first restore the floating point registers to
	// the state they were in during the computation. (Operating
	// systems typically save the FPU state, reset the FPU, and
	// pass the saved state to the signal handler.)
	// The Linux signal handler does exactly this.
	//
	// On FreeBSD, after hours wasted trying to manually restore the
	// floating point state, I gave up. Instead, the FreeBSD code
	// saves an mcontext_t here and then overwrites the signal handler's
	// mcontext_t with this one. Then when it returns from the handler,
	// the OS will restore the floating point state and then the mcontext,
	// jumping back here with exactly the FPU state that we want.
	// Why not do this on Linux? Because it didn't work when I tried it,
	// and I was not about to track down why.
	//
	// On OS X, there is no getcontext, so you'd think we'd be back to
	// the Linux approach of manual FPU restore + siglongjmp.
	// Unfortunately, OS X can't deal with siglongjmp from alternate
	// signal stacks. If it invokes a signal handler on an alternate
	// signal stack and that handler uses siglongjmp to go back to the
	// original stack instead of returning out of the handler, then
	// OS X thinks the code is still running on the alternate stack,
	// which causes all sorts of problems. Thus we have to do the
	// getcontext trick. Besides, it is far easier to write a getcontext
	// routine--we already need to know the layout of mcontext_t to
	// write the signal handler--than to figure out what the FPU state
	// looks like.
	//
	// And you thought this was going to be easy.
	//
	// NOTE: each #ifdef arm below opens the same "if(...){" whose body
	// (the trap-reporting path) and closing brace follow the #endif.
	// The body runs only on the second, signal-handler-driven return
	// through the saved context.

#if defined(__FreeBSD__)
	ucontext_t env;
	emu->trapenv = &env.uc_mcontext;
	volatile int n = 0;	// volatile: distinguishes the re-entry
	getcontext(&env);
	if(++n > 1){
#elif defined(__APPLE__)
	struct i386_thread_state env;
	emu->trapenv = &env;
	if(vx32_getcontext(&env)){
#else
	mcontext_t env;
	emu->trapenv = &env;
	if(vx32_getcontext(&env)){
#endif
		if(vx32_debugxlate) vxprint("VX trap %x err %x va %08x "
				"veip %08x veflags %08x\n",
			emu->cpu_trap, emu->cpu.traperr, emu->cpu.trapva,
			emu->cpu.eip, emu->cpu.eflags);
		goto trapped;
	}

	// Load our special vxproc segment selector into fs register.
	vxrun_setup(emu);

	while (1) {
		// Look up the translated entrypoint for the current vx32 EIP
		// (linear probing, matching the insertion in xemit).
		uint32_t eip = emu->cpu.eip;
		uint32_t idx = etabhash(eip) & emu->etabmask;
		while (emu->etab[idx].srceip != eip) {
			if (emu->etab[idx].srceip == NULLSRCEIP)
				goto notfound;
			idx = (idx+1) & emu->etabmask;
		}

		// Run the translated code fragment.
		// Return if the code terminated with an exception.
		// Otherwise it terminated because of an untranslated EIP,
		// so translate it.
		if(vxrun(emu, emu->etab[idx].dsteip) != 0)
			break;

	notfound:
		// Translate the code fragment the current emu->cpu.eip points to
		if(xlate(vxp) != 0)
			break;
	}

	// Restore the usual flat model data segment registers.
	vxrun_cleanup(emu);

trapped:
	// De-register our setjmp environment for trap handling.
	emu->trapenv = NULL;

	emu->mem = NULL;
	int trap = emu->cpu_trap;
	emu->cpu_trap = 0;
	return trap;
}
1857
1858 void vxemu_stats(struct vxproc *p)
1859 {
1860 unsigned i;
1861 vxemu *emu = p->emu;
1862
1863 vxprint("flush count: %llu\n", nflush);
1864
1865 // vxprint("vxproc size %dKB\n", p->size/1024);
1866
1867 unsigned coll = 0;
1868 for (i = 0; i < emu->etablen; i++) {
1869 vxentry *e = &emu->etab[i];
1870 if (e->srceip == NULLSRCEIP)
1871 continue;
1872 unsigned idx = etabhash(e->srceip) & emu->etabmask;
1873 if (idx != i) {
1874 // vxprint("srcip %08x hash %d actually at %d\n",
1875 // e->srceip, idx, i);
1876 coll++;
1877 }
1878 }
1879 vxprint("entry tab: %d used, %d total, %d collisions\n",
1880 emu->etabcnt, emu->etablen, coll);
1881 }
1882
1883 static void disassemble(uint8_t *addr0, uint8_t *p, uint8_t *ep)
1884 {
1885 xdinst i;
1886 int j;
1887 uint8_t *q;
1888 char buf[128];
1889
1890 for (; p < ep; p = q) {
1891 if ((q = x86decode(addr0, p, &i)) == NULL)
1892 break;
1893 x86print(buf, sizeof buf, &i);
1894 vxprint("%08x", i.addr);
1895 for(j=0; j<i.len; j++)
1896 vxprint(" %02x", p[j]);
1897 for(; j<10; j++)
1898 vxprint(" ");
1899 vxprint(" %s\n", buf);
1900 }
1901 }
1902
// printf-style diagnostic output, written directly to stderr (fd 2).
// Output longer than the internal buffer is truncated.
void vxprint(char *fmt, ...)
{
	va_list arg;
	char buf[512];
	int n;

	va_start(arg, fmt);
	n = vsnprintf(buf, sizeof buf, fmt, arg);
	va_end(arg);
	// BUGFIX: on a vsnprintf encoding error (n < 0) the buffer
	// contents are indeterminate, so the old strlen(buf) was
	// undefined behavior; bail out instead.
	if (n < 0)
		return;
	// vsnprintf returns the would-be length; clamp to what
	// actually fits (the buffer is always NUL-terminated).
	if ((size_t)n >= sizeof buf)
		n = sizeof buf - 1;
	USED(write(2, buf, n));
}
1913