emu.c - vx32 - Local 9vx git repository for patches.
 (HTM) git clone git://r-36.net/vx32
 (DIR) Log
 (DIR) Files
 (DIR) Refs
       ---
       emu.c (49656B)
       ---
            1 /*
            2  * Simple instruction scanning and rewriting
            3  * for implementing vx32 on x86-32 hosts.
            4  */
            5 
            6 #ifdef __APPLE__
            7 #define __DARWIN_UNIX03 0
            8 #endif
            9 
           10 #include <stdio.h>
           11 #include <stdlib.h>
           12 #include <stddef.h>
           13 #include <string.h>
           14 #include <setjmp.h>
           15 #include <assert.h>
           16 #include <errno.h>
           17 #include <sys/stat.h>                // XX FreeBSD 4.9 header bug?
           18 #include <sys/mman.h>
           19 #include <stdarg.h>
           20 #include <unistd.h>
           21 
           22 #include "vx32.h"
           23 #include "vx32impl.h"
           24 #include "os.h"
           25 #include "x86dis.h"
           26 
           27 // Special values for unused entries in entrypoint hash table
           28 #define NULLSRCEIP                ((uint32_t)-1)
           29 #define NULLDSTEIP                ((uint32_t)(uintptr_t)vxrun_nullfrag);
           30 
           31 int vx32_debugxlate = 0;
           32 
           33 static uint64_t nflush;
           34 
           35 static void disassemble(uint8_t *addr0, uint8_t*, uint8_t*);
           36 
           37 // Create the emulation state for a new process
           38 int vxemu_init(struct vxproc *vxp)
           39 {
           40         // Initial emulation hash table size (must be a power of two)
           41         int etablen = 4096;
           42 
           43         // Allocate the vxemu state area in 32-bit memory,
           44         // because it must be accessible to our translated code
           45         // via the special fs segment register setup.
           46         vxemu *e = mmap(NULL, VXCODEBUFSIZE,
           47                         PROT_READ | PROT_WRITE | PROT_EXEC,
           48                         MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
           49         if (e == MAP_FAILED){
           50                 vxprint("vxemu_init: mmap failed\n");
           51                 return -1;
           52         }
           53 
           54         // Basic initialization
           55         memset(e, 0, sizeof(vxemu));
           56         e->magic = VXEMU_MAGIC;
           57         e->proc = vxp;
           58         vxp->cpu = &e->cpu;
           59         e->emuptr = (uint32_t)(intptr_t)e;
           60         e->etablen = etablen;
           61         e->etabmask = etablen - 1;
           62 
           63         // Initialize the entrypoint table and translation buffer pointers
           64         vxemu_flush(e);
           65 
           66         vxp->emu = e;
           67         return 0;
           68 }
           69 
           70 void vxemu_free(vxemu *e)
           71 {
           72         assert(e->proc->emu == e);
           73         e->proc->emu = NULL;
           74 
           75         // Free the vxemu state area
           76         munmap(e, VXCODEBUFSIZE);
           77 }
           78 
           79 // Reset a vxproc's translation code buffer and entrypoint table.
           80 void vxemu_flush(vxemu *e)
           81 {
           82         uint32_t i;
           83 
           84         // Clear the entrypoint table.
           85         uint32_t etablen = e->etablen;
           86         for (i = 0; i < etablen; i++) {
           87                 e->etab[i].srceip = NULLSRCEIP;
           88                 e->etab[i].dsteip = NULLDSTEIP;
           89         }
           90         e->etabcnt = 0;
           91 
           92         // The translated code buffer immediately follows the etab.
           93         e->codebuf = &e->etab[etablen];
           94         e->codefree = &e->etab[etablen];
           95         e->codetab = (void*)e + VXCODEBUFSIZE;
           96         e->codetop = (void*)e + VXCODEBUFSIZE;
           97 
           98         nflush++;
           99 }
          100 
          101 void vxemu_growetab(struct vxemu *e)
          102 {
          103         // Increase the size of the entrypoint table,
          104         // which effectively just reserves more memory
          105         // from the code translation buffer.
          106         e->etablen *= 2;
          107         e->etabmask = e->etablen - 1;
          108 
          109         // Re-initialize the entrypoint table and translation buffer.
          110         vxemu_flush(e);
          111 }
          112 
          113 // Each translated frag starts with a one-instruction prolog...
          114 #define PROLOG_LEN                7        // Length of 'mov VSEG:VXEMU_EBX,%ebx'
          115 
          116 
          117 // Translate a block of code starting at the current vx32 EIP.
          118 // The basic procedure works in four stages.
          119 //
          120 // 1: We first scan the instruction stream to build up a
          121 // tentative vxinsn table for the instructions we plan to translate,
          122 // with output code offsets computed for worst-case instruction lengths.
          123 // This pass handles checking execute permissions on instruction pages,
          124 // and decides exactly how many instructions we'll translate in this block.
          125 // The final instruction in a fragment is always either
          126 // an unconditional flow control instruction (JMP, CALL, RET, INT, etc.),
          127 // or the special "pseudo-instruction" VXI_ENDFRAG,
          128 // which ends the fragment with a jump to the appropriate subsequent EIP.
          129 //
          130 // 2: Next we do a reverse scan through the vxinsn table
          131 // to identify instructions we can simplify:
          132 // particularly instructions with condition code fixups
          133 // whose condition codes are not actually used before they are killed.
          134 // We also identify branches that can be rewritten with 8-bit displacements.
          135 // In the process we adjust the target instruction length (dstlen) fields
          136 // for all simplified instructions accordingly.
          137 //
          138 // 3: We now perform a forward scan through the vxinsn table
          139 // to compute the final offsets for all target instructions in the block.
          140 //
          141 // 4: Finally, we scan the instruction stream again
          142 // and emit the target instructions for the block.
          143 //
          144 
          145 // Macros to extract fields in a Mod-Reg-R/M byte
          146 #define EA_MOD(b)        ((uint8_t)(b) >> 6)
          147 #define EA_REG(b)        (((uint8_t)(b) >> 3) & 7)
          148 #define EA_RM(b)        ((uint8_t)(b) & 7)
          149 
          150 // Scan a Mod-Reg-R/M byte and the rest of the effective address
          151 uint8_t *xscan_rm(uint8_t *inp)
          152 {
          153         uint8_t ea = *inp++;
          154         switch (EA_MOD(ea)) {
          155         case 0:
          156                 switch (EA_RM(ea)) {
          157                 case 4:        ; // SIB
          158                         uint8_t sib = *inp;
          159                         if ((sib & 7) == 5)
          160                                 return inp+1+4;
          161                         else
          162                                 return inp+1;
          163                 case 5:        // disp32
          164                         return inp+4;
          165                 default: // [reg]
          166                         return inp;
          167                 }
          168 
          169         case 1:
          170                 switch (EA_RM(ea)) {
          171                 case 4:        // SIB+disp8
          172                         return inp+1+1;
          173                 default: // [reg]+disp8
          174                         return inp+1;
          175                 }
          176 
          177         case 2:
          178                 switch (EA_RM(ea)) {
          179                 case 4: // SIB+disp32
          180                         return inp+1+4;
          181                 default: // [reg]+disp32
          182                         return inp+4;
          183                 }
          184 
          185         case 3:        // reg
          186                 return inp;
          187 
          188         default:
          189                 assert(0);
          190                 return 0;
          191         }
          192 }
          193 
          194 // Translation pass 1:
          195 // scan instruction stream, build preliminary vxinsn table,
          196 // and decide how many instructions to translate in this fragment.
          197 static int xscan(struct vxproc *p)
          198 {
          199         uint32_t faultva;
          200         uint32_t eip;
          201         uint8_t *instart, *inmax;
          202         struct vxemu *emu = p->emu;
          203 
          204         // Make sure there's enough space in the translated code buffer;
          205         // if not, then first clear the code buffer and entrypoint table.
          206         if (((uint8_t*)emu->codetab - (uint8_t*)emu->codefree) < 1024)
          207                 vxemu_flush(emu);
          208 
          209         // Grow the entrypoint hash table if it gets too crowded.
          210         // This also in effect flushes the translated code buffer.
          211         if (emu->etabcnt > emu->etablen/2)
          212                 vxemu_growetab(emu);
          213 
          214         // Find and check permissions on the input instruction stream,
          215         // and determine how far ahead we can scan (up to one full page)
          216         // before hitting a non-executable page.
          217         eip = emu->cpu.eip;
          218         instart = (uint8_t*)emu->mem->base + eip;
          219         emu->guestfrag = instart;
          220         if (!vxmem_checkperm(p->mem, eip, 2*VXPAGESIZE, VXPERM_EXEC, &faultva)) {
          221                 if(faultva == eip) {
          222                 noexec:
          223                         emu->cpu_trap = VXTRAP_PAGEFAULT;
          224                         emu->cpu.traperr = 0x10;
          225                         emu->cpu.trapva = faultva;
          226                         return emu->cpu_trap;
          227                 }
          228         } else
          229                 faultva = VXPAGETRUNC(eip) + 2*VXPAGESIZE;
          230         inmax = instart + faultva - eip;
          231 
          232         // Create a new fragment header in the code translation buffer
          233         struct vxfrag *f = (struct vxfrag*)(((intptr_t)emu->codefree + 3) & ~3);
          234         emu->txfrag = f;
          235         f->eip = eip;
          236 
          237         unsigned ino = 0;        // instruction number
          238         unsigned dstofs = PROLOG_LEN;
          239         uint8_t *inp = instart;
          240         emu->ininst = inp;        // save instruction currently being translated
          241         int fin = 0;
          242         do {
          243                 uint8_t itype = 0;
          244                 uint8_t dstlen;
          245                 uint8_t ea;
          246                 
          247                 if(*inp == 0xF0)        // LOCK
          248                         inp++;
          249 
          250                 // Begin instruction decode.
          251                 // We might take a fault on any of these instruction reads
          252                 // if we run off the end of a mapped code page.
          253                 // In that case our exception handler
          254                 // notices that emu->ininst != NULL and initiates recovery.
          255                 // Or we might _not_ take a fault
          256                 // on a page marked read-only but not executable;
          257                 // that's why we check against inmax after each insn.
          258                 switch (*inp++) {
          259 
          260                 // OP Eb,Gb; OP Ev,Gv; OP Gb,Eb; OP Gv,Ev
          261                 case 0x00: case 0x01: case 0x02: case 0x03:        // ADD
          262                 case 0x08: case 0x09: case 0x0a: case 0x0b:        // OR
          263                 case 0x10: case 0x11: case 0x12: case 0x13:        // ADC
          264                 case 0x18: case 0x19: case 0x1a: case 0x1b:        // SBB
          265                 case 0x20: case 0x21: case 0x22: case 0x23:        // AND
          266                 case 0x28: case 0x29: case 0x2a: case 0x2b:        // SUB
          267                 case 0x30: case 0x31: case 0x32: case 0x33:        // XOR
          268                 case 0x38: case 0x39: case 0x3a: case 0x3b:        // CMP
          269                 case 0x84: case 0x85:                                // TEST
          270                 case 0x86: case 0x87:                                // XCHG
          271                 case 0x88: case 0x89: case 0x8a: case 0x8b:        // MOV
          272                         inp = xscan_rm(inp);
          273                         goto notrans;
          274 
          275                 // OP AL,Ib; PUSH Ib
          276                 case 0x04: case 0x0c: case 0x14: case 0x1c:        // ADD etc.
          277                 case 0x24: case 0x2c: case 0x34: case 0x3c:        // AND etc.
          278                 case 0x6a:                                        // PUSH Ib
          279                 case 0xa8:                                        // TEST AL,Ib
          280                 case 0xb0: case 0xb1: case 0xb2: case 0xb3:        // MOV Gb,Ib
          281                 case 0xb4: case 0xb5: case 0xb6: case 0xb7:
          282                         inp += 1;
          283                         goto notrans;
          284 
          285                 // OP EAX,Iv; PUSH Iv; MOV moffs
          286                 case 0x05: case 0x0d: case 0x15: case 0x1d:        // OP EAX,Iv
          287                 case 0x25: case 0x2d: case 0x35: case 0x3d:
          288                 case 0x68:                                        // PUSH Iv
          289                 case 0xa0: case 0xa1: case 0xa2: case 0xa3:        // MOV moffs
          290                 case 0xa9:                                        // TEST eAX,Iv
          291                 case 0xb8: case 0xb9: case 0xba: case 0xbb:        // MOV Gv,Iv
          292                 case 0xbc: case 0xbd: case 0xbe: case 0xbf:
          293                         inp += 4;
          294                         goto notrans;
          295 
          296                 // CS and DS segment overrides, only valid for branch hints
          297                 case 0x2e:        // CS/"not taken"
          298                 case 0x3e:        // DS/"taken"
          299                         switch (*inp++) {
          300 
          301                         // Jcc (8-bit displacement)
          302                         case 0x70: case 0x71: case 0x72: case 0x73:
          303                         case 0x74: case 0x75: case 0x76: case 0x77:
          304                         case 0x78: case 0x79: case 0x7a: case 0x7b:
          305                         case 0x7c: case 0x7d: case 0x7e: case 0x7f:
          306                                 inp += 1;
          307                                 itype = VXI_JUMP;
          308                                 dstlen = 7;        // 32-bit branch w/hint
          309                                 goto done;
          310 
          311                         // Two-byte opcode
          312                         case 0x0f:
          313                                 switch (*inp++) {
          314 
          315                                 // Jcc - conditional branch with disp32
          316                                 case 0x80: case 0x81: case 0x82: case 0x83:
          317                                 case 0x84: case 0x85: case 0x86: case 0x87:
          318                                 case 0x88: case 0x89: case 0x8a: case 0x8b:
          319                                 case 0x8c: case 0x8d: case 0x8e: case 0x8f:
          320                                         inp += 4;
          321                                         itype = VXI_JUMP;
          322                                         dstlen = 7;        // 32-bit branch w/hint
          323                                         goto done;
          324 
          325                                 }
          326                                 goto invalid;
          327                         }
          328                         goto invalid;
          329 
          330                 // INC reg; DEC reg; PUSH reg; POP reg; XCHG eAX,reg
          331                 case 0x40: case 0x41: case 0x42: case 0x43:        // INC
          332                 case 0x44: case 0x45: case 0x46: case 0x47:
          333                 case 0x48: case 0x49: case 0x4a: case 0x4b:        // DEC
          334                 case 0x4c: case 0x4d: case 0x4e: case 0x4f:
          335                 case 0x50: case 0x51: case 0x52: case 0x53:        // PUSH
          336                 case 0x54: case 0x55: case 0x56: case 0x57:
          337                 case 0x58: case 0x59: case 0x5a: case 0x5b:        // POP
          338                 case 0x5c: case 0x5d: case 0x5e: case 0x5f:
          339                 case 0x90: case 0x91: case 0x92: case 0x93:        // XCHG
          340                 case 0x94: case 0x95: case 0x96: case 0x97:
          341                 case 0x98: case 0x99:                                // CWDE, CDQ
          342                 case 0xa4: case 0xa5: case 0xa6: case 0xa7:        // MOVS, CMPS
          343                 case 0xaa: case 0xab:                                // STOS
          344                 case 0xac: case 0xad: case 0xae: case 0xaf:        // LODS, SCAS
          345                 case 0xc9:                                        // LEAVE
          346                 case 0xfc: case 0xfd:                                // CLD, STD
          347                         goto notrans;
          348 
          349                 // OP Eb,Ib; OP Ev,Ib; IMUL Gv,Ev,Ib
          350                 case 0x80:                                        // OP Eb,Ib
          351                 case 0x83:                                        // OP Ev,Ib
          352                 case 0x6b:                                        // IMUL Gv,Ev,Ib
          353                         inp = xscan_rm(inp);
          354                         inp += 1;
          355                         goto notrans;
          356 
          357                 // OP Ev,Iv; IMUL Gv,Ev,Iv
          358                 case 0x81:                                        // OP Ev,Iv
          359                 case 0x69:                                        // IMUL Gv,Ev,Iv
          360                         inp = xscan_rm(inp);
          361                         inp += 4;
          362                         goto notrans;
          363 
          364                 // Jcc (8-bit displacement)
          365                 case 0x70: case 0x71: case 0x72: case 0x73:
          366                 case 0x74: case 0x75: case 0x76: case 0x77:
          367                 case 0x78: case 0x79: case 0x7a: case 0x7b:
          368                 case 0x7c: case 0x7d: case 0x7e: case 0x7f:
          369                         inp += 1;
          370                         itype = VXI_JUMP;
          371                         dstlen = 6;        // Size of worst-case 32-bit branch
          372                         goto done;
          373 
          374                 // LEA Gv,M
          375                 case 0x8d:
          376                         if (EA_MOD(*inp) == 3)        // Mem-only
          377                                 goto invalid;
          378                         inp = xscan_rm(inp);
          379                         goto notrans;
          380 
          381                 // Group 1a - POP Ev
          382                 case 0x8f:
          383                         if (EA_REG(*inp) != 0)
          384                                 goto invalid;
          385                         inp = xscan_rm(inp);
          386                         goto notrans;
          387 
          388                 // FWAIT
          389                 case 0x9b:
          390                         if (p->allowfp == 0) {
          391                         badfp:
          392                                 if (ino > 0)
          393                                         goto endfrag;
          394                                 emu->cpu_trap = VXTRAP_FPOFF;
          395                                 return emu->cpu_trap;
          396                         }
          397                         goto notrans;
          398 
          399                 // PUSHF; POPF
          400                 case 0x9c: case 0x9d:
          401                         goto notrans;
          402 
          403                 // SAHF; LAHF
          404                 case 0x9f: case 0x9e:
          405                         goto notrans;
          406 
          407                 // Shift Eb,Ib; Shift Ev,Ib
          408                 case 0xc0: case 0xc1:
          409                         inp = xscan_rm(inp);
          410                         inp += 1;
          411                         // XXX fix CCs
          412                         goto notrans;
          413 
          414                 // Shift Eb,1; Shift Ev,1
          415                 case 0xd0: case 0xd1:
          416                         inp = xscan_rm(inp);
          417                         // XXX fix CCs
          418                         goto notrans;
          419 
          420                 // Shift Eb,CL; Shift Ev,CL
          421                 case 0xd2: case 0xd3:
          422                         inp = xscan_rm(inp);
          423                         // XXX fix CCs
          424                         goto notrans;
          425 
          426                 // RET Iw
          427                 case 0xc2:
          428                         inp += 2;
          429                         itype = VXI_RETURN_IMM;
          430                         dstlen = 7+1+6+5;        // movl %ebx,VSEG:VXEMU_EBX
          431                                                 // popl %ebx
          432                                                 // addl $Iw,%esp
          433                                                 // jmp vxrun_lookup_indirect
          434                         fin = 1;
          435                         goto done;
          436 
          437                 // RET
          438                 case 0xc3:
          439                         itype = VXI_RETURN;
          440                         dstlen = 7+1+5;                // movl %ebx,VSEG:VXEMU_EBX
          441                                                 // popl %ebx
          442                                                 // jmp vxrun_lookup_indirect
          443                         fin = 1;
          444                         goto done;
          445 
          446                 // Group 11 - MOV Eb,Ib
          447                 case 0xc6:
          448                         if (EA_REG(*inp) != 0)
          449                                 goto invalid;
          450                         inp = xscan_rm(inp);
          451                         inp += 1;
          452                         goto notrans;
          453 
          454                 // Group 11 - MOV Ev,Iv
          455                 case 0xc7:
          456                         if (EA_REG(*inp) != 0)
          457                                 goto invalid;
          458                         inp = xscan_rm(inp);
          459                         inp += 4;
          460                         goto notrans;
          461 
          462                 // ENTER
          463                 case 0xc8:
          464                         inp += 2+1;                // imm16,imm8
          465                         goto notrans;
          466 
          467                 case 0xcd:                        // INT n (software interrupt)
          468                         inp++;
          469                 case 0xcc:                        // INT3 (breakpoint)
          470                         goto gentrap;
          471 
          472                 // 387 escapes - modrm with opcode field
          473                 case 0xd8: case 0xd9: case 0xda: case 0xdb:
          474                 case 0xdc: case 0xdd: case 0xde: case 0xdf:
          475                         if (!p->allowfp)
          476                                 goto badfp;
          477                         if ((*inp>>6) == 3)
          478                                 inp++;
          479                         else
          480                                 inp = xscan_rm(inp);
          481                         goto notrans;
          482 
          483                 // Loops
          484                 case 0xe0:        // LOOPNZ cb
          485                         inp++;
          486                         itype = VXI_LOOPNZ;
          487                         dstlen = 3+2+2+5;        // leal -1(ecx), ecx
          488                                                 // jz .+7
          489                                                 // jecxz .+5
          490                                                 // jmp cb
          491                         goto done;
          492 
          493                 case 0xe1:        // LOOPZ cb
          494                         inp++;
          495                         itype = VXI_LOOPZ;
          496                         dstlen = 3+2+2+5;        // leal -1(ecx), ecx
          497                                                 // jnz .+7
          498                                                 // jecxz .+5
          499                                                 // jmp cb
          500                         goto done;
          501 
          502                 case 0xe2:        // LOOP cb
          503                         inp++;
          504                         itype = VXI_LOOP;
          505                         dstlen = 3+2+5;        // leal -1(ecx), ecx
          506                                                 // jecxz .+5
          507                                                 // jmp cb
          508                         goto done;
          509 
          510                 // CALL
          511                 case 0xe8:                                // CALL Jv
          512                         inp += 4;
          513                         itype = VXI_CALL;
          514                         dstlen = 5+5;                // pushl $nexteip
          515                                                 // jmp trampoline
          516                         fin = 1;
          517                         goto done;
          518 
          519                 // JMP
          520                 case 0xe9:                                // JMP Jv
          521                         inp += 4;
          522                         itype = VXI_JUMP;
          523                         dstlen = 5;        // Size of worst-case 32-bit JMP
          524                         fin = 1;
          525                         goto done;
          526 
          527                 // JMP short
          528                 case 0xeb:                                // JMP Jb
          529                         inp += 1;
          530                         itype = VXI_JUMP;
          531                         dstlen = 5;        // Size of worst-case 32-bit JMP
          532                         fin = 1;
          533                         goto done;
          534 
          535                 // Group 3 - unary ops
          536                 case 0xf6:
          537                         ea = *inp;
          538                         inp = xscan_rm(inp);
          539                         switch (EA_REG(ea)) {
          540                         case 0: case 1:                        // TEST Eb,Ib
          541                                 inp += 1;
          542                         default:                        // NOT, NEG, ...
          543                                 ; // XXX MUL/DIV require fixcc!
          544                         }
          545                         goto notrans;
          546 
          547                 case 0xf7:
          548                         ea = *inp;
          549                         inp = xscan_rm(inp);
          550                         switch (EA_REG(ea)) {
          551                         case 0: case 1:                        // TEST Ev,Iv
          552                                 inp += 4;
          553                         default:                        // NOT, NEG, ...
          554                                 ; // XXX MUL/DIV require fixcc!
          555                         }
          556                         goto notrans;
          557 
          558                 // Group 4 - INC, DEC
          559                 case 0xfe:
          560                         ea = *inp;
          561                         inp = xscan_rm(inp);
          562                         switch (EA_REG(ea)) {
          563                         case 0: case 1:                        // INC Eb, DEC Eb
          564                                 goto notrans;
          565                         }
          566                         goto invalid;
          567 
          568                 // Group 5 - INC, DEC, CALL, JMP, PUSH
          569                 case 0xff:
          570                         ea = *inp;
          571                         inp = xscan_rm(inp);
          572                         switch (EA_REG(ea)) {
          573                         case 0: case 1:                        // INC Ev, DEC Ev
          574                         case 6:                                // PUSH Ev
          575                                 goto notrans;
          576                         case 2:                                // CALL Ev
          577                                 itype = VXI_CALLIND;
          578                                 dstlen = 7+(inp-emu->ininst)+5+5;
          579                                         // movl %ebx,VSEG:VXEMU_EBX
          580                                         // movl <indirect_ea>,%ebx
          581                                         //        (same length as CALL inst)
          582                                         // pushl $<return_eip>
          583                                         // jmp vxrun_lookup_indirect
          584                                 fin = 1;
          585                                 goto done;
          586                         case 4:                                // JMP Ev
          587                                 itype = VXI_JUMPIND;
          588                                 dstlen = 7+(inp-emu->ininst)+5;
          589                                         // movl %ebx,VSEG:VXEMU_EBX
          590                                         // movl <indirect_ea>,%ebx
          591                                         //        (same length as JMP inst)
          592                                         // jmp vxrun_lookup_indirect
          593                                 fin = 1;
          594                                 goto done;
          595                         }
          596                         goto invalid;
          597                 
          598                 // I/O
          599                 case 0xed:
          600                         goto gentrap;
          601 
          602                 // Prefixes
          603                 case 0x0f:        // 2-byte opcode escape
          604                         goto twobyte;
          605                 case 0x66:        // Operand size prefix
          606                         goto opsize;
          607                 case 0xf3:        // REP/REPE prefix
          608                         goto rep;
          609                 case 0xf2:        // REPNE prefix
          610                         goto repne;
          611                 }
          612                 goto invalid;
          613 
          614         // Operand size prefix (0x66) seen
          615         opsize:
          616                 switch (*inp++) {
          617 
          618                 // OP Ev,Gv; OP Gv,Ev
          619                 case 0x01: case 0x03:                                // ADD
          620                 case 0x09: case 0x0b:                                // OR
          621                 case 0x11: case 0x13:                                // ADC
          622                 case 0x19: case 0x1b:                                // SBB
          623                 case 0x21: case 0x23:                                // AND
          624                 case 0x29: case 0x2b:                                // SUB
          625                 case 0x31: case 0x33:                                // XOR
          626                 case 0x39: case 0x3b:                                // CMP
          627                 case 0x85:                                        // TEST
          628                 case 0x87:                                        // XCHG
          629                 case 0x89: case 0x8b:                                // MOV
          630                         inp = xscan_rm(inp);
          631                         goto notrans;
          632 
          633                 // OP EAX,Iv; PUSH Iv
          634                 case 0x05: case 0x0d: case 0x15: case 0x1d:        // OP EAX,Iv
          635                 case 0x25: case 0x2d: case 0x35: case 0x3d:
          636                 case 0x68:                                        // PUSH Iv
          637                 case 0xa9:                                        // TEST eAX,Iv
          638                 case 0xb8: case 0xb9: case 0xba: case 0xbb:        // MOV Gv,Iv
          639                 case 0xbc: case 0xbd: case 0xbe: case 0xbf:
          640                         inp += 2;
          641                         goto notrans;
          642 
          643                 // INC reg; DEC reg; PUSH reg; POP reg; XCHG eAX,reg
          644                 case 0x40: case 0x41: case 0x42: case 0x43:        // INC
          645                 case 0x44: case 0x45: case 0x46: case 0x47:
          646                 case 0x48: case 0x49: case 0x4a: case 0x4b:        // DEC
          647                 case 0x4c: case 0x4d: case 0x4e: case 0x4f:
          648                 case 0x50: case 0x51: case 0x52: case 0x53:        // PUSH
          649                 case 0x54: case 0x55: case 0x56: case 0x57:
          650                 case 0x58: case 0x59: case 0x5a: case 0x5b:        // POP
          651                 case 0x5c: case 0x5d: case 0x5e: case 0x5f:
          652                 case 0x90: case 0x91: case 0x92: case 0x93:        // XCHG
          653                 case 0x94: case 0x95: case 0x96: case 0x97:
          654                 case 0x98: case 0x99:                                // CWDE, CDQ
          655                 case 0xa4: case 0xa5: case 0xa6: case 0xa7:        // MOVS, CMPS
          656                 case 0xaa: case 0xab:                                // STOS
          657                 case 0xac: case 0xad: case 0xae: case 0xaf:        // LODS, SCAS
          658                 case 0xc9:                                        // LEAVE
          659                 case 0xfc: case 0xfd:                                // CLD, STD
          660                         goto notrans;
          661 
          662                 // OP Ev,Iv; IMUL Gv,Ev,Iv
          663                 case 0x81:                                        // OP Ev,Iv
          664                 case 0x69:                                        // IMUL Gv,Ev,Iv
          665                         inp = xscan_rm(inp);
          666                         inp += 2;
          667                         goto notrans;
          668 
          669                 // OP Ev,Ib; IMUL Gv,Ev,Ib
          670                 case 0x83:                                        // OP Ev,Ib
          671                 case 0x6b:                                        // IMUL Gv,Ev,Ib
          672                         inp = xscan_rm(inp);
          673                         inp += 1;
          674                         goto notrans;
          675 
          676                 // MOV moffs
          677                 case 0xa1: case 0xa3:
          678                         inp += 4;        // always 32-bit offset
          679                         goto notrans;
          680 
          681                 // Shift Ev,Ib
          682                 case 0xc1:
          683                         inp = xscan_rm(inp);
          684                         inp += 1;
          685                         // XXX fix CCs
          686                         goto notrans;
          687 
          688                 // Shift Ev,1
          689                 case 0xd1:
          690                         inp = xscan_rm(inp);
          691                         // XXX fix CCs
          692                         goto notrans;
          693 
          694                 // Shift Ev,CL
          695                 case 0xd3:
          696                         inp = xscan_rm(inp);
          697                         // XXX fix CCs
          698                         goto notrans;
          699 
          700                 // Group 11 - MOV Ev,Iv
          701                 case 0xc7:
          702                         if (EA_REG(*inp) != 0)
          703                                 goto invalid;
          704                         inp = xscan_rm(inp);
          705                         inp += 2;
          706                         goto notrans;
          707                 
          708                 // Group 3 - unary ops
          709                 case 0xf7:
          710                         ea = *inp;
          711                         inp = xscan_rm(inp);
          712                         switch (EA_REG(ea)) {
          713                         case 0: case 1:                        // TEST Ev,Iv
          714                                 inp += 2;
          715                         default:                        // NOT, NEG, ...
          716                                 ; // XXX MUL/DIV require fixcc!
          717                         }
          718                         goto notrans;
          719 
          720                 // Group 5 - INC, DEC, CALL, JMP, PUSH
          721                 case 0xff:
          722                         ea = *inp;
          723                         inp = xscan_rm(inp);
          724                         switch (EA_REG(ea)) {
          725                         case 0: case 1:                        // INC Ev, DEC Ev
          726                                 goto notrans;
          727                         }
          728                         goto invalid;
          729 
          730                 // Prefixes
          731                 case 0x0f:        // 2-byte opcode escape
          732                         goto twobyte_opsize;
          733                 case 0x66:        // Operand size prefix (redundant)
          734                         goto invalid;
          735                 case 0xf3:        // REP/REPE prefix
          736                         goto opsize_rep;
          737                 case 0xf2:        // REPNE prefix
          738                         goto opsize_repne;
          739                 }
          740                 goto invalid;
          741 
          742         // REP/REPE prefix (0xf3) seen
          743         rep:
          744                 switch (*inp++) {
          745 
          746                 // No-operand insns
          747                 case 0xa4: case 0xa5: case 0xa6: case 0xa7:        // MOVS, CMPS
          748                 case 0xaa: case 0xab:                                // STOS
          749                 case 0xac: case 0xad: case 0xae: case 0xaf:        // LODS, SCAS
          750                         goto notrans;
          751 
          752                 // Prefixes
          753                 case 0x0f:        // 2-byte opcode escape
          754                         goto twobyte_rep;
          755                 case 0x66:        // Operand size prefix
          756                         goto opsize_rep;
          757                 case 0xf3:        // REP/REPE prefix (redundant)
          758                         goto invalid;
          759                 case 0xf2:        // REPNE prefix (conflicting)
          760                         goto invalid;
          761                 }
          762                 goto invalid;
          763 
          764         // REPNE prefix (0xf2) seen
          765         repne:
          766                 switch (*inp++) {
          767 
          768                 // No-operand insns
          769                 case 0xa6: case 0xa7:                                // CMPS
          770                 case 0xae: case 0xaf:                                // SCAS
          771                         goto notrans;
          772 
          773                 // Prefixes
          774                 case 0x0f:        // 2-byte opcode escape
          775                         goto twobyte_repne;
          776                 case 0x66:        // Operand size prefix
          777                         goto opsize_repne;
          778                 case 0xf3:        // REP/REPE prefix (conflicting)
          779                         goto invalid;
          780                 case 0xf2:        // REPNE prefix (redundant)
          781                         goto invalid;
          782                 }
          783                 goto invalid;
          784 
          785 
          786         // Operand size prefix (0x66) and REP/REPE prefix (0xf3) seen
          787         opsize_rep:
          788                 switch (*inp++) {
          789                 case 0xa5: case 0xa7:                                // MOVS, CMPS
          790                 case 0xab:                                        // STOS
          791                 case 0xad: case 0xaf:                                // LODS, SCAS
          792                         goto notrans;
          793                 }
          794                 goto invalid;
          795 
          796         // Operand size prefix (0x66) and REPNE prefix (0xf2) seen
          797         opsize_repne:
          798                 switch (*inp++) {
          799                 case 0xa7:                                        // CMPS
          800                 case 0xaf:                                        // SCAS
          801                         goto notrans;
          802                 }
          803                 goto invalid;
          804 
          805 
          806         twobyte:
          807                 switch (*inp++) {
          808 
          809                 // SYSCALL instruction for fast system calls
          810                 case 0x05:
          811                         goto gentrap;
          812 
          813                 // No additional operand
          814                 case 0xc8: case 0xc9: case 0xca: case 0xcb:        // BSWAP
          815                 case 0xcc: case 0xcd: case 0xce: case 0xcf:
          816                         goto notrans;
          817 
          818                 // General EA operands
          819                 case 0x10: case 0x11:                        // MOVUPS
          820                 case 0x12:                                // MOVLPS Vps,Mq/MOVHLPS
          821                 case 0x14: case 0x15:                        // UNPCKLPS/UNPCKHPS
          822                 case 0x16:                                // MOVHPS Vps,Mq/MOVLHPS
          823                 case 0x28: case 0x29:                        // MOVAPS
          824                 case 0x2e: case 0x2f:                        // UCOMISS/COMISS
          825                 case 0x40: case 0x41: case 0x42: case 0x43:        // CMOVcc
          826                 case 0x44: case 0x45: case 0x46: case 0x47:
          827                 case 0x48: case 0x49: case 0x4a: case 0x4b:
          828                 case 0x4c: case 0x4d: case 0x4e: case 0x4f:
          829                 case 0x51:                                        // SQRTPS
          830                 case 0x54: case 0x55: case 0x56: case 0x57:        // ANDPS etc.
          831                 case 0x58: case 0x59: case 0x5a: case 0x5b:        // ADDPS etc.
          832                 case 0x5c: case 0x5d: case 0x5e: case 0x5f:        // SUBPS etc.
          833                 case 0xa3:                                        // BT Ev,Gv
          834                 case 0xab:                                        // BTS Ev,Gv
          835                 case 0xaf:                                        // IMUL Gv,Ev
          836                 case 0xb0:                                        // CMPXCHG Eb,Gb
          837                 case 0xb1:                                        // CMPXCHG Ev,Gv
          838                 case 0xb3:                                        // BTR Ev,Gv
          839                 case 0xb6: case 0xb7:                                // MOVZX
          840                 case 0xbb:                                        // BTC Ev,Gv
          841                 case 0xbc: case 0xbd:                                // BSF, BSR
          842                 case 0xbe: case 0xbf:                                // MOVSX
          843                 case 0xc0:                                        // XADD Eb,Gb
          844                 case 0xc1:                                        // XADD Ev,Gv
          845                         inp = xscan_rm(inp);
          846                         goto notrans;
          847 
          848                 // General EA operands plus immediate byte
          849                 case 0xc2:                                // CMPPS Vps,Wps,Ib
          850                 case 0xc6:                                // SHUFPS Vps,Wps,Ib
          851                         inp = xscan_rm(inp);
          852                         inp += 1;
          853                         goto notrans;
          854 
          855                 // Memory-only EA operand
          856                 case 0x13:                                // MOVLPS Mq,Vps
          857                 case 0x17:                                // MOVHPS Mq,Vps
          858                 case 0x2b:                                // MOVNTPS
          859                 case 0xc3:                                // MOVNTI Md,Gd
          860                         if (EA_MOD(*inp) == 3)        // Mem-only
          861                                 goto invalid;
          862                         inp = xscan_rm(inp);
          863                         goto notrans;
          864 
          865                 // Register-only EA operand
          866                 case 0x50:                                // MOVMSKPS
          867                         if (EA_MOD(*inp) != 3)        // Reg-only
          868                                 goto invalid;
          869                         inp = xscan_rm(inp);
          870                         goto notrans;
          871 
          872                 // Jcc - conditional branch with disp32
          873                 case 0x80: case 0x81: case 0x82: case 0x83:
          874                 case 0x84: case 0x85: case 0x86: case 0x87:
          875                 case 0x88: case 0x89: case 0x8a: case 0x8b:
          876                 case 0x8c: case 0x8d: case 0x8e: case 0x8f:
          877                         inp += 4;
          878                         itype = VXI_JUMP;
          879                         dstlen = 6;        // Size of worst-case 32-bit branch
          880                         goto done;
          881 
          882                 // SETcc - set byte based on condition
          883                 case 0x90: case 0x91: case 0x92: case 0x93:
          884                 case 0x94: case 0x95: case 0x96: case 0x97:
          885                 case 0x98: case 0x99: case 0x9a: case 0x9b:
          886                 case 0x9c: case 0x9d: case 0x9e: case 0x9f:
          887                         if (EA_REG(*inp) != 0)
          888                                 goto invalid;
          889                         inp = xscan_rm(inp);
          890                         goto notrans;
          891 
          892                 // Shift instructions
          893                 case 0xa4:                                        // SHLD Ev,Gv,Ib
          894                 case 0xac:                                        // SHRD Ev,Gv,Ib
          895                         inp = xscan_rm(inp);
          896                         inp += 1;
          897                         // XXX fix cc
          898                         goto notrans;
          899                 case 0xa5:                                        // SHLD Ev,Gv,CL
          900                 case 0xad:                                        // SHRD Ev,Gv,CL
          901                         inp = xscan_rm(inp);
          902                         // XXX fix cc
          903                         goto notrans;
          904 
          905                 // Group 8 - Bit test/modify with immediate
          906                 case 0xba:
          907                         if (!(EA_REG(*inp) & 4))
          908                                 goto invalid;
          909                         inp = xscan_rm(inp);
          910                         inp += 1;
          911                         goto invalid;
          912 
          913                 // Group 15 - SSE control
          914                 case 0xae:
          915                         ea = *inp;
          916                         inp = xscan_rm(inp);
          917                         switch (EA_REG(ea)) {
          918                         case 2:                                        // LDMXCSR
          919                         case 3:                                        // STMXCSR
          920                                 if (EA_MOD(ea) == 3)        // Mem-only
          921                                         goto invalid;
          922                                 goto notrans;
          923                         // XX LFENCE, SFENCE, MFENCE?
          924                         }
          925                         goto invalid;
          926 
          927                 // Group 16 - PREFETCH
          928                 case 0x18:
          929                         if (EA_MOD(*inp) == 3)        // Mem-only
          930                                 goto invalid;
          931                         // XX Squash to NOP if EA_REG(*inp) > 3?
          932                         inp = xscan_rm(inp);
          933                         goto notrans;
          934 
          935                 }
          936                 goto invalid;
          937 
          938         twobyte_opsize:
          939                 switch (*inp++) {
          940 
          941                 // General EA operands
          942                 case 0x10: case 0x11:                        // MOVUPD
          943                 case 0x14: case 0x15:                        // UNPCKLPD/UNPCKHPD
          944                 case 0x28: case 0x29:                        // MOVAPD
          945                 case 0x2e: case 0x2f:                        // UCOMISD/COMISD
          946                 case 0x40: case 0x41: case 0x42: case 0x43:        // CMOVcc
          947                 case 0x44: case 0x45: case 0x46: case 0x47:
          948                 case 0x48: case 0x49: case 0x4a: case 0x4b:
          949                 case 0x4c: case 0x4d: case 0x4e: case 0x4f:
          950                 case 0x51:                                        // SQRTPD
          951                 case 0x54: case 0x55: case 0x56: case 0x57:        // ANDPD etc.
          952                 case 0x58: case 0x59: case 0x5a: case 0x5b:        // ADDPD etc.
          953                 case 0x5c: case 0x5d: case 0x5e: case 0x5f:        // SUBPD etc.
          954                 case 0x60: case 0x61: case 0x62: case 0x63:        // PUNPCK...
          955                 case 0x64: case 0x65: case 0x66: case 0x67:        // PCMPGT...
          956                 case 0x68: case 0x69: case 0x6a: case 0x6b:        // PUNPCK...
          957                 case 0x6c: case 0x6d: case 0x6e: case 0x6f:        // PUNPCK...
          958                 case 0x74: case 0x75: case 0x76:                // PCMPEQ...
          959                 case 0x7e: case 0x7f:                                // MOVD/MOVDQA
          960                 case 0xa3:                                        // BT Ev,Gv
          961                 case 0xab:                                        // BTS Ev,Gv
          962                 case 0xb3:                                        // BTR Ev,Gv
          963                 case 0xbb:                                        // BTC Ev,Gv
          964                 case 0xbc: case 0xbd:                                // BSF, BSR
          965                 case 0xaf:                                        // IMUL Gv,Ev
          966                 case 0xb6:                                        // MOVZX Gv,Eb
          967                 case 0xbe:                                        // MOVSX Gv,Eb
          968                 case 0xd1: case 0xd2: case 0xd3:                // PSRLx
          969                 case 0xd4: case 0xd5: case 0xd6:                // PADDQ...
          970                 case 0xd8: case 0xd9: case 0xda: case 0xdb:        // PSUBUSB...
          971                 case 0xdc: case 0xdd: case 0xde: case 0xdf:        // PADDUSB...
          972                 case 0xe0: case 0xe1: case 0xe2: case 0xe3:        // PAVGB...
          973                 case 0xe4: case 0xe5: case 0xe6:                // PMULHUW...
          974                 case 0xe8: case 0xe9: case 0xea: case 0xeb:        // PSUBSB...
          975                 case 0xec: case 0xed: case 0xee: case 0xef:        // PADDSB...
          976                 case 0xf1: case 0xf2: case 0xf3:                // PSLLx
          977                 case 0xf4: case 0xf5: case 0xf6:                // PMULUDQ...
          978                 case 0xf8: case 0xf9: case 0xfa: case 0xfb:        // PSUBB...
          979                 case 0xfc: case 0xfd: case 0xfe:                // PADDB...
          980                         inp = xscan_rm(inp);
          981                         goto notrans;
          982 
          983                 // General EA operands plus immediate byte
          984                 case 0xc5:                                // PEXTRW Gd,VRdq,Ib
          985                         if (EA_MOD(*inp) != 3)
          986                                 goto invalid; // Reg-only
          987                 case 0x70:                                // PSHUFD Vdq,Wdq,Ib
          988                 case 0xc2:                                // CMPPD Vps,Wps,Ib
          989                 case 0xc4:                                // PINSRW Vdq,Ew,Ib
          990                 case 0xc6:                                // SHUFPD Vps,Wps,Ib
          991                         inp = xscan_rm(inp);
          992                         inp += 1;
          993                         goto notrans;
          994 
          995                 // Memory-only EA operand
          996                 case 0x12: case 0x13:                        // MOVLPD
          997                 case 0x16: case 0x17:                        // MOVHPD
          998                 case 0x2b:                                // MOVNTPD
          999                 case 0xe7:                                // MOVNTDQ Mdq,Vdq
         1000                         if (EA_MOD(*inp) == 3)                // Mem-only
         1001                                 goto invalid;
         1002                         inp = xscan_rm(inp);
         1003                         goto notrans;
         1004 
         1005                 // Register-only EA operand
         1006                 case 0x50:                                // MOVMSKPD
         1007                 case 0xd7:                                // PMOVMSKB Gd,VRdq
         1008                 case 0xf7:                                // MASKMOVQ Vdq,Wdq
         1009                         if (EA_MOD(*inp) != 3)                // Reg-only
         1010                                 goto invalid;
         1011                         inp = xscan_rm(inp);
         1012                         goto notrans;
         1013 
         1014                 // Shift instructions
         1015                 case 0xa4:                                        // SHLD Ev,Gv,Ib
         1016                 case 0xac:                                        // SHRD Ev,Gv,Ib
         1017                         inp = xscan_rm(inp);
         1018                         inp += 1;
         1019                         // XXX fix cc
         1020                         goto notrans;
         1021                 case 0xa5:                                        // SHLD Ev,Gv,CL
         1022                 case 0xad:                                        // SHRD Ev,Gv,CL
         1023                         inp = xscan_rm(inp);
         1024                         // XXX fix cc
         1025                         goto notrans;
         1026 
         1027                 // Group 8 - Bit test/modify with immediate
         1028                 case 0xba:
         1029                         if (!(EA_REG(*inp) & 4))
         1030                                 goto invalid;
         1031                         inp = xscan_rm(inp);
         1032                         inp += 1;
         1033                         goto invalid;
         1034 
         1035                 // Group 12, 13, 14 - SSE vector shift w/ immediate
         1036                 case 0x71: case 0x72: case 0x73:
         1037                         ea = *inp;
         1038                         inp = xscan_rm(inp);
         1039                         switch (EA_REG(ea)) {
         1040                         case 2: case 4: case 6:
         1041                                 inp += 1;
         1042                                 goto notrans;
         1043                         }
         1044                         goto invalid;
         1045                 }
         1046                 goto invalid;
         1047 
         1048         twobyte_rep:
         1049                 switch (*inp++) {
         1050 
         1051                 // General EA operands
         1052                 case 0x10: case 0x11:                                // MOVSS
         1053                 case 0x2a: case 0x2c: case 0x2d:                // CVT...
         1054                 case 0x51:                                        // SQRTSS
         1055                 case 0x58: case 0x59: case 0x5a: case 0x5b:        // ADDSS etc.
         1056                 case 0x5c: case 0x5d: case 0x5e: case 0x5f:        // SUBSS etc.
         1057                 case 0x6f:                                        // MOVDQU
         1058                 case 0x7e: case 0x7f:                                // MOVQ/MOVDQU
         1059                 case 0xe6:                                        // CVTDQ2PD
         1060                         inp = xscan_rm(inp);
         1061                         goto notrans;
         1062 
         1063                 // General EA operands plus immediate byte
         1064                 case 0x70:                                // PSHUFHW Vq,Wq,Ib
         1065                 case 0xc2:                                // CMPSS Vss,Wss,Ib
         1066                         inp = xscan_rm(inp);
         1067                         inp += 1;
         1068                         goto notrans;
         1069                 }
         1070                 goto invalid;
         1071 
         1072         twobyte_repne:
         1073                 switch (*inp++) {
         1074 
         1075                 // General EA operands
         1076                 case 0x10: case 0x11:                                // MOVSD
         1077                 case 0x2a: case 0x2c: case 0x2d:                // CVT...
         1078                 case 0x51:                                        // SQRTSD
         1079                 case 0x58: case 0x59: case 0x5a:                // ADDSD etc.
         1080                 case 0x5c: case 0x5d: case 0x5e: case 0x5f:        // SUBSD etc.
         1081                 case 0xe6:                                        // CVTPD2DQ
         1082                         inp = xscan_rm(inp);
         1083                         goto notrans;
         1084 
         1085                 // General EA operands plus immediate byte
         1086                 case 0x70:                                // PSHUFLW Vq,Wq,Ib
         1087                 case 0xc2:                                // CMPSD Vss,Wss,Ib
         1088                         inp = xscan_rm(inp);
         1089                         inp += 1;
         1090                         goto notrans;
         1091                 }
         1092                 goto invalid;
         1093 
         1094 
         1095         invalid:
         1096                 vxrun_cleanup(emu);
         1097                 vxprint("invalid opcode %02x %02x %02x at eip %08x\n",
         1098                         emu->ininst[0], emu->ininst[1], emu->ininst[2],
         1099                         emu->cpu.eip + (emu->ininst - instart));
         1100                 vxrun_setup(emu);
         1101         gentrap:
         1102                 fin = 1;
         1103                 itype = VXI_TRAP;
         1104                 dstlen = 6+5+11+5;        // movl %eax,VSEG:VXEMU_EAX
         1105                                         // movl $fin,%eax
         1106                                         // movl $eip,VSEG:VXEMU_EIP
         1107                                         // jmp vxrun_gentrap
         1108                 goto done;
         1109 
         1110 
         1111         notrans:
         1112                 // No translation of this instruction is required -
         1113                 // dstlen is the same as srclen.
         1114                 dstlen = inp - emu->ininst;
         1115 
         1116         done:
         1117                 // Make sure this whole instruction was actually executable
         1118                 if (inp > inmax) {
         1119                         // If the whole first instruction isn't executable,
         1120                         // then just generate the trap immediately,
         1121                         // since we know it'll be required.
         1122                         if (ino == 0)
         1123                                 goto noexec;
         1124 
         1125                         // Otherwise, just roll back
         1126                         // and stop translating before this instruction,
         1127                         // and let the exception (if any)
         1128                         // happen next time into the translator.
         1129                         goto endfrag;
         1130                 }
         1131 
         1132                 // Make sure there's actually room for the resulting code
         1133                 if (dstofs + dstlen > VXDSTOFS_MAX) {
         1134 
         1135                         // Roll back and end the frag before this instruction
         1136                         endfrag:
         1137                         fin = 1;
         1138                         itype = VXI_ENDFRAG;
         1139                         inp = emu->ininst;        // no source consumed
         1140                         dstlen = 5;                // jmp to next frag
         1141                 }
         1142 
         1143                 // Record the instruction record
         1144                 f->insn[ino].itype = itype;
         1145                 f->insn[ino].srcofs = emu->ininst - instart;
         1146                 f->insn[ino].dstofs = dstofs;
         1147                 f->insn[ino].dstlen = dstlen;
         1148 
         1149                 // Move on to next instruction
         1150                 ino++;
         1151                 emu->ininst = inp;
         1152                 dstofs += dstlen;
         1153 
         1154         } while (!fin);
         1155 
         1156         // Record the total number of instructions for this frag
         1157         f->ninsn = ino;
         1158         
         1159 // vxprint("%d ins - to %x\n", ino, emu->ininst - instart + eip);
         1160         // Clear the special instruction-scanning exception state flag
         1161         emu->guestfragend = emu->ininst;
         1162         emu->ininst = NULL;
         1163 
         1164         return 0;
         1165 }
         1166 
         1167 // Try to optimize jump instructions whose target
         1168 // is in the same fragment we're building.
              //
              // p is the process being translated; ino indexes the jump's entry in
              // the insn table of the fragment under construction (emu->txfrag).
              // The target is decoded from the guest code bytes.  If it is exactly
              // the start of an instruction recorded in this fragment and lies
              // within 8-bit displacement range of the current output offsets, the
              // entry is retyped VXI_JUMP8 and its dstlen becomes 2 (3 with a
              // branch hint prefix).  Otherwise the entry is left untouched.
         1169 static inline void xsimp_jump(struct vxproc *p, unsigned ino)
         1170 {
         1171         struct vxemu *emu = p->emu;
         1172         struct vxfrag *f = emu->txfrag;
         1173         unsigned ninsn = f->ninsn;
         1174         unsigned srcofs = f->insn[ino].srcofs;
         1175         uint8_t *inp = (uint8_t*)emu->mem->base + emu->cpu.eip + srcofs;
         1176 
         1177         // Skip any branch prediction hint prefix
                      // (after this, inp points just past the first opcode byte)
         1178         uint8_t opcode = *inp++;
         1179         int dstlen = 2;
         1180         uint32_t targofs = srcofs;
         1181         if (opcode == 0x2e || opcode == 0x3e) {
         1182                 opcode = *inp++;
         1183                 dstlen = 3;
         1184                 targofs++;
         1185         }
         1186 
         1187         // Determine the jump target.
         1188         if (opcode == 0xe9) {
         1189                 // 32-bit JMP
         1190                 targofs += 5 + *(int32_t*)inp;
         1191         } else if (opcode == 0x0f) {
         1192                 // 32-bit Jcc
                              // inp is already past the 0x0f byte, so inp[0] is the
                              // second opcode byte and the rel32 starts at inp[1].
                              // (xemit_jump8 reads the same field as &inp[2] from a
                              // pointer that still addresses the 0x0f byte.)
         1193                 targofs += 6 + *(int32_t*)&inp[1];
         1194         } else {
         1195                 // 8-bit JMP or Jcc or LOOP
         1196                 targofs += 2 + (int32_t)(int8_t)*inp;
         1197         }
                      // targofs is unsigned, so a target before the fragment start
                      // wraps to a huge value and is rejected by this test as well.
         1198         if (targofs > f->insn[ninsn-1].srcofs)
         1199                 return;                // Target is not in this fragment
         1200 
         1201         // Find the target in the insn table
                      // (binary search for the last entry with srcofs <= targofs)
         1202         unsigned lo = 0;
         1203         unsigned hi = ninsn-1;
         1204         while (hi > lo) {
         1205                 unsigned mid = (lo + hi + 1) / 2;
         1206                 unsigned midofs = f->insn[mid].srcofs;
         1207                 if (targofs >= midofs)
         1208                         lo = mid;
         1209                 else
         1210                         hi = mid - 1;
         1211         }
         1212         if (targofs != f->insn[lo].srcofs)
         1213                 return;                // Jump target is _between_ instructions!
         1214 
         1215         // Make sure target is still in range after translation
                      // Forward targets are measured from the start of the next
                      // entry; backward targets from 3 bytes into this one
                      // (presumably the longest short form, hint prefix included).
         1216         if (lo > ino) {
         1217                 if ((int)f->insn[lo].dstofs >
         1218                                 (int)f->insn[ino+1].dstofs+127)
         1219                         return;        // too far ahead
         1220         } else {
         1221                 if ((int)f->insn[lo].dstofs <
         1222                                 (int)f->insn[ino].dstofs+3-128)
         1223                         return;        // too far behind
         1224         }
         1225 
         1226         // In range - convert it to an 8-bit jump!
         1227         f->insn[ino].itype = VXI_JUMP8;
         1228         f->insn[ino].dstlen = dstlen;
         1229 }
         1230 
         1231 // Translation pass 2:
         1232 // Visit the instruction table from its last entry down to its first,
         1233 // offering each direct jump or LOOP-family entry to xsimp_jump.
         1234 static void xsimp(struct vxproc *p)
         1235 {
         1236         struct vxfrag *frag = p->emu->txfrag;
         1237         unsigned count = frag->ninsn;
         1238         int idx;
         1239 
         1240         for (idx = (int)count - 1; idx >= 0; idx--) {
         1241                 unsigned kind = frag->insn[idx].itype;
         1242 
         1243                 // Every other instruction type has no simplification.
         1244                 if (kind == VXI_LOOP || kind == VXI_LOOPZ ||
         1245                     kind == VXI_LOOPNZ || kind == VXI_JUMP)
         1246                         xsimp_jump(p, idx);
         1247         }
         1248 }
         1256 
         1257 // Translation pass 3:
         1258 // Assign each instruction its final output offset, packing them
         1259 // back to back starting PROLOG_LEN bytes into the fragment code.
         1260 static void xplace(struct vxproc *p)
         1261 {
         1262         struct vxfrag *frag = p->emu->txfrag;
         1263         unsigned total = frag->ninsn;
         1264         size_t cursor = PROLOG_LEN;
         1265         unsigned n = 0;
         1266 
         1267         while (n < total) {
         1268                 frag->insn[n].dstofs = cursor;
         1269                 cursor += frag->insn[n].dstlen;
         1270                 n++;
         1271         }
              }
         1272 
         1273 // Emit a direct 32-bit jump/branch/call/endfrag instruction.
         1274 // The original jump might have been either short or long.
         1275 // NB. vxemu_sighandler (sig.c) knows that jumps don't trash registers.
         1276 // NB. vxemu_sighandler knows that calls push the return address 
         1277 // onto the stack as the first instruction, and that the target address
         1278 // can be found at offset 26 of the translation.
              //
              // p:      process whose fragment (emu->txfrag) is being emitted.
              // itype:  VXI_JUMP, VXI_CALL, VXI_LOOP, VXI_LOOPZ, VXI_LOOPNZ or
              //         VXI_ENDFRAG (anything else trips the assert below).
              // ino:    index of the instruction in the fragment's insn table.
              // extrap: in/out pointer to the next free trampoline byte; it is
              //         advanced past the 24 bytes (11+5+4+4) written here.
              //
              // The emitted jump always targets a freshly written trampoline.
              // The trampoline stores the address of its patch record in the
              // jmpinfo field and jumps to vxrun_lookup_backpatch.  The patch
              // record holds the guest target eip followed by the address just
              // past the emitted jump.
         1279 static inline void xemit_jump(
         1280                 struct vxproc *p, uint8_t itype, unsigned ino,
         1281                 uint8_t **extrap)
         1282 {
         1283         extern void vxrun_lookup_backpatch();
         1284 
         1285         struct vxemu *emu = p->emu;
         1286         struct vxfrag *f = emu->txfrag;
         1287 
         1288         // Determine the jump target EIP
         1289         // and emit the appropriate call/jump/branch instruction,
         1290         // with its target pointing to a temporary jump trampoline.
         1291         uint8_t *tramp = *extrap;
         1292         unsigned srcofs = f->insn[ino].srcofs;
         1293         uint8_t *inp = (uint8_t*)emu->mem->base + emu->cpu.eip + srcofs;
         1294         uint8_t *outp = FRAGCODE(f) + f->insn[ino].dstofs;
                      // Starts as the guest eip of this instruction; each case adds
                      // the instruction length and displacement to reach the target.
         1295         uint32_t targeip = emu->cpu.eip + srcofs;
         1296         if (itype == VXI_JUMP) {
         1297 
         1298                 uint8_t opcode = *inp;
         1299 
         1300                 // Copy any branch taken/not taken hint prefix
         1301                 if (opcode == 0x2e || opcode == 0x3e) {
         1302                         *outp++ = opcode;
         1303                         opcode = *++inp;
         1304                         targeip++;
         1305                 }
         1306 
         1307                 // Emit the branch/jump/call instruction
                              // The CALL, LOOP and ENDFRAG branches further down jump
                              // to the emitjmp label inside this switch; the break
                              // there leaves the switch and the if/else chain, so
                              // every path ends at the trampoline code below.
         1308                 switch (opcode) {
         1309 
         1310                 case 0xe9:        // was a 32-bit JMP
         1311                         targeip += 5 + *(int32_t*)&inp[1];
         1312                         goto emitjmp;
         1313 
         1314                 case 0xeb:        // was an 8-bit JMP
         1315                         targeip += 2 + (int32_t)(int8_t)inp[1];
         1316                 emitjmp:
         1317                         outp[0] = 0xe9;                // always emit 32-bit JMP
         1318                         *(int32_t*)&outp[1] = (int32_t)(tramp - (outp+5));
         1319                         outp += 5;
         1320                         break;
         1321 
         1322                 case 0x0f:        // was a 32-bit Jcc
         1323                         opcode = inp[1];
         1324                         targeip += 6 + *(int32_t*)&inp[2];
         1325                         goto emitjcc;
         1326 
         1327                 default:        // was an 8-bit Jcc
                                      // the 32-bit form's second opcode byte is the
                                      // 8-bit opcode plus 0x10
         1328                         opcode = inp[0] + 0x10;
         1329                         targeip += 2 + (int32_t)(int8_t)inp[1];
         1330                 emitjcc:
         1331                         outp[0] = 0x0f;                // always emit 32-bit Jcc
         1332                         outp[1] = opcode;
         1333                         *(int32_t*)&outp[2] = (int32_t)(tramp - (outp+6));
         1334                         outp += 6;
         1335                         break;
         1336                 }
         1337         } else if (itype == VXI_CALL) {
         1338                 assert(*inp == 0xe8);        // 32-bit CALL
         1339                 
                              // The call becomes a push of the guest return address
                              // (the eip following the 5-byte CALL) plus a plain jump.
         1340                 outp[0] = 0x68;                // pushl $<return_eip>
         1341                 *(uint32_t*)&outp[1] = targeip + 5;
         1342                 outp += 5;
         1343                 targeip += 5 + *(int32_t*)&inp[1];
         1344                 goto emitjmp;
         1345         } else if (itype == VXI_LOOP || itype == VXI_LOOPZ || itype == VXI_LOOPNZ) {
                              // Decrement ecx with lea, then use short branches to
                              // skip over the 5-byte jump when the loop must exit.
         1346                 *outp++ = 0x8d;        // leal -1(ecx) -> ecx
         1347                 *outp++ = 0x49;
         1348                 *outp++ = 0xff;
         1349                 if (itype == VXI_LOOPZ) {
         1350                         *outp++ = 0x75;        // jnz .+7
         1351                         *outp++ = 0x07;
         1352                 } else if (itype == VXI_LOOPNZ) {
         1353                         *outp++ = 0x74;        // jz .+7
         1354                         *outp++ = 0x07;
         1355                 }
         1356                 *outp++ = 0xe3;        // jecxz .+5
         1357                 *outp++ = 0x05;
         1358                 targeip += 2 + (int32_t)(int8_t)inp[1];
         1359                 goto emitjmp;
         1360         } else {
         1361                 // End-of-fragment pseudo-instruction.
         1362                 // targeip already points to the eip we wish to "jump" to.
         1363                 assert(itype == VXI_ENDFRAG);
         1364                 goto emitjmp;
         1365         }
         1366 
         1367         // Emit the trampoline code
                      // Layout: 11-byte movl, 5-byte jmp, then the 8-byte patch
                      // record.  The value stored by the movl is the record's
                      // address expressed relative to emu.
         1368         tramp[0] = VSEGPREFIX;                // movl $patchrec,VSEG:VXEMU_JMPINFO
         1369         tramp[1] = 0xc7;
         1370         tramp[2] = 0x05;
         1371         *(uint32_t*)&tramp[3] = offsetof(vxemu,jmpinfo);
         1372         *(uint32_t*)&tramp[7] = (uint32_t)((intptr_t)tramp+11+5 -
         1373                                                 (intptr_t)emu);
         1374 
         1375         tramp[11+0] = 0xe9;                // jmp vxrun_lookup_backpatch
         1376         *(uint32_t*)&tramp[11+1] = (uint32_t)((intptr_t)vxrun_lookup_backpatch
         1377                                         - (intptr_t)&tramp[11+5]);
         1378 
                      // Patch record: guest target eip, then the address just past
                      // the jump emitted above.
         1379         *(uint32_t*)&tramp[11+5] = targeip;                // .long targeip
         1380         *(uint32_t*)&tramp[11+5+4] = (uint32_t)(intptr_t)outp; // .long jmpend
         1381         *extrap = &tramp[11+5+4+4];
         1382 }
         1383 
         1384 // Emit a short (8-bit) jump/branch instruction.
         1385 // The original branch might have been either short or long.
         1386 // NB. vxemu_sighandler (sig.c) knows that jump8s don't
         1387 // trash registers.
              //
              // p is the process being translated; ino indexes an insn table
              // entry of type VXI_JUMP8.  The target is decoded again from the
              // guest bytes and must be the start of an instruction in this
              // fragment (both conditions are asserted).  The displacement is
              // computed from the final dstofs values in the insn table.
         1388 static inline void xemit_jump8(struct vxproc *p, unsigned ino)
         1389 {
         1390         struct vxemu *emu = p->emu;
         1391         struct vxfrag *f = emu->txfrag;
         1392         unsigned srcofs = f->insn[ino].srcofs;
         1393         uint8_t *inp = (uint8_t*)emu->mem->base + emu->cpu.eip + srcofs;
         1394         uint8_t *outp = FRAGCODE(f) + f->insn[ino].dstofs;
         1395 
         1396         // Copy any branch taken/not taken hint prefix
                      // (unlike xsimp_jump, inp keeps pointing AT the opcode byte)
         1397         uint8_t opcode = *inp;
         1398         int outlen = 2;
         1399         uint32_t targofs = srcofs;
         1400         if (opcode == 0x2e || opcode == 0x3e) {
         1401                 *outp++ = opcode;
         1402                 opcode = *++inp;
         1403                 outlen = 3;
         1404                 targofs++;
         1405         }
         1406 
         1407         // Determine the jump target and output opcode.
         1408         switch (opcode) {
         1409         case 0xe9:        // 32-bit JMP
         1410                 opcode = 0xeb;
         1411                 targofs += 5 + *(int32_t*)&inp[1];
         1412                 break;
         1413         case 0x0f:        // 32-bit Jcc
                              // short-form opcode is the second long-form byte - 0x10
         1414                 opcode = inp[1] - 0x10;
         1415                 targofs += 6 + *(int32_t*)&inp[2];
         1416                 break;
         1417         case 0xeb:        // 8-bit JMP
         1418         case 0xe0:        // 8-bit LOOP
         1419         case 0xe1:
         1420         case 0xe2:
         1421         default:        // 8-bit Jcc
                              // already a 2-byte form: opcode is emitted unchanged
         1422                 targofs += 2 + (int32_t)(int8_t)inp[1];
         1423                 break;
         1424         }
         1425         assert(targofs <= f->insn[f->ninsn-1].srcofs);
         1426 
         1427         // Find the target in the insn table
                      // (binary search for the last entry with srcofs <= targofs)
         1428         unsigned lo = 0;
         1429         unsigned hi = f->ninsn-1;
         1430         while (hi > lo) {
         1431                 unsigned mid = (lo + hi + 1) / 2;
         1432                 unsigned midofs = f->insn[mid].srcofs;
         1433                 if (targofs >= midofs)
         1434                         lo = mid;
         1435                 else
         1436                         hi = mid - 1;
         1437         }
         1438         assert(targofs == f->insn[lo].srcofs);
         1439 
         1440         // Emit the 2-byte jump instruction (3 bytes with prediction hint)
                      // The displacement is relative to the end of the emitted
                      // instruction; it is stored truncated to one byte.
         1441         outp[0] = opcode;
         1442         outp[1] = (int)f->insn[lo].dstofs - ((int)f->insn[ino].dstofs+outlen);
         1443 }
         1444 
         1445 // Emit an indirect jump/call/ret instruction.
         1446 // NB. vxemu_sighandler (sig.c) knows that ebx is saved as
         1447 // the first instruction and then trashed.  
         1448 // NB. vxemu_sighandler knows that the immediate count 
         1449 // in a return immediate instruction is at offset 10.
         1450 // NB. vxemu_sighandler knows that in an indirect call:
         1451 //        * the stack is unchanged until offset -5 (from the end)
         1452 //        * at offset -5, the return address has been pushed
         1453 //          and the target eip is in ebx.
              //
              // p is the process being translated, itype is VXI_CALLIND,
              // VXI_JUMPIND, VXI_RETURN or VXI_RETURN_IMM, and ino indexes the
              // instruction in the fragment's insn table.  Every variant saves
              // ebx, loads the guest target eip into ebx and jumps to
              // vxrun_lookup_indirect.  The number of bytes written must equal
              // the dstlen recorded for the entry (asserted at the end).
         1454 static inline void xemit_indir(struct vxproc *p, int itype, unsigned ino)
         1455 {
         1456         unsigned i;
         1457         extern void vxrun_lookup_indirect();
         1458 
         1459         struct vxemu *emu = p->emu;
         1460         struct vxfrag *f = emu->txfrag;
         1461         unsigned srcofs = f->insn[ino].srcofs;
         1462         uint8_t *inp = (uint8_t*)emu->mem->base + emu->cpu.eip + srcofs;
         1463         uint8_t *outp = FRAGCODE(f) + f->insn[ino].dstofs;
         1464         uint8_t *outp0 = outp;
         1465 
         1466         // Common: movl %ebx,VSEG:VXEMU_EBX
         1467         outp[0] = VSEGPREFIX;                // Appropriate segment override
         1468         outp[1] = 0x89;
         1469         outp[2] = 0x1d;
         1470         *(uint32_t*)&outp[3] = offsetof(vxemu, cpu.reg[EBX]);
         1471         outp += 7;
         1472 
         1473         // Instruction-specific code
         1474         switch (itype) {
         1475         default:
                              // NOTE(review): if assert is compiled out (NDEBUG), an
                              // unexpected itype falls through into VXI_CALLIND.
         1476                 assert(0);
         1477 
         1478         case VXI_CALLIND:
         1479                 assert(inp[0] == 0xff);
                              // EA_REG presumably extracts the ModRM reg field:
                              // 2 selects the indirect CALL form, 4 the indirect JMP.
         1480                 assert(EA_REG(inp[1]) == 2);
         1481                 goto Common;
         1482 
         1483         case VXI_JUMPIND:
         1484                 assert(inp[0] == 0xff);
         1485                 assert(EA_REG(inp[1]) == 4);
         1486         Common:;
                              // srclen is the guest instruction length as measured by
                              // xscan_rm.  The output reuses the guest's addressing
                              // bytes: the ModRM byte keeps bits 0xc7 and gets EBX in
                              // bits 3-5; the remaining bytes are copied verbatim.
         1487                 unsigned srclen = xscan_rm(inp+1) - inp;
         1488                 outp[0] = 0x8b;                // movl <indirect_ea>,%ebx
         1489                 outp[1] = (inp[1] & 0xc7) | (EBX << 3);
         1490                 for (i = 2; i < srclen; i++)
         1491                         outp[i] = inp[i];
         1492                 outp += srclen;
         1493                 
         1494                 if(itype == VXI_CALLIND) {
                                      // return address = guest eip just past the call
         1495                         outp[0] = 0x68;                // pushl $<return_eip>
         1496                         *(uint32_t*)&outp[1] = emu->cpu.eip + srcofs + srclen;
         1497                         outp += 5;
         1498                 }
         1499                 break;
         1500 
         1501         case VXI_RETURN:
         1502                 assert(inp[0] == 0xc3);
         1503                 *outp++ = 0x5b;                // popl %ebx
         1504                 break;
         1505         
         1506         case VXI_RETURN_IMM:
         1507                 assert(inp[0] == 0xc2);
                              // The guest's 16-bit immediate is zero-extended into the
                              // 32-bit immediate of the add.  It lands at offset
                              // 7+3 = 10 of the translation, the offset the header
                              // comment says the signal handler relies on.
         1508                 outp[0] = 0x5b;                // popl %ebx
         1509                 outp[1] = 0x81;                // add $<spc>,%esp
         1510                 outp[2] = 0xc4;
         1511                 *(uint32_t*)&outp[3] = *(uint16_t*)&inp[1];
         1512                 outp += 1+6;
         1513                 break;
         1514         }
         1515 
         1516         // Common: jmp vxrun_lookup_indirect
                      // (rel32 measured from the end of this 5-byte jmp)
         1517         outp[0] = 0xe9;
         1518         *(uint32_t*)&outp[1] = (uint32_t)(intptr_t)vxrun_lookup_indirect -
         1519                                 (uint32_t)(intptr_t)&outp[5];
         1520         outp += 5;
         1521         assert(outp - outp0 == f->insn[ino].dstlen);
         1522 }
         1523 
         1524 // NB. vxemu_sighandler (sig.c) knows that eax is saved as
         1525 // the first instruction and then trashed.
              //
              // Emit the 27-byte (6+5+11+5) sequence for a trapping instruction:
              // save eax, load the trap number into eax, store the guest eip to
              // report, and jump to vxrun_gentrap.  p is the process being
              // translated and ino indexes the instruction in the fragment's
              // insn table; the entry's dstlen must be 27 (asserted at the end).
         1526 static void xemit_trap(struct vxproc *p, int ino)
         1527 {
         1528         extern void vxrun_gentrap();
         1529 
         1530         struct vxemu *emu = p->emu;
         1531         struct vxfrag *f = emu->txfrag;
         1532 
         1533         // Trapping instruction.  Determine the trap type.
                      // Breakpoint, INT and SYSCALL report the eip after the
                      // instruction; anything else is VXTRAP_INVALID and reports the
                      // eip of the instruction itself.
         1534         uint32_t trapno;
         1535         uint32_t trapeip = emu->cpu.eip + f->insn[ino].srcofs;
         1536         uint8_t *inp = (uint8_t*)emu->mem->base + trapeip;
         1537         switch (inp[0]) {
         1538         case 0xcc:        // Breakpoint
         1539                 trapno = VXTRAP_BREAKPOINT;
         1540                 trapeip++;        // EIP points after insn
         1541                 break;
         1542         case 0xcd:        // INT $n
                              // the interrupt number is added to VXTRAP_SOFT
         1543                 trapno = VXTRAP_SOFT + inp[1];
         1544                 trapeip += 2;        // EIP points after insn
         1545                 break;
         1546         case 0x0f:
         1547                 if (inp[1] == 0x05) {        // SYSCALL instruction
         1548                         trapno = VXTRAP_SYSCALL;
         1549                         trapeip += 2;        // EIP points after insn
         1550                         break;
         1551                 }
         1552                 // fall thru...
         1553         default:        // Invalid instruction
         1554                 trapno = VXTRAP_INVALID;
         1555                 break;
         1556         }
         1557 
         1558         // Emit the output code sequence.
         1559         uint8_t *outp = FRAGCODE(f) + f->insn[ino].dstofs;
         1560 
         1561         // movl %eax,VSEG:VXEMU_EAX
         1562         outp[0] = VSEGPREFIX;
         1563         outp[1] = 0xa3;
         1564         *(uint32_t*)&outp[2] = offsetof(vxemu, cpu.reg[EAX]);
         1565 
         1566         // movl $trapno,%eax
         1567         outp[6+0] = 0xb8;
         1568         *(uint32_t*)&outp[6+1] = trapno;
         1569 
         1570         // movl $trapeip,VSEG:VXEMU_EIP
         1571         outp[6+5+0] = VSEGPREFIX;
         1572         outp[6+5+1] = 0xc7;
         1573         outp[6+5+2] = 0x05;
         1574         *(uint32_t*)&outp[6+5+3] = offsetof(vxemu, cpu.eip);
         1575         *(uint32_t*)&outp[6+5+7] = trapeip;
         1576 
         1577         // jmp vxrun_gentrap
                      // (rel32 measured from the end of this 5-byte jmp)
         1578         outp[6+5+11+0] = 0xe9;
         1579         *(uint32_t*)&outp[6+5+11+1] = (uint32_t)(intptr_t)vxrun_gentrap -
         1580                                         (uint32_t)(intptr_t)&outp[6+5+11+5];
         1581 
         1582         assert(f->insn[ino].dstlen == 6+5+11+5);
         1583 }
         1584 
         1585 // Translation pass 4:
         1586 // Emit the translated instruction stream.
              //
              // Writes the prolog and then every instruction of the fragment under
              // construction (emu->txfrag) at the offsets fixed by xplace.  Jump
              // trampolines are appended after the last instruction.  Afterwards
              // the function records the new end of used code space, pushes the
              // fragment onto the downward-growing code pointer table, and inserts
              // the guest entry eip into the entrypoint hash table.
         1587 static void xemit(struct vxproc *p)
         1588 {
         1589         unsigned i, j;
         1590         struct vxemu *emu = p->emu;
         1591         struct vxfrag *f = emu->txfrag;
         1592         unsigned ninsn = f->ninsn;
         1593 
         1594         // Writing the instruction stream immediately after the insn table.
         1595         uint8_t *outstart = FRAGCODE(f);
         1596 
         1597         // Write extra trampoline code after the already-arranged code.
         1598         uint8_t *extra = outstart + (unsigned)f->insn[ninsn-1].dstofs
         1599                                 + (unsigned)f->insn[ninsn-1].dstlen;
         1600 
         1601         // First emit the prolog
         1602         outstart[0] = VSEGPREFIX;                        // Segment override
         1603         outstart[1] = 0x8b; outstart[2] = 0x1d;                // movl <abs32>,%ebx
         1604         *(uint32_t*)&outstart[3] = offsetof(vxemu, cpu.reg[EBX]);
         1605 
         1606         // Now emit the instructions
                      // (direction flag cleared so rep movsb below copies upwards)
         1607         asm volatile("cld");
         1608         uint8_t *instart = (uint8_t*)emu->mem->base + emu->cpu.eip;
         1609         for (i = 0; i < ninsn; ) {
         1610                 unsigned itype = f->insn[i].itype;
         1611 
         1612                 switch (itype) {
         1613 
         1614                 case VXI_NOTRANS:
         1615                         // Just copy strings of untranslated instructions.
                                      // This relies on the fragment's last entry not
                                      // being VXI_NOTRANS, so that insn[j] below is a
                                      // valid entry.
         1616                         for (j = i+1; j < ninsn; j++)
         1617                                 if (f->insn[j].itype != VXI_NOTRANS)
         1618                                         break;
         1619 
         1620                         unsigned srcofs = f->insn[i].srcofs;
         1621                         unsigned dstofs = f->insn[i].dstofs;
         1622                         uint8_t *inp = instart + f->insn[i].srcofs;
         1623                         uint8_t *outp = outstart + f->insn[i].dstofs;
         1624                         unsigned cnt = f->insn[j].dstofs - dstofs;
         1625                         assert(cnt == f->insn[j].srcofs - srcofs);
                                      // rep movsb modifies ecx/esi/edi and writes
                                      // memory, so the operands are read-write and
                                      // memory is declared clobbered.
         1626                         asm volatile("rep movsb"
         1627                                 : "+c" (cnt), "+S" (inp), "+D" (outp)
                                              : : "memory");
         1628 
         1629                         i = j;
         1630                         break;
         1631 
         1632                 case VXI_CALL:
         1633                 case VXI_JUMP:
         1634                 case VXI_ENDFRAG:
         1635                 case VXI_LOOP:
         1636                 case VXI_LOOPZ:
         1637                 case VXI_LOOPNZ:
         1638                         xemit_jump(p, itype, i++, &extra);
         1639                         break;
         1640 
         1641                 case VXI_JUMP8:
         1642                         xemit_jump8(p, i++);
         1643                         break;
         1644 
         1645                 case VXI_RETURN:
                              // xemit_indir implements VXI_RETURN_IMM, so dispatch it
                              // there instead of letting it reach the default case.
                              case VXI_RETURN_IMM:
         1646                 case VXI_JUMPIND:
         1647                 case VXI_CALLIND:
         1648                         xemit_indir(p, itype, i++);
         1649                         break;
         1650 
         1651                 case VXI_TRAP:
         1652                         xemit_trap(p, i++);
         1653                         break;
         1654 
         1655                 default:
         1656                         assert(0);
         1657                 }
         1658         }
         1659 
         1660         // Record the final amount of code table space we've consumed.
         1661         emu->codefree = extra;
         1662 
         1663         // Add an entry to the code pointer table to the new fragment
                      // (the table grows downward: the pointer is pre-decremented)
         1664         uint32_t *codetab = emu->codetab;
         1665         *--codetab = (uint32_t)(intptr_t)f;
         1666         emu->codetab = codetab;
         1667 
                      // Emitted code must not have run into the pointer table.
         1668         assert((void*)extra < (void*)codetab);
         1669 
         1670         // Insert the new entrypoint into the hash table
                      // (linear probing; this eip must not already be present)
         1671         uint32_t idx = etabhash(emu->cpu.eip) & emu->etabmask;
         1672         while (emu->etab[idx].srceip != NULLSRCEIP) {
         1673                 assert(emu->etab[idx].srceip != emu->cpu.eip);
         1674                 idx = (idx+1) & emu->etabmask;
         1675         }
         1676         emu->etab[idx].srceip = emu->cpu.eip;
         1677         emu->etab[idx].dsteip = (uint32_t)(intptr_t)outstart;
         1678         emu->etabcnt++;
         1679         
                      // Optional debug dump of the guest code and its translation.
         1680         if (vx32_debugxlate) {
         1681                 vxrun_cleanup(emu);
         1682                 vxprint("====== xlate\n");
         1683                 vxprint("-- guest\n");
         1684                 disassemble(emu->mem->base, emu->guestfrag, emu->guestfragend);
         1685                 vxprint("-- translation\n");
         1686                 disassemble(NULL, outstart, extra);
         1687                 vxprint("======\n");
         1688                 vxrun_setup(emu);
         1689         }
         1690 }
         1691 
         1692 static int xlate(struct vxproc *vxp)
         1693 {
         1694         // Run the four translation passes in order.  A non-zero result
         1695         // from the scanner is handed straight back to the caller and
         1696         // the remaining passes are skipped.
         1697         int status = xscan(vxp);        // pass 1: build preliminary vxinsn table
         1698 
         1699         if (status == 0) {
         1700                 xsimp(vxp);        // pass 2: simplify vxinsns wherever possible
         1701                 xplace(vxp);        // pass 3: final placement and sizes
         1702                 xemit(vxp);        // pass 4: emit translated instructions
         1703         }
         1704 
         1705         return status;
         1706 }
         1710 
         1711 #if 0
         1712 #include <asm/prctl.h>
         1713 #include <sys/prctl.h>
         1714 #endif
         1715 
         1716 void dumpsegs(const char *prefix)
         1717 {
                      // Read the ds, es, fs, gs and ss segment registers and print
                      // them through vxprint on one line labelled with prefix.
         1718         uint16_t sel_ds, sel_es, sel_fs, sel_gs, sel_ss;
         1719 
         1720         asm("movw %%ds,%0; movw %%es,%1" : "=rm" (sel_ds), "=rm" (sel_es));
         1721         asm("movw %%fs,%0; movw %%gs,%1" : "=rm" (sel_fs), "=rm" (sel_gs));
         1722         asm("movw %%ss,%0" : "=rm" (sel_ss));
         1723         vxprint("%s: ds=%04x es=%04x fs=%04x gs=%04x ss=%04x\n",
         1724                 prefix, sel_ds, sel_es, sel_fs, sel_gs, sel_ss);
         1725 #if 0
         1726         unsigned long fsofs, gsofs;
         1727         arch_prctl(ARCH_GET_FS, (unsigned long)&fsofs);
         1728         arch_prctl(ARCH_GET_GS, (unsigned long)&gsofs);
         1729         vxprint("fsofs=%016lx gsofs=%016lx\n", fsofs, gsofs);
         1730 #endif
         1731 }
         1732 
         1733 int vxproc_run(struct vxproc *vxp)
         1734 {
         1735         vxemu *emu = vxp->emu;
         1736         vxmmap *mm;
         1737 
         1738         // Make sure the process is mapped into our host memory
         1739         if ((mm = vxmem_map(vxp->mem, 0)) == NULL)
         1740                 return -1;
         1741         if (vxemu_map(emu, mm) < 0) {
         1742                 vxmem_unmap(vxp->mem, mm);
         1743                 return -1;
         1744         }
         1745         emu->mem = mm;
         1746         
         1747         // Pending trap?
         1748         if(emu->cpu_trap){
         1749                 assert(0);        // Can this even happen?
         1750                 int trap = emu->cpu_trap;
         1751                 emu->cpu_trap = 0;
         1752                 return trap;
         1753         }
         1754         
         1755         uint16_t vs;
         1756         // Registers can't be already loaded or we will smash
         1757         // the "host segment registers" part of emu.
         1758         asm("movw %"VSEGSTR",%0"
         1759                 : "=r" (vs));
         1760 
         1761         assert(vs != emu->emusel);
         1762 
         1763         // Save our stack environment for exception-handling.
         1764         // This only saves the integer registers.  If the signal handler
         1765         // happens in the middle of a translation involving floating-point
         1766         // code, we need to make sure that when we jump back here in the
         1767         // handler, we first restore the floating point registers to
         1768         // the state they were in during the computation.  (Operating
         1769         // systems typically save the FPU state, reset the FPU, and 
         1770         // pass the saved state to the signal handler.)
         1771         // The Linux signal handler does exactly this.
         1772         //
         1773         // On FreeBSD, after hours wasted trying to manually restore the
         1774         // floating point state, I gave up.  Instead, the FreeBSD code
         1775         // saves an mcontext_t here and then overwrites the signal handler's
         1776         // mcontext_t with this one.  Then when it returns from the handler,
         1777         // the OS will restore the floating point state and then the mcontext,
         1778         // jumping back here with exactly the FPU state that we want.
         1779         // Why not do this on Linux?  Because it didn't work when I tried it,
         1780         // and I was not about to track down why.
         1781         //
         1782         // On OS X, there is no getcontext, so you'd think we'd be back to
         1783         // the Linux approach of manual FPU restore + siglongjmp.
         1784         // Unfortunately, OS X can't deal with siglongjmp from alternate
         1785         // signal stacks.  If it invokes a signal handler on an alternate 
         1786         // signal stack and that handler uses siglongjmp to go back to the
         1787         // original stack instead of returning out of the handler, then
         1788         // OS X thinks the code is still running on the alternate stack, 
         1789         // which causes all sorts of problems.  Thus we have to do the
         1790         // getcontext trick.  Besides, it is far easier to write a getcontext
         1791         // routine--we already need to know the layout of mcontext_t to
         1792         // write the signal handler--than to figure out what the FPU state
         1793         // looks like.
         1794         //
         1795         // And you thought this was going to be easy.
         1796 
         1797 #if defined(__FreeBSD__)
         1798         ucontext_t env;
         1799         emu->trapenv = &env.uc_mcontext;
         1800         volatile int n = 0;
         1801         getcontext(&env);
         1802         if(++n > 1){
         1803 #elif defined(__APPLE__)
         1804         struct i386_thread_state env;
         1805         emu->trapenv = &env;
         1806         if(vx32_getcontext(&env)){
         1807 #else
         1808         mcontext_t env;
         1809         emu->trapenv = &env;
         1810         if(vx32_getcontext(&env)){
         1811 #endif
         1812                 if(vx32_debugxlate) vxprint("VX trap %x err %x va %08x "
         1813                                 "veip %08x veflags %08x\n",
         1814                                 emu->cpu_trap, emu->cpu.traperr, emu->cpu.trapva,
         1815                                 emu->cpu.eip, emu->cpu.eflags);
         1816                 goto trapped;
         1817         }
         1818 
         1819         // Load our special vxproc segment selector into fs register.
         1820         vxrun_setup(emu);
         1821 
         1822         while (1) {
         1823                 // Look up the translated entrypoint for the current vx32 EIP.
         1824                 uint32_t eip = emu->cpu.eip;
         1825                 uint32_t idx = etabhash(eip) & emu->etabmask;
         1826                 while (emu->etab[idx].srceip != eip) {
         1827                         if (emu->etab[idx].srceip == NULLSRCEIP)
         1828                                 goto notfound;
         1829                         idx = (idx+1) & emu->etabmask;
         1830                 }
         1831 
         1832                 // Run the translated code fragment.
         1833                 // Return if the code terminated with an exception.
         1834                 // Otherwise it terminated because of an untranslated EIP,
         1835                 // so translate it.
         1836                 if(vxrun(emu, emu->etab[idx].dsteip) != 0)
         1837                         break;
         1838 
         1839         notfound:
         1840                 // Translate the code fragment the current emu->cpu.eip points to
         1841                 if(xlate(vxp) != 0)
         1842                         break;
         1843         }
         1844 
         1845         // Restore the usual flat model data segment registers.
         1846         vxrun_cleanup(emu);
         1847         
         1848 trapped:
         1849         // De-register our setjmp environment for trap handling.
         1850         emu->trapenv = NULL;
         1851 
         1852         emu->mem = NULL;
         1853         int trap = emu->cpu_trap;
         1854         emu->cpu_trap = 0;
         1855         return trap;
         1856 }
         1857 
         1858 void vxemu_stats(struct vxproc *p)
         1859 {
         1860         unsigned i;
         1861         vxemu *emu = p->emu;
         1862 
         1863         vxprint("flush count: %llu\n", nflush);
         1864 
         1865 //        vxprint("vxproc size %dKB\n", p->size/1024);
         1866 
         1867         unsigned coll = 0;
         1868         for (i = 0; i < emu->etablen; i++) {
         1869                 vxentry *e = &emu->etab[i];
         1870                 if (e->srceip == NULLSRCEIP)
         1871                         continue;
         1872                 unsigned idx = etabhash(e->srceip) & emu->etabmask;
         1873                 if (idx != i) {
         1874                 //        vxprint("srcip %08x hash %d actually at %d\n",
         1875                 //                e->srceip, idx, i);
         1876                         coll++;
         1877                 }
         1878         }
         1879         vxprint("entry tab: %d used, %d total, %d collisions\n",
         1880                 emu->etabcnt, emu->etablen, coll);
         1881 }
         1882 
         1883 static void disassemble(uint8_t *addr0, uint8_t *p, uint8_t *ep)
         1884 {
         1885         xdinst i;
         1886         int j;
         1887         uint8_t *q;
         1888         char buf[128];
         1889 
         1890         for (; p < ep; p = q) {
         1891                 if ((q = x86decode(addr0, p, &i)) == NULL)
         1892                         break;
         1893                 x86print(buf, sizeof buf, &i);
         1894                 vxprint("%08x", i.addr);
         1895                 for(j=0; j<i.len; j++)
         1896                         vxprint(" %02x", p[j]);
         1897                 for(; j<10; j++)
         1898                         vxprint("   ");
         1899                 vxprint(" %s\n", buf);
         1900         }
         1901 }
         1902 
         1903 void vxprint(char *fmt, ...)
         1904 {
         1905         va_list arg;
         1906         char buf[512];
         1907         
         1908         va_start(arg, fmt);
         1909         vsnprintf(buf, sizeof buf, fmt, arg);
         1910         va_end(arg);
         1911         USED(write(2, buf, strlen(buf)));
         1912 }
         1913