linux.c - vx32 - Local 9vx git repository for patches.
 (HTM) git clone git://r-36.net/vx32
 (DIR) Log
 (DIR) Files
 (DIR) Refs
       ---
       linux.c (10562B)
       ---
            1 // Code specific to x86 hosts running Linux.
            2 
            3 #define _GNU_SOURCE
            4 #include <stdio.h>
            5 #include <string.h>
            6 #include <signal.h>
            7 #include <assert.h>
            8 #include <ucontext.h>
            9 #include <sys/ucontext.h>
           10 #include <asm/ldt.h>
           11 #include <errno.h>
           12 
           13 #include "vx32.h"
           14 #include "vx32impl.h"
           15 #include "os.h"
           16 
           17 extern int modify_ldt(int, void*, unsigned long);
           18 
           19 int vxemu_map(vxemu *emu, vxmmap *mm)
           20 {
           21         struct vxproc *vxp;
           22         struct user_desc desc;
           23         uint ldt[2];
           24 #ifdef __x86_64
           25         static int didflat;
           26 #endif
           27 
           28         vxp = emu->proc;
           29         emu->datasel = vxp->vxpno * 16 + 16 + 4 + 3;        // 4=LDT, 3=RPL
           30         emu->emusel = emu->datasel + 8;
           31 
           32         if (emu->ldt_base != (uintptr_t)mm->base || emu->ldt_size != mm->size) {
           33                 // Set up the process's data segment selector (for DS,ES,SS).
           34                 memset(&desc, 0, sizeof(desc));
           35                 desc.seg_32bit = 1;
           36                 desc.read_exec_only = 0;
           37                 desc.limit_in_pages = 1;
           38                 desc.seg_not_present = 0;
           39                 desc.useable = 1;
           40 
           41                 desc.entry_number = emu->datasel / 8;
           42                 desc.base_addr = (uintptr_t)mm->base;
           43                 desc.limit = (mm->size - 1) >> VXPAGESHIFT;
           44                 desc.contents = MODIFY_LDT_CONTENTS_DATA;
           45                 if (modify_ldt(1, &desc, sizeof(desc)) < 0)
           46                         return -1;
           47         
           48                 // Set up the process's vxemu segment selector (for FS).
           49                 desc.entry_number = emu->emusel / 8;
           50                 desc.base_addr = (uintptr_t)emu;
           51                 desc.limit = (VXCODEBUFSIZE - 1) >> VXPAGESHIFT;
           52                 desc.contents = MODIFY_LDT_CONTENTS_DATA;
           53                 if (modify_ldt(1, &desc, sizeof(desc)) < 0)
           54                         return -1;
           55                 
           56                 emu->ldt_base = (uintptr_t)mm->base;
           57                 emu->ldt_size = mm->size;
           58         }
           59 
           60 #ifdef __x86_64
           61         // Set up 32-bit mode code and data segments (not vxproc-specific),
           62         // giving access to the full low 32 bits of linear address space.
           63         // The code segment is necessary to get into 32-bit compatibility mode;
           64         // the data segment is needed because Linux for x86-64
           65         // doesn't give 64-bit processes a "real" data segment by default
           66         // but instead just loads zero into the data segment selectors!
           67         emu->runptr.sel = FLATCODE;
           68 
           69         if (!didflat) {
           70                 didflat = 1;
           71                 memset(&desc, 0, sizeof(desc));
           72                 desc.seg_32bit = 1;
           73                 desc.read_exec_only = 0;
           74                 desc.limit_in_pages = 1;
           75                 desc.seg_not_present = 0;
           76                 desc.useable = 1;
           77 
           78                 desc.entry_number = FLATCODE / 8;
           79                 desc.base_addr = 0;
           80                 desc.limit = 0xfffff;
           81                 desc.contents = MODIFY_LDT_CONTENTS_CODE;
           82                 if (modify_ldt(1, &desc, sizeof(desc)) < 0)
           83                         return -1;
           84                 
           85                 /*
           86                  * Linux 2.6.27 has a bug: it does not load the L (long mode)
           87                  * bit from desc.lm when copying desc into its own
           88                  * copy of the LDT entry on the kernel stack.
           89                  * Instead, it leaves L uninitialized, picking up whatever
           90                  * random bit was left on the kernel stack by the
           91                  * previous call sequence.  We need L to be 0.
           92                  * If it ends up 1, the *ljmpq in run64.S will GP fault.
           93                  * Luckily, we can look for this by asking to read
           94                  * back the raw LDT bytes.  If we observe this problem,
           95                  * try to fix it by doing a modify_ldt with base = limit = 0,
           96                  * which clears the entire stack ldt structure, and then
           97                  * quickly do another modify_ldt with desc, hoping that
           98                  * the bit will still be zero when we get there for the
           99                  * second modify_ldt.  I wish I were making this up.
          100                  * This is fixed in Linus's git repository, but the Ubuntu
          101                  * git repositories are still out of date.  See for example
          102                  *         http://swtch.com/go/ubuntu-ldt
          103                  *        http://swtch.com/go/linus-ldt
          104                  *
          105                  * Remember, folks, Free Software is only free if your
          106                  * time has no value.
          107                  */
          108                 if(modify_ldt(0, ldt, sizeof ldt) < 0)
          109                         return -1;
          110                 if(ldt[1] & 0x00200000) {
          111                         if (vx32_debugxlate)
          112                                 vxprint("FLATCODE LDT=%08x %08x; working around\n", ldt[0], ldt[1]);
          113                         desc.limit = 0;
          114                         modify_ldt(1, &desc, sizeof desc);
          115                         desc.limit = 0xfffff;
          116                         modify_ldt(1, &desc, sizeof desc);
          117                         modify_ldt(0, ldt, sizeof ldt);
          118                         if(ldt[1] & 0x00200000) {
          119                                 vxprint("cannot work around Linux FLATCODE bug\n");
          120                                 errno = EBADE;
          121                                 return -1;
          122                         }
          123                         if (vx32_debugxlate)
          124                                 vxprint("FLATCODE LDT=%08x %08x\n", ldt[0], ldt[1]);
          125                 }
          126 
          127                 desc.entry_number = FLATDATA / 8;
          128                 desc.base_addr = 0;
          129                 desc.limit = 0xfffff;
          130                 desc.contents = MODIFY_LDT_CONTENTS_DATA;
          131                 if (modify_ldt(1, &desc, sizeof(desc)) < 0)
          132                         return -1;
          133         }
          134 
          135         // Set up a far return vector in emu->retptr
          136         // for getting back into 64-bit long mode.
          137         extern void vxrun_return();
          138         asm volatile("movw %%cs,%0" : "=r" (emu->retptr.sel));
          139         emu->retptr.ofs = (uint32_t)(intptr_t)vxrun_return;
          140 #endif
          141 
          142         return 0;
          143 }
          144 
          145 static void dumpsigcontext(struct sigcontext *ctx)
          146 {
          147 #ifdef i386
          148         printf(
          149                 "eax %08lx  ebx %08lx\necx %08lx  edx %08lx  "
          150                 "rsi %08lx  rdi %08lx\nrbp %08lx  rsp %08lx\n"
          151                 "eip %08lx  efl %08lx  cs %04x\n"
          152                 "err %08lx  trapno %08lx  cr2 %08lx\n",
          153                 ctx->eax, ctx->ebx, ctx->ecx, ctx->edx,
          154                 ctx->esi, ctx->edi, ctx->ebp, ctx->esp,
          155                 ctx->eip, ctx->eflags, ctx->cs,
          156                 ctx->err, ctx->trapno, ctx->cr2);
          157 #else
          158         printf(
          159                 "rax %016lx  rbx %016lx\nrcx %016lx  rdx %016lx\n"
          160                 "rsi %016lx  rdi %016lx\nrbp %016lx  rsp %016lx\n"
          161                 "r8  %016lx  r9  %016lx\nr10 %016lx  r11 %016lx\n"
          162                 "r12 %016lx  r13 %016lx\nr14 %016lx  r15 %016lx\n"
          163                 "rip %016lx  efl %016lx  cs %04x  ss %04x\n"
          164                 "err %016lx  trapno %016lx  cr2 %016lx\n",
          165                 ctx->rax, ctx->rbx, ctx->rcx, ctx->rdx,
          166                 ctx->rsi, ctx->rdi, ctx->rbp, ctx->rsp,
          167                 ctx->r8, ctx->r9, ctx->r10, ctx->r11,
          168                 ctx->r12, ctx->r13, ctx->r14, ctx->r15,
          169                 ctx->rip, ctx->eflags, ctx->cs, ctx->__pad0,
          170                 ctx->err, ctx->trapno, ctx->cr2);
          171 #endif
          172 }
          173 
// VX32_BELIEVE_EIP is expanded inside vx32_sighandler and refers to that
// function's locals ctx and vs.  It decides whether the instruction pointer
// saved in the signal context is reported as the trap address.
#ifdef i386
// 32-bit host: the saved ds must be the selector 8 below the one read from
// the vxemu segment register (vxemu_map sets emusel = datasel + 8).
#define        VX32_BELIEVE_EIP        (ctx->ds == vs - 8)
#define        ctxeip eip
#else
// 64-bit host: the saved cs must be the flat 32-bit code selector.
#define        VX32_BELIEVE_EIP        (ctx->cs == FLATCODE)

// On x86-64, make x86 names work for ctx->xxx.
// These are plain token replacements, so from here to the end of the file
// every identifier spelled eax, ebx, ... is rewritten, not only ctx fields.
#define        eax rax
#define        ebx rbx
#define        ecx rcx
#define        edx rdx
#define        esi rsi
#define        edi rdi
#define        esp rsp
#define        ebp rbp
#define        ctxeip rip
#endif
          191 
          192 static void
          193 fprestore(struct _fpstate *s)
          194 {
          195         asm volatile("frstor 0(%%eax); fwait\n" : : "a" (s) : "memory");
          196 }
          197 
          198 int vx32_sighandler(int signo, siginfo_t *si, void *v)
          199 {
          200         uint32_t trapeip;
          201         uint32_t magic;
          202         uint16_t vs;
          203         vxproc *vxp;
          204         vxemu *emu;
          205         struct sigcontext *ctx;
          206         ucontext_t *uc;
          207         mcontext_t *mc;
          208         int r;
          209 
          210         uc = v;
          211         mc = &uc->uc_mcontext;
          212 
          213         // same layout, and sigcontext is more convenient...
          214         ctx = (struct sigcontext*)mc;
          215 
          216         // We can't be sure that vxemu is running,
          217         // and thus that %VSEG is actually mapped to a
          218         // valid vxemu.  The only way to tell is to look at %VSEG.
          219 
          220         // First sanity check vxproc segment number.
          221         asm("movw %"VSEGSTR",%0"
          222                 : "=r" (vs));
          223         
          224         if(vx32_debugxlate) vxprint("vx32_sighandler signo=%d eip=%#x esp=%#x vs=%#x\n",
          225                 signo, ctx->ctxeip, ctx->esp, vs);
          226         if(vx32_debugxlate) dumpsigcontext(ctx);
          227 
          228         if ((vs & 15) != 15)        // 8 (emu), LDT, RPL=3
          229                 return 0;
          230 
          231         // Okay, assume mapped; check for vxemu.
          232         asm("movl %"VSEGSTR":%1,%0"
          233                 : "=r" (magic)
          234                 : "m" (((vxemu*)0)->magic));
          235         if (magic != VXEMU_MAGIC)
          236                 return 0;
          237 
          238         // Okay, we're convinced.
          239 
          240         // Find current vxproc and vxemu.
          241         asm("mov %"VSEGSTR":%1,%0"
          242                 : "=r" (vxp)
          243                 : "m" (((vxemu*)0)->proc));
          244         emu = vxp->emu;
          245 
          246         // Get back our regular host segment register state,
          247         // so that thread-local storage and such works.
          248         vxrun_cleanup(emu);
          249 
          250         // dumpsigcontext(ctx);
          251 
          252         if (VX32_BELIEVE_EIP)
          253                 trapeip = ctx->ctxeip;
          254         else
          255                 trapeip = 0xffffffff;
          256 
          257         int newtrap;
          258         switch(signo){
          259         case SIGSEGV:
          260         case SIGBUS:
          261                 newtrap = VXTRAP_PAGEFAULT;
          262                 break;
          263         
          264         case SIGFPE:
          265                 newtrap = VXTRAP_FLOAT;
          266                 break;
          267         
          268         case SIGVTALRM:
          269                 newtrap = VXTRAP_IRQ + VXIRQ_TIMER;
          270                 break;
          271 
          272         case SIGTRAP:
          273                 // Linux sends SIGTRAP when it gets a processor 
          274                 // debug exception, which is caused by single-stepping
          275                 // with the TF bit, among other things.  The processor
          276                 // turns off the TF bit before generating the trap, but
          277                 // it appears that Linux turns it back on for us.
          278                 // Let's use it to confirm that this is a single-step trap.
          279                 if (ctx->eflags & EFLAGS_TF){
          280                         newtrap = VXTRAP_SINGLESTEP;
          281                         ctx->eflags &= ~EFLAGS_TF;
          282                 }else{
          283                         vxprint("Unexpected sigtrap eflags=%#x\n", ctx->eflags);
          284                         newtrap = VXTRAP_SIGNAL + signo;
          285                 }
          286                 break;
          287 
          288         default:
          289                 newtrap = VXTRAP_SIGNAL + signo;
          290                 break;
          291         }
          292         
          293         int replaced_trap = 0;
          294         if (emu->cpu_trap) {
          295                 // There's already a pending trap!
          296                 // Handle the new trap, and assume that when it
          297                 // finishes, restarting the code at cpu.eip will trigger
          298                 // the old trap again.
          299                 // Have to fix up eip for int 0x30 and syscall instructions.
          300                 if (emu->cpu_trap == VXTRAP_SYSCALL ||
          301                                 (emu->cpu_trap&VXTRAP_CATEGORY) == VXTRAP_SOFT)
          302                         emu->cpu.eip -= 2;
          303                 replaced_trap = emu->cpu_trap;
          304         }
          305         emu->cpu_trap = newtrap;
          306 
          307         r = vxemu_sighandler(emu, trapeip);
          308 
          309         if (r == VXSIG_SINGLESTEP){
          310                 // Vxemu_sighandler wants us to single step.
          311                 // Execution state is in intermediate state - don't touch.
          312                 ctx->eflags |= EFLAGS_TF;                // x86 TF (single-step) bit
          313                 vxrun_setup(emu);
          314                 return 1;
          315         }
          316 
          317         // Copy execution state into emu.
          318         if ((r & VXSIG_SAVE_ALL) == VXSIG_SAVE_ALL) {
          319                 emu->cpu.reg[EAX] = ctx->eax;
          320                 emu->cpu.reg[EBX] = ctx->ebx;
          321                 emu->cpu.reg[ECX] = ctx->ecx;
          322                 emu->cpu.reg[EDX] = ctx->edx;
          323                 emu->cpu.reg[ESI] =  ctx->esi;
          324                 emu->cpu.reg[EDI] = ctx->edi;
          325                 emu->cpu.reg[ESP] = ctx->esp;        // or esp_at_signal ???
          326                 emu->cpu.reg[EBP] = ctx->ebp;
          327                 emu->cpu.eflags = ctx->eflags;
          328         } else if (r & VXSIG_SAVE_ALL) {
          329                 if (r & VXSIG_SAVE_EAX)
          330                         emu->cpu.reg[EAX] = ctx->eax;
          331                 if (r & VXSIG_SAVE_EBX)
          332                         emu->cpu.reg[EBX] = ctx->ebx;
          333                 if (r & VXSIG_SAVE_ECX)
          334                         emu->cpu.reg[ECX] = ctx->ecx;
          335                 if (r & VXSIG_SAVE_EDX)
          336                         emu->cpu.reg[EDX] = ctx->edx;
          337                 if (r & VXSIG_SAVE_ESI)
          338                         emu->cpu.reg[ESI] =  ctx->esi;
          339                 if (r & VXSIG_SAVE_EDI)
          340                         emu->cpu.reg[EDI] = ctx->edi;
          341                 if (r & VXSIG_SAVE_ESP)
          342                         emu->cpu.reg[ESP] = ctx->esp;        // or esp_at_signal ???
          343                 if (r & VXSIG_SAVE_EBP)
          344                         emu->cpu.reg[EBP] = ctx->ebp;
          345                 if (r & VXSIG_SAVE_EFLAGS)
          346                         emu->cpu.eflags = ctx->eflags;
          347         }
          348         r &= ~VXSIG_SAVE_ALL;
          349 
          350         if (r & VXSIG_SAVE_EBX_AS_EIP)
          351                 emu->cpu.eip = ctx->ebx;
          352         r &= ~VXSIG_SAVE_EBX_AS_EIP;
          353 
          354         if (r & VXSIG_ADD_COUNT_TO_ESP) {
          355                 emu->cpu.reg[ESP] += (uint16_t)(r >> VXSIG_COUNT_SHIFT);
          356                 r &= ~VXSIG_ADD_COUNT_TO_ESP;
          357                 r &= ~(0xFFFF << VXSIG_COUNT_SHIFT);
          358         }
          359         
          360         if (r &  VXSIG_INC_ECX) {
          361                 emu->cpu.reg[ECX]++;
          362                 r &= ~VXSIG_INC_ECX;
          363         }
          364 
          365         if (r == VXSIG_TRAP) {
          366                 if (emu->trapenv == NULL)
          367                         return 0;
          368                 emu->cpu.traperr = ctx->err;
          369                 // Usually, ctx->cr2 == si->si_addr.
          370                 // But on a segmentation fault (as opposed to a paging fault),
          371                 // cr2 is not updated and the kernel sends an si_addr == 0.
          372                 // Be sure to use si_addr, not cr2.
          373                 emu->cpu.trapva = (uint32_t)(uintptr_t)si->si_addr;
          374                 memmove(mc->gregs, emu->trapenv->gregs, sizeof emu->trapenv->gregs);
          375                 
          376                 return 1;
          377         }
          378 
          379         // The signal handler is confused; so are we.
          380         return 0;
          381 }