patch-nvmm-support - pkgsrc-localpatches - leot's pkgsrc LOCALPATCHES
 hg clone https://bitbucket.org/iamleot/pkgsrc-localpatches
       ---
       patch-nvmm-support
       ---
            1 $NetBSD: patch-nvmm_support,v 1.1 2018/10/29 00:00:00 maxv Exp $
            2 
            3 Add NVMM support.
            4 
            5 From wip/qemu-nvmm, the only changes are:
            6 
             7  - configure: nvmm is now detected automatically by the
             8    configure script (s/nvmm="no"/nvmm=""/).
            9  - target/i386/helper.c: minor adjustments to apply cleanly to
           10    4.1.0
           11  - target/i386/Makefile.objs: minor adjustments to apply cleanly to
           12    4.0.0
           13  - vl.c: removed, no longer needed on 4.1.0
           14 
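               Usage sketch (assumptions: QEMU 4.1.0 sources on NetBSD with the
               nvmm(4) driver configured and /dev/nvmm present; the target list
               and disk image name below are illustrative):

                   $ ./configure --target-list=x86_64-softmmu
                   $ gmake
                   $ ./x86_64-softmmu/qemu-system-x86_64 -accel nvmm -m 1G -hda disk.img

               Pass --enable-nvmm to make configure fail hard when nvmm.h is
               missing, or --disable-nvmm to skip the probe.
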
           15 --- accel/stubs/Makefile.objs   2018-12-11 18:44:34.000000000 +0100
           16 +++ accel/stubs/Makefile.objs   2019-05-01 11:53:33.068579985 +0200
           17 @@ -1,5 +1,6 @@
           18  obj-$(call lnot,$(CONFIG_HAX))  += hax-stub.o
           19  obj-$(call lnot,$(CONFIG_HVF))  += hvf-stub.o
           20  obj-$(call lnot,$(CONFIG_WHPX)) += whpx-stub.o
           21 +obj-$(call lnot,$(CONFIG_NVMM)) += nvmm-stub.o
           22  obj-$(call lnot,$(CONFIG_KVM))  += kvm-stub.o
           23  obj-$(call lnot,$(CONFIG_TCG))  += tcg-stub.o
           24 --- accel/stubs/nvmm-stub.c     1970-01-01 01:00:00.000000000 +0100
           25 +++ accel/stubs/nvmm-stub.c     2019-05-01 11:53:33.087579596 +0200
           26 @@ -0,0 +1,43 @@
           27 +/*
           28 + * Copyright (c) 2018-2019 Maxime Villard, All rights reserved.
           29 + *
           30 + * NetBSD Virtual Machine Monitor (NVMM) accelerator stub.
           31 + *
           32 + * This work is licensed under the terms of the GNU GPL, version 2 or later.
           33 + * See the COPYING file in the top-level directory.
           34 + */
           35 +
           36 +#include "qemu/osdep.h"
           37 +#include "qemu-common.h"
           38 +#include "cpu.h"
           39 +#include "sysemu/nvmm.h"
           40 +
           41 +int nvmm_init_vcpu(CPUState *cpu)
           42 +{
           43 +    return -1;
           44 +}
           45 +
           46 +int nvmm_vcpu_exec(CPUState *cpu)
           47 +{
           48 +    return -1;
           49 +}
           50 +
           51 +void nvmm_destroy_vcpu(CPUState *cpu)
           52 +{
           53 +}
           54 +
           55 +void nvmm_cpu_synchronize_state(CPUState *cpu)
           56 +{
           57 +}
           58 +
           59 +void nvmm_cpu_synchronize_post_reset(CPUState *cpu)
           60 +{
           61 +}
           62 +
           63 +void nvmm_cpu_synchronize_post_init(CPUState *cpu)
           64 +{
           65 +}
           66 +
           67 +void nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu)
           68 +{
           69 +}
           70 --- configure   2018-12-11 18:44:34.000000000 +0100
           71 +++ configure   2019-05-01 11:53:33.090579534 +0200
           72 @@ -237,6 +237,17 @@
           73      return 1
           74  }
           75  
           76 +supported_nvmm_target() {
           77 +    test "$nvmm" = "yes" || return 1
           78 +    glob "$1" "*-softmmu" || return 1
           79 +    case "${1%-softmmu}" in
           80 +        x86_64)
           81 +            return 0
           82 +        ;;
           83 +    esac
           84 +    return 1
           85 +}
           86 +
           87  supported_target() {
           88      case "$1" in
           89          *-softmmu)
           90 @@ -264,6 +275,7 @@
           91      supported_hax_target "$1" && return 0
           92      supported_hvf_target "$1" && return 0
           93      supported_whpx_target "$1" && return 0
           94 +    supported_nvmm_target "$1" && return 0
           95      print_error "TCG disabled, but hardware accelerator not available for '$target'"
           96      return 1
           97  }
           98 @@ -375,6 +387,7 @@
           99  hax="no"
          100  hvf="no"
          101  whpx="no"
          102 +nvmm=""
          103  rdma=""
          104  pvrdma=""
          105  gprof="no"
          106 @@ -1143,6 +1156,10 @@
          107    ;;
          108    --enable-whpx) whpx="yes"
          109    ;;
          110 +  --disable-nvmm) nvmm="no"
          111 +  ;;
          112 +  --enable-nvmm) nvmm="yes"
          113 +  ;;
          114    --disable-tcg-interpreter) tcg_interpreter="no"
          115    ;;
          116    --enable-tcg-interpreter) tcg_interpreter="yes"
          117 @@ -1724,6 +1741,7 @@
          118    hax             HAX acceleration support
          119    hvf             Hypervisor.framework acceleration support
          120    whpx            Windows Hypervisor Platform acceleration support
          121 +  nvmm            NetBSD Virtual Machine Monitor acceleration support
          122    rdma            Enable RDMA-based migration
          123    pvrdma          Enable PVRDMA support
          124    vde             support for vde network
          125 @@ -2659,6 +2677,20 @@
          126  fi
          127  
          128  ##########################################
          129 +# NetBSD Virtual Machine Monitor (NVMM) accelerator check
          130 +if test "$nvmm" != "no" ; then
          131 +    if check_include "nvmm.h"; then
          132 +        nvmm="yes"
          133 +        LIBS="-lnvmm $LIBS"
          134 +    else
          135 +        if test "$nvmm" = "yes"; then
          136 +            feature_not_found "NVMM" "NVMM is not available"
          137 +        fi
          138 +        nvmm="no"
          139 +    fi
          140 +fi
          141 +
          142 +##########################################
          143  # Sparse probe
          144  if test "$sparse" != "no" ; then
          145    if has cgcc; then
          146 @@ -6033,6 +6065,7 @@
          147  echo "HAX support       $hax"
          148  echo "HVF support       $hvf"
          149  echo "WHPX support      $whpx"
          150 +echo "NVMM support      $nvmm"
          151  echo "TCG support       $tcg"
          152  if test "$tcg" = "yes" ; then
          153      echo "TCG debug enabled $debug_tcg"
          154 @@ -7291,6 +7324,9 @@
          155  if supported_whpx_target $target; then
          156      echo "CONFIG_WHPX=y" >> $config_target_mak
          157  fi
          158 +if supported_nvmm_target $target; then
          159 +    echo "CONFIG_NVMM=y" >> $config_target_mak
          160 +fi
          161  if test "$target_bigendian" = "yes" ; then
          162    echo "TARGET_WORDS_BIGENDIAN=y" >> $config_target_mak
          163  fi
          164 --- cpus.c      2018-12-11 18:44:34.000000000 +0100
          165 +++ cpus.c      2019-05-01 11:53:33.092579493 +0200
          166 @@ -40,6 +40,7 @@
          167  #include "sysemu/hax.h"
          168  #include "sysemu/hvf.h"
          169  #include "sysemu/whpx.h"
          170 +#include "sysemu/nvmm.h"
          171  #include "exec/exec-all.h"
          172  
          173  #include "qemu/thread.h"
          174 @@ -1691,6 +1692,48 @@
          175      return NULL;
          176  }
          177  
          178 +static void *qemu_nvmm_cpu_thread_fn(void *arg)
          179 +{
          180 +    CPUState *cpu = arg;
          181 +    int r;
          182 +
          183 +    assert(nvmm_enabled());
          184 +
          185 +    rcu_register_thread();
          186 +
          187 +    qemu_mutex_lock_iothread();
          188 +    qemu_thread_get_self(cpu->thread);
          189 +    cpu->thread_id = qemu_get_thread_id();
          190 +    current_cpu = cpu;
          191 +
          192 +    r = nvmm_init_vcpu(cpu);
          193 +    if (r < 0) {
          194 +        fprintf(stderr, "nvmm_init_vcpu failed: %s\n", strerror(-r));
          195 +        exit(1);
          196 +    }
          197 +
          198 +    /* signal CPU creation */
          199 +    cpu->created = true;
          200 +    qemu_cond_signal(&qemu_cpu_cond);
          201 +
          202 +    do {
          203 +        if (cpu_can_run(cpu)) {
          204 +            r = nvmm_vcpu_exec(cpu);
          205 +            if (r == EXCP_DEBUG) {
          206 +                cpu_handle_guest_debug(cpu);
          207 +            }
          208 +        }
          209 +        qemu_wait_io_event(cpu);
          210 +    } while (!cpu->unplug || cpu_can_run(cpu));
          211 +
          212 +    nvmm_destroy_vcpu(cpu);
          213 +    cpu->created = false;
          214 +    qemu_cond_signal(&qemu_cpu_cond);
          215 +    qemu_mutex_unlock_iothread();
          216 +    rcu_unregister_thread();
          217 +    return NULL;
          218 +}
          219 +
          220  #ifdef _WIN32
          221  static void CALLBACK dummy_apc_func(ULONG_PTR unused)
          222  {
          223 @@ -2051,6 +2094,19 @@
          224  #endif
          225  }
          226  
          227 +static void qemu_nvmm_start_vcpu(CPUState *cpu)
          228 +{
          229 +    char thread_name[VCPU_THREAD_NAME_SIZE];
          230 +
          231 +    cpu->thread = g_malloc0(sizeof(QemuThread));
          232 +    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
          233 +    qemu_cond_init(cpu->halt_cond);
          234 +    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/NVMM",
          235 +             cpu->cpu_index);
          236 +    qemu_thread_create(cpu->thread, thread_name, qemu_nvmm_cpu_thread_fn,
          237 +                       cpu, QEMU_THREAD_JOINABLE);
          238 +}
          239 +
          240  static void qemu_dummy_start_vcpu(CPUState *cpu)
          241  {
          242      char thread_name[VCPU_THREAD_NAME_SIZE];
          243 @@ -2088,6 +2144,8 @@
          244          qemu_tcg_init_vcpu(cpu);
          245      } else if (whpx_enabled()) {
          246          qemu_whpx_start_vcpu(cpu);
          247 +    } else if (nvmm_enabled()) {
          248 +        qemu_nvmm_start_vcpu(cpu);
          249      } else {
          250          qemu_dummy_start_vcpu(cpu);
          251      }
          252 --- include/sysemu/hw_accel.h   2018-12-11 18:44:34.000000000 +0100
          253 +++ include/sysemu/hw_accel.h   2019-05-01 11:53:33.092579493 +0200
          254 @@ -15,6 +15,7 @@
          255  #include "sysemu/hax.h"
          256  #include "sysemu/kvm.h"
          257  #include "sysemu/whpx.h"
          258 +#include "sysemu/nvmm.h"
          259  
          260  static inline void cpu_synchronize_state(CPUState *cpu)
          261  {
          262 @@ -27,6 +28,9 @@
          263      if (whpx_enabled()) {
          264          whpx_cpu_synchronize_state(cpu);
          265      }
          266 +    if (nvmm_enabled()) {
          267 +        nvmm_cpu_synchronize_state(cpu);
          268 +    }
          269  }
          270  
          271  static inline void cpu_synchronize_post_reset(CPUState *cpu)
          272 @@ -40,6 +44,9 @@
          273      if (whpx_enabled()) {
          274          whpx_cpu_synchronize_post_reset(cpu);
          275      }
          276 +    if (nvmm_enabled()) {
          277 +        nvmm_cpu_synchronize_post_reset(cpu);
          278 +    }
          279  }
          280  
          281  static inline void cpu_synchronize_post_init(CPUState *cpu)
          282 @@ -53,6 +60,9 @@
          283      if (whpx_enabled()) {
          284          whpx_cpu_synchronize_post_init(cpu);
          285      }
          286 +    if (nvmm_enabled()) {
          287 +        nvmm_cpu_synchronize_post_init(cpu);
          288 +    }
          289  }
          290  
          291  static inline void cpu_synchronize_pre_loadvm(CPUState *cpu)
          292 @@ -66,6 +76,9 @@
          293      if (whpx_enabled()) {
          294          whpx_cpu_synchronize_pre_loadvm(cpu);
          295      }
          296 +    if (nvmm_enabled()) {
          297 +        nvmm_cpu_synchronize_pre_loadvm(cpu);
          298 +    }
          299  }
          300  
          301  #endif /* QEMU_HW_ACCEL_H */
          302 --- include/sysemu/nvmm.h       1970-01-01 01:00:00.000000000 +0100
          303 +++ include/sysemu/nvmm.h       2019-05-01 11:53:33.093579472 +0200
          304 @@ -0,0 +1,35 @@
          305 +/*
          306 + * Copyright (c) 2018-2019 Maxime Villard, All rights reserved.
          307 + *
          308 + * NetBSD Virtual Machine Monitor (NVMM) accelerator support.
          309 + *
          310 + * This work is licensed under the terms of the GNU GPL, version 2 or later.
          311 + * See the COPYING file in the top-level directory.
          312 + */
          313 +
          314 +#ifndef QEMU_NVMM_H
          315 +#define QEMU_NVMM_H
          316 +
          317 +#include "config-host.h"
          318 +#include "qemu-common.h"
          319 +
          320 +int nvmm_init_vcpu(CPUState *);
          321 +int nvmm_vcpu_exec(CPUState *);
          322 +void nvmm_destroy_vcpu(CPUState *);
          323 +
          324 +void nvmm_cpu_synchronize_state(CPUState *);
          325 +void nvmm_cpu_synchronize_post_reset(CPUState *);
          326 +void nvmm_cpu_synchronize_post_init(CPUState *);
          327 +void nvmm_cpu_synchronize_pre_loadvm(CPUState *);
          328 +
          329 +#ifdef CONFIG_NVMM
          330 +
          331 +int nvmm_enabled(void);
          332 +
          333 +#else /* CONFIG_NVMM */
          334 +
          335 +#define nvmm_enabled() (0)
          336 +
          337 +#endif /* CONFIG_NVMM */
          338 +
           339 +#endif /* QEMU_NVMM_H */
          340 --- qemu-options.hx     2018-12-11 18:44:34.000000000 +0100
          341 +++ qemu-options.hx     2019-05-01 11:53:33.093579472 +0200
          342 @@ -66,7 +66,7 @@
          343  @table @option
          344  @item accel=@var{accels1}[:@var{accels2}[:...]]
          345  This is used to enable an accelerator. Depending on the target architecture,
          346 -kvm, xen, hax, hvf, whpx or tcg can be available. By default, tcg is used. If there is
          347 +kvm, xen, hax, hvf, whpx, nvmm or tcg can be available. By default, tcg is used. If there is
          348  more than one accelerator specified, the next one is used if the previous one
          349  fails to initialize.
          350  @item kernel_irqchip=on|off
          351 @@ -119,13 +119,13 @@
          352  
          353  DEF("accel", HAS_ARG, QEMU_OPTION_accel,
          354      "-accel [accel=]accelerator[,thread=single|multi]\n"
          355 -    "                select accelerator (kvm, xen, hax, hvf, whpx or tcg; use 'help' for a list)\n"
          356 +    "                select accelerator (kvm, xen, hax, hvf, whpx, nvmm or tcg; use 'help' for a list)\n"
          357      "                thread=single|multi (enable multi-threaded TCG)\n", QEMU_ARCH_ALL)
          358  STEXI
          359  @item -accel @var{name}[,prop=@var{value}[,...]]
          360  @findex -accel
          361  This is used to enable an accelerator. Depending on the target architecture,
          362 -kvm, xen, hax, hvf, whpx or tcg can be available. By default, tcg is used. If there is
          363 +kvm, xen, hax, hvf, whpx, nvmm or tcg can be available. By default, tcg is used. If there is
          364  more than one accelerator specified, the next one is used if the previous one
          365  fails to initialize.
          366  @table @option
          367 --- target/i386/helper.c        2018-12-11 18:44:34.000000000 +0100
          368 +++ target/i386/helper.c        2019-05-01 11:53:33.093579472 +0200
          369 @@ -986,7 +986,7 @@
          370      X86CPU *cpu = env_archcpu(env);
          371      CPUState *cs = env_cpu(env);
          372  
          373 -    if (kvm_enabled() || whpx_enabled()) {
          374 +    if (kvm_enabled() || whpx_enabled() || nvmm_enabled()) {
          375          env->tpr_access_type = access;
          376  
          377          cpu_interrupt(cs, CPU_INTERRUPT_TPR);
          378 --- target/i386/Makefile.objs   2018-12-11 18:44:34.000000000 +0100
          379 +++ target/i386/Makefile.objs   2019-05-01 11:53:33.094579452 +0200
          380 @@ -17,6 +17,7 @@
          381  endif
          382  obj-$(CONFIG_HVF) += hvf/
          383  obj-$(CONFIG_WHPX) += whpx-all.o
          384 +obj-$(CONFIG_NVMM) += nvmm-all.o
          385  endif
          386  obj-$(CONFIG_SEV) += sev.o
          387  obj-$(call lnot,$(CONFIG_SEV)) += sev-stub.o
          388 --- target/i386/nvmm-all.c      1970-01-01 01:00:00.000000000 +0100
          389 +++ target/i386/nvmm-all.c      2019-06-10 11:57:43.612013641 +0200
          390 @@ -0,0 +1,1168 @@
          391 +/*
          392 + * Copyright (c) 2018-2019 Maxime Villard, All rights reserved.
          393 + *
          394 + * NetBSD Virtual Machine Monitor (NVMM) accelerator for QEMU.
          395 + *
          396 + * This work is licensed under the terms of the GNU GPL, version 2 or later.
          397 + * See the COPYING file in the top-level directory.
          398 + */
          399 +
          400 +#include "qemu/osdep.h"
          401 +#include "cpu.h"
          402 +#include "exec/address-spaces.h"
          403 +#include "exec/ioport.h"
          404 +#include "qemu-common.h"
          405 +#include "strings.h"
          406 +#include "sysemu/accel.h"
          407 +#include "sysemu/nvmm.h"
          408 +#include "sysemu/sysemu.h"
          409 +#include "sysemu/cpus.h"
          410 +#include "qemu/main-loop.h"
          411 +#include "hw/boards.h"
          412 +#include "qemu/error-report.h"
          413 +#include "qemu/queue.h"
          414 +#include "qapi/error.h"
          415 +#include "migration/blocker.h"
          416 +
          417 +#include <nvmm.h>
          418 +
          419 +struct qemu_vcpu {
          420 +    struct nvmm_vcpu vcpu;
          421 +    uint8_t tpr;
          422 +    bool stop;
          423 +
          424 +    /* Window-exiting for INTs/NMIs. */
          425 +    bool int_window_exit;
          426 +    bool nmi_window_exit;
          427 +
          428 +    /* The guest is in an interrupt shadow (POP SS, etc). */
          429 +    bool int_shadow;
          430 +};
          431 +
          432 +struct qemu_machine {
          433 +    struct nvmm_machine mach;
          434 +};
          435 +
          436 +/* -------------------------------------------------------------------------- */
          437 +
          438 +static bool nvmm_allowed = false;
          439 +static struct qemu_machine qemu_mach;
          440 +
          441 +static struct qemu_vcpu *
          442 +get_qemu_vcpu(CPUState *cpu)
          443 +{
          444 +    return (struct qemu_vcpu *)cpu->hax_vcpu;
          445 +}
          446 +
          447 +static struct nvmm_machine *
          448 +get_nvmm_mach(void)
          449 +{
          450 +    return &qemu_mach.mach;
          451 +}
          452 +
          453 +/* -------------------------------------------------------------------------- */
          454 +
          455 +static void
          456 +nvmm_set_segment(struct nvmm_x64_state_seg *nseg, const SegmentCache *qseg)
          457 +{
          458 +    uint32_t attrib = qseg->flags;
          459 +
          460 +    nseg->selector = qseg->selector;
          461 +    nseg->limit = qseg->limit;
          462 +    nseg->base = qseg->base;
          463 +    nseg->attrib.type = __SHIFTOUT(attrib, DESC_TYPE_MASK);
          464 +    nseg->attrib.s = __SHIFTOUT(attrib, DESC_S_MASK);
          465 +    nseg->attrib.dpl = __SHIFTOUT(attrib, DESC_DPL_MASK);
          466 +    nseg->attrib.p = __SHIFTOUT(attrib, DESC_P_MASK);
          467 +    nseg->attrib.avl = __SHIFTOUT(attrib, DESC_AVL_MASK);
          468 +    nseg->attrib.l = __SHIFTOUT(attrib, DESC_L_MASK);
          469 +    nseg->attrib.def = __SHIFTOUT(attrib, DESC_B_MASK);
          470 +    nseg->attrib.g = __SHIFTOUT(attrib, DESC_G_MASK);
          471 +}
          472 +
          473 +static void
          474 +nvmm_set_registers(CPUState *cpu)
          475 +{
          476 +    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
          477 +    struct nvmm_machine *mach = get_nvmm_mach();
          478 +    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
          479 +    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
          480 +    struct nvmm_x64_state *state = vcpu->state;
          481 +    uint64_t bitmap;
          482 +    size_t i;
          483 +    int ret;
          484 +
          485 +    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
          486 +
          487 +    /* GPRs. */
          488 +    state->gprs[NVMM_X64_GPR_RAX] = env->regs[R_EAX];
          489 +    state->gprs[NVMM_X64_GPR_RCX] = env->regs[R_ECX];
          490 +    state->gprs[NVMM_X64_GPR_RDX] = env->regs[R_EDX];
          491 +    state->gprs[NVMM_X64_GPR_RBX] = env->regs[R_EBX];
          492 +    state->gprs[NVMM_X64_GPR_RSP] = env->regs[R_ESP];
          493 +    state->gprs[NVMM_X64_GPR_RBP] = env->regs[R_EBP];
          494 +    state->gprs[NVMM_X64_GPR_RSI] = env->regs[R_ESI];
          495 +    state->gprs[NVMM_X64_GPR_RDI] = env->regs[R_EDI];
          496 +    state->gprs[NVMM_X64_GPR_R8]  = env->regs[R_R8];
          497 +    state->gprs[NVMM_X64_GPR_R9]  = env->regs[R_R9];
          498 +    state->gprs[NVMM_X64_GPR_R10] = env->regs[R_R10];
          499 +    state->gprs[NVMM_X64_GPR_R11] = env->regs[R_R11];
          500 +    state->gprs[NVMM_X64_GPR_R12] = env->regs[R_R12];
          501 +    state->gprs[NVMM_X64_GPR_R13] = env->regs[R_R13];
          502 +    state->gprs[NVMM_X64_GPR_R14] = env->regs[R_R14];
          503 +    state->gprs[NVMM_X64_GPR_R15] = env->regs[R_R15];
          504 +
          505 +    /* RIP and RFLAGS. */
          506 +    state->gprs[NVMM_X64_GPR_RIP] = env->eip;
          507 +    state->gprs[NVMM_X64_GPR_RFLAGS] = env->eflags;
          508 +
          509 +    /* Segments. */
          510 +    nvmm_set_segment(&state->segs[NVMM_X64_SEG_CS], &env->segs[R_CS]);
          511 +    nvmm_set_segment(&state->segs[NVMM_X64_SEG_DS], &env->segs[R_DS]);
          512 +    nvmm_set_segment(&state->segs[NVMM_X64_SEG_ES], &env->segs[R_ES]);
          513 +    nvmm_set_segment(&state->segs[NVMM_X64_SEG_FS], &env->segs[R_FS]);
          514 +    nvmm_set_segment(&state->segs[NVMM_X64_SEG_GS], &env->segs[R_GS]);
          515 +    nvmm_set_segment(&state->segs[NVMM_X64_SEG_SS], &env->segs[R_SS]);
          516 +
          517 +    /* Special segments. */
          518 +    nvmm_set_segment(&state->segs[NVMM_X64_SEG_GDT], &env->gdt);
          519 +    nvmm_set_segment(&state->segs[NVMM_X64_SEG_LDT], &env->ldt);
          520 +    nvmm_set_segment(&state->segs[NVMM_X64_SEG_TR], &env->tr);
          521 +    nvmm_set_segment(&state->segs[NVMM_X64_SEG_IDT], &env->idt);
          522 +
          523 +    /* Control registers. */
          524 +    state->crs[NVMM_X64_CR_CR0] = env->cr[0];
          525 +    state->crs[NVMM_X64_CR_CR2] = env->cr[2];
          526 +    state->crs[NVMM_X64_CR_CR3] = env->cr[3];
          527 +    state->crs[NVMM_X64_CR_CR4] = env->cr[4];
          528 +    state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;
          529 +    state->crs[NVMM_X64_CR_XCR0] = env->xcr0;
          530 +
          531 +    /* Debug registers. */
          532 +    state->drs[NVMM_X64_DR_DR0] = env->dr[0];
          533 +    state->drs[NVMM_X64_DR_DR1] = env->dr[1];
          534 +    state->drs[NVMM_X64_DR_DR2] = env->dr[2];
          535 +    state->drs[NVMM_X64_DR_DR3] = env->dr[3];
          536 +    state->drs[NVMM_X64_DR_DR6] = env->dr[6];
          537 +    state->drs[NVMM_X64_DR_DR7] = env->dr[7];
          538 +
          539 +    /* FPU. */
          540 +    state->fpu.fx_cw = env->fpuc;
          541 +    state->fpu.fx_sw = (env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11);
          542 +    state->fpu.fx_tw = 0;
          543 +    for (i = 0; i < 8; i++) {
          544 +        state->fpu.fx_tw |= (!env->fptags[i]) << i;
          545 +    }
          546 +    state->fpu.fx_opcode = env->fpop;
          547 +    state->fpu.fx_ip.fa_64 = env->fpip;
          548 +    state->fpu.fx_dp.fa_64 = env->fpdp;
          549 +    state->fpu.fx_mxcsr = env->mxcsr;
          550 +    state->fpu.fx_mxcsr_mask = 0x0000FFFF;
          551 +    assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
          552 +    memcpy(state->fpu.fx_87_ac, env->fpregs, sizeof(env->fpregs));
          553 +    for (i = 0; i < 16; i++) {
          554 +        memcpy(&state->fpu.fx_xmm[i].xmm_bytes[0],
          555 +            &env->xmm_regs[i].ZMM_Q(0), 8);
          556 +        memcpy(&state->fpu.fx_xmm[i].xmm_bytes[8],
          557 +            &env->xmm_regs[i].ZMM_Q(1), 8);
          558 +    }
          559 +
          560 +    /* MSRs. */
          561 +    state->msrs[NVMM_X64_MSR_EFER] = env->efer;
          562 +    state->msrs[NVMM_X64_MSR_STAR] = env->star;
          563 +#ifdef TARGET_X86_64
          564 +    state->msrs[NVMM_X64_MSR_LSTAR] = env->lstar;
          565 +    state->msrs[NVMM_X64_MSR_CSTAR] = env->cstar;
          566 +    state->msrs[NVMM_X64_MSR_SFMASK] = env->fmask;
          567 +    state->msrs[NVMM_X64_MSR_KERNELGSBASE] = env->kernelgsbase;
          568 +#endif
          569 +    state->msrs[NVMM_X64_MSR_SYSENTER_CS]  = env->sysenter_cs;
          570 +    state->msrs[NVMM_X64_MSR_SYSENTER_ESP] = env->sysenter_esp;
          571 +    state->msrs[NVMM_X64_MSR_SYSENTER_EIP] = env->sysenter_eip;
          572 +    state->msrs[NVMM_X64_MSR_PAT] = env->pat;
          573 +    state->msrs[NVMM_X64_MSR_TSC] = env->tsc;
          574 +
          575 +    bitmap =
          576 +        NVMM_X64_STATE_SEGS |
          577 +        NVMM_X64_STATE_GPRS |
          578 +        NVMM_X64_STATE_CRS  |
          579 +        NVMM_X64_STATE_DRS  |
          580 +        NVMM_X64_STATE_MSRS |
          581 +        NVMM_X64_STATE_FPU;
          582 +
          583 +    ret = nvmm_vcpu_setstate(mach, vcpu, bitmap);
          584 +    if (ret == -1) {
          585 +        error_report("NVMM: Failed to set virtual processor context,"
          586 +            " error=%d", errno);
          587 +    }
          588 +}
          589 +
          590 +static void
          591 +nvmm_get_segment(SegmentCache *qseg, const struct nvmm_x64_state_seg *nseg)
          592 +{
          593 +    qseg->selector = nseg->selector;
          594 +    qseg->limit = nseg->limit;
          595 +    qseg->base = nseg->base;
          596 +
          597 +    qseg->flags =
          598 +        __SHIFTIN((uint32_t)nseg->attrib.type, DESC_TYPE_MASK) |
          599 +        __SHIFTIN((uint32_t)nseg->attrib.s, DESC_S_MASK) |
          600 +        __SHIFTIN((uint32_t)nseg->attrib.dpl, DESC_DPL_MASK) |
          601 +        __SHIFTIN((uint32_t)nseg->attrib.p, DESC_P_MASK) |
          602 +        __SHIFTIN((uint32_t)nseg->attrib.avl, DESC_AVL_MASK) |
          603 +        __SHIFTIN((uint32_t)nseg->attrib.l, DESC_L_MASK) |
          604 +        __SHIFTIN((uint32_t)nseg->attrib.def, DESC_B_MASK) |
          605 +        __SHIFTIN((uint32_t)nseg->attrib.g, DESC_G_MASK);
          606 +}
          607 +
          608 +static void
          609 +nvmm_get_registers(CPUState *cpu)
          610 +{
          611 +    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
          612 +    struct nvmm_machine *mach = get_nvmm_mach();
          613 +    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
          614 +    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
          615 +    X86CPU *x86_cpu = X86_CPU(cpu);
          616 +    struct nvmm_x64_state *state = vcpu->state;
          617 +    uint64_t bitmap, tpr;
          618 +    size_t i;
          619 +    int ret;
          620 +
          621 +    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
          622 +
          623 +    bitmap =
          624 +        NVMM_X64_STATE_SEGS |
          625 +        NVMM_X64_STATE_GPRS |
          626 +        NVMM_X64_STATE_CRS  |
          627 +        NVMM_X64_STATE_DRS  |
          628 +        NVMM_X64_STATE_MSRS |
          629 +        NVMM_X64_STATE_FPU;
          630 +
          631 +    ret = nvmm_vcpu_getstate(mach, vcpu, bitmap);
          632 +    if (ret == -1) {
          633 +        error_report("NVMM: Failed to get virtual processor context,"
          634 +            " error=%d", errno);
          635 +    }
          636 +
          637 +    /* GPRs. */
          638 +    env->regs[R_EAX] = state->gprs[NVMM_X64_GPR_RAX];
          639 +    env->regs[R_ECX] = state->gprs[NVMM_X64_GPR_RCX];
          640 +    env->regs[R_EDX] = state->gprs[NVMM_X64_GPR_RDX];
          641 +    env->regs[R_EBX] = state->gprs[NVMM_X64_GPR_RBX];
          642 +    env->regs[R_ESP] = state->gprs[NVMM_X64_GPR_RSP];
          643 +    env->regs[R_EBP] = state->gprs[NVMM_X64_GPR_RBP];
          644 +    env->regs[R_ESI] = state->gprs[NVMM_X64_GPR_RSI];
          645 +    env->regs[R_EDI] = state->gprs[NVMM_X64_GPR_RDI];
          646 +    env->regs[R_R8]  = state->gprs[NVMM_X64_GPR_R8];
          647 +    env->regs[R_R9]  = state->gprs[NVMM_X64_GPR_R9];
          648 +    env->regs[R_R10] = state->gprs[NVMM_X64_GPR_R10];
          649 +    env->regs[R_R11] = state->gprs[NVMM_X64_GPR_R11];
          650 +    env->regs[R_R12] = state->gprs[NVMM_X64_GPR_R12];
          651 +    env->regs[R_R13] = state->gprs[NVMM_X64_GPR_R13];
          652 +    env->regs[R_R14] = state->gprs[NVMM_X64_GPR_R14];
          653 +    env->regs[R_R15] = state->gprs[NVMM_X64_GPR_R15];
          654 +
          655 +    /* RIP and RFLAGS. */
          656 +    env->eip = state->gprs[NVMM_X64_GPR_RIP];
          657 +    env->eflags = state->gprs[NVMM_X64_GPR_RFLAGS];
          658 +
          659 +    /* Segments. */
          660 +    nvmm_get_segment(&env->segs[R_ES], &state->segs[NVMM_X64_SEG_ES]);
          661 +    nvmm_get_segment(&env->segs[R_CS], &state->segs[NVMM_X64_SEG_CS]);
          662 +    nvmm_get_segment(&env->segs[R_SS], &state->segs[NVMM_X64_SEG_SS]);
          663 +    nvmm_get_segment(&env->segs[R_DS], &state->segs[NVMM_X64_SEG_DS]);
          664 +    nvmm_get_segment(&env->segs[R_FS], &state->segs[NVMM_X64_SEG_FS]);
          665 +    nvmm_get_segment(&env->segs[R_GS], &state->segs[NVMM_X64_SEG_GS]);
          666 +
          667 +    /* Special segments. */
          668 +    nvmm_get_segment(&env->gdt, &state->segs[NVMM_X64_SEG_GDT]);
          669 +    nvmm_get_segment(&env->ldt, &state->segs[NVMM_X64_SEG_LDT]);
          670 +    nvmm_get_segment(&env->tr, &state->segs[NVMM_X64_SEG_TR]);
          671 +    nvmm_get_segment(&env->idt, &state->segs[NVMM_X64_SEG_IDT]);
          672 +
          673 +    /* Control registers. */
          674 +    env->cr[0] = state->crs[NVMM_X64_CR_CR0];
          675 +    env->cr[2] = state->crs[NVMM_X64_CR_CR2];
          676 +    env->cr[3] = state->crs[NVMM_X64_CR_CR3];
          677 +    env->cr[4] = state->crs[NVMM_X64_CR_CR4];
          678 +    tpr = state->crs[NVMM_X64_CR_CR8];
          679 +    if (tpr != qcpu->tpr) {
          680 +        qcpu->tpr = tpr;
          681 +        cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
          682 +    }
          683 +    env->xcr0 = state->crs[NVMM_X64_CR_XCR0];
          684 +
          685 +    /* Debug registers. */
          686 +    env->dr[0] = state->drs[NVMM_X64_DR_DR0];
          687 +    env->dr[1] = state->drs[NVMM_X64_DR_DR1];
          688 +    env->dr[2] = state->drs[NVMM_X64_DR_DR2];
          689 +    env->dr[3] = state->drs[NVMM_X64_DR_DR3];
          690 +    env->dr[6] = state->drs[NVMM_X64_DR_DR6];
          691 +    env->dr[7] = state->drs[NVMM_X64_DR_DR7];
          692 +
          693 +    /* FPU. */
          694 +    env->fpuc = state->fpu.fx_cw;
          695 +    env->fpstt = (state->fpu.fx_sw >> 11) & 0x7;
          696 +    env->fpus = state->fpu.fx_sw & ~0x3800;
          697 +    for (i = 0; i < 8; i++) {
          698 +        env->fptags[i] = !((state->fpu.fx_tw >> i) & 1);
          699 +    }
          700 +    env->fpop = state->fpu.fx_opcode;
          701 +    env->fpip = state->fpu.fx_ip.fa_64;
          702 +    env->fpdp = state->fpu.fx_dp.fa_64;
          703 +    env->mxcsr = state->fpu.fx_mxcsr;
          704 +    assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
          705 +    memcpy(env->fpregs, state->fpu.fx_87_ac, sizeof(env->fpregs));
          706 +    for (i = 0; i < 16; i++) {
          707 +        memcpy(&env->xmm_regs[i].ZMM_Q(0),
          708 +            &state->fpu.fx_xmm[i].xmm_bytes[0], 8);
          709 +        memcpy(&env->xmm_regs[i].ZMM_Q(1),
          710 +            &state->fpu.fx_xmm[i].xmm_bytes[8], 8);
          711 +    }
          712 +
          713 +    /* MSRs. */
          714 +    env->efer = state->msrs[NVMM_X64_MSR_EFER];
          715 +    env->star = state->msrs[NVMM_X64_MSR_STAR];
          716 +#ifdef TARGET_X86_64
          717 +    env->lstar = state->msrs[NVMM_X64_MSR_LSTAR];
          718 +    env->cstar = state->msrs[NVMM_X64_MSR_CSTAR];
          719 +    env->fmask = state->msrs[NVMM_X64_MSR_SFMASK];
          720 +    env->kernelgsbase = state->msrs[NVMM_X64_MSR_KERNELGSBASE];
          721 +#endif
          722 +    env->sysenter_cs  = state->msrs[NVMM_X64_MSR_SYSENTER_CS];
          723 +    env->sysenter_esp = state->msrs[NVMM_X64_MSR_SYSENTER_ESP];
          724 +    env->sysenter_eip = state->msrs[NVMM_X64_MSR_SYSENTER_EIP];
          725 +    env->pat = state->msrs[NVMM_X64_MSR_PAT];
          726 +    env->tsc = state->msrs[NVMM_X64_MSR_TSC];
          727 +
          728 +    x86_update_hflags(env);
          729 +}
          730 +
          731 +static bool
          732 +nvmm_can_take_int(CPUState *cpu)
          733 +{
          734 +    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
          735 +    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
          736 +    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
          737 +    struct nvmm_machine *mach = get_nvmm_mach();
          738 +
          739 +    if (qcpu->int_window_exit) {
          740 +        return false;
          741 +    }
          742 +
          743 +    if (qcpu->int_shadow || (!(env->eflags & IF_MASK))) {
          744 +        struct nvmm_x64_state *state = vcpu->state;
          745 +
          746 +        /* Exit on interrupt window. */
          747 +        nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_INTR);
          748 +        state->intr.int_window_exiting = 1;
          749 +        nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_INTR);
          750 +
          751 +        return false;
          752 +    }
          753 +
          754 +    return true;
          755 +}
          756 +
          757 +static bool
          758 +nvmm_can_take_nmi(CPUState *cpu)
          759 +{
          760 +    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
          761 +
          762 +    /*
          763 +     * Contrary to INTs, NMIs always schedule an exit when they are
          764 +     * completed. Therefore, if window-exiting is enabled, it means
          765 +     * NMIs are blocked.
          766 +     */
          767 +    if (qcpu->nmi_window_exit) {
          768 +        return false;
          769 +    }
          770 +
          771 +    return true;
          772 +}
          773 +
          774 +/*
          775 + * Called before the VCPU is run. We inject events generated by the I/O
          776 + * thread, and synchronize the guest TPR.
          777 + */
          778 +static void
          779 +nvmm_vcpu_pre_run(CPUState *cpu)
          780 +{
          781 +    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
          782 +    struct nvmm_machine *mach = get_nvmm_mach();
          783 +    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
          784 +    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
          785 +    X86CPU *x86_cpu = X86_CPU(cpu);
          786 +    struct nvmm_x64_state *state = vcpu->state;
          787 +    struct nvmm_event *event = vcpu->event;
          788 +    bool has_event = false;
          789 +    bool sync_tpr = false;
          790 +    uint8_t tpr;
          791 +    int ret;
          792 +
          793 +    qemu_mutex_lock_iothread();
          794 +
          795 +    tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
          796 +    if (tpr != qcpu->tpr) {
          797 +        qcpu->tpr = tpr;
          798 +        sync_tpr = true;
          799 +    }
          800 +
          801 +    /*
          802 +     * Force the VCPU out of its inner loop to process any INIT requests
          803 +     * or commit pending TPR access.
          804 +     */
          805 +    if (cpu->interrupt_request & (CPU_INTERRUPT_INIT|CPU_INTERRUPT_TPR)) {
          806 +        cpu->exit_request = 1;
          807 +    }
          808 +
          809 +    if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
          810 +        if (nvmm_can_take_nmi(cpu)) {
          811 +            cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
          812 +            event->type = NVMM_EVENT_INTERRUPT_HW;
          813 +            event->vector = 2;
          814 +            has_event = true;
          815 +        }
          816 +    }
          817 +
          818 +    if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
          819 +        if (nvmm_can_take_int(cpu)) {
          820 +            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
          821 +            event->type = NVMM_EVENT_INTERRUPT_HW;
          822 +            event->vector = cpu_get_pic_interrupt(env);
          823 +            has_event = true;
          824 +        }
          825 +    }
          826 +
          827 +    /* Don't want SMIs. */
          828 +    if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
          829 +        cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
          830 +    }
          831 +
          832 +    if (sync_tpr) {
          833 +        ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_CRS);
          834 +        if (ret == -1) {
          835 +            error_report("NVMM: Failed to get CPU state,"
          836 +                " error=%d", errno);
          837 +        }
          838 +
          839 +        state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;
          840 +
          841 +        ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_CRS);
          842 +        if (ret == -1) {
          843 +            error_report("NVMM: Failed to set CPU state,"
          844 +                " error=%d", errno);
          845 +        }
          846 +    }
          847 +
          848 +    if (has_event) {
          849 +        ret = nvmm_vcpu_inject(mach, vcpu);
          850 +        if (ret == -1) {
          851 +            error_report("NVMM: Failed to inject event,"
          852 +                " error=%d", errno);
          853 +        }
          854 +    }
          855 +
          856 +    qemu_mutex_unlock_iothread();
          857 +}
          858 +
          859 +/*
          860 + * Called after the VCPU ran. We synchronize the host view of the TPR and
          861 + * RFLAGS.
          862 + */
          863 +static void
          864 +nvmm_vcpu_post_run(CPUState *cpu, struct nvmm_exit *exit)
          865 +{
          866 +    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
          867 +    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
          868 +    X86CPU *x86_cpu = X86_CPU(cpu);
          869 +    uint64_t tpr;
          870 +
          871 +    env->eflags = exit->exitstate[NVMM_X64_EXITSTATE_RFLAGS];
          872 +
          873 +    qcpu->int_shadow =
          874 +        exit->exitstate[NVMM_X64_EXITSTATE_INT_SHADOW];
          875 +    qcpu->int_window_exit =
          876 +        exit->exitstate[NVMM_X64_EXITSTATE_INT_WINDOW_EXIT];
          877 +    qcpu->nmi_window_exit =
          878 +        exit->exitstate[NVMM_X64_EXITSTATE_NMI_WINDOW_EXIT];
          879 +
          880 +    tpr = exit->exitstate[NVMM_X64_EXITSTATE_CR8];
          881 +    if (qcpu->tpr != tpr) {
          882 +        qcpu->tpr = tpr;
          883 +        qemu_mutex_lock_iothread();
          884 +        cpu_set_apic_tpr(x86_cpu->apic_state, qcpu->tpr);
          885 +        qemu_mutex_unlock_iothread();
          886 +    }
          887 +}
          888 +
          889 +/* -------------------------------------------------------------------------- */
          890 +
          891 +static void
          892 +nvmm_io_callback(struct nvmm_io *io)
          893 +{
          894 +    MemTxAttrs attrs = { 0 };
          895 +    int ret;
          896 +
          897 +    ret = address_space_rw(&address_space_io, io->port, attrs, io->data,
          898 +        io->size, !io->in);
          899 +    if (ret != MEMTX_OK) {
          900 +        error_report("NVMM: I/O Transaction Failed "
          901 +            "[%s, port=%lu, size=%zu]", (io->in ? "in" : "out"),
          902 +            io->port, io->size);
          903 +    }
          904 +
          905 +    /* XXX Needed, otherwise infinite loop. */
          906 +    current_cpu->vcpu_dirty = false;
          907 +}
          908 +
          909 +static void
          910 +nvmm_mem_callback(struct nvmm_mem *mem)
          911 +{
          912 +    cpu_physical_memory_rw(mem->gpa, mem->data, mem->size, mem->write);
          913 +
          914 +    /* XXX Needed, otherwise infinite loop. */
          915 +    current_cpu->vcpu_dirty = false;
          916 +}
          917 +
          918 +static struct nvmm_callbacks nvmm_callbacks = {
          919 +    .io = nvmm_io_callback,
          920 +    .mem = nvmm_mem_callback
          921 +};
          922 +
          923 +/* -------------------------------------------------------------------------- */
          924 +
          925 +static int
          926 +nvmm_handle_mem(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
          927 +{
          928 +    int ret;
          929 +
          930 +    ret = nvmm_assist_mem(mach, vcpu);
          931 +    if (ret == -1) {
          932 +        error_report("NVMM: Mem Assist Failed [gpa=%p]",
          933 +            (void *)vcpu->exit->u.mem.gpa);
          934 +    }
          935 +
          936 +    return ret;
          937 +}
          938 +
          939 +static int
          940 +nvmm_handle_io(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
          941 +{
          942 +    int ret;
          943 +
          944 +    ret = nvmm_assist_io(mach, vcpu);
          945 +    if (ret == -1) {
          946 +        error_report("NVMM: I/O Assist Failed [port=%d]",
          947 +            (int)vcpu->exit->u.io.port);
          948 +    }
          949 +
          950 +    return ret;
          951 +}
          952 +
          953 +static int
          954 +nvmm_handle_msr(struct nvmm_machine *mach, CPUState *cpu,
          955 +    struct nvmm_exit *exit)
          956 +{
          957 +    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
          958 +    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
          959 +    X86CPU *x86_cpu = X86_CPU(cpu);
          960 +    struct nvmm_x64_state *state = vcpu->state;
          961 +    uint64_t val;
          962 +    int ret;
          963 +
          964 +    val = exit->u.msr.val;
          965 +
          966 +    switch (exit->u.msr.msr) {
          967 +    case MSR_IA32_APICBASE:
          968 +        if (exit->u.msr.type == NVMM_EXIT_MSR_RDMSR) {
          969 +            val = cpu_get_apic_base(x86_cpu->apic_state);
          970 +        } else {
          971 +            cpu_set_apic_base(x86_cpu->apic_state, val);
          972 +        }
          973 +        break;
          974 +    default:
          975 +        // TODO: more MSRs to add?
          976 +        if (exit->u.msr.type == NVMM_EXIT_MSR_RDMSR) {
          977 +            val = 0;
          978 +        }
          979 +        error_report("NVMM: Unexpected %sMSR 0x%lx [val=0x%lx], ignored",
          980 +            (exit->u.msr.type == NVMM_EXIT_MSR_RDMSR) ? "RD" : "WR",
          981 +            exit->u.msr.msr, val);
          982 +        break;
          983 +    }
          984 +
          985 +    ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS);
          986 +    if (ret == -1) {
          987 +        return -1;
          988 +    }
          989 +
          990 +    if (exit->u.msr.type == NVMM_EXIT_MSR_RDMSR) {
          991 +        state->gprs[NVMM_X64_GPR_RAX] = (val & 0xFFFFFFFF);
          992 +        state->gprs[NVMM_X64_GPR_RDX] = (val >> 32);
          993 +    }
          994 +    state->gprs[NVMM_X64_GPR_RIP] = exit->u.msr.npc;
          995 +
          996 +    ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
          997 +    if (ret == -1) {
          998 +        return -1;
          999 +    }
         1000 +
         1001 +    return 0;
         1002 +}
         1003 +
         1004 +static int
         1005 +nvmm_handle_halted(struct nvmm_machine *mach, CPUState *cpu,
         1006 +    struct nvmm_exit *exit)
         1007 +{
         1008 +    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
         1009 +    int ret = 0;
         1010 +
         1011 +    qemu_mutex_lock_iothread();
         1012 +
         1013 +    if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
         1014 +          (env->eflags & IF_MASK)) &&
         1015 +        !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
         1016 +        cpu->exception_index = EXCP_HLT;
         1017 +        cpu->halted = true;
         1018 +        ret = 1;
         1019 +    }
         1020 +
         1021 +    qemu_mutex_unlock_iothread();
         1022 +
         1023 +    return ret;
         1024 +}
         1025 +
         1026 +static int
         1027 +nvmm_inject_ud(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
         1028 +{
         1029 +    struct nvmm_event *event = vcpu->event;
         1030 +
         1031 +    event->type = NVMM_EVENT_EXCEPTION;
         1032 +    event->vector = 6;
         1033 +    event->u.error = 0;
         1034 +
         1035 +    return nvmm_vcpu_inject(mach, vcpu);
         1036 +}
         1037 +
         1038 +static int
         1039 +nvmm_vcpu_loop(CPUState *cpu)
         1040 +{
         1041 +    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
         1042 +    struct nvmm_machine *mach = get_nvmm_mach();
         1043 +    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
         1044 +    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
         1045 +    X86CPU *x86_cpu = X86_CPU(cpu);
         1046 +    struct nvmm_exit *exit = vcpu->exit;
         1047 +    int ret;
         1048 +
         1049 +    /*
         1050 +     * Some asynchronous events must be handled outside of the inner
         1051 +     * VCPU loop. They are handled here.
         1052 +     */
         1053 +    if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
         1054 +        nvmm_cpu_synchronize_state(cpu);
         1055 +        do_cpu_init(x86_cpu);
         1056 +        /* XXX: reset the INT/NMI windows */
         1057 +    }
         1058 +    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
         1059 +        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
         1060 +        apic_poll_irq(x86_cpu->apic_state);
         1061 +    }
         1062 +    if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
         1063 +         (env->eflags & IF_MASK)) ||
         1064 +        (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
         1065 +        cpu->halted = false;
         1066 +    }
         1067 +    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
         1068 +        nvmm_cpu_synchronize_state(cpu);
         1069 +        do_cpu_sipi(x86_cpu);
         1070 +    }
         1071 +    if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
         1072 +        cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
         1073 +        nvmm_cpu_synchronize_state(cpu);
         1074 +        apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
         1075 +            env->tpr_access_type);
         1076 +    }
         1077 +
         1078 +    if (cpu->halted) {
         1079 +        cpu->exception_index = EXCP_HLT;
         1080 +        atomic_set(&cpu->exit_request, false);
         1081 +        return 0;
         1082 +    }
         1083 +
         1084 +    qemu_mutex_unlock_iothread();
         1085 +    cpu_exec_start(cpu);
         1086 +
         1087 +    /*
         1088 +     * Inner VCPU loop.
         1089 +     */
         1090 +    do {
         1091 +        if (cpu->vcpu_dirty) {
         1092 +            nvmm_set_registers(cpu);
         1093 +            cpu->vcpu_dirty = false;
         1094 +        }
         1095 +
         1096 +        if (qcpu->stop) {
         1097 +            cpu->exception_index = EXCP_INTERRUPT;
         1098 +            qcpu->stop = false;
         1099 +            ret = 1;
         1100 +            break;
         1101 +        }
         1102 +
         1103 +        nvmm_vcpu_pre_run(cpu);
         1104 +
         1105 +        if (atomic_read(&cpu->exit_request)) {
         1106 +            qemu_cpu_kick_self();
         1107 +        }
         1108 +
         1109 +        ret = nvmm_vcpu_run(mach, vcpu);
         1110 +        if (ret == -1) {
         1111 +            error_report("NVMM: Failed to exec a virtual processor,"
         1112 +                " error=%d", errno);
         1113 +            break;
         1114 +        }
         1115 +
         1116 +        nvmm_vcpu_post_run(cpu, exit);
         1117 +
         1118 +        switch (exit->reason) {
         1119 +        case NVMM_EXIT_NONE:
         1120 +            break;
         1121 +        case NVMM_EXIT_MEMORY:
         1122 +            ret = nvmm_handle_mem(mach, vcpu);
         1123 +            break;
         1124 +        case NVMM_EXIT_IO:
         1125 +            ret = nvmm_handle_io(mach, vcpu);
         1126 +            break;
         1127 +        case NVMM_EXIT_MSR:
         1128 +            ret = nvmm_handle_msr(mach, cpu, exit);
         1129 +            break;
         1130 +        case NVMM_EXIT_INT_READY:
         1131 +        case NVMM_EXIT_NMI_READY:
         1132 +            break;
         1133 +        case NVMM_EXIT_MONITOR:
         1134 +        case NVMM_EXIT_MWAIT:
         1135 +        case NVMM_EXIT_MWAIT_COND:
         1136 +            ret = nvmm_inject_ud(mach, vcpu);
         1137 +            break;
         1138 +        case NVMM_EXIT_HALTED:
         1139 +            ret = nvmm_handle_halted(mach, cpu, exit);
         1140 +            break;
         1141 +        case NVMM_EXIT_SHUTDOWN:
         1142 +            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
         1143 +            cpu->exception_index = EXCP_INTERRUPT;
         1144 +            ret = 1;
         1145 +            break;
         1146 +        default:
         1147 +            error_report("NVMM: Unexpected VM exit code 0x%lx [hw=0x%lx]",
         1148 +                exit->reason, exit->u.inv.hwcode);
         1149 +            nvmm_get_registers(cpu);
         1150 +            qemu_mutex_lock_iothread();
         1151 +            qemu_system_guest_panicked(cpu_get_crash_info(cpu));
         1152 +            qemu_mutex_unlock_iothread();
         1153 +            ret = -1;
         1154 +            break;
         1155 +        }
         1156 +    } while (ret == 0);
         1157 +
         1158 +    cpu_exec_end(cpu);
         1159 +    qemu_mutex_lock_iothread();
         1160 +    current_cpu = cpu;
         1161 +
         1162 +    atomic_set(&cpu->exit_request, false);
         1163 +
         1164 +    return ret < 0;
         1165 +}
         1166 +
         1167 +/* -------------------------------------------------------------------------- */
         1168 +
         1169 +static void
         1170 +do_nvmm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
         1171 +{
         1172 +    nvmm_get_registers(cpu);
         1173 +    cpu->vcpu_dirty = true;
         1174 +}
         1175 +
         1176 +static void
         1177 +do_nvmm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
         1178 +{
         1179 +    nvmm_set_registers(cpu);
         1180 +    cpu->vcpu_dirty = false;
         1181 +}
         1182 +
         1183 +static void
         1184 +do_nvmm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
         1185 +{
         1186 +    nvmm_set_registers(cpu);
         1187 +    cpu->vcpu_dirty = false;
         1188 +}
         1189 +
         1190 +static void
         1191 +do_nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
         1192 +{
         1193 +    cpu->vcpu_dirty = true;
         1194 +}
         1195 +
         1196 +void nvmm_cpu_synchronize_state(CPUState *cpu)
         1197 +{
         1198 +    if (!cpu->vcpu_dirty) {
         1199 +        run_on_cpu(cpu, do_nvmm_cpu_synchronize_state, RUN_ON_CPU_NULL);
         1200 +    }
         1201 +}
         1202 +
         1203 +void nvmm_cpu_synchronize_post_reset(CPUState *cpu)
         1204 +{
         1205 +    run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
         1206 +}
         1207 +
         1208 +void nvmm_cpu_synchronize_post_init(CPUState *cpu)
         1209 +{
         1210 +    run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
         1211 +}
         1212 +
         1213 +void nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu)
         1214 +{
         1215 +    run_on_cpu(cpu, do_nvmm_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
         1216 +}
         1217 +
         1218 +/* -------------------------------------------------------------------------- */
         1219 +
         1220 +static Error *nvmm_migration_blocker;
         1221 +
         1222 +static void
         1223 +nvmm_ipi_signal(int sigcpu)
         1224 +{
         1225 +    struct qemu_vcpu *qcpu;
         1226 +
         1227 +    if (current_cpu) {
         1228 +        qcpu = get_qemu_vcpu(current_cpu);
         1229 +        qcpu->stop = true;
         1230 +    }
         1231 +}
         1232 +
         1233 +static void
         1234 +nvmm_init_cpu_signals(void)
         1235 +{
         1236 +    struct sigaction sigact;
         1237 +    sigset_t set;
         1238 +
         1239 +    /* Install the IPI handler. */
         1240 +    memset(&sigact, 0, sizeof(sigact));
         1241 +    sigact.sa_handler = nvmm_ipi_signal;
         1242 +    sigaction(SIG_IPI, &sigact, NULL);
         1243 +
         1244 +    /* Allow IPIs on the current thread. */
         1245 +    sigprocmask(SIG_BLOCK, NULL, &set);
         1246 +    sigdelset(&set, SIG_IPI);
         1247 +    pthread_sigmask(SIG_SETMASK, &set, NULL);
         1248 +}
         1249 +
         1250 +int
         1251 +nvmm_init_vcpu(CPUState *cpu)
         1252 +{
         1253 +    struct nvmm_machine *mach = get_nvmm_mach();
         1254 +    Error *local_error = NULL;
         1255 +    struct qemu_vcpu *qcpu;
         1256 +    int ret;
         1257 +
         1258 +    nvmm_init_cpu_signals();
         1259 +
         1260 +    if (nvmm_migration_blocker == NULL) {
         1261 +        error_setg(&nvmm_migration_blocker,
         1262 +            "NVMM: Migration not supported");
         1263 +
         1264 +        (void)migrate_add_blocker(nvmm_migration_blocker, &local_error);
         1265 +        if (local_error) {
         1266 +            error_report_err(local_error);
         1267 +            migrate_del_blocker(nvmm_migration_blocker);
         1268 +            error_free(nvmm_migration_blocker);
         1269 +            return -EINVAL;
         1270 +        }
         1271 +    }
         1272 +
         1273 +    qcpu = g_malloc0(sizeof(*qcpu));
         1274 +    if (qcpu == NULL) {
         1275 +        error_report("NVMM: Failed to allocate VCPU context.");
         1276 +        return -ENOMEM;
         1277 +    }
         1278 +
         1279 +    ret = nvmm_vcpu_create(mach, cpu->cpu_index, &qcpu->vcpu);
         1280 +    if (ret == -1) {
         1281 +        error_report("NVMM: Failed to create a virtual processor,"
         1282 +            " error=%d", errno);
         1283 +        g_free(qcpu);
         1284 +        return -EINVAL;
         1285 +    }
         1286 +
         1287 +    cpu->vcpu_dirty = true;
         1288 +    cpu->hax_vcpu = (struct hax_vcpu_state *)qcpu;
         1289 +
         1290 +    return 0;
         1291 +}
         1292 +
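               +/*
               + * Outer execution loop: keep re-entering the guest until an exception
               + * is pending for QEMU to handle; a fatal error reported by
               + * nvmm_vcpu_loop() aborts the process.
               + */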
         1293 +int
         1294 +nvmm_vcpu_exec(CPUState *cpu)
         1295 +{
         1296 +    int ret, fatal;
         1297 +
         1298 +    while (1) {
         1299 +        if (cpu->exception_index >= EXCP_INTERRUPT) {
         1300 +            ret = cpu->exception_index;
         1301 +            cpu->exception_index = -1;
         1302 +            break;
         1303 +        }
         1304 +
         1305 +        fatal = nvmm_vcpu_loop(cpu);
         1306 +
         1307 +        if (fatal) {
         1308 +            error_report("NVMM: Failed to execute a VCPU.");
         1309 +            abort();
         1310 +        }
         1311 +    }
         1312 +
         1313 +    return ret;
         1314 +}
         1315 +
         1316 +void
         1317 +nvmm_destroy_vcpu(CPUState *cpu)
         1318 +{
         1319 +    struct nvmm_machine *mach = get_nvmm_mach();
         1320 +    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
         1321 +
         1322 +    nvmm_vcpu_destroy(mach, &qcpu->vcpu);
         1323 +    g_free(cpu->hax_vcpu);
         1324 +}
         1325 +
         1326 +/* -------------------------------------------------------------------------- */
         1327 +
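               +/*
               + * Map or unmap a guest-physical range.  ROM regions are mapped
               + * read/execute only; everything else is writable as well.
               + */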
         1328 +static void
         1329 +nvmm_update_mapping(hwaddr start_pa, ram_addr_t size, uintptr_t hva,
         1330 +    bool add, bool rom, const char *name)
         1331 +{
         1332 +    struct nvmm_machine *mach = get_nvmm_mach();
         1333 +    int ret, prot;
         1334 +
         1335 +    if (add) {
         1336 +        prot = PROT_READ | PROT_EXEC;
         1337 +        if (!rom) {
         1338 +            prot |= PROT_WRITE;
         1339 +        }
         1340 +        ret = nvmm_gpa_map(mach, hva, start_pa, size, prot);
         1341 +    } else {
         1342 +        ret = nvmm_gpa_unmap(mach, hva, start_pa, size);
         1343 +    }
         1344 +
         1345 +    if (ret == -1) {
         1346 +        error_report("NVMM: Failed to %s GPA range '%s' PA:%p, "
         1347 +            "Size:%p bytes, HostVA:%p, error=%d",
         1348 +            (add ? "map" : "unmap"), name, (void *)(uintptr_t)start_pa,
         1349 +            (void *)size, (void *)hva, errno);
         1350 +    }
         1351 +}
         1352 +
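               +/*
               + * Translate a MemoryRegionSection into an NVMM GPA mapping.  The range
               + * is first trimmed to host-page granularity: delta rounds start_pa up
               + * to the next page boundary, and any partial trailing page is dropped.
               + */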
         1353 +static void
         1354 +nvmm_process_section(MemoryRegionSection *section, int add)
         1355 +{
         1356 +    MemoryRegion *mr = section->mr;
         1357 +    hwaddr start_pa = section->offset_within_address_space;
         1358 +    ram_addr_t size = int128_get64(section->size);
         1359 +    unsigned int delta;
         1360 +    uintptr_t hva;
         1361 +
         1362 +    if (!memory_region_is_ram(mr)) {
         1363 +        return;
         1364 +    }
         1365 +
         1366 +    /* Adjust start_pa and size so that they are page-aligned. */
         1367 +    delta = qemu_real_host_page_size - (start_pa & ~qemu_real_host_page_mask);
         1368 +    delta &= ~qemu_real_host_page_mask;
         1369 +    if (delta > size) {
         1370 +        return;
         1371 +    }
         1372 +    start_pa += delta;
         1373 +    size -= delta;
         1374 +    size &= qemu_real_host_page_mask;
         1375 +    if (!size || (start_pa & ~qemu_real_host_page_mask)) {
         1376 +        return;
         1377 +    }
         1378 +
         1379 +    hva = (uintptr_t)memory_region_get_ram_ptr(mr) +
         1380 +        section->offset_within_region + delta;
         1381 +
         1382 +    nvmm_update_mapping(start_pa, size, hva, add,
         1383 +        memory_region_is_rom(mr), mr->name);
         1384 +}
         1385 +
         1386 +static void
         1387 +nvmm_region_add(MemoryListener *listener, MemoryRegionSection *section)
         1388 +{
         1389 +    memory_region_ref(section->mr);
         1390 +    nvmm_process_section(section, 1);
         1391 +}
         1392 +
         1393 +static void
         1394 +nvmm_region_del(MemoryListener *listener, MemoryRegionSection *section)
         1395 +{
         1396 +    nvmm_process_section(section, 0);
         1397 +    memory_region_unref(section->mr);
         1398 +}
         1399 +
         1400 +static void
         1401 +nvmm_transaction_begin(MemoryListener *listener)
         1402 +{
         1403 +    /* nothing */
         1404 +}
         1405 +
         1406 +static void
         1407 +nvmm_transaction_commit(MemoryListener *listener)
         1408 +{
         1409 +    /* nothing */
         1410 +}
         1411 +
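               +/*
               + * No fine-grained dirty-page information is available from NVMM here,
               + * so conservatively mark the whole RAM section dirty on each sync.
               + */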
         1412 +static void
         1413 +nvmm_log_sync(MemoryListener *listener, MemoryRegionSection *section)
         1414 +{
         1415 +    MemoryRegion *mr = section->mr;
         1416 +
         1417 +    if (!memory_region_is_ram(mr)) {
         1418 +        return;
         1419 +    }
         1420 +
         1421 +    memory_region_set_dirty(mr, 0, int128_get64(section->size));
         1422 +}
         1423 +
         1424 +static MemoryListener nvmm_memory_listener = {
         1425 +    .begin = nvmm_transaction_begin,
         1426 +    .commit = nvmm_transaction_commit,
         1427 +    .region_add = nvmm_region_add,
         1428 +    .region_del = nvmm_region_del,
         1429 +    .log_sync = nvmm_log_sync,
         1430 +    .priority = 10,
         1431 +};
         1432 +
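               +/*
               + * Register each new RAM block with the kernel via nvmm_hva_map(); the
               + * GPA mappings established above can only point into host ranges that
               + * have been registered this way.
               + */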
         1433 +static void
         1434 +nvmm_ram_block_added(RAMBlockNotifier *n, void *host, size_t size)
         1435 +{
         1436 +    struct nvmm_machine *mach = get_nvmm_mach();
         1437 +    uintptr_t hva = (uintptr_t)host;
         1438 +    int ret;
         1439 +
         1440 +    ret = nvmm_hva_map(mach, hva, size);
         1441 +
         1442 +    if (ret == -1) {
         1443 +        error_report("NVMM: Failed to map HVA, HostVA:%p "
         1444 +            "Size:%p bytes, error=%d",
         1445 +            (void *)hva, (void *)size, errno);
         1446 +    }
         1447 +}
         1448 +
         1449 +static struct RAMBlockNotifier nvmm_ram_notifier = {
         1450 +    .ram_block_added = nvmm_ram_block_added
         1451 +};
         1452 +
         1453 +/* -------------------------------------------------------------------------- */
         1454 +
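               +/*
               + * Replacement for the default cpu_interrupt handler: record the request
               + * and kick the vCPU if it is running on another thread.
               + */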
         1455 +static void
         1456 +nvmm_handle_interrupt(CPUState *cpu, int mask)
         1457 +{
         1458 +    cpu->interrupt_request |= mask;
         1459 +
         1460 +    if (!qemu_cpu_is_self(cpu)) {
         1461 +        qemu_cpu_kick(cpu);
         1462 +    }
         1463 +}
         1464 +
         1465 +/* -------------------------------------------------------------------------- */
         1466 +
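               +/*
               + * Machine-wide configuration: mask the MCE, MCA and MTRR bits out of
               + * CPUID leaf 0x01, presumably because NVMM does not virtualize those
               + * features, and register the I/O callbacks used by the vCPU loop.
               + */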
         1467 +static int
         1468 +nvmm_accel_configure(struct nvmm_machine *mach)
         1469 +{
         1470 +    struct nvmm_mach_conf_x86_cpuid cpuid;
         1471 +    int ret;
         1472 +
         1473 +    memset(&cpuid, 0, sizeof(cpuid));
         1474 +    cpuid.leaf = 0x00000001;
         1475 +    cpuid.del.edx = CPUID_MCE | CPUID_MCA | CPUID_MTRR;
         1476 +
         1477 +    ret = nvmm_machine_configure(mach, NVMM_MACH_CONF_X86_CPUID, &cpuid);
          1478 +    if (ret == -1) {
          1479 +        return -1;
               +    }
         1480 +
         1481 +    ret = nvmm_machine_configure(mach, NVMM_MACH_CONF_CALLBACKS,
         1482 +        &nvmm_callbacks);
          1483 +    if (ret == -1) {
          1484 +        return -1;
               +    }
         1485 +
         1486 +    return 0;
         1487 +}
         1488 +
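               +/*
               + * Accelerator bring-up: check that a compatible NVMM is present (API
               + * version 1, matching x64 state size), create the machine, then hook
               + * up the memory listener, RAM-block notifier and interrupt handler.
               + */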
         1489 +static int
         1490 +nvmm_accel_init(MachineState *ms)
         1491 +{
         1492 +    struct nvmm_capability cap;
         1493 +    int ret;
         1494 +
         1495 +    ret = nvmm_capability(&cap);
         1496 +    if (ret == -1) {
         1497 +        error_report("NVMM: No accelerator found, error=%d", errno);
         1498 +        return -ENOSPC;
         1499 +    }
         1500 +    if (cap.version != 1) {
         1501 +        error_report("NVMM: Unsupported version %lu", cap.version);
         1502 +        return -ENOSPC;
         1503 +    }
         1504 +    if (cap.state_size != sizeof(struct nvmm_x64_state)) {
         1505 +        error_report("NVMM: Wrong state size %zu", cap.state_size);
         1506 +        return -ENOSPC;
         1507 +    }
         1508 +
         1509 +    ret = nvmm_machine_create(&qemu_mach.mach);
         1510 +    if (ret == -1) {
         1511 +        error_report("NVMM: Machine creation failed, error=%d", errno);
         1512 +        return -ENOSPC;
         1513 +    }
         1514 +
         1515 +    ret = nvmm_accel_configure(&qemu_mach.mach);
         1516 +    if (ret == -1) {
         1517 +        error_report("NVMM: Machine configuration failed, error=%d",
         1518 +            errno);
         1519 +        return -ENOSPC;
         1520 +    }
         1521 +
         1522 +    memory_listener_register(&nvmm_memory_listener, &address_space_memory);
         1523 +    ram_block_notifier_add(&nvmm_ram_notifier);
         1524 +
         1525 +    cpu_interrupt_handler = nvmm_handle_interrupt;
         1526 +
         1527 +    printf("NetBSD Virtual Machine Monitor accelerator is operational\n");
         1528 +    return 0;
         1529 +}
         1530 +
         1531 +int
         1532 +nvmm_enabled(void)
         1533 +{
         1534 +    return nvmm_allowed;
         1535 +}
         1536 +
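               +/*
               + * QOM boilerplate registering "nvmm" as an accelerator class,
               + * selectable at runtime with -accel nvmm.
               + */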
         1537 +static void
         1538 +nvmm_accel_class_init(ObjectClass *oc, void *data)
         1539 +{
         1540 +    AccelClass *ac = ACCEL_CLASS(oc);
         1541 +    ac->name = "NVMM";
         1542 +    ac->init_machine = nvmm_accel_init;
         1543 +    ac->allowed = &nvmm_allowed;
         1544 +}
         1545 +
         1546 +static const TypeInfo nvmm_accel_type = {
         1547 +    .name = ACCEL_CLASS_NAME("nvmm"),
         1548 +    .parent = TYPE_ACCEL,
         1549 +    .class_init = nvmm_accel_class_init,
         1550 +};
         1551 +
         1552 +static void
         1553 +nvmm_type_init(void)
         1554 +{
         1555 +    type_register_static(&nvmm_accel_type);
         1556 +}
         1557 +
         1558 +type_init(nvmm_type_init);