/*
 *  plex86: run multiple x86 operating systems concurrently
 *  Copyright (C) 1999-2001 Kevin P. Lawton
 *
 *  fault-mon.c:  fault/int handlers for VM monitor - monitor space.
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */


#include "plex86.h"
#define IN_MONITOR_SPACE
#include "monitor.h"





/* The monitor stack frame.  When an exception or interrupt occurs
 * during the execution of either guest or monitor code, the following
 * values are pushed.
 * 
 * [gs]
 * [fs]       These are pushed by the CPU _only if_ transitioning from
 * [ds]       guest code running in v86 mode.
 * [es]
 *
 * ss
 * esp
 * eflags    Values pushed by the CPU and interrupt stub.  To simplify
 * cs        things, the stub pushes an error of zero for those
 * eip       events which don't naturally cause an error push, and
 * error     also pushes the vector of the exception/interrupt.
 * vector
 *
 * eax
 * ecx
 * edx       General registers, pushed with a PUSHA instruction,
 * ebx       by code below.
 * <esp>
 * ebp
 * esi
 * edi
 *
 * es
 * ds        Segment selectors, pushed by code below.
 * fs
 * gs
 */

/* Info on hack for transitioning to 16-bit guest stack segments.  For
 * the cases of returning to a guest being monitored in either v86-mode,
 * or PM with a 32-bit SS, the full 32 bits of ESP are honored from the
 * monitor stack image.  But when performing an IRET back to guest code
 * monitored in PM with a 16-bit stack segment, only the lower 16 bits
 * of ESP are initialized.  The upper 16 bits remain from the previous and
 * unrelated monitor ESP values.  To handle restoring the full 32 bits of
 * the guest ESP as expected, a temporary monitor stack segment is created,
 * with adjusted SS.base and ESP values such that the same linear offsets
 * are generated for the parameters (guest register values) on the
 * monitor stack, but with the upper 16 bits of ESP being identical
 * to those expected by the guest.  This temporary segment is used only
 * for the purposes of the IRET.  During the next event generated while
 * the guest is executing, the normal SS:ESP values are reloaded from
 * the monitor TSS.
 *
 * The only issue is if an exception is generated while trying to
 * IRET to the guest code.  In this case, we have to fix the SS:ESP
 * first, as these values will not have been reloaded as they usually
 * are in a transition from guest (ring3) to monitor (ring0).
 */


void handleMonFault(guest_context_t *monContext);

  static inline
Bit32u readCR2(void)
/*
 * readCR2(): Return the current contents of control register CR2,
 * fetched with a single MOV into a general register.
 */
{
  Bit32u faultAddr;

  asm volatile (
    "movl %%cr2, %0"
    : "=r" (faultAddr)
    );
  return faultAddr;
}


asm (
".text                  \n\t"

/* __handle_fault:  This is called by all of the monitor's fault handler
 *     stubs.  A fault could have originated from execution of the guest
 *     (due to virtualization conditions or natural fault generation) or
 *     from the monitor (currently only due to bugs in the monitor).
 *
 *     On entry the stack holds the CPU/stub frame described at the top
 *     of this file (vector on top).  The general and segment registers
 *     are saved first, completing the context image whose address is
 *     later passed to the C handlers.
 */
".globl __handle_fault  \n\t"
"__handle_fault:        \n\t"
"  pushal               \n\t" /* Save general registers */
"  pushl %es            \n\t" /* Save segment registers */
"  pushl %ds            \n\t"
"  pushl %fs            \n\t"
"  pushl %gs            \n\t"

/* Offsets from ESP at this point: 4 segment dwords (16 bytes), then the
 * 8-dword PUSHA image (32 bytes), then vector (48), error (52), eip (56),
 * cs (60) and eflags (64).
 */
"  movl  64(%esp), %eax     \n\t" /* EFLAGS pushed by CPU from fault */
"  andl  $0x20000, %eax     \n\t" /* Check EFLAGS.VM bit */
"  jnz   __fault_from_guest \n\t" /* V86 mode means fault was from guest */
"  movl  60(%esp), %eax     \n\t" /* CS pushed by CPU from fault */
"  andl  $3, %eax           \n\t" /* Check CS.RPL bits */
"  jz    __fault_from_mon   \n\t" /* RPL0 means from monitor */
                                  /* Otherwise fall through: from guest */

/* We have determined that the fault was from guest code.  Prepare
 * to call the monitor C code to do most of the fault handling.
 */
"__fault_from_guest:    \n\t"
"  movl  %ss, %eax      \n\t" /* Copy SS into DS/ES */
"  movl  %eax, %ds      \n\t"
"  movl  %eax, %es      \n\t"
"  cld                  \n\t" /* gcc-compiled code needs this */
"  pushl %esp           \n\t" /* Argument: pointer to saved context */
"  call handleGuestFault\n\t" /* Call the C monitor fault handler */
"  addl $4, %esp        \n\t" // xxx Could eliminate if we re-code
                              // xxx Need to pre-push esp in init code.
".globl __ret_to_guest  \n\t" /* Fault handled, work back to guest */
"__ret_to_guest:        \n\t"
"  pushl %esp           \n\t" /* Argument: pointer to saved context */
"  call preGuest        \n\t" /* Prepare for return to guest */
"  addl $4, %esp        \n\t"
                              /* preGuest's return value (in EAX) selects */
                              /* one of the three return paths below. */
"  cmpl $0x1, %eax      \n\t" /* What mode is guest monitored in? */
"  jb   __ret_to_v86    \n\t" /* case 0: guest monitored in v86 mode */
"  jnz  __ret_to_pmss16 \n\t" /* case 2: guest monitored in PM w/ small SS */
                            /* case 1: guest monitored in PM w/ big SS */

/* Return to the guest, monitored in PM with a 32-bit SS.  Simply
 * restore registers from the monitor stack.  The full 32-bit guest ESP
 * value is honored.
 */
"__ret_to_pmss32:       \n\t"
"  popl  %gs            \n\t" /* Restore guest segments */
"  popl  %fs            \n\t"
"  popl  %ds            \n\t"
"  popl  %es            \n\t"
"  popal                \n\t" /* Restore guest general registers */
"  addl  $8, %esp       \n\t" /* Ignore vector and error dwords */
"  iret                 \n\t" /* Resume execution of guest */


/* __fault_from_mon:  The only expected faults from the monitor occur
 *   when special assembly routines attempt native guest seg:offset
 *   accesses to accelerate emulation.  Paging faults and segment
 *   check faults can occur.
 *
 *   Old comments:
 *   Currently this means a monitor bug.  It's often
 *   due to an attempt to IRET to the guest with bad selector/descriptor
 *   values setup by the monitor.  Because of the SS16 hack, if an IRET
 *   back to guest code never completes and generates an exception, our
 *   SS:ESP values are still modified and not reloaded by the exception
 *   mechanism since no privilege level change occurred.  So before
 *   turning control over to the C code, we must check for this condition
 *   and restore SS:ESP.  Then we can reload the data segments from SS.
 *
 *   NOTE: the SS:ESP restore and the DS/ES reload described above are
 *   currently compiled out (#if 0), so handleMonFault is entered with
 *   whatever SS/DS/ES values were in effect when the fault occurred.
 */

"__fault_from_mon:               \n\t"
// xxx Fix this stuff
#if 0
"  movl  %esp, %ebx              \n\t" /* Get nexus page address */
"  andl  $0xfffff000, %ebx       \n\t"
"  movl  $__SSNormal, %edx       \n\t" /* Create a pointer to SSNormal */
"  subl  $__nexus_start, %edx    \n\t"
"  ss; movl  (%ebx,%edx), %ecx   \n\t" /* Get SSNormal */
"  movl  %ss, %eax               \n\t" /* Get current SS */
"  cmpw  %cx, %ax                \n\t" /* Compare SS with SSNormal */
"  je    __mon_SS_ESP_restored   \n\t" /* If same, then no restore needed */

"  movl  $__espUpperNormal, %edx \n\t" /* Get pointer to espUpperNormal */
"  subl  $__nexus_start, %edx    \n\t"
"  ss; movl  (%ebx,%edx), %eax   \n\t" /* Get espUpperNormal */
"  andl  $0x0000ffff, %esp       \n\t" /* Clear upper ESP bits */
"  orl   %eax, %esp              \n\t" /* Restore upper ESP bits */
"  movl  %ecx, %ss               \n\t" /* Restore SS */

"__mon_SS_ESP_restored:          \n\t"
"  movl  %ss, %eax               \n\t" /* Copy monitor SS to DS/ES */
"  movl  %eax, %ds               \n\t"
"  movl  %eax, %es               \n\t"
#endif
"  cld                           \n\t" /* gcc-compiled code needs this */
"  pushl %esp                    \n\t" /* Push pointer to context. */
"  call handleMonFault           \n\t" /* Call C code for real work */
"  addl $4, %esp                 \n\t"
"  jmp  __ret_to_monitor         \n\t" /* Event handled, back to monitor */
                                       /* (Reached when handleMonFault */
                                       /* returns.) */



/* Return to guest, monitored in PM with a 16-bit SS.  Only the lower
 * 16 bits of ESP will be honored.  The upper 16 bits of ESP are actually
 * passed through (left) from the unrelated monitor ESP value, when
 * transitioning from a 32-bit SS (which is the case with the monitor)
 * to a 16-bit SS.  So what we do is create an alternate monitor stack
 * segment such that the upper 16 bits are equivalent to those expected
 * by the guest, by adjusting the SS.base value.  This alternate segment
 * is really only used for the purposes of an IRET back to the guest.
 *
 * The two nexus fields are located by taking the page-aligned value of
 * ESP as the nexus page address and adding each symbol's offset from
 * __nexus_start.
 */
"__ret_to_pmss16:                     \n\t"
"  movl  %esp, %ebx                   \n\t" /* Get nexus page addr */
"  andl  $0xfffff000, %ebx            \n\t"
"  movl  $__espUpper16BitSSHack, %edx \n\t" /* Get ptr to nexus field */
"  subl  $__nexus_start, %edx         \n\t"
"  movl  (%ebx,%edx), %eax            \n\t" /* Access nexus field */
"  movl  $__SS16BitSSHack, %edx       \n\t" /* Get ptr to nexus field */
"  subl  $__nexus_start, %edx         \n\t"
"  movl  (%ebx,%edx), %ecx            \n\t" /* Access nexus field */
"  andl  $0x0000ffff, %esp            \n\t" /* Clear upper ESP bits */
"  orl   %eax, %esp                   \n\t" /* Use alternate ESP upper bits */
"  movl  %ecx, %ss                    \n\t" /* Use alternate SS segment */
"  popl  %gs            \n\t" /* Restore guest segments */
"  popl  %fs            \n\t"
"  popl  %ds            \n\t"
"  popl  %es            \n\t"
"  popal                \n\t" /* Restore guest general registers */
"  addl  $8, %esp       \n\t" /* Ignore vector and error dwords */
"  iret                 \n\t" /* Resume execution of guest */

/* Return to the guest, monitored in v86 mode.  Simply restore registers
 * from the monitor stack.  The full 32-bit guest ESP value is honored
 * when returning to v86 mode code.
 */
"__ret_to_v86:          \n\t"
"  addl $16, %esp       \n\t" /* Ignore, segs are in IRET area for v86 */
"  popal                \n\t" /* Restore guest general registers */
"  addl $8, %esp        \n\t" /* Ignore vector and error dwords */
"  iret                 \n\t" /* Resume execution of guest */

/* Return to monitor.  An event occurred while monitor code was executing.
 * Simply restore state from the monitor stack.
 */
"__ret_to_monitor:      \n\t"
"  popl  %gs            \n\t" /* Restore monitor segments */
"  popl  %fs            \n\t"
"  popl  %ds            \n\t"
"  popl  %es            \n\t"
"  popal                \n\t" /* Restore monitor general registers */
"  addl  $8, %esp       \n\t" /* ignore vector and error dwords */
"  iret                 \n\t" /* Resume execution of monitor */


/*
 * Hardware interrupt handler stub.  Builds the same context image as
 * __handle_fault, calls handleInt, and uses its return value (in EAX)
 * to choose between returning to monitor code (1) or to guest code.
 */
".globl __handle_int    \n\t" /* Entry point for hardware interrupts */
"__handle_int:          \n\t"
"  pushal               \n\t" /* Save interrupted general registers */
"  pushl %es            \n\t" /* Save interrupted segment registers */
"  pushl %ds            \n\t"
"  pushl %fs            \n\t"
"  pushl %gs            \n\t"

"  movl  %ss, %eax      \n\t" /* Copy SS into DS/ES */
"  movl  %eax, %ds      \n\t"
"  movl  %eax, %es      \n\t"
"  cld                  \n\t" /* gcc-compiled code needs this */
"  pushl %esp           \n\t" /* Argument: pointer to saved context */
"  call handleInt       \n\t" /* monitor interrupt handler */
"  addl $4, %esp        \n\t"
"  cmpl $0x1, %eax      \n\t" /* Was interrupt generated from monitor code? */
"  je   __ret_to_monitor\n\t" /* Yes, so return to monitor code */
"  jmp  __ret_to_guest  \n\t" /* No, so return to guest code */
);



  unsigned
preGuest(guest_context_t *context)
/*
 * preGuest(): Called from the __ret_to_guest assembly path with a pointer
 * to the saved guest context on the monitor stack.
 *
 * The nexus page address is recovered by masking off the low 12 bits of
 * the context pointer; the owning vm_t is read from the nexus.
 *
 * As currently compiled, this function enables interrupts and then drives
 * guest execution purely through cpuEmulate() in a loop which has no exit;
 * the trailing return and the native-execution code under '#if 0' are not
 * reached.  The assembly caller interprets a return value of 0 as "guest
 * monitored in v86 mode", 1 as "PM with 32-bit SS" and 2 as "PM with
 * 16-bit SS" (see the disabled code at the end for how these were chosen).
 */
{
  nexus_t *nexus = (nexus_t *) (((Bit32u) context) & 0xfffff000);
  vm_t    *vm    = (vm_t *) nexus->vm;
  //v86_sregs_t *v86_sregs =
  //    (v86_sregs_t *) (((Bit32u)context) + sizeof(guest_context_t));

#if 0
  Bit32u   guestLinAddr, guest_ppage_index;
  unsigned seg32;
  Bit32u   gerror;
  unsigned us, rw;
  unsigned m, ret;
  Bit32u   tcodeAddr=0;
#endif

#if ANAL_CHECKS
  /* The context must describe guest code: either v86 mode or CS.RPL != 0. */
  if ( !context->eflags.fields.vm && !(context->cs & 0x0003) )
    monpanic(vm, "preGuest: monitor code!\n");
#endif


#if ANAL_CHECKS
  if ( !context->eflags.fields.if_ )
     monpanic(vm, "preGuest: guest IF=0\n");

  if ( context->eflags.raw & (
       FLG_IOPL | /* FLG_NT | FLG_RF | FLG_VM | FLG_AC | */
       FLG_VIF | FLG_VIP) )
     /* Pass the scalar .raw member; the format expects an integer, not
      * the whole eflags aggregate.
      */
     monpanic(vm, "preGuest: eflags=0x%x\n", context->eflags.raw);
#endif


  STI();

  /* Prime with initial modeChange if any. */
  if (vm->modeChange)
    monModeChange(vm);

  /* Emulation loop.  Each pass runs cpuEmulate() and then services every
   * stop reason flagged in the returned bit mask.  There is no break, so
   * control never leaves this loop by normal flow.
   */
  while (1) {
    unsigned reason;
    unsigned instructions;

    instructions = 0; /* Indefinite. */
    reason = cpuEmulate(vm, context, &instructions);
    /* If execution is being driven by a debugger. */
// xxx This should be conditionally compiled in.
    if (vm->executeN) {
      if (instructions > vm->executeN)
        monpanic(vm, "preGuest: i > executeN.\n");
      vm->executeN -= instructions;
      if (vm->executeN==0) {
        sysEOICount(vm);
        }
      }
    if (reason & EmulateStopReasonModeChange) {
      if (vm->modeChange)
        monModeChange(vm);
      }
    if (reason & EmulateStopReasonCycles) {
      //monpanic(vm, "preGuest: StopReasonCycles: elapsed=%u, remain=%u.\n",
      //         (Bit32u) vm->system.cyclesElapsed,
      //         (Bit32u) vm->system.cyclesRemaining);
      if (handleTimers(vm) != 0)
        monpanic(vm, "preGuest: handleTimers error.\n");
      }
    /* The remaining stop reasons are not handled yet. */
    if (reason & EmulateStopReasonDT) {
      monpanic(vm, "preGuest: StopReasonDT.\n");
      }
    if (reason & EmulateStopReasonExecuteNative) {
      monpanic(vm, "preGuest: StopReasonExecuteNative.\n");
      }
    if (reason & EmulateStopReasonInstructions) {
      monpanic(vm, "preGuest: StopReasonInstructions.\n");
      }
    }

  return(1); // xxx Bogus for now



  /* Everything below is disabled: it is the path that prepares the saved
   * context for native execution of guest code and selects the return
   * mode reported to the assembly stub.
   */
#if 0
  if (vm->eipModified) {
    guest_cycles_elapsed(vm);
    /* If there is an asynchronous event waiting, let the tcode know
     * we are requesting a return soon.
     */
    goto returnToGuest;
    }

loop:

  guest_cycles_elapsed(vm);

  /* Past execution (emulation) could have created a need for
   * a remap of the guest in the monitor space
   */
  if (vm->modeChange)
    monModeChange(vm);

  /* Check for asynchronous events, inhibits, traps etc before
   * executing guest code
   */
  emulate_async_checks(vm);

  /* +++ Look again here???  Remove check above??? */
  if (vm->modeChange)
    monModeChange(vm);

/* Notes:
 *   if (vm->guest_cpu.inhibit_mask || vm->guest_cpu.debug_trap)
 *     then have to make sure inhibits are honored.
 */

  /* See if cosimulation demands the guest takes an interrupt
   * at this point.  This would be the case if it is being
   * driven as a slave simulator.
   */
  if (vm->dbg_force_int) {
    unsigned vector = vm->dbg_force_int & 0xff;
    if (vm->guest_cpu.inhibit_mask || vm->guest_cpu.debug_trap)
      monpanic(vm, "preGuest: force_int w/ IM || DT\n");
    vm->dbg_force_int = 0;
    emulate_interrupt(vm, vector);
    sysReqComplete(vm);
    /* An interrupt may put the VM in a state which requires
     * remapping.  Repeat the process
     */
    goto loop;
    }

  switch (vm->executeMethod) {
    case RunGuestNMethodExecute:
      /* Really, should copy guest TF to monitor TF here.  For
       * now just turn off TF always.  Need to check what case
       * set it in handler when enabled.
       */
      if (vm->guest_cpu.inhibit_mask || vm->guest_cpu.debug_trap)
        monpanic(vm, "preGuest: MethodExecute: IM || DT\n");
      context->eflags.fields.tf = 0;
      break;

    case RunGuestNMethodEmulate:
      emulate_instr(vm, context, 4);
      /* Above call will decrement executeN if execution completes */
      goto loop;

    case RunGuestNMethodBreakpoint:
      /* Regardless of guest TF, we need to set monitor TF to generate
       * a breakpoint after the next instruction executes
       */
      context->eflags.fields.tf = 1;
      break;

    default:
      monpanic(vm, "preGuest: Method default\n");
    }

  cache_sreg(vm, SRegCS);
  if (context->eip > vm->guest_cpu.desc_cache[SRegCS].limit_scaled)
    monpanic(vm, "preGuest: %x > CS.limit\n", context->eip);
  guestLinAddr = vm->guest_cpu.desc_cache[SRegCS].base + context->eip;
  us = (G_GetCPL(vm)==3);
  rw = 0; /* need at least read priv for code */
  switch ( m = map_guest_laddr(vm, guestLinAddr, &guest_ppage_index,
                           us, rw, PageUsageVCode, &gerror) ) {
    case MapLinOK:
    case MapLinAlreadyMapped:
      break;
    case MapLinMonConflict:
      monpanic(vm, "preGuest: MapLinMonConflict:\n");
    case MapLinPPageOOB:
      monpanic(vm, "preGuest: MapLinPPageOOB:\n");
    case MapLinException:
      /* Executing code at this location would generate a #PF */
      vm->guest_cpu.cr2 = guestLinAddr;
      emulate_exception(vm, ExceptionPF, gerror);
      /* An exception means a possible monitor remap request, mode
       * change etc.  We need to repeat this process.
       */
      goto loop;
    case MapLinEmulate:
      monpanic(vm, "preGuest: MapLinEmulate:\n");
      emulate_instr(vm, context, 5);
      goto loop;
    default:
      monpanic(vm, "preGuest: MapLin: default case:\n");
    }

  if ( vm->vOpcodeMap ) {
    Bit32u   guestPhyAddr;

    seg32 = vm->guest_cpu.desc_cache[SRegCS].desc.d_b;
    guestPhyAddr = (guest_ppage_index<<12) | (guestLinAddr & 0xfff);
  
    tcodeAddr = dtTranslateG2T(vm, context->eip, guestLinAddr, guestPhyAddr);

    if (tcodeAddr == TcodeOffsetNone) {
      /* xxx If we can have dtTranslateG2T pass back a flag, notifying us
       * xxx when the 1st instruction is virtualized, we can emulate it
       * xxx immediately, rather than wasting a round trip to guest space.
       */
      monpanic(vm, "preGuest: dtTranslateG2T returns None.\n");
      emulate_instr(vm, context, 5);
      goto loop;
      }

    {
    // xxx Fix these.  Not all should be set here.
    /* Our r3h stack can use the same segment as GS which is virtualized
     * to be used as the r3h data segment.
     */
    vm->guest.addr.r3hData->r3hSS = Selector(4+SRegGS, 0, RPL3);
    vm->guest.addr.r3hData->r3hESP =
        ((Bit32u)vm->guest.addr.r3hData) + 4096;
    vm->guest.addr.r3hData->r3hRequest = R3HToMonRequestNone;
    vm->guest.addr.r3hData->dtG2THash = vm->guest.addr.dtG2THash;
    vm->guest.addr.r3hData->guestSS = Selector(4+SRegSS, 0, RPL3);
// xxx Selector() usage below should use 4+SReg?S format
    }
  
    /* Update timestamp for latest time of guest execution */
    // (vm->addr->dtPageMetaTable)[metaI].ts.guest_executed = vm_rdtsc();
    }

  /* Before returning execution to the guest, we have to copy certain
   * information from the vm->guest_cpu area to the guest_context
   * area, depending on guest/monitored mode.
   */

  if (GetMonMode(vm) == MonModeVM) {
    /* Guest code is monitored in V86M.  In this case, segment registers
     * are used natively.  A previous event (like emulation of an
     * instruction) may have changed some values.  Copy any updated
     * values which are currently managed in vm->guest_cpu, to the current
     * guest context before executing.
     */

    if ( vm->selectorInEmu & (1 << SRegES) )
      context->es = vm->guest_cpu.selector[SRegES].raw;
    if ( vm->selectorInEmu & (1 << SRegCS) )
      context->cs = vm->guest_cpu.selector[SRegCS].raw;
    if ( vm->selectorInEmu & (1 << SRegSS) )
      context->ss = vm->guest_cpu.selector[SRegSS].raw;
    if ( vm->selectorInEmu & (1 << SRegDS) )
      context->ds = vm->guest_cpu.selector[SRegDS].raw;
    if ( vm->selectorInEmu & (1 << SRegFS) )
      context->fs = vm->guest_cpu.selector[SRegFS].raw;
    if ( vm->selectorInEmu & (1 << SRegGS) )
      context->gs = vm->guest_cpu.selector[SRegGS].raw;

    /* Since we are executing guest code using v86 mode, we will
     * allow natural selector reads, and segment reloads to occur
     * unvirtualized.  Thus, we need to mark the fact that selectors
     * and descriptor cache values are no longer consistent with
     * those stored in the guest_cpu area.  
     */
    vm->selectorInEmu   = 0;
    vm->descriptorInEmu = 0;
    vm->segmentUpdated  = 0; // xxx Is this correct?

/* +++ we could eliminate double copy of selectors here and below */
    }
  else if ( vm->vOpcodeMap ) {

if ( (vm->selectorInEmu & 0x3f) != 0x3f )
  monpanic(vm, "preGuest: SIV=1, sInEmu=0x%x\n",
  vm->selectorInEmu);

    /* MonModePMR3, GuestModeRM.  Since the monitor is not running
     * guest code in VM, there must be non-compatible values in
     * the descriptor caches.  In this case we are virtualizing
     * segment registers accesses, and using PM selectors.  Update
     * virtualized descriptors based on updated guest values.  In
     * this case, there is no notion of a null selector.
     * Descriptor cache values remain maintained in the guest_cpu area.
     * Skip updating CS/GS, since they are virtualized for tcode use.
     */

    if ( vm->segmentUpdated ) {
      unsigned sreg;

/* +++ similar code in mon-mode.c */
      /* If the descriptor caches for each segment register is
       * invalid, then set the virtualized selector to NULL so
       * an exception is not generated upon return to the guest.
       * Otherwise, point them at virtualized descriptors representing
       * the shadow cache values of each segment register.
       */
      if ( vm->segmentUpdated & (1<<SRegES) ) {
        if ( !vm->guest_cpu.desc_cache[SRegES].valid )
          context->es = nullSelector.raw;
        else
          context->es = Selector(4, 0, RPL3);
        }
   
      if ( vm->segmentUpdated & (1<<SRegSS) ) {
        if ( !vm->guest_cpu.desc_cache[SRegSS].valid )
          context->ss = nullSelector.raw;
        else
          context->ss = Selector(6, 0, RPL3);
        }

      if ( vm->segmentUpdated & (1<<SRegDS) ) {
        if ( !vm->guest_cpu.desc_cache[SRegDS].valid )
          context->ds = nullSelector.raw;
        else
          context->ds = Selector(7, 0, RPL3);
        }

      if ( vm->segmentUpdated & (1<<SRegFS) ) {
        if ( !vm->guest_cpu.desc_cache[SRegFS].valid )
          context->fs = nullSelector.raw;
        else
          context->fs = Selector(8, 0, RPL3);
        }

      for (sreg=0; sreg<6; sreg++) {
        if ( vm->segmentUpdated & (1<<sreg) ) {
          /* CS/GS are handled separatedly - they are used for tcode */
          if ( (sreg==SRegCS) || (sreg==SRegGS) )
            continue;
          if (vm->guest_cpu.desc_cache[sreg].valid) {
            vm->guest.addr.gdt[4+sreg] = vm->guest_cpu.desc_cache[sreg].desc;
            vm->guest.addr.gdt[4+sreg].dpl = 3;
            }
          else {
            vm->guest.addr.gdt[4+sreg] = nullDescriptor;
            }
          }
        }
      vm->segmentUpdated = 0;
      }

    /* Up to this point, CS:EIP are the expected guest values.  Modify
     * them to use values for DT tcode.  The interrupt/exception
     * handlers will reset them back to guest values.
     */
    context->eip = tcodeAddr;
    vm->eipModified = 1;
    }
  else {
    monpanic(vm, "preGuest: SIV off not handled yet.\n");
    vm->eipModified = 0;
    }

returnToGuest:

  if (context->eflags.fields.vm) {
    /* Copy working sregs back to stack so IRET will use them. */
    v86_sregs.es = context->es;
    v86_sregs.ds = context->ds;
    v86_sregs.fs = context->fs;
    v86_sregs.gs = context->gs;
    vm->guest.addr.tss->esp0 = ((Bit32u)nexus) + PAGESIZE;
    }
  else {
    vm->guest.addr.tss->esp0 =
      ((Bit32u)nexus) + PAGESIZE - sizeof(v86_sregs_t);
    }

  CLI();

// xxx Should all code below, be after CLI?

  /* Set a flag to report back to the assembly code, which mode
   * the guest is being monitored in.  Also, the 16-bit ESP stack
   * hack is setup, when appropriate.  See the notes at the top of
   * this file.
   */
  if (context->eflags.fields.vm) {
    ret = 0; /* to guest in V86M */
    }
  else if ( !vm->guest.addr.gdt[4+SRegSS].d_b ) {
    Bit32u monBaseNormal, monESPUpperNormal, monBaseHacked;
    Bit32u guestESPUpperNormal;
    unsigned ssNormalSlot, ssHackedSlot;
  
    ssNormalSlot = vm->guest.addr.nexus->SSNormal>>3;
    ssHackedSlot = vm->guest.addr.nexus->SS16BitSSHack>>3;
    monBaseNormal = BaseOfDescriptor(vm->guest.addr.gdt[ssNormalSlot]);
  
    asm volatile (
      "movl %%esp, %0 \n\t"
      "andl $0xffff0000, %0"
      : "=r" (monESPUpperNormal)
      );
    guestESPUpperNormal = context->esp & 0xffff0000;
  
    monBaseHacked = monBaseNormal +
                    (monESPUpperNormal - guestESPUpperNormal);
  
    vm->guest.addr.nexus->espUpperNormal = monESPUpperNormal;
    vm->guest.addr.nexus->espUpper16BitSSHack = guestESPUpperNormal;
    vm->guest.addr.gdt[ssHackedSlot] = vm->guest.addr.gdt[ssNormalSlot];
    vm->guest.addr.gdt[ssHackedSlot].base_low  = monBaseHacked;
    vm->guest.addr.gdt[ssHackedSlot].base_med  = monBaseHacked >> 16;
    vm->guest.addr.gdt[ssHackedSlot].base_high = monBaseHacked >> 24;
    ret = 2; /* to guest in PM with SS16 hack */
    }
  else {
    ret = 1; /* to guest in PM */
    }

  if (vm->guest_cpu.async_event)
    __r3hAsyncEvent = 1;
  else
    __r3hAsyncEvent = 0;

  vm->system.t0 = vm_rdtsc();
  return (ret);
#endif
}


  unsigned
handleInt(guest_context_t *context)
/*
 * handleInt(): Hand a hardware interrupt over to the host.
 *
 * Called from the __handle_int assembly stub with a pointer to the saved
 * register image.  Returns 1 when the interrupt arrived while monitor
 * code was running and 0 when it interrupted guest code; the stub uses
 * this value to pick its return path.
 */
{
  nexus_t *nexus = (nexus_t *) (((Bit32u) context) & 0xfffff000);
  vm_t    *vm    = (vm_t *) nexus->vm;
  /* In v86 mode the CPU pushes ES/DS/FS/GS just above the context image. */
  v86_sregs_t *v86_sregs = (v86_sregs_t *) (context + 1);
  const Bit64u now = vm_rdtsc();
  const unsigned inV86 = context->eflags.fields.vm ? 1 : 0;
  unsigned from_monitor = 1; /* Assume monitor code until shown otherwise */

  /* Guest code is either running in v86 mode or has CS.RPL == 3. */
  if ( inV86 || ((context->cs & 0x0003) == 0x0003) ) {
    /* Guest execution interval ends here; account for its cycles. */
    vm->system.cyclesElapsed += (now - vm->system.t0);
    from_monitor = 0;

    if (inV86) {
      /* Bring the CPU-pushed data segment selectors down into the */
      /* slots where they are kept for protected mode. */
      context->es = v86_sregs->es;
      context->ds = v86_sregs->ds;
      context->fs = v86_sregs->fs;
      context->gs = v86_sregs->gs;
      }
    }

  /* Interrupts are still disabled at this point.  Post a redirect */
  /* request for this vector and switch to the host. */
  vm->mon_request = MON_REQ_REDIRECT;
  vm->redirect_vector = context->vector;
  vm->guest.__mon2host();

  return from_monitor;
}


  void
handleGuestFault(guest_context_t *context)
/*
 * handleGuestFault(): C-level handler for a fault taken while guest code
 * was executing.  Entered from the __handle_fault assembly stub with a
 * pointer to the register image saved on the monitor stack.
 */
{
  nexus_t *nexus = (nexus_t *) (((Bit32u) context) & 0xfffff000);
  vm_t    *vm    = (vm_t *) nexus->vm;
  /* In v86 mode the CPU pushes ES/DS/FS/GS just above the context image. */
  v86_sregs_t *v86_sregs = (v86_sregs_t *) (context + 1);
  /* CR2 is captured up front, before interrupts are re-enabled below. */
  Bit32u  cr2    = readCR2();
  Bit64u  now;

  /* The guest execution interval ends here; account for its cycles. */
  now = vm_rdtsc();
  vm->system.cyclesElapsed += (now - vm->system.t0);

  if (context->eflags.fields.vm) {
    /* Fault came from v8086-mode guest code: bring the CPU-pushed data */
    /* segment selectors down into the slots used for protected mode. */
    context->es = v86_sregs->es;
    context->ds = v86_sregs->ds;
    context->fs = v86_sregs->fs;
    context->gs = v86_sregs->gs;
    }

#if ANAL_CHECKS
  if ( !context->eflags.fields.if_ )
    monpanic(vm, "handleGuestFault: guest IF=0\n");
  if ( !context->eflags.fields.vm && ((context->cs & 0x0003) != 0x0003) )
    monpanic(vm, "handleGuestFault: CS.RPL!=3\n");
#endif

  STI();

#if 0
  if (vm->eipModified) {
    r3hData_t *r3hData = vm->guest.addr.r3hData;
    if (context->eip == (Bit32u) __r3hNewEIP) {
      if (r3hData->r3hData > vm->guest_cpu.desc_cache[SRegCS].limit_scaled) {
        monpanic(vm, "hF: r3h EIP > CS.limit.\n");
        }
      context->eip = r3hData->r3hData;
      vm->eipModified = 0;
      }
    else {
      dtFixContext(vm, context);
      vm->eipModified = 0;
      }
    }
  else {
    // xxx Fix this
    monpanic(vm, "fault: SIV off.\n");
    }
#endif

  /* Dispatch on the exception vector.  Only #DB and #PF have working */
  /* handling; the other vectors panic. */
  switch ( context->vector ) {
    case ExceptionPF: /* 14 */
      monPageFault(vm, context, cr2);
      break;

    case ExceptionDB: /* 1 */
      if (vm->executeMethod != RunGuestNMethodBreakpoint) {
        monpanic(vm, "handleGuestFault: #DB, method=%u not coded\n",
          vm->executeMethod);
        }
      else if (vm->executeN) {
        /* The trap was requested via TF=1: count one executed */
        /* instruction and report when the count runs out. */
        vm->executeN--;
        if (vm->executeN==0) {
          sysEOICount(vm);
          }
        }
      break;

    case ExceptionBR: /* 5 */
      monpanic(vm, "handleGuestFault: BR unfinished.\n");
      /* BOUND range exceeded: reflect the exception to the guest */
      emulate_exception(vm, context->vector, 0);
      break;

    case ExceptionDE: /* 0 */
    case ExceptionBP: /* 3 */
    case ExceptionOF: /* 4 */
    case ExceptionNM: /* 7 */
    case ExceptionMF: /* 16 */
      monpanic(vm, "handleGuestFault: DE/BP/OF/NM/MF unfinished.\n");
      monpanic(vm, "handleGuestFault: %u\n", context->vector);
      /* emulate_interrupt(vm, context->vector); */
      break;

    case ExceptionNP: /* 11 */
    case ExceptionSS: /* 12 */
    case ExceptionAC: /* 17 */
      monpanic(vm, "handleGuestFault: NP/SS/AC unfinished.\n");
      /* use emulate_xyz() */
      /*interrupt(vm, context->vector, 0, 1, context->error); */
      monpanic(vm, "handleGuestFault: %u\n", context->vector);
      break;

    case ExceptionUD: /* 6 */
    case ExceptionGP: /* 13 */
      monpanic(vm, "handleGuestFault: UD/GP unfinished.\n");
      //emulate_instr(vm, context, 1);
      break;

    default:
      monpanic(vm, "Other Fault: %u\n", context->vector);
      break;
    }
}

  void
handleMonFault(guest_context_t *monContext)
{
  nexus_t *nexus = (nexus_t *) (((Bit32u) monContext) & 0xfffff000);
  vm_t    *vm    = (vm_t *) nexus->vm;

  if (vm->inMonFault)
    monpanic(vm, "handleMonFault called recursively.\n");
  vm->inMonFault = 1;

  /* Fault occurred inside monitor code. */

  /* Except for a page fault, load GS with the null selector, in
   * case using it for accelerated guest access was the cause of the
   * problem.  For page faults, we may map in the page and restart
   * the instruction, leaving GS intact.
   */
  if ( monContext->vector!=ExceptionPF )
    monContext->gs = 0;

  switch ( monContext->vector ) {
    case ExceptionPF:
    case ExceptionGP:
      {
      Bit32u cr2, guest_ppi, gerror, fixEip;
      unsigned us, rw;

      cr2 = readCR2();
      STI();

      if (monContext->error & 0x8) /* If RSVD bits used in PDir */
        monpanic(vm, "handleMF: RSVD\n");
      us = G_GetCPL(vm)==3;
      rw = (monContext->error >> 1) & 1;
      if ( monContext->eip == (Bit32u) __fetchCheckAttempt )
        fixEip = (Bit32u) __readError;
      else if ( monContext->eip == (Bit32u) __readByteAttempt )
        fixEip = (Bit32u) __readError;
      else if ( monContext->eip == (Bit32u) __readWordAttempt )
        fixEip = (Bit32u) __readError;
      else if ( monContext->eip == (Bit32u) __readDWordAttempt )
        fixEip = (Bit32u) __readError;
      else if ( monContext->eip == (Bit32u) __readRMWByteAttempt )
        fixEip = (Bit32u) __readError;
      else if ( monContext->eip == (Bit32u) __readRMWWordAttempt )
        fixEip = (Bit32u) __readError;
      else if ( monContext->eip == (Bit32u) __readRMWDWordAttempt )
        fixEip = (Bit32u) __readError;

      else if ( monContext->eip == (Bit32u) __writeByteAttempt )
        fixEip = (Bit32u) __writeError;
      else if ( monContext->eip == (Bit32u) __writeWordAttempt )
        fixEip = (Bit32u) __writeError;
      else if ( monContext->eip == (Bit32u) __writeDWordAttempt )
        fixEip = (Bit32u) __writeError;

      else
        fixEip = 0;
      if ( (monContext->vector==ExceptionPF) && fixEip ) {
        switch (mapGuestLinAddr(vm, cr2, &guest_ppi, us, rw, 0, &gerror)) {
          case MapLinOK:
            {
#if 0
            /* Map was successful.  Restart instruction @ current eip. */
            Bit32u lAddr, lAddr0, lAddrN;
            unsigned r;

            /* Since the paged was mapped OK, this was a lazy (demand) paging
             * fault.  This is a good time to speculatively map in some more
             * neighboring pages, to reduce future faults.  Do that before
             * restarting instruction.
             */
            lAddr0 = cr2    & 0xffff8000;
            lAddrN = lAddr0 | 0x00007000;
            cr2 &= 0xfffff000; /* Need strict page address for compare. */
            for (lAddr=lAddr0; lAddr<=lAddrN; lAddr++) {
              /* Skip the page we already mapped. */
              if (lAddr == cr2)
                continue;
              /* For the read-write flag, just pass 0=read since there really
               * is no particular access occurring.  The monitor will map in
               * the page according to the guest attributes.
               */
              r = mapGuestLinAddr(vm, lAddr, &guest_ppi, us, 0, 0, &gerror);
              /* Stop speculative mapping, when we hit a non-standard case. */
              if ( (r!=MapLinOK) && (r!=MapLinAlreadyMapped) )
                break;
              }
#endif
            break;
            }
          default:
            /* A new mapping was unsuccessful.  Adjust monitor EIP to
             * error recovery code, specific to the access code which
             * generated the event.
             */
            monContext->eip = fixEip;
            monContext->gs = 0;
            break;
          }
        }
      else if ( (monContext->vector==ExceptionGP) && fixEip ) {
        /* Adjust monitor EIP to error routine. */
        monContext->eip = fixEip;
        monContext->gs = 0;
        }
// xxx Delete this debug stuff
      else {
        monContext->gs = 0;
monprint(vm, "nexus = 0x%x\n", nexus);
monprint(vm, "rVBA: 0x%x\n", (Bit32u) __readByteAttempt);
monprint(vm, "rVWA: 0x%x\n", (Bit32u) __readWordAttempt);
monprint(vm, "rVDA: 0x%x\n", (Bit32u) __readDWordAttempt);
monprint(vm, "wVBA: 0x%x\n", (Bit32u) __writeByteAttempt);
monprint(vm, "wVWA: 0x%x\n", (Bit32u) __writeWordAttempt);
monprint(vm, "wVDA: 0x%x\n", (Bit32u) __writeDWordAttempt);
monprint(vm, "desc.l=0x%x\n", *(Bit32u*) &vm->guest.addr.gdt[4+SRegGS]);
monprint(vm, "desc.h=0x%x\n", *( ((Bit32u*) &vm->guest.addr.gdt[4+SRegGS]) + 1) );
monprint(vm, "gs sel = 0x%x\n", monContext->gs);
monpanic(vm, "hMF: fault=%u, eip=0x%x\n",
         monContext->vector, monContext->eip);
        monpanic(vm, "hMF: vector=%u\n", monContext->vector);
        }
      break;
      }
    default:
      monpanic(vm, "hMF: vector=%u\n", monContext->vector);
      break;
    }

  //vm->abort_code = 1;
  //monpanic_nomess(vm);
  CLI();
  vm->inMonFault = 0;
}
