/******************************************************************-*-c-*-
 * Myricom GM networking software and documentation			 *
 * Copyright (c) 1999 by Myricom, Inc.					 *
 * All rights reserved.	 See the file `COPYING' for copyright notice.	 *
 *************************************************************************/

/* author: glenn@myri.com */

/************************************************************************
 * IRQL level summary
 ************************************************************************/

/* In this driver, as few functions as possible run at elevated IRQ levels.
   Specifically, the functions run at the following levels:

   DIRQL_LEVEL:		gm_nt_Isr() (the interrupt handler)
   DISPATCH_LEVEL:	gm_nt_Isr_dpc() (the interrupt handler deferred
				procedure call)
			gm_nt_cancel_irp() (the NT IRP cancellation function)
   			_gm_maybe_cancel_sync() (a helper function for
				gm_nt_cancel_irp())
   APC_LEVEL:		(none)
   PASSIVE_LEVEL:	(all others) */

#include <limits.h>

#include "gm_debug.h"
#include "gm_internal.h"
#include "gm_arch.h"
#include "gm_call_trace.h"
#include "gm_page_hash.h"
#include "gm_lanai.h"
#include "gm_instance.h"
#include "cheat.h"
#include "gm_debug_lanai_dma.h"
#include "gm_enable_ethernet.h"
#include "gm_event_log.h"
#include <stdarg.h>
#include <stdio.h>
#include "gm_pio.h"

#if GM_ENABLE_ETHERNET
NTSTATUS
gmm_DriverEntry (IN PDRIVER_OBJECT DriverObject,
		 IN PUNICODE_STRING RegistryPath);
#endif

#if !GM_CAN_REGISTER_MEMORY
#  error GM_CAN_REGISTER_MEMORY not defined in NT gm_arch.c
#endif

#if GM_ENABLE_ETHERNET
#  include "gm_ether.h"
#  include "../gm_miniport/gmm_driver_link.h"
#endif

/****************************************************************
 * NDIS layering support
 ****************************************************************/

/* Entry points cached from the NDIS driver.  Each member holds one
   driver routine pointer; the code that fills and consults this table
   is not in this part of the file.
   NOTE(review): presumably these shadow the like-named DRIVER_OBJECT
   entries (create/close/cleanup/device-control dispatch and unload)
   so GM can layer on top of NDIS -- confirm against DriverEntry. */

static struct
{
  PDRIVER_DISPATCH create;		/* cached "create" dispatch routine */
  PDRIVER_DISPATCH close;		/* cached "close" dispatch routine */
  PDRIVER_DISPATCH cleanup;		/* cached "cleanup" dispatch routine */
  PDRIVER_DISPATCH device_control;	/* cached "device control" routine */
  PDRIVER_UNLOAD unload;		/* cached driver unload routine */
}
ndis;

static struct gm_hash *gm_device_hash;	/* references all GM devices */

/* Return nonzero if D is a device recorded in gm_device_hash, zero
   otherwise.  Devices are stored in that hash keyed by, and mapping
   to, their own DEVICE_OBJECT pointer, so a lookup that hands back D
   itself identifies D as one of ours. */
static gm_inline int
is_gm_device (PDEVICE_OBJECT d)
{
  if (gm_hash_find (gm_device_hash, d) == d)
    return 1;
  return 0;
}

/************************************************************************
 * NT Device Extension support
 ************************************************************************/

/* These structures allow clean support for the different types of
   device extensions required in the GM NT driver. */

enum gm_nt_device_extension_type
{
  GM_NT_UNKNOWN_DEVICE_EXTENSION,
  GM_NT_MINOR_DEVICE_EXTENSION,
  GM_NT_RESOURCE_DEVICE_EXTENSION
};

/* Layout of the per-device extension.  Every member begins with a
   "type" tag so the tag can be read through any member.

   NOTE(review): the tag fields are declared as `enum gm_nt_device_type',
   while the enum declared just above is named
   `enum gm_nt_device_extension_type' and is what
   gm_nt_device_extension_type() returns.  Unless gm_nt_device_type is
   declared in a header not visible here, this looks like a tag-name
   mismatch -- confirm. */
union gm_nt_device_extension
{
  /* used for minor devices */
  struct gm_nt_minor_device_extension
  {
    enum gm_nt_device_type type;	/* extension type tag */
    gm_arch_minor_t minor;		/* minor number of this device */
  }
  minor;

  /* used for resource reporting devices */
  struct gm_nt_resource_device_extension
  {
    enum gm_nt_device_type type;	/* extension type tag */
  }
  resource;
};

/************
 * Extension extraction functions
 ************/

/* Return DEV's DeviceExtension pointer viewed as a GM device
   extension union. */
static gm_inline union gm_nt_device_extension *
gm_nt_device_extension (PDEVICE_OBJECT dev)
{
  union gm_nt_device_extension *ext;

  ext = (union gm_nt_device_extension *) dev->DeviceExtension;
  return ext;
}

/************
 * Extension type extraction function
 ************/

/* Return the type tag stored in DEV's extension.  The tag is read
   through the "minor" member; the "resource" member starts with the
   same tag field, so this works for either kind of extension. */
static gm_inline enum gm_nt_device_extension_type
gm_nt_device_extension_type (PDEVICE_OBJECT dev)
{
  union gm_nt_device_extension *ext = gm_nt_device_extension (dev);

  return ext->minor.type;
}


/************************************************************************
 * NT utility functions for managing minor nodes.
 ************************************************************************

 These gm_nt_* functions come near the top so they don't need
 forward declarations to be used in gm_arch_* functions below.  The
 gm_arch_* functions below have prototypes in gm_impl.h, so they can
 safely be anywhere in this file. */

/* Fetch the minor number stored in DEV's device extension.  DEV is
   asserted to carry a minor-device extension. */

static gm_arch_minor_t
gm_nt_device_to_minor (PDEVICE_OBJECT dev)
{
  union gm_nt_device_extension *ext;

  gm_assert (gm_nt_device_extension_type (dev)
	     == GM_NT_MINOR_DEVICE_EXTENSION);
  ext = gm_nt_device_extension (dev);
  return ext->minor.minor;
}

/* Map DEV to its port state: look up DEV's minor number, then the
   port state registered for that minor. */

static gm_port_state_t *
gm_nt_device_to_ps (PDEVICE_OBJECT dev)
{
  gm_arch_minor_t minor;

  minor = gm_nt_device_to_minor (dev);
  return gm_minor_get_port_state (minor);
}

/************
 * Minor node management
 ************/

/* Create a new minor node, mimicking the Unix functionality as closely
   as possible.  The minor node includes the NT named device and an
   associated gm_port_state.  However, the gm_port_state created is
   only partially initialized.  Clone devices will never be fully
   initialized, and non-clone devices will be later initialized
   with gm_open_port_state().

   Parameters:
     is               instance the node belongs to; supplies the unit
                      number (is->id) and the driver object.
     _basename        base of the device name.
     minorp           receives the new minor number on success; must
                      be non-null.
     is_clone_device  nonzero for a clone device; the port suffix is
                      then left out of the device names.
     is_priv          stored in the port state's "privileged" field.
     port_id          port number; must be < GM_NUM_PORTS and must not
                      already be present in the instance's port_hash.

   The device names built are
     NT:    \Device\<basename><unit>[-<port>]
     WIN32: \??\<basename><unit>[-<port>]

   Returns GM_SUCCESS with *MINORP set, or GM_FAILURE after undoing
   whatever had been set up.  The specific NTSTATUS of a failing step
   is logged at most; it is not returned to the caller. */

static gm_status_t
gm_nt_create_minor_node (gm_instance_state_t * is,
			 PCWSTR _basename,
			 gm_arch_minor_t * minorp,
			 int is_clone_device,
			 int is_priv, unsigned long port_id)
{
  NTSTATUS nt_status;
  gm_arch_minor_t minor;
  gm_port_state_t *ps;

  UNICODE_STRING basename;

  /* Scratch strings holding the decimal unit and port numbers. */

  UNICODE_STRING unit;
  WCHAR wchar_unit[32];

  UNICODE_STRING port;
  WCHAR wchar_port[32];

  if (!minorp)
    {
      GM_NOTE (("internal error: minorp==0\n"));
      goto abort_with_nothing;
    }

  if (port_id >= GM_NUM_PORTS)
    {
      GM_NOTE (("User tried to set port number out of range.\n"));
      goto abort_with_nothing;
    }

  /* Allocate a minor number for the new device. */

  GM_PRINT (GM_PRINT_LEVEL >= 4, ("Allocating a minor node.\n"));
  if (gm_minor_alloc (is, &minor) != GM_SUCCESS)
    {
      GM_NOTE (("Failed to allocate minor number.\n"));
      goto abort_with_nothing;
    }
  ps = gm_minor_get_port_state (minor);
  gm_assert (ps);
  GM_PRINT (GM_PRINT_LEVEL >= 4, ("minor<->ps = %d,%p.\n", minor, ps));
  ps->privileged = is_priv;
  ps->arch.is_clone_device = is_clone_device;
  /* Only non-clone devices get their port id recorded here. */
  if (!is_clone_device)
    ps->id = port_id;


  /* Refuse a port number that is already present in the instance's
     port hash.  Note that this check is applied to clone devices
     too. */
  {
    unsigned int port_num = (unsigned int) port_id;

    if (gm_hash_find (ps->instance->port_hash, &port_num) != 0)
      {
	GM_NOTE (("gm_nt_create_minor_node: User tried to claim port_num (%d)"
		  " that is in use.\n", port_num));
	goto abort_with_minor;
      }
  }


  /* init scratch unicode strings */

  /* NOTE(review): UNICODE_STRING.MaximumLength is a byte count, so 32
     makes only the first 16 WCHARs of each 32-WCHAR buffer usable.
     That is ample for a decimal integer, but sizeof (wchar_unit) was
     presumably intended -- confirm. */

  unit.Length = 0;
  unit.MaximumLength = 32;
  unit.Buffer = wchar_unit;

  port.Length = 0;
  port.MaximumLength = 32;
  port.Buffer = wchar_port;

  RtlInitUnicodeString (&basename, _basename);

  /* Convert ints to strings */

  GM_PRINT (GM_PRINT_LEVEL >= 4, ("Converting ints to strings.\n"));
  nt_status = RtlIntegerToUnicodeString (is->id, 10, &unit);
  if (NT_ERROR (nt_status))
    {
      GM_WARN (("Can't convert integer to Unicode\n"));
      goto abort_with_minor;
    }

  /* ps->id is converted even for clone devices, where it was not
     assigned above; the result is only appended to the names for
     non-clone devices. */
  nt_status = RtlIntegerToUnicodeString (ps->id, 10, &port);
  if (NT_ERROR (nt_status))
    {
      GM_WARN (("Can't convert integer to Unicode\n"));
      goto abort_with_minor;
    }

  /* Initialize storage for device name with at least enough storage */

  GM_PRINT (GM_PRINT_LEVEL >= 4, ("Initializing storage for device name.\n"));
  GM_PRINT (GM_PRINT_LEVEL >= 4,
	    ("basename.Length = %d.\n", basename.Length));
  GM_PRINT (GM_PRINT_LEVEL >= 4, ("unit.Length = %d.\n", unit.Length));
  GM_PRINT (GM_PRINT_LEVEL >= 4, ("port.Length = %d.\n", port.Length));

  /* The fixed 2 * 15 * sizeof (WCHAR) term is slack for the prefix,
     the "-" separator and a terminator; the original author flagged
     the magic number with the BAD marker. */
  RtlInitUnicodeString (&ps->arch.nt_device_name, NULL);
  gm_assert (ps->arch.nt_device_name.Length == 0);
  ps->arch.nt_device_name.MaximumLength
    = ( /*BAD*/ 2 * 15 * sizeof (WCHAR) + basename.Length + unit.Length
       + port.Length);
  ps->arch.nt_device_name.Buffer
    = gm_malloc (ps->arch.nt_device_name.MaximumLength);
  if (ps->arch.nt_device_name.Buffer == NULL)
    {
      GM_WARN (("Can't allocate memory for unicode string\n"));
      /* nt_status is set here but never reported; the function only
         returns GM_FAILURE. */
      nt_status = STATUS_INSUFFICIENT_RESOURCES;
      goto abort_with_minor;
    }

  RtlInitUnicodeString (&ps->arch.win32_device_name, NULL);
  gm_assert (ps->arch.win32_device_name.Length == 0);
  ps->arch.win32_device_name.MaximumLength
    = ( /*BAD*/ 2 * 15 * sizeof (WCHAR) + basename.Length + unit.Length
       + port.Length);
  ps->arch.win32_device_name.Buffer
    = gm_malloc (ps->arch.win32_device_name.MaximumLength);
  if (ps->arch.win32_device_name.Buffer == NULL)
    {
      GM_WARN (("Can't allocate memory for unicode string\n"));
      nt_status = STATUS_INSUFFICIENT_RESOURCES;
      goto abort_with_nt_buffer;
    }

  /* Format the device name into the allocated buffers */

  /* The status results of the append calls below are not checked. */

  GM_PRINT (GM_PRINT_LEVEL >= 4,
	    ("Formatting device names with basename \"%S\".\n",
	     basename.Buffer));
  RtlAppendUnicodeToString (&ps->arch.nt_device_name, L"\\Device\\");
  RtlAppendUnicodeStringToString (&ps->arch.nt_device_name, &basename);
  RtlAppendUnicodeStringToString (&ps->arch.nt_device_name, &unit);
  if (!ps->arch.is_clone_device)
    {
      RtlAppendUnicodeToString (&ps->arch.nt_device_name, L"-");
      RtlAppendUnicodeStringToString (&ps->arch.nt_device_name, &port);
    }
  GM_PRINT (GM_PRINT_LEVEL >= 2, ("NT device name is \"%S.\"\n",
				  ps->arch.nt_device_name.Buffer));

  RtlAppendUnicodeToString (&ps->arch.win32_device_name, L"\\??\\");
  RtlAppendUnicodeStringToString (&ps->arch.win32_device_name, &basename);
  RtlAppendUnicodeStringToString (&ps->arch.win32_device_name, &unit);
  if (!ps->arch.is_clone_device)
    {
      RtlAppendUnicodeToString (&ps->arch.win32_device_name, L"-");
      RtlAppendUnicodeStringToString (&ps->arch.win32_device_name, &port);
    }
  GM_PRINT (GM_PRINT_LEVEL >= 2, ("WIN32 device name is \"%S.\"\n",
				  ps->arch.win32_device_name.Buffer));

  /* Create a device with that name and minor device number. */

  GM_PRINT (GM_PRINT_LEVEL >= 4, ("Creating the minor device for port %d of "
				  "instance %d.\n", ps->id, is->id));
  GM_PRINT (GM_PRINT_LEVEL >= 5,
	    ("Calling IoCreateDevice (%p, %d, %p, %d, %d, %d, %p)\n",
	     is->arch.driver_object,
	     sizeof (struct gm_nt_minor_device_extension),
	     &ps->arch.nt_device_name, FILE_DEVICE_PHYSICAL_NETCARD, 0, FALSE,
	     &ps->arch.device_object));
  nt_status =
    IoCreateDevice (is->arch.driver_object,
		    sizeof (struct gm_nt_minor_device_extension),
		    &ps->arch.nt_device_name, FILE_DEVICE_PHYSICAL_NETCARD, 0,
		    FALSE, &ps->arch.device_object);
  if (NT_ERROR (nt_status))
    {
      GM_NOTE (("Could not create minor device (error 0x%x).\n", nt_status));
      GM_NOTE (("WIN32 device name was \"%S.\"\n",
		ps->arch.win32_device_name.Buffer));

      goto abort_with_win32_buffer;
    }
  gm_assert (ps->arch.device_object);

  /* record that we created this device */

  /* The device pointer serves as both key and value, which is what
     is_gm_device() relies on. */

  if (gm_hash_insert
      (gm_device_hash, ps->arch.device_object,
       ps->arch.device_object) != GM_SUCCESS)
    {
      GM_NOTE (("Could not remember device is ours.\n"));
      goto abort_with_device;
    }

  /* Make the device reachable under its WIN32 name. */

  nt_status = IoCreateSymbolicLink (&ps->arch.win32_device_name,
				    &ps->arch.nt_device_name);
  if (NT_ERROR (nt_status))
    {
      GM_NOTE (("Could not create WIN32 subsystem symbolic link.\n"));
      goto abort_with_hash_entry;
    }

  /* Fill the device extension */

  GM_PRINT (GM_PRINT_LEVEL >= 5, ("Filling device extension.\n"));
  gm_nt_device_extension (ps->arch.device_object)->minor.type
    = GM_NT_MINOR_DEVICE_EXTENSION;
  gm_nt_device_extension (ps->arch.device_object)->minor.minor = minor;
  /* If this is a dynamically created device, clear the initializing
     flag so that client applications can open it. */

#if GM_OS_NT4
  if (!is_clone_device)
    ps->arch.device_object->Flags &= ~DO_DEVICE_INITIALIZING;
#elif GM_OS_WIN2K
  /* Under Win2K, all devices get dynamically created. */
  ps->arch.device_object->Flags &= ~DO_DEVICE_INITIALIZING;
#else
#error What am I doing here?
#endif

  GM_PRINT (GM_PRINT_LEVEL >= 5, ("Returning the minor device number.\n"));
  *minorp = minor;
  GM_RETURN_STATUS (GM_SUCCESS);

  /* Error exits: each label undoes one setup step and falls through
     to the labels for the steps taken before it. */

abort_with_hash_entry:
  gm_hash_remove (gm_device_hash, ps->arch.device_object);
abort_with_device:
  IoDeleteDevice (ps->arch.device_object);
abort_with_win32_buffer:
  gm_free (ps->arch.win32_device_name.Buffer);
abort_with_nt_buffer:
  gm_free (ps->arch.nt_device_name.Buffer);
abort_with_minor:
  gm_minor_free (minor);
abort_with_nothing:
  GM_RETURN_STATUS (GM_FAILURE);
}

/* Tear down the minor node MINOR: remove its WIN32 symbolic link,
   forget and delete its NT device, release both device-name buffers
   and finally free the minor number.  A MINOR with no port state is
   reported with a warning and otherwise ignored.  The port must not
   be open (asserted). */

static void
gm_nt_destroy_minor_node (gm_arch_minor_t minor)
{
  gm_port_state_t *ps = gm_minor_get_port_state (minor);

  if (ps == NULL)
    {
      GM_WARN (("Tried to destroy nonexistant minor node %d.\n", minor));
      return;
    }

  gm_assert (!ps->opened);

  /* Drop the user-visible name first, then the device itself. */
  IoDeleteSymbolicLink (&ps->arch.win32_device_name);
  gm_hash_remove (gm_device_hash, ps->arch.device_object);
  IoDeleteDevice (ps->arch.device_object);

  /* Release the name storage, then the minor number. */
  gm_free (ps->arch.win32_device_name.Buffer);
  gm_free (ps->arch.nt_device_name.Buffer);
  gm_minor_free (minor);
}

#if GM_ENABLE_ETHERNET

/************************************************************************
 * GM ethernet driver function table support 
 ************************************************************************/

/* Allocate SIZE bytes of physically contiguous memory on behalf of
   the GM ethernet driver.

   On success *VIRTUALADDRESS receives the kernel virtual address and
   *PHYSICALLOW / *PHYSICALHIGH receive the low and high halves of the
   physical address reported by MmGetPhysicalAddress().  On failure
   *VIRTUALADDRESS is NULL and both physical halves are set to 0, so
   callers never read uninitialized values.

   The allocation ceiling is 4GB-1 when gm_dp_t is 4 bytes wide, and
   is further restricted to GM_MAX_HOST_PAGES pages when
   GM_TRACE_LANAI_DMA is enabled.  With GM_TRACE_LANAI_DMA, every page
   of the buffer is also marked in the instance's dma_pages_bitmap;
   P is only dereferenced in that configuration. */
void
gmm_allocate_shared_memory (struct gm_port *p,
			    int size,
			    void **virtualAddress,
			    ULONG * physicalLow, ULONG * physicalHigh)
{
  PHYSICAL_ADDRESS pa;

  /* Build the highest acceptable physical address. */

#if (GM_SIZEOF_DP_T == 4)
  pa.HighPart = 0;
#elif (GM_SIZEOF_DP_T == 8)
  pa.HighPart = 0xffffffff;
#else
#error sizeof gm_dp_t unknown
#endif
#if GM_TRACE_LANAI_DMA
  pa.LowPart = (GM_MAX_HOST_PAGES * GM_PAGE_LEN) - 1;
#else
  pa.LowPart = 0xffffffff;
#endif

  *virtualAddress = MmAllocateContiguousMemory (size, pa);
  if (!*virtualAddress)
    {
      /* Give the outputs defined values on failure. */
      *physicalLow = 0;
      *physicalHigh = 0;
      return;
    }

  pa = MmGetPhysicalAddress (*virtualAddress);
#if (GM_SIZEOF_DP_T == 4)
  /* A 32-bit DMA pointer cannot express an address above 4GB. */
  if (pa.HighPart != 0) {
    GM_NOTE (("gmm_allocate_shared_memory: MmGetPhysicalAddress\n"));
  }
#endif
  *physicalLow = pa.LowPart;
  *physicalHigh = pa.HighPart;
#if GM_TRACE_LANAI_DMA
  GM_NOTE (("*** Ethernet buffer at DMA addr 0x%x.\n", *physicalLow));
  gm_assert (!*physicalHigh);
  gm_assert (p);
  gm_assert (p->kernel_port_state);
  gm_assert (p->kernel_port_state->instance);
  gm_assert (p->kernel_port_state->instance->dma_pages_bitmap);
  {
    gm_dp_t d;

    /* Mark each page of the buffer in the DMA page bitmap. */
    for (d = *physicalLow; d < *physicalLow + size; d += GM_PAGE_LEN)
      {
	unsigned long page_num;

	page_num = d / GM_PAGE_LEN;
	GM_PRINT (GM_PRINT_LEVEL >= 1,
		  ("*** Ethernet buffer at page 0x%x.\n", page_num));
	/* Unsigned one: shifting a signed 1 into bit 31 is undefined. */
	p->kernel_port_state->instance->dma_pages_bitmap[page_num / 32] |=
	  gm_htonl (1U << (page_num % 32));
      }
  }
#endif
}

/* Free a buffer obtained from gmm_allocate_shared_memory().

   With GM_TRACE_LANAI_DMA enabled, the bit for the buffer's first
   page is cleared in the instance's dma_pages_bitmap before the
   memory is released; P is only dereferenced in that configuration.
   NOTE(review): the allocator marks every page of the buffer but the
   size is not available here, so only the first page's bit is
   cleared -- confirm whether multi-page buffers are ever freed. */
void
gmm_free_shared_memory (struct gm_port *p, void *virtualAddress)
{
#if GM_TRACE_LANAI_DMA
  PHYSICAL_ADDRESS pa;

  pa = MmGetPhysicalAddress (virtualAddress);
#if (GM_SIZEOF_DP_T == 4)
  gm_assert (!pa.HighPart);
#endif
  gm_assert (p);
  gm_assert (p->kernel_port_state);
  gm_assert (p->kernel_port_state->instance);
  gm_assert (p->kernel_port_state->instance->dma_pages_bitmap);
  /* Unsigned one: shifting a signed 1 into bit 31 is undefined. */
  p->kernel_port_state->instance->dma_pages_bitmap[pa.LowPart
						   / GM_PAGE_LEN / 32]
    &= ~gm_htonl (1U << (pa.LowPart / GM_PAGE_LEN % 32));
#endif
  MmFreeContiguousMemory (virtualAddress);
}

/* Disable interrupts for the instance that owns PORT.  PORT, its
   kernel port state and that state's instance must all be non-null
   (asserted); the work is delegated to gm_disable_interrupts(). */
void
gmm_disable_interrupts (struct gm_port *port)
{
  gm_assert (port);
  gm_assert (port->kernel_port_state);
  gm_assert (port->kernel_port_state->instance);

  GM_PRINT (GM_PRINT_LEVEL >= 4, ("*** gmm_disable_interrupts called.\n"));
  gm_disable_interrupts (port->kernel_port_state->instance);
}

/* Enable interrupts for the instance that owns PORT.  PORT, its
   kernel port state and that state's instance must all be non-null
   (asserted); the work is delegated to gm_enable_interrupts(). */
void
gmm_enable_interrupts (struct gm_port *port)
{
  gm_assert (port);
  gm_assert (port->kernel_port_state);
  gm_assert (port->kernel_port_state->instance);

  GM_PRINT (GM_PRINT_LEVEL >= 4, ("*** gmm_enable_interrupts called.\n"));
  gm_enable_interrupts (port->kernel_port_state->instance);
}

#endif /* GM_ENABLE_ETHERNET */

/***********************************************************************
 * Required functions (gm_arch_*)
 ***********************************************************************/

gm_status_t gm_arch_get_page_len (unsigned long *result)
{
#if GM_CPU_x86
  *result = 4096;
#elif GM_CPU_ia64
  *result = 8192;
#else
#  error Unknown cpu type
#endif
  return GM_SUCCESS;
}

/************
 * Driver entry point serialization.
 ************/

/* This is really simple on NT, which does not block in the driver to
   sleep a process.  We just call gm_nt_enter() when we enter a
   driver entry point, and we call gm_nt_exit() when we return, and
   have DriverEntry and Unload init and destroy the sync var. */

/* initialized by DriverEntry */
static gm_arch_sync_t _gm_arch_serialize_sync;

/* Acquire the driver-wide serialization mutex.  Declared with a
   (void) parameter list so the compiler rejects stray arguments. */
static void
gm_nt_enter (void)
{
  gm_arch_mutex_enter (&_gm_arch_serialize_sync);
}

/* Release the driver-wide serialization mutex taken by gm_nt_enter().
   Declared with a (void) parameter list so the compiler rejects stray
   arguments. */
static void
gm_nt_exit (void)
{
  gm_arch_mutex_exit (&_gm_arch_serialize_sync);
}

/************
 * gm_port_state initialization 
 ************/

/* Initialize the NT-specific part of a port state.  The arch fields
   are expected to arrive zeroed (asserted); only the two list heads
   need explicit initialization.  Always returns GM_SUCCESS. */
gm_status_t gm_arch_port_state_init (gm_port_state_t * ps)
{
  gm_assert (ps->arch.device_object == 0);
  gm_assert (ps->arch.nt_device_name.Buffer == 0);
  gm_assert (ps->arch.win32_device_name.Buffer == 0);
  InitializeListHead (&ps->arch.sleep_q_root);
  /* Records of user mappings; drained by
     gm_arch_munmap_contiguous_segments(). */
  InitializeListHead (&ps->arch.mapping_list);
  gm_assert (ps->arch.sleep_cnt == 0);
  gm_assert (ps->arch.is_clone_device == 0);
  return GM_SUCCESS;
}

/* Arch hook run when a port state is opened.  Nothing is allocated
   here; for non-clone devices it only asserts that the port state is
   attached to an instance.  Always returns GM_SUCCESS. */
gm_status_t gm_arch_port_state_open (gm_port_state_t * ps)
{
  gm_instance_state_t *is;

  /* Clone devices need no checking. */
  if (ps->arch.is_clone_device)
    return GM_SUCCESS;

  is = ps->instance;
  gm_assert (is);
  return GM_SUCCESS;
}

/* Arch hook run when a port state is closed: removes every user
   mapping recorded for the port since it was opened. */
void
gm_arch_port_state_close (gm_port_state_t * ps)
{
  /*** Free resources allocated since gm_arch_port_state_open() */

  gm_arch_munmap_contiguous_segments (ps);
}

/* Arch hook paired with gm_arch_port_state_init().  There is nothing
   to release on NT, so this does nothing. */
void
gm_arch_port_state_fini (gm_port_state_t * ps)
{
}

#if GM_SUPPORT_PCI
/************
 * PCI configuration space access.
 ************/

/* Read the 32-bit PCI configuration register at OFFSET of the
   instance's bus/slot into *VALUE.  Returns GM_SUCCESS only if the
   HAL reports that all 4 bytes were transferred. */
gm_status_t
gm_arch_read_pci_config_32 (gm_instance_state_t * is,
			    gm_offset_t offset, gm_u32_t * value)
{
  ULONG got;

  got = HalGetBusDataByOffset (PCIConfiguration, is->arch.bus_number,
			       is->arch.slot_number, value,
			       GM_STATIC_CAST (ULONG, offset), 4);
  if (got != 4)
    return GM_FAILURE;
  return GM_SUCCESS;
}

/* Write VALUE to the 32-bit PCI configuration register at OFFSET of
   the instance's bus/slot.  Returns GM_SUCCESS only if the HAL
   reports that all 4 bytes were transferred. */
gm_status_t
gm_arch_write_pci_config_32 (gm_instance_state_t * is,
			     gm_offset_t offset, gm_u32_t value)
{
  ULONG put;

  put = HalSetBusDataByOffset (PCIConfiguration, is->arch.bus_number,
			       is->arch.slot_number, &value,
			       GM_STATIC_CAST (ULONG, offset), 4);
  if (put != 4)
    return GM_FAILURE;
  return GM_SUCCESS;
}

/* Read the 16-bit PCI configuration register at OFFSET of the
   instance's bus/slot into *VALUE.  Returns GM_SUCCESS only if the
   HAL reports that both bytes were transferred. */
gm_status_t
gm_arch_read_pci_config_16 (gm_instance_state_t * is,
			    gm_offset_t offset, gm_u16_t * value)
{
  ULONG got;

  got = HalGetBusDataByOffset (PCIConfiguration, is->arch.bus_number,
			       is->arch.slot_number, value,
			       GM_STATIC_CAST (ULONG, offset), 2);
  if (got != 2)
    return GM_FAILURE;
  return GM_SUCCESS;
}

/* Write VALUE to the 16-bit PCI configuration register at OFFSET of
   the instance's bus/slot.  Returns GM_SUCCESS only if the HAL
   reports that both bytes were transferred. */
gm_status_t
gm_arch_write_pci_config_16 (gm_instance_state_t * is,
			     gm_offset_t offset, gm_u16_t value)
{
  ULONG put;

  put = HalSetBusDataByOffset (PCIConfiguration, is->arch.bus_number,
			       is->arch.slot_number, &value,
			       GM_STATIC_CAST (ULONG, offset), 2);
  if (put != 2)
    return GM_FAILURE;
  return GM_SUCCESS;
}

/* Read the 8-bit PCI configuration register at OFFSET of the
   instance's bus/slot into *VALUE.  Returns GM_SUCCESS only if the
   HAL reports that the byte was transferred. */
gm_status_t
gm_arch_read_pci_config_8 (gm_instance_state_t * is,
			   gm_offset_t offset, gm_u8_t * value)
{
  ULONG got;

  got = HalGetBusDataByOffset (PCIConfiguration, is->arch.bus_number,
			       is->arch.slot_number, value,
			       GM_STATIC_CAST (ULONG, offset), 1);
  if (got != 1)
    return GM_FAILURE;
  return GM_SUCCESS;
}

/* Write VALUE to the 8-bit PCI configuration register at OFFSET of
   the instance's bus/slot.  Returns GM_SUCCESS only if the HAL
   reports that the byte was transferred. */
gm_status_t
gm_arch_write_pci_config_8 (gm_instance_state_t * is,
			    gm_offset_t offset, gm_u8_t value)
{
  ULONG put;

  put = HalSetBusDataByOffset (PCIConfiguration, is->arch.bus_number,
			       is->arch.slot_number, &value,
			       GM_STATIC_CAST (ULONG, offset), 1);
  if (put != 1)
    return GM_FAILURE;
  return GM_SUCCESS;
}
#endif /* GM_SUPPORT_PCI */

/************
 * Copy data into/out of user buffers.
 ************/

/* Copy LEN bytes from FROM to TO with gm_bcopy(); a zero LEN copies
   nothing.  PS is unused.  Always returns GM_SUCCESS.
   NOTE(review): no probing or fault handling is done on FROM;
   presumably callers only pass buffers that are already safe to
   touch from the kernel -- confirm. */
gm_status_t
gm_arch_copyin (gm_port_state_t * ps, void *from, void *to, gm_size_t len)
{
  if (len == 0)
    return GM_SUCCESS;

  gm_bcopy (from, to, len);
  return GM_SUCCESS;
}

/* Copy LEN bytes from FROM to TO with gm_bcopy(); a zero LEN copies
   nothing.  PS is unused.  Always returns GM_SUCCESS.
   NOTE(review): no probing or fault handling is done on TO;
   presumably callers only pass buffers that are already safe to
   touch from the kernel -- confirm. */
gm_status_t
gm_arch_copyout (gm_port_state_t * ps, void *from, void *to, gm_size_t len)
{
  if (len == 0)
    return GM_SUCCESS;

  gm_bcopy (from, to, len);
  return GM_SUCCESS;
}

/************
 * Introduce delay
 ************/

/* Pause for at least USECS microseconds.  Despite the name this does
   not busy-wait: it arms the instance's spin timer and blocks on it.
   The instance's spin_fast_mutex is held around the timer so only
   one caller uses the timer at a time. */

void
gm_arch_spin (gm_instance_state_t * is, gm_u32_t usecs)
{
  LARGE_INTEGER due;
  LARGE_INTEGER times2;
  LARGE_INTEGER times8;

  due = RtlConvertUlongToLargeInteger (usecs);

  GM_PRINT (GM_PRINT_LEVEL >= 7, ("Pausing for %d usecs.\n", usecs));

  /* KeSetTimer takes 100 nanosecond units, and a negative value means
     "relative to now".  So scale by 10, computed as 2x + 8x with
     shifts, and negate. */

  times2 = RtlLargeIntegerShiftLeft (due, 1);
  times8 = RtlLargeIntegerShiftLeft (due, 3);
  due = RtlLargeIntegerNegate (RtlLargeIntegerAdd (times2, times8));

  /* Wait out the interval (longer if another caller holds the timer). */

  ExAcquireFastMutex (&is->arch.spin_fast_mutex);
  KeSetTimer (&is->arch.spin_timer, due, NULL);
  KeWaitForSingleObject (&is->arch.spin_timer, Executive, KernelMode, FALSE,
			 NULL);
  ExReleaseFastMutex (&is->arch.spin_fast_mutex);

  GM_PRINT (GM_PRINT_LEVEL >= 7, ("Done pausing.\n"));
}

/************
 * kernel memory allocation
 ************/

/* Number of buffers currently outstanding from gm_arch_kernel_malloc().
   Maintained unconditionally so that it stays accurate regardless of
   how GM_PRINT is configured; used for debug output only. */
static int _gm_arch_alloc_cnt;

/* Allocate LEN bytes of kernel memory.  Returns the buffer, or NULL
   if the pool allocation fails.

   FLAGS is accepted for interface compatibility, but the
   GM_KERNEL_MALLOC_PAGEABLE request is deliberately not honored:
   memory always comes from NonPagedPool.
   NOTE(review): presumably pageable memory was disabled because some
   callers touch these buffers at raised IRQL -- confirm before
   re-enabling PagedPool. */
void *
gm_arch_kernel_malloc (unsigned long len, int flags)
{
  void *ret;

  ret = ExAllocatePool (NonPagedPool, len);
  if (!ret)
    return ret;

  /* Count outside GM_PRINT so the side effect cannot be compiled or
     filtered away with the message. */
  ++_gm_arch_alloc_cnt;
  GM_PRINT (GM_PRINT_LEVEL >= 7,
	    ("%d kernel buffers allocated.\n", _gm_arch_alloc_cnt));
  return ret;
}

/* Release a buffer obtained from gm_arch_kernel_malloc().  A NULL PTR
   is ignored rather than handed to ExFreePool(). */
void
gm_arch_kernel_free (void *ptr)
{
  if (!ptr)
    return;

  ExFreePool ((void *) ptr);
  --_gm_arch_alloc_cnt;
  GM_PRINT (GM_PRINT_LEVEL >= 7,
	    ("%d kernel buffers allocated.\n", _gm_arch_alloc_cnt));
}

/************
 * synchronization
 ************/

/* Return S's semaphore to its initial state by re-initializing it
   with a count of 0 and a limit of 1.  The mutex and queue in S are
   not touched. */
void
gm_arch_sync_reset (gm_arch_sync_t * s)
{
  GM_PRINT (GM_PRINT_LEVEL >= 5, ("gm_arch_sync_reset\n"));
  KeInitializeSemaphore (&s->sem, 0, 1);
}

/* Prepare S for use: zero the whole structure, then set up its fast
   mutex, its list head and (via gm_arch_sync_reset()) its semaphore.
   IS is unused. */
void
gm_arch_sync_init (gm_arch_sync_t * s, gm_instance_state_t * is)
{
  GM_PRINT (GM_PRINT_LEVEL >= 5, ("gm_arch_sync_init.\n"));

  RtlZeroMemory (s, sizeof *s);

  ExInitializeFastMutex (&s->mu);
  InitializeListHead (&s->q);
  gm_arch_sync_reset (s);
}

/* Acquire the fast mutex in S, blocking until it is available.
   Traces before and after at print level 10. */
void
gm_arch_mutex_enter (gm_arch_sync_t * s)
{
  GM_PRINT (GM_PRINT_LEVEL >= 10, ("Entering a mutex.\n"));
  ExAcquireFastMutex (&s->mu);
  GM_PRINT (GM_PRINT_LEVEL >= 10, ("Entered a mutex.\n"));
}

/* Release the fast mutex in S taken by gm_arch_mutex_enter().
   Traces before and after at print level 10. */
void
gm_arch_mutex_exit (gm_arch_sync_t * s)
{
  GM_PRINT (GM_PRINT_LEVEL >= 10, ("Exiting a mutex.\n"));
  ExReleaseFastMutex (&s->mu);
  GM_PRINT (GM_PRINT_LEVEL >= 10, ("Exited a mutex.\n"));
}

/* Retire S by zeroing the whole structure.  No kernel objects are
   explicitly released here. */
void
gm_arch_sync_destroy (gm_arch_sync_t * s)
{
  RtlZeroMemory (s, sizeof (*s));
}

/* Wake a waiter on S by releasing its semaphore once, with no
   priority boost and without a following wait.

   This may be called at DISPATCH_LEVEL.

   NOTE(review): gm_arch_sync_reset() creates the semaphore with a
   limit of 1, so a second wake before anyone waits would push the
   count past the limit -- confirm callers never do that. */

void
gm_arch_wake (gm_arch_sync_t * s)
{
  KeReleaseSemaphore (&s->sem, 0, 1, FALSE);
}

/* Called by gm_pause_lanai() at PASSIVE_LEVEL to wait for LANai to
   pause.

   Block on S's semaphore for up to SECS seconds.  Returns
   GM_SLEEP_WOKE if the semaphore was released (see gm_arch_wake())
   and GM_SLEEP_TIMED_OUT if the interval elapsed first.  Any other
   wait status is treated as fatal. */

gm_arch_sleep_status_t gm_arch_timed_sleep (gm_arch_sync_t * s, int secs)
{
  LARGE_INTEGER large;
  NTSTATUS nt_status;

  large = RtlConvertUlongToLargeInteger (secs);

  /* Multiply large by -10,000,000.  The factor of 10,000,000 is
     required because the wait timeout is expressed in 100 nanosecond
     intervals.  The negation is required to specify a relative
     time.  The multiplication is done exactly; approximating it as
     (x << 23) + (x << 21) would scale by 10,485,760 and stretch
     every timeout by about 5%. */

  large.QuadPart = -(large.QuadPart * 10000000);

  /* Wait for up to the specified amount of time. */
  nt_status = KeWaitForSingleObject (&s->sem, Executive, KernelMode, FALSE,
				     &large);

  if (nt_status == STATUS_SUCCESS)
    return GM_SLEEP_WOKE;
  else if (nt_status == STATUS_TIMEOUT)
    return GM_SLEEP_TIMED_OUT;
  else
    {
      GM_PANIC (("Unexpected return value from KeWaitForSingleObject"));
      /* return needed to keep compiler happy */
      return GM_SLEEP_TIMED_OUT;
    }
}

/************
 * Misc required functions
 ************/

/* Discover the host page size at run time and store it in *RESULT.
   Always returns GM_SUCCESS.

   The size is inferred with the NT BYTES_TO_PAGES macro: starting
   from 4096 (asserted to fit in one page), the candidate is doubled
   for as long as the doubled value still fits in a single page. */
gm_status_t gm_arch_page_len (unsigned *result)
{
  gm_u32_t i = 4096;

  ASSERT (BYTES_TO_PAGES (i) == 1);

  /* Stop at the largest power of two that occupies exactly one page. */
  while (BYTES_TO_PAGES (i << 1) == 1)
    i <<= 1;

  GM_PRINT (GM_PRINT_LEVEL >= 4, ("There are 0x%x bytes in a page.\n", i));
  *result = i;
  return GM_SUCCESS;
}

/* Synchronize DMA region R for the device and/or the CPU, as
   selected by the GM_ARCH_SYNC_FOR_DEVICE and GM_ARCH_SYNC_FOR_CPU
   bits of COMMAND.

   The KeFlushIoBuffers() calls that would do the flushing are
   compiled out with #if 0, so this function currently does nothing
   and always returns GM_SUCCESS. */

gm_status_t gm_arch_dma_region_sync (gm_arch_dma_region_t * r, int command)
{
#if 0
  if (command & GM_ARCH_SYNC_FOR_DEVICE)
    {
      KeFlushIoBuffers (r->mdl, FALSE, FALSE);
      KeFlushIoBuffers (r->mdl, FALSE, TRUE);
    }
  if (command & GM_ARCH_SYNC_FOR_CPU)
    {
      KeFlushIoBuffers (r->mdl, TRUE, FALSE);
      KeFlushIoBuffers (r->mdl, TRUE, TRUE);
    }
#endif
  return GM_SUCCESS;
}

/************
 * Memory mapping
 ************/

/* Structure for recording mappings so they can be unmapped.  One
   record is queued on a port state's arch.mapping_list per mapping
   made by gm_arch_mmap_contiguous_segment(). */

typedef struct gm_map_record
{
  PVOID base;			/* address handed to ZwUnmapViewOfSection() */
  PVOID physicalMemorySection;	/* referenced section object; dereferenced
				   when the mapping is removed */
  LIST_ENTRY list_entry;	/* link in the port's mapping_list */
}
gm_map_record_t;

/* Map a segment of kernel memory into user space, and return the user
   virtual address. The segment is known to be contiguous in physical
   memory and in DMAable memory (in the bottom 2**32 bytes)

   KADDR is the kernel virtual address of the segment and OLDBLOCKSIZE
   its length in bytes.  The mapping is made by opening
   \Device\PhysicalMemory and mapping a view of it at the segment's
   physical address into the process identified by the (HANDLE) -1
   pseudo-handle.

   On success GM_SUCCESS is returned, *UPVADDR holds the user address
   corresponding to KADDR, and a record of the mapping is queued on
   PS's mapping list so gm_arch_munmap_contiguous_segments() can undo
   it.  On failure GM_FAILURE is returned and *UPVADDR is set to
   (void *) -1.

   The section handle is closed before returning on every path; the
   mapped view and the object reference held in the mapping record
   keep the section alive for as long as it is needed.

   Must run at PASSIVE_LEVEL */

gm_status_t
gm_arch_mmap_contiguous_segment (gm_port_state_t * ps,
				 void *kaddr,
				 unsigned long oldBlockSize, gm_up_t * upvaddr)
{
  NTSTATUS nt_status;
  PHYSICAL_ADDRESS physicalAddress;
  UNICODE_STRING physicalMemoryString;
  PVOID physicalMemorySection;
  OBJECT_ATTRIBUTES objectAttributes;
  HANDLE physicalMemoryHandle;
  PHYSICAL_ADDRESS viewBase;
  gm_map_record_t *record;
  void **vaddr;
#if GM_OS_NT4
  ULONG blockSize;
#elif GM_OS_WIN2K
  SIZE_T blockSize;
#else
#error unknown OS
#endif

  blockSize = oldBlockSize;
  vaddr = (void **) upvaddr;
  ASSERT (kaddr != (PVOID) - 1 && kaddr != 0);

  physicalAddress = MmGetPhysicalAddress (kaddr);
#if (GM_SIZEOF_DP_T == 4)
  if (physicalAddress.HighPart != 0)
    GM_NOTE (("gm_arch_mmap_contiguous_segment: HighPart != 0\n"));
  ASSERT (physicalAddress.HighPart == 0);
  /* ASSERT ((ULONG)kaddr == (ULONG)physicalAddress.LowPart | 0x80000000); */
#endif

  /* Get handle for physical memory */

  RtlInitUnicodeString (&physicalMemoryString, L"\\Device\\PhysicalMemory");
  InitializeObjectAttributes (&objectAttributes,
			      &physicalMemoryString,
			      OBJ_CASE_INSENSITIVE,
			      (HANDLE) NULL, (PSECURITY_DESCRIPTOR) NULL);
  nt_status = ZwOpenSection (&physicalMemoryHandle,
			     SECTION_ALL_ACCESS, &objectAttributes);
  if (NT_ERROR (nt_status))
    {
      GM_WARN (("Open section failed\n"));
      goto abort_with_nothing;
    }

  /* Allocate a record of the mapping */

  record = gm_malloc (sizeof (*record));
  if (!record)
    {
      GM_WARN (("Could not allocate mapping record.\n"));
      /* The section handle is already open and must be closed. */
      goto abort_with_handle;
    }

  /* Record a new reference to physical memory, and get pointer to
     memory object */

  nt_status = ObReferenceObjectByHandle (physicalMemoryHandle,
					 SECTION_ALL_ACCESS,
					 (POBJECT_TYPE) NULL,
					 KernelMode,
					 &physicalMemorySection,
					 (POBJECT_HANDLE_INFORMATION) NULL);
  if (NT_ERROR (nt_status))
    {
      GM_WARN (("Reference object failed\n"));
      goto abort_with_record;
    }

  /* Map the section. */

  viewBase = physicalAddress;
#if GM_CAN_MAP_PAGES
#  error May be broken
  if (!*vaddr)
    {
#endif
      /* Map contiguous memory to system determined VMA */

      *vaddr = NULL;

      nt_status = ZwMapViewOfSection (physicalMemoryHandle,
				      (HANDLE) (-1),
				      vaddr,
				      0,
				      GM_STATIC_CAST(ULONG, blockSize),
				      &viewBase,
				      &blockSize,
				      ViewShare,
				      0, PAGE_READWRITE | PAGE_NOCACHE);
      if (NT_ERROR (nt_status))
	{
	  GM_WARN (("Map view of section failed\n"));
	  goto abort_with_object_reference;
	}
#if GM_CAN_MAP_PAGES
#  error
    }
  else
    {
      /* Map non-contiguous or contiguous memory to user-supplied VMA
         one page at a time. */

      ULONG offset;

      if (!GM_PAGE_ALIGNED (*vaddr) || !GM_PAGE_ALIGNED (blockSize))
	{
	  GM_NOTE (("User attempted bad mapping of length 0x%x at %p.",
		    blocksize, *vaddr));
	  goto abort_with_object_reference;
	}

      for (offset = 0; offset < blockSize; offset += GM_PAGE_LEN)
	{
	  LARGE_INTEGER pageBase;
	  ULONG pageSize;
	  PVOID userAddr;

	  pageBase.QuadPart = viewBase.QuadPart + offset;
	  pageSize = GM_PAGE_LEN;
	  userAddr = (char *) *vaddr + offset;

	  GM_PRINT (GM_PRINT_LEVEL >= 4,
		    ("Mapping a page of physical memory.\n"));
	  nt_status =
	    ZwMapViewOfSection (physicalMemoryHandle, (HANDLE) (-1),
				&userAddr, 0, 0, &pageBase, &pageSize,
				ViewShare, 0, PAGE_READWRITE | PAGE_NOCACHE);
	  if (NT_ERROR (nt_status))
	    {
	      while (offset >= GM_PAGE_LEN)
		{
		  offset -= GM_PAGE_LEN;
		  ZwUnmapViewOfSection ((HANDLE) (-1),
					(PVOID) ((char *) *vaddr + offset));
		}
	      *vaddr = (PVOID) (-1);
	      goto abort_with_object_reference;
	    }
	}
    }
#endif

  /* Mapping the section above rounded the physical address down to
     the nearest 64K boundary. Compute the real virtual address by
     adding in the offset from the beginning of the section. */

#if GM_CPU_x86
  {
    gm_u32_t u32;
    u32 = (gm_u32_t) *vaddr;
    u32 += physicalAddress.LowPart - viewBase.LowPart;
    *vaddr = (void*) u32;
  }
#elif GM_CPU_ia64
  {
    gm_u64_t u64;
    u64 = (gm_u64_t) *vaddr;
    u64 += physicalAddress.QuadPart - viewBase.QuadPart;
    *vaddr = (void*) u64;
  }
#else
#error Unknown cpu
#endif
  /* Record the base of the mapping for later unmapping. */

  record->base = *vaddr;
  record->physicalMemorySection = physicalMemorySection;
  InsertTailList (&ps->arch.mapping_list, &record->list_entry);

  GM_PRINT (GM_PRINT_LEVEL >= 4, ("Mapped to address %p.\n", *vaddr));

  /* The handle is not stored anywhere, so close it now rather than
     leak one handle per mapping. */
  ZwClose (physicalMemoryHandle);

  return GM_SUCCESS;

  /* Error exits: each label undoes one step and falls through to the
     labels for the steps taken before it. */

abort_with_object_reference:
  /* Drop the reference through the object pointer obtained from
     ObReferenceObjectByHandle(), not through the handle. */
  ObDereferenceObject (physicalMemorySection);
abort_with_record:
  gm_free (record);
abort_with_handle:
  ZwClose (physicalMemoryHandle);
abort_with_nothing:
  *vaddr = (void *) -1;
  return GM_FAILURE;
}

/* Undo every mapping recorded on PS's mapping list.  For each record,
   oldest first: unmap the view at the recorded base from the process
   identified by the (HANDLE) -1 pseudo-handle, drop the section
   object reference, and free the record.  An unmap failure is logged
   and cleanup continues. */
void
gm_arch_munmap_contiguous_segments (gm_port_state_t * ps)
{
  GM_PRINT (GM_PRINT_LEVEL >= 4, ("Unmapping user mappings.\n"));

  for (;;)
    {
      LIST_ENTRY *list_entry;
      gm_map_record_t *rec;
      NTSTATUS status;

      if (IsListEmpty (&ps->arch.mapping_list))
	break;

      list_entry = RemoveHeadList (&ps->arch.mapping_list);
      ASSERT (list_entry);
      rec = CONTAINING_RECORD (list_entry, gm_map_record_t, list_entry);

      GM_PRINT (GM_PRINT_LEVEL >= 4, ("Unmapping base %p section %p.\n",
				      rec->base,
				      rec->physicalMemorySection));

      status = ZwUnmapViewOfSection ((HANDLE) - 1, rec->base);
      if (NT_ERROR (status))
	{
	  GM_WARN (("Error 0x%08x when unmapping memory.\n", status));
	}

      ObDereferenceObject (rec->physicalMemorySection);
      gm_free (rec);
    }
}

/************
 * DMA regions
 ************/

/* DMA regions functions (required by generic code in gm.c to grow the
   copy block.) */

/* Return the DMA address of the page at R's current iterator offset,
   without moving the iterator.  The DMA address is the physical
   address of that location; when gm_dp_t is 4 bytes wide only the
   low 32 bits are returned, and a nonzero high half is logged and
   asserted against.

   Runs at PASSIVE_LEVEL but may run at any level. */

gm_dp_t gm_arch_dma_region_dma_addr (gm_arch_dma_region_t * r)
{
  char *va = (char *) r->addr + r->iterator_offset;
  PHYSICAL_ADDRESS phys;

  phys = MmGetPhysicalAddress (va);

#if (GM_SIZEOF_DP_T == 4)
  if (phys.HighPart != 0)
    {
      GM_NOTE (("gm_arch_dma_region_dma_addr: MmGetPhysicalAddress high"));
    }
  ASSERT (phys.HighPart == 0);
  return (gm_dp_t) phys.LowPart;
#elif (GM_SIZEOF_DP_T == 8)
  return (gm_dp_t) phys.QuadPart;
#else
#error sizeof gm_dp_t unknown
#endif
}

/* Return the DMA address of the page at R's current iterator offset,
   then advance the iterator by one page (GM_PAGE_LEN).  When gm_dp_t
   is 4 bytes wide only the low 32 bits of the physical address are
   returned, and a nonzero high half is logged and asserted against.

   Runs at PASSIVE_LEVEL but may be run at any level */

gm_dp_t gm_arch_dma_region_dma_addr_advance (gm_arch_dma_region_t * r)
{
  char *va = (char *) r->addr + r->iterator_offset;
  PHYSICAL_ADDRESS phys;

  phys = MmGetPhysicalAddress (va);

#if (GM_SIZEOF_DP_T == 4)
  if (phys.HighPart != 0)
    {
      GM_NOTE (("gm_arch_dma_region_dma_addr_advance: MmGetPhysicalAddress high"));
    }
  ASSERT (phys.HighPart == 0);
  r->iterator_offset += GM_PAGE_LEN;
  return (gm_dp_t) phys.LowPart;
#elif (GM_SIZEOF_DP_T == 8)
  r->iterator_offset += GM_PAGE_LEN;
  return (gm_dp_t) phys.QuadPart;
#else
#error sizeof gm_dp_t unknown
#endif
}

/* Allocate LEN bytes (rounded up to a whole number of pages) of DMA
   memory that is contiguous in kernel space but possibly segmented in
   DMA space.  HACK: This memory must also be contiguous in physical
   memory in NT.

   IS is recorded in the region.  If FLAGS contains
   GM_ARCH_DMA_CONTIGUOUS the memory comes from
   MmAllocateContiguousMemory(); otherwise it comes from nonpaged pool.

   If REGISTER_PAGE_FUNC is non-null, call
   register_page_func (user_arg, dma_addr, page_num) once for each page,
   in ascending page order starting at page 0.

   Returns GM_SUCCESS with *R describing the region, or GM_FAILURE with
   *R zeroed and nothing left allocated.

   Must be run at PASSIVE_LEVEL */

gm_status_t
gm_arch_dma_region_alloc (gm_instance_state_t * is,
                          gm_arch_dma_region_t * r,
                          gm_size_t len,
                          gm_u32_t flags,
                          gm_status_t
                          (*register_page_func) (void *user_arg,
                                                 gm_dp_t dma_addr,
                                                 gm_u32_t page_num),
                          void *user_arg)
{
  PHYSICAL_ADDRESS phys;
  unsigned int page_num = 0;
  unsigned int page_offset;

  GM_PRINT (GM_PRINT_LEVEL >= 4,
            ("Allocating a DMA region of length 0x%x with flags 0x%x.\n", len,
             flags));

  r->flags = flags;
  len = GM_PAGE_ROUNDUP (u32, len);
  gm_assert (len >= GM_PAGE_LEN);
  gm_assert (GM_PAGE_LEN);
  if (r->flags & GM_ARCH_DMA_CONTIGUOUS)
    {
      /* Allocate contiguous page-aligned memory */

      PHYSICAL_ADDRESS highest_acceptable;

      GM_PRINT (GM_PRINT_LEVEL >= 4,
                ("Attempting to allocate contiguous memory.\n"));

      /* Restrict the allocation to addresses a gm_dp_t can express. */
#if (GM_SIZEOF_DP_T == 4)
      highest_acceptable.HighPart = 0;
#elif (GM_SIZEOF_DP_T == 8)
      highest_acceptable.HighPart = 0xffffffff;
#else
#error sizeof gm_dp_t unknown
#endif
      highest_acceptable.LowPart = 0xffffffff;
      r->orig_addr = MmAllocateContiguousMemory (len, highest_acceptable);
      if (!r->orig_addr)
        {
          GM_NOTE (("Could not allocate contiguous DMA region"
                    " of 0x%x bytes.\n", len));
          goto abort_with_nothing;
        }
      r->addr = GM_PAGE_ROUNDUP (ptr, r->orig_addr);
    }
  else
    {
      /* Allocate noncontiguous page-aligned memory */

      GM_PRINT (GM_PRINT_LEVEL >= 4,
                ("Attempting to allocate noncontiguous memory.\n"));

      r->orig_addr = ExAllocatePool (NonPagedPool, len + GM_PAGE_LEN);
      if (!r->orig_addr)
        {
          GM_NOTE (("Could not allocate noncontiguous DMA region"
                    " of 0x%x bytes.\n", len));
          goto abort_with_nothing;
        }

      /* The allocation carries one page of slack, so an unaligned pool
         address can be rounded up to a page boundary and still leave
         LEN usable bytes.  orig_addr keeps the pointer to hand back to
         ExFreePool(). */
      r->addr = GM_PAGE_ROUNDUP (ptr, r->orig_addr);
    }
  GM_PRINT (GM_PRINT_LEVEL >= 4,
            ("Allocated DMA region at system address %p.\n", r->orig_addr));
  r->len = GM_STATIC_CAST (unsigned, len);
  r->iterator_offset = 0;
  r->is = is;

  /* Sanity check: the usable start address must be page aligned. */

  if (!GM_PAGE_ALIGNED (r->addr))
    {
      GM_NOTE (("Address of DMA buffer not page aligned.\n"));
      goto abort_with_memory;
    }

  /* Check that each page is DMAable. */

  for (page_offset = 0; page_offset < len; page_offset += GM_PAGE_LEN)
    {
      phys = MmGetPhysicalAddress ((char *) r->addr + page_offset);
#if (GM_SIZEOF_DP_T == 4)
      if (phys.HighPart != 0)
        {
          GM_NOTE (("At least some of DMA buffer is not DMAable."));
          goto abort_with_memory;
        }
      ASSERT (phys.HighPart == 0);
#endif
    }

#if GM_USE_MDLS
  /* Build a NT memory descriptor list to allow the DMA region to be
     flushed efficiently. */

  GM_PRINT (GM_PRINT_LEVEL >= 4, ("Allocating an MDL.\n"));
  r->mdl = IoAllocateMdl (r->addr, r->len, FALSE, FALSE, NULL);
  if (r->mdl == NULL)
    {
      GM_WARN (("Could not allocate MDL"));
      goto abort_with_memory;
    }
  GM_PRINT (GM_PRINT_LEVEL >= 4, ("Building an MDL.\n"));
  MmBuildMdlForNonPagedPool (r->mdl);
#endif

  /* Register each page of the memory unless the user passed a NULL
     registration function.  The loop also runs when DMA tracing is
     enabled, to record the pages in the instance's bitmap. */

  if (register_page_func != 0 || GM_TRACE_LANAI_DMA)
    {
      GM_PRINT (GM_PRINT_LEVEL >= 4, ("Registering the pages.\n"));
      GM_PRINT (GM_PRINT_LEVEL >= 4, ("LANai SRAM at %p-%p.\n",
                                      is->lanai.sram,
                                      ((char *) is->lanai.sram
                                       + is->lanai.eeprom.lanai_sram_size)));
      ASSERT (page_num == 0);
      for (page_offset = 0; page_offset < len; page_offset += GM_PAGE_LEN)
        {
          GM_PRINT (GM_PRINT_LEVEL >= 6, ("Getting physical address.\n"));
          phys = MmGetPhysicalAddress ((char *) r->addr + page_offset);
#if (GM_SIZEOF_DP_T == 4)
          if (phys.HighPart != 0)
            {
              GM_NOTE (("gm_arch_dma_alloc: MmGetPhysicalAddress high != 0\n"));
            }
          ASSERT (phys.HighPart == 0);
#endif
          GM_PRINT (GM_PRINT_LEVEL >= 6, ("registering.\n"));
          if (register_page_func)
            {
              /* NOTE(review): the status returned by register_page_func
                 is discarded here, as it always has been; confirm
                 whether callers expect a failure to abort the
                 allocation. */
#if (GM_SIZEOF_DP_T == 4)
              register_page_func (user_arg, phys.LowPart, page_num++);
#elif (GM_SIZEOF_DP_T == 8)
              register_page_func (user_arg, phys.QuadPart, page_num++);
#else
#error sizeof gm_dp_t unknown
#endif
            }
#if GM_TRACE_LANAI_DMA
          GM_NOTE (("*** GM DMA page at 0x%x.\n", phys.LowPart));
          /* 1U, not 1: shifting a signed 1 into bit 31 is undefined. */
          is->dma_pages_bitmap[phys.LowPart / GM_PAGE_LEN / 32]
            |= gm_htonl (1U << (phys.LowPart / GM_PAGE_LEN % 32));
#endif
        }
    }
  GM_PRINT (GM_PRINT_LEVEL >= 4, ("Done registering.\n"));

  GM_PRINT (GM_PRINT_LEVEL >= 4,
            ("DMA region has system addr=%p and starting physaddr=%p.\n",
             r->addr, gm_arch_dma_region_dma_addr (r)));

#if 0
  if ((r->flags & GM_ARCH_DMA_CONTIGUOUS)
      && (((long) r->orig_addr & 0xf0000000) == 0xf0000000))
    {
      GM_WARN (("HACK to test contiguous DMA free of addr of form "
                "0xf???????.\n"));
      gm_arch_dma_region_free (r);
      return GM_FAILURE;
    }
#endif

  return GM_SUCCESS;

abort_with_memory:
  /* Release with the allocator that produced orig_addr. */
  if (r->flags & GM_ARCH_DMA_CONTIGUOUS)
    MmFreeContiguousMemory (r->orig_addr);
  else
    ExFreePool (r->orig_addr);
abort_with_nothing:
  RtlZeroMemory (r, sizeof (*r));
  return GM_FAILURE;
}

/* Release a DMA region set up by gm_arch_dma_region_alloc() and zero *R.

   When DMA tracing is enabled the region's pages are first removed from
   the instance's bitmap of safe DMA pages.  When MDLs are in use the
   region's MDL is freed as well.

   Must be run at PASSIVE_LEVEL */

void
gm_arch_dma_region_free (gm_arch_dma_region_t * r)
{
  GM_PRINT (GM_PRINT_LEVEL >= 4, ("Freeing %scontiguous DMA region %p.\n",
                                  r->flags & GM_ARCH_DMA_CONTIGUOUS ? "" :
                                  "non-", r->orig_addr));
  ASSERT (r->orig_addr);

#if GM_TRACE_LANAI_DMA
  /* remove the pages from the bitmap of safe DMA pages. */
  {
    char *addr;
    PHYSICAL_ADDRESS phys;

    for (addr = r->addr; addr < (char *) r->addr + r->len;
         addr += GM_PAGE_LEN)
      {
        phys = MmGetPhysicalAddress (addr);
#if (GM_SIZEOF_DP_T == 4)
        if (phys.HighPart != 0)
          {
            GM_NOTE (("gm_arch_dma_region_free: MmGetPhysicalAddress\n"));
          }
        ASSERT (phys.HighPart == 0);
#endif
        /* 1U, not 1: shifting a signed 1 into bit 31 is undefined. */
        r->is->dma_pages_bitmap[phys.LowPart / GM_PAGE_LEN / 32]
          &= ~gm_htonl (1U << (phys.LowPart / GM_PAGE_LEN % 32));
      }
  }
#endif

#if GM_USE_MDLS
  /* Free the MDL that gm_arch_dma_region_alloc() attached to the region.
     The only pointer to it lives in *R, which is zeroed below, so it
     would otherwise be leaked.  It was built with
     MmBuildMdlForNonPagedPool(), so no unlock is needed first. */
  if (r->mdl != NULL)
    {
      GM_PRINT (GM_PRINT_LEVEL >= 4, ("Freeing an MDL.\n"));
      IoFreeMdl (r->mdl);
      r->mdl = NULL;
    }
#endif

  /* Hand the memory back to whichever allocator produced it. */
  if (r->flags & GM_ARCH_DMA_CONTIGUOUS)
    {
      GM_PRINT (GM_PRINT_LEVEL >= 4,
                ("Freeing contiguous DMA region at %p.\n", r->orig_addr));
      MmFreeContiguousMemory (r->orig_addr);
    }
  else
    {
      GM_PRINT (GM_PRINT_LEVEL >= 4,
                ("Freeing discontiguous DMA region at %p.\n", r->orig_addr));
      ExFreePool (r->orig_addr);
    }
  RtlZeroMemory (r, sizeof (*r));
}

/* Return the kernel virtual address for a DMA region, i.e. the value
   stored in r->addr. */

void *
gm_arch_dma_region_kernel_addr (gm_arch_dma_region_t * r)
{
  return r->addr;
}

/* Return the STS bits for a DMA region.  R is not examined: the same
   constant (0xf) is returned for every region.

   Runs at PASSIVE_LEVEL but may be run at any level */

gm_s32_t gm_arch_dma_region_status (gm_arch_dma_region_t * r)
{
  return 0xf;			/* Invariant for LANai PCI interfaces. */
}

/************
 * Interrupt handling
 ************/

/* Interrupts are handled as follows:

   (1) The LANai generates an interrupt by setting HOST_SIG_BIT in the ISR.

   (2) NT invokes the interrupt handler for each instance that might
   have caused the interrupt.

   (3) The interrupt handler checks to see if it is responsible for
   handling the interrupt.  If so, it disables interrupt generation
   (by clearing EIMR in the LANai), schedules a delayed procedure
   call (DPC) to handle the interrupt, and claims the interrupt by
   returning TRUE.  Otherwise, it returns FALSE.

   (4) Eventually, the DPC runs.  The DPC handles the interrupt, informs
   the LANai that it has handled the interrupt (by clearing HOST_SIG_BIT
   in the ISR) and reenables interrupts by resetting the EIMR, allowing
   the LANai to once again generate interrupts. */

/* The interrupt handler DPC.

   DeferredContext (and SystemArgument1) is the gm_instance_state_t of
   the interrupting board.  The interrupt type is read from the LANai
   globals:

   - GM_WAKE_INTERRUPT: decrement the target port's sleep count under
     the cancel spin lock and, if it reaches zero, complete the IRP at
     the head of the port's sleep queue.  The interrupt is then
     acknowledged and reenabled here.
   - anything else: handed to gm_handle_claimed_interrupt(); this
     function does not acknowledge or reenable the interrupt in that
     case.

   Runs at DISPATCH_LEVEL */


static int spurious = 0;

#define GM_INTR_PRINT 0

VOID
gm_nt_DpcForIsr (IN PKDPC Dpc,
                 IN PVOID DeferredContext,
                 IN PVOID SystemArgument1, IN PVOID SystemArgument2)
{
  gm_instance_state_t *is;
  volatile gm_lanai_globals_t *globals;
  gm_u32_t type;
  int gm_used_generic_interrupt = 0;

  GM_PRINT (GM_INTR_PRINT, ("Interrupt DPC invoked.\n"));


  /* Perform sanity check */

  ASSERT (DeferredContext == SystemArgument1);

  is = (gm_instance_state_t *) DeferredContext;
  ASSERT (is);
  ASSERT (is->lanai.running);

  globals = is->lanai.globals;
  ASSERT (globals);

  type = gm_ntohl (globals->interrupt.type);

  GM_PRINT (GM_INTR_PRINT, ("IntDPC type = %d  EIMR = 0x%x spurious = %d\n",
                            type, is->get_EIMR (is), spurious));

  switch (type)
    {
      /* Determine which port caused the interrupt, and wake it. */

    case GM_WAKE_INTERRUPT:
      {
        unsigned int port;
        gm_port_state_t *ps;

        port = gm_read_lanai_global_u32 (is, interrupt.wake.port);
        GM_PRINT (GM_INTR_PRINT, ("Waking port %d if needed.\n", port));
        gm_assert (port < GM_NUM_PORTS);

        ps = __gm_port_state_for_id (is, port);
        if (ps)
          {
            KIRQL irql;

            if (port == GM_MAPPER_PORT_ID)
              GM_PRINT (GM_INTR_PRINT, ("Waking mapper port.\n"));

            /* Wake the sleeping process if it has been awakened more
               times than it has gone to sleep. */

            IoAcquireCancelSpinLock (&irql);
            if (--ps->arch.sleep_cnt == 0)
              {
                /* RemoveHeadList() never returns NULL: on an empty list
                   it returns the list head itself.  Test for emptiness
                   explicitly so that the list head is never mistaken
                   for an IRP's list entry. */

                if (!IsListEmpty (&ps->arch.sleep_q_root))
                  {
                    PLIST_ENTRY list_entry;
                    PIRP irp;

                    list_entry = RemoveHeadList (&ps->arch.sleep_q_root);
                    irp = CONTAINING_RECORD (list_entry, IRP,
                                             Tail.Overlay.ListEntry);
                    GM_PRINT (GM_INTR_PRINT,
                              ("Got IRP to wake. irp=0x%lx\n", irp));

                    /* Make IRP no longer cancellable. */

                    IoSetCancelRoutine (irp, NULL);
                    IoReleaseCancelSpinLock (irql);

                    /* Complete the IRP successfully */

                    GM_PRINT (GM_INTR_PRINT, ("Waking up.\n"));
                    irp->IoStatus.Status = STATUS_SUCCESS;
                    irp->IoStatus.Information = 0;
                    IoCompleteRequest (irp, IO_NO_INCREMENT);
                  }
                else
                  {
                    IoReleaseCancelSpinLock (irql);
                    GM_NOTE (("Orphaned wake, perhaps due to IRP"
                              " cancellation.\n"));
                  }
              }
            else
              {
                IoReleaseCancelSpinLock (irql);
                GM_PRINT (GM_INTR_PRINT, ("No need to really wake.\n"));
              }
          }
      }
      break;

    default:
      gm_used_generic_interrupt = 1;
      gm_handle_claimed_interrupt (is);
      break;
    }

  /* remove the interrupt from the interrupt queue. */

  if (!gm_used_generic_interrupt)
    {
      globals->interrupt.type = gm_htonl (0);

      /* claim the interrupt */
      is->set_ISR (is, GM_HOST_SIG_BIT);
      is->get_ISR (is);		/* needed??? --Glenn  yes needed -- feldy */
      GM_STBAR ();

      /* reenable interrupts. */
      gm_set_EIMR (is, GM_HOST_SIG_BIT);
      is->get_EIMR (is);		/* needed??? yes needed -- feldy */
      GM_STBAR ();
    }

  GM_PRINT (GM_INTR_PRINT, ("Interrupt DPC completed.\n"));
}

/* The interrupt handler.  CONTEXT is the gm_instance_state_t of the
   board this handler was connected for.  Returns TRUE when the board
   claims the interrupt (a DPC is then queued to service it), FALSE when
   the interrupt belongs to some other device.

   gm_in_intr is set for the duration of the call.

   Runs at some DIRQ level > DISPATCH_LEVEL */

int gm_in_intr;

BOOLEAN gm_nt_Isr (IN PKINTERRUPT intr, IN PVOID context)
{
  gm_instance_state_t *is = (gm_instance_state_t *) context;
  gm_s32_t verdict;
  BOOLEAN ours = FALSE;

  gm_in_intr = 1;

  ASSERT (is);

  /* Only act when the board says the interrupt is its own. */

  verdict = gm_interrupting (is);
  if (verdict == GM_ARCH_INTR_CLAIMED)
    {
      ours = TRUE;

      if (KeInsertQueueDpc (&is->arch.dpc_for_isr, context, NULL) != TRUE)
        {
          /* The DPC was not queued.  Just count the event: GM_NOTE and
             GM_WARN are not safe at high IRQ. */
          spurious++;
        }
      else
        {
          /* Disable interrupt generation until the interrupt has been
             handled.  Don't clear ISR bit until interrupt has been
             handled. */

          gm_set_EIMR (is, 0);
          is->get_EIMR (is);	/* needed -- feldy */
          GM_STBAR ();
        }
    }

  gm_in_intr = 0;
  return ours;
}

#if 0
/* (Disabled code, kept for reference.)

   Intended to perform the equivalent of

   port_id == GM_DAEMON_PORT_ID
   ? sprintf (device_name, "%s%x", sysName, unit)
   : sprintf (device_name, "%s%x_%x", sysName, unit, port_id);

   but for Unicode.  As written, the "_" separator is NOT appended (the
   call that would do so is commented out below), although one WCHAR of
   space is reserved for it.

   On success device_name->Buffer is allocated with gm_malloc(); nothing
   in this function frees it.

   Must be called from PASSIVE_LEVEL */

gm_status_t
gm_nt_form_device_name (gm_instance_state_t * is,
			IN PUNICODE_STRING device_name,
			IN PWSTR sysName, ULONG unit, ULONG port_id)
{
  WCHAR buf0[32], buf1[32];
  UNICODE_STRING num0, num1;
  UNICODE_STRING system_name;
  NTSTATUS nt_status;

  GM_PRINT (GM_PRINT_LEVEL >= 4,
	    ("Forming a device name for unit %d port %d.\n", unit, port_id));

  /* Scratch strings for the two numbers, backed by the stack buffers.
     NOTE(review): MaximumLength is 32 while each buffer holds 32 WCHARs;
     if the field counts bytes, half of each buffer goes unused --
     confirm. */
  num1.Length = num0.Length = 0;
  num1.MaximumLength = num0.MaximumLength = 32;
  num0.Buffer = buf0;
  num1.Buffer = buf1;

  /* Make sysName into unicode string */

  RtlInitUnicodeString (&system_name, sysName);

  /* Convert numbers to unicode strings (base 16). */

  nt_status = RtlIntegerToUnicodeString (unit, 16, &num0);
  if (NT_ERROR (nt_status))
    {
      GM_WARN (("Can't convert integer to Unicode\n"));
      goto abort_with_nothing;
    }

  nt_status = RtlIntegerToUnicodeString (port_id, 16, &num1);
  if (NT_ERROR (nt_status))
    {
      GM_WARN (("Can't convert integer to Unicode\n"));
      goto abort_with_nothing;
    }

  /* Size the result: name + unit + one WCHAR, plus (for non-daemon
     ports) one more WCHAR and the port number. */
  RtlInitUnicodeString (device_name, NULL);
  device_name->MaximumLength
    = system_name.Length + num0.Length + sizeof (WCHAR);
  if (port_id != GM_DAEMON_PORT_ID)
    device_name->MaximumLength += sizeof (WCHAR) + num1.Length;
  device_name->Buffer = gm_malloc (device_name->MaximumLength);
  if (device_name->Buffer == NULL)
    {
      GM_WARN (("Can't allocate memory for unicode string\n"));
      nt_status = STATUS_INSUFFICIENT_RESOURCES;
      goto abort_with_nothing;
    }
  /* Concatenate the pieces.  The results of the append calls are not
     checked. */
  RtlAppendUnicodeToString (device_name, sysName);
  RtlAppendUnicodeStringToString (device_name, &num0);
  if (port_id != GM_DAEMON_PORT_ID)
    {
      /* RtlAppendUnicodeToString (device_name, L"_"); BAD */
      RtlAppendUnicodeStringToString (device_name, &num1);
    }

  return GM_SUCCESS;

abort_with_nothing:
  return GM_FAILURE;
}
#endif

/************************************************************************
 * IRP cancellation functions
 ************************************************************************/

/* Cancel routine for an IRP parked on a port's sleep queue.

   If the IRP is still linked on a list, unlink it and take back the
   sleep it contributed to the port's sleep count; otherwise warn.  In
   both cases the cancel spin lock is released at Irp->CancelIrql and
   the IRP is completed with STATUS_CANCELLED. */

VOID gm_nt_cancel_irp (IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp)
{
  PLIST_ENTRY link = &Irp->Tail.Overlay.ListEntry;
  gm_port_state_t *ps;

  GM_PRINT (GM_PRINT_LEVEL >= 5,
            ("*** gm_nt_cancel_irp:  dev=0x%lx  irp=0x%lx.\n", DeviceObject,
             Irp));

  /* The only cancellable IRPs are those in port sleep queue. */

  ps = gm_nt_device_to_ps (DeviceObject);
  gm_assert (ps);

  if (IsListEmpty (link))
    {
      GM_WARN (
               ("Internal driver error: NT asked to cancel completed IRP.\n"));
    }
  else
    {
      RemoveEntryList (link);
      ps->arch.sleep_cnt--;
    }

  /* Drop the lock before completing the request. */
  IoReleaseCancelSpinLock (Irp->CancelIrql);

  Irp->IoStatus.Information = 0;
  Irp->IoStatus.Status = STATUS_CANCELLED;
  IoCompleteRequest (Irp, IO_NO_INCREMENT);
}

#if GM_CAN_REGISTER_MEMORY
/* Lock down the page containing UPIN.  On success store the
   page-aligned DMA address of that page in *DMA_ADDR and, in *LOCK, the
   MDL that must later be passed to gm_arch_unlock_user_buffer_page().

   Returns GM_SUCCESS, or GM_FAILURE -- with the page unlocked and the
   MDL freed -- when the MDL cannot be allocated, the page cannot be
   pinned, or the page has no physical address that *DMA_ADDR can
   represent. */

gm_status_t
gm_arch_lock_user_buffer_page (gm_instance_state_t * is,
                               gm_up_t upin,
                               gm_dp_t * dma_addr, gm_arch_page_lock_t * lock)
{
  PMDL mdl;
  PHYSICAL_ADDRESS phys;
  void *in;

  in = (void *) upin;
  __try
  {
    mdl = IoAllocateMdl (in, GM_PAGE_LEN, FALSE, FALSE, NULL);
  }
  __except (EXCEPTION_EXECUTE_HANDLER)
  {
    GM_NOTE (("Could not allocate MDL to register user page."));
    /* status = STATUS_INVALID_PARAMETER; */
    goto abort_with_nothing;
  }
  if (!mdl)
    {
      GM_NOTE (("Could not allocate MDL to register user page."));
      /* status = STATUS_INSUFFICIENT_RESOURCES; */
      goto abort_with_nothing;
    }
  ASSERT (in == MmGetMdlVirtualAddress (mdl));

  /* Lock the page */

  __try
  {
    GM_PRINT (GM_PRINT_LEVEL >= 4, (">>>>>>>> Locking a page.\n"));
    MmProbeAndLockPages (mdl, KernelMode, IoModifyAccess);
  }
  __except (EXCEPTION_EXECUTE_HANDLER)
  {
    GM_NOTE (("Could not pin page."));
    /* status = STATUS_INVALID_PARAMETER; */
    goto abort_with_mdl;
  }

  /* Now that the page is locked, we can get the dma addr */

  __try
  {
    phys = MmGetPhysicalAddress (in);
#if (GM_SIZEOF_DP_T == 4)
    if (phys.HighPart != 0)
      {
        GM_NOTE (("gm_arch_lock_user_buffer_page: MmGetPhysicalAddress\n"));
      }
#endif
  }
  __except (EXCEPTION_EXECUTE_HANDLER)
  {
    GM_NOTE (("Exception getting phys addr for user buff."));
    /* status = STATUS_INVALID_PARAMETER; */
    goto abort_with_locked_page;
  }

  /* Refuse pages whose address cannot be handed to the board.  These
     are runtime checks rather than ASSERTs so that free builds also
     fail cleanly instead of returning a truncated or null DMA
     address. */

#if (GM_SIZEOF_DP_T == 4)
  if (phys.HighPart != 0)
    {
      /* The page lies above 4GB and a 4-byte gm_dp_t cannot name it. */
      GM_NOTE (("Something is rotten in Denmark.\n"));
      goto abort_with_locked_page;
    }
#endif
  if (phys.QuadPart == 0)
    {
      /* No physical address was found for the page. */
      GM_NOTE (("Abandon ship!\n"));
      goto abort_with_locked_page;
    }

  /* Done.  Report results. */
#if (GM_SIZEOF_DP_T == 4)
  *dma_addr = GM_PAGE_ROUND_DOWN (dp, phys.LowPart);
#elif (GM_SIZEOF_DP_T == 8)
  *dma_addr = GM_PAGE_ROUND_DOWN (dp, phys.QuadPart);
#else
#error sizeof gm_dp_t unknown
#endif
  *lock = mdl;
  return GM_SUCCESS;

abort_with_locked_page:
  GM_PRINT (GM_PRINT_LEVEL >= 4, ("******** Unlocking a page.\n"));
  MmUnlockPages (mdl);
abort_with_mdl:
  GM_PRINT (GM_PRINT_LEVEL >= 4, ("Freeing an MDL.\n"));
  IoFreeMdl (mdl);
abort_with_nothing:
  return GM_FAILURE;
}

/* Release a page lock: unpin the pages described by the MDL in *LOCK,
   free that MDL, and clear *LOCK. */

void
gm_arch_unlock_user_buffer_page (gm_arch_page_lock_t * lock)
{
  gm_arch_page_lock_t held;

  gm_assert (*lock);
  held = *lock;

  MmUnlockPages (held);
  IoFreeMdl (held);
  *lock = 0;
}
#endif /* GM_CAN_REGISTER_MEMORY */

/************************************************************************
 * Device I/O Controls (ioctls)
 ************************************************************************/

/* Translate a GM status code into the NTSTATUS value used to report it
   to NT.  Any code without an entry below is reported the same way as
   GM_INTERNAL_ERROR. */

NTSTATUS gm_nt_localize_status (gm_status_t status)
{
#define CASE(from,to) case from : return to
  switch (status)
    {
      /* Success */
      CASE (GM_SUCCESS, STATUS_SUCCESS);

      /* Buffer and parameter problems */
      CASE (GM_INPUT_BUFFER_TOO_SMALL, STATUS_BUFFER_TOO_SMALL);
      CASE (GM_OUTPUT_BUFFER_TOO_SMALL, STATUS_BUFFER_TOO_SMALL);
      CASE (GM_INVALID_PARAMETER, STATUS_INVALID_DISPOSITION);
      CASE (GM_INVALID_COMMAND, STATUS_INVALID_DISPOSITION);

      /* Retry, busy and interruption */
      CASE (GM_TRY_AGAIN, STATUS_WAIT_0);
      CASE (GM_BUSY, STATUS_ABANDONED_WAIT_0);
      CASE (GM_INTERRUPTED, STATUS_CANCELLED);

      /* Memory and access */
      CASE (GM_MEMORY_FAULT, STATUS_SEGMENT_NOTIFICATION);
      CASE (GM_OUT_OF_MEMORY, STATUS_NO_MEMORY);
      CASE (GM_PERMISSION_DENIED, STATUS_ACCESS_VIOLATION);

      /* Device and driver state */
      CASE (GM_UNATTACHED, STATUS_NONCONTINUABLE_EXCEPTION);
      CASE (GM_UNSUPPORTED_DEVICE, STATUS_BAD_DEVICE_TYPE);
      CASE (GM_INTERNAL_ERROR, STATUS_INTERNAL_ERROR);

    default:
      /* Same value the GM_INTERNAL_ERROR entry yields. */
      return STATUS_INTERNAL_ERROR;
    }
}

/* Reject a port IOCTL issued on a port that has not been opened: log a
   note, set gm_status to GM_PERMISSION_DENIED, and `break' out of the
   enclosing switch.  Expects `ps' and `gm_status' to be in scope, and
   must be used as a statement directly inside a switch case (not inside
   a nested loop).

   The body is deliberately an if/else and not the usual
   do { } while (0) wrapper: a `break' inside do/while leaves only that
   loop, which would let the rejected IOCTL carry on.  The trailing else
   swallows the caller's semicolon and keeps the macro safe next to
   another if/else. */

#define GM_REQUIRE_OPEN_PORT()						\
  if (!ps->opened)							\
    {									\
      GM_NOTE (("User attempted port IOCTL "				\
		   "on uninitialized port.\n"));			\
      gm_status = GM_PERMISSION_DENIED;					\
      break;								\
    }									\
  else									\
    (void) 0

/* Function to actually perform the requested DeviceControl in the worker
   thread.

   DEV identifies the port (via gm_nt_device_to_ps()); IRP carries the
   control code and the buffers.  A few platform-specific codes are
   handled here (GM_SET_PORT_NUM, GM_MMAP, GM_FINISH_MMAP,
   GM_(DE)REGISTER_MEMORY, GM_SLEEP); everything else is passed to the
   platform-independent gm_ioctl().

   Except for a GM_SLEEP that actually sleeps -- which queues the IRP on
   the port's sleep queue and returns STATUS_PENDING -- the IRP is
   completed before returning.  The IRP's own status is always
   STATUS_SUCCESS (see the table near the end for why); the function's
   return value is the localized GM status. */

#define GM_DEBUG_NT_IOCTL 0

NTSTATUS gm_nt_ioctl (PDEVICE_OBJECT dev, PIRP Irp)
{
  PIO_STACK_LOCATION irpStack;
  NTSTATUS nt_status;
  gm_status_t gm_status;
  PVOID ioBuffer;
  ULONG inBufferLen, outBufferLen;
  int instance, port_id;
  gm_instance_state_t *is;
  gm_port_state_t *ps;
  ULONG output_bytes = 0;
  ULONG code;
  int inhibit_status_copy = 0;

  /* Compute values needed by nearly all ioctls */

  irpStack = IoGetCurrentIrpStackLocation (Irp);
  ASSERT (irpStack);
  ioBuffer = Irp->AssociatedIrp.SystemBuffer;
  inBufferLen = irpStack->Parameters.DeviceIoControl.InputBufferLength;
  outBufferLen = irpStack->Parameters.DeviceIoControl.OutputBufferLength;

  ps = gm_nt_device_to_ps (dev);
  gm_assert (ps);
  is = ps->instance;
  gm_assert (is);

  port_id = ps->id;
  instance = is->id;

  /* Prepare for successful return */

  output_bytes = 0;
  gm_status = GM_SUCCESS;

  /* Handle cases of clone devices and non-clone devices */

  code = irpStack->Parameters.DeviceIoControl.IoControlCode;

  GM_PRINT (GM_DEBUG_NT_IOCTL,
            ("gm_nt_port_ioctl called with control code 0x%x %s irp=0x%lx.\n",
             code, _gm_ioctl_cmd_name (code), Irp));

  switch (code)
    {
      /*******************************
       * Platform-specific IOCTLs
       ********************************/

    case GM_SET_PORT_NUM:
      {
        unsigned long port_id;
        gm_arch_minor_t minor;
        gm_port_state_t *new_ps;

        GM_PRINT (GM_DEBUG_NT_IOCTL, ("ioctl: GM_SET_PORT_NUM\n"));
        if (inBufferLen < sizeof (gm_u32_t))
          {
            gm_status = GM_INPUT_BUFFER_TOO_SMALL;
            break;
          }
        if (!ps->arch.is_clone_device)
          {
            gm_status = GM_PERMISSION_DENIED;
            break;
          }
        port_id = *(gm_u32_t *) ioBuffer;

        /* Create a non-clone minor node for the requested port and open
           its port state; undo the node if the open fails. */

        gm_status = gm_nt_create_minor_node (is, L"gm", &minor,
                                             0 /* not clone */ ,
                                             ps->privileged, port_id);
        if (gm_status != GM_SUCCESS)
          {
            GM_NOTE (("Failed to create minor node.\n"));
            break;
          }
        new_ps = gm_minor_get_port_state (minor);
        gm_assert (new_ps);

        gm_status = gm_port_state_open (new_ps, port_id);
        if (gm_status != GM_SUCCESS)
          {
            GM_NOTE (("Failed to open port state.\n"));
            gm_nt_destroy_minor_node (minor);
            break;
          }
        break;
      }

      /************
       * Simulate the Unix mmap functionality. */

    case GM_MMAP:
      {
        gm_mmap_info_t *mmi;
        void *kaddr;

        inhibit_status_copy = 1;	/* mustn't touch mapped buffer */

        GM_PRINT (GM_DEBUG_NT_IOCTL, ("ioctl: GM_MMAP\n"));
        GM_REQUIRE_OPEN_PORT ();

        if (0 && (ps->id == GM_DAEMON_PORT_ID))
          {
            gm_status = GM_PERMISSION_DENIED;
            break;
          }

        if (inBufferLen < sizeof (*mmi))
          {
            gm_status = GM_INPUT_BUFFER_TOO_SMALL;
            break;
          }

        /* On success sizeof (*mmi) bytes are reported as output, so the
           caller's output buffer must be able to hold them.  Reporting
           more bytes than the output buffer holds would overrun the
           caller's buffer when the result is copied back. */

        if (outBufferLen < sizeof (*mmi))
          {
            gm_status = GM_OUTPUT_BUFFER_TOO_SMALL;
            break;
          }

        mmi = ioBuffer;
        mmi->va = (void *) -1;	/* default */
        if (!GM_PAGE_ALIGNED (mmi->offset) || !GM_PAGE_ALIGNED (mmi->len))
          {
            gm_status = GM_INVALID_PARAMETER;
            break;
          }

        /* Verify that the user has requested a legitimate mapping, and
           preallocate any required resource. */

        gm_status = gm_prepare_to_mmap (ps, mmi->offset, mmi->len,
                                        mmi->requested_permissions);
        if (gm_status != GM_SUCCESS)
          {
            GM_NOTE (("User attempted forbidden mmap.\n"));
            break;
          }

        /* Determine the kernel address of the resource to map.  This
           should never fail because gm_prepare_to_mmap checks the
           legitimacy of the offsets. */
        gm_status = gm_mmap (ps, mmi->offset, &kaddr);
        if (gm_status != GM_SUCCESS)
          {
            GM_WARN (("internal error: gm_mmap failed.\n"));
            break;
          }

        /* Map the requested region.  We know that the mapping is
           contiguous in physical memory (HACK) */

        gm_assert (kaddr);
        gm_status = gm_arch_mmap_contiguous_segment (ps, kaddr, mmi->len,
                                                     (gm_up_t *) & mmi->va);
        if (gm_status != GM_SUCCESS)
          {
            /* NOTE(review): the mapping failure is reported as
               GM_INPUT_BUFFER_TOO_SMALL, as it always has been; confirm
               whether the user library depends on this code. */
            gm_status = GM_INPUT_BUFFER_TOO_SMALL;
            break;
          }
        gm_assert (mmi->va != (void *) 0);
        gm_assert (mmi->va != (void *) -1);

        /* Finish the mapping, adding any mapped page to the page hash
           table */

        gm_finish_mmap (ps, mmi->offset, mmi->len, (gm_up_t) mmi->va);
        output_bytes = sizeof (*mmi);
        break;
      }

    case GM_FINISH_MMAP:
      /* Nothing to do: GM_MMAP already finished the mapping. */
      GM_PRINT (GM_DEBUG_NT_IOCTL, ("ioctl: GM_FINISH_MMAP\n"));
      break;


#if GM_CAN_REGISTER_MEMORY
    /**************
     * Register user-allocated memory for DMA access. */

    case GM_REGISTER_MEMORY:
    case GM_DEREGISTER_MEMORY:

      GM_PRINT (GM_DEBUG_NT_IOCTL, ("ioctl: GM_(DE)REGISTER_MEMORY\n"));
      GM_REQUIRE_OPEN_PORT ();

      /* fixup the input buffer, which is in an unusual place for
         this IOCTL because it had to use METHOD_NEITHER buffer
         management. */

      ioBuffer = irpStack->Parameters.DeviceIoControl.Type3InputBuffer;

      /* Perform the default memory registration. */

      gm_status = gm_ioctl (ps, code, ioBuffer, inBufferLen, ioBuffer,
                            outBufferLen, &output_bytes);
      inhibit_status_copy = 1;	/* mustn't do for METHOD_NEITHER buffer */
      break;

#endif /* GM_CAN_REGISTER_MEMORY */

      /************
       * Sleep the calling process until a message is sent, a send
       * completes, or a signal is received. */

    case GM_SLEEP:
      GM_PRINT (GM_DEBUG_NT_IOCTL, ("ioctl: GM_SLEEP\n"));
      GM_REQUIRE_OPEN_PORT ();

      /* daemon may NOT use this */

      if (ps->id == GM_DAEMON_PORT_ID)
        {
          gm_status = GM_PERMISSION_DENIED;
          GM_PRINT (GM_DEBUG_NT_IOCTL,
                    ("GM_SLEEP ioctl: daemon port - no permission\n"));
          break;
        }
      {
        KIRQL irql;

        IoAcquireCancelSpinLock (&irql);

        /* If the IRP was cancelled before we got here, the cancel
           routine installed below would never be called for it and the
           request would stay on the sleep queue until the next wake.
           Refuse to queue it.  The cancel flag is stable while the
           cancel spin lock is held. */

        if (Irp->Cancel)
          {
            IoReleaseCancelSpinLock (irql);
            GM_PRINT (GM_DEBUG_NT_IOCTL,
                      ("GM_SLEEP ioctl: IRP already cancelled\n"));
            gm_status = GM_INTERRUPTED;
            break;
          }

        /* Don't sleep if awakened already. */

        if (++ps->arch.sleep_cnt == 0)
          {
            IoReleaseCancelSpinLock (irql);
            GM_PRINT (GM_DEBUG_NT_IOCTL,
                      ("GM_SLEEP ioctl awakened already??\n"));
            break;
          }

        /* Mark the IRP as pending. */

        IoMarkIrpPending (Irp);

        /* Put the IRP on the port's sleep queue. */

        GM_PRINT (GM_DEBUG_NT_IOCTL,
                  ("GM_SLEEP ioctl: put IRP on sleep queue dev=0x%lx"
                   " irp=0x%lx\n", dev, Irp));
        InsertTailList (&ps->arch.sleep_q_root, &Irp->Tail.Overlay.ListEntry);
        IoSetCancelRoutine (Irp, gm_nt_cancel_irp);
        IoReleaseCancelSpinLock (irql);

        /* The IRP now belongs to the sleep queue; it is completed by
           the wake path or by the cancel routine, not here. */

        return STATUS_PENDING;
      }

      /*******************************
       * Platform-independent IOCTLs (also returns error
       * for unrecognized codes)
       ********************************/

    default:
      GM_PRINT (GM_DEBUG_NT_IOCTL, ("ioctl: default - using gm_ioctl\n"));
      gm_status = gm_ioctl (ps, code, ioBuffer, inBufferLen, ioBuffer,
                            outBufferLen, &output_bytes);
      inhibit_status_copy = 1;	/* done already */
      break;
    }

  /* If there was an error, inform the user via the copy buffer, ensuring
     that the OS does not get a chance to mangle the error code. */

  if ((gm_status != GM_SUCCESS)
      && ioBuffer
      && outBufferLen >= sizeof (gm_status) && !inhibit_status_copy)
    {
      GM_PRINT (GM_DEBUG_NT_IOCTL,
                ("ioctl: gm_status != GM_SUCCESS, copyout the error\n"));
      output_bytes = sizeof (gm_status);
      gm_copyout (ps, (void *) &gm_status, ioBuffer, output_bytes);
    }

  nt_status = gm_nt_localize_status (gm_status);
  GM_PRINT (GM_DEBUG_NT_IOCTL,
            ("ioctl: gm_status = 0x%x nt_status = 0x%x\n", gm_status,
             nt_status));
  if (nt_status != STATUS_SUCCESS)
    {
      GM_PRINT
        (GM_DEBUG_NT_IOCTL,
         ("Reporting failed ioctl completion with GM status 0x%x;"
          " code was 0x%x\n", gm_status, code));
    }



/* The Irp status and the return code are supposed to be the same
   according to the NT example code. We've decided to return fail,
   but leave success in the Irp so that the error will get copied to the
   user's buffer.

   My testing - feldy

   Irp     Function
   Status  Return          Result

   Fail    Fail            ioctl fails, but the error return code is not
   copied to user
   Fail    Success         ioctl returns success
   Success Fail            ioctl fails, error code is copied
   Success Success         ioctl succeeds, normal data is copied */


  Irp->IoStatus.Status = /* nt_status */ STATUS_SUCCESS;
  Irp->IoStatus.Information = output_bytes;
  IoCompleteRequest (Irp, IO_NO_INCREMENT);
  GM_PRINT (GM_DEBUG_NT_IOCTL,
            ("Ioctl complete. irp=0x%lx  status = 0x%x obytes = %d\n", Irp,
             nt_status, output_bytes));

  return nt_status;
}

/* Simplified interface to NT registry: Fetch the entry from
   KEY_NAME\VALUE_NAME of type DATA_TYPE into a dynamically allocated
   buffer and store a pointer to that buffer at *PTR.  The caller frees
   the buffer with gm_nt_FreeSnarfedData().

   Returns STATUS_SUCCESS with *PTR set.  On any failure -- key cannot
   be opened, value cannot be sized or read, value has the wrong type,
   or out of memory -- returns an error status (one for which NT_ERROR()
   is true), leaves *PTR untouched, and leaves nothing allocated or
   open. */

NTSTATUS
gm_nt_SnarfFromRegistry (IN PCWSTR key_name,
                         IN PCWSTR value_name,
                         IN ULONG data_type,
                         OUT PKEY_VALUE_FULL_INFORMATION * ptr)
{
  OBJECT_ATTRIBUTES oa;
  NTSTATUS nt_status;
  UNICODE_STRING us, us2;
  ULONG buf_len = 0;
  ULONG trash;
  PKEY_VALUE_FULL_INFORMATION buffer;
  HANDLE h;

  ASSERT (ptr != NULL);
  RtlInitUnicodeString (&us, key_name);
  RtlInitUnicodeString (&us2, value_name);
  InitializeObjectAttributes (&oa, &us, OBJ_CASE_INSENSITIVE, NULL, NULL);
  nt_status = ZwOpenKey (&h, KEY_READ, &oa);
  if (NT_ERROR (nt_status))
    {
      GM_NOTE (("Could not open registry key. (error 0x%x)\n", nt_status));
      goto abort_with_nothing;
    }

  /* First pass: ask with a zero-length buffer just to learn the
     required size.  STATUS_BUFFER_TOO_SMALL is the expected answer. */

  nt_status = ZwQueryValueKey (h, &us2, KeyValueFullInformation, NULL, 0,
                               &buf_len);

  if (!buf_len ||
      (NT_ERROR (nt_status) && nt_status != STATUS_BUFFER_TOO_SMALL))
    {
      GM_NOTE (("Could not determine size of registry key data. "
                "(error 0x%x)\n", nt_status));
      /* Never report a non-error status without having set *PTR. */
      if (!NT_ERROR (nt_status))
        nt_status = STATUS_UNSUCCESSFUL;
      goto abort_with_open_key;
    }
  GM_PRINT (GM_PRINT_LEVEL >= 3,
            ("Allocating buffer for %d bytes of registry information.\n",
             buf_len - sizeof (KEY_VALUE_FULL_INFORMATION)));
  buffer = gm_malloc (buf_len);
  if (!buffer)
    {
      GM_NOTE (("Could not allocate buffer for registry data.\n"));
      nt_status = STATUS_INSUFFICIENT_RESOURCES;
      goto abort_with_open_key;
    }

  /* Second pass: fetch the value into the buffer. */

  nt_status = ZwQueryValueKey (h, &us2, KeyValueFullInformation, buffer,
                               buf_len, &trash);
  if (NT_ERROR (nt_status))
    {
      GM_NOTE (("Could not read registry data.\n"));
      goto abort_with_buffer;
    }
  if (trash != buf_len)
    {
      GM_NOTE (("read less (%d bytes) than expected (%d bytes)\n",
                trash, buf_len));
      /* nt_status is not an error at this point; make it one so that
         callers testing NT_ERROR() do not go on to use an unset *PTR. */
      nt_status = STATUS_UNSUCCESSFUL;
      goto abort_with_buffer;
    }
  if (buffer->Type != data_type)
    {
      GM_NOTE (("Read value from registry, but it had wrong type (0x%x).\n",
                buffer->Type));
      nt_status = STATUS_INVALID_PARAMETER;
      goto abort_with_buffer;
    }

  GM_PRINT (GM_PRINT_LEVEL >= 3, ("Closing registry handle.\n"));
  ZwClose (h);
  *ptr = buffer;
  return STATUS_SUCCESS;

abort_with_buffer:
  gm_free (buffer);
abort_with_open_key:
  ZwClose (h);
abort_with_nothing:
  return nt_status;
}

/* Release a buffer returned through the PTR argument of
   gm_nt_SnarfFromRegistry().  Simply hands PTR to gm_free(). */
void
gm_nt_FreeSnarfedData (void *ptr)
{
  gm_free (ptr);
}

/* Report the number of pages of physical memory in the machine.

   This is done by fetching the system memory resource list from the
   registry and summing up the lengths of the memory resources found
   there.  The sum saturates at ULONG_MAX.  A nonzero result is cached
   in a function-local static and returned directly by later calls.

   Returns GM_SUCCESS with the page count in *RESULT, or GM_FAILURE if
   the resource list cannot be read from the registry.

   Does and must run at PASSIVE_LEVEL. */
gm_status_t gm_arch_physical_pages (gm_u32_t * result)
{
  NTSTATUS nt_status;
  PKEY_VALUE_FULL_INFORMATION value;
  PCM_RESOURCE_LIST list;
  PCM_FULL_RESOURCE_DESCRIPTOR full;
  PCM_PARTIAL_RESOURCE_LIST partial_list;
  PCM_PARTIAL_RESOURCE_DESCRIPTOR partial;
  static ULONG pages = 0;
  ULONG total = 0;
  unsigned int i, j;

  /* Return cached page count, if any */
  if (pages != 0)
    {
      *result = pages;
      return GM_SUCCESS;
    }

  /* Get the Internal resource descriptor from the registry. */
  nt_status =
    gm_nt_SnarfFromRegistry
    (L"\\Registry\\Machine\\HARDWARE\\RESOURCEMAP\\System Resources\\Physical Memory", L".Translated", REG_RESOURCE_LIST, &value);
  if (NT_ERROR (nt_status))
    {
      GM_NOTE (("Could not snarf resource list from registry.\n"));
      return GM_FAILURE;
    }
  list = (PCM_RESOURCE_LIST) ((char *) value + value->DataOffset);
  GM_PRINT (GM_PRINT_LEVEL >= 3,
            ("Scanning system resources list of length %d for memory.\n",
             list->Count));

  /* Scan the CM_RESOURCE_LIST for memory resources and compute the
     total amount of memory.

     The full descriptors are variable length: each one ends with as
     many partial descriptors as its PartialResourceList.Count says.
     They therefore cannot be indexed as list->List[i]; instead step
     from each descriptor to the next by skipping its partial
     descriptors.
     NOTE(review): this assumes no partial descriptor carries trailing
     device-specific data -- confirm for this resource map. */

  full = &list->List[0];
  for (i = 0; i < list->Count; i++)
    {
      partial_list = &full->PartialResourceList;

      GM_PRINT (GM_PRINT_LEVEL >= 3, ("partial descriptor:\n"));
      GM_PRINT (GM_PRINT_LEVEL >= 3,
                ("  InterfaceType = %d\n", full->InterfaceType));
      GM_PRINT (GM_PRINT_LEVEL >= 3, ("  BusNumber = %d\n", full->BusNumber));
      if (full->InterfaceType != Internal)
        {
          GM_WARN (("Interface type is not \"Internal.\"\n"));
        }
      else
        {
          GM_PRINT (GM_PRINT_LEVEL >= 4,
                    ("Scanning partial resource list of length %d.\n",
                     partial_list->Count));
          for (j = 0; j < partial_list->Count; j++)
            {
              ULONG new_pages;

              partial = partial_list->PartialDescriptors + j;
              if (partial->Type != CmResourceTypeMemory)
                {
                  GM_NOTE (("Skipping non-memory resource.\n"));
                  continue;
                }
              GM_PRINT (GM_PRINT_LEVEL >= 3,
                        ("Found system memory at 0x%x%08x of length 0x%x.\n",
                         partial->u.Memory.Start.HighPart,
                         partial->u.Memory.Start.LowPart,
                         partial->u.Memory.Length));

              /* Round the length up to whole pages without risking
                 overflow in Length + GM_PAGE_LEN - 1. */
              new_pages = partial->u.Memory.Length / GM_PAGE_LEN;
              if (partial->u.Memory.Length % GM_PAGE_LEN != 0)
                new_pages++;

              /* Update the total amount of host memory, handling
                 overflow gracefully. */
              if (total + new_pages < total)
                {
                  GM_WARN (("Overflow in memory computation.\n"));
                  total = ULONG_MAX;
                }
              else
                total += new_pages;
            }
        }

      /* The next full descriptor starts right after this one's last
         partial descriptor. */
      full = (PCM_FULL_RESOURCE_DESCRIPTOR)
        (partial_list->PartialDescriptors + partial_list->Count);
    }
  gm_nt_FreeSnarfedData (value);

  /* Publish the finished sum to the cache in one step. */
  pages = total;
  GM_PRINT (GM_PRINT_LEVEL >= 3,
            ("*** total physical memory: 0x%x pages (0x%x MB).\n", pages,
             (pages * GM_PAGE_LEN) >> 20));
  *result = pages;
  return GM_SUCCESS;
}

/************
 * Instance manipulation
 ************/

/* Claim the board's hardware resources and record them in IS.

   On NT4 the memory window (BASE_ADDRESS_HIGH:BASE_ADDRESS_LOW, spanning
   gm_pci_board_span (IS) bytes) and the interrupt INTERRUPT_LINE on PCI
   bus BUS_NUMBER are reported with IoReportResourceUsage() against
   IS->arch.resource_device, and the resulting list is scanned to fill in
   IS->arch.  On Windows 2000 the resources are obtained from NDIS via
   NdisMPciAssignResources() and NdisMGetDeviceProperty(); the explicit
   bus/interrupt/address arguments are not used on that path.

   Returns GM_SUCCESS if both a memory and an interrupt resource were
   found, and GM_FAILURE otherwise. */
gm_status_t
gm_nt_assign_resources (gm_instance_state_t* is, PDRIVER_OBJECT driver_object,
			ULONG bus_number, gm_u8_t interrupt_line,
			gm_u32_t base_address_high, gm_u32_t base_address_low)
{
#if GM_OS_NT4
  int                             dma_resources_known = 0;
  int                             interrupt_resources_known = 0;
  int                             memory_resources_known = 0;
  NTSTATUS                        nt_status;
  PCM_PARTIAL_RESOURCE_DESCRIPTOR part;
  PCM_RESOURCE_LIST               resources;
  UNICODE_STRING                  class_name;

  /* Assign slot resources using PCI configuration info in adapter. */

  GM_PRINT (GM_PRINT_LEVEL >= 4, ("Assigning slot resources.\n"));
  RtlInitUnicodeString (&class_name, L"LANAI RESOURCES");

  /* Ran into problems with HalAssignSlotResources. Use alternate method to
     claim resources. */

#if USE_HALASSIGNSLOTRESOURCES
  /* NOTE(review): this branch refers to registry_path and slot_number,
     which are not parameters of this function, so it will not compile
     as written if USE_HALASSIGNSLOTRESOURCES is ever enabled. */
  nt_status = HalAssignSlotResources (registry_path,
				      &class_name,
				      driver_object,
				      is->arch.resource_device,
				      PCIBus,
				      bus_number, slot_number, &resources);
#else
  nt_status = STATUS_UNSUCCESSFUL;
#endif
  /*
   * HalAssignSlotResources sometimes fails. Claim these resources anyways.
   * MSDN knowledge base articles Q152044, Q148501
   */
  if (nt_status != STATUS_SUCCESS)
    {
      BOOLEAN resourceConflict;
      PCM_RESOURCE_LIST cmResList;
      PCM_PARTIAL_RESOURCE_DESCRIPTOR cmResDescriptor;
      size_t sizeToAllocate;

      /* One full descriptor holding two partial descriptors (memory and
	 interrupt); the size below is a generous upper bound. */
      sizeToAllocate = sizeof (CM_RESOURCE_LIST) +
	2 * sizeof (CM_PARTIAL_RESOURCE_LIST);
      cmResList = ExAllocatePool (PagedPool, sizeToAllocate);

      if (!cmResList)
	{
	  GM_WARN (("Could not allocate memory to report resources\n"));
	  goto abort_with_nothing;
	}
      RtlZeroMemory (cmResList, sizeToAllocate);

      cmResList->Count = 1;
      cmResList->List[0].InterfaceType = PCIBus;
      cmResList->List[0].BusNumber = bus_number;
      cmResList->List[0].PartialResourceList.Version = 1;
      cmResList->List[0].PartialResourceList.Revision = 1;
      cmResList->List[0].PartialResourceList.Count = 2;

      cmResDescriptor =
	cmResList->List[0].PartialResourceList.PartialDescriptors;

      /* First descriptor: the board's memory window. */
      cmResDescriptor->Type = CmResourceTypeMemory;
      cmResDescriptor->ShareDisposition = CmResourceShareDeviceExclusive;
      cmResDescriptor->Flags = CM_RESOURCE_MEMORY_READ_WRITE;
      cmResDescriptor->u.Memory.Start.HighPart = base_address_high;
      cmResDescriptor->u.Memory.Start.LowPart = base_address_low;
      cmResDescriptor->u.Memory.Length = gm_pci_board_span (is);
      cmResDescriptor++;

      /* Second descriptor: the (shared, level-sensitive) interrupt. */
      cmResDescriptor->Type = CmResourceTypeInterrupt;
      cmResDescriptor->ShareDisposition = CmResourceShareShared;
      cmResDescriptor->Flags = CM_RESOURCE_INTERRUPT_LEVEL_SENSITIVE;
      cmResDescriptor->u.Interrupt.Level = interrupt_line;
      cmResDescriptor->u.Interrupt.Vector = interrupt_line;
      cmResDescriptor->u.Interrupt.Affinity = -1;

      resourceConflict = FALSE;
      nt_status = IoReportResourceUsage (&class_name, driver_object, NULL, 0,
					 is->arch.resource_device, cmResList,
					 sizeToAllocate, TRUE,
					 &resourceConflict);
      resources = cmResList;
      GM_WARN (("HalAssignSlotResources failed, forcing assignment\n"));
      if (NT_ERROR (nt_status))
	{
	  /* Claiming is deliberately best-effort, so only complain. */
	  GM_WARN (("IoReportResourceUsage failed (error 0x%x)\n",
		    nt_status));
	}
      if (resourceConflict)
	{
	  GM_WARN (("IoReportResourceUsage reports resource conflict\n"));
	}
      nt_status = STATUS_SUCCESS;
    }

  if (NT_ERROR (nt_status))
    {
      GM_WARN (("Could not assign slot resources (error 0x%x)\n", nt_status));
      goto abort_with_nothing;
    }

  if (resources->Count != 1)
    {
      GM_WARN (("HalAssignSlotResources returned multiple resource lists\n"));
      goto abort_with_resources;
    }
  if (resources->List[0].InterfaceType != PCIBus)
    {
      GM_WARN (("HalAssignSlotResources returned bad bus type.\n"));
      goto abort_with_resources;
    }
  if (resources->List[0].BusNumber != bus_number)
    {
      GM_WARN (("HalAssignSlotResources returned bad bus number.\n"));
      goto abort_with_resources;
    }

  /* Scan the resource list looking for relevant board info. */

  GM_PRINT (GM_PRINT_LEVEL >= 4, ("Scanning for relevant board info.\n"));
  part = &resources->List[0].PartialResourceList.PartialDescriptors[0];
  while (part < (&resources->List[0].PartialResourceList.PartialDescriptors
		 [resources->List[0].PartialResourceList.Count]))
    {
      switch (part->Type)
	{
	case CmResourceTypeInterrupt:
	  GM_PRINT (GM_PRINT_LEVEL >= 4,
		    ("Found interrupt resource.\n"
		     "  Level:0x%lx Vector:0x%lx Affinity:0x%lx\n",
		     part->u.Interrupt.Level,
		     part->u.Interrupt.Vector,
		     (unsigned long) part->u.Interrupt.Affinity));
	  is->arch.interrupt_level = part->u.Interrupt.Level;
	  is->arch.interrupt_vector = part->u.Interrupt.Vector;
	  is->arch.interrupt_affinity = part->u.Interrupt.Affinity;
	  interrupt_resources_known = 1;
	  break;

	case CmResourceTypeMemory:
	  GM_PRINT (GM_PRINT_LEVEL >= 4, ("Found memory resource\n"
					  " Start:0x%08lx%08lx Length:0x%lx\n",
					  part->u.Memory.Start.HighPart,
					  part->u.Memory.Start.LowPart,
					  part->u.Memory.Length));
	  is->arch.board_phys_addr.HighPart = part->u.Memory.Start.HighPart;
	  is->arch.board_phys_addr.LowPart = part->u.Memory.Start.LowPart;
	  is->board_span = part->u.Memory.Length;
	  memory_resources_known = 1;
	  break;

	case CmResourceTypeDma:
	  GM_PRINT (GM_PRINT_LEVEL >= 4, ("Found DMA resource\n"
					  " Channel:0x%lx Port:0x%lx\n",
					  part->u.Dma.Channel,
					  part->u.Dma.Port));
	  is->arch.dma_channel = part->u.Dma.Channel;
	  is->arch.dma_port = part->u.Dma.Port;
	  dma_resources_known = 1;
	  break;
	}
      part++;
    }

  /* Free the interface resources structure, since we don't need it
     any more. */

  GM_PRINT (GM_PRINT_LEVEL >= 4, ("Freeing temporary resource space.\n"));
  GM_PRINT (GM_PRINT_LEVEL >= 5, ("Freeing resources %p\n", resources));
  ExFreePool (resources);

  /* Verify that we found each resource we need.  RESOURCES has already
     been freed just above, so the failure path must not free it again. */

  if (!memory_resources_known)
    GM_WARN (("Could not find memory resources.\n"));
  if (!interrupt_resources_known)
    GM_WARN (("Could not find interrupt resources.\n"));
  if (!interrupt_resources_known || !memory_resources_known)
    goto abort_with_nothing;

  return GM_SUCCESS;

 abort_with_resources:
  ExFreePool (resources);
 abort_with_nothing:
  return GM_FAILURE;

#elif GM_OS_WIN2K
  int                             dma_resources_known = 0;
  int                             interrupt_resources_known = 0;
  int                             memory_resources_known = 0;
  NDIS_HANDLE                     handle;
  NDIS_STATUS                     ndis_status;
  PCM_FULL_RESOURCE_DESCRIPTOR    FullResourceDescriptor;
  PCM_PARTIAL_RESOURCE_DESCRIPTOR PartialDescriptor;
  PCM_PARTIAL_RESOURCE_LIST       PartialResourceList;
  PNDIS_RESOURCE_LIST             AssignedResources;
  PCM_RESOURCE_LIST               AllocatedResourcesTranslated = NULL;
  ULONG                           i;
  
  /* Pass 1: the raw resources NDIS assigned to the adapter. */

  handle = GM_STATIC_CAST (NDIS_HANDLE, is->arch.NdisAdapterHandleAsU64);
  ndis_status = NdisMPciAssignResources (handle, 0, &AssignedResources);
  if (ndis_status != NDIS_STATUS_SUCCESS)
    {
      GM_PRINT (1, ("NdisMPciAssignResources did not return success\n"));
      goto abort_with_nothing;
    }
  
  GM_PRINT (1, ("AssignedResources = %p\n", (void*) AssignedResources));
  if (AssignedResources == NULL)
    {
      GM_WARN (("Could not get assigned resources\n"));
      goto abort_with_nothing;
    }
  PartialDescriptor = AssignedResources->PartialDescriptors;
  for (i = 0; i < AssignedResources->Count; i++, PartialDescriptor++)
    {
      switch (PartialDescriptor->Type)
	{
	case CmResourceTypeInterrupt:
	  GM_PRINT (1, ("Found interrupt resource.\n"
			"  Level:%lx Vector:%lx Affinity:%lx\n",
			PartialDescriptor->u.Interrupt.Level,
			PartialDescriptor->u.Interrupt.Vector,
			(unsigned long) PartialDescriptor->u.Interrupt.Affinity));
	  is->arch.interrupt_level = PartialDescriptor->u.Interrupt.Level;
	  is->arch.interrupt_vector = PartialDescriptor->u.Interrupt.Vector;
	  is->arch.interrupt_affinity = PartialDescriptor->u.Interrupt.Affinity;
	  interrupt_resources_known = 1;
	  break;
	  
	case CmResourceTypeMemory:
	  GM_PRINT (1, ("Found memory resource\n"
			"  Start:0x%08lx%08lx Length:0x%lx\n",
			PartialDescriptor->u.Memory.Start.HighPart,
			PartialDescriptor->u.Memory.Start.LowPart,
			PartialDescriptor->u.Memory.Length));
	  is->arch.board_phys_addr.HighPart = PartialDescriptor->u.Memory.Start.HighPart;
	  is->arch.board_phys_addr.LowPart = PartialDescriptor->u.Memory.Start.LowPart;
	  is->board_span = PartialDescriptor->u.Memory.Length;
	  memory_resources_known = 1;
	  break;
	  
	case CmResourceTypeDma:
	  GM_PRINT (1, ("Found DMA resource\n"
			"  Channel:0x%lx Port:0x%lx\n",
			PartialDescriptor->u.Dma.Channel,
			PartialDescriptor->u.Dma.Port));
	  is->arch.dma_channel = PartialDescriptor->u.Dma.Channel;
	  is->arch.dma_port = PartialDescriptor->u.Dma.Port;
	  dma_resources_known = 1;
	  break;
	}
    }
  
  if (!memory_resources_known)
    GM_WARN (("Could not find memory resources.\n"));
  if (!interrupt_resources_known)
    GM_WARN (("Could not find interrupt resources.\n"));
  if (!memory_resources_known || !interrupt_resources_known)
    goto abort_with_nothing;
  
  /* Pass 2: the translated resources, which supply the physical device
     object, the bus number and the translated interrupt parameters. */

  NdisMGetDeviceProperty (handle, &is->arch.pdo, NULL, NULL, NULL,
			  &AllocatedResourcesTranslated);
  DbgPrint("gmndis: is->arch.pdo = %p\n", (void*) is->arch.pdo);
  /* An empty list has no List[0] to read, so treat it like a missing one. */
  if (AllocatedResourcesTranslated == NULL
      || AllocatedResourcesTranslated->Count == 0) {
    GM_WARN (("Could not get translated resources.\n"));
    goto abort_with_nothing;
  }

  DbgPrint("gmndis: AllocatedResourcesTranslated->Count = %lu\n",
	   AllocatedResourcesTranslated->Count);
  FullResourceDescriptor = AllocatedResourcesTranslated->List;
  DbgPrint("gmndis: FullResourceDescriptor\n");
  DbgPrint("gmndis: InterfaceType = %d\n",
	   GM_STATIC_CAST(int, FullResourceDescriptor->InterfaceType));
  is->arch.bus_number = FullResourceDescriptor->BusNumber;
  DbgPrint("gmndis: BusNumber = %lu\n",
	   FullResourceDescriptor->BusNumber);
  PartialResourceList = &FullResourceDescriptor->PartialResourceList;
  DbgPrint("gmndis: PartialResourceList->Count = %lu\n",
	   PartialResourceList->Count);
  PartialDescriptor = PartialResourceList->PartialDescriptors;
  for (i = 0; i < PartialResourceList->Count; i++, PartialDescriptor++)
    {
      switch (PartialDescriptor->Type)
	{
	case CmResourceTypeInterrupt:
	  DbgPrint("gmndis: CmResourceTypeInterrupt\n");
	  DbgPrint ("tiv = %lx, irql = %lx, aff = %lx\n",
		    PartialDescriptor->u.Interrupt.Vector,
		    PartialDescriptor->u.Interrupt.Level,
		    (unsigned long) PartialDescriptor->u.Interrupt.Affinity);
	  is->arch.translated_interrupt_vector = PartialDescriptor->u.Interrupt.Vector;
	  is->arch.irql = (KIRQL) PartialDescriptor->u.Interrupt.Level;
	  is->arch.affinity = PartialDescriptor->u.Interrupt.Affinity;
	  break;
	case CmResourceTypeMemory:
	  DbgPrint("gmndis: CmResourceTypeMemory\n");
	  GM_PRINT (1, ("Found memory resource\n"
			"  Start:0x%08lx%08lx Length:%lx\n",
			PartialDescriptor->u.Memory.Start.HighPart,
			PartialDescriptor->u.Memory.Start.LowPart,
			PartialDescriptor->u.Memory.Length));
	  break;
	case CmResourceTypeDma:
	  DbgPrint("gmndis: CmResourceTypeDma\n");
	  break;
	} /* switch */
    } /* for */
  return GM_SUCCESS;

abort_with_nothing:
  return GM_FAILURE;
#else
#error unknown OS
#endif
}

/* Report the hardware resources (interrupt lines and memory) used for
   this instance.

   Creates the "\Device\GM_resources" device object that the resources
   are claimed against, marks its extension as a resource device,
   records it in gm_device_hash, and then calls gm_nt_assign_resources()
   with BUS_NUMBER, INTERRUPT_LINE and the board base address.
   REGISTRY_PATH and SLOT_NUMBER are accepted but not used here.

   Returns GM_SUCCESS, or GM_FAILURE after undoing whatever had been set
   up so far.

   NOTE(review): the device name is fixed, so creating it for a second
   instance presumably fails with a name collision -- confirm against
   multi-board configurations.

   Does and must run at PASSIVE_LEVEL. */

gm_status_t
gm_nt_report_resources (gm_instance_state_t * is,
			PUNICODE_STRING registry_path,
			PDRIVER_OBJECT driver_object, ULONG bus_number,
			ULONG slot_number, gm_u8_t interrupt_line,
			gm_u32_t base_address_high, gm_u32_t base_address_low)
{
  UNICODE_STRING driver_name;
  gm_status_t gm_status;
  NTSTATUS nt_status;

  gm_assert (is);

  /* Create the daemon port structure, which is done while creating
     the device. Reference the associated gm_port_state_t structure */

  GM_PRINT (GM_PRINT_LEVEL >= 4, ("Creating resource device.\n"));
  RtlInitUnicodeString (&driver_name, L"\\Device\\GM_resources");
  /* The trace mirrors the arguments of the call that follows. */
  GM_PRINT (GM_PRINT_LEVEL >= 5,
	    ("Calling IoCreateDevice (%p, %d, %p (%S), %d, %d, %d, %p)\n",
	     driver_object,
	     (int) sizeof (struct gm_nt_resource_device_extension),
	     &driver_name, driver_name.Buffer, FILE_DEVICE_PHYSICAL_NETCARD,
	     0, FALSE, &is->arch.resource_device));
  nt_status =
    IoCreateDevice (driver_object,
		    sizeof (struct gm_nt_resource_device_extension),
		    &driver_name, FILE_DEVICE_PHYSICAL_NETCARD, 0, FALSE,
		    &is->arch.resource_device);
  if (NT_ERROR (nt_status))
    {
      GM_WARN (("Cannot create resource device: error 0x%x\n", nt_status));
      goto abort_with_nothing;
    }

  /* Record the type of this device. */

  gm_nt_device_extension (is->arch.resource_device)->resource.type
    = GM_NT_RESOURCE_DEVICE_EXTENSION;

  /* record that we created this device */

  if (gm_hash_insert
      (gm_device_hash, is->arch.resource_device,
       is->arch.resource_device) != GM_SUCCESS)
    {
      GM_NOTE (("Could not remember device is ours.\n"));
      goto abort_with_resource_device;
    }

  gm_status = gm_nt_assign_resources (is, driver_object, bus_number,
				      interrupt_line,
				      base_address_high, base_address_low);
  if (gm_status != GM_SUCCESS) {
    goto abort_with_hash_entry;
  }

  GM_RETURN_STATUS (GM_SUCCESS);

  /* Unwind in the reverse order of construction. */
abort_with_hash_entry:
  gm_hash_remove (gm_device_hash, is->arch.resource_device);
abort_with_resource_device:
  IoDeleteDevice (is->arch.resource_device);
abort_with_nothing:
  GM_RETURN_STATUS (GM_FAILURE);
}

/* Release every resource previously reported for instance IS by
   reporting an empty resource list against its resource device, then
   forget and delete that device object and clear
   IS->arch.resource_device.

   Can and must run at PASSIVE_LEVEL. */

void
gm_nt_unreport_resources (gm_instance_state_t * is)
{
  UNICODE_STRING class_name;
  CM_RESOURCE_LIST empty_list;
  BOOLEAN conflict_detected;
  NTSTATUS nt_status;

  GM_PRINT (GM_PRINT_LEVEL >= 4, ("Unreporting resources.\n"));

  /* A zero-filled list has Count == 0, i.e. "no resources in use". */
  RtlZeroMemory (&empty_list, sizeof (empty_list));
  RtlInitUnicodeString (&class_name, L"LANAI RESOURCES");

  nt_status = IoReportResourceUsage (&class_name, is->arch.driver_object,
				     NULL, 0,
				     is->arch.resource_device,
				     &empty_list, sizeof (empty_list),
				     FALSE, &conflict_detected);
  if (NT_ERROR (nt_status))
    {
      GM_WARN (("Error unreporting resources (0x%x).\n", nt_status));
    }

  /* The device goes away regardless of whether the report succeeded. */
  gm_hash_remove (gm_device_hash, is->arch.resource_device);
  IoDeleteDevice (is->arch.resource_device);
  is->arch.resource_device = NULL;
}

/* Map LEN bytes of the board's memory window, starting OFFSET bytes past
   IS->arch.board_phys_addr, into kernel virtual address space.

   The bus address is translated with HalTranslateBusAddress() for the
   PCI bus IS->arch.bus_number and must land in memory space (address
   space 0).  The mapping is made non-cached.  As a side effect the PCI
   Command register is re-read into IS->ifc.pci.config.Command.

   On success stores the mapped address in *PTR and returns GM_SUCCESS.
   Returns GM_FAILURE if the translation fails or yields a non-memory
   address (*PTR untouched), or if MmMapIoSpace() fails (*PTR is NULL). */
gm_status_t
gm_arch_map_io_space (gm_instance_state_t * is, gm_u32_t offset, gm_u32_t len,
		      void **ptr)
{
  PHYSICAL_ADDRESS translatedAddress;
  PHYSICAL_ADDRESS bus_addr;
  ULONG addressSpace;

  bus_addr = RtlLargeIntegerAdd (is->arch.board_phys_addr,
				 RtlConvertUlongToLargeInteger (offset));

  /* 0 on input asks for a memory-space (not I/O-port) translation. */
  addressSpace = 0;
  gm_assert (is->lanai.eeprom.bus_type == GM_MYRINET_BUS_PCI);
  if (!HalTranslateBusAddress (PCIBus,
			       is->arch.bus_number,
			       bus_addr, &addressSpace, &translatedAddress))
    {
      GM_WARN (("Can't translate the address\n"));
      return GM_FAILURE;
    }
  if (addressSpace != 0)
    {
      GM_WARN (("Unexpected address space value\n"));
      return GM_FAILURE;
    }

  gm_assert (sizeof (is->ifc.pci.config.Command) == 2);
  gm_arch_read_pci_config_16 (is, GM_OFFSETOF (gm_pci_config_t, Command),
			      &is->ifc.pci.config.Command);
  GM_PRINT (GM_PRINT_LEVEL >= 4,
	    ("PCI Command = 0x%x.\n", is->ifc.pci.config.Command));

  /* Map the region of LANai space into system memory and return a
     pointer to it, or NULL on failure. */

  *ptr = MmMapIoSpace (translatedAddress, len, FALSE);
  if (!*ptr)
    {
      /* PHYSICAL_ADDRESS is a 64-bit aggregate, so print its two halves
	 rather than handing the whole thing to a single conversion. */
      GM_NOTE (("MmMapIoSpace failed - phys_addr = 0x%lx%08lx  len = %lu\n",
		(unsigned long) translatedAddress.HighPart,
		(unsigned long) translatedAddress.LowPart,
		(unsigned long) len));
      return GM_FAILURE;
    }


  GM_PRINT (GM_PRINT_LEVEL >= 4, ("Mapped 0x%x bytes at offset 0x%x to %p.\n",
				  len, offset, *ptr));

  return GM_SUCCESS;
}

/* Release a LEN-byte mapping whose base address is stored in *PTR.
   IS and OFFSET are not used, and *PTR itself is left unchanged. */
void
gm_arch_unmap_io_space (gm_instance_state_t * is, gm_u32_t offset,
			gm_u32_t len, void **ptr)
{
  void *mapped_base = *ptr;

  MmUnmapIoSpace (mapped_base, len);
}

/* Store the machine's computer name, converted to lower case, in the
   LEN-byte buffer PTR.

   The name is read from the ActiveComputerName registry key, converted
   from Unicode to ANSI, and copied with truncation if it does not fit.
   The result is always NUL-terminated.

   Returns GM_SUCCESS, or GM_FAILURE if PTR is NULL, LEN is not positive,
   or the registry read or string conversion fails. */
gm_status_t gm_arch_gethostname (char *ptr, int len)
{
  PKEY_VALUE_FULL_INFORMATION full;
  NTSTATUS nt_status;
  UNICODE_STRING us;
  ANSI_STRING as;
  char *name;

  /* A non-positive LEN would become a huge unsigned count in the copy
     below, so refuse it up front. */
  if (ptr == NULL || len <= 0)
    goto abort_with_nothing;

  /* Read the hostname from the registry as a WCHAR string */
  nt_status =
    gm_nt_SnarfFromRegistry
    (L"\\Registry\\Machine\\SYSTEM\\CurrentControlSet\\Control\\ComputerName\\ActiveComputerName",
     L"ComputerName", REG_SZ, &full);
  if (NT_ERROR (nt_status))
    {
      GM_NOTE (("Could not snarf computer name from registry.\n"));
      goto abort_with_nothing;
    }
  /* Convert the hostname to a character string.  The TRUE argument makes
     the routine allocate as.Buffer, which is released below. */
  RtlInitUnicodeString (&us, (PWCHAR) ((char *) full + full->DataOffset));
  nt_status = RtlUnicodeStringToAnsiString (&as, &us, TRUE);
  if (NT_ERROR (nt_status))
    {
      /* The snarfed data is released exactly once, at the label. */
      goto abort_with_snarfed_data;
    }
  /* Convert hostname to lower case.  The cast keeps tolower() away from
     negative values when plain char is signed. */
  for (name = as.Buffer; *name; name++)
    *name = (char) tolower ((unsigned char) *name);
  name = as.Buffer;
  GM_PRINT (GM_PRINT_LEVEL >= 1,
	    ("**** Hostname = \"%s\" (length %d). ****\n", name,
	     (int) strlen (name)));
  /* Store the host name in the configuration packets.  strncpy() does not
     terminate the destination when the source fills it, so do it here. */
  strncpy (ptr, name, len);
  ptr[len - 1] = '\0';
  RtlFreeAnsiString (&as);
  gm_nt_FreeSnarfedData (full);

  return GM_SUCCESS;

abort_with_snarfed_data:
  gm_nt_FreeSnarfedData (full);
abort_with_nothing:
  return GM_FAILURE;
}

/* Create a gm_instance_state_t object describing a physical
   instance of a LANai interface card on bus BUS_NUMBER and in slot
   SLOT_NUMBER.  Assign the identifier UNIT to the instance, and report
   resources using REGISTRY_PATH.

   *NO_SUCH_BUS is set to TRUE when reading the slot's PCI configuration
   returns no data, and to FALSE otherwise.  *MAX_IRQL is raised to the
   instance's interrupt IRQL if that is higher than its current value.
   NDISADAPTERHANDLEASU64 is stored in the instance for later use.

   Returns GM_SUCCESS once the instance and both of its clone devices
   exist; otherwise everything set up so far is undone and GM_FAILURE is
   returned.  The instance pointer is not handed back through any
   parameter of this function.

   Must run at PASSIVE_LEVEL. */

gm_status_t
gm_nt_create_instance (unsigned int unit,
		       ULONG bus_number,
		       ULONG slot_number,
		       PUNICODE_STRING registry_path,
		       PDRIVER_OBJECT driver_object,
		       BOOLEAN *no_such_bus,
		       KIRQL *max_irql,
		       gm_u64_t NdisAdapterHandleAsU64)
{
  gm_status_t		status;
  gm_instance_state_t	*is;
  gm_pci_config_t	pci_config;
  ULONG			retLength;
  
  /************
   * Check for available Myrinet interface
   ************/

  /* Check for card at bus, slot by trying to read its configuration
     information. */

  retLength = HalGetBusDataByOffset (PCIConfiguration, bus_number,
				     slot_number, &pci_config, 0,
				     sizeof (pci_config));

  /* No data at all is reported to the caller as "no such bus". */
  if (retLength == 0)
    {
      *no_such_bus = TRUE;
      goto abort_with_nothing;
    }
  *no_such_bus = FALSE;

  /* Check if there is a Myrinet device in this slot. */

  if (((pci_config.Vendor_ID == GM_PCI_VENDOR_MYRICOM) &&
       (pci_config.Device_ID == GM_PCI_DEVICE_MYRINET)) ||
      ((pci_config.Vendor_ID == GM_PCI_VENDOR_MYRICOM2) &&
       (pci_config.Device_ID == GM_PCI_DEVICE_MYRINET)))
    {
      GM_PRINT (GM_PRINT_LEVEL >= 4,
		("Found Myrinet interface (Rev %d) in bus %d slot %d.\n",
		 pci_config.Revision_ID, bus_number, slot_number));
    }
  else
    {
      goto abort_with_nothing;
    }

  /* Only board revisions 1 through 3 are accepted. */
  if (pci_config.Revision_ID < 1 || pci_config.Revision_ID > 3)
    {
      GM_WARN (
	       ("Board revision %d not supported.\n",
		pci_config.Revision_ID));
      goto abort_with_nothing;
    }

  GM_PRINT (GM_PRINT_LEVEL >= 4,
	    ("PCI Command = 0x%x.\n", pci_config.Command));

  /* We have found an available interface.  Alloc and Initialize it. */

  /***********
   * Allocate cleared storage for the instance state.
   ***********/

  /* The allocation is rounded up to whole pages; only the
     sizeof (*is) bytes actually used are cleared. */
  GM_PRINT (GM_PRINT_LEVEL >= 4,
	    ("Allocating storage for instance state.\n"));
  is =
    (gm_instance_state_t *) gm_malloc (GM_PAGE_ROUNDUP (u32, sizeof (*is)));
  if (is == 0)
    {
      GM_WARN (("Could not allocate instance state.\n"));
      goto abort_with_nothing;
    }
  RtlZeroMemory (is, sizeof (*is));
  /* Record remaining architecture-specific details */

  is->arch.NdisAdapterHandleAsU64 = NdisAdapterHandleAsU64;

  /* Record whatever we know already. */

  is->lanai.eeprom.bus_type = GM_MYRINET_BUS_PCI;
  is->ifc.pci.config = pci_config;
  is->arch.bus_number = bus_number;
  is->arch.slot_number = slot_number;
  is->arch.driver_object = driver_object;

  /* Obtain exclusive access to the interface, and report the board
     resources to NT and get the instance's DMA adapter object.  Refer to
     PCI 2.1 spec. for use of base address registers. */

  /* Base address register 1 is passed as the high half of the board
     address; register 0, with its low four bits masked off, is the low
     half. */
  status = gm_nt_report_resources (is, registry_path, driver_object,
				   bus_number, slot_number,
				   pci_config.Interrupt_Line,
				   pci_config.Base_Addresses_Registers[1],
				   pci_config.Base_Addresses_Registers[0] &
				   0xFFFFFFF0);
  if (status != GM_SUCCESS)
    {
      GM_NOTE (("Unsuccessful resource report\n"));
      goto abort_with_instance_state;
    }

  /**********
   * Perform architecture-specific initialization, including installing
   * interrupts.
   **********/

  /* Get the DMA adapter object for the PCI interface.  This should
     turn on bus-master DMA for the interface. */

  gm_assert (sizeof (is->ifc.pci.config.Command) == 2);
  gm_arch_read_pci_config_16 (is, GM_OFFSETOF (gm_pci_config_t, Command),
			      &is->ifc.pci.config.Command);
  GM_PRINT (GM_PRINT_LEVEL >= 4,
	    ("PCI Command = 0x%x.\n", is->ifc.pci.config.Command));

  {
    DEVICE_DESCRIPTION dd;

    RtlZeroMemory (&dd, sizeof (dd));
#if GM_OS_WIN2K
    /* Windows 2000: the adapter is obtained through the physical device
       object that gm_nt_assign_resources() stored in is->arch.pdo. */
    dd.Version = DEVICE_DESCRIPTION_VERSION;
    dd.Master = TRUE;
    /*dd.Dma32BitAddresses = TRUE;*/
    /*dd.Dma64BitAddresses = TRUE;*/
    dd.BusNumber = bus_number;
    dd.DmaChannel = is->arch.dma_channel;
    dd.InterfaceType = PCIBus;
    /*dd.MaximumLength = 4096;*/
    dd.DmaPort = is->arch.dma_port;
    is->arch.io_dma_adapter = IoGetDmaAdapter (is->arch.pdo, &dd,
					       &is->arch.io_dma_num_map_regs);
    if (!is->arch.io_dma_adapter)
      {
	GM_WARN (("IoGetDmaAdapter returned NULL\n"));
	goto abort_with_reported_resources;
      }
#elif GM_OS_NT4
    /* NT4: ask the HAL directly for a bus-master adapter object. */
    dd.Version = DEVICE_DESCRIPTION_VERSION;
    dd.Master = TRUE;
    dd.BusNumber = bus_number;
    dd.DmaChannel = is->arch.dma_channel;
    dd.InterfaceType = PCIBus;
    dd.DmaPort = is->arch.dma_port;
    is->arch.dma_adapter = HalGetAdapter (&dd, &is->arch.dma_num_map_regs);
    if (!is->arch.dma_adapter)
      {
	GM_WARN (("Could not get PCI adapter object.\n"));
	goto abort_with_reported_resources;
      }
    GM_PRINT (GM_PRINT_LEVEL >= 4,
	      ("HalGetAdapter reports %lu map registers\n",
	       is->arch.dma_num_map_regs));
#else
#error unknown OS
#endif
  }
  gm_arch_read_pci_config_16 (is, GM_OFFSETOF (gm_pci_config_t, Command),
			      &is->ifc.pci.config.Command);
  GM_PRINT (GM_PRINT_LEVEL >= 4,
	    ("PCI Command = 0x%x.\n", is->ifc.pci.config.Command));
  GM_PRINT (GM_PRINT_LEVEL >= 4,
	    ("Enabling PCI memory mapping, bus mastery, and"
	     " write&invalidate.\n"));
  /* Despite the message above, the only bit this code sets itself is
     GM_PCI_COMMAND_INVALIDATE; the register is read back afterwards so
     the cached copy reflects what the device accepted. */
  is->ifc.pci.config.Command |= GM_PCI_COMMAND_INVALIDATE;
  if (gm_arch_write_pci_config_16 (is, GM_OFFSETOF (gm_pci_config_t, Command),
				   is->ifc.pci.config.Command) != GM_SUCCESS)
    {
      GM_NOTE (("Could not enable PCI write and invalidate.\n"));
      goto abort_with_reported_resources;
    }
  gm_arch_read_pci_config_16 (is, GM_OFFSETOF (gm_pci_config_t, Command),
			      &is->ifc.pci.config.Command);
  GM_PRINT (GM_PRINT_LEVEL >= 4,
	    ("PCI Command = 0x%x.\n", is->ifc.pci.config.Command));

#if (GM_PRINT_LEVEL>=4)
  /* Debug aid: dump the first 64 bytes of PCI configuration space, four
     bytes per line. */
  {
    unsigned char c[4];
    int i, j;
    GM_PRINT (GM_PRINT_LEVEL >= 4, ("Dumping config space\n"));
    for (i = 0; i < 64; i += 4)
      {
	for (j = 0; j < 4; j++)
	  {
	    gm_arch_read_pci_config_8 (is, i + j, &c[j]);
	  }
	GM_PRINT (GM_PRINT_LEVEL >= 4,
		  ("0x%x: %02x %02x %02x %02x\n", i, c[0], c[1], c[2], c[3]));
      }
  }
#endif /* (GM_PRINT_LEVEL>=4) */


  /* Initialize interrupt spin lock */

  GM_PRINT (GM_PRINT_LEVEL >= 4, ("Initializing the interrupt spin lock.\n"));
  KeInitializeSpinLock (&is->arch.intr_spin_lock);

  /* Initialize the DPC for the ISR */

  GM_PRINT (GM_PRINT_LEVEL >= 4, ("Initializing interrupt DPC request.\n"));
  KeInitializeDpc (&is->arch.dpc_for_isr,
		   (PKDEFERRED_ROUTINE) gm_nt_DpcForIsr, (PVOID) is);

  
#if GM_OS_NT4
  /* Get interrupt vector, and record max IRQL */

  GM_PRINT (GM_PRINT_LEVEL >= 4, ("Getting interrupt vector.\n"));
  is->arch.translated_interrupt_vector
    = HalGetInterruptVector (PCIBus,
			     is->arch.bus_number,
			     is->arch.interrupt_level,
			     is->arch.interrupt_vector,
			     &is->arch.irql,
			     &is->arch.affinity);
#endif
  /* On Windows 2000 the translated vector, IRQL and affinity were already
     filled in by gm_nt_assign_resources(). */
  GM_PRINT (0, ("tiv = %d, il = %d, iv = %d, irql = %d, aff = %d\n",
		(int) is->arch.translated_interrupt_vector,
		(int) is->arch.interrupt_level,
		(int) is->arch.interrupt_vector,
		(int) is->arch.irql,
		(int) is->arch.affinity));

  /* Keep the caller's running maximum IRQL up to date. */
  if (*max_irql < is->arch.irql)
    *max_irql = is->arch.irql;

  /* Initialize lock and timer required to implement gm_spin() */

  GM_PRINT (GM_PRINT_LEVEL >= 4,
	    ("Initializing gm_spin() support structures.\n"));
  ExInitializeFastMutex (&is->arch.spin_fast_mutex);
  KeInitializeTimer (&is->arch.spin_timer);

  /************
   * Perform OS-independent initialization
   ************/

  if (gm_instance_init (is, unit, GM_MYRINET_BUS_PCI) != GM_SUCCESS)
    {
      GM_WARN (("Could not perform OS-independent instance init.\n"));
      goto abort_with_dpc;
    }

  /************
   * Create minor devices.  /dev/gm%d is the clone device for normal users,
   * and /dev/gmp%d is the clone device for privileged users.
   ************/

  /* Create minor node number to use for /dev/gm? . */

  GM_PRINT (GM_PRINT_LEVEL >= 3, ("Creating minor node.\n"));
  if (gm_nt_create_minor_node (is, L"gm", &is->clone_minor, 1, 0, 0)
      != GM_SUCCESS)
    {
      GM_NOTE (("Could not create clone device.\n"));
      goto abort_with_initialized_ifc;
    }

  /* Create minor node number to use for /dev/gmp? . */

  GM_PRINT (GM_PRINT_LEVEL >= 3, ("Creating privileged minor node.\n"));
  if (gm_nt_create_minor_node (is, L"gmp", &is->privileged_clone_minor,
			       1, 1, 0) != GM_SUCCESS)
    {
      GM_NOTE (("Could not create privileged clone device.\n"));
      goto abort_with_clone_minor;
    }

  GM_PRINT (GM_PRINT_LEVEL >= 3, ("created minor nodes\n"));

  return GM_SUCCESS;

  /* Failure paths: each label undoes one setup step and falls through to
     the labels for the steps that preceded it. */
abort_with_clone_minor:
  gm_nt_destroy_minor_node (is->clone_minor);
abort_with_initialized_ifc:
  gm_instance_finalize (is);
abort_with_dpc:
  KeRemoveQueueDpc (&is->arch.dpc_for_isr);	/* In case queued. */
abort_with_reported_resources:
  gm_nt_unreport_resources (is);
abort_with_instance_state:
  GM_PRINT (GM_PRINT_LEVEL >= 5, ("Freeing instance state %p\n", is));
  gm_free (is);
  GM_PRINT (GM_PRINT_LEVEL >= 5, ("Freed instance state.\n"));
abort_with_nothing:
  return GM_FAILURE;
}

/* Tear down instance IS and free its state.  The steps mirror the
   failure unwind at the end of gm_nt_create_instance(): both clone
   devices are destroyed, the OS-independent state is finalized, any
   queued interrupt DPC is removed, the reported resources are released,
   and finally the instance storage is freed.  IS must not be used after
   this returns. */
void
gm_nt_destroy_instance (gm_instance_state_t * is)
{
  GM_PRINT (GM_PRINT_LEVEL >= 4, ("Destroying an instance.\n"));

  gm_nt_destroy_minor_node (is->privileged_clone_minor);
  gm_nt_destroy_minor_node (is->clone_minor);
  gm_instance_finalize (is);
  KeRemoveQueueDpc (&is->arch.dpc_for_isr);	/* In case queued. */
  gm_nt_unreport_resources (is);
  GM_PRINT (GM_PRINT_LEVEL >= 5, ("Freeing instance state %p\n", is));
  gm_free (is);
  GM_PRINT (GM_PRINT_LEVEL >= 5, ("gm_nt_destroy_instance returning.\n"));
}

/* Utility routine for DispatchCleanup: cancel the last IRP queued on the
   list headed by ROOT by invoking that IRP's own cancel routine for
   device DEV, with IRQL recorded as the IRP's cancel IRQL.  Does nothing
   if the list is empty.

   NOTE: Must hold Cancel spin lock before calling.  The cancel routine
   releases the lock; it is reacquired before returning, so the caller
   still holds it afterwards.  IRQL is passed by value, so the IRQL
   produced by that reacquisition is not visible to the caller. */

void
gm_nt_cancel_last (IN PDEVICE_OBJECT dev, PLIST_ENTRY root, KIRQL irql)
{
  PIRP irp;
  PDRIVER_CANCEL cancel_routine;

  /* On an empty list the head links back to itself, and treating the
     head as an IRP's list entry would fabricate a bogus IRP pointer. */

  if (root->Blink == root)
    return;

  /* Get last IRP in list. */

  irp = CONTAINING_RECORD (root->Blink, IRP, Tail.Overlay.ListEntry);

  /* Cancel the IRP following the NT cancelRoutine conventions.  The
     cancel routine will remove the IRP from the list. */

  cancel_routine = irp->CancelRoutine;
  ASSERT (cancel_routine);
  irp->CancelIrql = irql;
  irp->CancelRoutine = NULL;
  irp->Cancel = TRUE;

  GM_PRINT (GM_PRINT_LEVEL >= 4,
	    ("gm_nt_cancel_last: Calling cancel routine for dev=%p"
	     "  irp=%p.\n",
	     (void *) dev, (void *) irp));
  cancel_routine (dev, irp);	/* releases cancel spin lock */

  /* reacquire since the cancel_routine released it */
  IoAcquireCancelSpinLock (&irql);
}

/* Debugging aid: print the byte offset of selected members of
   gm_lanai_globals_t, one "0x<offset>: <member>" line per member, at
   print level 4 or higher.  The commented-out entries name members that
   are deliberately not printed. */
void
gm_nt_print_offsets ()
{
/* Print the offset of member NAME together with its spelling.  Note that
   the macro is not #undef'd, so it remains defined for the rest of the
   file. */
#define gm_print_offset(name) do {					\
  GM_PRINT (GM_PRINT_LEVEL >= 4, ("0x%x: %s\n",				\
		GM_OFFSETOF (gm_lanai_globals_t, name), #name));	\
} while (0)

  gm_print_offset (magic);
  gm_print_offset (dma_descriptor);
  /*gm_print_offset (reserved_after_dma_descriptor); */
  gm_print_offset (event_index_table);
  gm_print_offset (page_hash);
  gm_print_offset (first_port_with_sent_packets);
  gm_print_offset (_subport[0]);
  gm_print_offset (_subport[1]);
  gm_print_offset (timeout_time);
/*
  gm_print_offset (_state);
*/
  gm_print_offset (current_rdma_port);
/*
  gm_print_offset (_free_send_chunk_cnt);
*/
  gm_print_offset (send_chunk[0]);
  /* NOTE(review): index 1 here, where the neighbouring array members are
     printed for index 0 (and 1) -- confirm recv_chunk[0] was not meant. */
  gm_print_offset (recv_chunk[1]);
  gm_print_offset (recv_token_bin[0]);
  gm_print_offset (recv_token_bin[1]);
  gm_print_offset (first_active_connection);
  gm_print_offset (_send_record[0]);
  gm_print_offset (_send_record[1]);
/*
  gm_print_offset (_remaining_sdma_ctr);
*/
  gm_print_offset (remaining_sdma_lar);
  /* gm_print_offset (remaining_sdma_ear); */
  gm_print_offset (recv_token_dma_stage);
  gm_print_offset (failed_send_event_dma_stage);
  gm_print_offset (report_dma_stage);
  gm_print_offset (nack_delay);
  gm_print_offset (backlog_delay);
  gm_print_offset (led);
  gm_print_offset (port[0]);
  gm_print_offset (port[1]);
  gm_print_offset (finishing_rdma_for_port);
  gm_print_offset (record_log);
  /* gm_print_offset (last_handler); */
  gm_print_offset (resume_after_halt);
  gm_print_offset (dispatch_seen[0][0]);
  gm_print_offset (dispatch_cnt[0][0]);
  gm_print_offset (pause_cnt);
  gm_print_offset (mapper_state);
  gm_print_offset (trash[0]);
  gm_print_offset (max_node_id);
  gm_print_offset (connection[0]);
}


/************************************************************************
 * NT event logging functions
 ************************************************************************/

/* Event log levels.  With LOG_LEVEL_NONE nothing is logged; otherwise a
   message is logged only if its level does not exceed the configured
   level. */
#define LOG_LEVEL_NONE 0
#define LOG_LEVEL_DEBUG 1
/* Upper bound on the insertion strings placed in one log entry. */
#define GM_MAX_INSERTION_STRINGS 4
/* Unique error value passed with log entries by the (currently disabled)
   code in gm_nt_log(). */
#define GM_ERRORLOG 1

/* Driver-wide driver object and registry path.  They are not assigned
   anywhere in this part of the file. */
PDRIVER_OBJECT the_one_and_only_driver_object;
UNICODE_STRING the_one_and_only_registry_path;

/* Configured event log level; set by gm_nt_initialize_event_log(). */
static ULONG _gm_nt_event_log_level;

/* Return the Length that RtlInitUnicodeString() computes for STRING,
   i.e. its size in bytes without the terminating NUL. */
static ULONG
gm_nt_event_log_strlen (IN PWSTR string)
{
  UNICODE_STRING counted;

  RtlInitUnicodeString (&counted, string);

  return (ULONG) counted.Length;
}

/* Write one entry to the NT system event log.

   MESSAGE_LEVEL is compared with the configured event log level; the
   entry is silently skipped (returning TRUE) when logging is off or the
   message is more verbose than the configured level.  ERROR_CODE and
   UNIQUE_ERROR_VALUE are stored in the entry.  IO_OBJECT is the driver
   or device object the entry is attributed to.  If IRP is non-NULL its
   major function, status and (for device-control requests) I/O control
   code are recorded.  DUMP_DATA holds DUMP_DATA_COUNT ULONGs of dump
   data, and STRINGS holds STRING_COUNT insertion strings, of which at
   most GM_MAX_INSERTION_STRINGS are used.

   Returns TRUE if the entry was written or skipped, and FALSE if it is
   too large for an error log entry or could not be allocated. */
static BOOLEAN
gm_nt_report_event (IN ULONG message_level, IN NTSTATUS error_code,
		    IN ULONG unique_error_value, IN PVOID io_object,
		    IN PIRP irp, IN ULONG dump_data[],
		    IN ULONG dump_data_count, IN PWSTR strings[],
		    IN ULONG string_count)
{
  PIO_ERROR_LOG_PACKET packet;
  PIO_STACK_LOCATION irp_stack;
  PUCHAR insertion_string;
  ULONG packet_size;
  ULONG string_offset;
  ULONG string_size[GM_MAX_INSERTION_STRINGS];
  ULONG i;

  if ((_gm_nt_event_log_level == LOG_LEVEL_NONE) ||
      (message_level > _gm_nt_event_log_level))
    {
      return TRUE;
    }

  /* Calculate the size of the packet.  All arithmetic is done in ULONG
     and range-checked before being narrowed to the UCHAR size that
     IoAllocateErrorLogEntry() accepts, so that an oversized request is
     refused instead of wrapping and under-allocating the entry. */

  if (dump_data_count > UCHAR_MAX / sizeof (ULONG))
    {
      return FALSE;
    }
  if (string_count > GM_MAX_INSERTION_STRINGS)
    {
      string_count = GM_MAX_INSERTION_STRINGS;
    }

  /* IO_ERROR_LOG_PACKET already has room for one ULONG of dump data; the
     insertion strings follow the fixed part and any further dump data. */

  string_offset = sizeof (IO_ERROR_LOG_PACKET);
  if (dump_data_count > 0)
    {
      string_offset += (dump_data_count - 1) * sizeof (ULONG);
    }

  packet_size = string_offset;
  for (i = 0; i < string_count; ++i)
    {
      /* Byte length of the text plus its terminating NUL. */
      string_size[i] = gm_nt_event_log_strlen (strings[i]) + sizeof (WCHAR);
      packet_size += string_size[i];
    }
  if (packet_size > UCHAR_MAX)
    {
      return FALSE;
    }

  packet = IoAllocateErrorLogEntry (io_object, (UCHAR) packet_size);
  if (packet == NULL)
    {
      return FALSE;
    }

  /* fill in packet */

  RtlZeroMemory (packet, packet_size);

  packet->ErrorCode = error_code;
  packet->UniqueErrorValue = unique_error_value;

  if (irp != NULL)
    {
      irp_stack = IoGetCurrentIrpStackLocation (irp);

      packet->MajorFunctionCode = irp_stack->MajorFunction;
      packet->FinalStatus = irp->IoStatus.Status;

      if ((irp_stack->MajorFunction == IRP_MJ_DEVICE_CONTROL) ||
	  (irp_stack->MajorFunction == IRP_MJ_INTERNAL_DEVICE_CONTROL))
	{
	  packet->IoControlCode =
	    irp_stack->Parameters.DeviceIoControl.IoControlCode;
	}
    }

  /* add dump data */

  packet->DumpDataSize = (USHORT) (dump_data_count * sizeof (ULONG));
  for (i = 0; i < dump_data_count; ++i)
    {
      packet->DumpData[i] = dump_data[i];
    }

  /* add insertion strings */

  if (string_count > 0)
    {
      packet->NumberOfStrings = (USHORT) string_count;
      packet->StringOffset = GM_STATIC_CAST (USHORT, string_offset);
      insertion_string = (PUCHAR) packet + packet->StringOffset;
      for (i = 0; i < string_count; ++i)
	{
	  /* A NULL string is logged as an empty one: the packet was
	     zeroed, so its terminator is already in place. */
	  if (strings[i] != NULL)
	    {
	      RtlCopyBytes (insertion_string, strings[i], string_size[i]);
	    }
	  insertion_string += string_size[i];
	}
    }

  /* log the message */

  IoWriteErrorLogEntry (packet);
  return TRUE;
}

/* Common back end for gm_nt_info(), gm_nt_note(), gm_nt_warn() and
   gm_nt_panic().  The whole body is compiled out with "#if 0", so at
   present this function does nothing and CODE, FMT and AP are ignored.

   The disabled code formats the message into a 256-byte stack buffer,
   converts it to Unicode and hands it to gm_nt_report_event() as a
   single insertion string.  NOTE(review): it formats with an unbounded
   vsprintf() and always logs with GM_MSG_INFO rather than CODE; both
   would need attention before re-enabling it. */
VOID gm_nt_log (int code, char *fmt, va_list ap)
{
#if 0
  NTSTATUS nt_status;
  char event_log_buffer[256];
  ANSI_STRING astr;
  UNICODE_STRING ustr;
  
  astr.Length = vsprintf (event_log_buffer, fmt, ap);
  astr.MaximumLength = sizeof (event_log_buffer);
  astr.Buffer = event_log_buffer;
  nt_status = RtlAnsiStringToUnicodeString (&ustr, &astr, TRUE);
  if (nt_status == STATUS_SUCCESS)
    {
      gm_nt_report_event (LOG_LEVEL_DEBUG, GM_MSG_INFO,
			  GM_ERRORLOG,
			  (PVOID) the_one_and_only_driver_object,
			  NULL, NULL, 0, &ustr.Buffer, 1);
      RtlFreeUnicodeString (&ustr);
    }
#endif
}

/* Log an informational message: collect the variable arguments that
   follow FMT and hand them to gm_nt_log() tagged with GM_MSG_INFO. */
VOID gm_nt_info (char *fmt, ...)
{
  va_list args;

  va_start (args, fmt);
  gm_nt_log (GM_MSG_INFO, fmt, args);
  va_end (args);
}

/* Log a note: collect the variable arguments that follow FMT and hand
   them to gm_nt_log() tagged with GM_MSG_NOTE. */
VOID gm_nt_note (char *fmt, ...)
{
  va_list args;

  va_start (args, fmt);
  gm_nt_log (GM_MSG_NOTE, fmt, args);
  va_end (args);
}

/* Log a warning: collect the variable arguments that follow FMT and hand
   them to gm_nt_log() tagged with GM_MSG_WARN. */
VOID gm_nt_warn (char *fmt, ...)
{
  va_list args;

  va_start (args, fmt);
  gm_nt_log (GM_MSG_WARN, fmt, args);
  va_end (args);
}

/* Log a panic message: collect the variable arguments that follow FMT and
   hand them to gm_nt_log() tagged with GM_MSG_PANIC.  This function only
   logs; it returns to the caller afterwards. */
VOID gm_nt_panic (char *fmt, ...)
{
  va_list args;

  va_start (args, fmt);
  gm_nt_log (GM_MSG_PANIC, fmt, args);
  va_end (args);
}

/* Initialize _gm_nt_event_log_level from the registry.

   Reads the REG_DWORD value "EventLogLevel" under the GM service's
   "Parameters" key.  If the value cannot be read, or the returned data is
   too short to hold a DWORD, the level is set to LOG_LEVEL_NONE.

   DriverObject is not used. */
static VOID
gm_nt_initialize_event_log (IN PDRIVER_OBJECT DriverObject)
{
  NTSTATUS nt_status;
  PKEY_VALUE_FULL_INFORMATION value;

  nt_status =
    gm_nt_SnarfFromRegistry
    (L"\\Registry\\Machine\\SYSTEM\\CurrentControlSet\\Services\\GM\\Parameters",
     L"EventLogLevel", REG_DWORD, &value);
  if (NT_ERROR (nt_status))
    {
      _gm_nt_event_log_level = LOG_LEVEL_NONE;
      GM_PRINT (GM_PRINT_LEVEL >= 1,
                ("Could not snarf event log level from registry.\n"));
    }
  else
    {
      if (value->DataLength >= sizeof (ULONG))
        {
          ULONG level;

          /* Copy the whole DWORD.  Reading it through a plain "char"
             would use only the low byte and sign-extend values >= 0x80.
             A byte copy also avoids assuming the data is aligned. */
          RtlCopyBytes (&level, (PUCHAR) value + value->DataOffset,
                        sizeof (level));
          _gm_nt_event_log_level = (gm_u32_t) level;
        }
      else
        {
          _gm_nt_event_log_level = LOG_LEVEL_NONE;
          GM_PRINT (GM_PRINT_LEVEL >= 1,
                    ("Event log level in registry is too short.\n"));
        }
      gm_nt_FreeSnarfedData (value);
    }

  GM_NOTE (("GM event logging enabled.\n"));
}

/*
 * More required kernel debugging functions.
 */

/* Scratch buffer for gm_printf().  Better have it static, kernel stacks
   are quite small.  Because the buffer is shared, concurrent calls to
   gm_printf() would overwrite each other's text. */
static char printk_buf[256];

/* Formatted debugging output, prefixed with "GM:".

   FORMAT and the following arguments are printf-style.  Output is only
   produced when GM_KERNEL is false; otherwise the body is compiled out.
   Text that does not fit in printk_buf is truncated rather than written
   past the end of the buffer.

   Always returns GM_SUCCESS. */
int
gm_printf(const char *format,...)
{
#if !GM_KERNEL
  va_list ap;
  size_t used;

  strcpy (printk_buf, "GM:");
  used = strlen (printk_buf);

  va_start (ap, format);
#if defined (_MSC_VER) && (_MSC_VER < 1900)
  /* Older Microsoft C libraries only provide _vsnprintf(), which does
     not NUL-terminate on truncation, so terminate explicitly. */
  _vsnprintf (printk_buf + used, sizeof (printk_buf) - used - 1, format, ap);
  printk_buf[sizeof (printk_buf) - 1] = '\0';
#else
  vsnprintf (printk_buf + used, sizeof (printk_buf) - used, format, ap);
#endif
  va_end (ap);

  /* Print through "%s" so that '%' characters in the formatted text are
     not interpreted as conversion specifications a second time. */
  printf ("%s", printk_buf);
#endif

  return GM_SUCCESS;
}

/* Architecture-specific abort hook.

   Prints "gm_arch_abort" with DbgPrint() and then calls DbgBreakPoint().
   Everything between "#if 0" and its matching "#endif" is disabled
   reference code and is not compiled. */
void gm_arch_abort (void)
{
  /* The disabled block below is what the linux driver does. Try to do
   * some of the same in the future?
   */
#if 0
#if GM_DEBUG
  if (debug_is && debug_is->lanai.running)
    gm_disable_lanai(debug_is);
  if (debug_is && atomic_read(&debug_is->page_hash.sync.mu.count) == 0)
    gm_arch_mutex_exit(&debug_is->page_hash.sync);
  if (debug_is && atomic_read(&debug_is->pause_sync.mu.count) == 0)
    gm_arch_mutex_exit(&debug_is->pause_sync);
  if (gm_in_interrupt) {
#if GM_DEBUG_SETJMP
    printk(KERN_EMERG "Aie, GM-PANIC inside interrupt, let go out of this\n");
    __longjmp(intr_jmp_buf, 1);
#else  /* !GM_DEBUG_SETJMP */
    panic("GM-PANIC in interrupt handler:cannot recover");
#endif /* !GM_DEBUG_SETJMP */
  }
  if (gm_current->state & PF_EXITING) {
    /* we probably failed in the close procedure, so we will never execute the dec count in linux_close */
    MOD_DEC_USE_COUNT;
  }
  hack_sys_exit(12);
#else
  panic("gm_arch_abort() called");
#endif
#endif
  /* The only live code: announce the abort and hit a breakpoint. */
  DbgPrint ("gm_arch_abort\n");
  DbgBreakPoint ();
}


/************************************************************************
 * Driver entry points
 ************************************************************************/

/* IRP_MJ_CREATE dispatch routine.

   Creates aimed at devices that do not belong to GM are forwarded to the
   cached NDIS create handler.  For GM devices, only minor devices may be
   opened; any other device type is rejected with
   STATUS_INVALID_PARAMETER.

   The IRP is completed here, and the value returned is the same status
   the IRP was completed with. */
static NTSTATUS
gm_nt_DispatchCreate (IN PDEVICE_OBJECT DeviceObject, IN PIRP irp)
{
  NTSTATUS status;
  enum gm_nt_device_extension_type type;

  gm_nt_enter ();

  GM_PRINT (GM_PRINT_LEVEL >= 3, ("DispatchCreate called\n"));

  /* intercept create of NDIS device */

  if (!is_gm_device (DeviceObject))
    {
      gm_nt_exit ();
      GM_PRINT (GM_PRINT_LEVEL >= 3, ("returning NDIS create\n"));
      return ndis.create (DeviceObject, irp);
    }

  /* This is a GM device */
  type = gm_nt_device_extension_type (DeviceObject);
  switch (type)
    {
    case GM_NT_MINOR_DEVICE_EXTENSION:
      GM_PRINT (GM_PRINT_LEVEL >= 3, ("minor device being opened.\n"));
      ASSERT (gm_nt_device_to_ps (DeviceObject));
      status = STATUS_SUCCESS;
      break;

    default:
      GM_NOTE (("User tried to open a device of type %d that should not"
                " be opened\n", type));
      status = STATUS_INVALID_PARAMETER;
      break;
    }

  irp->IoStatus.Status = status;
  irp->IoStatus.Information = 0;
  IoCompleteRequest (irp, IO_NO_INCREMENT);
  gm_nt_exit ();

  /* Return the completion status, not an unconditional STATUS_SUCCESS;
     the IRP itself must not be touched after IoCompleteRequest(). */
  GM_RETURN (status);
}

/* IRP_MJ_CLOSE dispatch routine.

   Closes aimed at devices that do not belong to GM are forwarded to the
   cached NDIS close handler.  Closing a non-clone minor device closes its
   port state and destroys the minor node; closing a clone device or a
   resource device only produces a debug message.  The IRP is always
   completed with STATUS_SUCCESS. */
static NTSTATUS
gm_nt_DispatchClose (IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp)
{
  gm_nt_enter ();

  GM_PRINT (GM_PRINT_LEVEL >= 3,
            ("DispatchClose() called  dev=0x%lx  irp=0x%lx\n", DeviceObject,
             Irp));

  /* Not ours: let NDIS handle the close. */

  if (!is_gm_device (DeviceObject))
    {
      gm_nt_exit ();
      GM_PRINT (GM_PRINT_LEVEL >= 3, ("Returning NDIS close.\n"));
      return ndis.close (DeviceObject, Irp);
    }

  switch (gm_nt_device_extension_type (DeviceObject))
    {
    case GM_NT_RESOURCE_DEVICE_EXTENSION:
      GM_PRINT (GM_PRINT_LEVEL >= 1, ("Resource device closed.\n"));
      break;

    case GM_NT_MINOR_DEVICE_EXTENSION:
      {
        gm_port_state_t *ps = gm_nt_device_to_ps (DeviceObject);

        gm_assert (ps);
        gm_assert (ps->instance);

        /* Only non-clone devices are actually closed. */

        if (!ps->arch.is_clone_device)
          {
            /* Remember the minor number first: it is still needed
               after the port state has been closed. */
            gm_arch_minor_t saved_minor = ps->minor;

            GM_PRINT (GM_PRINT_LEVEL >= 1,
                      ("device for port %d of instance %d being closed.\n",
                       ps->id, ps->instance->id));

            /* For NT, open and create occur atomically. */

            gm_port_state_close (ps);
            gm_nt_destroy_minor_node (saved_minor);
          }
        else
          {
            GM_PRINT (GM_PRINT_LEVEL >= 1,
                      ("Clone device for instance %d being closed.\n",
                       ps->instance->id));
          }
        break;
      }

    default:
      gm_always_assert (0);
    }

  GM_PRINT (GM_PRINT_LEVEL >= 4,
            ("Reporting success of close.  dev=0x%lx  irp=0x%lx\n",
             DeviceObject, Irp));

  Irp->IoStatus.Information = 0;
  Irp->IoStatus.Status = STATUS_SUCCESS;
  IoCompleteRequest (Irp, IO_NO_INCREMENT);

  gm_nt_exit ();

  return STATUS_SUCCESS;
}

/* IRP_MJ_DEVICE_CONTROL dispatch routine.

   Handle I/O request messages which have been queued for each device.
   This function will be called only when the process is not handling
   any other requests for the same device.  However, the driver may be
   processing I/O for other devices concurrently.

   Requests for devices that do not belong to GM are forwarded to the
   cached NDIS device_control handler.  Requests for GM minor devices are
   handled by gm_nt_ioctl(), whose return value is returned unchanged.
   Requests for any other GM device are rejected: the IRP is completed
   with STATUS_INVALID_PARAMETER and that status is returned.

   History (feldy - 990721): returning STATUS_SUCCESS regardless of the
   status stored in the Irp did not work correctly -- already opened ports
   were allowed to be re-opened even though the Irp was marked
   STATUS_INSUFFICIENT_RESOURCES -- so gm_nt_ioctl now returns a status
   that is what the Irp says.

   Runs at PASSIVE_LEVEL in user context. */

static NTSTATUS
gm_nt_DispatchDeviceControl (IN PDEVICE_OBJECT device, IN PIRP Irp)
{
  NTSTATUS status;

  gm_nt_enter ();

  /* Intercept device_control of NDIS devices */

  GM_PRINT (GM_PRINT_LEVEL >= 5,
            ("gm_nt_DispatchDeviceControl: dev=0x%lx  irp=0x%lx\n", device,
             Irp));
  if (!is_gm_device (device))
    {
      gm_nt_exit ();
      GM_PRINT (GM_PRINT_LEVEL >= 4,
                ("returning NDIS device_control status\n"));
      return ndis.device_control (device, Irp);
    }

  switch (gm_nt_device_extension_type (device))
    {
    case GM_NT_MINOR_DEVICE_EXTENSION:
      /* gm_nt_ioctl() owns the IRP from here on.
         NOTE(review): this relies on gm_nt_ioctl() completing (or
         pending) the IRP itself -- confirm against its definition. */
      status = gm_nt_ioctl (device, Irp);
      gm_nt_exit ();
      return status;

    case GM_NT_RESOURCE_DEVICE_EXTENSION:
      GM_PRINT (GM_PRINT_LEVEL >= 3, ("Resource device got an ioctl 0x%x.\n",
                                      IoGetCurrentIrpStackLocation
                                      (Irp)->Parameters.DeviceIoControl.
                                      IoControlCode));
      status = STATUS_INVALID_PARAMETER;
      break;

    default:
      GM_WARN (("driver received ioctl for bogus device.\n"));
      status = STATUS_INVALID_PARAMETER;
      break;
    }

  /* Only rejected requests reach this point.  A dispatch routine that
     returns a status other than STATUS_PENDING must complete the IRP,
     otherwise the request is never finished. */

  Irp->IoStatus.Status = status;
  Irp->IoStatus.Information = 0;
  IoCompleteRequest (Irp, IO_NO_INCREMENT);

  gm_nt_exit ();

  return status;
}

/* Driver unload routine.

   Splits the driver's device list into NDIS devices and GM devices, lets
   the cached NDIS unload routine run on the NDIS devices, then destroys
   the GM instance behind each GM minor device, and finally tears down the
   global serialization sync and calls gm_finalize (). */
static VOID
gm_nt_Unload (IN PDRIVER_OBJECT DriverObject)
{
  PDEVICE_OBJECT device, tmp;
  PDEVICE_OBJECT gm_device = 0;
  PDEVICE_OBJECT ndis_device = 0;

  gm_nt_enter ();

  GM_PRINT (GM_PRINT_LEVEL >= 1, ("Unloading driver.\n"));

  /* Segregate the NDIS and GM devices.  Each device is pushed onto the
     front of its list by rewriting its NextDevice link, so both lists end
     up in reverse order relative to the original list. */

  device = DriverObject->DeviceObject;
  while (device)
    {
      if (is_gm_device (device))
	{
	  tmp = gm_device;
	  gm_device = device;
	  device = device->NextDevice;
	  gm_device->NextDevice = tmp;
	}
      else
	{
	  tmp = ndis_device;
	  ndis_device = device;
	  device = device->NextDevice;
	  ndis_device->NextDevice = tmp;
	}
    }

  /* Let NDIS unload, thinking all the devices belong to it. */

  if (ndis_device)
    {
      GM_PRINT (GM_PRINT_LEVEL >= 1, ("Calling NDIS unload routine.\n"));
      DriverObject->DeviceObject = ndis_device;
      ndis.unload (DriverObject);
      GM_PRINT (GM_PRINT_LEVEL >= 1, ("NDIS unload routine returned.\n"));
    }

  /* Repeatedly scan through the device list looking for instances
     associated with the devices and destroy the devices.  Destroying
     the instances causes the associated devices to be closed and
     removed from the device list. */

  /* NOTE(review): the scan starts at gm_device, but after an instance is
     destroyed it restarts from DriverObject->DeviceObject, which this
     function never sets to the GM list.  Confirm that this is the list
     the rescan is meant to walk. */

  /* NOTE(review): the switch below has no default case; a device whose
     extension type is neither of the two handled values would leave
     `device' unchanged and the loop would not advance. */

  device = gm_device;
  while (device)
    {
      gm_port_state_t *ps;
      gm_instance_state_t *is;

      switch (gm_nt_device_extension_type (device))
	{
	case GM_NT_RESOURCE_DEVICE_EXTENSION:

	  /* skip resource devices, which are freed indirectly when the
	     instance is freed. */

	  device = device->NextDevice;
	  break;

	case GM_NT_MINOR_DEVICE_EXTENSION:

	  /* Find the instance associated with the device and destroy it,
	     indirectly closing the clone devices associated with it. */

	  ps = gm_nt_device_to_ps (device);
	  gm_assert (ps->arch.is_clone_device);
	  GM_PRINT (GM_PRINT_LEVEL >= 4,
		    ("Destroying instance %p.\n", ps->instance));
	  is = ps->instance;

	  /* Quiesce and disconnect the interrupt before the instance is
	     destroyed. */
	  gm_disable_interrupts (is);
	  IoDisconnectInterrupt (is->arch.intr_obj);
	  gm_nt_destroy_instance (ps->instance);

	  /* Restart the scan from the driver's device list. */
	  device = DriverObject->DeviceObject;
	  break;
	}
    }
  DriverObject->DeviceObject = 0;

  /* Leave the serialized section before destroying the sync object that
     implements it. */
  gm_nt_exit ();
  gm_arch_sync_destroy (&_gm_arch_serialize_sync);
  gm_finalize ();
}

/* The Dispatch Cleanup routine cancels all IRPs associated with a
   device. This is done by cancelling all IRPs in the worker thread
   queue.

   Cleanups aimed at devices that do not belong to GM are forwarded to the
   cached NDIS cleanup handler.  For GM devices that are not minor
   devices there is nothing to cancel.  In every GM case the cleanup IRP,
   if one was supplied, is completed with STATUS_SUCCESS and
   STATUS_SUCCESS is returned.

   BAD: If this is the daemon port, then this function should cleanup
   all ports. */

static NTSTATUS
gm_nt_DispatchCleanup (IN PDEVICE_OBJECT dev, IN PIRP cleanup_irp)
{
  KIRQL irql;
  gm_port_state_t *ps;

  gm_nt_enter ();

  GM_PRINT (GM_PRINT_LEVEL >= 3,
            ("DispatchCleanup  dev = 0x%lx  irp = 0x%lx.\n", dev,
             cleanup_irp));

  if (!is_gm_device (dev))
    {
      gm_nt_exit ();
      GM_PRINT (GM_PRINT_LEVEL >= 3,
                ("DispatchCleanup: returning NDIS cleanup.\n"));
      return ndis.cleanup (dev, cleanup_irp);
    }

  if (gm_nt_device_extension_type (dev) != GM_NT_MINOR_DEVICE_EXTENSION)
    {
      GM_PRINT (GM_PRINT_LEVEL >= 3,
                ("DispatchCleanup: returning "
                 "!= GM_NT_MINOR_DEVICE_EXTENSION.\n"));

      /* Nothing to cancel, but the IRP must still be completed before a
         non-pending status is returned. */
      if (cleanup_irp)
        {
          cleanup_irp->IoStatus.Status = STATUS_SUCCESS;
          cleanup_irp->IoStatus.Information = 0;
          IoCompleteRequest (cleanup_irp, IO_NO_INCREMENT);
        }

      gm_nt_exit ();
      return STATUS_SUCCESS;
    }

  ASSERT (dev);
  ps = gm_nt_device_to_ps (dev);
  ASSERT (ps);

  GM_PRINT (GM_PRINT_LEVEL >= 3,
            ("DispatchCleanup: port number is %d\n", ps->id));

  GM_PRINT (GM_PRINT_LEVEL >= 3,
            ("DispatchCleanup: Acquiring cancel spin lock.\n"));
  IoAcquireCancelSpinLock (&irql);

  /* Cancel any IRP awaiting a sleep */

  GM_PRINT (GM_PRINT_LEVEL >= 3, ("Checking emptiness of sleep list.\n"));
  while (!IsListEmpty (&ps->arch.sleep_q_root))
    {
      GM_PRINT (GM_PRINT_LEVEL >= 3,
                ("DispatchCleanup: Cancelling an IRP.\n"));
      gm_nt_cancel_last (dev, &ps->arch.sleep_q_root, irql);
    }

  GM_PRINT (GM_PRINT_LEVEL >= 3, ("DispatchCleanup: releasing spin lock.\n"));
  IoReleaseCancelSpinLock (irql);

  /* Report successful handling of cleanup IRP.  The IRP pointer may be
     null, in which case there is nothing to complete. */

  GM_PRINT (GM_PRINT_LEVEL >= 3,
            ("DispatchCleanup: Reporting successful cleanup  dev=0x%lx"
             "  irp=0x%lx\n",
             dev, cleanup_irp));
  if (cleanup_irp)
    {
      cleanup_irp->IoStatus.Status = STATUS_SUCCESS;
      cleanup_irp->IoStatus.Information = 0;
      IoCompleteRequest (cleanup_irp, IO_NO_INCREMENT);
    }

  gm_nt_exit ();

  GM_RETURN (STATUS_SUCCESS);
}

/* Scan every PCI bus and slot for Myrinet interfaces, create a GM
   instance for each one found, then connect and enable the interrupt of
   every instance that was created.

   DriverObject, RegistryPath and NdisAdapterHandleAsU64 are passed
   through unchanged to gm_nt_create_instance ().

   Returns STATUS_SUCCESS if at least one instance is left once the
   interrupts have been connected, and STATUS_UNSUCCESSFUL otherwise.

   BAD: Currently only supports one interface.
   NOTE(review): the code below does iterate over all busses and slots;
   confirm whether the limitation above still applies. */
NTSTATUS
gm_nt_find_myrinet_interfaces (IN PDRIVER_OBJECT DriverObject,
                               IN PUNICODE_STRING RegistryPath,
                               gm_u64_t NdisAdapterHandleAsU64)
{
  ULONG bus_number;
  PCI_SLOT_NUMBER slot_number;
  NTSTATUS status;
  ULONG unit = 0, num_units;
  KIRQL max_irql = 0;

  /* Scan for Myrinet interfaces */

  for (bus_number = 0; bus_number < PCI_BUSSES; bus_number++)
    {
      for (slot_number.u.AsULONG = 0;
           slot_number.u.AsULONG < PCI_SLOTS; slot_number.u.AsULONG++)
        {
          /* Start from a known value so the test below never reads an
             indeterminate flag if instance creation fails before the
             flag has been written. */
          BOOLEAN no_such_bus = FALSE;
          gm_status_t gm_status;

          gm_status = gm_nt_create_instance (unit,
                                             bus_number,
                                             slot_number.u.AsULONG,
                                             RegistryPath,
                                             DriverObject,
                                             &no_such_bus,
                                             &max_irql,
                                             NdisAdapterHandleAsU64);
          if (gm_status != GM_SUCCESS)
            {
              status = gm_nt_localize_status (gm_status);
              if (no_such_bus)
                break;          /* give up on this bus, try the next */
              continue;         /* nothing usable in this slot */
            }

          /* Unit numbers are only consumed by successful creations. */
          unit++;
        }
    }
  num_units = unit;

  /* Now that we know what the max_irql is, and now that we are sure
     no spurious interrupt will be generated, connect and enable the
     interrupts for each instance. */

  while (unit--)
    {
      PDEVICE_OBJECT device;

      /* Scan for matching instance */
      for (device = DriverObject->DeviceObject;
           device; device = device->NextDevice)
        {
          gm_port_state_t *ps;
          gm_instance_state_t *is;

          /* Catch resource devices, which do not correspond to port states. */

          if (gm_nt_device_extension_type (device)
              != GM_NT_MINOR_DEVICE_EXTENSION)
            continue;

          ps = gm_nt_device_to_ps (device);
          gm_assert (ps);
          is = ps->instance;
          if (is->id != unit)
            continue;

          GM_PRINT (GM_PRINT_LEVEL >= 4, ("Connecting interrupt for unit %d\n"
                                          "  (level %d, sync_level %d).\n",
                                          unit, is->arch.irql, max_irql));
          status = IoConnectInterrupt (&is->arch.intr_obj,
                                       gm_nt_Isr,
                                       is,
                                       &is->arch.intr_spin_lock,
                                       is->arch.translated_interrupt_vector,
                                       is->arch.irql,
                                       max_irql,
                                       LevelSensitive,
                                       TRUE, is->arch.affinity, FALSE);

          if (NT_ERROR (status))
            {
              GM_WARN (("Can't connect the interrupt vector for unit %d\n",
                        unit));
              gm_nt_destroy_instance (is);
              num_units--;

              /* Stop walking the device list for this unit once its
                 instance has been destroyed. */
              break;
            }
          gm_enable_interrupts (is);
          break;
        }
    }

  if (num_units == 0)
    {
      GM_NOTE (("Unable to detect PCI Myrinet adapters or create devices\n"));
      GM_RETURN (STATUS_UNSUCCESSFUL);
    }

  GM_RETURN (STATUS_SUCCESS);
}

/* The driver entry routine

   Initializes the GM library and the global serialization sync, saves a
   copy of the registry path, initializes event logging, creates the hash
   table used to tell GM devices from NDIS devices, optionally discovers
   the Myrinet interfaces (GM_OS_NT4) and initializes the NDIS portion
   (GM_ENABLE_ETHERNET), and finally installs the GM dispatch routines in
   front of the cached NDIS ones.

   Returns STATUS_SUCCESS on success.  On failure, the resources acquired
   by this function itself are released in reverse order and the failing
   status is returned.

   Note: the name of the function cannot be changed
   Note: GM_NOTE and GM_WARN cannot be used until after
         gm_nt_initialize_event_log */

NTSTATUS
DriverEntry (IN PDRIVER_OBJECT DriverObject, IN PUNICODE_STRING RegistryPath)
{
  NTSTATUS status;

  gm_init ();
  gm_arch_sync_init (&_gm_arch_serialize_sync, 0);
  gm_nt_enter ();

  ASSERT (sizeof (gm_u64_t) == 8);
  ASSERT (sizeof (gm_u32_t) == 4);
  ASSERT (sizeof (gm_u16_t) == 2);
  ASSERT (sizeof (gm_u8_t) == 1);

  the_one_and_only_driver_object = DriverObject;

  /* Keep a private copy of the registry path, with room for a
     terminating null. */

  the_one_and_only_registry_path.MaximumLength = RegistryPath->Length
    + sizeof (UNICODE_NULL);
  the_one_and_only_registry_path.Length = RegistryPath->Length;
  the_one_and_only_registry_path.Buffer
    = ExAllocatePool (NonPagedPool,
                      the_one_and_only_registry_path.MaximumLength);

  if (!the_one_and_only_registry_path.Buffer)
    {
      status = STATUS_INSUFFICIENT_RESOURCES;
      goto abort_with_serialize_sync;
    }

  RtlCopyUnicodeString (&the_one_and_only_registry_path, RegistryPath);

  gm_nt_initialize_event_log (DriverObject);

  if (gm_arch_page_len (&GM_PAGE_LEN) != GM_SUCCESS)
    {
      status = STATUS_INTERNAL_ERROR;
      goto abort_with_registry_path;
    }

  ASSERT (GM_PAGE_LEN);
  ASSERT (GM_POWER_OF_TWO (GM_PAGE_LEN));

  gm_nt_print_offsets ();

  /* create hash table to help us identify which devices belong to GM
     and which to NDIS */

  gm_device_hash = gm_create_hash (gm_hash_compare_ptrs, gm_hash_hash_ptr,
                                   0, 0, 2, 0);
  if (!gm_device_hash)
    {
      status = STATUS_NO_MEMORY;
      GM_WARN (("Could not allocate device hash table."));
      goto abort_with_registry_path;
    }

#if GM_OS_NT4
  status = gm_nt_find_myrinet_interfaces (DriverObject, RegistryPath, 0);
  if (status != STATUS_SUCCESS)
    {
      GM_WARN (("Could not find Myrinet interfaces.\n"));
      goto abort_with_device_hash;
    }
#endif

  GM_INFO (("initializing GM NDIS driver portion.\n"));
#if GM_ENABLE_ETHERNET
  status = gmm_DriverEntry (DriverObject, RegistryPath);
  if (status != STATUS_SUCCESS)
    {
      /* NOTE(review): when GM_OS_NT4 is set, instances created above by
         gm_nt_find_myrinet_interfaces () are not torn down on this
         path -- confirm whether that is intended. */
      GM_WARN (("Could not initialize GM NDIS driver portion."));
      goto abort_with_device_hash;
    }
#endif

  /* cache the NDIS entry points. */

  ndis.create = DriverObject->MajorFunction[IRP_MJ_CREATE];
  ndis.close = DriverObject->MajorFunction[IRP_MJ_CLOSE];
  ndis.cleanup = DriverObject->MajorFunction[IRP_MJ_CLEANUP];
  ndis.device_control = DriverObject->MajorFunction[IRP_MJ_DEVICE_CONTROL];
  ndis.unload = DriverObject->DriverUnload;

  /* Report driver's dispatch routines  */

  DriverObject->MajorFunction[IRP_MJ_CREATE] = gm_nt_DispatchCreate;
  DriverObject->MajorFunction[IRP_MJ_CLOSE] = gm_nt_DispatchClose;
  DriverObject->MajorFunction[IRP_MJ_CLEANUP] = gm_nt_DispatchCleanup;
  DriverObject->MajorFunction[IRP_MJ_DEVICE_CONTROL]
    = gm_nt_DispatchDeviceControl;
  DriverObject->DriverUnload = gm_nt_Unload;

  GM_INFO (("GM driver loaded successfully.\n"));

  gm_nt_exit ();
  GM_RETURN (STATUS_SUCCESS);

  /* Error exits: each label releases one resource and falls through to
     the labels for the resources acquired before it. */

abort_with_device_hash:
  gm_destroy_hash (gm_device_hash);
  gm_device_hash = 0;
abort_with_registry_path:
  /* Free the saved registry path so a failed load does not leak
     nonpaged pool. */
  ExFreePool (the_one_and_only_registry_path.Buffer);
  the_one_and_only_registry_path.Buffer = NULL;
  the_one_and_only_registry_path.Length = 0;
  the_one_and_only_registry_path.MaximumLength = 0;
abort_with_serialize_sync:
  gm_nt_exit ();
  gm_arch_sync_destroy (&_gm_arch_serialize_sync);
  gm_finalize ();
  GM_RETURN (status);
}


/*
  This file uses GM standard indentation:

  Local Variables:
  c-file-style:"gnu"
  tab-width:8
  c-backslash-column:72
  End:
*/

