/******************************************************************-*-c-*-
 * Myricom GM networking software and documentation			 *
 * Copyright (c) 2000 by Myricom, Inc.					 *
 * All rights reserved.	 See the file `COPYING' for copyright notice.	 *
 *************************************************************************/

/************************************************************************
 * This file includes the OS-specific driver code for Linux 2.4 kernel
 *
 * Created by: Nelson Escobar <nelson@myri.com>
 *
 * Heavily based on the code done by Loic Prylli for the
 *   Linux 2.0/2.2 kernel.
 *
 * send questions to: help@myri.com
 *
 * Also see:
 *   gm/drivers/linux/gm/README, for supported platforms
 *   gm/drivers/linux/gm/TODO, for future work 
 *   _Linux Device Drivers_, Alessandro Rubini, O'Reilly, 1998
 *   _Linux Kernel Internals_, Michael Beck et. al, Addison-Wesley, 1997
 *
 ************************************************************************/

#define __KERNEL__ 1

#if GM_NEED_MODVERSIONS
#include <linux/config.h>
#ifdef CONFIG_MODVERSIONS
#define MODVERSIONS
#include <linux/modversions.h>
#endif
#endif							/*GM_NEED_MODVERSIONS */

/* first for proper asm/io.h inclusion on Alpha */
#include "gm_internal.h"
#include "gm_call_trace.h"
#include "gm_page_hash.h"
#include "gm_lanai.h"
#include "gm_instance.h"
#include "gm_malloc_debug.h"
#include "gm_klog_debug.h"
#include "gm_debug_lanai_dma.h"
#include "gm_debug_malloc.h"
#include "gm_impl.h"

#include <linux/mm.h>
#include <linux/sched.h>
#include <asm/pgtable.h>
#include <asm/unistd.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/malloc.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/utsname.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/version.h>
#include <linux/wrapper.h>
#include <asm/io.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
#include <linux/string.h>

#include "gm_arch_def.h"


#ifdef USE_ZLIB
#include "zlib.h"
#endif

/****************************************************************
 * Device Driver Module Functions are at the end of the file
 ****************************************************************/



/****************************************************************
 * Globals
 ****************************************************************/

/* Table of per-board instance states and the number of entries in use.
   NOTE(review): neither is referenced in the part of the file reviewed
   here; presumably filled in by the module init code -- confirm. */
static gm_instance_state_t *gm_instances[GM_ARCH_MAX_INSTANCE];
static int gm_num_instance = 0;
/* Remaining budget of user pages that may still be locked: decremented
   by gm_arch_lock_user_buffer_page(), incremented again on unlock, and
   locking is refused once it drops to zero or below. */
static int gm_max_user_locked_pages;
/* NOTE(review): not used in the part of the file reviewed here. */
static int gm_skip_init;
/* Hash keyed by physical page address holding an int reference count of
   how many times GM has locked that page. */
static struct gm_hash *gm_arch_phys_hash = (struct gm_hash *)0;

#if GM_DEBUG
int gm_print_level = GM_PRINT_LEVEL;
/* Instance that gm_arch_abort() tries to quiesce and unlock. */
static gm_instance_state_t *debug_is;
/* When non-zero gm_arch_abort() panics instead of trying to recover. */
static int gm_in_interrupt;
#endif


/*
 * poor man's memory leak detection: each pair counts calls to an
 * allocator wrapper and to its matching release wrapper.
 */
#if GM_DEBUG
static int kmalloc_cnt = 0, kfree_cnt = 0;
static int vmalloc_cnt = 0, vfree_cnt = 0;
static int ioremap_cnt = 0, iounmap_cnt = 0;
static int dma_alloc_cnt = 0, dma_free_cnt = 0;
static int kernel_alloc_cnt = 0, kernel_free_cnt = 0;
static int user_lock_cnt = 0, user_unlock_cnt = 0;
#endif

/****************************************************************
 ****************************************************************
 * Low level architecture dependent functions
 ****************************************************************
 ****************************************************************/


/************
 * gm_port_state initialization 
 ************/

/* Architecture-specific setup, invoked right after gm_minor.c creates a
   port state.  A port whose minor number is odd is marked privileged,
   and the arch-level reference count starts at one.  Always returns
   GM_SUCCESS. */

gm_status_t
gm_arch_port_state_init(gm_port_state_t * ps)
{
	ps->arch.ref_count = 1;
	ps->privileged = (ps->minor & 1);

	return GM_SUCCESS;
}

/* Architecture-specific teardown, invoked by gm_minor.c just before a
   port state is destroyed.  Nothing needs undoing on Linux. */

void
gm_arch_port_state_fini(gm_port_state_t * ps)
{
	/* intentionally empty */
}

/************
 * gm_port_state open/close
 ************/

/* Architecture-specific hook run at the end of gm_port_state_open().
   Linux has nothing extra to do, so this always succeeds. */

gm_status_t
gm_arch_port_state_open(gm_port_state_t * ps)
{
	/* no Linux-specific work */
	return GM_SUCCESS;
}

/* Architecture-specific hook run at the start of gm_port_state_close().
   Linux has nothing extra to do. */

void
gm_arch_port_state_close(gm_port_state_t * ps)
{
	/* intentionally empty */
}

/***********************************************************************
 * Utility functions
 ***********************************************************************/

/* These are required so the gm_ioctl( ) can copy its arguments
   and results. */

/* Copy AMOUNT bytes from the user-space address WHAT into the kernel
   buffer WHERE.  PS is not used on Linux.

   Returns GM_SUCCESS when every byte was copied and GM_FAILURE when
   copy_from_user() reports bytes it could not copy (bad or unmapped
   user pointer).  The fault used to be ignored, which let the caller
   continue with a partially filled buffer. */

gm_status_t
gm_arch_copyin(gm_port_state_t * ps,
			   void *what,
			   void *where,
			   gm_size_t amount)
{
	GM_PRINT (GM_PRINT_LEVEL >= 7, ("gm_arch_copyin(0x%p, 0x%p, %ld)\n",
				 what, where, amount));
	/* copy_from_user() returns the number of bytes NOT copied */
	if (copy_from_user(where, what, amount) != 0)
		return GM_FAILURE;
	return GM_SUCCESS;
}

/* Copy AMOUNT bytes from the kernel buffer WHAT to the user-space
   address WHERE.  PS is not used on Linux.

   Returns GM_SUCCESS when every byte was copied and GM_FAILURE when
   copy_to_user() reports bytes it could not copy (bad, unmapped or
   read-only user pointer).  The fault used to be ignored, so the
   caller believed the results had been delivered. */

gm_status_t
gm_arch_copyout(gm_port_state_t * ps,
				void *what,
				void *where,
				gm_size_t amount)
{
	GM_PRINT (GM_PRINT_LEVEL >= 7, ("gm_arch_copyout(0x%p, 0x%p, %ld)\n",
				 what, where, amount));
	/* copy_to_user() returns the number of bytes NOT copied */
	if (copy_to_user(where, what, amount) != 0)
		return GM_FAILURE;
	return GM_SUCCESS;
}

/* atomic manipulation of an integer */

/* Store VAL into the atomic variable V using the kernel primitive. */
void
gm_arch_atomic_set(gm_atomic_t * v, gm_u32_t val)
{
	atomic_set(v, val);
}

/* Return the current value of the atomic variable V. */
gm_u32_t
gm_arch_atomic_read(gm_atomic_t * v)
{
	gm_u32_t snapshot = atomic_read(v);

	return snapshot;
}


/* Utility functions for generic error printing */

/* why do we not get vsnprintf in the kernel??
 * assumption: a formatted message fits in printk_buf[] below */

/* better have it static, kernel stacks are quite small */

/* The MCP printf functionality might print up to 1024 bytes via a
   GM_WRITE_INTERRUPT, and the code below adds slop to the
   beginning. --Glenn */

/* Scratch buffer that gm_linux_printk() formats each message into. */
static char printk_buf[10000];
/* Canary checked by gm_linux_puts() to detect memory corruption.
   NOTE(review): this only catches an overrun of printk_buf if the
   compiler/linker happens to place it right after the buffer, which C
   does not guarantee. */
static gm_u32_t after_printk_buf = 0xcafebabe;

/* Hand the NUL-terminated string BUF to printk() in pieces small enough
   for printk() to cope with: while more than 256 characters remain,
   255 of them are copied to a stack buffer and printed; whatever is
   left is then printed straight from BUF. */
static
void
gm_linux_puts(const char *buf)
{
	char minibuf[256];
	/* Linux printk will corrupt memory if handed much more than this. */
	const gm_size_t max_print_len = 256;
	const gm_size_t step = sizeof (minibuf) - 1;
	gm_size_t buf_len;

	/* the canary must be intact before we start */
	gm_assert(after_printk_buf == 0xcafebabe);

	buf_len = strlen (buf);
	while (buf_len > max_print_len) {
		gm_assert(after_printk_buf == 0xcafebabe);
		gm_assert (buf_len == strlen (buf));

		/* print one bounded, explicitly terminated slice */
		gm_strncpy (minibuf, buf, step);
		minibuf[step] = 0;
		printk ("%s", minibuf);

		buf += step;
		buf_len -= step;
	}
	gm_assert (buf_len == strlen (buf));

	/* the tail is short enough to print directly */
	gm_assert (strlen (buf) <= max_print_len);
	printk ("%s", buf);
}

/****************************************************************
 * printf-style replacement for printk(), which misbehaves on messages
 * longer than about 256 bytes.  The message is formatted into
 * printk_buf[] and emitted through gm_linux_puts(); the last byte of
 * the buffer doubles as an overflow sentinel.
 ****************************************************************/

void
gm_linux_printk (const char *format, ...)
{
	char *const last_byte = &printk_buf[sizeof (printk_buf) - 1];
	va_list ap;

	/* arm the sentinel: a non-zero value afterwards means vsprintf()
	   wrote up to (or past) the end of the buffer */
	*last_byte = 0;

	va_start(ap, format);
	vsprintf (printk_buf, format, ap);
	va_end (ap);

	if (*last_byte != 0) {
		/* complain, then re-terminate so the print below stays bounded */
		gm_linux_puts (KERN_CRIT
					   "****** printk_buf[] vsprintf() overflow ******\n");
		*last_byte = 0;
	}

	gm_linux_puts (printk_buf);
}


/* Called when GM hits a fatal internal error.

   Without GM_DEBUG this simply panics.  With GM_DEBUG it tries to leave
   the machine usable instead: the LANai of debug_is is disabled if it
   is running, the page_hash and pause mutexes of debug_is are released
   when their semaphore count reads 0, a panic is raised if we are in
   the interrupt handler, and the module use count is dropped when the
   current task is exiting.

   The exiting test must look at the task's `flags' word: PF_EXITING is
   a per-process flag bit, not a scheduler state, so testing it against
   `state' (as was done before) never matched a task that is in the
   middle of closing its files. */
void gm_arch_abort (void)
{
#if GM_DEBUG
	if (debug_is && debug_is->lanai.running)
		gm_disable_lanai(debug_is);
	if (debug_is && atomic_read(&debug_is->page_hash.sync.mu.count) == 0)
		gm_arch_mutex_exit(&debug_is->page_hash.sync);
	if (debug_is && atomic_read(&debug_is->pause_sync.mu.count) == 0)
		gm_arch_mutex_exit(&debug_is->pause_sync);
	if (gm_in_interrupt) {
		panic("GM-PANIC in interrupt handler:cannot recover");
	}
	if (gm_current->flags & PF_EXITING) {
		/* we probably failed in the close procedure, so we will
		   never execute the dec count in linux_close */
		MOD_DEC_USE_COUNT;
	}
#else
	panic("gm_arch_abort() called");
#endif
}




/*
 * Find the physical address of either a kernel page or a user page
 * by walking the page tables (see Rubini p.287).
 *
 * NOTE: cannot be called from an interrupt handler, since we use the
 * current MM context.
 */

#define GM_DEBUG_KVIRT_TO_PHYS 0

/* Translate the virtual address ADDR to a physical address.

   KERNEL != 0: ADDR is a kernel address.  Addresses in the direct
   mapping [PAGE_OFFSET, high_memory) are converted with __pa();
   addresses inside the board mapping of IS are converted relative to
   is->arch.phys_base_addr; anything else is asserted to lie in the
   vmalloc range and is resolved through the page tables.
   KERNEL == 0: ADDR is a user address of the current process and is
   always resolved through the page tables.

   Note that the two direct conversions keep the offset within the page
   while the page-table path returns a page-aligned address (it masks
   with PAGE_MASK).  A missing or bad page-table entry trips
   gm_always_assert() rather than returning an error. */
static unsigned long
kvirt_to_phys(gm_instance_state_t * is, unsigned long addr, int kernel)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	ulong phys;

	GM_PRINT (GM_DEBUG_KVIRT_TO_PHYS,
			  ("kvirt_to_phys(0x%lx, %d)\n", addr, kernel));
	GM_PRINT (GM_DEBUG_KVIRT_TO_PHYS,
			  ("PAGE_OFFSET = 0x%lx, high_memory = 0x%lx\n",
			   PAGE_OFFSET, high_memory));

	/* if kernel:
	   if addr in low range conversion is done by translation (most cases),
	   else (vremap or vmalloc), we deal with the segment offset of 2.0
	   via VMALLOC_VMADDR */

	if (kernel) {
		void *ptr = (void *) addr;
		
		/* directly mapped kernel memory */
		if ((addr >= PAGE_OFFSET) && (addr < (unsigned long) high_memory))
			return __pa(ptr);
		
		/* the ioremapped board window */
		gm_assert(is->board_base);
		GM_PRINT (GM_DEBUG_KVIRT_TO_PHYS,
				  ("is->board_base = 0x%lx\n", is->board_base));
		if (ptr >= is->board_base
			&& (ulong) ptr < (ulong) is->board_base + is->board_span)
			return is->arch.phys_base_addr + (ulong) ptr - (ulong) is->board_base;
		gm_assert(addr >= VMALLOC_START && addr < VMALLOC_END);
		
		addr = VMALLOC_VMADDR(addr);	/* to remove the segment offset of intel/2.0 kernels (maybe others) */
		GM_PRINT (GM_DEBUG_KVIRT_TO_PHYS,
				  ("kvirt_to_phys new addr = 0x%lx\n", addr));
	}

	/* Pick the page directory.  On the configurations selected below a
	   kernel address uses the kernel page directory (pgd_offset_k);
	   on the remaining ones (x86/alpha/powerpc without CONFIG_BIGMEM)
	   the `if (kernel)' is compiled out, a build warning is emitted,
	   and the current process' page directory is used for kernel
	   addresses too. */
#if defined CONFIG_BIGMEM || GM_CPU_ia64 || (!GM_CPU_x86 && !GM_CPU_alpha && !GM_CPU_powerpc )
	if (kernel) {
		pgd = pgd_offset_k(addr);
	}
	else
#else
#warning **************************************************************
#warning In theory this code is wrong although it works on Alpha and
#warning x86 (and powerpc?) We would need init_mm to be exported to use
#warning the right solution.
#warning **************************************************************
#endif
	{
		pgd = pgd_offset(gm_current->mm, addr);
	}
	/* walk pgd -> pmd -> pte, asserting each level is present and sane */
	GM_PRINT (GM_DEBUG_KVIRT_TO_PHYS,
			  ("kvirt_to_phys pgd = 0x%x\n", *pgd));
	gm_always_assert(!pgd_none(*pgd));
	gm_always_assert(!pgd_bad(*pgd));
	pmd = pmd_offset(pgd, addr);
	GM_PRINT (GM_DEBUG_KVIRT_TO_PHYS,
			  ("kvirt_to_phys pmd = 0x%x\n", *pmd));
	gm_always_assert(!pmd_none(*pmd));
	gm_always_assert(!pmd_bad(*pmd));
	pte = pte_offset(pmd, addr);
	GM_PRINT (GM_DEBUG_KVIRT_TO_PHYS,
			  ("kvirt_to_phys pte = 0x%lx\n", *pte));
	gm_always_assert(pte_present(*pte));
	/* damned, no pte_phys_page, we use pte_page and we undo the
	   PAGE_OFFSET that is added inside,
	   do not use virt_to_phys, that would not work for IO */
	/*phys = pte_page(*pte) - PAGE_OFFSET; */
	  
	/* pte_page in 2.3.99 isn't returning the same type of value that
	   it returned in 2.2.xx, so for now I am using this hack to get
	   the physical page...  nelson */
	phys = (unsigned long) pte_val(*pte) & PAGE_MASK; /* 2.3.99 change */

	GM_PRINT (GM_DEBUG_KVIRT_TO_PHYS,
			  ("kvirt_to_phys phys = 0x%lx\n", phys));
	
#ifdef __alpha__
	phys &= ((1UL << 41) - 1);
	/* KSEG is currently limited to 41 bits, although there seems to
	   be some support for 48 bits forthcoming */
#endif
#ifdef __ia64__
	/* somehow walking the page table seems to result in an unexpected 1
	   in bit 52, so lets clean out all the bits above 51. --nelson
	   ia64 2.3.99 horrible hack */
	phys &= ((1UL << 52) - 1);
#endif /* __ia64__ */

	GM_PRINT (GM_DEBUG_KVIRT_TO_PHYS,
			  ("kvirt_to_phys returning 0x%lx\n", phys));
	
	return phys;
}

/****************************************************************
 * PCI config space functions  
 ****************************************************************/
 
/* Generator for the six gm_arch_{read,write}_pci_config_{8,16,32}()
   wrappers.

     rw         read or write (pasted into both function names)
     size       8, 16 or 32 (selects gm_u<size>_t and the GM name)
     linuxname  byte, word or dword (the Linux accessor suffix)
     c_type     the C type the Linux accessor expects
     star       `*' for readers (value is an out pointer), empty for
                writers (value is passed by value)

   Each generated function asserts that IS and its pci_dev are set,
   calls pci_<rw>_config_<linuxname>() with OFFSET truncated to an
   unsigned char, and maps PCIBIOS_SUCCESSFUL to GM_SUCCESS and anything
   else to GM_FAILURE. */
#define pcibios_to_gm_arch(rw, size, linuxname, c_type, star)			\
gm_status_t																\
gm_arch_##rw##_pci_config_##size (gm_instance_state_t *is,				\
								  gm_offset_t offset,					\
								  gm_u##size##_t star value)			\
{																		\
  gm_assert (is);														\
  gm_assert (is->arch.pci_dev);										    \
  return ((pci_##rw##_config_##linuxname (is->arch.pci_dev,				\
					  (unsigned char) offset,							\
					  (c_type star) value)								\
	   == PCIBIOS_SUCCESSFUL)											\
	  ? GM_SUCCESS														\
	  : GM_FAILURE);													\
}

pcibios_to_gm_arch(read, 32, dword, unsigned int, *);
pcibios_to_gm_arch(write, 32, dword, unsigned int,);
pcibios_to_gm_arch(read, 16, word, unsigned short, *);
pcibios_to_gm_arch(write, 16, word, unsigned short,);
pcibios_to_gm_arch(read, 8, byte, unsigned char, *);
pcibios_to_gm_arch(write, 8, byte, unsigned char,);


/****************************************************************
 * memory allocation
 ****************************************************************/

/****************
 * bookkeeping
 ****************/

/* Set to 1 to compile in the allocation-tracking list below.  When 0,
   record_memory_allocation() collapses to the constant GM_SUCCESS and
   check_mem expands to nothing. */
#define GM_DEBUG_MEM_ALLOCATION 0
#if !GM_DEBUG_MEM_ALLOCATION
#define record_memory_allocation(x,y,z) GM_SUCCESS
#define check_mem
#else

/* Head of the singly linked list of outstanding allocations. */
static struct allocation_record *first_allocation_record;

/* One tracked allocation: its address, its length, and the `free'
   value supplied when it was recorded (used as part of the match key
   when the record is removed). */
struct allocation_record {
	struct allocation_record *next;
	void *ptr;
	unsigned long len;
	void *free;
};

/* Remember that PTR/LEN was allocated and is to be released with FREE
   by pushing a new record onto the head of the tracking list.

   Returns GM_SUCCESS, or GM_FAILURE if the record itself could not be
   allocated.

   The list head used to be assigned from an undeclared identifier
   (`next'), so this code could not compile once
   GM_DEBUG_MEM_ALLOCATION was enabled; it must point at the new
   record. */
static gm_status_t
record_memory_allocation(void *ptr, unsigned long len, void *free)
{
	struct allocation_record *record;

	record = kmalloc(sizeof(*record), GFP_KERNEL);
	if (!record)
		return GM_FAILURE;

	record->ptr = ptr;
	record->len = len;
	record->free = free;

	/* link in at the head of the list */
	record->next = first_allocation_record;
	first_allocation_record = record;
	return GM_SUCCESS;
}

/* Remove the tracking record that matches PTR, LEN and FREE exactly and
   release it.

   Returns GM_SUCCESS if a matching record was found, GM_FAILURE
   otherwise.

   The third comparison used to read `*where->free', which applies `->'
   to a pointer-to-pointer and does not compile; it has to dereference
   WHERE first, like the other two comparisons. */
static gm_status_t
unrecord_memory_allocation(void *ptr, unsigned long len, void *free)
{
	struct allocation_record **where, *match;

	/* WHERE always points at the link that leads to the candidate, so
	   unlinking needs no special case for the list head */
	for (where = &first_allocation_record; *where; where = &(*where)->next) {
		if ((*where)->ptr == ptr
				&& (*where)->len == len
				&& (*where)->free == free) {
			match = *where;
			*where = match->next;
			kfree(match);
			return GM_SUCCESS;
		}
	}
	return GM_FAILURE;
}

/* Placeholder for reporting allocations still on the tracking list.
   Currently an empty stub: it prints nothing. */
static void
print_memory_allocation_leaks()
{

}

#endif							/* GM_DEBUG_MEM_ALLOCATION */

/****************
 * kmalloc
 ****************/

/* kmalloc() wrapper: forwards SIZE and PRIORITY unchanged and, in debug
   builds, counts the call for leak detection.  Returns whatever
   kmalloc() returns. */
static void *
gm_kmalloc(unsigned int size, int priority)
{
	void *mem = kmalloc(size, priority);

#if GM_DEBUG
	kmalloc_cnt++;
#endif
	return mem;
}

/* kfree() wrapper: releases OBJ and, in debug builds, counts the call
   for leak detection. */
static void
gm_kfree(void *obj)
{
	kfree(obj);
#if GM_DEBUG
	kfree_cnt++;
#endif
}

/****************
 * vmalloc
 ****************/

/* vmalloc() wrapper: forwards SIZE unchanged and, in debug builds,
   counts the call for leak detection.  Returns whatever vmalloc()
   returns. */
static void *
gm_vmalloc(unsigned long size)
{
	void *mem = vmalloc(size);

#if GM_DEBUG
	vmalloc_cnt++;
#endif
	return mem;
}

/* vfree() wrapper: releases OBJ and, in debug builds, counts the call
   for leak detection. */
static void
gm_vfree(void *obj)
{
	vfree(obj);
#if GM_DEBUG
	vfree_cnt++;
#endif
}

/* Map SIZE bytes of I/O space starting at physical address PHYS into
   kernel virtual space, uncached, logging the request and the result.
   Debug builds also count the call.  Returns the value of
   ioremap_nocache(). */
static void *
gm_ioremap(unsigned long phys, unsigned long size)
{
	void *mapped;

	GM_PRINT (GM_PRINT_LEVEL >= 2, ("Mapping IO at 0x%lx\n", phys));
	mapped = ioremap_nocache(phys, size);
#if GM_DEBUG
	ioremap_cnt++;
#endif
	GM_PRINT (1, ("Ioremapped 0x%lx to  0x%p\n", phys, mapped));

	return mapped;
}

/* iounmap() wrapper: removes the mapping OBJ and, in debug builds,
   counts the call for leak detection. */
static void
gm_iounmap(void *obj)
{
	iounmap(obj);
#if GM_DEBUG
	iounmap_cnt++;
#endif
}

/* Make receive-queue updates visible to the CPU.  On every CPU type
   except sparc64 this flushes all caches with flush_cache_all(); on
   sparc64 nothing is done.  P is unused.  Always returns GM_SUCCESS. */
static
gm_status_t
_gm__kernel_recv_queue_update(gm_port_t * p)
{
#if !GM_CPU_sparc64
	/* flush_cache_all() is a no-op on x86 and alpha; architectures
	   without coherent caches probably deserve something less
	   heavy-handed */
	flush_cache_all();
#endif
	return GM_SUCCESS;
}

/****************************************************************
 * Synchronization functions  
 ****************************************************************/

/* Prepare the synchronization object S for use: both semaphores (the
   general mutex `mu' and the sleeper-serializing `wake_sem') are
   initialized as mutexes, the wake counter is cleared and the wait
   queue is set up.  IS is unused. */
void
gm_arch_sync_init(gm_arch_sync_t * s, gm_instance_state_t * is)
{
	GM_PRINT(0, ("gm_arch_sync_init() called\n"));

	/* wake-up bookkeeping */
	atomic_set(&s->wake_cnt, 0);
	init_waitqueue_head( &s->sleep_queue );

	/* the two semaphores */
	init_MUTEX( &s->mu );
	init_MUTEX( &s->wake_sem );
}

/* Forget any wake-ups recorded on S that nobody has consumed yet. */
void
gm_arch_sync_reset(gm_arch_sync_t * s)
{
	atomic_set(&(s->wake_cnt), 0);
}

/* Counterpart of gm_arch_sync_init(); nothing has to be released on
   Linux. */
void
gm_arch_sync_destroy(gm_arch_sync_t * s)
{
	/* intentionally empty */
}

/* Acquire the mutex embedded in S via gm_down(). */
void
gm_arch_mutex_enter(gm_arch_sync_t * s)
{
	gm_down(&s->mu);
}

/* Release the mutex embedded in S via gm_up(). */
void
gm_arch_mutex_exit(gm_arch_sync_t * s)
{
	gm_up(&s->mu);
}



#define GM_SLEEP_PRINT 0

/****************
 * waking
 ****************/

/* Wake the thread sleeping on the synchronization variable S.  The
   wake-up is first recorded in s->wake_cnt so that a sleeper which has
   not reached schedule() yet still sees it. */

void
gm_arch_wake(gm_arch_sync_t * s)
{
	GM_PRINT(GM_SLEEP_PRINT, ("gm_arch_wake called on s = %p\n", s));

	/* An atomic counter is used because the alternative, disabling
	   interrupts globally around wake_cnt, is very costly on SMP. */
	atomic_inc(&(s->wake_cnt));

	wake_up(&(s->sleep_queue));
}



/****************
 * sleeping
 ****************
 
 loic: The following code claims a wake interrupt by atomically
 testing for a positive WAKE_CNT and decrementing WAKE_CNT.  We can
 assume we are the only one trying to consume wake_cnt, the caller is
 responsible to get a mutex to ensure this, so wake_cnt can only
 increase while we are here.  A basic Linux rule: if you need to
 disable interrupts globally, your code is not written the right way
 :-) */

/* Sleep, ignoring signals, until S is woken by gm_arch_wake() or
   SECONDS seconds have elapsed (SECONDS must be <= 15).

   Consumes one recorded wake-up and returns GM_SUCCESS if one is
   pending when the sleep ends; otherwise returns GM_SLEEP_TIMED_OUT.
   The test-then-decrement of wake_cnt is not atomic as a pair; the
   caller must ensure there is only one consumer.

   NOTE(review): the woken case returns GM_SUCCESS although the return
   type is gm_arch_sleep_status_t and gm_arch_signal_sleep() returns
   GM_SLEEP_WOKE for the same event -- confirm the two values are meant
   to be interchangeable. */

gm_arch_sleep_status_t
gm_arch_timed_sleep(gm_arch_sync_t * s, int seconds)
{
	long timeout;
	DECLARE_WAITQUEUE( wait, gm_current );

	/* we refuse signals here so do not block for more than 15 seconds */
	gm_always_assert(seconds <= 15);

	GM_PRINT(GM_SLEEP_PRINT, ("gm_arch_timed_sleep  s = %p  sec=%d\n",
								   s, seconds));

	/* convert to scheduler ticks */
	timeout = seconds * HZ;

	/* a bit tricky: Linux provide the powerful but quite low-level
	   primitives */
	/* put the process in the queue before testing the event */
	add_wait_queue(&s->sleep_queue, &wait);
	/* use UN(INTERRUPTIBLE) variant to ignore signals */
	gm_current->state = TASK_UNINTERRUPTIBLE;	

	/* only give up the CPU if no wake-up has been recorded yet; a
	   wake-up arriving after this test still finds us on the queue */
	if (atomic_read(&s->wake_cnt) <= 0) {
		timeout = schedule_timeout(timeout);
		/* is there any other event that can wake us? */
		gm_always_assert(timeout == 0 || atomic_read(&s->wake_cnt) > 0);
	}
	gm_current->state = TASK_RUNNING;	/* in case we did not wait */
	/*  we done it without globally disabling interrupt :-)
		(and without race conditions)!! */
	remove_wait_queue(&s->sleep_queue, &wait);

	/* claim one wake-up if there is one, else report the timeout */
	if (atomic_read(&s->wake_cnt) > 0) {
		atomic_dec(&s->wake_cnt);
		return GM_SUCCESS;
	}
	else
		return GM_SLEEP_TIMED_OUT;

}

/* Sleep until S is woken by gm_arch_wake() or the current task gets a
   signal.

   Returns GM_SLEEP_WOKE after consuming one recorded wake-up, or
   GM_SLEEP_INTERRUPTED if no wake-up is pending and a signal is.  Any
   other way of getting here is reported through GM_PANIC.

   Concurrent sleepers on the same S are serialized with s->wake_sem.
   Glenn said this should not be necessary, but nothing in gm.c appears
   to protect sleep_sync, and being safe costs nothing. */

gm_arch_sleep_status_t
gm_arch_signal_sleep(gm_arch_sync_t * s)
{
	gm_arch_sleep_status_t ret = GM_SLEEP_WOKE;
	DECLARE_WAITQUEUE( wait, gm_current );

	GM_PRINT(GM_SLEEP_PRINT, ("gm_arch_signal_sleep  s = %p\n", s));

	/* one sleeper at a time */
	gm_down(&s->wake_sem);

	/* queue ourselves before testing wake_cnt so that a wake-up arriving
	   after the test still finds us on the queue */
	add_wait_queue(&s->sleep_queue, &wait);
	/* use INTERRUPTIBLE variant to allow signals */
	gm_current->state = TASK_INTERRUPTIBLE;		

	if (atomic_read(&s->wake_cnt) <= 0) {
		schedule();
	}

	gm_current->state = TASK_RUNNING;
	remove_wait_queue(&s->sleep_queue, &wait);

	/* a recorded wake-up takes precedence over a pending signal */
	if (atomic_read(&s->wake_cnt) > 0)
		atomic_dec(&s->wake_cnt);
	else if (signal_pending(gm_current))
		ret = GM_SLEEP_INTERRUPTED;
	else
		GM_PANIC (("Not interrupted, not woken and I am here ?!?!\n"));

	gm_up(&s->wake_sem);

	return ret;
}


/*********************************************************************
 * DMA region functions
 *********************************************************************/

/* Mark the page frame at physical address PHYS as reserved.  Since
   2.4.0test6 the reserve macro takes a struct page, obtained here from
   the page's direct-mapped kernel address. */
static void
gm_arch_lock_page(ulong phys)
{
	struct page *frame = virt_to_page(__va(phys));

	mem_map_reserve(frame);
}

/* Clear the reserved mark on the page frame at physical address PHYS.
   Since 2.4.0test6 the unreserve macro takes a struct page, obtained
   here from the page's direct-mapped kernel address. */
static void
gm_arch_unlock_page(ulong phys)
{
	struct page *frame = virt_to_page(__va(phys));

	mem_map_unreserve(frame);
}


/* Pin the user page that contains the user virtual address IN so that
   the board can DMA to and from it.

   is        instance the page is registered with
   in        user virtual address inside the page
   dma_addr  out: bus address of the page
   lock      out: bookkeeping needed by
             gm_arch_unlock_user_buffer_page(); lock->magic must be 0
             on entry

   The page is faulted in and made writable, translated to a physical
   address, and rejected if it is not ordinary memory, is reserved,
   carries I/O buffers, has a use count below 1, or belongs to a
   specially mapped (non-SHM) vma.  Unless GM_LINUX_FULL_MM is active,
   a per-physical-page reference count is kept in gm_arch_phys_hash and
   PG_locked is set on the page.  On success the page use count is
   raised by one and gm_max_user_locked_pages is lowered by one.

   Returns GM_SUCCESS, GM_OUT_OF_MEMORY when the locked-page budget is
   exhausted, or GM_FAILURE.

   Changes from the previous revision: if setting PG_locked fails, the
   reference just recorded in gm_arch_phys_hash is rolled back (it used
   to be left behind, so a later lock of the same page saw a stale
   count); and the verify_area diagnostic now passes an integer for its
   %lx conversion instead of a pointer. */
gm_status_t
gm_arch_lock_user_buffer_page(gm_instance_state_t * is,
							  gm_up_t in,
							  gm_dp_t * dma_addr, gm_arch_page_lock_t * lock)
{
	unsigned long phys;
	int *user_data = (int *) in;
	int tmp;
	struct page *page;
	struct vm_area_struct *vma;

	GM_PRINT (GM_PRINT_LEVEL >= 4,
			  ("gm_arch_LOCK_user_buffer_page(%p addr=0x%lx)\n",
			   lock,(unsigned long)in));

	if (verify_area(VERIFY_WRITE, (void *) in, GM_PAGE_LEN)) {
		GM_NOTE (("GM: verify_write failed for page 0x%lx\n",
				  (unsigned long) in));
		return GM_FAILURE;
	}
	/* should bring the page into physical memory and solve
	   copy-on-write problems */
	if (get_user(tmp, user_data) || put_user(tmp, user_data)) {
		GM_NOTE (("GM: EFAULT while trying to lock-register page 0x%p\n",
				  user_data));
		return GM_FAILURE;
	}
	/* although linux 2.2 is multithreaded, it looks like any
	   operation that can modify the page table acquire a kernel lock
	   so, hopefully nobody will swapout the page between the above
	   code and the code below FIXME: actually, I am not really
	   sure how linux 2.2 SMP works, maybe check this later...
	 */

	phys = kvirt_to_phys(is, (long) in, 0);
	if (!gm_linux_phys_ok (phys)) {
		GM_WARN(("Tried to register a mapping, of a non-memory area\n"));
		return GM_FAILURE;
	}
	page = virt_to_page(__va(phys));

	if (PageReserved(page)) {
		GM_WARN(("Tried to register a mapping of a reserved memory area\n"));
		return GM_FAILURE;
	}
	if (page->buffers) {
		GM_PANIC (("Page contains IO buffers!!!!\n"));
		return GM_FAILURE;
	}
	if (atomic_read(&page->count) < 1) {
		GM_WARN(("Tried to register a page with count %d\n",
			atomic_read(&page->count)));
		return GM_FAILURE;
	}

	/* enforce the global budget of lockable user pages */
	if (gm_max_user_locked_pages <= 0) {
		GM_NOTE (("Caannot register memory for lack of physical resources\n"
				  "    total pages = %d  locked pages = %d  (%d MBytes)\n",
				  ((GM_LINUX_HIGH_MEM - GM_LINUX_PAGE_ZERO)
				   / PAGE_SIZE),
				  ((((GM_LINUX_HIGH_MEM - GM_LINUX_PAGE_ZERO) * 3) / 4)
				   / PAGE_SIZE),
				  ((((GM_LINUX_HIGH_MEM - GM_LINUX_PAGE_ZERO) * 3) / 4)
				   /(1024*1024))));
		return GM_OUT_OF_MEMORY;
	}

	vma = find_vma(gm_current->mm, (long) in);
	gm_always_assert (vma && vma->vm_start <= (unsigned long) in);
	/*
	 * see:
	 * linux-2.2.14/include/linux/mm.h:77
	 * linux-2.2.14/ipc/shm.c:507
	 * shmem is OK?
	 */
	if (vma->vm_ops && !(vma->vm_flags & VM_SHM)) {
		GM_WARN(("GM: cannot register specially mapped memory\n"));
		return GM_FAILURE;
	}

	/* ok now let do it */
#if GM_LINUX_FULL_MM
	if (gm_full_mm) {
		if (gm_linux_reg_page(lock, (long) in))
			return GM_FAILURE;
	}
	else
#endif
	{
		int *counter_ptr = (int *)0;

		/* take (or create) GM's own reference on this physical page */
		counter_ptr = gm_hash_find(gm_arch_phys_hash,&phys);
		if (counter_ptr) {
			counter_ptr[0]++;
			GM_PRINT (GM_PRINT_LEVEL >= 4,
					  ("Page already locked by GM ref_cnt now = %d\n",
					   counter_ptr[0]));
		}
		else {
			gm_status_t status;
			int counter = 1;
			status = gm_hash_insert(gm_arch_phys_hash,&phys,&counter);
			if (status != GM_SUCCESS) {
				GM_NOTE (("gm_hash_insert(%p,%p,%p) for phys page failed\n",
						  gm_arch_phys_hash,&phys,&counter));
				return GM_FAILURE;
			}
		}

		/* a page that already has PG_locked set is left as it is */
		if (!PageLocked(page)
			&& test_and_set_bit(PG_locked, &page->flags)) {
			GM_NOTE (("GM: trying to register an already locked page\n"
					  "    is=%p  in=%p  dmaP=%p lockP=%p\n",
					  is, (char *)in, dma_addr,lock));
			/* undo the reference taken above, mirroring what
			   gm_arch_unlock_user_buffer_page() does */
			counter_ptr = gm_hash_find(gm_arch_phys_hash,&phys);
			if (counter_ptr && --counter_ptr[0] <= 0)
				gm_hash_remove(gm_arch_phys_hash,&phys);
			return GM_FAILURE;
		}
	}

	/* now increment the page referenced count */
	atomic_inc(&page->count);
#if GM_DEBUG
	user_lock_cnt++;
#endif
	gm_max_user_locked_pages -= 1;
	/* safe because phys is main memory, we need to introduce
	   more cases for framebuffer mappings and such */
	*dma_addr = (gm_dp_t) gm_virt_to_bus(__va(phys));

	gm_notice_dma_addr (is->dma_page_bitmap, *dma_addr);

	/* fill in what the unlock path needs */
	lock->page = page;
	lock->phys = phys;
	lock->virt_pagenum = (long) in >> PAGE_SHIFT;
	lock->is = is;
	gm_assert(lock->magic == 0);
	lock->magic = USER_LOCK_ALIVE;
	return GM_SUCCESS;
}


/* Undo one successful gm_arch_lock_user_buffer_page() described by
   LOCK.

   GM's reference count for the physical page in gm_arch_phys_hash is
   lowered; when it reaches zero the hash entry is removed and (unless
   GM_LINUX_FULL_MM is active) PG_locked is cleared.  The DMA address
   is forgotten, the page reference taken at lock time is dropped, and
   gm_max_user_locked_pages is raised by one.  LOCK is marked dead.

   A LOCK whose magic is not USER_LOCK_ALIVE is reported with GM_PANIC,
   and a page missing from the hash with GM_NOTE; in both cases the
   function returns without releasing anything.
   NOTE(review): in GM_LINUX_FULL_MM mode the lock path does not insert
   into gm_arch_phys_hash, so this lookup would presumably always fail
   there -- confirm.

   Change from the previous revision: the page reference is now dropped
   on every unlock.  The lock path raises page->count on every call,
   but the reference used to be dropped only when GM's own counter
   reached zero, so a page locked N times kept N-1 references forever. */
void
gm_arch_unlock_user_buffer_page(gm_arch_page_lock_t * lock)
{
	struct page *page = lock->page;
	int c;
	int *counter_ptr = (int *)0;
	int page_inuse = 0;

	gm_dp_t dma_addr;

	GM_PRINT (GM_PRINT_LEVEL >= 4,
			  ("gm_arch_UNlock_user_buffer_page(%p)\n",lock));

	if (lock->magic != USER_LOCK_ALIVE) {
		GM_PANIC (("GM: internal error, releasing a user page with cookie= 0x%x\n",
				   lock->magic));
		return;
	}

	counter_ptr = gm_hash_find(gm_arch_phys_hash,&lock->phys);
	if (!counter_ptr) {
		GM_NOTE (("unlock_user_buffer failed to find entry for phys = 0x%lx\n",
				  lock->phys));
		return;
	}

	/* drop GM's own reference; the last one also removes the entry */
	if (counter_ptr[0]>1) {
		counter_ptr[0]--;
		GM_PRINT (GM_PRINT_LEVEL >= 4,
				  ("Page still locked by GM ref_cnt now = %d\n",
				   counter_ptr[0]));
		page_inuse = 1;
	}
	else {
		counter_ptr[0]--;
		counter_ptr = gm_hash_remove(gm_arch_phys_hash,&lock->phys);
		if (!counter_ptr) {
			GM_NOTE (("UNlocking user_buffer_page remove ptr is NULL?\n"));
		}
		else {
			GM_PRINT (GM_PRINT_LEVEL >= 4,
					  ("UNlocking user_buffer_page counter = %d\n",
					   counter_ptr[0]));
		}
	}

	c = atomic_read(&page->count);
	if (c == 2) {
#if GM_LINUX_FULL_MM
		if (gm_full_mm) {
			/* undo the special vma stuff, not this is not
			   strictly necessary as anyway,
			   the user can make the vma move or disappear,
			   leaving it somewhere by mistake only causes
			   fork() to use copy rather than cow
			 */

			gm_linux_unreg_page(lock);
		}
#endif
	}
	else {
		GM_PRINT (GM_PRINT_LEVEL >= 4,
				  ("Try to dereg a page with count(%d) != 2\n",
				   atomic_read(&page->count)));
	}
	gm_assert(c <= 1000);		/* sanity check */
	gm_always_assert(c >= 1);
	gm_always_assert(!PageReserved(page));

	lock->magic = USER_LOCK_DEAD;

#if GM_LINUX_FULL_MM
	if (!gm_full_mm)
#endif
	{
		/* PG_locked stays set while other GM locks remain */
		if (!page_inuse) {
			gm_always_assert(PageLocked(page));
			clear_bit(PG_locked, &page->flags);
		}
	}

	dma_addr = (gm_dp_t) gm_virt_to_bus(__va(lock->phys));
	gm_forget_dma_addr (lock->is->dma_page_bitmap, dma_addr);

#if GM_DEBUG
	/* when doing an explicit register, issue a warning if the address
	   space of the process has changed between the register and the
	   deregister */
	{
		gm_port_state_t *ps = __gm_port_state_for_id(lock->is, 2);
		if (ps && ps->arch.ref_count > 0) {
			ulong phys;

			phys = kvirt_to_phys(lock->is,
								 (long) lock->virt_pagenum * PAGE_SIZE, 0);
			if (phys != lock->phys)
				printk("gm_arch_unlock_user_page:warning:"
					   "vma was 0x%lx->0x%lx,now ->0x%lx\n",
				  lock->virt_pagenum * PAGE_SIZE, lock->phys,
					   phys);
		}
	}
	user_unlock_cnt++;
#endif
	/* give back the reference this lock took with atomic_inc() */
	__free_page(page);
	gm_max_user_locked_pages += 1;
}


/* Return the bus address corresponding to the kernel virtual address
   ADDR inside the DMA region R.  The address is first translated to a
   physical address and (in assert-enabled builds) checked to be
   ordinary memory. */
static gm_dp_t
region_dma_addr(gm_arch_dma_region_t * r, void *addr)
{
	ulong phys;

	phys = kvirt_to_phys(r->is, (long) addr, 1);
	GM_PRINT (GM_PRINT_LEVEL >= 8, ("phys=0x%lx,pzero=0x%lx,highmem=0x%lx\n",
				 phys, GM_LINUX_PAGE_ZERO, GM_LINUX_HIGH_MEM_THRESHOLD));
	gm_assert (gm_linux_phys_ok (phys));

	return gm_virt_to_bus(__va(phys));
}


/* Allocate LEN bytes of DMA memory that is contiguous in kernel space
   but possibly segmented in DMA space.

   If register_page_func is non-null, call
   register_page_func (arg, dma_addr, page_num) for each page. */

/* Allocate the DMA region R of LEN bytes for instance IS.

   is                  owning instance
   r                   region descriptor to fill in
   len                 requested length in bytes
   flags               GM_ARCH_DMA_CONTIGUOUS requests physically
                       contiguous pages
   register_page_func  optional callback, invoked as
                       register_page_func(arg, bus_addr, page_num) for
                       every page of the region
   arg                 opaque first argument for the callback

   Contiguous requests and single-page requests come from
   __get_free_pages() (rounded up to a power-of-two number of pages);
   everything else comes from vmalloc.  Returns GM_SUCCESS, or
   GM_FAILURE when the memory could not be obtained.

   Change from the previous revision: the failure exit now goes through
   GM_RETURN_STATUS like the success exit, instead of a bare return
   that skipped the counterpart of GM_CALLED(). */
gm_status_t
gm_arch_dma_region_alloc(gm_instance_state_t * is, gm_arch_dma_region_t * r,
						 gm_size_t len, gm_u32_t flags,
						 gm_register_page_function_t register_page_func,
						 void *arg)
{
	void *addr;
	int pages, page_num;

	GM_CALLED ();

#if GM_DEBUG
	dma_alloc_cnt++;
#endif

	GM_PRINT (GM_PRINT_LEVEL >= 4,
			  ("in gm_arch_dma_region_alloc; len = %d,%s\n", len,
			   (flags & GM_ARCH_DMA_CONTIGUOUS) ?
			   "contiguous" : "non-contiguous"));

	/* number of whole pages needed to hold LEN bytes */
	pages = GM_PAGE_ROUNDUP(u32, len) / GM_PAGE_LEN;
	gm_always_assert(pages >= 1);

	/* grab the memory and remember how we allocated it */
	r->is = is;
	r->flags = flags;
	r->len = len;
	if (len < GM_PAGE_LEN) {
		GM_PRINT (GM_PRINT_LEVEL >= 4,
				  ("Called with length(%d) < GM_PAGE_LEN=%d\n",
				   len, GM_PAGE_LEN));
	}
	if (flags & GM_ARCH_DMA_CONTIGUOUS || pages <= 1) {
		r->type = GM_ARCH_GFP_REGION;
		/* smallest order whose page count covers the request */
		r->order = 0;
		while ((1 << r->order) < pages)
			r->order += 1;
		r->alloc_addr = (void *) __get_free_pages(GFP_KERNEL, r->order);
		GM_PRINT (GM_PRINT_LEVEL >= 4,
				  ("get_free_pages %d pages, addr= 0x%p\n",
				   1 << r->order, r->alloc_addr));
	}
	else {
		r->type = GM_ARCH_VMALLOC_REGION;
		r->alloc_addr = gm_vmalloc(pages * PAGE_SIZE);
		GM_PRINT (GM_PRINT_LEVEL >= 4,
				  ("vmalloced %d pages, addr= 0x%p\n",
				   pages, r->alloc_addr));
	}

	if (!r->alloc_addr) {
		GM_RETURN_STATUS (GM_FAILURE);
	}

	r->addr2 = r->addr = r->alloc_addr;

	gm_always_assert(GM_PAGE_ALIGNED(r->addr));

	/* Call the page registration functions for each page in the DMA
	   region. */

	if (register_page_func) {
		addr = r->addr;
		for (page_num = 0; page_num < pages; page_num++) {
			gm_dp_t bus_addr = region_dma_addr(r, addr);
			register_page_func(arg, bus_addr, page_num);
			addr = (char *) addr + PAGE_SIZE;
		}
	}

	/* when DMA tracing is compiled in, note every page of the region */
	if (GM_TRACE_LANAI_DMA)	{
		addr = r->addr;
		for (page_num = 0; page_num < pages; page_num++) {
			unsigned long phys = kvirt_to_phys(is, (unsigned long) addr, 1);
			gm_notice_dma_addr (is->dma_page_bitmap, phys);
			addr = (char *) addr + PAGE_SIZE;
		}
	}

	GM_RETURN_STATUS (GM_SUCCESS);
}

/* Release the DMA region R.  Every page in [r->addr, r->addr + r->len)
   is first unreserved and forgotten by the DMA-address bookkeeping
   (this needs the mapping to be intact, so it happens before the
   memory is returned); the memory is then released with the routine
   matching r->type.  An unknown type trips gm_always_assert(). */
void
gm_arch_dma_region_free(gm_arch_dma_region_t * r)
{
	ulong va;

#if GM_DEBUG
	dma_free_cnt++;
#endif

	for (va = (ulong) r->addr;
		 va < (ulong) r->addr + r->len;
		 va += GM_PAGE_LEN)
	{
		ulong phys = kvirt_to_phys(r->is, va, 1);

		gm_assert (gm_linux_phys_ok (phys));
		gm_arch_unlock_page(phys);
		gm_forget_dma_addr (r->is->dma_page_bitmap, phys);
	}

	if (r->type == GM_ARCH_KMALLOC_REGION)
		gm_kfree(r->alloc_addr);
	else if (r->type == GM_ARCH_VMALLOC_REGION)
		gm_vfree(r->alloc_addr);
	else if (r->type == GM_ARCH_GFP_REGION)
		free_pages((ulong) r->alloc_addr, r->order);
	else
		gm_always_assert(0);

	/* poison the descriptor against accidental reuse */
	r->addr = 0;
	r->alloc_addr = 0;
}

/* Return the kernel virtual address at which region R starts. */
void *
gm_arch_dma_region_kernel_addr(gm_arch_dma_region_t * r)
{
	void *kernel_va = r->addr;

	return kernel_va;
}

/* Report the status of region R.  The Linux port always answers with
   the constant 0xf, whatever R is. */
gm_s32_t
gm_arch_dma_region_status(gm_arch_dma_region_t * r)
{
	const gm_s32_t fixed_status = 0xf;

	return fixed_status;
}


/* Return the bus address of the first page of region R. */
gm_dp_t
gm_arch_dma_region_dma_addr(gm_arch_dma_region_t * r)
{
	gm_dp_t bus = region_dma_addr(r, r->addr);

	GM_PRINT (GM_PRINT_LEVEL >= 8, ("gm_arch_dma_region_dma_addr:0x%p->0x%x\n", r->addr, bus));
	return bus;
}

/* Iterator over the pages of region R: returns the bus address of the
   page the cursor r->addr2 points at and moves the cursor forward by
   one page. */
gm_dp_t
gm_arch_dma_region_dma_addr_advance(gm_arch_dma_region_t * r)
{
	void *page_va = r->addr2;

	/* step the cursor first; the translation below uses the saved value */
	r->addr2 += PAGE_SIZE;

	return region_dma_addr(r, page_va);
}

/* Synchronize CPU caches with region R.  On every CPU type except
   sparc64 this flushes all caches with flush_cache_all(); on sparc64
   nothing is done.  R and COMMAND are not examined.  Always returns
   GM_SUCCESS. */
gm_status_t
gm_arch_dma_region_sync(gm_arch_dma_region_t * r, int command)
{
#if !GM_CPU_sparc64
	/* flush_cache_all() is a no-op on x86 and alpha; architectures
	   without coherent caches probably deserve something less
	   heavy-handed */
	flush_cache_all();
#endif
	return GM_SUCCESS;
}


/*********************************************************************
 * kernel memory allocation functions 
 *********************************************************************/

/* Allocate LEN bytes of kernel memory.  Requests that fit in a page
   with 64 bytes to spare come from kmalloc (GFP_KERNEL); larger ones
   come from vmalloc.  FLAGS is ignored.  Returns the allocator's
   result. */
void *
gm_arch_kernel_malloc(unsigned long len, int flags)
{
#if GM_DEBUG
	kernel_alloc_cnt++;
#endif
	/* 64 is a safe margin (32 is enough for the 2.0 kmalloc.c); the
	   scheme works even if the threshold does not match kmalloc's
	   internals exactly */
	if (len > PAGE_SIZE - 64)
		return gm_vmalloc(len);

	return gm_kmalloc(len, GFP_KERNEL);
}

/* Release memory obtained from gm_arch_kernel_malloc().  The allocator
   is inferred from the address: a pointer strictly between PAGE_OFFSET
   and high_memory goes to kfree, anything else to vfree. */
void
gm_arch_kernel_free(void *ptr)
{
#if GM_DEBUG
	kernel_free_cnt++;
#endif
	if (ptr <= (void *) PAGE_OFFSET || ptr >= (void *) high_memory) {
		gm_vfree((void *) ptr);
		return;
	}
	gm_kfree(ptr);
}

/*********************************************************************
 * memory mapping (into kernel space)
 *********************************************************************/

/* Map LEN bytes of board I/O space, starting OFFSET bytes past the
   instance's physical base address, into kernel virtual space with
   gm_ioremap().  The mapping is stored in *KADDR; returns GM_SUCCESS
   when it is non-null and GM_FAILURE otherwise. */
gm_status_t
gm_arch_map_io_space(gm_instance_state_t * is, gm_u32_t offset, gm_u32_t len,
					 void **kaddr)
{
	unsigned long phys_addr;

	GM_PRINT (GM_PRINT_LEVEL >= 6,
			  ("gm_arch_map_io_space(%p, 0x%x, %d, )\n",
			   is, offset, len));

	phys_addr = (unsigned long) is->arch.phys_base_addr + (unsigned long) offset;
	*kaddr = gm_ioremap(phys_addr, len);

	GM_PRINT (GM_PRINT_LEVEL >= 6,
			  ("ioremapped 0x%p (offset 0x%x, len 0x%x)\n",
			   *kaddr, offset, len));

	if (!*kaddr)
		return GM_FAILURE;
	return GM_SUCCESS;
}

/* Undo a mapping made by gm_arch_map_io_space(): hand *KADDR to
   gm_iounmap() and clear *KADDR so the stale mapping cannot be reused.
   IS, OFFSET and LEN are only used for the debug trace. */
void
gm_arch_unmap_io_space(gm_instance_state_t * is, gm_u32_t offset, gm_u32_t len,
					   void **kaddr)
{
	/* *kaddr is a pointer, so print it with %p (as the map routine
	   does); %x would truncate or misread it on 64-bit targets */
	GM_PRINT (GM_PRINT_LEVEL >= 6,
			  ("iounmapping 0x%p (offset 0x%x, len 0x%x)\n",
			   *kaddr, offset, len));
	gm_iounmap(*kaddr);
	*kaddr = 0;
}

/* needed for memory registration? */

/* Placeholder: mapping a contiguous kernel segment into user space is
   not implemented on Linux.  Reports the missing feature through
   GM_NOT_IMP() and returns GM_FAILURE without touching any argument. */
gm_status_t
gm_arch_mmap_contiguous_segment(gm_port_state_t * ps,
								void *kaddr,
								ulong blockSize,
								gm_up_t * vaddr)
{
	gm_status_t status = GM_FAILURE;

	GM_NOT_IMP ();

	return status;
}

/* Placeholder counterpart of gm_arch_mmap_contiguous_segment(): not
   implemented on Linux, it only invokes GM_NOT_IMP() and ignores PS. */
void
gm_arch_munmap_contiguous_segments(gm_port_state_t * ps)
{
	GM_NOT_IMP ();
}

/*********************************************************************
 * miscellaneous functions
 *********************************************************************/

/* Store the host page size in *RESULT and return GM_SUCCESS. */
static
gm_status_t
gm_arch_page_len(unsigned long *result)
{
	/* On Linux the page size is a compile-time constant (at least on
	 * x86 and alpha), so PAGE_SIZE can be used directly. */
	const unsigned long page_len = PAGE_SIZE;

	*result = page_len;
	return GM_SUCCESS;
}

/* Store the number of PAGE_SIZE pages of physical RAM in *RESULT and
   return GM_SUCCESS.

   si_meminfo() reports totalram in units of si.mem_unit bytes (the
   2.4 kernels set mem_unit to PAGE_SIZE, so totalram is already a page
   count).  Dividing it by PAGE_SIZE unconditionally, as if it were a
   byte count, under-reports memory by a factor of the page size, so
   the value is scaled by mem_unit instead. */
static
gm_status_t
gm_arch_physical_pages(gm_u32_t * result)
{
	struct sysinfo si;
	unsigned long unit;

	si_meminfo(&si);

	/* a zero unit is treated as "bytes", the pre-mem_unit convention */
	unit = si.mem_unit ? si.mem_unit : 1;

	/* scale without forming totalram * unit, which could overflow */
	if (unit >= PAGE_SIZE)
		*result = si.totalram * (unit / PAGE_SIZE);
	else
		*result = si.totalram / (PAGE_SIZE / unit);

	return GM_SUCCESS;
}

gm_status_t
gm_arch_gethostname(char *ptr, int len)
{
	gm_bzero(ptr, len);
	strncpy(ptr, system_utsname.nodename, len - 1);
	ptr[len - 1] = 0;

#if GM_SHORT_HOSTNAME
	{
		char *s;
		/* drop everything from first '.' on to end */
		if (s = strchr(system_utsname.nodename, '.'))
			*s = 0;
	}
#endif

	return GM_SUCCESS;
}

/* Delay the caller for about USECS microseconds.  IS is unused.

   Delays shorter than 1000us, and every delay requested from interrupt
   context, busy-wait in gm_udelay().  Longer delays put the task in
   TASK_INTERRUPTIBLE state and sleep with schedule_timeout(), so their
   granularity is one timer tick. */
void
gm_arch_spin(gm_instance_state_t * is, gm_u32_t usecs)
{
	/*
	 * Rubini sez we shouldn't call udelay with arg > 1000
	 */
	/* loic: we do not want to call udelay here, we do not need a busy
	   loop here!!!  FIXME: is there any place when performance
	   matters when arch_spin is called: 10ms is minimum with the
	   following implementation */
	if ((usecs < 1000) || in_interrupt()) {
		gm_udelay(usecs);
	}
	else {
		/* Same value as usecs * HZ / 1000000 + 1, but computed on the
		   whole seconds and the sub-second remainder separately so
		   the product cannot wrap around 32 bits for long delays
		   (usecs * HZ overflows after ~42s at HZ=100, ~4s at
		   HZ=1024). */
		unsigned long ticks =
			(unsigned long) (usecs / 1000000) * HZ
			+ (unsigned long) (usecs % 1000000) * HZ / 1000000
			+ 1;

		gm_current->state = TASK_INTERRUPTIBLE;
		schedule_timeout(ticks);
	}
}

/*********************************************************************
 * Directcopy
 *********************************************************************/
#if GM_ENABLE_DIRECTCOPY

#warning Direct Copy is completely untested in linux 2.4 and will 
#warning probably break.

/*
 * Copy LENGTH bytes starting at virtual address SOURCE_ADDR in the
 * address space of the process whose pid is PID_SOURCE to TARGET_ADDR,
 * which is written with copy_to_user().
 *
 * The source task is searched for by walking the task list outwards
 * from `current' in both directions.  The source range is then handled
 * in chunks that never cross a GM_PAGE_LEN boundary: for each chunk the
 * VMA is looked up with find_vma() and the pgd/pmd/pte entries are
 * walked by hand, calling GM_MM_FAULT() whenever an entry is missing
 * or bad.  task->mm->mmap_sem is held during the whole copy.
 *
 * Returns GM_SUCCESS when LENGTH is 0 or everything was copied,
 * GM_INVALID_PARAMETER for pid 0, for a null source with a non-zero
 * length, or when both list walks end on NULL without finding the pid,
 * and GM_MEMORY_FAULT when no VMA covers the source address or
 * GM_MM_FAULT() returns a negative value.
 *
 * NOTE(review): the file itself warns that this code is untested on
 * Linux 2.4.  Points to confirm before relying on it:
 *   - the task list is walked without any visible locking, and the
 *     NULL termination tests assume the list is not circular;
 *   - task->mm is dereferenced without a null check;
 *   - `pte' is not re-read after GM_MM_FAULT() repairs a non-present
 *     entry, so pte_page() may be applied to the stale value;
 *   - the value of pte_page() is used directly as a kernel address;
 *   - the return value of copy_to_user() is ignored.
 */
gm_status_t
gm_arch_directcopy_get(void * source_addr, 
					   void * target_addr, 
					   ulong length, 
					   uint pid_source)
{
	
/* Only x86 has been tested and validated */
#if GM_CPU_x86 || GM_CPU_sparc64
	struct task_struct * task, * task1, * task2;
	ulong source, target, len, phys_source;
	uint offset_source, pack;

	struct vm_area_struct * vma;
	pgd_t *page_dir;
	pmd_t *page_middle;
	pte_t *page_table, pte;
	

	/* pid 0 ? yeah, right !! */	
	if (pid_source == 0) {
		GM_NOTE (("Directcopy : bad pid (0)\n"));
		return GM_INVALID_PARAMETER;
	}
	
	/* nothing to do :-) */
	if (length == 0) {
		return GM_SUCCESS;
	}
	else if ((ulong)(source_addr) == 0) {
		GM_NOTE (("Directcopy : buffer NULL but length %ld", length));
		return GM_INVALID_PARAMETER;
	}

    /* A funny part: we need to find the task descriptor of the
       other process in the doubly linked list of running
       processes.  A good heuristic: sender and receiver have been
       spawned roughly at the same time, so their pids are close, so
       the distance between them in the linked list is small.  We
       explore the list from the current task in both directions to
       search for the sender. */
    task1 = current->prev_task;
	task2 = current->next_task;
    while (1) {
		/* each `else' below binds to the inner `if': step the cursor
		   only when it does not match the wanted pid */
		if (task1 != NULL)
			if (task1->pid == pid_source) {
				task = task1;
				break;
			} else
				task1 = task1->prev_task;
		if (task2 != NULL)
			if (task2->pid == pid_source) {
				task = task2;
				break;
			} else
				task2 = task2->next_task;
		/* both walks ran off the list: the pid does not exist */
		if ((task1 == NULL) && (task2 == NULL)) {
			GM_NOTE (("Directcopy : bad pid (%d)\n", pid_source));
			return GM_INVALID_PARAMETER;
		}
	}
	source = (ulong)source_addr;
	target = (ulong)target_addr;
	len = length;
	/* offset of the source address inside its page */
	offset_source = source & (GM_PAGE_LEN - 1);

	/* we need this lock to protect the virtual memory space
	   structure we are using */
	down(&task->mm->mmap_sem);

	while (1) {
		/* respect the physical memory page boundaries: copy at most
		   up to the end of the current source page */
		pack =  GM_PAGE_LEN - offset_source;
		if (pack > len)
			pack = len;
		
		/* Not a problem if it's the same area, there's a cache in
		   find_vma */
		vma = find_vma(task->mm, source);
		if (!vma || vma->vm_start > source) {
			GM_PRINT (GM_PRINT_LEVEL >= 5,
					  ("Directcopy : invalid VMA\n"));
			goto memory_fault;
		}
		
		/* In the following, we convert the virtual addresses into
		   physical addresses by walking the page tables.  If
		   something is wrong, we ask the kernel to fix it if we
		   can access the kernel function.  If not, boom. :-)) */
		page_dir = pgd_offset(task->mm, source);
		if (!pgd_present(*page_dir)) {
			GM_PRINT (GM_PRINT_LEVEL >= 5,
					  ("Directcopy : page directory not present\n"));
			if (GM_MM_FAULT(task, vma, source, 0) < 0)
				goto memory_fault;
		}
			
		if (pgd_none(*page_dir)) {
			GM_PRINT (GM_PRINT_LEVEL >= 5,
					  ("Directcopy : no page directory\n"));
			if (GM_MM_FAULT(task, vma, source, 0) < 0)
				goto memory_fault;
		}
		if (pgd_bad(*page_dir)) {
			GM_PRINT (GM_PRINT_LEVEL >= 5,
					  ("Directcopy : bad page directory\n"));
			if (GM_MM_FAULT(task, vma, source, 0) < 0)
				goto memory_fault;
		}
		page_middle = pmd_offset(page_dir, source);
		if (pmd_none(*page_middle)) {
			GM_PRINT (GM_PRINT_LEVEL >= 5,
					  ("Directcopy : No page table for addr=0x%lx\n",
					   source));
			if (GM_MM_FAULT(task, vma, source, 0) < 0)
				goto memory_fault;
		}
		if (pmd_bad(*page_middle)) {
			GM_PRINT (GM_PRINT_LEVEL >= 5,
					  ("Directcopy : bad middle page table\n"));
			if (GM_MM_FAULT(task, vma, source, 0) < 0)
				goto memory_fault;
		}
		
		page_table = pte_offset(page_middle, source);
		pte = *page_table;
		if (!pte_present(pte)){
			GM_PRINT (GM_PRINT_LEVEL >= 5,
					  ("Directcopy : pte not present\n"));
			if (GM_MM_FAULT(task, vma, source, 0) < 0)
				goto memory_fault;
			/* NOTE(review): `pte' still holds the value read before
			   the fault was serviced -- confirm whether it must be
			   reloaded from *page_table here */
		}
		
		phys_source = pte_page(pte);
		
		/* we don't need to translate the receiver side addresses,
		   the MMU is here for that */ 
		copy_to_user((void *)target, (void *)(phys_source+offset_source),
					 pack);
		
		/* advance to the next chunk; finished once LEN is used up */
		source += pack;
		target += pack;
		len -= pack;
		offset_source = source & (GM_PAGE_LEN - 1);
		if (len == 0) {
			up(&task->mm->mmap_sem);
			return GM_SUCCESS;
		}
	}
 memory_fault:
	/* Boom :-)) -- release the semaphore taken above before failing */	
	GM_PRINT (GM_PRINT_LEVEL >= 5, ("Directcopy : memory fault !\n"));
	up(&task->mm->mmap_sem);
	return GM_MEMORY_FAULT;
	
#else
#error DIRECTCOPY turned on for untested architecture??
	return GM_FAILURE;
#endif
}
#endif

/*********************************************************************
 * other stuff
 *********************************************************************/

/*
 * interrupt handler
 */

#define GM_DEBUG_LINUX_INTR 1
/*
 * Interrupt handler registered with request_irq() for one board.
 * INSTANCE_ID is the gm_instance_state_t that was passed as the device
 * cookie; IRQ and REGS are only used for tracing.
 *
 * The handler returns early, without calling gm_intr(), when
 *   - gm_interrupting() says this board did not raise the interrupt
 *     (the line may be shared),
 *   - the LANai is not running (interrupts are then masked through
 *     gm_set_EIMR (is, 0)), or
 *   - bit 0 of is->arch.interrupt was already set, i.e. the handler
 *     was re-entered for this instance (interrupts are masked and the
 *     ISR is logged for the first few occurrences).
 * Otherwise the generic gm_intr() does the real work and the re-entry
 * flag is cleared afterwards.
 */
static
void
gm_linux_intr(int irq, void *instance_id, struct pt_regs *regs)
{
	gm_instance_state_t *is = instance_id;
	
	GM_PRINT (GM_DEBUG_LINUX_INTR, ("got an interrupt to IRQ%d\n", irq));
	GM_KLOG_EVT(GM_KLOG_INTR0, (void *) regs->pc);

	gm_assert (is);
	
	/* in case we are sharing interrupts, return as soon as possible */
	
	if (gm_interrupting (is) == GM_ARCH_INTR_UNCLAIMED) {
		return;
	}
	
	if (!is->lanai.running) {
		/* do not print a message each time because if the interrupt
		   is shared we may fill up some log quickly: the counter is
		   reset once more than HZ jiffies have elapsed, and at most 5
		   messages are printed in between */
		static unsigned long last_jiffies;
		static int count;
		
		if (jiffies - last_jiffies > HZ) {
            count = 0;
            last_jiffies = jiffies;
		}
		if (count < 5) {
			count++;
			GM_PRINT (1, ("GM: LANai is not running in interrupt handler.\n"));
		}
		gm_set_EIMR (is, 0);	/* disable interrupts */
		return;
	}

	/* bit 0 of is->arch.interrupt marks "handler active" for this
	   instance; finding it already set means we were re-entered */
	if (test_and_set_bit(0, (void *) &is->arch.interrupt) != 0) {
		static int count;
		static unsigned long last_jiffies;
		if (jiffies - last_jiffies > HZ) {
			count = 0;
			last_jiffies = jiffies;
		}
		/* ack the interrupt without doing anything, hopefully the
		   driver will continue working */
		/* Can't clear the ISR bits, it breaks GM's model of interrupt
		   operation */
		/* loic: but we do not care if it breaks as here we
		   are already in a very badly broken situation :-) */
		count += 1;
                
		gm_set_EIMR (is, 0);
		/*is->set_EIMR(is, 0);*/

		/* rate-limited like the message above */
		if (count < 10) {
			printk(KERN_ERR "myri/gm %d: recursive interruption details follow\n", is->id);
			printk(KERN_ERR "myri/gm %d: Re-entering the interrupt handler:ISR status=0x%x\n",
				   is->id, (int) is->get_ISR(is));
		}
		return;
	}

#if GM_DEBUG
	gm_in_interrupt = 1;
#endif
	
	if (gm_intr(is) == GM_ARCH_INTR_CLAIMED)
		GM_PRINT (GM_DEBUG_LINUX_INTR, ("the interrupt was claimed\n"));
	else
		GM_PRINT (GM_DEBUG_LINUX_INTR, ("the interrupt was not claimed\n"));
	
#if GM_DEBUG
	gm_in_interrupt = 0;
#endif
	/* release the re-entry flag taken with test_and_set_bit() above */
	is->arch.interrupt = 0;
	return;
}


/***********************************************************************
 * Character device entry points.
 ***********************************************************************/

/* Translate a GM status code into the matching (positive) Linux errno
   value; GM_SUCCESS maps to 0.  Any status without an explicit mapping
   is reported like GM_INTERNAL_ERROR, i.e. as EPROTO. */
static int
gm_linux_localize_status(gm_status_t status)
{
	switch (status) {
	case GM_SUCCESS:
		return 0;

	case GM_INPUT_BUFFER_TOO_SMALL:
	case GM_OUTPUT_BUFFER_TOO_SMALL:
	case GM_MEMORY_FAULT:
		return EFAULT;

	case GM_TRY_AGAIN:
		return EAGAIN;

	case GM_BUSY:
		return EBUSY;

	case GM_INTERRUPTED:
		return EINTR;

	case GM_INVALID_PARAMETER:
	case GM_INVALID_COMMAND:
		return EINVAL;

	case GM_OUT_OF_MEMORY:
		return ENOMEM;

	case GM_PERMISSION_DENIED:
		return EPERM;

	case GM_UNATTACHED:
		return EUNATCH;

	case GM_UNSUPPORTED_DEVICE:
		return ENXIO;

	case GM_INTERNAL_ERROR:
	default:
		break;
	}

	/* GM_INTERNAL_ERROR and every unknown status */
	return EPROTO;
}

#define GM_DEBUG_IOCTL 1

/* misc GM functionality - pass control to arch-independent code */
/*
 * ioctl() entry point of the character device.  The port state cached
 * in fileP->private_data by gm_linux_open() is locked through its
 * `sync' mutex, and the request is forwarded to the OS-independent
 * gm_ioctl().  ARG is passed as both the input and the output buffer
 * with INT_MAX as the length of each.  INODEP is unused.
 *
 * Returns the GM status translated by gm_linux_localize_status().
 *
 * NOTE(review): the translated value is a positive errno, whereas
 * gm_linux_open() negates it before returning -- confirm which
 * convention the user-level library expects before changing either.
 * NOTE(review): ps is used without a null check; this presumably
 * relies on open() always having stored a port state -- confirm.
 */
static int
gm_linux_ioctl(struct inode *inodeP, struct file *fileP,
			   unsigned int cmd, unsigned long arg)
{
	gm_port_state_t *ps;
	gm_status_t status;
	
	GM_CALLED ();
	
	GM_PRINT (GM_DEBUG_IOCTL,
			  ("gm_linux_ioctl called with cmd 0x%x %s.\n",
			   cmd,_gm_ioctl_cmd_name(cmd)));
	
	ps = fileP->private_data;
	gm_arch_mutex_enter (&ps->sync);
	
	/* disabled debugging hook, kept for reference */
#if 0
#ifdef LINUX_21
	if (cmd == GM_LINUX_DEBUG_MODULE) {
		atomic_set(&__this_module.uc.usecount, 1);
		GM_RETURN_INT (GM_SUCCESS);
	}
#endif
#endif
	GM_PRINT (GM_DEBUG_IOCTL,
			  ("gm_linux_ioctl: cmd = %s\n",
			   _gm_ioctl_cmd_name(cmd)));
	
	/* the same user pointer serves as input and output buffer */
	status = gm_ioctl(ps, cmd,
					  (void *) arg, INT_MAX,
					  (void *) arg, INT_MAX,
					  0);
	gm_arch_mutex_exit (&ps->sync);
	
	GM_PRINT (GM_DEBUG_IOCTL,
			  ("gm_linux_ioctl: status = %d\n", status));
	
	GM_RETURN_INT (gm_linux_localize_status (status));
}

/* True when offset OFF lies inside the mapping TYPE (copy_block, sram,
   recv_queue, ...) of the port state `ps', i.e. in
   [mappings.TYPE.offset, mappings.TYPE.offset + mappings.TYPE.len).
   The macro expects a variable named `ps' in the calling scope.  OFF
   is parenthesized so that an expression argument is compared as a
   whole. */
#define MAPPING_OF_TYPE(type, off)					      \
  (ps->mappings.type.offset <= (off)					      \
   && (off) < (ps->mappings.type.offset					  \
			 + (gm_offset_t) ps->mappings.type.len))



/* VMA `open' callback: called when a GM mapping gains another user.
   Takes one more reference on the port state stored in
   vma->vm_private_data, under the port's `sync' mutex. */
static void
linux_vm_open(struct vm_area_struct *vma)
{
	gm_port_state_t *port = (gm_port_state_t *) vma->vm_private_data;

	gm_arch_mutex_enter (&port->sync);
	/* a mapping can only exist while the port is still referenced */
	gm_always_assert(port->arch.ref_count > 0);
	port->arch.ref_count++;
	gm_arch_mutex_exit (&port->sync);
}

static void gm_linux_port_close(gm_port_state_t * ps);

/* VMA `close' callback: drops the reference that the mapping held on
   the port state stored in vma->vm_private_data.  When that was the
   last reference the port itself is closed, outside of the port
   mutex. */
static void
linux_vm_close(struct vm_area_struct *vma)
{
	gm_port_state_t *port = (gm_port_state_t *) vma->vm_private_data;
	int last_reference;

	gm_arch_mutex_enter (&port->sync);
	gm_always_assert(port->arch.ref_count > 0);
	port->arch.ref_count -= 1;
	last_reference = (port->arch.ref_count == 0);
	gm_arch_mutex_exit (&port->sync);

	if (!last_reference)
		return;

	GM_PRINT (GM_PRINT_LEVEL >= 0,
			  ("myri/gm: closing port after last mapping finished\n"));
	gm_linux_port_close(port);
}


static struct vm_operations_struct gm_linux_vm_ops =
{
	linux_vm_open,				/* open */
	linux_vm_close,				/* close */
	NULL,						/* unmap */
	NULL,						/* protect */
	NULL,						/* sync */
	NULL,						/* nopage */
	NULL,						/* wppage */
	NULL,						/* swapout */
};

/* map some pages into user space */

#define GM_DEBUG_MMAP 0

/*
 * mmap() entry point of the character device: map the GM resource
 * found at offset vma->vm_pgoff * GM_PAGE_LEN of the port into the
 * user range [vma->vm_start, vma->vm_end).
 *
 * The port state comes from fileP->private_data.  Under the port's
 * `sync' mutex the function
 *   1. takes a reference on the port for the new VMA and installs
 *      gm_linux_vm_ops so that later VMA open/close events keep the
 *      count up to date,
 *   2. translates the VM_READ/VM_WRITE flags of the VMA into
 *      GM_MAP_READ/GM_MAP_WRITE and asks gm_prepare_to_mmap() to
 *      validate the request,
 *   3. maps the range one page at a time: gm_mmap() yields the kernel
 *      address, kvirt_to_phys() the physical one.  Host memory pages
 *      (gm_linux_phys_ok) are locked with gm_arch_lock_page(); any
 *      other address must fall inside the board window and is mapped
 *      uncached,
 *   4. calls gm_finish_mmap().
 *
 * Returns 0 on success and -1 on any failure.  Every failure path
 * leaves through abort_with_mutex, which releases the mutex and gives
 * back the reference taken in step 1.
 */
static int
gm_linux_mmap(struct file *fileP,
			  struct vm_area_struct *vma)
{
	gm_port_state_t *ps;
	gm_status_t status;
	gm_offset_t off;
	gm_size_t len;
	unsigned long start, end, pos;
	pgprot_t prot;
	void *kptr;
	ulong phys;
	unsigned int requested_permissions;

	GM_CALLED ();

	ps = fileP->private_data;
	gm_arch_mutex_enter (&ps->sync);
	gm_assert(ps->arch.ref_count > 0);
	/* the new VMA holds a reference on the port until linux_vm_close */
	ps->arch.ref_count += 1;
	gm_assert(vma->vm_ops == NULL);
	vma->vm_ops = &gm_linux_vm_ops;
	gm_assert(vma->vm_private_data == 0);
	vma->vm_private_data = (void *) ps;
	start = vma->vm_start;
	end = vma->vm_end;
	off = (vma->vm_pgoff) * GM_PAGE_LEN;
	len = end - start;

	/* the access rights live in vma->vm_flags, not in the vma pointer */
	requested_permissions = 0;
	if ((vma->vm_flags) & VM_READ)
		requested_permissions |= GM_MAP_READ;
	if ((vma->vm_flags) & VM_WRITE)
		requested_permissions |= GM_MAP_WRITE;


	if (GM_DEBUG_MMAP) {
		GM_PRINT (GM_PRINT_LEVEL >= 0,
				  ("mmap: offset= %lx  start= %lx  end= %lx  len= %lx\n",
					  off, start, end, len));
		if (MAPPING_OF_TYPE(copy_block, off)) {
			GM_PRINT (GM_PRINT_LEVEL >= 0,
					  ("mmap: mapping a copy block segment\n"));
		}
		else if (MAPPING_OF_TYPE(sram, off)) {
			GM_PRINT (GM_PRINT_LEVEL >= 0,
					  ("mmap: mapping an sram segment\n"));
		}
		else if (MAPPING_OF_TYPE(recv_queue, off)) {
			GM_PRINT (GM_PRINT_LEVEL >= 0,
					  ("mmap: mapping a recv_queue segment\n"));
		}
		else if (MAPPING_OF_TYPE(control_regs, off)) {
			GM_PRINT (GM_PRINT_LEVEL >= 0,
					  ("mmap: mapping a control_regs segment\n"));
		}
		else if (MAPPING_OF_TYPE(special_regs, off)) {
			GM_PRINT (GM_PRINT_LEVEL >= 0,
					  ("mmap: mapping a special_regs segment\n"));
		}
		else {
			GM_PRINT (GM_PRINT_LEVEL >= 0,
					  ("mmap: mapping unknown type of segment\n"));
		}
	}

	status = gm_prepare_to_mmap(ps, off, len, requested_permissions);
	if (status != GM_SUCCESS) {
		GM_PRINT (GM_DEBUG_MMAP,
				  ("whoops: gm_prepare_to_mmap returned an error\n"));
		goto abort_with_mutex;
	}


	/*
	 * do the mapping a page at a time
	 */
	for (pos = 0; pos < len; pos += PAGE_SIZE) {

		status = gm_mmap (ps, off + pos, &kptr);
		if (status != GM_SUCCESS) {
			GM_PRINT (GM_DEBUG_MMAP,
					  ("whoops: gm_generic_mmap failed 0\n"));
			goto abort_with_mutex;
		}

		/*
		 * remap_page_range wants a physical address; only the recv_queue
		 * was allocated with kmalloc (we do not care, kvirt_to_phys
		 * is working in any case) 
		 */
		/*     if (MAPPING_OF_TYPE(recv_queue, off)) {
		   phys = virt_to_phys (kptr);
		   } else
		 */
		phys = kvirt_to_phys(ps->instance, (ulong) kptr, 1);

		GM_PRINT (GM_DEBUG_MMAP,
				  ("off = 0x%x  kptr = 0x%lx  phys = 0x%lx  user = 0x%x\n",
				   off, kptr, phys, start + pos));

		/*
		 * pages allocated with the kernel allocators need to be reserved
		 * before being mapped into user space
		 *
		 * FIXME: should unreserve pages upon deallocation, or better yet,
		 * use the nopage technique from Rubini p.283 so we don't have to
		 * reserve at all
		 */


		prot = vma->vm_page_prot;

		if (gm_linux_phys_ok (phys)) {
			/* host memory: only copy blocks and receive queues
			   are expected here */
			gm_arch_lock_page(phys);
			gm_assert(MAPPING_OF_TYPE(copy_block, off) ||
					  MAPPING_OF_TYPE(recv_queue, off));
		}
		else {
			/* not host memory: the address must lie inside the
			   board window, anything else is refused */
			if ((phys < ps->instance->arch.phys_base_addr) ||
			    (phys >= (ps->instance->arch.phys_base_addr +
						  ps->instance->board_span))) {
				
				
				if (GM_DEBUG_MMAP)
				{
					if (MAPPING_OF_TYPE(copy_block, off)) {
						GM_PRINT (GM_PRINT_LEVEL >= 0,
								  ("mapping a copy block segment\n"));
					}
					else if (MAPPING_OF_TYPE(sram, off)) {
						GM_PRINT (GM_PRINT_LEVEL >= 0,
								  ("mapping an sram segment\n"));
					}
					else if (MAPPING_OF_TYPE(recv_queue, off)) {
						GM_PRINT (GM_PRINT_LEVEL >= 0,
								  ("mapping a recv_queue segment\n"));
					}
					else if (MAPPING_OF_TYPE(control_regs, off)) {
						GM_PRINT (GM_PRINT_LEVEL >= 0,
								  ("mapping a control_regs segment\n"));
					}
					else if (MAPPING_OF_TYPE(special_regs, off)) {
						GM_PRINT (GM_PRINT_LEVEL >= 0,
								  ("mapping a special_regs segment\n"));
					}
					else {
						GM_PRINT (GM_PRINT_LEVEL >= 0,
								  ("mapping unknown type of segment\n"));
					}
				}

				GM_WARN
					(("Bad physical address in gm_linux_mmap\n"
					  "    phys=0x%lx  base=0x%lx  span=0x%lx sum=0x%lx\n"
					  "    HIGH_THRESH = 0x%lx  HIGH_MEM = 0x%lx\n"
					  "    PAGE_ZERO = 0x%lx\n",
					  phys, ps->instance->arch.phys_base_addr,
					  ps->instance->board_span,
					  (ps->instance->arch.phys_base_addr
					   + ps->instance->board_span),
					  (unsigned long)GM_LINUX_HIGH_MEM_THRESHOLD, 
					  (unsigned long)GM_LINUX_HIGH_MEM, 
					  (unsigned long)GM_LINUX_PAGE_ZERO));
#ifdef CONFIG_BIGMEM
				{
					struct page *page;
					/*long pagenum;*/
					/*pagenum = GM_PHYS_MAP_NR(phys);
					  page = mem_map + pagenum;*/
					page = virt_to_page(__va(phys));
					if (PageBIGMEM(page)) {
						GM_PRINT (GM_PRINT_LEVEL >= 0,
								  ("page = 0x%lx is a BIGMEM page\n",
								   (unsigned long)page));
					}
				}
#endif

				goto abort_with_mutex;
			}

			/*
			 * disable caching - is this necessary?
			 * loic: yes from lanai access as done here
			 */
			pgprot_val(prot) &= ~GM_PAGE_CACHE;
			pgprot_val(prot) |= GM_PAGE_NOCACHE;

			gm_assert(!MAPPING_OF_TYPE(copy_block, off) &&
					  !MAPPING_OF_TYPE(recv_queue, off));
		}


		GM_PRINT
			(GM_DEBUG_MMAP,
			 ("phys = 0x%lx  remaparg = 0x%lx\n", phys, REMAPARG(phys)));

		if (remap_page_range(start + pos, REMAPARG(phys), PAGE_SIZE, prot)) {
			GM_PRINT (GM_DEBUG_MMAP, ("oops: remap page range failed\n"));
			/* leave through the common error path: a bare return
			   here would keep ps->sync locked forever */
			goto abort_with_mutex;
		}
	}

	status = gm_finish_mmap(ps, off, len, start);
	if (status != GM_SUCCESS) {
		GM_PRINT
			(GM_DEBUG_MMAP, ("whoops: gm_finish_mmap returned an error\n"));
		goto abort_with_mutex;
	}

	GM_PRINT (GM_DEBUG_MMAP, ("mmap was successful\n"));
	gm_arch_mutex_exit (&ps->sync);
	GM_RETURN_INT (0);

 abort_with_mutex:
	GM_PRINT (GM_DEBUG_MMAP, ("gm_linux_mmap() failed\n"));
	/* The mapping was not established, so linux_vm_close() must not
	   (and, once the hooks are removed, cannot) run for this VMA:
	   detach the hooks and give back the reference taken at the top,
	   otherwise the port could never be closed.  The count cannot
	   reach zero here because the open file still holds its own
	   reference. */
	vma->vm_ops = NULL;
	vma->vm_private_data = 0;
	ps->arch.ref_count -= 1;
	gm_arch_mutex_exit (&ps->sync);
	GM_RETURN_INT (-1);
}

/* open a device. */
/*
 * open() entry point of the character device.  Each board owns two
 * consecutive device minors, so the unit is minor / 2.  A fresh GM
 * port state is allocated for every open and cached in
 * fileP->private_data for the other entry points.
 *
 * Returns 0 on success, -ENODEV when the unit is out of range or has
 * no board attached, or the negated errno equivalent of the status
 * returned by gm_minor_alloc().
 */
static int
gm_linux_open(struct inode *inodeP, struct file *fileP)
{
	unsigned int unit, minor;
	gm_instance_state_t *is;
	gm_port_state_t *ps;
	gm_status_t status;
	
	GM_CALLED ();

	minor = MINOR(inodeP->i_rdev);
	unit = minor / 2;
	/* gm_instances[] is indexed by unit, so the unit is what has to be
	   bounded; testing the minor refused the upper half of the valid
	   units */
	if (unit >= GM_ARCH_MAX_INSTANCE)
		GM_RETURN_INT (-ENODEV);
	is = gm_instances[unit];
	if (!is)
		GM_RETURN_INT (-ENODEV);
	gm_assert(is->id == unit);

	/* Alloc and initialize a port state structure for the new open.
	   This code looks funny since Linux does not use GM minor numbers.
	   
	   NOTE: The linux-specific initialization of the port state is in
	   gm_arch_port_state_init(). */
	
	{
		int fake_minor;
		
		status = gm_minor_alloc (is, &fake_minor);
		if (status != GM_SUCCESS) {
			/* leave through GM_RETURN_INT like every other exit of
			   this function */
			GM_RETURN_INT (-gm_linux_localize_status (status));
		}
		ps = gm_minor_get_port_state (fake_minor);
	}
	
	/* Cache the port state so that we can avoid the hash table
	   lookup overhead of gm_minor_get_port_state() from now on. */
	
	fileP->private_data = ps;
	
	/* don't let the kernel unload the module while a device is open */
	
	MOD_INC_USE_COUNT;

	GM_RETURN_INT (0);
}

/* close a device */

/* Final teardown of a port: called once neither the file descriptor
   nor any user mapping references PS any more (ref_count must be 0).
   Frees the GM minor attached to the port and drops the module use
   count taken in gm_linux_open(). */
static void
gm_linux_port_close(gm_port_state_t * ps)
{
	gm_always_assert(ps->arch.ref_count == 0);
	GM_PRINT (GM_PRINT_LEVEL >= 3, ("gm_linux_port_close\n"));
	
	/* Finalize and free the port state in the usual GM way.  This
	   code looks funny since Linux does not use GM minor numbers. */

	gm_minor_free (ps->minor);
	
	/* balances the MOD_INC_USE_COUNT done at open time */
	MOD_DEC_USE_COUNT;
}



/*
 * release() entry point: called when the file descriptor goes away.
 * Drops the reference that the open file held on its port state.  The
 * port is destroyed right away when that was the last reference;
 * otherwise user mappings are still alive and the destruction is left
 * to linux_vm_close().  Always returns 0.
 */
static int
gm_linux_close(struct inode *inodeP, struct file *fileP)
{
	gm_port_state_t *ps;
	int ready_to_close;
	
	GM_CALLED ();

	ps = fileP->private_data;
	/* check the pointer before it is dereferenced for the mutex */
	gm_assert(ps);
	gm_arch_mutex_enter (&ps->sync);
	gm_always_assert(ps->arch.ref_count > 0);
	/* only close the port if there is no more mapping */
	ready_to_close = (--ps->arch.ref_count == 0);
	gm_arch_mutex_exit (&ps->sync);
	if (ready_to_close) {
		gm_linux_port_close(ps);
	}
	else {
		GM_PRINT (GM_PRINT_LEVEL >= 0,
				  ("App. closed file desc. while mappings still alive."
				   "  port destruct delayed\n"));
	}
	fileP->private_data = 0;
	/* leave through GM_RETURN_INT like the other entry points */
	GM_RETURN_INT (0);
}


/****************************************************************
 ****************************************************************
 * Linux Device Module Functions
 ****************************************************************
 ****************************************************************/

/* do we want a more verbose module initialization ? */
#define GM_VERBOSE_MODULE_INIT 1

/****************************************************************
 * Forward declarations
 ****************************************************************/

/* character device entry points (see gm_linux_file_ops) */
static int gm_linux_ioctl(struct inode *inodeP, struct file *fileP,
						  unsigned int cmd, unsigned long arg);

static int gm_linux_mmap(struct file *file,
						 struct vm_area_struct *vma);

static int gm_linux_open(struct inode *inodeP, struct file *fileP);

/* called when the file descriptor is released */
static int gm_linux_close(struct inode *inodeP,
								struct file *fileP);

/* called once the file descriptor has been closed and no user mapping
   remains */
static void gm_linux_port_close(gm_port_state_t * ps);

/* PCI driver callbacks (see gm_driver) */
static int gm_init_one (struct pci_dev *pdev,
							   const struct pci_device_id *ent);

static void gm_remove_one (struct pci_dev *pdev);

/* module teardown helpers */
static void gm_cleanup(void);

void gm_cleanup_module(void);

/* per-board initialization used by gm_init_one */
static gm_status_t gm_create_instance (struct pci_dev *dev);

/***********************************************************************
 * Loadable module required data structures
 ***********************************************************************/

/* File operations of the GM character device, registered with
   register_chrdev() in gm_init_module().  Only ioctl, mmap, open and
   release are implemented; the remaining members are left unset. */
static struct file_operations gm_linux_file_ops =
{
	ioctl:        gm_linux_ioctl,
	mmap:         gm_linux_mmap,
	open:         gm_linux_open,
	release:      gm_linux_close,
};

/* PCI IDs claimed by the driver: the Myrinet device ID under both
   Myricom vendor IDs, with any subsystem vendor/device.  The all-zero
   entry terminates the table.
   NOTE(review): the table is marked __initdata but is referenced from
   gm_driver for as long as the driver stays registered -- confirm this
   is safe if the driver is ever built into the kernel. */
static struct pci_device_id gm_pci_tbl[] __initdata = {
  { GM_PCI_VENDOR_MYRICOM, GM_PCI_DEVICE_MYRINET,
	PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
  { GM_PCI_VENDOR_MYRICOM2, GM_PCI_DEVICE_MYRINET,
	PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
  {0, },
};

/* PCI driver description handed to pci_module_init(): gm_init_one is
   the probe callback and gm_remove_one the remove callback for every
   device matching gm_pci_tbl. */
static struct pci_driver gm_driver = {
	name:      "Myricom GM driver",
	probe:     gm_init_one,
	remove:    gm_remove_one,
	id_table:  gm_pci_tbl,
};

/***********************************************************************
 * Module entry points.
 ***********************************************************************/


MODULE_AUTHOR("Myricom <help@myri.com>");
MODULE_DESCRIPTION("Myrinet GM driver");

/****************************************************************
 * gm_init_one
 *
 * Initializes one Myrinet card.  Called by the kernel pci
 *   scanning routines when the module is loaded.
 ****************************************************************/

static int __init
gm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
{
	/* ENT is not needed: all the work is done by gm_create_instance() */
	if ( gm_create_instance(pdev) != GM_SUCCESS )
	{
		GM_NOTE (("Failed to initialize Myrinet Card\n"));
		return -ENODEV;
	}

	return 0;
}

/****************************************************************
 * gm_remove_one
 *
 * Does what is necessary to shutdown one Myrinet device. Called
 *   once for each Myrinet card by the kernel when a module is
 *   unloaded.
 ****************************************************************/

static void __exit
gm_remove_one (struct pci_dev *pdev)
{
	gm_instance_state_t *is;
	
	/* the instance was stored here by gm_create_instance() */
	is = (gm_instance_state_t *) pdev->driver_data;

	gm_assert (is != NULL);
	gm_assert (gm_instances[is->id] == is);
	
	/* with gm_skip_init no interrupt, timer or IP device was set up
	   (gm_create_instance jumps over that part), so only unlink */
	if (gm_skip_init)
		goto unlink;

	gm_disable_interrupts(is);

	if (GM_VERBOSE_MODULE_INIT)
	{
		GM_INFO (("Freeing irq %d\n", is->arch.irq));
	}
	
	/* remove debugging timer (not used currently but...) */
	del_timer (&is->arch.timer);
	free_irq (is->arch.irq, is);
	gmip_finalize (is);
 unlink:
	/* NOTE(review): gm_instance_finalize() also runs when gm_skip_init
	   skipped gm_instance_init() -- confirm it copes with that.
	   NOTE(review): gm_num_instance is not decremented and
	   pdev->driver_data is left pointing at the freed instance --
	   confirm neither is used after removal. */
	gm_instance_finalize (is);
	gm_instances[is->id] = 0;
	gm_kfree (is);
		
}

/****************************************************************
 * gm_init_module
 *
 * The entry point for the module.
 ****************************************************************/

int __init
gm_init_module(void)
{
	int err;

	GM_CALLED ();
	
	GM_INFO (("GM driver version %s build %s\n",
			  _gm_version, _gm_build_id));
	GM_PRINT (GM_DEBUG_LANAI_DMA,
			  ("PAGE_ZERO=0x%lx,HIGH_MEM=0x%lx,HIGH_THRESH=0x%lx\n", 
			   GM_LINUX_PAGE_ZERO, GM_LINUX_HIGH_MEM,
			   GM_LINUX_HIGH_MEM_THRESHOLD));
	
	/* tested by value, like the #if that guards the directcopy code,
	   so a build with GM_ENABLE_DIRECTCOPY defined to 0 does not
	   announce a feature that was compiled out */
#if GM_ENABLE_DIRECTCOPY
	GM_INFO (("GM DIRECTCOPY enabled\n"));
#endif
	
	/* some sanity checking */
	gm_always_assert(sizeof(gm_u64_t) == 8);
	gm_always_assert(sizeof(gm_u32_t) == 4);
	gm_always_assert(sizeof(gm_u16_t) == 2);
	gm_always_assert(sizeof(gm_u8_t) == 1);

	if (gm_arch_page_len(&GM_PAGE_LEN) != GM_SUCCESS) {
		/* can't fail in Linux */
		gm_always_assert(0);
		GM_RETURN_INT (-ENXIO);
	}

	/*  this is arbitrary to prevent the user from bringing the system
		to a complete stop */
	gm_max_user_locked_pages = 
		((GM_LINUX_HIGH_MEM - GM_LINUX_PAGE_ZERO) * 3) / 4 / PAGE_SIZE;

	if (register_chrdev(GM_MAJOR, "gm", &gm_linux_file_ops)) {
		GM_NOTE (("failed to register character device\n"));
		GM_RETURN_INT (-EBUSY);
	}

	/* GM_PAGE_LEN is filled in through an unsigned long pointer above,
	   so print it as one */
	GM_PRINT (GM_VERBOSE_MODULE_INIT,
			  ("page size is %lu\n", (unsigned long) GM_PAGE_LEN));
	
	/* pci_module_init() unregisters the driver again before reporting
	   a failure, so the module must not stay loaded in that case:
	   gm_cleanup_module() would later unregister a driver that is no
	   longer registered.  Undo the character device registration and
	   propagate the error. */
	err = pci_module_init (&gm_driver);
	if (err) {
		GM_NOTE (("failed to register the PCI driver\n"));
		unregister_chrdev (GM_MAJOR, "gm");
		GM_RETURN_INT (err);
	}

	/* initialize the hash table to track locking virtual memory */
	gm_arch_phys_hash = gm_create_hash (gm_hash_compare_longs,
										gm_hash_hash_long,
										sizeof(unsigned long), sizeof (int),
										0, 0);
	
    if (!gm_arch_phys_hash) {
		GM_NOTE (("init_module: couldn't initialize physical memory"));
		_GM_NOTE (("  for the gm hash table\n"));
		/* both registrations succeeded, so the full teardown applies */
		gm_cleanup_module();
		GM_RETURN_INT (-ENODEV);
	}

	GM_RETURN_INT (0);
}

/****************************************************************
 * gm_cleanup_module
 *
 * Called when a module is removed.
 ****************************************************************/

void __exit
gm_cleanup_module(void)
{
	/* destroys the page-lock hash table and prints the leak counters */
	gm_cleanup ();
	GM_PRINT (GM_VERBOSE_MODULE_INIT,
			  ("Unregistering character device\n"));
	/* NOTE(review): the hash table is destroyed before the boards are
	   removed by pci_unregister_driver() -- confirm that
	   gm_remove_one() never needs gm_arch_phys_hash. */
	pci_unregister_driver (&gm_driver);
	unregister_chrdev (GM_MAJOR, "gm");
}


/* Module-wide cleanup that does not depend on a particular board:
   destroys the hash table used to track locked pages and, in GM_DEBUG
   builds, prints the allocation/free counters so that leaks can be
   spotted by comparing each pair. */
static void __exit
gm_cleanup(void)
{

	GM_CALLED ();
	
	/* the pointer is cleared so a second call is harmless */
	if (gm_arch_phys_hash) {
		gm_destroy_hash(gm_arch_phys_hash);
		gm_arch_phys_hash = (struct gm_hash *)0;
	}
	
#if GM_DEBUG
	GM_INFO (("memory leak info:\n"));
	_GM_INFO (("  kmallocs      = %d\n", kmalloc_cnt));
	_GM_INFO (("  kfrees        = %d\n", kfree_cnt));
	_GM_INFO (("  vmallocs      = %d\n", vmalloc_cnt));
	_GM_INFO (("  vfrees        = %d\n", vfree_cnt));
	_GM_INFO (("  ioremaps      = %d\n", ioremap_cnt));
	_GM_INFO (("  iounmaps      = %d\n", iounmap_cnt));
	_GM_INFO (("  dma allocs    = %d\n", dma_alloc_cnt));
	_GM_INFO (("  dma frees     = %d\n", dma_free_cnt));
	_GM_INFO (("  kernel allocs = %d\n", kernel_alloc_cnt));
	_GM_INFO (("  kernel frees  = %d\n", kernel_free_cnt));
	_GM_INFO (("  user buffer lock = %d\n", user_lock_cnt));
	_GM_INFO (("  user buffer unlock  = %d\n", user_unlock_cnt));
#endif
	
	GM_INFO (("GM: driver unloaded\n"));
#if GM_DEBUG_MALLOC
	gm_malloc_residual();
#endif
	GM_RETURN_NOTHING ();
}


static gm_pci_config_t pci_config;

/****************************************************************
 * gm_create_instance
 *
 * Initializes the myrinet card specified.  If the card is
 *   initialized correctly, it increments gm_num_instance
 *   and adds it into the device array.
 * Arguments:
 *   dev - a pointer to the pci structure for the myrinet card
 * Returns:
 *   GM_SUCCESS if card was initialized correctly
 *   GM_FAILURE otherwise
 ****************************************************************/

static gm_status_t
gm_create_instance(struct pci_dev *dev)
{
	int i;
	u_int class, iobase;
	gm_instance_state_t *is;
	unsigned short vendor, device;
	unsigned char byte_value;
	unsigned char *cptr;

	if (GM_VERBOSE_MODULE_INIT)
	{
		GM_INFO (("Configuring board bus: %d dev %d base: 0x%p, irq: %d\n",
				  dev->bus->number, PCI_SLOT(dev->devfn),
				  pci_resource_start(dev, 0), dev->irq));
	}
	
	is = (void *) gm_kmalloc(sizeof(*is), GFP_KERNEL);
	if (!is) {
		GM_WARN (("couldn't get memory for instance_state\n"));
		dev->driver_data = NULL;
		return GM_FAILURE;
	}
	memset(is, 0, sizeof(*is));

#if GM_DEBUG
	debug_is = is;
#endif


	is->arch.busbase = pci_resource_start(dev, 0);
	
	is->arch.phys_base_addr
		= GM_BASE2PHYS ((unsigned long) pci_resource_start(dev, 0));
	
	is->arch.irq = dev->irq;
	is->arch.pci_dev = dev;

	/* FIXME does it really belong in arch specific code */
	is->ifc.pci.config = pci_config;

	if (gm_skip_init)
		goto init_ok;

	/* generic board initialization; load MCP and stuff */
	if (gm_instance_init(is, gm_num_instance, GM_MYRINET_BUS_PCI)
		!= GM_SUCCESS)
	{
		GM_NOTE (("gm_instance_init failed\n"));
		goto error_instance_init;
	}

	if (request_irq(is->arch.irq, (void *) gm_linux_intr,
					SA_SHIRQ, "myri/gm", is) == 0)
	{
		GM_INFO (("Allocated IRQ%d\n", is->arch.irq));
	}
	else
	{
		GM_WARN (("Couldn't allocate IRQ%d\n", is->arch.irq));
		_GM_WARN ((" trying _with_ SA_INTERRUPT flag:\n"));
		
		if (request_irq(is->arch.irq, (void *) gm_linux_intr,
						SA_SHIRQ | SA_INTERRUPT, "myri/gm", is)) {
			GM_NOTE (("Couldn't allocate IRQ%d\n", is->arch.irq));
			goto error_irq_allocation;
		}
		
		_GM_WARN (("Allocated IRQ%d, _with_ SA_INTERRUPT flag.",
				  is->arch.irq));
		_GM_WARN (("VERY POOR PERFORMANCE\n"));
	}

	/*
	 * enable interrupts
	 */
	if (gm_enable_interrupts(is) != GM_SUCCESS) {
		GM_NOTE (("Cannot enable interrupts.\n"));
		goto error_enabling_interrupt;
	}

	/* add the IP device */
	if (gmip_init(is) != 0)
		goto error_enabling_ip;

  init_ok:

	if (GM_VERBOSE_MODULE_INIT) {
		GM_INFO (("Initialization succeeded for unit %d\n",
				  gm_num_instance));
	}
	
	gm_assert(gm_instances[gm_num_instance] == 0);
	gm_assert(gm_num_instance < GM_ARCH_MAX_INSTANCE);
	gm_instances[gm_num_instance] = is;
	gm_num_instance += 1;

	dev->driver_data = is;
	
	return GM_SUCCESS;

	/* ERROR Handling */
  error_enabling_ip:
	gm_disable_interrupts(is);
  error_enabling_interrupt:
	GM_PRINT (GM_PRINT_LEVEL >= 5, ("freeing irq %d\n", is->arch.irq));
	free_irq(is->arch.irq, is);
  error_irq_allocation:
	gm_instance_finalize(is);
  error_instance_init:
	gm_kfree(is);
	dev->driver_data = NULL;
	return GM_FAILURE;
}


module_init( gm_init_module );
module_exit( gm_cleanup_module );



/****************
 * KLUDGE: defeat the evil
 * linux macros.
 ****************/
#if !GM_CPU_powerpc && !GM_CPU_alpha && !GM_CPU_ia64
#undef memcpy
#undef memset

/* Needed to allow gcc structure copy to
   work. */
/* Out-of-line memcpy() for the structure copies that gcc emits as
   calls: copies BYTES bytes from FROM to DEST through __memcpy() and
   returns DEST. */
void *
memcpy(void *dest, const void *from, __kernel_size_t bytes)
{
	__memcpy(dest, from, bytes);
	return dest;
}

/* Out-of-line memset(): store C, converted to char, into each of the
   first N bytes of S, filling from the last byte down to the first,
   and return S. */
void *
memset(void *s, int c, __kernel_size_t n)
{
	char *bytes = (char *) s;
	__kernel_size_t remaining;

	for (remaining = n; remaining != 0; remaining--)
		bytes[remaining - 1] = c;

	return s;
}

#endif


/*
  This comment holds special emacs settings for this file.

  Local Variables:
  tab-width:4
  c-file-style:"bsd"
  End:
*/
