/*
 * Copyright (c) 2004-2005 Endace Technology Ltd, Hamilton, New Zealand.
 * All rights reserved.
 *
 * This source code is proprietary to Endace Technology Limited and no part
 * of it may be redistributed, published or disclosed except as outlined in
 * the written contract supplied with this product.
 *
 */

/* Pcap header */
#include <pcap.h>

/* File header. */
#include "inline_filter.h"

/* DAG headers. */
#include "dagapi.h"
#include "dag_platform.h"
#include "dagutil.h"

/* Other headers. */
#include <time.h>


/* Passed as the final argument of dag_attach_stream() in init_card(). */
#define BURST_MAX 4194304
/* Passed as the flags argument of dag_attach_stream() in init_card(). */
#define FLAGS 0
/* NOTE(review): not referenced anywhere in the visible part of this file. */
#define OPTBUFSIZE 1024


/* Unit variables. */
static int uDagfd;                    /* Descriptor from dag_open(uDeviceName). */
static int uTxDagfd;                  /* Transmit descriptor; equals uDagfd when both device names match. */
static int uKeepRunning = 1;          /* Cleared by the signal handler, the disk thread or a TX error to stop all loops. */
static int uDiskTreadRunning = 0;     /* Set to 1 while disk_thread() is inside its loop. */
static int uSwapInterface = 0;        /* Non-zero: init_filter() builds a 0<->1, 2<->3 interface swap table. */

/* Compiled forwarding filter and the expression it is compiled from. */
struct bpf_program uFilterProgram;
static const char* uFilterExpression = NULL;

static const char* uFileName         = NULL;   /* Name of the file records are stored in. */
struct bpf_program uFileFilterProg;
static const char* uFileFilterExpr   = NULL;   /* Filter expression selecting the records to store in the file. */
static int         uFileFd           = -1;     /* Output file descriptor, -1 while no file is open. */

static unsigned int uElapsedSeconds = 0;       /* Incremented once per timer_function() call. */
static char uDeviceName[1024];                 /* Receive device name. */
static char uTxDeviceName[1024];               /* Transmit device name. */
static int uRxStream;
static int uTxStream;
static int uStreamsAttached = 0;               /* Set once both streams are attached (see init_card/close_card). */
static int uStreamsStarted = 0;                /* Set once both streams are started (see init_card/close_card). */
static int uDoFilter = 0;                      /* Non-zero: filt_record() applies uFilterProgram. */
static int uRxLowLatency = 0;                  /* Non-zero: setup_streams() uses a zero receive poll interval. */
static int uDagFwdVerbosity = 0;               /* Non-zero: timer_function() prints extra counters. */
static int uDagFwdCopyTx = 0;                  /* Non-zero: run_copy_fwd() is used instead of run_over_lap_fwd(). */

/* Number of accumulated bytes at which the forwarding loops commit to the
 * transmit stream; also the size requested from dag_tx_get_stream_space(). */
static int tx_accum_bytes_max = TX_ACCUM_BYTES_MAX;

/* Statistics kept per receive interface. */
static uint64_t uPacketCount[4] = { 0, 0, 0, 0 };
static uint64_t uDropCount[4] = { 0, 0, 0, 0 };
static uint64_t uRejectCount[4] = { 0, 0, 0, 0 };
static uint64_t uPadCount[4] = { 0, 0, 0, 0 };
static uint64_t uDiskDrop  = 0;        /* Records that disk_cache_write() could not accept. */
static uint64_t uDiskWrite = 0;        /* Bytes written to the file by disk_cache_flush(). */
static uint64_t uDiskActualWrite = 0;  /* Bytes accepted into the cache by disk_cache_write(). */
static uint64_t uDiskWriteCall = 0;    /* Number of write() calls made by disk_cache_flush(). */


/* DAG API call counters; printed and reset by timer_function() in verbose mode. */
static uint32_t uDagApiAdvanceCnt = 0;
static uint32_t uDagApiTxGetSpaceCnt = 0;
static uint32_t uDagApiCommitCnt = 0;

/* Interface swap table: maps a receive interface to the output interface. */
static uint8_t iface_swap_table[4];


/* Default size of the in-memory disk cache. */
#define DISK_CACHE_SIZE   (1024*1024*256)  /* 256MB */

uint32_t disk_cache_size = DISK_CACHE_SIZE;

/* disk_cache is the 256-byte aligned view of disk_cache_alloc (see
 * open_output_file). The cache is used as a ring: disk_cache_write() advances
 * the end offset, disk_cache_flush() advances the start offset. */
uint8_t* disk_cache=NULL;
uint8_t* disk_cache_alloc=NULL;
uint32_t disk_cache_offset_start =0;
uint32_t disk_cache_offset_end   =0;

/* Internal routines. */
static void signal_handler(int sig);
static void init_card(void);
static void close_card(void);
static void free_filter_expression(void);
static void filt_record(uint8_t* packet);
static void setup_streams(unsigned int poll_usecs);
/* The two forwarding loops; run_inline_filter() picks one based on uDagFwdCopyTx. */
static void run_over_lap_fwd(unsigned int runtime_seconds);
static void run_copy_fwd(unsigned int runtime_seconds);

/* Disk cache: disk_cache_write() queues record bytes, disk_cache_flush() writes them to uFileFd. */
static int disk_cache_flush();
static int disk_cache_write(uint8_t* data, uint32_t size);


/* timer_function() prints the statistics; its signature depends on how the
 * platform invokes it. */
#if defined(__FreeBSD__) || defined(__linux__) || defined(__NetBSD__) || (defined(__SVR4) && defined(__sun)) || (defined(__APPLE__) && defined(__ppc__))

static void timer_function(int signal);

#elif defined(_WIN32)

static VOID CALLBACK timer_function(LPVOID lpArgToCompletionRoutine, DWORD dwTimerLowValue, DWORD dwTimerHighValue);
/*static void set_windows_timer(void);*/
static DWORD WINAPI timer_thread(LPVOID lpParam);

#endif /* Platform-specific code. */


/* Implementation of internal routines. */
/* Signal callback: asks every loop that polls uKeepRunning to stop. */
static void
signal_handler(int sig)
{
	(void) sig; /* The signal number itself is not needed. */

	uKeepRunning = 0;
}


/* Open the receive and transmit devices, select the stream modes, attach and
 * start both streams.
 *
 * Any failure ends in dagutil_panic(). On success uStreamsAttached and
 * uStreamsStarted are set so that close_card() knows what to undo.
 */
static void
init_card(void) 
{
	unsigned int rx_buffer;
	unsigned int tx_buffer;
	uint8_t erfs[TYPE_MAX];

	if ((uDagfd = dag_open(uDeviceName)) < 0)
		dagutil_panic("dag_open %s: %s\n", uDeviceName, strerror(errno));
	
	/* Share one descriptor when receive and transmit use the same device. */
	if (!strncmp(uDeviceName, uTxDeviceName, sizeof(uDeviceName)))
		uTxDagfd = uDagfd;
	else
		if ((uTxDagfd = dag_open(uTxDeviceName)) < 0)
			dagutil_panic("dag_open %s: %s\n", uTxDeviceName, strerror(errno));
	
	/* Even-numbered transmit streams are put in reverse mode (for use with
	 * vdag from the daemon side); odd-numbered ones stay in normal mode. */
	if( (uTxStream & 0x1)== 0) {
		if (dag_set_mode(uTxDagfd, uTxStream, DAG_REVERSE_MODE)) {
			dagutil_panic("Could not set reverse mode on %s:%d\n", uTxDeviceName, uTxStream);
		}
	} else 	{
		dag_set_mode(uTxDagfd, uTxStream, DAG_NORMAL_MODE);
	}

	/* Odd-numbered receive streams are put in reverse mode for the same reason. */
	if( (uRxStream & 0x1)== 1) {
		if (dag_set_mode(uDagfd, uRxStream, DAG_REVERSE_MODE)) {
			dagutil_panic("Could not set reverse mode on %s:%d\n", uDeviceName, uRxStream);
		}
	} else 	{
		dag_set_mode(uDagfd, uRxStream, DAG_NORMAL_MODE);
	}
		
	/* Attach transmit stream (errors name the transmit device). */
	if (dag_attach_stream(uTxDagfd, uTxStream, FLAGS, BURST_MAX) != 0)
		dagutil_panic("dag_attach_stream tx %s: %s uTxDagfd %d\n", uTxDeviceName, strerror(errno), uTxDagfd);

	/* Attach receive stream. */
	if (dag_attach_stream(uDagfd, uRxStream, FLAGS, BURST_MAX) != 0)
		dagutil_panic("dag_attach_stream %s: %s\n", uDeviceName, strerror(errno));

	uStreamsAttached = 1;
	
	/* Sanity check: unless copy mode is selected, both streams must report
	 * the same buffer size. */
	rx_buffer = dag_get_stream_buffer_size(uDagfd, uRxStream);
	tx_buffer = dag_get_stream_buffer_size(uTxDagfd, uTxStream);

	if (!uDagFwdCopyTx&&rx_buffer != tx_buffer)
	{
		close_card();
		dagutil_panic("DAG card does not appear to be correctly configured for inline operation\n"
					  "(receive buffer size = %u bytes, transmit buffer size = %u bytes).\n"
					  "Please run:\n"
					  "    dagthree -d %s default overlap     (for DAG 3 cards)\n"
					  "    dagfour -d %s default overlap      (for DAG 4 cards)\n"
					  "before using dagfwddemo.\n", 
					  rx_buffer, tx_buffer, uDeviceName, uDeviceName);
	}

	/* Give the transmit stream the same ERF types as the receive stream. */
	dag_get_stream_erf_types(uDagfd,   uRxStream, erfs, TYPE_MAX);
	dag_set_stream_erf_types(uTxDagfd, uTxStream, erfs);

	/* Start transmit stream (errors name the transmit device). */
	if (dag_start_stream(uTxDagfd, uTxStream) < 0)
		dagutil_panic("dag_start_stream %s: %s\n", uTxDeviceName, strerror(errno));

	/* Start receive stream. */
	if (dag_start_stream(uDagfd, uRxStream) < 0)
		dagutil_panic("dag_start_stream %s: %s\n", uDeviceName, strerror(errno));

	uStreamsStarted = 1;
}


/* Stop and detach both streams (when they were started/attached) and close
 * the device descriptors.
 *
 * Every cleanup call is made before any result is checked, so one failure
 * does not prevent the remaining cleanup. The first failure, in call order,
 * is then reported through dagutil_panic().
 */
static void 
close_card(void)
{
	int rx_stop_result = 0;
	int tx_stop_result = 0;
	int rx_detach_result = 0;
	int tx_detach_result = 0;
	int close_result = 0;
	int tx_close_result = 0;

	if (1 == uStreamsStarted)
	{
		/* Stop both before checking for errors. */
		rx_stop_result = dag_stop_stream(uDagfd, uRxStream);
		tx_stop_result = dag_stop_stream(uTxDagfd, uTxStream);

		uStreamsStarted = 0;
	}

	if (1 == uStreamsAttached)
	{
		/* Detach both before checking for errors. */
		rx_detach_result = dag_detach_stream(uDagfd, uRxStream);
		tx_detach_result = dag_detach_stream(uTxDagfd, uTxStream);

		uStreamsAttached = 0;
	}

	close_result = dag_close(uDagfd);
	/* The transmit descriptor is only separate when the devices differ. */
	if(uDagfd != uTxDagfd)
		tx_close_result = dag_close(uTxDagfd);

	/* Check error codes. Transmit-side failures name the transmit device.
	 * NOTE(review): errno may have been overwritten by a later cleanup call
	 * by the time it is formatted here. */
	if (rx_stop_result != 0)
		dagutil_panic("dag_stop_stream(%s, %d): %s\n", uDeviceName, uRxStream, strerror(errno));

	if (tx_stop_result != 0)
		dagutil_panic("dag_stop_stream(%s, %d): %s\n", uTxDeviceName, uTxStream, strerror(errno));

	if (rx_detach_result != 0)
		dagutil_panic("dag_detach_stream(%s, %d): %s\n", uDeviceName, uRxStream, strerror(errno));
	
	if (tx_detach_result != 0)
		dagutil_panic("dag_detach_stream(%s, %d): %s\n", uTxDeviceName, uTxStream, strerror(errno));

	if (close_result < 0)
		dagutil_panic("dag_close(%s): %s\n", uDeviceName, strerror(errno));

	if (tx_close_result < 0)
		dagutil_panic("dag_close(%s): %s\n", uTxDeviceName, strerror(errno));
}


/* Release the stored forwarding filter expression, if any. */
static void
free_filter_expression(void)
{
	/* free(NULL) is a no-op, so no NULL guard is required. */
	free((char*) uFilterExpression);
	uFilterExpression = NULL;
}

/* Filter one ERF record in place.
 *
 * Two filters may be configured: one for forwarding and one for disk writing.
 * Without a disk filter, only records that pass the forwarding filter are
 * written to disk. The disk filter does not affect forwarding at all.
 *
 * Records that must not be forwarded (pad records, records that arrived with
 * an error, records rejected by the forwarding filter) leave this function
 * with flags.rxerror set; run_copy_fwd() skips such records. Forwarded
 * records get their interface rewritten through iface_swap_table.
 *
 * packet: start of the ERF record (its header is modified in place).
 */
static void
filt_record(uint8_t* packet) 
{
	dag_record_t* record = (dag_record_t*) packet;
	/* The per-interface counters have four slots; keep the index inside them. */
	uint32_t iface = record->flags.iface & 0x3;
	/* The filters are run on the bytes that follow the ERF header plus two
	 * further bytes. */
	const uint32_t payload_offset = (uint32_t) dag_record_size + 2;
	uint8_t* payload = packet + payload_offset;
	uint32_t rlen = ntohs(record->rlen);
	/* Only the bytes that belong to this record may be shown to the filter;
	 * passing the full record length would let it read past the record end. */
	uint32_t payload_len = (rlen > payload_offset) ? (rlen - payload_offset) : 0;
	int write_flag = 0;

	uPacketCount[iface]++;

	if (TYPE_PAD == record->type)
	{
		uPadCount[iface]++;
		record->flags.rxerror = 1;
		return;
	}

	if (1 == record->flags.rxerror)
	{
		/* Packet arrived bad, so drop it. */
		uDropCount[iface]++;
		return;
	}

	/* A configured file name means records are candidates for disk writing;
	 * the disk filter, when present, can veto that. */
	if (uFileName)
		write_flag = 1;

	/* NOTE(review): the wire length argument is still the record length, as
	 * before; confirm whether the ERF wlen field should be used instead. */
	if (uFileFilterExpr
		&& bpf_filter(uFileFilterProg.bf_insns, payload, rlen, payload_len) == 0)
	{
		/* The disk filter rejected this record: do not write it. */
		write_flag = 0;
	}

	if (uDoFilter
		&& bpf_filter(uFilterProgram.bf_insns, payload, rlen, payload_len) == 0)
	{
		/* The forwarding filter rejected this record. */
		uRejectCount[iface]++;

		/* A dedicated disk filter can still ask for the record to be stored. */
		if (write_flag && uFileFilterExpr)
		{
			if (disk_cache_write(packet, rlen) < 0)
				uDiskDrop++;
		}

		record->flags.rxerror = 1;
		return;
	}

	if (uFileFd != -1 && write_flag)
	{
		if (disk_cache_write(packet, rlen) < 0)
			uDiskDrop++;
	}

	/* Set packet output on the 'other' interface. */
	record->flags.iface = iface_swap_table[iface];
}


/* Configure and print the polling parameters of both streams.
 *
 * poll_usecs: receive poll interval in microseconds, used unless low latency
 *             mode is set, in which case the receive stream is hard-polled.
 *
 * Only the tv_usec members are changed; tv_sec keeps whatever
 * dag_get_stream_poll() reported.
 */
static void
setup_streams(unsigned int poll_usecs)
{
	/* Zero-initialised so the values are defined even if the get call
	 * leaves them untouched. */
	struct timeval maxwait = { 0, 0 };
	struct timeval poll = { 0, 0 };
	uint32_t mindata = dag_record_size;

	/*
	 * Continuously poll the streams.
	 * Note: poll=0 means that we are not sleeping between polls.  This
	 * requires a lot of CPU, but provides reasonable delays when forwarding
	 * at low rates. When the traffic bandwidth is high or if you don't mind
	 * having big delays with low bandwidth, leave the poll parameter
	 * unchanged.
	 */

	dag_get_stream_poll (uDagfd, uRxStream, &mindata, &maxwait, &poll);

	/* Hard poll in low latency mode, otherwise sleep poll_usecs when no
	 * data is available to read. */
	poll.tv_usec = uRxLowLatency ? 0 : poll_usecs;
	maxwait.tv_usec = 100000; /* instead of zero. */

	dag_set_stream_poll (uDagfd, uRxStream, mindata, &maxwait, &poll);
	printf("Rx stream %s:%d, mindata: %u, maxwait: %u.%06u, poll: %u.%06u\n",
	       uDeviceName, uRxStream,
		mindata, (unsigned) maxwait.tv_sec, (unsigned) maxwait.tv_usec,
		(unsigned) poll.tv_sec, (unsigned) poll.tv_usec);

	dag_get_stream_poll (uTxDagfd, uTxStream, &mindata, &maxwait, &poll);
	poll.tv_usec = 1000; /* instead of 10000 */
	maxwait.tv_usec = 100000; /* instead of zero */
	dag_set_stream_poll (uTxDagfd, uTxStream, mindata, &maxwait, &poll);
	/* All values are unsigned, so they are printed with %u like the Rx line. */
	printf("Tx stream %s:%d, mindata: %u, maxwait: %u.%06u, poll: %u.%06u\n",
	       uTxDeviceName, uTxStream,
		mindata, (unsigned) maxwait.tv_sec, (unsigned) maxwait.tv_usec,
		(unsigned) poll.tv_sec, (unsigned) poll.tv_usec);
}


/* Periodic statistics report.
 *
 * Prints, per receive interface and in total, the counters accumulated since
 * the previous call next to the running totals, then the optional DAG API and
 * disk statistics, and finally advances uElapsedSeconds by one.
 *
 * NOTE(review): on the non-Windows platforms this is installed through
 * dagutil_set_timer_handler() and takes a signal number; if it really runs in
 * signal context, printf()/fflush() are not async-signal-safe - confirm.
 */
#if defined(__FreeBSD__) || defined(__linux__) || defined (__NetBSD__) || (defined(__SVR4) && defined(__sun)) || (defined(__APPLE__) && defined(__ppc__))

static void
timer_function(int signal)

#elif defined(_WIN32)

static VOID 
CALLBACK
timer_function(LPVOID lpArgToCompletionRoutine,
			   DWORD dwTimerLowValue,
			   DWORD dwTimerHighValue)

#endif /* Platform-specific code. */
{
	/* Counter values seen at the previous call, used to print deltas. */
	static uint64_t old_packet_count[4] = { 0, 0, 0, 0 };
	static uint64_t old_reject_count[4] = { 0, 0, 0, 0 };
	static uint64_t old_drop_count[4] = { 0, 0, 0, 0 };
	static uint64_t old_pad_count[4] = { 0, 0, 0, 0 };

	static uint64_t old_disk_drop = 0;
	static uint64_t old_disk_write_call = 0;

	static uint64_t old_disk_write = 0;
	static uint64_t old_disk_actual_write = 0;
	
	/* Index 0: received, 1: errored (dropped), 2: rejected, 3: pad. */
	uint64_t sum[4] = { 0, 0, 0, 0 };
	uint64_t incremental[4] = { 0, 0, 0, 0 };

	int c;

	for (c = 0; c < 4; c++) {
		const uint64_t received = uPacketCount[c] - old_packet_count[c];
		const uint64_t errored = uDropCount[c] - old_drop_count[c];
		const uint64_t rejected = uRejectCount[c] - old_reject_count[c];
		const uint64_t padded = uPadCount[c] - old_pad_count[c];

		printf ("%s:%d Interface %d -> %s:%d Interface %d\n", uDeviceName, uRxStream, c,
			uTxDeviceName, uTxStream, iface_swap_table[c]);

		printf ("  Received: %10" PRIu64 " Total %10" PRIu64 "\n",
			received, uPacketCount[c]);
		sum[0] += uPacketCount[c];
		incremental[0] += received;

		printf ("  Errored:  %10" PRIu64 " Total %10" PRIu64 "\n",
			errored, uDropCount[c]);
		sum[1] += uDropCount[c];
		incremental[1] += errored;

		printf ("  Rejected: %10" PRIu64 " Total %10" PRIu64 "\n",
			rejected, uRejectCount[c]);
		sum[2] += uRejectCount[c];
		incremental[2] += rejected;

		printf ("  Pad:      %10" PRIu64 " Total %10" PRIu64 "\n",
			padded, uPadCount[c]);
		sum[3] += uPadCount[c];
		incremental[3] += padded;
	}
		
	printf ("Totals\n");

	printf ("  Received: %10" PRIu64 " Total %10" PRIu64 "\n",
		incremental[0], sum[0]);

	printf ("  Errored:  %10" PRIu64 " Total %10" PRIu64 "\n",
		incremental[1], sum[1]);
	
	printf ("  Rejected: %10" PRIu64 " Total %10" PRIu64 "\n",
		incremental[2], sum[2]);
	
	printf ("  Pad:      %10" PRIu64 " Total %10" PRIu64 "\n",
		incremental[3], sum[3]);

	if(uDagFwdVerbosity)
	{
		/* These counters are uint32_t, so they need an unsigned conversion.
		 * They are per-interval values: reset after every report. */
		printf("dag_advance_stream:   %10" PRIu32 "\ndag_tx_commit_bytes:  %10" PRIu32 "\ndag_tx_get_space:     %10" PRIu32 "\n",
		     uDagApiAdvanceCnt,uDagApiCommitCnt,uDagApiTxGetSpaceCnt);
		uDagApiAdvanceCnt=0;
		uDagApiCommitCnt=0;
		uDagApiTxGetSpaceCnt = 0;
	}

	if(uFileName)
	{
		printf ("DiskDrop : %9" PRIu64 "  %10" PRIu64 " WriteSysCall: %9" PRIu64 "  %10" PRIu64 "\n",
				uDiskDrop - old_disk_drop, uDiskDrop,
				uDiskWriteCall - old_disk_write_call, uDiskWriteCall);
			
		if(uDagFwdVerbosity)
		{
			printf ("DiskWrite: %9" PRIu64 "  %10" PRIu64 " ActualWrite:  %9" PRIu64 "  %10" PRIu64 "\n",
					uDiskWrite - old_disk_write, uDiskWrite,
					uDiskActualWrite - old_disk_actual_write, uDiskActualWrite);
			printf ("DiskCacheOffset Start: %9u End:         %10u \n",
			        disk_cache_offset_start,disk_cache_offset_end);
		}
		else
		{
			/* Non-verbose mode reports the bytes accepted into the cache. */
			printf ("DiskWrite: %9" PRIu64 "  %10" PRIu64 "\n",
					uDiskActualWrite - old_disk_actual_write, uDiskActualWrite);
		}
	}
	printf("\n");

	fflush(stdout);

	/* Remember the current values for the next report. */
	for (c = 0; c < 4; c++) {
		old_packet_count[c] = uPacketCount[c];
		old_drop_count[c] = uDropCount[c];
		old_reject_count[c] = uRejectCount[c];
		old_pad_count[c] = uPadCount[c];
	}

	old_disk_drop = uDiskDrop;
	old_disk_write_call = uDiskWriteCall;
	old_disk_write = uDiskWrite;
	old_disk_actual_write = uDiskActualWrite;

	uElapsedSeconds++;
}


#if defined(_WIN32)
#if 0
/* Disabled alternative: drive timer_function() from a Windows timer. */
static void
set_windows_timer(void)
{
	HANDLE hTimer = NULL;
	LARGE_INTEGER liDueTime;
	
	liDueTime.QuadPart = -10000000; /* set the timer to expire every second. */

	/* Create a timer. */
	if (SetTimer(NULL, 34, 1000, (TIMERPROC) timer_function) == 0)
		dagutil_panic("Set Timer failed (%d)\n", GetLastError());
}
#endif

/* Windows reporting thread: calls timer_function() once per second until
 * uKeepRunning is cleared. Always returns 0. */
static DWORD WINAPI
timer_thread(LPVOID lpParam)
{
#if 0
	MSG msg;

	set_windows_timer();

	while (uKeepRunning == 1)
	{
		while (PeekMessage(&msg, NULL, 0, 0, PM_QS_POSTMESSAGE | PM_REMOVE) != 0)
		{
			DispatchMessage(&msg);
		}

		Sleep(0); /* give up rest of timeslice */
	}
	return 0;
#else
	while(uKeepRunning==1)
	{
		timer_function(NULL,0,0);
		Sleep(1000);
	}
	/* The thread routine returns DWORD, so it must not fall off the end. */
	return 0;
#endif
}
#endif /* _WIN32 */



/* Externally accessible routines. */
/* Select the receive device and stream.
 *
 * name:   device name; copied, and truncated if it does not fit the
 *         internal buffer.
 * stream: receive stream number.
 */
void
set_device_name(const char* name, const int stream) 

{
	/* strncpy() does not terminate the destination when the source fills
	 * it, so copy one byte less and terminate explicitly. */
	strncpy(uDeviceName, name, sizeof(uDeviceName) - 1);
	uDeviceName[sizeof(uDeviceName) - 1] = '\0';
	uRxStream = stream;
}


/* Select the transmit device and stream.
 *
 * name:   device name; copied, and truncated if it does not fit the
 *         internal buffer.
 * stream: transmit stream number.
 */
void
set_tx_device_name(const char* name, const int stream) 

{
	/* strncpy() does not terminate the destination when the source fills
	 * it, so copy one byte less and terminate explicitly. */
	strncpy(uTxDeviceName, name, sizeof(uTxDeviceName) - 1);
	uTxDeviceName[sizeof(uTxDeviceName) - 1] = '\0';
	uTxStream = stream;
}


/* Set the forwarding filter expression and enable forwarding filtering.
 *
 * str: filter expression; a private copy is kept. NULL removes any
 *      previously set expression and disables forwarding filtering.
 *
 * Panics when the copy cannot be allocated: enabling the filter without an
 * expression would later run a filter program that was never compiled.
 */
void
set_filter_expression(const char* str)
{
	free_filter_expression();
	uDoFilter = 0;

	if (NULL == str)
		return;

	uFilterExpression = strdup(str);
	if (NULL == uFilterExpression)
		dagutil_panic("set_filter_expression: %s\n", strerror(errno));

	uDoFilter = 1;
}

/* Remember the output file name. Only the pointer is stored, no copy is
 * made, so the string has to remain valid while it is in use. */
void
set_file_name(const char* filename)
{
	uFileName = filename;
}
/* Set the filter expression that selects the records written to disk.
 *
 * file_filter: filter expression; a private copy is kept. NULL removes any
 *              previously set expression.
 *
 * A copy made by an earlier call is released first. Panics when the new copy
 * cannot be allocated.
 */
void set_file_filter_expr(const char* file_filter)
{ 
	free((char*) uFileFilterExpr);
	uFileFilterExpr = NULL;

	if (NULL == file_filter)
		return;

	uFileFilterExpr = strdup(file_filter);
	if (NULL == uFileFilterExpr)
		dagutil_panic("set_file_filter_expr: %s\n", strerror(errno));
}


/* Store the receive low latency setting; when non-zero, setup_streams()
 * polls the receive stream without sleeping. */
void
set_low_latency(unsigned int rx)
{
	uRxLowLatency = rx;
}

/* Request interface swapping; init_filter() then maps 0<->1 and 2<->3. */
void
set_interface_swap(void)
{
	uSwapInterface = 1;
}

/* Turn on the extra counters printed by the periodic statistics report. */
void
set_dag_fwd_verbosity()
{
	uDagFwdVerbosity = 1;
}
/* Set the size of the in-memory disk cache.
 *
 * size_in_mb: requested size in megabytes. The size is kept in a 32-bit byte
 *             count, so requests that do not fit are reduced to the largest
 *             whole number of megabytes that does (with a warning) instead
 *             of silently wrapping around.
 */
void set_disk_cache_size(unsigned long size_in_mb)
{
	const unsigned long bytes_per_mb = 1024UL * 1024UL;
	const unsigned long max_mb = 0xFFFFFFFFUL / bytes_per_mb;

	if (size_in_mb > max_mb)
	{
		fprintf(stderr, "disk cache size %luMB is too large, using %luMB\n",
		        size_in_mb, max_mb);
		size_in_mb = max_mb;
	}

	disk_cache_size = (uint32_t) (size_in_mb * bytes_per_mb);
}

/* Select copy forwarding: run_inline_filter() then uses run_copy_fwd(). */
void
set_copy_transmit()
{
	uDagFwdCopyTx = 1;
}

/* Set the number of accumulated bytes at which the forwarding loops commit
 * to the transmit stream.
 *
 * size_KB: threshold in kilobytes. The threshold is kept in an int, so
 *          requests that do not fit are reduced to the largest whole number
 *          of kilobytes that does (with a warning) instead of overflowing.
 */
void set_tx_accum_bytes(unsigned long size_KB)
{
	/* Largest value an int can hold, computed without extra headers. */
	const unsigned long int_max = (unsigned long) (((unsigned int) -1) >> 1);
	const unsigned long max_kb = int_max / 1024UL;

	if (size_KB > max_kb)
	{
		fprintf(stderr, "transmit accumulation size %luKB is too large, using %luKB\n",
		        size_KB, max_kb);
		size_KB = max_kb;
	}

	tx_accum_bytes_max = (int) (size_KB * 1024UL);
}


int
init_filter(void)
{
	int link_type = DLT_EN10MB;
	pcap_t* pcap_handle=NULL;
	pcap_t* p=NULL;
	char buffer[BUFSIZ];
	char* device = NULL;

	/*set swapping interface first*/
	if(uSwapInterface)
	{
		printf("Swapping interface is set\n");
		iface_swap_table[0]=1;
		iface_swap_table[1]=0;
		iface_swap_table[2]=3;
		iface_swap_table[3]=2;
	}
	else
	{
		printf("Swapping interface is not set\n");
		iface_swap_table[0]=0;
		iface_swap_table[1]=1;
		iface_swap_table[2]=2;
		iface_swap_table[3]=3;
	}
	
	if(uFilterExpression ==NULL && uFileFilterExpr==NULL)
		return 0;
	

	/* Find out the linktype */
	device = pcap_lookupdev(buffer);
	p = pcap_open_live(device, 128, 1, 0, buffer);
	if (NULL != p)
	{
		link_type = pcap_datalink(p);
		pcap_close(p);
	}
	if(uFilterExpression)
	{
		pcap_t* pcap_handle;
		printf("Fowarding using filter    :%s\n",uFilterExpression);
		pcap_handle = pcap_open_dead(link_type,BUFSIZ);
		if(-1==pcap_compile(pcap_handle,&uFilterProgram,(char*) uFilterExpression,1,0))
		{
			printf("Filter Error failed :%s\n",pcap_geterr(pcap_handle));;
			return -1;
		}
		pcap_close(pcap_handle);
	}
	if(uFileFilterExpr)
	{
		printf("Disk Writing using filter :%s\n",uFileFilterExpr);
		pcap_handle = pcap_open_dead(link_type,BUFSIZ);
		if(-1==pcap_compile(pcap_handle,&uFileFilterProg,(char*) uFileFilterExpr,1,0))
		{
			printf("Filter Error failed :%s\n",pcap_geterr(pcap_handle));;
			return -1;
		}
		pcap_close(pcap_handle);
	}
	else
	{
		printf("Disk Writing using filter :%s\n",uFilterExpression);
	}
	return 0;
}


#if defined(__FreeBSD__) || defined(__linux__) || defined(__NetBSD__) || (defined(__SVR4) && defined(__sun)) || (defined(__APPLE__) && defined(__ppc__))

/* Disk-writing thread: flushes the disk cache to the output file until
 * uKeepRunning is cleared. It sleeps 1ms whenever the cache is empty and
 * stops the whole program (clears uKeepRunning) when a flush fails.
 * uDiskTreadRunning is 1 for as long as the loop is active. */
static void*
disk_thread( void* lpParameter )
{
	struct timespec idle_delay;
	int ret;

	(void) lpParameter;

	idle_delay.tv_sec = 0;
	idle_delay.tv_nsec = 1000000;  /*1ms*/
	
	uDiskTreadRunning = 1;
	while(uKeepRunning)
	{
		ret=disk_cache_flush();
		if(ret==0)
		{
			nanosleep( &idle_delay, NULL );
		}
		else if(ret==-1)
		{
			uKeepRunning=0;
		}
	}
	
	uDiskTreadRunning = 0;
	return NULL;
}

/* Start the disk-writing thread. Returns 0 on success, -1 on failure. */
static int
start_disk_thread( void )
{
	pthread_t thread;
	/* pthread_create() reports failure by returning an error number; it
	 * neither returns a negative value nor sets errno. */
	int res = pthread_create( &thread, NULL, disk_thread, NULL );

	if( 0 != res )
	{
		fprintf( stderr, "Failed to create reporting thread: %s\n", strerror( res ) );
		return -1;
	}

	/* The thread is never joined (completion is observed through
	 * uDiskTreadRunning), so detach it to let its resources be reclaimed. */
	pthread_detach( thread );
	return 0;
}

#elif defined(_WIN32)

/* Windows disk-writing thread: flushes the disk cache until uKeepRunning is
 * cleared, sleeping 1ms whenever there is nothing to flush. A failed flush
 * clears uKeepRunning. uDiskTreadRunning is 1 while the loop is active. */
static DWORD WINAPI
disk_thread( LPVOID lpParameter )
{
	uDiskTreadRunning = 1;

	while (uKeepRunning)
	{
		const int flushed = disk_cache_flush();

		if (flushed == -1)
		{
			uKeepRunning = 0;
		}
		else if (flushed == 0)
		{
			Sleep(1);
		}
	}

	uDiskTreadRunning = 0;
	return 0;
}

static int
start_disk_thread( void )
{
	if( CreateThread(
		NULL,
		0,
		disk_thread,
		NULL,
		0,
		NULL) == NULL )
	{
		printf( "Failed to create reporting thread\n" );
		return -1;
	}
	return 0;
}
#endif /* _WIN32 */


/* Open (create/truncate) the output file named by uFileName, allocate the
 * disk cache and start the disk-writing thread.
 *
 * Returns 0 on success and -1 on failure. On failure everything acquired
 * here is released again and uFileFd is back at -1.
 */
int open_output_file()
{
#if defined(__FreeBSD__) || defined(__linux__) || defined (__NetBSD__) || (defined(__SVR4) && defined(__sun)) || (defined(__APPLE__) && defined(__ppc__))
	uFileFd = open(uFileName, O_RDWR|O_CREAT|O_TRUNC|O_LARGEFILE, 0664);
#elif defined(_WIN32)
	HANDLE hFile;
	hFile = CreateFile(uFileName,                 /* name of file to create */
				GENERIC_READ | GENERIC_WRITE,  /* permissions of file to create */
				FILE_SHARE_READ,               /* sharing permissions */
				NULL,                          /* security attributes for inheritance */
				CREATE_ALWAYS,                 /* action to take on files that exist */
				FILE_ATTRIBUTE_NORMAL,         /* file flags and attributes */
				NULL);                         /* file template for creation attributes */

	if (INVALID_HANDLE_VALUE == hFile)
	{
		LPTSTR message_buffer = NULL;
		FormatMessage(
			FORMAT_MESSAGE_ALLOCATE_BUFFER | 
			FORMAT_MESSAGE_FROM_SYSTEM | 
			FORMAT_MESSAGE_IGNORE_INSERTS,
			NULL,
			GetLastError(),
			MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), /* default language*/
			(LPTSTR) &message_buffer,
			0,
			NULL);

		fprintf(stderr, "open(): %s\n", message_buffer);
		LocalFree(message_buffer);
		/* The caller tests for -1, so report failure with that value. */
		return -1;
	}

	uFileFd = _open_osfhandle((intptr_t) hFile, _O_APPEND | _O_CREAT | _O_TRUNC);
#endif /* Platform-specific code. */

	if(uFileFd == -1)
	{
		dagutil_panic("open %s: %s\n", uFileName, strerror(errno));
		return -1;
	}

	/* 256 spare bytes leave room for aligning the cache start below. */
	disk_cache_alloc = malloc((size_t) disk_cache_size + 256);
	
	printf("using %uMB of memory for disk buffer\n",disk_cache_size/(1024*1024));

	if(disk_cache_alloc==NULL)
	{
		/* Allocation failed: release the file opened above. */
		printf("allocate memory cache failed, size %uMB\n"
		       "Try use -m xxxm to change the size of disk buffer",
		       disk_cache_size/(1024*1024));
		close(uFileFd);
		uFileFd = -1;
		return -1;		
	}
	disk_cache = (uint8_t*)((uintptr_t)(disk_cache_alloc+255) & ~0xFF);  /*256 bytes alignment*/

	if(0!=start_disk_thread())
	{
		/* Thread creation failed: release everything acquired above. */
		close(uFileFd);
		uFileFd = -1;
		free(disk_cache_alloc);
		disk_cache_alloc = NULL;
		disk_cache = NULL;
		return -1;
	}
	return 0;
}

void close_file()
{
	if(uFileFd)
		close(uFileFd);
	free(disk_cache_alloc);
}



/* Queue `size` bytes from `data` in the in-memory disk cache.
 *
 * Called from the forwarding side; nothing is written to the file here. The
 * disk-writing thread later drains the cache through disk_cache_flush().
 *
 * Returns `size` when the bytes were queued (or when no output file is
 * configured, in which case nothing is stored) and -1 when the cache does
 * not have `size` + 8 bytes free.
 */
int disk_cache_write(uint8_t* data, uint32_t size)
{
	uint32_t read_pos;
	uint32_t write_pos;
	uint32_t available;
	uint32_t until_wrap;

	if (NULL == uFileName)
		return size;

	read_pos = disk_cache_offset_start;
	write_pos = disk_cache_offset_end;

	/* Free space of the ring: everything outside [read_pos, write_pos). */
	available = (write_pos >= read_pos)
		? (disk_cache_size - write_pos + read_pos)
		: (read_pos - write_pos);

	if (available < size + 8)
		return -1;

	uDiskActualWrite += size;

	/* Bytes left before the write position reaches the end of the cache. */
	until_wrap = disk_cache_size - write_pos;

	if (until_wrap > size)
	{
		/* No wrap: a single copy is enough. */
		memcpy(disk_cache + write_pos, data, size);
		disk_cache_offset_end = write_pos + size;
	}
	else
	{
		/* The data reaches the end of the cache: fill up to the end and
		 * continue at the beginning. */
		memcpy(disk_cache + write_pos, data, until_wrap);
		memcpy(disk_cache, data + until_wrap, size - until_wrap);
		disk_cache_offset_end = size - until_wrap;
	}

	return size;
}

/* the disk-writing thread will call this function to flush data to disk, 
 */
int disk_cache_flush()
{
	int flush_size;
	int ret_val=0;
	if(disk_cache_offset_end>disk_cache_offset_start)
	{/*no wrap of offset_end*/
		flush_size = disk_cache_offset_end - disk_cache_offset_start;
	}
	else if(disk_cache_offset_end<disk_cache_offset_start)
	{/*offset_end wrapped, first write to the end of the CACHE*/
		flush_size = disk_cache_size - disk_cache_offset_start;
		if(flush_size != write(uFileFd, disk_cache+disk_cache_offset_start, flush_size))
		{
			perror("write file error, exit");
			return -1;
		}
		uDiskWriteCall++;
		ret_val +=flush_size;
		disk_cache_offset_start = 0;
		flush_size = disk_cache_offset_end;
	}
	else /*==*/
	{/*no data at all, return 0, disk_thread will sleep for 1ms, */
		return 0;
	}
	/* write rest of the part to disk*/
	if(flush_size != write(uFileFd, disk_cache+disk_cache_offset_start, flush_size))
	{
		perror("write file error, exit");
		return -1;
	}
	disk_cache_offset_start +=flush_size;
	uDiskWriteCall++;
	ret_val += flush_size;
	uDiskWrite+=ret_val;
	return ret_val;
}


#ifdef DEBUG_FWD_DEMO
/* Debug histogram of how many records were accumulated per transmit commit,
 * in buckets of ten records. */
#define SEND_COUNT_STAT_MAX 100
uint32_t send_statis_count[SEND_COUNT_STAT_MAX+1];

/* Print every histogram bucket as "<first>-<last>:<count>". */
void printf_statis()
{
	int bucket;

	for (bucket = 0; bucket <= SEND_COUNT_STAT_MAX; bucket++)
	{
		/* The counters are uint32_t, so print them as unsigned. */
		printf("%-4d-%-4d:%-10u \n", bucket*10+1, bucket*10+10,
		       (unsigned) send_statis_count[bucket]);
	}
}
#endif

void
run_inline_filter(unsigned int runtime_seconds, unsigned int poll_usecs) 
{
#if defined(_WIN32)
	DWORD dwThreadId;
#endif /* _WIN32 */

	if(uFileName)
	{
		if(-1==open_output_file())
		{
			return;
		}
	}
	init_card();
	setup_streams(poll_usecs);

#if defined(__FreeBSD__) || defined(__linux__) || defined (__NetBSD__) || (defined(__SVR4) && defined(__sun)) || (defined(__APPLE__) && defined(__ppc__))

	dagutil_set_timer_handler(timer_function, 1);

#elif defined(_WIN32)
	CreateThread(NULL, 0, timer_thread, NULL, 0, &dwThreadId);

#endif /* Platform-specific code. */

	dagutil_set_signal_handler(signal_handler);

	/* main forwarding loop function*/
	if(uDagFwdCopyTx)
	{
		run_copy_fwd(runtime_seconds);
	}
	else
	{
		run_over_lap_fwd(runtime_seconds);
	}
	
	close_card();
	

	/*flush for a last time, make sure everything is writen to disk*/
	if(uFileName)
	{
		printf("wait for disk thread\n");
		while(uDiskTreadRunning)
		{
			static struct timespec req;
			req.tv_sec = 0;
			req.tv_nsec = 1000000;  /*1ms*/
			nanosleep( &req, NULL );
		}
		disk_cache_flush();
		close_file();
	}
	
	free_filter_expression();
}

/* Measure how many time stamp counter ticks pass in one millisecond.
 *
 * Sleeps 20ms to settle, then counts the ticks that pass during a 100ms
 * sleep and scales the result down to 1ms. Sleeps cut short by a signal
 * (for example the once-per-second statistics timer) are resumed, so the
 * measured interval keeps its intended length.
 *
 * Returns the number of ticks per millisecond.
 */
uint32_t get_cpu_tick_one_ms()
{
	uint32_t tsc_before,tsc_after;
	uint32_t tsc_one_ms;

	struct timespec time_to_sleep;

	/* Dummy sleep for 20ms first. */
	time_to_sleep.tv_sec = 0;
	time_to_sleep.tv_nsec = 20*1000000;
	while (nanosleep(&time_to_sleep, &time_to_sleep) == -1 && errno == EINTR)
	{
		/* interrupted: sleep for the remaining time */
	}

	/* Measure across a 100ms sleep. */
	time_to_sleep.tv_sec = 0;
	time_to_sleep.tv_nsec = 100*1000000;

	dagutil_tsc32_read(&tsc_before);
	while (nanosleep(&time_to_sleep, &time_to_sleep) == -1 && errno == EINTR)
	{
		/* interrupted: sleep for the remaining time */
	}
	dagutil_tsc32_read(&tsc_after);

	/* 100ms were measured, so divide by 100 to get ticks per millisecond. */
	tsc_one_ms = (tsc_after - tsc_before)/100;
	printf("cpu speed is %uMhz \n",tsc_one_ms/1000);

	return tsc_one_ms;	
}




/* Forwarding loop for overlap (inline) operation.
 *
 * Records are filtered in place as they arrive, and the accumulated bytes
 * are committed to the transmit stream once a millisecond has passed since
 * the previous commit or once tx_accum_bytes_max bytes are pending. Bytes
 * still pending when the loop ends are committed before returning.
 *
 * runtime_seconds: how long to run; 0 means until uKeepRunning is cleared.
 */
void run_over_lap_fwd(unsigned int runtime_seconds)
{
	uint32_t tsc_before,tsc_after;
	uint32_t tsc_one_ms;

	uint32_t bytes_to_commit = 0;
	uint8_t* record;
	uint32_t len=0;
	int count = 0;


	tsc_one_ms = get_cpu_tick_one_ms();

	dagutil_tsc32_read(&tsc_before);
	tsc_after = tsc_before;

	while ((uKeepRunning) && ((0 == runtime_seconds) || (uElapsedSeconds < runtime_seconds)))
	{
		/* Commit when there is pending data and either
		 * 1. it has been held back for at least one millisecond, or
		 * 2. more bytes than the limit are waiting to be sent. */
		if (bytes_to_commit && 
		    (((uint32_t)(tsc_after - tsc_before)>=tsc_one_ms) || (bytes_to_commit >= (uint32_t) tx_accum_bytes_max)))
		{
			uDagApiCommitCnt ++;
			dag_tx_stream_commit_bytes(uDagfd, uTxStream, bytes_to_commit);
			tsc_before = tsc_after;
			bytes_to_commit = 0;

#ifdef  DEBUG_FWD_DEMO
			/* Everything beyond the histogram range lands in the last bucket. */
			send_statis_count[(count/10 > SEND_COUNT_STAT_MAX) ? SEND_COUNT_STAT_MAX : count/10]++;
#endif
			count = 0;
		}

		uDagApiAdvanceCnt ++;
		record = dag_rx_stream_next_inline(uDagfd, uRxStream, uTxStream);
		if (NULL == record)
		{
			int last_error = dag_get_last_error();

			if (last_error == EAGAIN)
			{
				/* Nothing arrived. Refresh the clock so that pending bytes
				 * are still committed after a millisecond while the link is
				 * idle, instead of waiting for the next record. */
				dagutil_tsc32_read(&tsc_after);
				continue;
			}
			else
			{
				printf("NULL record received (%"PRIu64")(%"PRIu64") (errno = %d)\n",  uPacketCount[0], uPacketCount[1], errno);
				break;
			}
		}

		len = ntohs(((dag_record_t *)record)->rlen);
		bytes_to_commit += len;
		 /*prefetch next record,skip the 8 bytes timestamp*/
#if defined(__linux__)
		__builtin_prefetch(record+len+8,0,3);
#endif

		filt_record(record);

		dagutil_tsc32_read(&tsc_after);
		count++;
	}

	/* Do not leave already processed records uncommitted on exit
	 * (run_copy_fwd() flushes its pending bytes in the same way). */
	if (bytes_to_commit)
	{
		uDagApiCommitCnt ++;
		dag_tx_stream_commit_bytes(uDagfd, uTxStream, bytes_to_commit);
	}

#ifdef DEBUG_FWD_DEMO
	printf_statis();
#endif
}


void run_copy_fwd(unsigned int runtime_seconds)
{
	uint32_t tsc_before,tsc_after;
	uint32_t tsc_one_ms;

	uint32_t bytes_to_commit = 0;
	dag_record_t* dag_rec;
	uint8_t*    tx_ret;
	uint32_t len;

	uint8_t* bottom = NULL;
	uint8_t* top = NULL;
	uint8_t* copy_base = NULL;
	
	tsc_one_ms = get_cpu_tick_one_ms();

	dagutil_tsc32_read(&tsc_before);
	tsc_after = tsc_before;

	/* Get space for first time writing. */
	while ( (copy_base == NULL) && (uKeepRunning) && 
		((0 == runtime_seconds) || (uElapsedSeconds < runtime_seconds))) {
		copy_base = dag_tx_get_stream_space(uTxDagfd, uTxStream, tx_accum_bytes_max);
	}
	uDagApiTxGetSpaceCnt++;
		
	while ((uKeepRunning) && 
	       ((0 == runtime_seconds) || (uElapsedSeconds < runtime_seconds)))
	{
		uDagApiAdvanceCnt ++;

		top = dag_advance_stream(uDagfd, uRxStream, &bottom);

		while((top-bottom)>dag_record_size)
		{
			dag_rec = (dag_record_t*)bottom;
			len = ntohs(dag_rec->rlen);
			if ((uint32_t)(top-bottom) < len||len==0)
				break;

			filt_record((uint8_t*)dag_rec);

			if(dag_rec->flags.rxerror)
			{/* receive error, or filter drop, we don't want to forward these*/
				bottom+=len;
				continue;
			}

			if((bytes_to_commit + len) >=tx_accum_bytes_max)
			{
				/*Too much accumulated data, Commit them. */				
				uDagApiCommitCnt  ++;
				tx_ret = dag_tx_stream_commit_bytes(uTxDagfd, uTxStream, bytes_to_commit);
				if(tx_ret==NULL)
				{
					perror("TX Error, exit ");
					uKeepRunning = 0;
				}
				dagutil_tsc32_read(&tsc_before);
				
				bytes_to_commit = 0;
				/* Get space for writing, again. */
				uDagApiTxGetSpaceCnt++;
				do {
					copy_base = dag_tx_get_stream_space(uTxDagfd, uTxStream, tx_accum_bytes_max);
					if ((!uKeepRunning) ||  
					    ((0 != runtime_seconds) && (uElapsedSeconds >= runtime_seconds))) {
						return;
					}
				} while (copy_base == NULL);
			}

			/* Copy bytes. */
			memcpy(copy_base+bytes_to_commit, bottom, len);
			bytes_to_commit += len;
			bottom+=len;
		}
		/* commit uncommited data, if 1ms has passed since last commit*/
		dagutil_tsc32_read(&tsc_after);
		if(bytes_to_commit&&
		   (tsc_after - tsc_before)>tsc_one_ms)
		{
			uDagApiCommitCnt  ++;
			tx_ret = dag_tx_stream_commit_bytes(uTxDagfd, uTxStream, bytes_to_commit);
			dagutil_tsc32_read(&tsc_before);
			if(tx_ret==NULL)
			{
				perror("TX Error, exit ");
				uKeepRunning = 0;
			}
			bytes_to_commit = 0;
			/* Get space for writing, again. */
			uDagApiTxGetSpaceCnt++;
			do {
				copy_base = dag_tx_get_stream_space(uTxDagfd, uTxStream, tx_accum_bytes_max);
				if ((!uKeepRunning) ||  
				    ((0 != runtime_seconds) && (uElapsedSeconds >= runtime_seconds))) {
					return;
				}
			} while (copy_base == NULL);
		}
		
	}
	/* run time has expired */
	if(bytes_to_commit && copy_base) {
		/* flush remaining data */
		uDagApiCommitCnt  ++;
		tx_ret = dag_tx_stream_commit_bytes(uTxDagfd, uTxStream, bytes_to_commit);
	}
}
