/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include "mpidi_ch3_impl.h"
#include <stdio.h>

#ifdef USE_RDMA_UNEX

#undef FUNCNAME
#define FUNCNAME ch3i_buffer_unex_read
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * ch3i_buffer_unex_read - remember a span of already-received data on a VC.
 *
 * Allocates an MPIDI_CH3I_RDMA_Unex_read_t node describing num_bytes bytes
 * starting at mem_ptr + offset, tags it with the packet the data lives in
 * (pkt_ptr), and pushes it onto the head of vc_ptr->rdma.unex_list.  Only
 * the pointer is recorded; the data itself is not copied here.
 *
 * Returns 0 on success, or -1 if the list node could not be allocated, in
 * which case the list is left unchanged.
 *
 * NOTE(review): nodes are pushed at the head of the list and the unex
 * readers in this file also consume from the head, so several buffered
 * spans would be consumed newest-first -- confirm that this is intended.
 */
static int ch3i_buffer_unex_read(MPIDI_VC *vc_ptr, MPIDI_CH3I_RDMA_Packet_t *pkt_ptr, void *mem_ptr, unsigned int offset, unsigned int num_bytes)
{
    MPIDI_CH3I_RDMA_Unex_read_t *p;
    MPIDI_STATE_DECL(MPID_STATE_RDMAI_BUFFER_UNEX_READ);

    MPIDI_FUNC_ENTER(MPID_STATE_RDMAI_BUFFER_UNEX_READ);

    MPIDI_DBG_PRINTF((60, FCNAME, "%u bytes\n", num_bytes));

    p = (MPIDI_CH3I_RDMA_Unex_read_t *)MPIU_Malloc(sizeof(MPIDI_CH3I_RDMA_Unex_read_t));
    if (p == NULL)
    {
        /* out of memory: report failure instead of dereferencing NULL */
        MPIDI_FUNC_EXIT(MPID_STATE_RDMAI_BUFFER_UNEX_READ);
        return -1;
    }

    p->pkt_ptr = pkt_ptr;
    p->buf = (unsigned char *)mem_ptr + offset;
    p->length = num_bytes;

    /* link the new node in at the head of the unexpected list */
    p->next = vc_ptr->rdma.unex_list;
    vc_ptr->rdma.unex_list = p;

    MPIDI_FUNC_EXIT(MPID_STATE_RDMAI_BUFFER_UNEX_READ);
    return 0;
}

#undef FUNCNAME
#define FUNCNAME ch3i_read_unex
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * ch3i_read_unex - feed a posted contiguous read from the VC's list of
 * buffered unexpected data.
 *
 * Walks vc_ptr->rdma.unex_list from the head, copying as much of each node
 * as fits into vc_ptr->rdma.read.buffer and updating buffer, bufflen and
 * total.  A node that has been fully consumed has its packet marked
 * available again and is freed; a partially consumed node is trimmed in
 * place.  When bufflen reaches zero the VC is pushed onto
 * MPIDI_CH3I_Process.unex_finished_list and its RDMA_READING_BIT is cleared.
 *
 * The list must be non-empty on entry (asserted).  Always returns
 * RDMA_SUCCESS.
 */
static int ch3i_read_unex(MPIDI_VC *vc_ptr)
{
    MPIDI_CH3I_RDMA_Unex_read_t *node;
    unsigned int ncopy;
    MPIDI_STATE_DECL(MPID_STATE_RDMAI_READ_UNEX);
    MPIDI_STATE_DECL(MPID_STATE_MEMCPY);

    MPIDI_FUNC_ENTER(MPID_STATE_RDMAI_READ_UNEX);

    MPIDI_DBG_PRINTF((60, FCNAME, "entering"));
    assert(vc_ptr->rdma.unex_list);

    for (node = vc_ptr->rdma.unex_list; node != NULL; node = vc_ptr->rdma.unex_list)
    {
        /* move whatever fits from the head node into the user buffer */
        ncopy = MPIDU_MIN(node->length, vc_ptr->rdma.read.bufflen);
        MPIDI_FUNC_ENTER(MPID_STATE_MEMCPY);
        memcpy(vc_ptr->rdma.read.buffer, node->buf, ncopy);
        MPIDI_FUNC_EXIT(MPID_STATE_MEMCPY);

        /* account for the bytes just delivered */
        vc_ptr->rdma.read.buffer = (char*)(vc_ptr->rdma.read.buffer) + ncopy;
        vc_ptr->rdma.read.bufflen -= ncopy;
        vc_ptr->rdma.read.total += ncopy;

        if (ncopy == node->length)
        {
            /* node drained: hand its packet back to the pool ... */
            assert(node->pkt_ptr != NULL);
            node->pkt_ptr->cur_pos = node->pkt_ptr->data;
            node->pkt_ptr->avail = MPIDI_CH3I_PKT_AVAILABLE;
            /* ... then unlink and release the list node itself */
            vc_ptr->rdma.unex_list = node->next;
            MPIU_Free(node);
        }
        else
        {
            /* node only partly consumed: keep the remainder for later */
            node->length -= ncopy;
            node->buf += ncopy;
        }

        if (vc_ptr->rdma.read.bufflen == 0)
        {
            /* the posted read is satisfied; queue the vc on the finished
               list so the progress loop can report it */
            vc_ptr->rdma.shm_state &= ~RDMA_READING_BIT;
            vc_ptr->rdma.unex_finished_next = MPIDI_CH3I_Process.unex_finished_list;
            MPIDI_CH3I_Process.unex_finished_list = vc_ptr;
            break;
        }
    }

    MPIDI_FUNC_EXIT(MPID_STATE_RDMAI_READ_UNEX);
    return RDMA_SUCCESS;
}

#undef FUNCNAME
#define FUNCNAME ch3i_readv_unex
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * ch3i_readv_unex - feed a posted vector (iov) read from the VC's list of
 * buffered unexpected data.
 *
 * For each node at the head of vc_ptr->rdma.unex_list, bytes are copied into
 * the current iov entry (vc_ptr->rdma.read.iov[read.index]) until either the
 * node or the iov array is exhausted; an iov entry whose length drops to
 * zero is stepped over by bumping read.index and decrementing read.iovlen.
 * A drained node has its packet marked available again and is freed.  When
 * read.iovlen reaches zero the VC is pushed onto
 * MPIDI_CH3I_Process.unex_finished_list and its RDMA_READING_BIT is cleared.
 *
 * Always returns RDMA_SUCCESS.
 */
int ch3i_readv_unex(MPIDI_VC *vc_ptr)
{
    MPIDI_CH3I_RDMA_Unex_read_t *node;
    MPID_IOV *cur_iov;
    unsigned int chunk;
    MPIDI_STATE_DECL(MPID_STATE_RDMAI_READV_UNEX);
    MPIDI_STATE_DECL(MPID_STATE_MEMCPY);

    MPIDI_FUNC_ENTER(MPID_STATE_RDMAI_READV_UNEX);

    MPIDI_DBG_PRINTF((60, FCNAME, "entering"));

    while ((node = vc_ptr->rdma.unex_list) != NULL)
    {
        while (node->length && vc_ptr->rdma.read.iovlen)
        {
            cur_iov = &vc_ptr->rdma.read.iov[vc_ptr->rdma.read.index];

            chunk = MPIDU_MIN(node->length, cur_iov->MPID_IOV_LEN);
            MPIDI_DBG_PRINTF((60, FCNAME, "copying %d bytes\n", chunk));

            /* deliver the next chunk into the current iov entry */
            MPIDI_FUNC_ENTER(MPID_STATE_MEMCPY);
            memcpy(cur_iov->MPID_IOV_BUF, node->buf, chunk);
            MPIDI_FUNC_EXIT(MPID_STATE_MEMCPY);

            /* consume the bytes from the unexpected node */
            vc_ptr->rdma.read.total += chunk;
            node->buf += chunk;
            node->length -= chunk;

            /* shrink the iov entry, moving on once it is full */
            cur_iov->MPID_IOV_LEN -= chunk;
            cur_iov->MPID_IOV_BUF = (char*)(cur_iov->MPID_IOV_BUF) + chunk;
            if (cur_iov->MPID_IOV_LEN == 0)
            {
                vc_ptr->rdma.read.index++;
                vc_ptr->rdma.read.iovlen--;
            }
        }

        if (node->length == 0)
        {
            /* node drained: return its packet to the pool */
            assert(node->pkt_ptr != NULL);
            node->pkt_ptr->cur_pos = node->pkt_ptr->data;
            node->pkt_ptr->avail = MPIDI_CH3I_PKT_AVAILABLE;
            /* unlink and release the list node */
            vc_ptr->rdma.unex_list = node->next;
            MPIU_Free(node);
        }

        if (vc_ptr->rdma.read.iovlen == 0)
        {
            /* every iov entry is filled; queue the vc as finished */
            vc_ptr->rdma.shm_state &= ~RDMA_READING_BIT;
            vc_ptr->rdma.unex_finished_next = MPIDI_CH3I_Process.unex_finished_list;
            MPIDI_CH3I_Process.unex_finished_list = vc_ptr;
            MPIDI_DBG_PRINTF((60, FCNAME, "finished read saved in MPIDI_CH3I_Process.unex_finished_list\n"));
            break;
        }
    }

    MPIDI_FUNC_EXIT(MPID_STATE_RDMAI_READV_UNEX);
    return RDMA_SUCCESS;
}

#endif /* USE_RDMA_UNEX */

#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_read_progress
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * MPIDI_CH3I_read_progress - drive posted reads on every VC in the process
 * group and report the first one that completes.
 *
 * Parameters:
 *   blocking      - if zero, return as soon as a full sweep over the VCs
 *                   moves no data; if non-zero, keep sweeping until a read
 *                   completes or an error occurs.
 *   vc_pptr       - out: the VC whose posted read completed, or NULL when
 *                   nothing completed (non-blocking return or error).
 *   num_bytes_ptr - out: total bytes delivered for the completed read
 *                   (rdma.read.total), or 0 when nothing completed.
 *
 * When USE_RDMA_UNEX is defined, a VC already sitting on
 * MPIDI_CH3I_Process.unex_finished_list is reported first.
 *
 * Returns MPI_SUCCESS, or an error code built by MPIR_Err_create_code when
 * MPIDI_CH3I_RDMA_read_datav fails.
 */
int MPIDI_CH3I_read_progress(int blocking, MPIDI_VC **vc_pptr, int *num_bytes_ptr)
{
    MPIDI_CH3I_Process_group_t *pg;
    MPID_IOV iov[1];
    MPIDI_VC *recv_vc_ptr;
    int num_bytes;
    int i;
    int error;
    int working;
#ifdef USE_RDMA_UNEX
    MPIDI_VC *temp_vc_ptr;
#endif
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_READ_PROGRESS);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_READ_PROGRESS);

    pg = MPIDI_CH3I_Process.pg;

    for (;;)
    {
#ifdef USE_RDMA_UNEX
        if (MPIDI_CH3I_Process.unex_finished_list)
        {
            MPIDI_DBG_PRINTF((60, FCNAME, "returning previously received %d bytes", MPIDI_CH3I_Process.unex_finished_list->rdma.read.total));

            *num_bytes_ptr = MPIDI_CH3I_Process.unex_finished_list->rdma.read.total;
            *vc_pptr = MPIDI_CH3I_Process.unex_finished_list;
            /* remove this vc from the finished list */
            temp_vc_ptr = MPIDI_CH3I_Process.unex_finished_list;
            MPIDI_CH3I_Process.unex_finished_list = MPIDI_CH3I_Process.unex_finished_list->rdma.unex_finished_next;
            temp_vc_ptr->rdma.unex_finished_next = NULL;

            MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_READ_PROGRESS);
            return MPI_SUCCESS;
        }
#endif /* USE_RDMA_UNEX */

        /* set to TRUE below as soon as any vc delivers data in this sweep */
        working = FALSE;

        for (i=0; i < pg->size; i++)
        {
            /* skip over the vc to myself */
            if (MPIDI_CH3I_Process.vc->rdma.pg_rank == i)
                continue;

            recv_vc_ptr = &pg->vc_table[i];

            /* check if the vc has data to be read */
            /*
            if (!MPIDI_CH3I_RDMA_read_ready(recv_vc_ptr))
                continue;
            working = TRUE;
            */

            /* nothing is posted on this vc, so there is nowhere to put data */
            if (recv_vc_ptr->rdma.read_state == MPIDI_CH3I_READ_STATE_IDLE)
            {
#ifdef USE_RDMA_UNEX
                /* Should we buffer unexpected messages or leave them in the shmem queue? */
                /*ch3i_buffer_unex_read(recv_vc_ptr, pkt_ptr, mem_ptr, 0, num_bytes);*/
#endif
                continue;
            }

            if (recv_vc_ptr->rdma.read.use_iov)
            {
                /* vector read: hand the not-yet-filled tail of the iov array to the transport */
                error = MPIDI_CH3I_RDMA_read_datav(recv_vc_ptr, &recv_vc_ptr->rdma.read.iov[recv_vc_ptr->rdma.read.index], recv_vc_ptr->rdma.read.iovlen, &num_bytes);
                if (error != MPI_SUCCESS)
                {
                    *num_bytes_ptr = 0;
                    *vc_pptr = NULL;
                    error = MPIR_Err_create_code(error, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**read_progress", 0);
                    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_READ_PROGRESS);
                    return error;
                }
                if (num_bytes == 0)
                    continue;
                working = TRUE;
                recv_vc_ptr->rdma.read.total += num_bytes;

                /* walk the iov array forward by the number of bytes received */
                while (num_bytes && recv_vc_ptr->rdma.read.iovlen > 0)
                {
                    if ((int)recv_vc_ptr->rdma.read.iov[recv_vc_ptr->rdma.read.index].MPID_IOV_LEN <= num_bytes)
                    {
                        /* this entry is completely filled: step to the next one */
                        num_bytes -= recv_vc_ptr->rdma.read.iov[recv_vc_ptr->rdma.read.index].MPID_IOV_LEN;
                        recv_vc_ptr->rdma.read.index++;
                        recv_vc_ptr->rdma.read.iovlen--;
                    }
                    else
                    {
                        /* this entry is partly filled: trim it in place */
                        recv_vc_ptr->rdma.read.iov[recv_vc_ptr->rdma.read.index].MPID_IOV_LEN -= num_bytes;
                        recv_vc_ptr->rdma.read.iov[recv_vc_ptr->rdma.read.index].MPID_IOV_BUF =
                            (char*)(recv_vc_ptr->rdma.read.iov[recv_vc_ptr->rdma.read.index].MPID_IOV_BUF) + num_bytes;
                        num_bytes = 0;
                    }
                }
                if (recv_vc_ptr->rdma.read.iovlen == 0)
                {
                    /* the whole vector is filled: report this vc */
                    recv_vc_ptr->rdma.read_state = MPIDI_CH3I_READ_STATE_IDLE;
                    *num_bytes_ptr = recv_vc_ptr->rdma.read.total;
                    *vc_pptr = recv_vc_ptr;
                    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_READ_PROGRESS);
                    return MPI_SUCCESS;
                }
            }
            else
            {
                /* contiguous read: wrap the remaining buffer in a one-entry iov */
                iov[0].MPID_IOV_BUF = recv_vc_ptr->rdma.read.buffer;
                iov[0].MPID_IOV_LEN = recv_vc_ptr->rdma.read.bufflen;
                error = MPIDI_CH3I_RDMA_read_datav(recv_vc_ptr, iov, 1, &num_bytes);
                if (error != MPI_SUCCESS)
                {
                    *num_bytes_ptr = 0;
                    *vc_pptr = NULL;
                    error = MPIR_Err_create_code(error, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**read_progress", 0);
                    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_READ_PROGRESS);
                    return error;
                }
                if (num_bytes == 0)
                    continue;
                working = TRUE;
                recv_vc_ptr->rdma.read.total += num_bytes;
                if ((unsigned int)num_bytes == recv_vc_ptr->rdma.read.bufflen)
                {
                    recv_vc_ptr->rdma.read.bufflen = 0;
                }
                else
                {
                    /* advance the user pointer */
                    recv_vc_ptr->rdma.read.buffer = (char*)(recv_vc_ptr->rdma.read.buffer) + num_bytes;
                    recv_vc_ptr->rdma.read.bufflen -= num_bytes;
                }
                if (recv_vc_ptr->rdma.read.bufflen == 0)
                {
                    /* the buffer is full: report this vc */
                    recv_vc_ptr->rdma.read_state = MPIDI_CH3I_READ_STATE_IDLE;
                    *num_bytes_ptr = recv_vc_ptr->rdma.read.total;
                    *vc_pptr = recv_vc_ptr;
                    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_READ_PROGRESS);
                    return MPI_SUCCESS;
                }
            }
        }

        /* a non-blocking caller gets control back once a sweep moves no data */
        if (!blocking && !working)
        {
            *num_bytes_ptr = 0;
            *vc_pptr = NULL;
            MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_READ_PROGRESS);
            return MPI_SUCCESS;
        }
    }

    /* Not reached: the loop above has no break, every exit returns directly. */
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_READ_PROGRESS);
    return MPI_SUCCESS;
}

/* non-blocking functions */

#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_post_read
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * MPIDI_CH3I_post_read - post a contiguous read on a VC.
 *
 * Resets the VC's read bookkeeping (total = 0), records buf/len as the
 * destination, selects the non-iov path and puts the VC into
 * MPIDI_CH3I_READ_STATE_READING.  When USE_RDMA_UNEX is defined and
 * unexpected data is already buffered on the VC, that data is consumed
 * immediately via ch3i_read_unex.
 *
 * Always returns MPI_SUCCESS.
 */
int MPIDI_CH3I_post_read(MPIDI_VC *vc, void *buf, int len)
{
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_RDMA_POST_READ);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_RDMA_POST_READ);
    MPIDI_DBG_PRINTF((60, FCNAME, "entering"));
    vc->rdma.read.total = 0;
    vc->rdma.read.buffer = buf;
    vc->rdma.read.bufflen = len;
    vc->rdma.read.use_iov = FALSE;
    vc->rdma.read_state = MPIDI_CH3I_READ_STATE_READING;
#ifdef USE_RDMA_UNEX
    /* drain data that arrived before this read was posted */
    if (vc->rdma.unex_list)
        ch3i_read_unex(vc);
#endif
    MPIU_DBG_PRINTF(("post_read: len = %d\n", len));
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_RDMA_POST_READ);
    return MPI_SUCCESS;
}

#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_post_readv
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * MPIDI_CH3I_post_readv - post a vector (iov) read on a VC.
 *
 * Resets the VC's read bookkeeping (total = 0, index = 0), records the
 * caller's iov array and its entry count n, selects the iov path and puts
 * the VC into MPIDI_CH3I_READ_STATE_READING.  The iov pointer itself is
 * stored, not a copy of the array.  When USE_RDMA_UNEX is defined and
 * unexpected data is already buffered on the VC, that data is consumed
 * immediately via ch3i_readv_unex.
 *
 * Always returns MPI_SUCCESS.
 */
int MPIDI_CH3I_post_readv(MPIDI_VC *vc, MPID_IOV *iov, int n)
{
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_RDMA_POST_READV);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_RDMA_POST_READV);
    MPIDI_DBG_PRINTF((60, FCNAME, "entering"));
    vc->rdma.read.total = 0;
    vc->rdma.read.iov = iov;
    vc->rdma.read.iovlen = n;
    vc->rdma.read.index = 0;
    vc->rdma.read.use_iov = TRUE;
    vc->rdma.read_state = MPIDI_CH3I_READ_STATE_READING;
#ifdef USE_RDMA_UNEX
    /* drain data that arrived before this read was posted */
    if (vc->rdma.unex_list)
        ch3i_readv_unex(vc);
#endif
#ifdef MPICH_DBG_OUTPUT
    /* debug builds only: dump the posted iov lengths, last entry first */
    while (n)
    {
        MPIU_DBG_PRINTF(("post_readv: iov[%d].len = %d\n", n-1, iov[n-1].MPID_IOV_LEN));
        n--;
    }
#endif

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_RDMA_POST_READV);
    return MPI_SUCCESS;
}
