/**************************************************************************/
/* DESCRIPTION: This file is part of the HILBERT program package for the  */
/*        numerical solution of the Laplace equation with mixed boundary  */
/*        conditions by use of BEM in 2D.                                 */
/*                                                                        */
/*        This file serves the purpose of sharing code between the        */
/*        hyper-threaded code of the operators V, W, K and N.             */
/*                                                                        */
/*        This file contains only the implementation. For extensive       */
/*        documentation consult the corresponding header file.            */
/*                                                                        */
/*        Beware: As POSIX threads may not be available, this file is     */
/*        only compiled if the pre-processor flag HILTHREADS is set.      */
/**************************************************************************/
/* VERSION: 3.1                                                           */
/**************************************************************************/
/* (C) 2009-2013 HILBERT-Team '10, '12                                    */
/* support + bug report:  hilbert@asc.tuwien.ac.at                        */
/**************************************************************************/
#ifdef HILTHREADS

#include "threads.h"

/* Minimum / maximum of two values. These are function-like macros: each
 * argument is evaluated twice, so do not pass expressions with side effects.
 */
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))

/* Base polling interval in nanoseconds. theFillWorker sleeps for exactly
 * this long when it has nothing to do; genericBoss scales it by the number
 * of columns per computation worker.
 */
#define SLEEP_T_BASE_NS 4000 

/* Sanity predicates intended for use inside assert(). In debug builds they
 * check the pointer and the basic invariants of the object; when NDEBUG is
 * defined they collapse to the constant 1.
 *
 * NOTE(review): none of these macros is referenced by the code visible in
 * this file, and ISVALIDMATRIXBLOCK / ISVALIDMATRIXFRAME / ISVALIDFILLWORKER
 * access members (first_row, last_row, target, source, data.frame) that no
 * function here uses. They may be left over from an older data layout --
 * confirm against threads.h before relying on them.
 */
#ifndef NDEBUG
#  define ISVALIDMATRIX(m) ((m) != NULL && (m)->rows >= 0 && (m)->cols >= 0\
                            && (m)->storage != NULL)
#  define ISVALIDMATRIXBLOCK(b) ((b) != NULL && (b)->col >= 0 \
                              && (b)->first_row >= 0 && (b)->last_row >= 0 \
                              && (b)->first_row <= (b)->last_row)
#  define ISVALIDMATRIXFRAME(f) ((f) != NULL && ISVALIDMATRIX((f)->target) \
                                && ISVALIDMATRIXBLOCK((f)->source))
#  define ISVALIDFILLWORKER(w) ((w) != NULL \
      && ISVALIDMATRIXFRAME((w)->data.frame) \
      && ((w)->data.status == HILTHR_FILLWORKER_STATUS_NOTSTARTED \
          || (w)->data.status == HILTHR_FILLWORKER_STATUS_WORKING \
          || (w)->data.status == HILTHR_FILLWORKER_STATUS_FINISHED))
#else /* "No-debug mode": debugging macros are always true. */
#  define ISVALIDMATRIX(m)        1
#  define ISVALIDMATRIXBLOCK(b)   1
#  define ISVALIDMATRIXFRAME(f)   1
#  define ISVALIDFILLWORKER(w)    1
#endif

/* DIAGNOSTIC_MSG(msg): when DIAGNOSTICS is defined, prints msg followed by a
 * newline to DIAGNOSTICS_FH; otherwise expands to an empty statement.
 *
 * NOTE(review): both expansions already end in ';'. Writing
 * "DIAGNOSTIC_MSG(x);" therefore produces two statements, which breaks an
 * unbraced "if (...) DIAGNOSTIC_MSG(x); else ...". Always brace such uses.
 */
#ifdef DIAGNOSTICS
#  define DIAGNOSTIC_MSG(msg) fprintf(DIAGNOSTICS_FH, "%s\n", (msg));
#else
#  define DIAGNOSTIC_MSG(msg) ;
#endif

Matrix* newMatrix(int rows, int cols, double* storage) {
  Matrix* matrix = NULL;

  assert(rows > 0 && cols > 0 && storage != NULL);

  matrix = (Matrix*) malloc(sizeof(Matrix));
  assert(matrix != NULL);
  matrix->rows = rows;
  matrix->cols = cols;
  matrix->storage = storage;

  return matrix;
}

/* Returns the number of rows of the matrix.
 * The matrix must not be NULL and must have a positive row count (asserted).
 */
int getMatrixRows(Matrix* matrix) {
  assert(matrix != NULL && matrix->rows > 0);
  return matrix->rows;
}

/* Returns the number of columns of the matrix.
 * The matrix must not be NULL and must have a positive column count
 * (asserted).
 */
int getMatrixCols(Matrix* matrix) {
  assert(matrix != NULL && matrix->cols > 0);
  return matrix->cols;
}

/* Returns the buffer the matrix descriptor points to (not a copy).
 * Neither the matrix nor its storage pointer may be NULL (asserted).
 */
double* getMatrixStorage(Matrix* matrix) {
  assert(matrix != NULL && matrix->storage != NULL);
  return matrix->storage;
}

/* Allocates a zero-initialised column buffer.
 *
 * col  : index of the column this buffer belongs to, must be >= 0.
 * rows : number of entries in the buffer, must be >= 0.
 *
 * Returns a new MatrixColumn whose values array holds `rows` zeros. Release
 * it with destroyMatrixColumn (writeMatrixColumnToMatrix does so itself).
 */
MatrixColumn* newMatrixColumn(int col, int rows) {
  MatrixColumn* block = NULL;

  assert(col >= 0);
  assert(rows >= 0);

  block = (MatrixColumn*) malloc(sizeof(MatrixColumn));

  assert(block != NULL);

  block->col = col;
  block->rows = rows;
  block->values = (double*) calloc(rows, sizeof(double));

  /* rows == 0 is an accepted input, but calloc(0, ...) is allowed to return
   * NULL. Only a NULL result for a non-empty request is an allocation
   * failure. */
  assert(rows == 0 || block->values != NULL);

  return block;
}

/* Releases a column buffer together with its values array.
 *
 * block : column to destroy, must not be NULL (asserted).
 *
 * Always returns NULL, so callers can write "c = destroyMatrixColumn(c);".
 */
MatrixColumn* destroyMatrixColumn(MatrixColumn* block) {
  double* payload = NULL;

  assert(block != NULL);

  payload = block->values;
  free(block);
  free(payload);

  return NULL;
}

/* Tells whether two column buffers refer to the same matrix column.
 *
 * Returns 1 if both pointers are non-NULL and carry the same column index,
 * 0 otherwise (including when either pointer is NULL).
 */
int matrixColumnsIntersect(MatrixColumn* b1, MatrixColumn* b2) {
  if (b1 == NULL || b2 == NULL)
    return 0;

  return (b1->col == b2->col) ? 1 : 0;
}

/* Allocates an empty column queue (first and last are both NULL).
 * Release it with freeMatrixColumnQueue once it is empty again.
 */
MatrixColumnQueue* newMatrixColumnQueue() {
  MatrixColumnQueue* queue = (MatrixColumnQueue*) malloc(sizeof *queue);

  assert(queue != NULL);
  queue->first = queue->last = NULL;

  return queue;
}

/* Frees the queue container itself.
 *
 * The queue must be non-NULL and already empty (both asserted): queued
 * elements are not released here, so drain the queue first.
 */
void freeMatrixColumnQueue(MatrixColumnQueue* queue) {
  assert(queue != NULL);
  assert(queue->first == NULL && queue->last == NULL);
  free(queue);
}

/* Appends a column to the end of the queue.
 *
 * q : target queue, must not be NULL.
 * f : column to enqueue, must not be NULL. Only the pointer is stored; the
 *     column is handed back unchanged by the shift functions.
 *
 * The new list node is fully initialised before it is linked into the
 * queue.
 */
void matrixColumnQueueAppend(MatrixColumnQueue* q, MatrixColumn* f) {
  MatrixColumnQueueElement* newElement = NULL;

  assert(q != NULL);
  assert(f != NULL);

  newElement = (MatrixColumnQueueElement*) 
                    malloc(sizeof(MatrixColumnQueueElement));
  /* The allocation result was previously dereferenced unchecked; every
   * other allocation in this file is guarded the same way. */
  assert(newElement != NULL);
  newElement->element = f;
  newElement->next = NULL;

  if (q->first == NULL) {
    q->first = q->last = newElement;
  }
  else {
    assert(q->last->next == NULL); /* an invariant that ensures that no */
                                   /* memory is lost. */
    q->last->next = newElement;
    q->last = newElement;
  }

  assert((q->first != NULL && q->last != NULL)
          || (q->first == NULL && q->last == NULL));
}

/* Moves all elements of q2 to the end of q1 and frees the q2 container.
 *
 * q1 : receiving queue, must not be NULL (asserted).
 * q2 : queue to absorb; NULL is accepted and makes the call a no-op.
 *      After the call q2 is freed and must not be used any more.
 */
void matrixColumnQueueAppendQueue(MatrixColumnQueue* q1,
                                  MatrixColumnQueue* q2) {
  assert(q1 != NULL);

  if (q2 != NULL) {
    MatrixColumnQueueElement* const head = q2->first;
    MatrixColumnQueueElement* const tail = q2->last;

    if (q1->first != NULL) {
      /* q1 already has elements: chain q2's list behind its tail. */
      assert(q1->last != NULL);
      q1->last->next = head;
      if (tail != NULL)
        q1->last = tail;
    }
    else {
      /* q1 is empty: it simply takes over q2's list. */
      q1->first = head;
      q1->last = tail;
    }

    free(q2);
    assert((q1->first != NULL && q1->last != NULL)
            || (q1->first == NULL && q1->last == NULL));
  }
}

/* Returns 1 if the queue holds at least one element, 0 if it is empty.
 * The queue must not be NULL, and first/last must be either both set or
 * both NULL (asserted).
 */
int matrixColumnQueueHasNext(MatrixColumnQueue* queue) {
  assert(queue != NULL);
  assert((queue->first != NULL && queue->last != NULL)
          || (queue->first == NULL && queue->last == NULL));

  if (queue->first == NULL)
    return 0;

  return 1;
}

/* Removes and returns the column at the head of the queue, but only while
 * at least one further element follows it.
 *
 * Returns NULL when the queue is empty or holds exactly one element; in the
 * latter case that element stays queued and has to be fetched with
 * matrixColumnQueueUnsafeShift. On success the list node is freed and the
 * returned column belongs to the caller.
 *
 * NOTE(review): leaving the final node in place presumably allows one
 * thread to append (which writes q->last->next) while another thread shifts,
 * without any locking -- confirm against the header documentation.
 */
MatrixColumn* matrixColumnQueueSafeShift(MatrixColumnQueue* queue) {
  MatrixColumn* frame = NULL;
  MatrixColumnQueueElement* queueElement = NULL;

  assert(queue != NULL);

  if (queue->first == NULL)
    return NULL;

  queueElement = queue->first;
  frame = queueElement->element;
  if (queueElement->next == NULL) {
    /* Head is also the last node: leave it in the queue. */
    return NULL;
  }
  else {
    queue->first = queueElement->next;
    /* next was tested to be non-NULL just above, so this reset only */
    /* triggers if the field changed in between. */
    if (queue->first == NULL)
      queue->last = NULL;
    free(queueElement);
    return frame;
  }
}

/* Removes and returns the column at the head of the queue, including the
 * very last one.
 *
 * Returns NULL if the queue is empty. Otherwise the list node is freed, the
 * queue is marked empty when its last element was taken, and the returned
 * column belongs to the caller.
 */
MatrixColumn* matrixColumnQueueUnsafeShift(MatrixColumnQueue* queue) {
  MatrixColumnQueueElement* head = NULL;
  MatrixColumn* payload = NULL;

  assert(queue != NULL);

  head = queue->first;
  if (head != NULL) {
    payload = head->element;
    queue->first = head->next;
    if (queue->first == NULL)
      queue->last = NULL;   /* that was the only element */
    free(head);
  }

  return payload;
}

/* Creates a fill worker and immediately starts its thread.
 *
 * targetMatrix : matrix into which the worker accumulates the columns that
 *                are later assigned to it.
 *
 * The worker starts with an empty column queue and status
 * HILTHR_FILLWORKER_STATUS_FINISHED; its thread runs theFillWorker on the
 * worker's data. If the thread cannot be created, a message is printed to
 * stderr and the process exits with the pthread error code.
 */
FillWorker* newFillWorker(Matrix* targetMatrix) {
  FillWorker* fw = (FillWorker*) malloc(sizeof *fw);
  int rc = 0;

  assert(fw != NULL);

  /* Everything the thread reads has to be in place before it is started. */
  fw->data.target = targetMatrix;
  fw->data.columns = newMatrixColumnQueue();
  fw->data.status = HILTHR_FILLWORKER_STATUS_FINISHED;

  rc = pthread_create(&fw->thread, NULL, theFillWorker, (void*) &fw->data);
  if (rc != 0) {
    fprintf(stderr, "Could not create thread in function %s at line %d.\n",
      __FUNCTION__, __LINE__);
    exit(rc);
  }

  return fw;
}

/* Waits for a fill worker's thread to end and releases the worker.
 *
 * worker : worker created by newFillWorker, must not be NULL.
 *
 * The thread only leaves its loop once the worker's status has been set to
 * HILTHR_FILLWORKER_STATUS_SHUTDOWN, so set that status before calling this
 * function; otherwise the join blocks. On return the column queue and the
 * worker object itself are freed, and the pointer must not be used again.
 */
void destroyFillWorker(FillWorker* worker) {
  assert(worker != NULL);

  pthread_join(worker->thread, NULL);
  freeMatrixColumnQueue(worker->data.columns);
  worker->data.columns = NULL;

  /* The worker object was malloc'ed in newFillWorker and used to be leaked
   * here; destroyCompWorker likewise frees its own node. */
  free(worker);
}

/* Thread body of a fill worker.
 *
 * data : pointer to the worker's FillWorkerData.
 *
 * Until the status becomes HILTHR_FILLWORKER_STATUS_SHUTDOWN, the thread
 * keeps taking columns from its queue with the "safe" shift and adds them
 * to the target matrix; when nothing can be taken it sleeps briefly
 * (SLEEP_T_BASE_NS nanoseconds, or 1 ms on Windows). After shutdown was
 * requested, everything still queued is drained with the "unsafe" shift.
 *
 * Always returns NULL.
 */
void* theFillWorker(void* data) {
  FillWorkerData* const job = (FillWorkerData*) data;
  MatrixColumn* column = NULL;
#ifndef _WIN32
  struct timespec delay;

  delay.tv_nsec = SLEEP_T_BASE_NS;
  delay.tv_sec = 0;
#endif

  while (job->status != HILTHR_FILLWORKER_STATUS_SHUTDOWN) {
    column = matrixColumnQueueSafeShift(job->columns);
    if (column != NULL) {
      writeMatrixColumnToMatrix(column,job->target);
      continue;
    }

    /* Nothing to fetch right now: back off for a moment. */
#ifndef _WIN32
    nanosleep(&delay, &delay);
#else
    Sleep(1);
#endif
  }

  /* Shutdown requested: flush whatever is left, including the last node. */
  for (column = matrixColumnQueueUnsafeShift(job->columns);
       column != NULL;
       column = matrixColumnQueueUnsafeShift(job->columns))
  {
    writeMatrixColumnToMatrix(column,job->target);
  }

  return NULL;
}

/* Hands a column over to a fill worker by appending it to the worker's
 * column queue.
 *
 * The worker must not have been told to shut down yet (asserted). The
 * worker thread later adds the column to its target matrix and destroys it
 * (see theFillWorker / writeMatrixColumnToMatrix), so the caller must not
 * touch the column afterwards.
 */
void assignMatrixColumnToFillWorker(MatrixColumn* column,
                                    FillWorker* worker)
{
  assert(worker->data.status != HILTHR_FILLWORKER_STATUS_SHUTDOWN);
  matrixColumnQueueAppend(worker->data.columns, column);
}

/* Adds a column buffer onto the matching column of a matrix and destroys
 * the buffer.
 *
 * c : column buffer; its c->rows values are ADDED (not assigned) to the
 *     matrix entries starting at index m->rows * c->col of the storage.
 *     The buffer is freed before returning.
 * m : target matrix.
 */
void writeMatrixColumnToMatrix(MatrixColumn* c, Matrix* m) {
  const int base = m->rows * c->col;
  const int count = c->rows;
  double* const dst = m->storage;
  const double* const src = c->values;
  int k = 0;

  for (k = 0; k < count; ++k)
    dst[base + k] = dst[base + k] + src[k];

  destroyMatrixColumn(c);
}

/* Bundles the read-only input of an "N-like" computation into one shared
 * data block.
 *
 * coordinates, elements : first pair of arrays, with nC and nE entries.
 * vertices, triangles   : second pair of arrays, with nVert and nT entries.
 * eta                   : parameter in the closed interval [0, 1].
 *
 * All counts must be positive and all pointers non-NULL (asserted). The
 * arrays are not copied; only the pointers are stored.
 *
 * Returns a CompWorkerSharedData whose NLike member points to the newly
 * allocated block.
 */
CompWorkerSharedData newCompWorkerSharedDataNLike(
    const double* coordinates, const double* elements, int nC, int nE,
    const double* vertices, const double* triangles, int nVert, int nT,
    double eta)
{
  CompWorkerSharedData handle;
  CompWorkerSharedDataNLike* payload = NULL;

  assert(nC > 0 && coordinates != NULL);
  assert(nE > 0 && elements != NULL);
  assert(nVert > 0 && vertices != NULL);
  assert(nT > 0 && triangles != NULL);
  assert(eta >= 0 && eta <= 1);

  payload = (CompWorkerSharedDataNLike*) malloc(sizeof *payload);
  assert(payload != NULL);

  payload->coordinates = coordinates;
  payload->nC = nC;
  payload->elements = elements;
  payload->nE = nE;

  payload->vertices = vertices;
  payload->nVert = nVert;
  payload->triangles = triangles;
  payload->nT = nT;

  payload->eta = eta;

  handle.NLike = payload;
  return handle;
}

/* Frees a shared data block created by newCompWorkerSharedDataNLike.
 * Only the block itself is released; the arrays it points to are left
 * untouched. Note that genericBoss already frees the shared data it is
 * given.
 */
void freeCompWorkerSharedDataNLike(CompWorkerSharedDataNLike* data) {
  free(data);
}

/* Bundles the read-only input of a "V-like" computation into one shared
 * data block.
 *
 * coordinates, elements : input arrays, with nC and nE entries.
 * eta                   : parameter in the closed interval [0, 1].
 *
 * Both counts must be positive and both pointers non-NULL (asserted). The
 * arrays are not copied; only the pointers are stored.
 *
 * Returns a CompWorkerSharedData whose VLike member points to the newly
 * allocated block.
 */
CompWorkerSharedData newCompWorkerSharedDataVLike(
  const double* coordinates, const double* elements,
  int nC, int nE, double eta)
{
  CompWorkerSharedData handle;
  CompWorkerSharedDataVLike* payload = NULL;

  assert(nC > 0 && coordinates != NULL);
  assert(nE > 0 && elements != NULL);
  assert(eta >= 0 && eta <= 1);

  payload = (CompWorkerSharedDataVLike*) malloc(sizeof *payload);
  assert(payload != NULL);

  payload->coordinates = coordinates;
  payload->nC = nC;
  payload->elements = elements;
  payload->nE = nE;
  payload->eta = eta;

  handle.VLike = payload;
  return handle;
}

/* Frees a shared data block created by newCompWorkerSharedDataVLike.
 * Only the block itself is released; the arrays it points to are left
 * untouched. Note that genericBoss already frees the shared data it is
 * given.
 */
void freeCompWorkerSharedDataVLike(CompWorkerSharedDataVLike* data) {
  free(data);
}

/* Creates a "simple" computation worker and starts its thread right away.
 *
 * fct               : thread function; it receives a pointer to the
 *                     worker's data.
 * matrix            : matrix stored in the worker's storage slot (a simple
 *                     worker gets the matrix itself instead of a column
 *                     queue).
 * firstCol, lastCol : column range stored for the worker.
 * shared            : shared input data handed to the worker.
 *
 * The worker is marked HILTHR_COMPWORKER_STATUS_SIMPLE. If the thread
 * cannot be created, a message is printed to stderr and the process exits
 * with the pthread error code. Joining the thread and freeing the returned
 * worker is left to the caller.
 */
CompWorker* newCompWorkerSimple(void* (*fct)(void*), Matrix* matrix,
    int firstCol, int lastCol, CompWorkerSharedData shared) {
  int status = 0;
  CompWorker* w = (CompWorker*) malloc(sizeof(CompWorker));

  /* The allocation result used to be dereferenced unchecked; guard it the
   * same way as every other allocation in this file. */
  assert(w != NULL);

  /* All fields must be set before the thread starts reading them. */
  w->data.status = HILTHR_COMPWORKER_STATUS_SIMPLE;
  w->data.first_col = firstCol;
  w->data.last_col = lastCol;
  w->data.storage.matrix = matrix;
  w->data.sharedData = shared;
  status = pthread_create(&(w->thread), NULL, fct, &(w->data));

  if (status != 0) {
    fprintf(stderr, "Error in %s, line %d: Could not create thread.\n",
      __FUNCTION__, __LINE__);
    exit(status);
  }

  return w;
}

/* Creates a queue-based computation worker, starts its thread and inserts
 * it into a cyclic singly linked list.
 *
 * list                : address of the list handle. If *list is NULL the
 *                       new node becomes the list and points to itself;
 *                       otherwise it is inserted right behind *list.
 * fct                 : thread function, must not be NULL; it receives a
 *                       pointer to the worker's data.
 * first_col, last_col : inclusive column range, 0 <= first_col <= last_col.
 * shared_data         : shared input data; its pointer must not be NULL.
 *
 * The worker starts with status HILTHR_COMPWORKER_STATUS_NOTSTARTED and an
 * empty column queue. If the thread cannot be created, a message is printed
 * to stderr and the process exits with the pthread error code.
 */
void newCompWorker(CyclicLinkedCompWorkerList** list,
    void* (*fct)(void*), int first_col, int last_col,
    CompWorkerSharedData shared_data) {
  CyclicLinkedCompWorkerList* node = NULL;
  int rc = 0;

  assert(fct != NULL);
  assert(first_col >= 0 && last_col >= 0);
  assert(first_col <= last_col);
  assert(shared_data.Unknown != NULL);

  node = (CyclicLinkedCompWorkerList*) malloc(sizeof *node);
  assert(node != NULL);

  /* Fill in everything the thread reads before starting it. */
  node->element.data.sharedData = shared_data;
  node->element.data.first_col = first_col;
  node->element.data.last_col = last_col;
  node->element.data.status = HILTHR_COMPWORKER_STATUS_NOTSTARTED;
  node->element.data.storage.queue = newMatrixColumnQueue();

  rc = pthread_create(&(node->element.thread), NULL,
                      fct, (void*) &(node->element.data));

#ifdef DIAGNOSTICS
  fprintf(DIAGNOSTICS_FH, "Created Thread for columns from %d to %d\n",
          first_col, last_col);
#endif

  if (rc != 0) {
    fprintf(stderr, "Error in %s, line %d: Could not create thread.\n",
      __FUNCTION__, __LINE__);
    exit(rc);
  }

  if (*list != NULL) {
    /* Splice the node in directly behind the list handle. */
    node->next = (*list)->next;
    (*list)->next = node;
  }
  else {
    /* First node: a cycle of length one. */
    node->next = node;
    *list = node;
  }
}

/* Frees a computation worker list node together with its column queue.
 *
 * The node and its queue must be non-NULL, and the queue must already be
 * empty (freeMatrixColumnQueue asserts this). This function neither joins
 * nor detaches the worker's thread and does not unlink the node from its
 * list; the caller has to take care of both beforehand.
 */
void destroyCompWorker(CyclicLinkedCompWorkerList* w) {
  assert(w != NULL);
  assert(w->element.data.storage.queue != NULL);
  freeMatrixColumnQueue(w->element.data.storage.queue);
  free(w);
}

/* This is a generic scheduler used for building the K and W matrices.
 * It takes a function pointer to functions that actually compute the
 * matrices, but it contains all the threading / synchronisation logic.
 *
 * targetMatrix   : matrix the computed columns are accumulated into.
 * shared         : shared input data for the workers. It is freed before
 *                  this function returns, so the caller must not free or
 *                  reuse it.
 * worker         : thread function run by every computation worker.
 * partition_from,
 * partition_to   : half-open column range [partition_from, partition_to)
 *                  that is split evenly among the computation workers.
 *
 * The boss walks round-robin over the cyclic list of computation workers
 * and takes finished columns from their queues. Depending on
 * MAX_NUMOF_FILLWORKERS it either adds each column to the target matrix
 * itself or passes it to a fill worker selected by column index. Workers
 * that report HILTHR_COMPWORKER_STATUS_FINISHED and have an empty queue are
 * removed from the list; the loop ends when the list is empty. Whenever a
 * full round yields no column, the boss sleeps for a time proportional to
 * the number of columns per worker.
 */
void genericBoss(Matrix* targetMatrix, CompWorkerSharedData shared, 
        void* (*worker)(void* data), int partition_from, int partition_to)
{
  int i = 0, actualNumberOfFillWorkers = 0, actualNumberOfCompWorkers = 0,
      size = 0, nullCycles = 0;
  CyclicLinkedCompWorkerList* compWorkerList = NULL;
  CyclicLinkedCompWorkerList* currentCompWorkerNode = NULL;

#ifdef DIAGNOSTICS
  int noFrameFromCurrentWorker = 0, bossBreaks = 0;
#endif /* DIAGNOSTICS */

#ifndef _WIN32
  struct timespec delay;
  long long delayNs = 0;
#endif /* NOT _WIN32 */

#if MAX_NUMOF_FILLWORKERS > 1
  FillWorker* fillWorker[MAX_NUMOF_FILLWORKERS];
#endif /* MAX_NUMOF_FILLWORKERS > 1 */

  size = partition_to - partition_from;
  assert(size >= 0);

  /* An empty partition needs no threads. Creating a worker for it would
   * hand newCompWorker a range with first_col > last_col. The shared data
   * is still released, as on the regular path. */
  if (size == 0) {
    free(shared.Unknown);
    return;
  }

  actualNumberOfCompWorkers = MAX(1, MIN(size/100,
                                          MAX_NUMOF_COMPWORKERS));
  actualNumberOfFillWorkers = MAX(1, MIN(actualNumberOfCompWorkers / 8,
                                          MAX_NUMOF_FILLWORKERS));

#ifndef _WIN32
  /* Computed in 64 bits, because SLEEP_T_BASE_NS * size can exceed the range
   * of int. The result is split into seconds and nanoseconds, because
   * nanosleep rejects tv_nsec values of one second or more with EINVAL,
   * which would turn the idle wait into a busy loop. */
  delayNs = (long long) SLEEP_T_BASE_NS * size / actualNumberOfCompWorkers;
  delay.tv_sec = (time_t) (delayNs / 1000000000LL);
  delay.tv_nsec = (long) (delayNs % 1000000000LL);
#endif

#ifdef DIAGNOSTICS
  fprintf(DIAGNOSTICS_FH,
    "Matrix-Size               : %d\n"
    "actualNumberOfCompWorkers : %d\n"
    "actualNumberOfFillWorkers : %d\n",
    size, actualNumberOfCompWorkers, actualNumberOfFillWorkers);
#endif /* DIAGNOSTICS */

  /* Worker i gets the columns from + i*size/n ... from + (i+1)*size/n - 1.
   * The products are formed in 64 bits to avoid int overflow. */
  for (i = 0; i < actualNumberOfCompWorkers; ++i) {
    newCompWorker(&compWorkerList, worker,
        partition_from
          + (int) ((long long) i * size / actualNumberOfCompWorkers),
        partition_from
          + (int) ((long long) (i+1) * size / actualNumberOfCompWorkers) - 1,
        shared);
  }

#if MAX_NUMOF_FILLWORKERS > 1
  for (i = 0; i < actualNumberOfFillWorkers; ++i) {
    fillWorker[i] = newFillWorker(targetMatrix);
  }
#endif /* MAX_NUMOF_FILLWORKERS > 1 */

  currentCompWorkerNode = compWorkerList;

  while (currentCompWorkerNode != NULL) {
    MatrixColumn* currentFrame = NULL;
    /* Always inspect the successor of the current node, so that it can be */
    /* unlinked without walking the whole cycle. */
    CyclicLinkedCompWorkerList* inspectedWorkerNode =
        currentCompWorkerNode->next;
    CompWorker* inspectedWorker = &(inspectedWorkerNode->element);

    if (inspectedWorker->data.status == HILTHR_COMPWORKER_STATUS_FINISHED)
    {
      /* The worker no longer appends, so its last node may be taken too. */
      currentFrame = matrixColumnQueueUnsafeShift(
                         inspectedWorker->data.storage.queue);
      if (currentFrame == NULL) {
        /* Finished and drained: retire this worker. */
        /* NOTE(review): the thread is detached, not joined, before its */
        /* data is freed. This assumes the worker function does not touch */
        /* its data after publishing FINISHED -- confirm in the workers. */
        pthread_detach(inspectedWorker->thread);
        if (inspectedWorkerNode != inspectedWorkerNode->next) {
          CyclicLinkedCompWorkerList* tmp = inspectedWorkerNode;
          inspectedWorkerNode = inspectedWorkerNode->next;
          currentCompWorkerNode->next = inspectedWorkerNode;
          destroyCompWorker(tmp);
          tmp = NULL;
        }
        else {
          /* That was the last worker: the list becomes empty. */
          destroyCompWorker(inspectedWorkerNode);
          inspectedWorkerNode = NULL;
        }
        goto end_of_loop;
      }
    }
    else {
      /* Worker still running: only take columns that are safe to take. */
      currentFrame = matrixColumnQueueSafeShift(
          inspectedWorker->data.storage.queue);
    }

#ifdef DIAGNOSTICS
    if (currentFrame == NULL)
      noFrameFromCurrentWorker++;
#endif

    if (currentFrame != NULL) {
      nullCycles = 0;
#if MAX_NUMOF_FILLWORKERS <= 1
      writeMatrixColumnToMatrix(currentFrame, targetMatrix);
#else
      assignMatrixColumnToFillWorker(currentFrame,
        fillWorker[currentFrame->col % actualNumberOfFillWorkers]);
#endif
    }
    else {
      nullCycles++;
      /* Sleep after every actualNumberOfCompWorkers consecutive misses. */
      /* Resetting the counter keeps it bounded while preserving that */
      /* rhythm. */
      if (nullCycles >= actualNumberOfCompWorkers) {
        nullCycles = 0;
#ifdef DIAGNOSTICS
        bossBreaks++;
#endif

#ifdef _WIN32
        Sleep(1);
#else
        /* No remainder pointer: an interrupted sleep must not overwrite */
        /* (and thereby permanently shorten) the configured delay. */
        nanosleep(&delay, NULL);
#endif
      }
    }

  end_of_loop:
    currentCompWorkerNode = inspectedWorkerNode;
  }

  #if MAX_NUMOF_FILLWORKERS > 1
  /* Ask all fill workers to drain their queues and stop, then wait. */
  for (i = 0; i < actualNumberOfFillWorkers; ++i)
    fillWorker[i]->data.status = HILTHR_FILLWORKER_STATUS_SHUTDOWN;
  for (i = 0; i < actualNumberOfFillWorkers; ++i)
    destroyFillWorker(fillWorker[i]);
  #endif

  #ifdef DIAGNOSTICS
  fprintf(DIAGNOSTICS_FH,
          "noFrameFromCurrentWorker: %d\n"
          "bossBreaks              : %d\n",
          noFrameFromCurrentWorker, bossBreaks);
  #endif

  free(shared.Unknown);
}

#endif

