/**************************************************************************/
/* DESCRIPTION: This file is part of the HILBERT program package for the  */
/*        numerical solution of the Laplace equation with mixed boundary  */
/*        conditions by use of BEM in 2D.                                 */
/*                                                                        */
/*        This file serves the purpose of sharing code between the        */
/*        hyper-threaded version of the operators V, W, K and N. It       */
/*        contains some constants and defaults. Furthermore, basic data   */
/*        types, used by the implementation, are defined and related      */
/*        functions are implemented.                                      */
/*                                                                        */
/*        The entries of the matrices W and K are not calculated at once. */
/*        A number of summands is calculated instead for each entry.      */
/*        These summands are added up and one finally obtains the matrix  */
/*        entry.                                                          */
/*                                                                        */
/*        The order of the calculation of the summands is unknown before  */
/*        execution as it is one of the design goals of these algorithms  */
/*        that the code may not make any assumptions on the data          */
/*        structures that describe the boundary mesh. For the             */
/*        parallelization of the integral operators this means that       */
/*        potential write-write conflicts have to be detected at run-     */
/*        time. We define and implement basic data structures, which are  */
/*        used to avoid these write-write conflicts.                      */
/*                                                                        */
/*        Three types of threads are implemented: The first type takes a  */
/*        vector and adds it to a certain column of a matrix. It is       */
/*        completely implemented in this file and referred to as          */
/*        "fillWorker".                                                   */
/*                                                                        */
/*        The second type calculates summands corresponding to a whole    */
/*        column of a matrix and stores it in a data structure that is    */
/*        suitable to be processed by a fillWorker. These threads are     */
/*        implemented in threadedK.c and threadedW.c, respectively, and   */
/*        referred to as compWorker.                                      */
/*                                                                        */
/*        Finally, the last thread type splits the matrix into parts and  */
/*        creates compWorkers for each of these parts. It periodically    */
/*        checks whether the matrix has finished with the computation of  */
/*        a column and passes these columns to the fillWorkers. The       */
/*        function avoids potential conflicts among the fillWorkers.      */
/*                                                                        */
/*        Beware: As POSIX threads may not be available, this file is     */
/*        only compiled if the pre-processor flag HILTHREADS is set.      */
/**************************************************************************/
/* VERSION: 3.1                                                           */
/**************************************************************************/
/* (C) 2009-2013 HILBERT-Team '10, '12                                    */
/* support + bug report:  hilbert@asc.tuwien.ac.at                        */
/**************************************************************************/
#ifdef HILTHREADS
#ifndef _THREADS_H_GUARD_
#define _THREADS_H_GUARD_

#ifdef _WIN32
#  include <Windows.h>
#endif

#include <stdlib.h>
#include <stdio.h>
#include <pthread.h>
#include <math.h>
#include <assert.h>

/* NUMOF_CORES is the number of cores the worker limits below are derived
 * from. It defaults to 2 and may be overridden by the build
 * (e.g. -DNUMOF_CORES=8).
 */
#ifndef NUMOF_CORES
#define NUMOF_CORES                              2
#endif

/* Upper bounds for the number of worker threads.
 *
 * MAX_NUMOF_FILLWORKERS is one fillWorker per eight cores, but never less
 * than one. The expression is spelled out so that this header does not
 * depend on a MIN/MAX macro being defined elsewhere. The previous
 * MIN(NUMOF_CORES/8,1) evaluated to 0 for fewer than eight cores and could
 * never exceed 1.
 *
 * MAX_NUMOF_COMPWORKERS is one compWorker per core.
 *
 * NOTE(review): the unit and use of MAX_FRAME_SIZE are not visible in this
 * header - confirm against the implementation.
 */
#define MAX_NUMOF_FILLWORKERS \
  ((((NUMOF_CORES) / 8) > 1) ? ((NUMOF_CORES) / 8) : 1)
#define MAX_NUMOF_COMPWORKERS                    (NUMOF_CORES)
#define MAX_FRAME_SIZE                           1024

/* Status codes of the two worker types. The HILTHR_FILLWORKER_* values
 * belong to fillWorkers, the HILTHR_COMPWORKER_* values to compWorkers.
 * NOTE(review): presumably these are the values stored in the status field
 * of FillWorkerData and CompWorkerData - confirm against the
 * implementation.
 */
#define HILTHR_FILLWORKER_STATUS_NOTSTARTED      (-1)
#define HILTHR_FILLWORKER_STATUS_WORKING         (0)
#define HILTHR_FILLWORKER_STATUS_FINISHED        (1)
#define HILTHR_FILLWORKER_STATUS_SHUTDOWN        (2)
#define HILTHR_COMPWORKER_STATUS_NOTSTARTED      (-1)
#define HILTHR_COMPWORKER_STATUS_WORKING         (0)
#define HILTHR_COMPWORKER_STATUS_FINISHED        (1)
#define HILTHR_COMPWORKER_STATUS_SIMPLE          (2)

/* DIAGNOSTICS_FH is only defined when the build defines DIAGNOSTICS; it
 * then expands to stderr.
 * NOTE(review): presumably the file handle diagnostic messages are written
 * to - confirm against the implementation.
 */
#if defined(DIAGNOSTICS)
#  define DIAGNOSTICS_FH stderr
#endif

/* First, we define a number of data structures and helper functions to use
 * these structures. All of them do the obvious, nevertheless they are
 * documented below.
 */

/* Data type: Matrix
 * DESCRIPTION: Describes a rows x cols matrix of doubles. The entries do
 *              not live inside the structure; storage points to an array
 *              of rows*cols doubles that is interpreted as the matrix.
 */
typedef struct _Matrix_ {
  double *storage;  /* array with rows*cols matrix entries */
  int     rows;     /* number of rows */
  int     cols;     /* number of columns */
} Matrix;

/* newMatrix allocates a Matrix structure and initializes it with the
 * values passed as arguments.
 * INPUT: rows, cols - number of rows and columns of the matrix.
 *        storage    - array of doubles with length rows*cols.
 * OUTPUT: A pointer to the new Matrix structure.
 */
Matrix *newMatrix(int rows, int cols, double *storage);

/* getMatrixRows reports how many rows a Matrix has.
 * INPUT: m - pointer to the Matrix structure.
 * OUTPUT: The number of rows of that matrix.
 */
int getMatrixRows(Matrix *m);

/* getMatrixCols reports how many columns a Matrix has.
 * INPUT: m - pointer to the Matrix structure.
 * OUTPUT: The number of columns of that matrix.
 */
int getMatrixCols(Matrix *m);

/* getMatrixStorage gives access to the array that holds the matrix
 * entries.
 * INPUT: m - pointer to the Matrix structure.
 * OUTPUT: The array of doubles with length rows*cols that backs the
 *        rows x cols matrix.
 */
double *getMatrixStorage(Matrix *m);

/* Data type: MatrixColumn
 * DESCRIPTION: Holds summands for the entries of a sub-matrix. For the
 *              time being a sub-matrix is restricted to a single column,
 *              so a MatrixColumn usually carries the values of one column
 *              of a matrix.
 */
typedef struct _MatrixColumn_ {
  double *values;  /* the summands, one array for the whole column */
  int     rows;    /* number of rows of the matrix the column belongs to */
  int     col;     /* number of the column within that matrix */
} MatrixColumn;

/* newMatrixColumn allocates a MatrixColumn structure together with an
 * array of doubles that is long enough to hold a whole column.
 * INPUT: col  - number of the column that the structure represents within
 *               an unspecified matrix.
 *        rows - number of rows of that matrix.
 *        Both col and rows must be positive integers.
 * OUTPUT: A pointer to the new MatrixColumn structure.
 */
MatrixColumn *newMatrixColumn(int col, int rows);

/* destroyMatrixColumn releases all memory used by a MatrixColumn, i.e. the
 * structure itself and its values array.
 * INPUT: c - pointer to the MatrixColumn structure.
 * OUTPUT: Always NULL (as a pointer to a MatrixColumn).
 */
MatrixColumn *destroyMatrixColumn(MatrixColumn *c);

/* Data type: MatrixColumnQueueElement
 * DESCRIPTION: One node of a matrix column queue. Such a queue is a linked
 *              list of matrix columns plus two pointers, one to the first
 *              and one to the last element of the list.
 */
typedef struct _MatrixColumnQueueElement_ {
  MatrixColumn *element;                    /* the column held by this node */
  struct _MatrixColumnQueueElement_ *next;  /* link to the following node */
} MatrixColumnQueueElement;

/* Data type: MatrixColumnQueue
 * DESCRIPTION: Head of a linked list of MatrixColumnQueueElements. Both
 *              the first and the last element are tracked so that shifts
 *              as well as pushes run in constant time.
 */
typedef struct _MatrixColumnQueue_ {
  MatrixColumnQueueElement *first;  /* first element of the list */
  MatrixColumnQueueElement *last;   /* last element of the list */
} MatrixColumnQueue;

/* newMatrixColumnQueue creates a new, empty MatrixColumnQueue.
 * INPUT: None. The parameter list is spelled (void) so that this is a real
 *        prototype; before C23 an empty list () declares a function with
 *        unspecified parameters, and calls with stray arguments would not
 *        be diagnosed.
 * OUTPUT: A pointer to an empty MatrixColumnQueue structure.
 */
MatrixColumnQueue* newMatrixColumnQueue(void);

/* freeMatrixColumnQueue releases a MatrixColumnQueue. The queue is
 * required to be empty when this function is called.
 * INPUT: queue - pointer to an empty MatrixColumnQueue structure.
 * OUTPUT: None.
 */
void freeMatrixColumnQueue(MatrixColumnQueue *queue);

/* matrixColumnQueueAppend adds one MatrixColumn to a MatrixColumnQueue.
 * INPUT: queue  - pointer to the MatrixColumnQueue.
 *        column - pointer to the MatrixColumn to append.
 * OUTPUT: None; the queue structure is modified.
 */
void matrixColumnQueueAppend(MatrixColumnQueue *queue, MatrixColumn *column);

/* matrixColumnQueueAppendQueue moves all MatrixColumns of the queue q2 to
 * the queue q1.
 * INPUT: q1 - pointer to the receiving MatrixColumnQueue. It is modified
 *             and afterwards contains the MatrixColumnQueueElements of
 *             both q1 and q2.
 *        q2 - pointer to the MatrixColumnQueue to append. It is freed by
 *             this function and can't be used anymore afterwards.
 * OUTPUT: None.
 */
void matrixColumnQueueAppendQueue(MatrixColumnQueue *q1,
                                  MatrixColumnQueue *q2);

/* matrixColumnQueueHasNext tells whether a MatrixColumnQueue still
 * contains elements.
 * INPUT: queue - pointer to the MatrixColumnQueue structure.
 * OUTPUT: 1 if there are still elements in the queue, 0 otherwise.
 */
int matrixColumnQueueHasNext(MatrixColumnQueue *queue);

/* matrixColumnQueueSafeShift returns the first MatrixColumn of the queue,
 * but only if the queue holds another column besides it. If one or no
 * MatrixColumn is left in the queue, the function returns NULL.
 *
 * Use this function whenever another thread might call
 * matrixColumnQueueAppend on the same queue at the same time, since
 * shifting concurrently with an append may otherwise lead to unexpected
 * behaviour.
 * INPUT: queue - pointer to the MatrixColumnQueue.
 * OUTPUT: The first MatrixColumn pointer, or NULL if the queue is empty or
 *        it is not safe to remove a MatrixColumn.
 */
MatrixColumn *matrixColumnQueueSafeShift(MatrixColumnQueue *queue);

/* matrixColumnQueueUnsafeShift returns the first MatrixColumn of the
 * MatrixColumnQueue, or NULL if the queue holds no column.
 *
 * Whenever another thread might call matrixColumnQueueAppend at the same
 * time, use matrixColumnQueueSafeShift instead. Note that the safe variant
 * is thread-safe because it behaves differently, not because it is
 * synchronized. Usually at least one unsafe shift is still needed.
 * INPUT: queue - pointer to the MatrixColumnQueue.
 * OUTPUT: The first MatrixColumn pointer, or NULL if there is no pointer
 *        in the queue.
 */
MatrixColumn *matrixColumnQueueUnsafeShift(MatrixColumnQueue *queue);

/* Data type: FillWorkerData
 * DESCRIPTION: All data that the Boss shares with a worker that copies the
 *              values of a matrix block into a target matrix. The
 *              functions related to the data type FillWorker operate on
 *              this structure.
 */
typedef struct _FillWorkerData_ {
  int                status;   /* worker status code */
  Matrix            *target;   /* matrix the worker writes to */
  MatrixColumnQueue *columns;  /* queue of matrix columns */
} FillWorkerData;

/* Data type: FillWorker
 * DESCRIPTION: The Boss maintains one such structure for each FillWorker.
 *              It holds the thread id and the data shared with that
 *              worker.
 */
typedef struct _FillWorker_ {
  pthread_t      thread;  /* id of the worker thread */
  FillWorkerData data;    /* data shared between the Boss and the worker */
} FillWorker;

/* newFillWorker starts a new FillWorker thread. The thread waits until it
 * gets MatrixColumns assigned and uses them to update a certain matrix.
 * INPUT: targetMatrix - pointer to the Matrix structure the FillWorker
 *        writes its data to. The structure itself is not changed, but the
 *        memory it refers to is updated by the FillWorker.
 * OUTPUT: A pointer to a FillWorker structure.
 */
FillWorker *newFillWorker(Matrix *targetMatrix);

/* assignMatrixColumnToFillWorker hands a MatrixColumn over to a
 * FillWorker.
 * INPUT: column - the MatrixColumn to assign.
 *        worker - the FillWorker that receives the column.
 * OUTPUT: None.
 */
void assignMatrixColumnToFillWorker(MatrixColumn *column, FillWorker *worker);

/* writeMatrixColumnToMatrix writes a MatrixColumn into a Matrix. The
 * MatrixColumn is freed afterwards.
 * INPUT: column - pointer to the MatrixColumn to write.
 *        matrix - pointer to the Matrix that receives the values.
 * OUTPUT: None.
 */
void writeMatrixColumnToMatrix(MatrixColumn *column, Matrix *matrix);

/* theFillWorker contains the implementation of the FillWorker thread. It
 * is NEVER CALLED DIRECTLY; the only way to use it is through the function
 * newFillWorker.
 * NOTE(review): data is presumably the FillWorkerData of the worker -
 * confirm against the implementation.
 */
void *theFillWorker(void *data);

/* destroyFillWorker joins the pthread that newFillWorker created and frees
 * the FillWorker data structure. The targetMatrix structure is left
 * unchanged and must be freed manually by the caller.
 * INPUT: worker - pointer to the FillWorker.
 * OUTPUT: None.
 */
void destroyFillWorker(FillWorker *worker);

/* Data type: CompWorkerSharedDataVLike
 * DESCRIPTION: Data shared among the CompWorkers that compute a matrix
 *              from nothing but the boundary mesh and an admissibility
 *              constant.
 */
typedef struct _CompWorkerSharedDataVLike_ {
  int           nC;           /* boundary mesh: size of coordinates */
  int           nE;           /* boundary mesh: size of elements */
  const double *coordinates;  /* boundary mesh coordinates (read-only) */
  const double *elements;     /* boundary mesh elements (read-only) */
  double        eta;          /* admissibility constant */
} CompWorkerSharedDataVLike;

/* Data type: CompWorkerSharedDataNLike
 * DESCRIPTION: Data shared among the CompWorkers that compute a matrix
 *              from the boundary mesh, the volume mesh and an
 *              admissibility constant.
 */
typedef struct _CompWorkerSharedDataNLike_ {
  int           nC;           /* boundary mesh: size of coordinates */
  int           nE;           /* boundary mesh: size of elements */
  const double *coordinates;  /* boundary mesh coordinates (read-only) */
  const double *elements;     /* boundary mesh elements (read-only) */
  int           nVert;        /* volume mesh: size of vertices */
  int           nT;           /* volume mesh: size of triangles */
  const double *vertices;     /* volume mesh vertices (read-only) */
  const double *triangles;    /* volume mesh triangles (read-only) */
  double        eta;          /* admissibility constant */
} CompWorkerSharedDataNLike;

/* Data type: CompWorkerSharedData
 * DESCRIPTION: Union over the data that is shared among the different
 *              *computation workers*. A computation worker that needs its
 *              own data format should get a matching member added to this
 *              union; alternatively the Unknown member can carry generic
 *              shared data.
 */
typedef union _CompWorkerSharedData_ {
  CompWorkerSharedDataVLike *VLike;    /* boundary mesh only */
  CompWorkerSharedDataNLike *NLike;    /* boundary mesh and volume mesh */
  void                      *Unknown;  /* generic shared data */
} CompWorkerSharedData;

/* newCompWorkerSharedDataVLike allocates a CompWorkerSharedDataVLike
 * structure, initializes it with the provided values and returns it as a
 * CompWorkerSharedData.
 * INPUT: coordinates, elements - arrays of doubles of length nC*2 and
 *        nE*2, respectively. They describe the boundary mesh; consult the
 *        documentation for more details.
 *        eta - a double between 0. and 1.
 * OUTPUT: The new CompWorkerSharedDataVLike structure as a
 *        CompWorkerSharedData.
 */
CompWorkerSharedData newCompWorkerSharedDataVLike(
    const double *coordinates, const double *elements,
    int nC, int nE, double eta);

/* freeCompWorkerSharedDataVLike releases the memory used by the data
 * structure. The mesh arrays it refers to are left unchanged (the
 * structure holds them as pointers to const double).
 * INPUT: data - pointer to a CompWorkerSharedDataVLike structure.
 * OUTPUT: None.
 */
void freeCompWorkerSharedDataVLike(CompWorkerSharedDataVLike *data);

/* newCompWorkerSharedDataNLike allocates a CompWorkerSharedDataNLike
 * structure, initializes it with the provided values and returns it as a
 * CompWorkerSharedData.
 * INPUT: coordinates, elements - arrays of doubles of length nC*2 and
 *        nE*2, respectively. They describe the boundary mesh; consult the
 *        documentation for more details.
 *        vertices, triangles - arrays of doubles documented as having
 *        length nVert*3 and nT*3, respectively. They describe the volume
 *        mesh; again, consult the documentation for more details.
 *        eta - a double between 0. and 1.
 * OUTPUT: The new CompWorkerSharedDataNLike structure as a
 *        CompWorkerSharedData.
 */
CompWorkerSharedData newCompWorkerSharedDataNLike(
    const double *coordinates, const double *elements, int nC, int nE,
    const double *vertices, const double *triangles, int nVert, int nT,
    double eta);

/* freeCompWorkerSharedDataNLike releases the memory used by the data
 * structure. The mesh arrays it refers to are left unchanged (the
 * structure holds them as pointers to const double).
 * INPUT: data - pointer to a CompWorkerSharedDataNLike structure.
 * OUTPUT: None.
 */
void freeCompWorkerSharedDataNLike(CompWorkerSharedDataNLike *data);

/* Data type: CompWorkerData
 * DESCRIPTION: All data that is shared between one computation worker and
 *              the Boss.
 */
typedef struct _CompWorkerData_ {
  int status;                       /* worker status code */
  int first_col;                    /* start of the worker's column range */
  int last_col;                     /* end of the worker's column range */
  CompWorkerSharedData sharedData;  /* data shared among all CompWorkers */
  /* A CompWorker either collects the calculated values in a queue or
   * writes them directly to the matrix. */
  union {
    MatrixColumnQueue *queue;
    Matrix *matrix;
  } storage;
} CompWorkerData;

/* Data type: CompWorker
 * DESCRIPTION: The Boss keeps one CompWorker structure per computation
 *              worker. It holds the thread id and the data shared with
 *              that worker.
 */
typedef struct _CompWorker_ {
  pthread_t      thread;  /* id of the worker thread */
  CompWorkerData data;    /* data shared between the Boss and the worker */
} CompWorker;

/* Data type: CyclicLinkedCompWorkerList
 * DESCRIPTION: Node of a cyclic list that contains all computation
 *              workers. The Boss queries the status of the workers in
 *              round-robin fashion by walking this list. As soon as a
 *              worker finishes, the Boss can unlink it from the list and
 *              does not have to query it again.
 */
typedef struct _CyclicLinkedCompWorkerList_ {
  CompWorker element;                         /* the worker, held by value */
  struct _CyclicLinkedCompWorkerList_ *next;  /* following node in the cycle */
} CyclicLinkedCompWorkerList;

/* newCompWorkerSimple creates a CompWorker that needs no matrix
 * synchronisation because it writes directly to the matrix. A new thread
 * is started by this function as well.
 * INPUT: fct       - pointer to the actual worker function.
 *        matrix    - the Matrix structure handed to the worker.
 *        first_col,
 *        last_col  - the range of columns the worker should work on.
 *        shared    - the CompWorkerSharedData handed to the worker.
 * OUTPUT: A pointer to a CompWorker (including the id of the created
 *        thread).
 */
CompWorker *newCompWorkerSimple(void *(*fct)(void *), Matrix *matrix,
                                int first_col, int last_col,
                                CompWorkerSharedData shared);

/* newCompWorker creates a new CompWorker structure, starts a new thread
 * and appends the worker to a cyclic list of CompWorkers.
 * INPUT: list        - reference to a pointer to a
 *                      CyclicLinkedCompWorkerList.
 *        fct         - pointer to the function that actually implements
 *                      the computation of the matrix.
 *        first_col,
 *        last_col    - the range of columns the CompWorker is working on.
 *        shared_data - a CompWorkerSharedData.
 * OUTPUT: None; the CompWorker structure is inserted into the cyclic list
 *        of CompWorkers (list).
 */
void newCompWorker(CyclicLinkedCompWorkerList **list, void *(*fct)(void *),
                   int first_col, int last_col,
                   CompWorkerSharedData shared_data);

/****************************** genericBoss *******************************/
/* DESCRIPTION: Creates a herd of CompWorkers and FillWorkers to build any
 *        matrix for which a CompWorker has been written. For examples see
 *        the source files threadedW.c and threadedK.c. For extensive
 *        documentation on how to write your own CompWorkers and how this
 *        function works, please consult the external documentation.
 * INPUT: targetMatrix - a Matrix structure.
 *        shared - a CompWorkerSharedData union, usually holding either a
 *        pointer to a CompWorkerSharedDataVLike or to a
 *        CompWorkerSharedDataNLike data structure. It contains all data
 *        required by the CompWorker.
 *        worker - pointer to the CompWorker function. It takes a void
 *        pointer that can be cast to CompWorkerData* and returns a void
 *        pointer, which is always NULL.
 *        partition_from, partition_to - a range that includes
 *        partition_from but excludes partition_to. The range is split into
 *        evenly divided parts, and for each part a worker is created that
 *        works on that index range.
 * OUTPUT: None; the function modifies targetMatrix, i.e. the storage the
 *        Matrix is actually stored at.
 */
/**************************************************************************/
void genericBoss(Matrix *targetMatrix, CompWorkerSharedData shared,
                 void *(*worker)(void *data),
                 int partition_from, int partition_to);
#endif
#endif

