/**************************************************************************/
/* DESCRIPTION: This file is part of the HILBERT program package for the  */
/*        numerical solution of the Laplace equation with mixed boundary  */
/*        conditions by use of BEM in 2D.                                 */
/*                                                                        */
/*        This file contains the function computeNThreaded, which is a    */
/*        threaded version of the function computeN. It is used in        */
/*        exactly the same way as computeN and has the same output as     */
/*        computeN. This function however is optimized for systems with   */
/*        multiple cores and distributes the work load equally among all  */
/*        cores. For this purpose it uses threads.h on the one hand and   */
/*        POSIX threads on the other hand.                                */
/*                                                                        */
/*        Beware: As POSIX threads may not be available, this file is     */
/*        only compiled if the pre-processor flag HILTHREADS is set.      */
/**************************************************************************/
/* VERSION: 3.1                                                           */
/**************************************************************************/
/* (C) 2009-2013 HILBERT-Team '10, '12                                    */
/* support + bug report:  hilbert@asc.tuwien.ac.at                        */
/**************************************************************************/
#ifdef HILTHREADS

#include "newtonPotential.h"
#include "threadedN.h"
#include "threads.h"

/* Two-argument minimum / maximum. Both are plain macros, so each argument
 * may be evaluated more than once: only pass side-effect-free expressions. */
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))

/*
 * computeNThreaded: threaded driver for the assembly of N.
 *
 * The triangle (column) range 0..nT-1 is cut into contiguous slices, one
 * computeNThreadedWorker job is created per slice, and the function blocks
 * until every job has been joined.
 *
 * Number of jobs: one per 100 triangles (integer division), but at least 1
 * and at most MAX_NUMOF_COMPWORKERS. All jobs except the last one receive
 * nT / <number of jobs> columns; the last one takes everything that is left
 * up to and including column nT-1.
 *
 * Parameters:
 *   N            buffer handed to newMatrix; the workers write their
 *                results through it
 *   coordinates, elements, nC, nE
 *   vertices, triangles, nVert, nT, eta
 *                forwarded unchanged to newCompWorkerSharedDataNLike and
 *                read by the workers
 *
 * NOTE(review): the Matrix wrapper returned by newMatrix is not released in
 * this function - confirm whether it is owned elsewhere or leaks here.
 * NOTE(review): newCompWorkerSimple presumably starts the thread right
 * away (its result is only joined and freed here) - confirm in threads.h.
 */
void computeNThreaded(double* N, const double* coordinates,
    const double* elements, const double* vertices,
    const double* triangles, int nC, int nE, int nVert, int nT,
    double eta)
{
  CompWorker* workers[MAX_NUMOF_COMPWORKERS];
  Matrix* target = newMatrix(nT, nE, N);
  CompWorkerSharedData shared =
      newCompWorkerSharedDataNLike(coordinates, elements, nC, nE,
          vertices, triangles, nVert, nT, eta);
  int cappedCount = MIN(MAX_NUMOF_COMPWORKERS, nT/100);
  int workerCount = MAX(1, cappedCount);
  int chunk = nT / workerCount;
  int lastWorker = workerCount - 1;
  int nextCol = 0;
  int w = 0;

  /* Every worker but the last one gets exactly `chunk` columns. */
  while (w < lastWorker)
  {
    workers[w] = newCompWorkerSimple(computeNThreadedWorker, target,
                                     nextCol, nextCol + chunk - 1, shared);
    nextCol += chunk;
    ++w;
  }

  /* The last worker also picks up the remainder of the division. */
  workers[lastWorker] =
    newCompWorkerSimple(computeNThreadedWorker, target, nextCol,
                        nT-1, shared);

  /* Wait for all workers and release their handles. */
  for (w = 0; w <= lastWorker; ++w)
  {
    pthread_join(workers[w]->thread, NULL);
    free(workers[w]);
  }

  freeCompWorkerSharedDataNLike(shared.NLike);
}

/*
 * computeNThreadedWorker: thread entry point for one slice of N.
 *
 * `data` is interpreted as a CompWorkerData*. For every triangle index j in
 * [first_col, last_col] (both inclusive) and every boundary element k in
 * [0, nE-1] the entry N[k + j*nE] is set to computeNkj(a, b, n1, n2, n3,
 * eta), where a, b are the end points of element k and n1, n2, n3 are the
 * corners of triangle j.
 *
 * Index layout as used below:
 *   triangles[j + c*nT]  (c = 0,1,2)  1-based vertex number of corner c
 *   vertices[i + c*nVert] (c = 0,1)   component c of vertex i
 *   elements[k + c*nE]   (c = 0,1)    1-based node number of end point c
 *   coordinates[i + c*nC] (c = 0,1)   component c of node i
 *
 * Always returns NULL.
 */
void* computeNThreadedWorker(void* data)
{
  CompWorkerData* job = (CompWorkerData*) data;

  const double* coordinates = job->sharedData.NLike->coordinates;
  const double* elements    = job->sharedData.NLike->elements;
  const double* vertices    = job->sharedData.NLike->vertices;
  const double* triangles   = job->sharedData.NLike->triangles;
  int nC        = job->sharedData.NLike->nC;
  int nE        = job->sharedData.NLike->nE;
  int nVert     = job->sharedData.NLike->nVert;
  int nT        = job->sharedData.NLike->nT;
  double eta    = job->sharedData.NLike->eta;
  double* N     = job->storage.matrix->storage;

  int tri = 0, elem = 0;
  int corner1 = 0, corner2 = 0, corner3 = 0;
  int startNode = 0, endNode = 0;
  double v1[2], v2[2], v3[2];
  double start[2], end[2];

  for (tri = job->first_col; tri <= job->last_col; ++tri)
  {
    /* Corners of the current triangle (stored 1-based, hence the -1). */
    corner1 = (int) triangles[tri]-1;
    corner2 = (int) triangles[tri+nT]-1;
    corner3 = (int) triangles[tri+2*nT]-1;

    v1[0] = vertices[corner1];
    v1[1] = vertices[corner1+nVert];
    v2[0] = vertices[corner2];
    v2[1] = vertices[corner2+nVert];
    v3[0] = vertices[corner3];
    v3[1] = vertices[corner3+nVert];

    for (elem = 0; elem < nE; ++elem)
    {
      /* End points of the current boundary element (also 1-based). */
      startNode = (int) elements[elem]-1;
      endNode   = (int) elements[elem+nE]-1;

      start[0] = coordinates[startNode];
      start[1] = coordinates[startNode+nC];
      end[0]   = coordinates[endNode];
      end[1]   = coordinates[endNode+nC];

      N[elem+tri*nE] = computeNkj(start, end, v1, v2, v3, eta);
    }
  }

  return NULL;
}

#endif
