/**************************************************************************/
/* DESCRIPTION: This file is part of the HILBERT program package for the  */
/*        numerical solution of the Laplace equation with mixed boundary  */
/*        conditions by use of BEM in 2D. It provides functions to        */
/*        compute single layer potential (slp) type integrals.            */
/*                                                                        */
/*        This file contains only the implementation. For detailed        */
/*        documentation see singleLayerPotential.h                        */
/**************************************************************************/
/* VERSION: 3.1                                                           */
/**************************************************************************/
/* (C) 2009-2013 HILBERT-Team '09, '10, '12                               */
/* support + bug report:  hilbert@asc.tuwien.ac.at                        */
/**************************************************************************/
#include <stdlib.h>

#include "singleLayerPotential.h"
#include "constants.h"

double slp(int k, double u[2], double v[2]) {
  /* Returns the single value  int_{-1}^1 s^k log |su+v|^2 ds.
   * All moments 0,...,k are computed by slpIterative; only the last
   * one is kept and the temporary table is released again.
   */
  double* moments = slpIterative(k, u, v);
  const double kthMoment = moments[k];

  free(moments);
  return kthMoment;
}

double* slpIterative(int k, double u[2], double v[2]) {
  /* INPUT:  Vectors \u, \v \in \R^2 (not both zero), an integer k >= 0.
   * OUTPUT: A freshly malloc'ed array val of length k+1 that is given by
   *           val[i] = int_{-1}^1 s^i log |su+v|^2 ds,   i = 0,...,k.
   *         The caller owns val and has to free() it.
   *         NULL is returned if k < 0 or if the allocation fails.
   *
   * With a = <u,u>, b = 2<u,v>, c = <v,v> there holds
   *   |su+v|^2 = a*s^2 + b*s + c =: q(s).
   * val[0] and val[1] are evaluated in closed form, val[i] for i >= 2 by
   * a recursion in val[i-1] and val[i-2].
   */
  int i = 0;
  double a = u[0]*u[0] + u[1]*u[1];         /* a = <u,u> */
  double b = 2 * ( u[0]*v[0] + u[1]*v[1] ); /* b = 2 <u,v> */
  double c = v[0]*v[0] + v[1]*v[1];         /* c = <v,v> */
  double tmp = 0., D = 0.;
  /* u is treated as the zero vector if both components are below EPS. *
   * The integrand is then the constant log(c).                         */
  int uIsZero = (fabs(u[0]) < EPS && fabs(u[1]) < EPS);
  double* val = NULL;

  /* Ensure that discriminant is either positive or zero */
  tmp = 4*a*c - b*b;
  assert(fabs(u[0]) > EPS || fabs(u[1]) > EPS
          || fabs(v[0]) > EPS || fabs(v[1]) > EPS);
  assert(tmp >= -fabs(EPS*4*a*c)); /* By theory there holds tmp >= 0. */

  /* A negative k would request an array without room for val[0]. */
  if (k < 0)
    return NULL;

  val = malloc(sizeof(double)*((size_t)k+1));
  if (val == NULL)
    return NULL;

  /* D = sqrt(4ac - b^2); values within rounding distance of zero are *
   * clamped to D = 0, i.e., u and v are treated as parallel.         */
  if (tmp > EPS*4*a*c)
    D = sqrt(tmp);
  else
    D = 0.;

  /* The case k=0 */
  if (uIsZero) {
    val[0] = 2*log(c);
  }
  else if (D == 0.) {
    /* q has a double root, hence q(s) = (2*a*s+b)^2 / (4*a) and       *
     * q(+-1) = 0.25*(b+-2a)^2/a. A term is skipped if the root sits   *
     * at the respective endpoint, where x*log(x) tends to zero.       */
    tmp = b + 2*a;
    if (fabs(tmp) > EPS*a)
      val[0] = tmp * log( 0.25*tmp*tmp /a );
    else
      val[0] = 0;
    tmp = b - 2*a;
    if (fabs(tmp) > EPS*a)
      val[0] -= tmp * log( 0.25*tmp*tmp /a );
    val[0] = 0.5*val[0] /a - 4;
  }
  else { /* case D > 0 */
    /* val[0] temporarily holds the angle                              *
     *   atan((b+2a)/D) - atan((b-2a)/D)  in (0,pi),                   *
     * whose tangent is D/(c-a). The three branches select the correct *
     * branch of the arctangent.                                       */
    tmp = c - a;
    if (fabs(tmp) < EPS*c)
      val[0] = 0.5*M_PI;
    else if (a < c)
      val[0] = atan( D /tmp );
    else
      val[0] = atan( D /tmp ) + M_PI;

    val[0] = ( 0.5*( (b+2*a) * log(a+b+c) - (b-2*a) * log(a-b+c) )
                + D*val[0]) / a - 4;
  }
  if (k == 0)
    return val;

  /* The case k=1 (k >= 1 is guaranteed at this point) */
  if (uIsZero) {
    val[1] = 0.; /* odd moment of a constant */
  }
  else {
    /* val[0] holds \int_{-1}^{+1} \log(a*s^2+b*s+c) ds. Integrating  *
     * q'(s)*log(q(s)) = (2*a*s+b)*log(q(s)) exactly gives            *
     *   2*a*val[1] + b*val[0] = q(1)log(q(1)) - q(-1)log(q(-1)) - 2b. */
    val[1] = -b*(2+val[0]);

    tmp = a+b+c;   /* q(1) */
    if (fabs(tmp) > EPS*a)
      val[1] += tmp * log(tmp);

    tmp = a-b+c;   /* q(-1) */
    if (fabs(tmp) > EPS*a)
      val[1] -= tmp * log(tmp);

    val[1] /= (2*a);
  }
  if (k == 1)
    return val;

  /* The case k>=2 */
  for (i=2; i <= k; ++i) {
    if (uIsZero) {
      /* log(c) * int_{-1}^1 s^i ds */
      if (i%2 == 0)
        val[i] = 2*log(c)/(double)(i+1);
      else
        val[i] = 0.;
    }
    else {
      /* Recursion:                                                    *
       *   (i+1)*a*val[i] = boundary terms - i*b*val[i-1]              *
       *                                   - (i-1)*c*val[i-2]          */
      tmp = a+b+c;   /* q(1) */
      if (fabs(tmp) > a*EPS)
        val[i] = tmp*log(tmp);
      else
        val[i] = 0.;

      tmp = a-b+c;   /* q(-1); enters with sign -(-1)^(i-1) */
      if (i % 2 == 0) {
        if (fabs(tmp) > a*EPS)
          val[i] += tmp*log(tmp);
        val[i] -= 4*a/(i+1);
      }
      else {
        if (fabs(tmp) > a*EPS)
          val[i] -= tmp*log(tmp);
        val[i] -= 2*b/i;
      }

      val[i] -= i*b*val[i-1]+(i-1)*c*val[i-2];
      val[i] /= ((i+1)*a);
    }
  }

  return val;
}

double doubleSlp(int k, int l, double u[], double v[], double w[]) {
  /* Double-integral counterpart of slp() for vectors u, v, w \in \R^2 and
   * exponents k, l. The degenerate branches below are consistent with
   *   int_{-1}^1 int_{-1}^1 s^k t^l log |su+tv+w| dt ds
   * (e.g. u = v = 0 and k, l even yields 2/((k+1)(l+1)) * log |w|^2).
   * NOTE(review): confirm the exact definition in singleLayerPotential.h.
   *
   * Structure:
   *   - u and/or v numerically zero: reduce to a closed form or to slp().
   *   - u, v parallel:   recursion built on tables from slpIterative().
   *   - u, v independent: two-index recursion on the work array tmp.
   *
   * NOTE(review): none of the malloc()/slpIterative() results are checked
   * for NULL before they are dereferenced.
   */
  int i = 1;
  double output = 0.;
  double wpv[2], wmv[2], wpu[2], wmu[2];  /* w+v, w-v, w+u, w-u */
  /* Tables of slpIterative values; name = direction vector + offset,    *
   * e.g. memTableUWpv[i] belongs to direction u and offset w+v.         */
  double* memTableUWpv = NULL;
  double* memTableUWmv = NULL;
  double* memTableVWpu = NULL;
  double* memTableVWmu = NULL;
  double normUSq = u[0]*u[0]+u[1]*u[1];   /* |u|^2 */
  double normVSq = v[0]*v[0]+v[1]*v[1];   /* |v|^2 */
  double normWSq = w[0]*w[0]+w[1]*w[1];   /* |w|^2 */
  double detUV = u[0]*v[1] - u[1]*v[0];   /* det(u,v); zero iff u, v parallel */

  /* Both u and v vanish: only even k and even l give a non-zero value. */
  if (normUSq < EPS && normVSq < EPS)
  {
    if (k%2 == 0)
    {
      if (l%2 == 0)
      {
        /* w is numerically zero as well: 0 is returned instead of
         * evaluating log(0). */
        if (normWSq < EPS)
          return 0;
        else
          return (double)2./(double)((k+1)*(l+1))*log(w[0]*w[0]+w[1]*w[1]);
      }
    }
    return 0.;
  }
  /* Only u vanishes: zero for odd k, otherwise slp(l, v, w)/(k+1). */
  else if (normUSq < EPS)
  {
    if (k%2 == 0)
      return (double) 1./(k+1) * slp(l, v, w);
    else
      return 0.;
  }
  /* Only v vanishes: zero for odd l, otherwise slp(k, u, w)/(l+1). */
  else if (normVSq < EPS)
  {
    if (l%2 == 0)
      return (double) 1./(l+1) * slp(k, u, w);
    else
      return 0.;
  }

  wpv[0] = w[0] + v[0]; wpv[1] = w[1] + v[1];
  wmv[0] = w[0] - v[0]; wmv[1] = w[1] - v[1];
  wpu[0] = w[0] + u[0]; wpu[1] = w[1] + u[1];
  wmu[0] = w[0] - u[0]; wmu[1] = w[1] - u[1];

  if (fabs(detUV) < EPS*sqrt(normUSq*normVSq)) { /* u,v parallel */
    double mu = 0.;  /* scalar with v = mu*u */

    /* Tables in direction u are needed up to order k, tables in
     * direction v up to order k+l+1. */
    memTableUWpv = slpIterative(k, u, wpv);
    memTableUWmv = slpIterative(k, u, wmv);
    memTableVWpu = slpIterative(k+l+1, v, wpu);
    memTableVWmu = slpIterative(k+l+1, v, wmu);

    /* Divide by the component of u that is larger in modulus. */
    if (fabs(u[0]) < fabs(u[1]))
      mu = v[1] / u[1];
    else
      mu = v[0] / u[0];

    /* Start value; for k = l = 0 this is exactly the expression used in
     * the parallel branch of computeWijAnalytic.
     * NOTE(review): presumably the value for exponents (0, k+l). */
    output = memTableUWpv[0] - mu*memTableVWpu[k+l+1] +
              mu*memTableVWmu[k+l+1];
    if ((k+l) % 2 == 0)
      output += memTableUWmv[0];
    else
      output -= memTableUWmv[0];
    output /= (2*(k+l+1));

    /* k update steps, each one rescaling the previous value and adding
     * table entries of order i (direction u) and l+k+1-i (direction v).
     * NOTE(review): presumably step i moves from exponents (i-1, k+l-i+1)
     * to (i, k+l-i), ending at (k, l). */
    for (i = 1; i <= k; ++i) {
      output *= 2*i*mu;
      output += memTableUWpv[i] - mu*memTableVWpu[l+k+1-i]; 

      /* sign of the w-v table depends on the parity of k+l-i */
      if ((k+l-i) % 2 == 0)
        output += memTableUWmv[i];
      else
        output -= memTableUWmv[i];

      /* sign of the w-u table depends on the parity of i */
      if (i % 2 == 0)
        output += mu*memTableVWmu[l+k+1-i];
      else
        output -= mu*memTableVWmu[l+k+1-i];

      output /= (2*(l+k+1-i));
    }
  }
  else {
    int j = 1;
    double mu1 = 0., mu2 = 0.;
    /* Work array of length l+1, overwritten in place by the recursion;
     * the result is read from tmp[l] at the end. */
    double* tmp = malloc(sizeof(double) * (l+1));

    /* w+v-u is (relatively) zero, i.e. w = u-v. Then mu1 = 1 and
     * mu2 = -1 below, so the factors (mu1-1) and (mu2+1) that multiply
     * these two tables vanish. The tables are filled with zeros instead
     * of calling slpIterative.
     * NOTE(review): presumably to avoid evaluating integrals whose
     * integrand is singular at an endpoint. */
    if (fabs(w[0]+v[0]-u[0]) < fabs(w[0])*EPS
          && fabs(w[1]+v[1]-u[1]) < fabs(w[1])*EPS) {
      memTableVWmu = malloc(sizeof(double) * (l+1));
      for (i = 0; i <= l; ++i)
        memTableVWmu[i] = 0.;

      memTableUWpv = malloc(sizeof(double) * (k+1));
      for (i = 0; i <= k; ++i)
        memTableUWpv[i] = 0.;
    }
    else {
      memTableVWmu = slpIterative(l, v, wmu);
      memTableUWpv = slpIterative(k, u, wpv);
    }

    /* w+u-v is (relatively) zero, i.e. w = v-u. Then mu1 = -1 and
     * mu2 = 1, so the factors (mu1+1) and (mu2-1) of these two tables
     * vanish; same treatment as above. */
    if (fabs(w[0]+u[0]-v[0]) < fabs(w[0])*EPS
          && fabs(w[1]+u[1]-v[1]) < fabs(w[1])*EPS) {
      memTableVWpu = malloc(sizeof(double) * (l+1));
      for (i = 0; i <= l; ++i)
        memTableVWpu[i] = 0.;

      memTableUWmv = malloc(sizeof(double) * (k+1));
      for (i = 0; i <= k; ++i)
        memTableUWmv[i] = 0.;
    }
    else {
      memTableVWpu = slpIterative(l, v, wpu);
      memTableUWmv = slpIterative(k, u, wmv);
    }

    /* Coefficients of w in the basis (u,v) by Cramer's rule:
     * w = mu1*u + mu2*v. */
    mu1 = ( v[1]*w[0] - v[0]*w[1]) / detUV;
    mu2 = (-u[1]*w[0] + u[0]*w[1]) / detUV;

    /* Value for k = l = 0; same expression as in the linearly
     * independent branch of computeWijAnalytic. */
    tmp[0] = -2 + ((mu1+1)*memTableVWpu[0] - (mu1-1)*memTableVWmu[0]
              + (mu2+1)*memTableUWpv[0] - (mu2-1)*memTableUWmv[0]) * 0.25;

    /* First row: tmp[i] is built from tmp[i-1] for i = 1,...,l.
     * NOTE(review): presumably tmp[i] then holds the value for
     * exponents (0, i). */
    for (i = 1; i <= l; ++i) {
      tmp[i] = 0.5*((mu1+1)*memTableVWpu[i] - (mu1-1)*memTableVWmu[i]
            + (mu2+1)*memTableUWpv[0]) - i*mu2*tmp[i-1];
      if (i%2 == 0) {
        tmp[i] -= (double)4/(double)(i+1);
        tmp[i] -= 0.5 * (mu2-1)*memTableUWmv[0];
      }
      else
        tmp[i] += 0.5 * (mu2-1)*memTableUWmv[0];

      tmp[i] /= (i+2);
    }

    /* Remaining rows i = 1,...,k, computed in place.
     * NOTE(review): presumably tmp[j] holds the value for exponents
     * (i, j) after pass i. */
    for (i = 1; i <= k; ++i) {
      /* Column 0: uses the previous row's tmp[0] only. */
      tmp[0] = 0.5*((mu1+1)*memTableVWpu[0] + (mu2+1)*memTableUWpv[i]
                  - (mu2-1)*memTableUWmv[i]) - i*mu1*tmp[0];
      if (i%2 == 0) {
        tmp[0] -= (double)4/(double)(i+1);
        tmp[0] -= 0.5*(mu1-1)*memTableVWmu[0];
      }
      else {
        tmp[0] += 0.5*(mu1-1)*memTableVWmu[0];
      }

      tmp[0] /= (i+2);

      /* Columns j >= 1: tmp[j] still holds the previous row's value when
       * it is scaled by -i*mu1, while tmp[j-1] already belongs to the
       * current row. The statement order must therefore not be changed. */
      for (j = 1; j <= l; ++j) {
        tmp[j] *= -i*mu1;
        tmp[j] -= j*mu2*tmp[j-1];
        tmp[j] += 0.5*( (mu1+1)*memTableVWpu[j] + (mu2+1)*memTableUWpv[i] );
        if (i%2 == 0) {
          /* constant term appears only if both i and j are even */
          if (j%2 == 0) {
            tmp[j] -= (double)4/(double)((i+1)*(j+1));
          }
          tmp[j] -= 0.5 * (mu1-1) * memTableVWmu[j];
        }
        else {
          tmp[j] += 0.5 * (mu1-1) * memTableVWmu[j];
        }

        if (j%2 == 0) {
          tmp[j] -= 0.5 * (mu2-1) * memTableUWmv[i];
        }
        else {
          tmp[j] += 0.5 * (mu2-1) * memTableUWmv[i];
        }

        tmp[j] /= (i+j+2);
      }
    }

    output = tmp[l];
    free(tmp);
  }

  /* The tables are owned by this function in both branches. */
  free(memTableUWpv);
  free(memTableUWmv);
  free(memTableVWpu);
  free(memTableVWmu);

  return output;
}

double computeVij(double a0, double a1, double b0, double b1,
      double c0, double c1, double d0, double d1, double eta) {
  /*
   * INPUT:  elements Ti = [a,b], Tj = [c,d] with a,b,c,d \in \R^2 and the
   *         parameter eta, which is handed on to computeWij unchanged.
   * OUTPUT: Galerkin integral
   *         -1/(2pi) * \int_{Tj} \int_{Ti} log|x-y| ds_y ds_x,
   *         obtained as |Ti|*|Tj| times the value of computeWij.
   */
  const double lenSqTi = (b0-a0)*(b0-a0) + (b1-a1)*(b1-a1); /* |b-a|^2 */
  const double lenSqTj = (d0-c0)*(d0-c0) + (d1-c1)*(d1-c1); /* |d-c|^2 */
  const double areaFactor = sqrt(lenSqTi*lenSqTj);          /* |Ti|*|Tj| */

  return areaFactor*computeWij(a0, a1, b0, b1, c0, c1, d0, d1, eta);
}

double computeWij(double a0, double a1, double b0, double b1,
      double c0, double c1, double d0, double d1, double eta) {
  /*
   * INPUT:  elements Ti = [a,b], Tj = [c,d] with a,b,c,d \in \R^2 and a
   *         parameter eta that selects the evaluation strategy.
   * OUTPUT: the value returned by computeWijAnalytic or
   *         computeWijSemianalytic, i.e. the scaled Galerkin integral
   *          -1/(2pi)*1/|Ti|*1/|Tj| \int_{Tj} \int_{Ti} log|x-y| ds_y ds_x
   *
   * eta == 0: the analytic formula is always used.
   * eta != 0: the semi-analytic formula is used if the distance of the
   *           two segments exceeds eta times the length of the shorter
   *           element; otherwise the analytic formula is used.
   */
  double lenSqTi = (b0-a0)*(b0-a0) + (b1-a1)*(b1-a1); /* |b-a|^2 */
  double lenSqTj = (d0-c0)*(d0-c0) + (d1-c1)*(d1-c1); /* |d-c|^2 */

  /* For stability reasons the outer integration should run over the  *
   * shorter element. Exchange the roles of Ti and Tj if necessary so  *
   * that lenSqTj <= lenSqTi holds afterwards.                         */
  if (lenSqTj > lenSqTi) {
    double held = 0.;

    held = a0; a0 = c0; c0 = held;                     /* a <-> c */
    held = a1; a1 = c1; c1 = held;
    held = b0; b0 = d0; d0 = held;                     /* b <-> d */
    held = b1; b1 = d1; d1 = held;
    held = lenSqTi; lenSqTi = lenSqTj; lenSqTj = held; /* lengths */
  }

  /* Admissible (well separated) pairs are treated semi-analytically. *
   * The distance is only computed if eta is non-zero.                */
  if (eta != 0
      && distanceSegmentToSegment(a0,a1,b0,b1,c0,c1,d0,d1)
           > eta*sqrt(lenSqTj)) {
    return computeWijSemianalytic(a0,a1, b0,b1, c0,c1, d0,d1);
  }

  /* All remaining entries are computed analytically. */
  return computeWijAnalytic(a0,a1, b0,b1, c0,c1, d0,d1);
}

double computeWijAnalytic(double a0, double a1, double b0, double b1, 
               double c0, double c1, double d0, double d1) {
  /*
   * INPUT:  elements Ti = [a,b], Tj = [c,d] with a,b,c,d \in \R^2
   * OUTPUT: Galerkin integral 
   *         -1/(2pi)*1/|Ti|*1/|Tj| \int_{Tj} \int_{Ti} log|x-y| ds_y ds_x
   *         evaluated in closed form by means of slp().
   */
  const double lenSqTi = (b0-a0)*(b0-a0) + (b1-a1)*(b1-a1); /* |b-a|^2 */
  const double lenSqTj = (d0-c0)*(d0-c0) + (d1-c1)*(d1-c1); /* |d-c|^2 */
  double x[2], y[2], z[2];
  double zPlusX[2], zMinusX[2], zPlusY[2], zMinusY[2];
  double integral = 0., cross = 0.;
  int n = 0;

  /* x = (b-a)/2,  y = (c-d)/2,  z = (a+b-c-d)/2 */
  x[0] = 0.5*(b0 - a0);
  x[1] = 0.5*(b1 - a1);
  y[0] = 0.5*(c0 - d0);
  y[1] = 0.5*(c1 - d1);
  z[0] = 0.5*(a0 + b0 - c0 - d0);
  z[1] = 0.5*(a1 + b1 - c1 - d1);

  /* z+x = (2b-c-d)/2,  z-x = (2a-c-d)/2, *
   * z+y = (a+b-2d)/2,  z-y = (a+b-2c)/2  */
  for (n = 0; n < 2; ++n) {
    zPlusX[n]  = z[n] + x[n];
    zMinusX[n] = z[n] - x[n];
    zPlusY[n]  = z[n] + y[n];
    zMinusY[n] = z[n] - y[n];
  }

  /* The formula depends on whether Ti and Tj are parallel, which is *
   * decided by the determinant of (x,y) relative to |Ti|*|Tj|.      */
  cross = x[0]*y[1] - x[1]*y[0];

  if ( fabs(cross) <= EPS*sqrt(lenSqTi*lenSqTj) ) {
    /* x and y linearly dependent: y = ratio*x, where the ratio is *
     * taken from the component of x that is larger in modulus.    */
    double ratio = 0.;

    if ( fabs(x[0]) < fabs(x[1]) )
      ratio = y[1] / x[1];
    else
      ratio = y[0] / x[0];

    integral = 0.5*( ratio * ( slp(1, y, zMinusX) - slp(1, y, zPlusX) )
                       + slp(0, x, zPlusY) + slp(0, x, zMinusY) );
  }
  else {
    /* x and y linearly independent: z = lambda*x + mu*y */
    const double lambda = (z[0]*y[1] - z[1]*y[0]) /cross;
    const double mu     = (x[0]*z[1] - x[1]*z[0]) /cross;

    integral = 0.25 * (-8 + (lambda+1)*slp(0, y, zPlusX)
                          - (lambda-1)*slp(0, y, zMinusX)
                          + (mu+1)*slp(0, x, zPlusY)
                          - (mu-1)*slp(0, x, zMinusY));
  }

  return -0.125*integral /M_PI; /* = -1/(8*M_PI)*integral */
}

double computeWijSemianalytic(double a0, double a1, double b0, double b1, 
                   double c0, double c1, double d0, double d1) {
  /*
   * INPUT:  elements Ti = [a,b], Tj = [c,d] with a,b,c,d \in \R^2
   * OUTPUT: Galerkin integral 
   *         -1/(2pi)*1/|Ti|*1/|Tj| \int_{Tj} \int_{Ti} log|x-y| ds_y ds_x
   *         The inner integral over Ti is evaluated by slp(), the outer
   *         integral over Tj by a Gauss rule with GAUSS_ORDER nodes.
   */
  const double* nodes   = getGaussPoints(GAUSS_ORDER);
  const double* weights = getGaussWeights(GAUSS_ORDER);
  double halfEdge[2], offset[2];
  double quadSum = 0;
  double px = 0, py = 0;
  int q;

  /* (a-b)/2: direction vector of the inner integration over Ti */
  halfEdge[0] = 0.5*(a0-b0);
  halfEdge[1] = 0.5*(a1-b1);

  for (q = 0; q < GAUSS_ORDER; ++q) {
    /* map the quadrature node from [-1,1] onto the segment [c,d] */
    px = ((1-nodes[q])*c0+(1+nodes[q])*d0)*0.5;
    py = ((1-nodes[q])*c1+(1+nodes[q])*d1)*0.5;

    /* quadrature point relative to the midpoint of Ti */
    offset[0] = px - 0.5*(a0+b0);
    offset[1] = py - 0.5*(a1+b1);

    /* weight times inner integral at this quadrature point */
    quadSum += weights[q] * slp(0, halfEdge, offset);
  }

  return -0.0625*quadSum / M_PI; /* = - 1/(16*M_PI) * int(log |.|^2) */
}
