#include "atlas_misc.h"
#include "atlas_prefetch.h"
#include <math.h>

int ATL_UIAMAX(const int N, const TYPE *X, const int incX)
{
   register TYPE xmax, x0, x1, x2, x3;
   const TYPE *stX=X, *x, *xp=X, *vxp;
   int i, nr;
   size_t ii;
   vector float v0, v1, v2, vmax = (vector float)(0.0f, 0.0f, 0.0f, 0.0f);
   void *vp;
   float *tp;
   int cwrd = ATL_MulBySize(N)>>4;

   if (N > 0)
   {
      if (cwrd >= 64) cwrd = ATL_GetCtrl(512, (cwrd+31)>>5, 0);
      else cwrd = ATL_GetCtrl(64, (cwrd+3)>>2, 4);
      ATL_pfavR(X, cwrd, 0);

      nr = sizeof(TYPE)<<2;
      ii = (size_t) X;
      ii -= (ii/nr)*nr;  /* align on loop length */
      if (nr)
      {
         x0 = *X;
         xmax = fabs(x0);
         for (i=1; i != nr; i++)
         {
            x0 = fabs(X[i]);
            if (x0 > xmax) { xmax = x0; xp = X+i; }
         }
         x = X + i;
      }
      else 
      {
         xmax = ATL_rzero;
         x = X;
      }
      nr = ((N - nr)>>2)<<2;
      if (nr)
      {
         stX = x + nr;
         vxp = x;
         do
         {
            v0 = vec_ld(0, x); x += 4;
            v0 = vec_abs(v0);
            if (vec_all_ge(vmax, v0)) continue; 
            vmax = vec_max(v0, vmax);
            v0 = vec_splat(vmax, 0);
            v1 = vec_splat(vmax, 1);
            v2 = vec_splat(vmax, 2);
            vmax = vec_splat(vmax, 3);
            v0 = vec_max(v0, v1);
            vmax = vec_max(v2, vmax);
            vmax = vec_max(v0, vmax);
            vxp = x - 4;
         }
         while (x != stX);
         vp = malloc(ATL_Cachelen + 4*sizeof(float));
/*         ATL_assert(vp); */
         tp = ATL_AlignPtr(vp);
         vec_st(vmax, 0, tp);
         for (i=0; i != 4; i++)
         {
            if (tp[i] > xmax) { xmax = tp[i]; xp = vxp; }
         }
         free(vp);
         /*
fprintf(stderr, "%d: xmax=%f, vmax=%f,%f,%f,%f, xp=%f,%f,%f,%f\n", __LINE__, xmax, tp[0], tp[1], tp[2], tp[3], *xp, xp[1], xp[2], xp[3]);
*/
         if (xp == vxp)
         {
            for (i=0; i != 4; i++) if (fabs(xp[i]) == xmax) break;
            if (i == 4) exit(-1);
            xp += i;
         }
      }
      stX = X + N;
      while (x != stX)
      {
         x0 = fabs(*x);
         if (x0 > xmax) { xmax = x0; xp = x; }
         x++;
      }
   }
   return((int)(xp-X));
}
