#include "atlas_misc.h"
#include "atlas_prefetch.h"
#include <math.h>
TYPE ATL_UNRM2(const int N, const TYPE *X, const int incX)
/*
 * Returns the square root of the sum of squares of the N elements starting
 * at X (the 2-norm of the vector).  Returns ATL_rzero when N < 1.
 *
 * incX is not referenced by this kernel: elements are always read
 * contiguously.  NOTE(review): presumably this kernel is only selected for
 * unit-stride vectors -- confirm against the caller.
 *
 * The sum of squares is accumulated directly, with no scaling.
 * Only machines like x86 with extended precision (both arithmetic and sqrt)
 * will be able to use this kernel.  On machines with standard 64 bit
 * precision, this will fail the overflow/underflow tests.
 */
{
   int n;
   register TYPE t0=ATL_rzero, t1=ATL_rzero, t2=ATL_rzero, t3=ATL_rzero;
   const TYPE *stX, *stX0;
   size_t ii;

   /*
    * Nothing to sum.  Without this guard a negative N would send the
    * pointer-equality loops below chasing an end pointer that lies behind X.
    */
   if (N < 1)
      return(ATL_rzero);
   stX0 = X + N;   /* one past the last element */

   n = ATL_MulBySize(4); /* 4 loop unrolling */
   ii = (size_t) X;
   ii -= (ii/n)*n;  /* align on loop length: ii = address mod unrolled span */
   if (ii > ATL_sizeof)  /* not aligned */
   {
      ii = n - ii;             /* distance up to the next unroll boundary */
      n = ATL_DivBySize(ii);   /* ... converted to a count of elements */
      n = Mmin(n, N);
      /*
       * n may be 0 here (e.g. X is not a multiple of the element size, so
       * the distance to the boundary is less than one element).  The peel
       * loop terminates on pointer equality and always runs its body once,
       * so it must be skipped in that case or it would run off the vector.
       */
      if (n > 0)
      {
         stX = X + n;
         do t0 += *X * *X; while(++X != stX);
      }
      n = N - n;   /* elements still to be summed */
   }
   else n = N;

   /* Main loop: four independent partial sums over the largest multiple of 4 */
   stX = X + ((n>>2)<<2);
   if (X != stX)
   {
      do
      {
          ATL_pfl1R(X+120);   /* prefetch 120 elements ahead of X */
          t0 += *X   * *X;
          t1 += X[1] * X[1];
          t2 += X[2] * X[2];
          t3 += X[3] * X[3];
          X += 4;
      }
      while (X != stX);
      /* fold the partial sums back into t0 */
      t0 += t1;
      t2 += t3;
      t0 += t2;
   }
   /* Cleanup: up to three leftover elements */
   if (X != stX0)
   {
      do t0 += *X * *X; while(++X != stX0);
   }
   t0 = sqrt(t0);
   return(t0);
}
