#include "atlas_misc.h"
#include "atlas_prefetch.h"

/*
 * Cleanup AXPY: Y[i] += alpha0 * X[i] for every i in [0, N).
 *
 * N      : number of elements to update; nothing is done when N <= 0.
 * alpha0 : scaling factor applied to each element of X.
 * X      : input vector, read as N consecutive elements.
 * Y      : vector updated in place, N consecutive elements.
 *
 * Within this file it is called only for the leftover elements (fewer than
 * four) that remain after the 4-way unrolled loop in ATL_UAXPY, but any
 * element count is processed in full rather than being silently truncated.
 */
static void axpyCU(const int N, const SCALAR alpha0, const TYPE *X, TYPE *Y)
{
   register TYPE alpha=alpha0;
   int i;

   for (i=0; i < N; i++)
   {
      Y[i] += alpha * X[i];
   }
}
/*
 * AXPY kernel: Y[i] += alpha0 * X[i] for N consecutive elements.
 *
 * The bulk of the vector is processed four elements per iteration; whatever
 * is left over (N mod 4 elements) is handed to axpyCU.
 *
 * N      : number of elements to update.
 * alpha0 : scaling factor applied to each element of X.
 * X      : input vector.
 * incX   : not referenced here; X is always walked contiguously
 *          (presumably the caller guarantees unit stride -- TODO confirm).
 * Y      : vector updated in place.
 * incY   : not referenced here; Y is always walked contiguously
 *          (presumably the caller guarantees unit stride -- TODO confirm).
 */
void ATL_UAXPY(const int N, const SCALAR alpha0, const TYPE *X, const int incX,
               TYPE *Y, const int incY)
{
   const int rem = N % 4;                    /* elements left for cleanup */
   const TYPE *const xend = X + (N - rem);   /* end of the unrolled region */
   register TYPE alpha=alpha0;

   while (X != xend)
   {
      /* presumably a prefetch hint for the next group of four; see
         atlas_prefetch.h for the actual definition */
      ATL_pfl1R(X+4);
      Y[0] += alpha * X[0];
      Y[1] += alpha * X[1];
      Y[2] += alpha * X[2];
      Y[3] += alpha * X[3];
      X += 4;
      Y += 4;
   }

   /* X and Y now point just past the unrolled region */
   if (rem != 0)
   {
      axpyCU(rem, alpha0, X, Y);
   }
}
