/*  mvm.c  */

#include "../DA2.h"

#define KERNELS 4

/*--------------------------------------------------------------------*/
/*
   ------------------------------------------------------------------
   matrix vector multiply

   y[] := y[] + alpha * a * x[]

   a     -- dense matrix with a->n1 rows and a->n2 columns,
            entry (i,j) is read from a->entries[i*a->inc1 + j*a->inc2]
   y     -- vector updated in place, locations 0 .. a->n1 - 1
   alpha -- scalar, when it is exactly zero y[] is left untouched
   x     -- vector that is only read, locations 0 .. a->n2 - 1

   error checking -- if a, y or x is NULL, or if one of n1, n2,
      inc1, inc2 is not positive, or if a->entries is NULL,
      a message is written to stderr and the program calls exit(-1)

   created -- 96apr19, cca
   ------------------------------------------------------------------
*/
void
DA2_mvm1vec (
   DA2      *a,
   double   y[],
   double   alpha,
   double   x[]
) {
double   *entries ;
int      inc1, inc2, n1, n2 ;
/*
   --------------
   check the data
   --------------
*/
if ( a == NULL || y == NULL || x == NULL ) {
   fprintf(stderr, "\n fatal error in DA2_mvm1vec(%p,%p,%e,%p)"
           "\n bad input\n", 
           (void *) a, (void *) y, alpha, (void *) x) ;
   exit(-1) ;
}
n1      = a->n1 ;
n2      = a->n2 ;
inc1    = a->inc1 ;
inc2    = a->inc2 ;
entries = a->entries ;
if (  n1 <= 0 || inc1 <= 0 || n2 <= 0 || inc2 <= 0 
   || entries == NULL ) {
   fprintf(stderr, "\n fatal error in DA2_mvm1vec(%p,%p,%e,%p)"
           "\n bad structure\n", 
           (void *) a, (void *) y, alpha, (void *) x) ;
   DA2_writeStats(a, stderr) ;
   exit(-1) ;
}
/*
   ---------------------------
   quick check for zero scalar
   ---------------------------
*/
if ( alpha == 0.0 ) {
   return ;
}
/*
   ---------------------------------
   switch over the different strides
   ---------------------------------
*/
if ( inc1 == 1 ) {
/*
   ----------------------------------------------------------
   unit row stride, each column is contiguous.
   take the columns three at a time and add their scaled 
   contributions to y[] in one sweep over the rows.
   ----------------------------------------------------------
*/
   double   fac0, fac1, fac2 ;
   double   *col0, *col1, *col2 ;
   int      ii, j0 ;

   col0 = entries ;
   for ( j0 = 0 ; j0 < n2 - 2 ; j0 += 3 ) {
      col1 = col0 + inc2 ;
      col2 = col1 + inc2 ;
      fac0 = alpha * x[j0] ;
      fac1 = alpha * x[j0+1] ;
      fac2 = alpha * x[j0+2] ;
      for ( ii = 0 ; ii < n1 ; ii++ ) {
         y[ii] += col0[ii] * fac0 + col1[ii] * fac1 
               +  col2[ii] * fac2 ;
      }
      col0 = col2 + inc2 ;
   }
/*
   -----------------------------------------
   zero, one or two columns are left over,
   col0 points to the first of them
   -----------------------------------------
*/
   if ( j0 == n2 - 2 ) {
      col1 = col0 + inc2 ;
      fac0 = alpha * x[j0] ;
      fac1 = alpha * x[j0+1] ;
      for ( ii = 0 ; ii < n1 ; ii++ ) {
         y[ii] += col0[ii] * fac0 + col1[ii] * fac1 ;
      }
   } else if ( j0 == n2 - 1 ) {
      fac0 = alpha * x[j0] ;
      for ( ii = 0 ; ii < n1 ; ii++ ) {
         y[ii] += col0[ii] * fac0 ;
      }
   }
} else if ( inc2 == 1 ) {
/*
   ----------------------------------------------------------
   unit column stride, each row is contiguous.
   take the rows three at a time, form their dot products
   with x[] and add alpha times each sum to y[].
   ----------------------------------------------------------
*/
   double   sum0, sum1, sum2 ;
   double   *row0, *row1, *row2 ;
   int      ii, i0 ;

   row0 = entries ;
   for ( i0 = 0 ; i0 < n1 - 2 ; i0 += 3 ) {
      row1 = row0 + inc1 ;
      row2 = row1 + inc1 ;
      sum0 = sum1 = sum2 = 0.0 ;
      for ( ii = 0 ; ii < n2 ; ii++ ) {
         sum0 += row0[ii] * x[ii] ;
         sum1 += row1[ii] * x[ii] ;
         sum2 += row2[ii] * x[ii] ;
      }
      y[i0]   += alpha * sum0 ;
      y[i0+1] += alpha * sum1 ;
      y[i0+2] += alpha * sum2 ;
      row0 = row2 + inc1 ;
   }
/*
   --------------------------------------
   zero, one or two rows are left over,
   row0 points to the first of them
   --------------------------------------
*/
   if ( i0 == n1 - 2 ) {
      row1 = row0 + inc1 ;
      sum0 = sum1 = 0.0 ;
      for ( ii = 0 ; ii < n2 ; ii++ ) {
         sum0 += row0[ii] * x[ii] ;
         sum1 += row1[ii] * x[ii] ;
      }
      y[i0]   += alpha * sum0 ;
      y[i0+1] += alpha * sum1 ;
   } else if ( i0 == n1 - 1 ) {
      sum0 = 0.0 ;
      for ( ii = 0 ; ii < n2 ; ii++ ) {
         sum0 += row0[ii] * x[ii] ;
      }
      y[i0] += alpha * sum0 ;
   }
} else {
/*
   -----------------------------------------------------
   general strides, one row at a time, 
   kk is the offset of column jcol within the row
   -----------------------------------------------------
*/
   double   sum ;
   double   *row ;
   int      irow, jcol, kk ;
 
   for ( irow = 0, row = entries ; irow < n1 ; irow++, row += inc1 ) {
      for ( jcol = 0, kk = 0, sum = 0.0 ; 
            jcol < n2 ; 
            jcol++, kk += inc2 ) {
         sum += row[kk] * x[jcol] ;
      }
      y[irow] += alpha * sum ;
   }
}
return ; }

/*--------------------------------------------------------------------*/
/*
   ------------------------------------------------------------------
   matrix vector multiply with two vectors

   [ y0 y1 ] := [ y0 y1 ] + alpha * a * [ x0 x1 ]

   a      -- dense matrix with a->n1 rows and a->n2 columns,
             entry (i,j) is read from a->entries[i*a->inc1 + j*a->inc2]
   y0, y1 -- vectors updated in place, locations 0 .. a->n1 - 1
   alpha  -- scalar, when it is exactly zero y0[] and y1[] 
             are left untouched
   x0, x1 -- vectors that are only read, locations 0 .. a->n2 - 1

   error checking -- if a or one of the vectors is NULL, or if one 
      of n1, n2, inc1, inc2 is not positive, or if a->entries is 
      NULL, a message is written to stderr and the program calls 
      exit(-1)

   created -- 96apr19, cca
   ------------------------------------------------------------------
*/
void
DA2_mvm2vec (
   DA2      *a,
   double   y0[],
   double   y1[],
   double   alpha,
   double   x0[],
   double   x1[]
) {
double   *entries ;
int      inc1, inc2, n1, n2 ;
/*
   --------------
   check the data
   --------------
*/
if (  a == NULL || y0 == NULL || y1 == NULL 
   || x0 == NULL || x1 == NULL ) {
   fprintf(stderr, "\n fatal error in DA2_mvm2vec(%p,%p,%p,%e,%p,%p)"
           "\n bad input\n", 
           (void *) a, (void *) y0, (void *) y1, alpha, 
           (void *) x0, (void *) x1) ;
   exit(-1) ;
}
n1      = a->n1 ;
n2      = a->n2 ;
inc1    = a->inc1 ;
inc2    = a->inc2 ;
entries = a->entries ;
if (  n1 <= 0 || inc1 <= 0 || n2 <= 0 || inc2 <= 0 
   || entries == NULL ) {
   fprintf(stderr, "\n fatal error in DA2_mvm2vec(%p,%p,%p,%e,%p,%p)"
           "\n bad structure\n", 
           (void *) a, (void *) y0, (void *) y1, alpha, 
           (void *) x0, (void *) x1) ;
   DA2_writeStats(a, stderr) ;
   exit(-1) ;
}
/*
   ---------------------------
   quick check for zero scalar
   ---------------------------
*/
if ( alpha == 0.0 ) {
   return ;
}
/*
   ---------------------------------
   switch over the different strides
   ---------------------------------
*/
if ( inc1 == 1 ) {
/*
   ----------------------------------------------------------
   unit row stride, each column is contiguous.
   take the columns three at a time. facJK is alpha times
   entry j0+J of vector xK, so each matrix entry that is 
   loaded is used for both output vectors.
   ----------------------------------------------------------
*/
   double   fac00, fac01, fac10, fac11, fac20, fac21 ;
   double   *col0, *col1, *col2 ;
   int      ii, j0 ;

   col0 = entries ;
   for ( j0 = 0 ; j0 < n2 - 2 ; j0 += 3 ) {
      col1 = col0 + inc2 ;
      col2 = col1 + inc2 ;
      fac00 = alpha * x0[j0] ;
      fac10 = alpha * x0[j0+1] ;
      fac20 = alpha * x0[j0+2] ;
      fac01 = alpha * x1[j0] ;
      fac11 = alpha * x1[j0+1] ;
      fac21 = alpha * x1[j0+2] ;
      for ( ii = 0 ; ii < n1 ; ii++ ) {
         y0[ii] += col0[ii] * fac00 + col1[ii] * fac10 
                +  col2[ii] * fac20 ;
         y1[ii] += col0[ii] * fac01 + col1[ii] * fac11 
                +  col2[ii] * fac21 ;
      }
      col0 = col2 + inc2 ;
   }
/*
   -----------------------------------------
   zero, one or two columns are left over,
   col0 points to the first of them
   -----------------------------------------
*/
   if ( j0 == n2 - 2 ) {
      col1 = col0 + inc2 ;
      fac00 = alpha * x0[j0] ;
      fac10 = alpha * x0[j0+1] ;
      fac01 = alpha * x1[j0] ;
      fac11 = alpha * x1[j0+1] ;
      for ( ii = 0 ; ii < n1 ; ii++ ) {
         y0[ii] += col0[ii] * fac00 + col1[ii] * fac10 ;
         y1[ii] += col0[ii] * fac01 + col1[ii] * fac11 ;
      }
   } else if ( j0 == n2 - 1 ) {
      fac00 = alpha * x0[j0] ;
      fac01 = alpha * x1[j0] ;
      for ( ii = 0 ; ii < n1 ; ii++ ) {
         y0[ii] += col0[ii] * fac00 ;
         y1[ii] += col0[ii] * fac01 ;
      }
   }
} else if ( inc2 == 1 ) {
/*
   ----------------------------------------------------------
   unit column stride, each row is contiguous.
   take the rows three at a time. sumIK accumulates the dot 
   product of row i0+I with vector xK.
   ----------------------------------------------------------
*/
   double   sum00, sum01, sum10, sum11, sum20, sum21 ;
   double   *row0, *row1, *row2 ;
   int      ii, i0 ;

   row0 = entries ;
   for ( i0 = 0 ; i0 < n1 - 2 ; i0 += 3 ) {
      row1 = row0 + inc1 ;
      row2 = row1 + inc1 ;
      sum00 = sum01 = sum10 = sum11 = sum20 = sum21 = 0.0 ;
      for ( ii = 0 ; ii < n2 ; ii++ ) {
         sum00 += row0[ii] * x0[ii] ;
         sum10 += row1[ii] * x0[ii] ;
         sum20 += row2[ii] * x0[ii] ;
         sum01 += row0[ii] * x1[ii] ;
         sum11 += row1[ii] * x1[ii] ;
         sum21 += row2[ii] * x1[ii] ;
      }
      y0[i0]   += alpha * sum00 ;
      y0[i0+1] += alpha * sum10 ;
      y0[i0+2] += alpha * sum20 ;
      y1[i0]   += alpha * sum01 ;
      y1[i0+1] += alpha * sum11 ;
      y1[i0+2] += alpha * sum21 ;
      row0 = row2 + inc1 ;
   }
/*
   --------------------------------------
   zero, one or two rows are left over,
   row0 points to the first of them
   --------------------------------------
*/
   if ( i0 == n1 - 2 ) {
      row1 = row0 + inc1 ;
      sum00 = sum01 = sum10 = sum11 = 0.0 ;
      for ( ii = 0 ; ii < n2 ; ii++ ) {
         sum00 += row0[ii] * x0[ii] ;
         sum10 += row1[ii] * x0[ii] ;
         sum01 += row0[ii] * x1[ii] ;
         sum11 += row1[ii] * x1[ii] ;
      }
      y0[i0]   += alpha * sum00 ;
      y0[i0+1] += alpha * sum10 ;
      y1[i0]   += alpha * sum01 ;
      y1[i0+1] += alpha * sum11 ;
   } else if ( i0 == n1 - 1 ) {
      sum00 = sum01 = 0.0 ;
      for ( ii = 0 ; ii < n2 ; ii++ ) {
         sum00 += row0[ii] * x0[ii] ;
         sum01 += row0[ii] * x1[ii] ;
      }
      y0[i0] += alpha * sum00 ;
      y1[i0] += alpha * sum01 ;
   }
} else {
/*
   -----------------------------------------------------
   general strides, one row at a time, 
   kk is the offset of column jcol within the row
   -----------------------------------------------------
*/
   double   sum0, sum1 ;
   double   *row ;
   int      irow, jcol, kk ;
 
   for ( irow = 0, row = entries ; irow < n1 ; irow++, row += inc1 ) {
      for ( jcol = 0, kk = 0, sum0 = sum1 = 0.0 ; 
            jcol < n2 ; 
            jcol++, kk += inc2 ) {
         sum0 += row[kk] * x0[jcol] ;
         sum1 += row[kk] * x1[jcol] ;
      }
      y0[irow] += alpha * sum0 ;
      y1[irow] += alpha * sum1 ;
   }
}
return ; }

/*--------------------------------------------------------------------*/
/*
   ------------------------------------------------------------------
   matrix vector multiply with three vectors

   [ y0 y1 y2 ] := [ y0 y1 y2 ] + alpha * a * [ x0 x1 x2 ]

   a          -- dense matrix with a->n1 rows and a->n2 columns,
                 entry (i,j) is read from 
                 a->entries[i*a->inc1 + j*a->inc2]
   y0, y1, y2 -- vectors updated in place, locations 0 .. a->n1 - 1
   alpha      -- scalar, when it is exactly zero y0[], y1[] and 
                 y2[] are left untouched
   x0, x1, x2 -- vectors that are only read, 
                 locations 0 .. a->n2 - 1

   error checking -- if a or one of the vectors is NULL, or if one 
      of n1, n2, inc1, inc2 is not positive, or if a->entries is 
      NULL, a message is written to stderr and the program calls 
      exit(-1)

   created -- 96apr19, cca
   ------------------------------------------------------------------
*/
void
DA2_mvm3vec (
   DA2      *a,
   double   y0[],
   double   y1[],
   double   y2[],
   double   alpha,
   double   x0[],
   double   x1[],
   double   x2[]
) {
double   *entries ;
int      inc1, inc2, n1, n2 ;
/*
   --------------
   check the data
   --------------
*/
if (  a == NULL || y0 == NULL || y1 == NULL || y2 == NULL
   || x0 == NULL || x1 == NULL || x2 == NULL) {
   fprintf(stderr, 
           "\n fatal error in DA2_mvm3vec(%p,%p,%p,%p,%e,%p,%p,%p)"
           "\n bad input\n", 
           (void *) a, (void *) y0, (void *) y1, (void *) y2, alpha, 
           (void *) x0, (void *) x1, (void *) x2) ;
   exit(-1) ;
}
n1      = a->n1 ;
n2      = a->n2 ;
inc1    = a->inc1 ;
inc2    = a->inc2 ;
entries = a->entries ;
if (  n1 <= 0 || inc1 <= 0 || n2 <= 0 || inc2 <= 0 
   || entries == NULL ) {
   fprintf(stderr, 
           "\n fatal error in DA2_mvm3vec(%p,%p,%p,%p,%e,%p,%p,%p)"
           "\n bad structure\n", 
           (void *) a, (void *) y0, (void *) y1, (void *) y2, alpha, 
           (void *) x0, (void *) x1, (void *) x2) ;
   DA2_writeStats(a, stderr) ;
   exit(-1) ;
}
/*
   ---------------------------
   quick check for zero scalar
   ---------------------------
*/
if ( alpha == 0.0 ) {
   return ;
}
/*
   ---------------------------------
   switch over the different strides
   ---------------------------------
*/
if ( inc1 == 1 ) {
/*
   ----------------------------------------------------------
   unit row stride, each column is contiguous.
   take the columns three at a time. facJK is alpha times
   entry j0+J of vector xK, so each matrix entry that is 
   loaded is used for all three output vectors.
   ----------------------------------------------------------
*/
   double   fac00, fac01, fac02, fac10, fac11, fac12,
            fac20, fac21, fac22 ;
   double   *col0, *col1, *col2 ;
   int      ii, j0 ;

   col0 = entries ;
   for ( j0 = 0 ; j0 < n2 - 2 ; j0 += 3 ) {
      col1 = col0 + inc2 ;
      col2 = col1 + inc2 ;
      fac00 = alpha * x0[j0] ;
      fac10 = alpha * x0[j0+1] ;
      fac20 = alpha * x0[j0+2] ;
      fac01 = alpha * x1[j0] ;
      fac11 = alpha * x1[j0+1] ;
      fac21 = alpha * x1[j0+2] ;
      fac02 = alpha * x2[j0] ;
      fac12 = alpha * x2[j0+1] ;
      fac22 = alpha * x2[j0+2] ;
      for ( ii = 0 ; ii < n1 ; ii++ ) {
         y0[ii] += col0[ii] * fac00 + col1[ii] * fac10 
                +  col2[ii] * fac20 ;
         y1[ii] += col0[ii] * fac01 + col1[ii] * fac11 
                +  col2[ii] * fac21 ;
         y2[ii] += col0[ii] * fac02 + col1[ii] * fac12 
                +  col2[ii] * fac22 ;
      }
      col0 = col2 + inc2 ;
   }
/*
   -----------------------------------------
   zero, one or two columns are left over,
   col0 points to the first of them
   -----------------------------------------
*/
   if ( j0 == n2 - 2 ) {
      col1 = col0 + inc2 ;
      fac00 = alpha * x0[j0] ;
      fac10 = alpha * x0[j0+1] ;
      fac01 = alpha * x1[j0] ;
      fac11 = alpha * x1[j0+1] ;
      fac02 = alpha * x2[j0] ;
      fac12 = alpha * x2[j0+1] ;
      for ( ii = 0 ; ii < n1 ; ii++ ) {
         y0[ii] += col0[ii] * fac00 + col1[ii] * fac10 ;
         y1[ii] += col0[ii] * fac01 + col1[ii] * fac11 ;
         y2[ii] += col0[ii] * fac02 + col1[ii] * fac12 ;
      }
   } else if ( j0 == n2 - 1 ) {
      fac00 = alpha * x0[j0] ;
      fac01 = alpha * x1[j0] ;
      fac02 = alpha * x2[j0] ;
      for ( ii = 0 ; ii < n1 ; ii++ ) {
         y0[ii] += col0[ii] * fac00 ;
         y1[ii] += col0[ii] * fac01 ;
         y2[ii] += col0[ii] * fac02 ;
      }
   }
} else if ( inc2 == 1 ) {
/*
   ----------------------------------------------------------
   unit column stride, each row is contiguous.
   take the rows three at a time. sumIK accumulates the dot 
   product of row i0+I with vector xK.
   ----------------------------------------------------------
*/
   double   sum00, sum01, sum02, sum10, sum11, sum12, 
            sum20, sum21, sum22 ;
   double   *row0, *row1, *row2 ;
   int      ii, i0 ;

   row0 = entries ;
   for ( i0 = 0 ; i0 < n1 - 2 ; i0 += 3 ) {
      row1 = row0 + inc1 ;
      row2 = row1 + inc1 ;
      sum00 = sum01 = sum02 = sum10 = sum11 = sum12  
            = sum20 = sum21 = sum22 = 0.0 ;
      for ( ii = 0 ; ii < n2 ; ii++ ) {
         sum00 += row0[ii] * x0[ii] ;
         sum10 += row1[ii] * x0[ii] ;
         sum20 += row2[ii] * x0[ii] ;
         sum01 += row0[ii] * x1[ii] ;
         sum11 += row1[ii] * x1[ii] ;
         sum21 += row2[ii] * x1[ii] ;
         sum02 += row0[ii] * x2[ii] ;
         sum12 += row1[ii] * x2[ii] ;
         sum22 += row2[ii] * x2[ii] ;
      }
      y0[i0]   += alpha * sum00 ;
      y0[i0+1] += alpha * sum10 ;
      y0[i0+2] += alpha * sum20 ;
      y1[i0]   += alpha * sum01 ;
      y1[i0+1] += alpha * sum11 ;
      y1[i0+2] += alpha * sum21 ;
      y2[i0]   += alpha * sum02 ;
      y2[i0+1] += alpha * sum12 ;
      y2[i0+2] += alpha * sum22 ;
      row0 = row2 + inc1 ;
   }
/*
   --------------------------------------
   zero, one or two rows are left over,
   row0 points to the first of them
   --------------------------------------
*/
   if ( i0 == n1 - 2 ) {
      row1 = row0 + inc1 ;
      sum00 = sum01 = sum02 = sum10 = sum11 = sum12 = 0.0 ;
      for ( ii = 0 ; ii < n2 ; ii++ ) {
         sum00 += row0[ii] * x0[ii] ;
         sum10 += row1[ii] * x0[ii] ;
         sum01 += row0[ii] * x1[ii] ;
         sum11 += row1[ii] * x1[ii] ;
         sum02 += row0[ii] * x2[ii] ;
         sum12 += row1[ii] * x2[ii] ;
      }
      y0[i0]   += alpha * sum00 ;
      y0[i0+1] += alpha * sum10 ;
      y1[i0]   += alpha * sum01 ;
      y1[i0+1] += alpha * sum11 ;
      y2[i0]   += alpha * sum02 ;
      y2[i0+1] += alpha * sum12 ;
   } else if ( i0 == n1 - 1 ) {
      sum00 = sum01 = sum02 = 0.0 ;
      for ( ii = 0 ; ii < n2 ; ii++ ) {
         sum00 += row0[ii] * x0[ii] ;
         sum01 += row0[ii] * x1[ii] ;
         sum02 += row0[ii] * x2[ii] ;
      }
      y0[i0] += alpha * sum00 ;
      y1[i0] += alpha * sum01 ;
      y2[i0] += alpha * sum02 ;
   }
} else {
/*
   -----------------------------------------------------
   general strides, one row at a time, 
   kk is the offset of column jcol within the row
   -----------------------------------------------------
*/
   double   sum0, sum1, sum2 ;
   double   *row ;
   int      irow, jcol, kk ;
 
   for ( irow = 0, row = entries ; irow < n1 ; irow++, row += inc1 ) {
      for ( jcol = 0, kk = 0, sum0 = sum1 = sum2 = 0.0 ; 
            jcol < n2 ; 
            jcol++, kk += inc2 ) {
         sum0 += row[kk] * x0[jcol] ;
         sum1 += row[kk] * x1[jcol] ;
         sum2 += row[kk] * x2[jcol] ;
      }
      y0[irow] += alpha * sum0 ;
      y1[irow] += alpha * sum1 ;
      y2[irow] += alpha * sum2 ;
   }
}
return ; }

/*--------------------------------------------------------------------*/
