
#include <stdio.h>
#include <assert.h>
#include <string.h>



#include "pxsp.h"

#ifndef MAX
#define MAX(x,y) (((x) > (y) ) ? (x) : (y))
#endif

#ifndef MIN
#define MIN(x,y) (((x) < (y) ) ? (x) : (y))
#endif












pchpr2k_(  uplo, trans, n, k,
            alpha,     A, ia, ja, desc_A, 
                       B, ib, jb, desc_B,
            beta,      C, ic, jc, desc_C )

/*
*  .. Scalar Arguments ..
*/
   F_CHAR      uplo;
   F_CHAR      trans;
   int         *n, *k, *ia, *ja, *ib, *jb, *ic, *jc;

   complex      * alpha; 
   float      * beta;
/* ..
*  .. Array Arguments ..
*/
   int         desc_A[], desc_B[], desc_C[];
   complex      A[], B[], C[];
{


/*
*  Purpose
*  =======
*
*  PSPR2K  performs one of the symmetric rank 2k operations
*
*     sub( C ) := alpha*sub( A )*sub( B )' + alpha*sub( B )*sub( A )' +
*                 beta*sub( C ),
*
*  or
*
*     sub( C ) := alpha*sub( A )'*sub( B ) + alpha*sub( B )'*sub( A ) +
*                 beta*sub( C ),
*
*  where
*
*     sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1),
*
*     sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1)  if TRANS = 'N',
*                      A(IA:IA+K-1,JA:JA+N-1)  otherwise, and,
*
*     sub( B ) denotes B(IB:IB+N-1,JB:JB+K-1)  if TRANS = 'N',
*                      B(IB:IB+K-1,JB:JB+N-1)  otherwise.
*
*  Alpha  and  beta  are  scalars,  sub( C )  is  an  n by n   symmetric
*  submatrix and sub( A ) and sub( B )  are  n by k  submatrices in  the
*  first case and k by n submatrices in the second case.
*
*  sub( A ) stored in packed storage.
*
*  Notes
*  =====
*
*  A description  vector  is associated with each 2D block-cyclicly dis-
*  tributed matrix.  This  vector  stores  the  information  required to
*  establish the  mapping  between a  matrix entry and its corresponding
*  process and memory location.
*
*  In  the  following  comments,   the character _  should  be  read  as
*  "of  the  distributed  matrix".  Let  A  be a generic term for any 2D
*  block cyclicly distributed matrix.  Its description vector is DESC_A:
*
*  NOTATION         STORED IN       EXPLANATION
*  ---------------- --------------- ------------------------------------
*  DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
*  CTXT_A  (global) DESCA[ CTXT_  ] The BLACS context handle, indicating
*                                   the NPROW x NPCOL BLACS process grid
*                                   A  is  distributed over. The context
*                                   itself  is  global,  but  the handle
*                                   (the integer value) may vary.
*  M_A     (global) DESCA[ M_     ] The  number of rows in the distribu-
*                                   ted matrix A, M_A >= 0.
*  N_A     (global) DESCA[ N_     ] The number of columns in the distri-
*                                   buted matrix A, N_A >= 0.
*  IMB_A   (global) DESCA[ IMB_   ] The number of rows of the upper left
*                                   block of the matrix A, IMB_A > 0.
*  INB_A   (global) DESCA[ INB_   ] The  number  of columns of the upper
*                                   left   block   of   the  matrix   A,
*                                   INB_A > 0.
*  MB_A    (global) DESCA[ MB_    ] The blocking factor used to  distri-
*                                   bute the last  M_A-IMB_A  rows of A,
*                                   MB_A > 0.
*  NB_A    (global) DESCA[ NB_    ] The blocking factor used to  distri-
*                                   bute the last  N_A-INB_A  columns of
*                                   A, NB_A > 0.
*  RSRC_A  (global) DESCA[ RSRC_  ] The process row over which the first
*                                   row of the matrix  A is distributed,
*                                   NPROW > RSRC_A >= 0.
*  CSRC_A  (global) DESCA[ CSRC_  ] The  process column  over  which the
*                                   first column of  A  is  distributed.
*                                   NPCOL > CSRC_A >= 0.
*  LLD_A   (local)  DESCA[ LLD_   ] The  leading dimension  of the local
*                                   array  storing  the  local blocks of
*                                   the distributed matrix A,
*                                   IF( Lc( 1, N_A ) > 0 )
*                                      LLD_A >= MAX( 1, Lr( 1, M_A ) )
*                                   ELSE
*                                      LLD_A >= 1.
*
*  Let K be the number of  rows of a matrix A starting at the global in-
*  dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
*  that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
*  receive if these K rows were distributed over NPROW processes.  If  K
*  is the number of columns of a matrix  A  starting at the global index
*  JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number  of co-
*  lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would  receive if
*  these K columns were distributed over NPCOL processes.
*
*  The values of Lr() and Lc() may be determined via a call to the func-
*  tion PB_Cnumroc:
*  Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
*  Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
*
*  Arguments
*  =========
*
*  UPLO    (global input) CHARACTER*1
*          On  entry,   UPLO  specifies  whether  the  local  pieces  of
*          the array  C  containing the  upper or lower triangular  part
*          of the symmetric submatrix  sub( C )  are to be referenced as
*          follows:
*
*             UPLO = 'U' or 'u'   Only the local pieces corresponding to
*                                 the   upper  triangular  part  of  the
*                                 symmetric submatrix sub( C ) are to be
*                                 referenced,
*
*             UPLO = 'L' or 'l'   Only the local pieces corresponding to
*                                 the   lower  triangular  part  of  the
*                                 symmetric submatrix sub( C ) are to be
*                                 referenced.
*
*  TRANS   (global input) CHARACTER*1
*          On entry,  TRANS  specifies the  operation to be performed as
*          follows:
*
*             TRANS = 'N' or 'n'
*               sub( C ) := alpha*sub( A )*sub( B )' +
*                           alpha*sub( B )*sub( A )' +
*                           beta*sub( C ),
*
*             TRANS = 'T' or 't'
*               sub( C ) := alpha*sub( B )'*sub( A ) +
*                           alpha*sub( A )'*sub( B ) +
*                           beta*sub( C ),
*
*             TRANS = 'C' or 'c'
*               sub( C ) := alpha*sub( B )'*sub( A ) +
*                           alpha*sub( A )'*sub( B ) +
*                           beta*sub( C ).
*
*  N       (global input) INTEGER
*          On entry,  N specifies the order of the  submatrix  sub( C ).
*          N must be at least zero.
*
*  K       (global input) INTEGER
*          On entry with  TRANS = 'N' or 'n',  K specifies the number of
*          columns of  the  submatrices  sub( A )  and  sub( B ), and on
*          entry with TRANS = 'T' or 't' or 'C' or 'c', K  specifies the
*          number of rows  of the  submatrices  sub( A )  and  sub( B ).
*          K  must  be at least zero.
*
*  ALPHA   (global input) COMPLEX
*          On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
*          supplied  as  zero  then  the  local entries of the arrays  A
*          and  B  corresponding  to  the  entries  of  the  submatrices
*          sub( A ) and sub( B ) respectively need not be set  on input.
*
*  A       (local input) COMPLEX  array
*          On entry, A is an array of dimension (LLD_A, Ka), where Ka is
*          at least Lc( 1, JA+K-1 ) when  TRANS = 'N' or 'n', and  is at
*          least Lc( 1, JA+N-1 ) otherwise.  Before  entry,  this  array
*          contains the local entries of the matrix A.
*          Before entry with TRANS = 'N' or 'n', this array contains the
*          local entries corresponding to the entries of the n by k sub-
*          matrix sub( A ), otherwise the local entries corresponding to
*          the entries of the k by n submatrix sub( A ).
*
*  IA      (global input) INTEGER
*          On entry, IA  specifies A's global row index, which points to
*          the beginning of the submatrix sub( A ).
*
*  JA      (global input) INTEGER
*          On entry, JA  specifies A's global column index, which points
*          to the beginning of the submatrix sub( A ).
*
*  DESCA   (global and local input) INTEGER array
*          On entry, DESCA  is an integer array of dimension DLEN_. This
*          is the array descriptor for the matrix A.
*
*  B       (local input) COMPLEX array
*          On entry, B is an array of dimension (LLD_B, Kb), where Kb is
*          at least Lc( 1, JB+K-1 ) when  TRANS = 'N' or 'n', and  is at
*          least Lc( 1, JB+N-1 ) otherwise.  Before  entry,  this  array
*          contains the local entries of the matrix B.
*          Before entry with TRANS = 'N' or 'n', this array contains the
*          local entries corresponding to the entries of the n by k sub-
*          matrix sub( B ), otherwise the local entries corresponding to
*          the entries of the k by n submatrix sub( B ).
*
*  IB      (global input) INTEGER
*          On entry, IB  specifies B's global row index, which points to
*          the beginning of the submatrix sub( B ).
*
*  JB      (global input) INTEGER
*          On entry, JB  specifies B's global column index, which points
*          to the beginning of the submatrix sub( B ).
*
*  DESCB   (global and local input) INTEGER array
*          On entry, DESCB  is an integer array of dimension DLEN_. This
*          is the array descriptor for the matrix B.
*
*  BETA    (global input) REAL
*          On entry,  BETA  specifies the scalar  beta.   When  BETA  is
*          supplied  as  zero  then  the  local entries of  the array  C
*          corresponding to the entries of the submatrix  sub( C )  need
*          not be set on input.
*
*  C       (local input/local output) COMPLEX array
*          On entry, C is an array of dimension (LLD_C, Kc), where Kc is
*          at least Lc( 1, JC+N-1 ).  Before  entry, this array contains
*          the local entries of the matrix C.
*          Before  entry  with  UPLO = 'U' or 'u', this  array  contains
*          the local entries corresponding to the upper triangular  part
*          of the  symmetric  submatrix  sub( C ), and the local entries
*          corresponding to the  strictly lower triangular  of  sub( C )
*          are not  referenced.  On exit,  the upper triangular part  of
*          sub( C ) is overwritten by the  upper triangular part  of the
*          updated submatrix.
*          Before  entry  with  UPLO = 'L' or 'l', this  array  contains
*          the local entries corresponding to the lower triangular  part
*          of the  symmetric  submatrix  sub( C ), and the local entries
*          corresponding to the  strictly upper triangular  of  sub( C )
*          are not  referenced.  On exit,  the lower triangular part  of
*          sub( C ) is overwritten by the  lower triangular part  of the
*          updated submatrix.
*
*  IC      (global input) INTEGER
*          On entry, IC  specifies C's global row index, which points to
*          the beginning of the submatrix sub( C ).
*
*  JC      (global input) INTEGER
*          On entry, JC  specifies C's global column index, which points
*          to the beginning of the submatrix sub( C ).
*
*  DESCC   (global and local input) INTEGER array
*          On entry, DESCC  is an integer array of dimension DLEN_. This
*          is the array descriptor for the matrix C.
*/
  extern void pchpr2kf_();

  complex *work;
  int ineed, lwork, work_size;
  complex work1[1]; 



  float one,dbeta ; 
  
  int iia,jja, iib,jjb;
  int kstart,kend,ksize, kinc;
  int nb,iinc,jinc;
  int istransA;

#define MAX_KINC 7

   one = 1;

  istransA = (Mupcase( F2C_CHAR( trans )[0] ) == 'T');
               
  if (istransA) {
     nb = MAX( desc_A[MB_],desc_B[MB_] );
     }
  else {
     nb = MAX( desc_A[NB_],desc_B[NB_] );
     };

  nb = MAX(1,nb);
  kinc = nb*CEIL( MAX_KINC, nb );


  if (istransA) {
     iinc = kinc; jinc = 0;
     }
  else {
     iinc = 0; jinc = kinc;
     };


  work = NULL; work_size = -1;
  
  kstart = 1;  dbeta = (*beta);
  iia = (*ia); jja = (*ja); iib = (*ib); jjb = (*jb);

while (kstart <= (*k)) {
   kend = MIN( kstart + kinc-1, (*k) );
   ksize = kend - kstart + 1;
  
  

  work1[0].re = 0; work1[0].im = 0;

  lwork = -1;
  pchpr2kf_( uplo, trans, n, &ksize,
              alpha,      A,&iia,&jja,desc_A,
                          B,&iib,&jjb,desc_B,
              &dbeta,       C,ic,jc,desc_C,    work1, &lwork );
                 

  ineed = (int) ( work1[0].re + work1[0].im );


  ineed = MAX(1, ineed );

  if (ineed > work_size) {
     /*
       Current work buffer not sufficient.
       Reallocate another buffer.
      */
     if (work != NULL) { free(work); };

     work = (complex *)  malloc( sizeof(complex) * (ineed+1) );
     assert( work != NULL );

     work_size = (ineed + 1);
     };

   lwork = work_size;
   assert( lwork >= ineed );



  pchpr2kf_( uplo, trans, n, &ksize,
              alpha,      A,&iia,&jja,desc_A,
                          B,&iib,&jjb,desc_B,
              &dbeta,       C,ic,jc,desc_C,    work, &lwork );



  iia = iia + iinc; jja = jja + jinc;
  iib = iib + iinc; jjb = jjb + jinc;

  kstart = kend + 1;
  dbeta =  one;
  

  }; /* end while */

  if (work != NULL) {
       free(work);
       };


  return;
}
 
