#include "blaswrap.h"
#include "f2c.h"

/* Subroutine */ int claqps_(integer *m, integer *n, integer *offset, integer 
	*nb, integer *kb, complex *a, integer *lda, integer *jpvt, complex *
	tau, real *vn1, real *vn2, complex *auxv, complex *f, integer *ldf)
{
/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    CLAQPS computes a step of QR factorization with column pivoting   
    of a complex M-by-N matrix A by using Blas-3.  It tries to factorize   
    NB columns from A starting from the row OFFSET+1, and updates all   
    of the matrix with Blas-3 xGEMM.   

    In some cases, due to catastrophic cancellations, it cannot   
    factorize NB columns.  Hence, the actual number of factorized   
    columns is returned in KB.   

    Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows of the matrix A. M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix A. N >= 0   

    OFFSET  (input) INTEGER   
            The number of rows of A that have been factorized in   
            previous steps.   

    NB      (input) INTEGER   
            The number of columns to factorize.   

    KB      (output) INTEGER   
            The number of columns actually factorized.   

    A       (input/output) COMPLEX array, dimension (LDA,N)   
            On entry, the M-by-N matrix A.   
            On exit, block A(OFFSET+1:M,1:KB) is the triangular   
            factor obtained and block A(1:OFFSET,1:N) has been   
            accordingly pivoted, but no factorized.   
            The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has   
            been updated.   

    LDA     (input) INTEGER   
            The leading dimension of the array A. LDA >= max(1,M).   

    JPVT    (input/output) INTEGER array, dimension (N)   
            JPVT(I) = K <==> Column K of the full matrix A has been   
            permuted into position I in AP.   

    TAU     (output) COMPLEX array, dimension (KB)   
            The scalar factors of the elementary reflectors.   

    VN1     (input/output) REAL array, dimension (N)   
            The vector with the partial column norms.   

    VN2     (input/output) REAL array, dimension (N)   
            The vector with the exact column norms.   

    AUXV    (input/output) COMPLEX array, dimension (NB)   
            Auxiliar vector.   

    F       (input/output) COMPLEX array, dimension (LDF,NB)   
            Matrix F' = L*Y'*A.   

    LDF     (input) INTEGER   
            The leading dimension of the array F. LDF >= max(1,N).   

    Further Details   
    ===============   

    Based on contributions by   
      G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain   
      X. Sun, Computer Science Dept., Duke University, USA   

    =====================================================================   


       Parameter adjustments */
    /* Table of constant values */
    static complex c_b1 = {0.f,0.f};
    static complex c_b2 = {1.f,0.f};
    static integer c__1 = 1;
    
    /* System generated locals */
    integer a_dim1, a_offset, f_dim1, f_offset, i__1, i__2, i__3;
    real r__1, r__2;
    complex q__1;
    /* Builtin functions */
    void r_cnjg(complex *, complex *);
    double c_abs(complex *), sqrt(doublereal);
    integer i_nint(real *);
    /* Local variables */
    static real temp, temp2;
    static integer j, k;
    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
	    integer *, complex *, complex *, integer *, complex *, integer *, 
	    complex *, complex *, integer *), cgemv_(char *, 
	    integer *, integer *, complex *, complex *, integer *, complex *, 
	    integer *, complex *, complex *, integer *), cswap_(
	    integer *, complex *, integer *, complex *, integer *);
    static integer itemp;
    extern doublereal scnrm2_(integer *, complex *, integer *);
    static integer rk;
    extern /* Subroutine */ int clarfg_(integer *, complex *, complex *, 
	    integer *, complex *);
    static integer lsticc;
    extern integer isamax_(integer *, real *, integer *);
    static integer lastrk;
    static complex akk;
    static integer pvt;
#define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1
#define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)]
#define f_subscr(a_1,a_2) (a_2)*f_dim1 + a_1
#define f_ref(a_1,a_2) f[f_subscr(a_1,a_2)]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --jpvt;
    --tau;
    --vn1;
    --vn2;
    --auxv;
    f_dim1 = *ldf;
    f_offset = 1 + f_dim1 * 1;
    f -= f_offset;

    /* Function Body   
   Computing MIN */
    i__1 = *m, i__2 = *n + *offset;
    lastrk = min(i__1,i__2);
    lsticc = 0;
    k = 0;

/*     Beginning of while loop. */

L10:
    if (k < *nb && lsticc == 0) {
	++k;
	rk = *offset + k;

/*        Determine ith pivot column and swap if necessary */

	i__1 = *n - k + 1;
	pvt = k - 1 + isamax_(&i__1, &vn1[k], &c__1);
	if (pvt != k) {
	    cswap_(m, &a_ref(1, pvt), &c__1, &a_ref(1, k), &c__1);
	    i__1 = k - 1;
	    cswap_(&i__1, &f_ref(pvt, 1), ldf, &f_ref(k, 1), ldf);
	    itemp = jpvt[pvt];
	    jpvt[pvt] = jpvt[k];
	    jpvt[k] = itemp;
	    vn1[pvt] = vn1[k];
	    vn2[pvt] = vn2[k];
	}

/*        Apply previous Householder reflectors to column K:   
          A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)'. */

	if (k > 1) {
/* CC            CALL CGEMM( 'No transpose', 'Conjugate transpose',   
   CC     $                  M-RK+1, 1, K-1, -CONE, A( RK, 1 ), LDA,   
   CC     $                  F( K, 1 ), LDF, CONE, A( RK, K ), LDA ) */
	    i__1 = k - 1;
	    for (j = 1; j <= i__1; ++j) {
		i__2 = f_subscr(k, j);
		r_cnjg(&q__1, &f_ref(k, j));
		f[i__2].r = q__1.r, f[i__2].i = q__1.i;
/* L20: */
	    }
	    i__1 = *m - rk + 1;
	    i__2 = k - 1;
	    q__1.r = -1.f, q__1.i = 0.f;
	    cgemv_("No transpose", &i__1, &i__2, &q__1, &a_ref(rk, 1), lda, &
		    f_ref(k, 1), ldf, &c_b2, &a_ref(rk, k), &c__1)
		    ;
	    i__1 = k - 1;
	    for (j = 1; j <= i__1; ++j) {
		i__2 = f_subscr(k, j);
		r_cnjg(&q__1, &f_ref(k, j));
		f[i__2].r = q__1.r, f[i__2].i = q__1.i;
/* L30: */
	    }
	}

/*        Generate elementary reflector H(k). */

	if (rk < *m) {
	    i__1 = *m - rk + 1;
	    clarfg_(&i__1, &a_ref(rk, k), &a_ref(rk + 1, k), &c__1, &tau[k]);
	} else {
	    clarfg_(&c__1, &a_ref(rk, k), &a_ref(rk, k), &c__1, &tau[k]);
	}

	i__1 = a_subscr(rk, k);
	akk.r = a[i__1].r, akk.i = a[i__1].i;
	i__1 = a_subscr(rk, k);
	a[i__1].r = 1.f, a[i__1].i = 0.f;

/*        Compute Kth column of F:   

          Compute  F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)'*A(RK:M,K). */

	if (k < *n) {
	    i__1 = *m - rk + 1;
	    i__2 = *n - k;
	    cgemv_("Conjugate transpose", &i__1, &i__2, &tau[k], &a_ref(rk, k 
		    + 1), lda, &a_ref(rk, k), &c__1, &c_b1, &f_ref(k + 1, k), 
		    &c__1);
	}

/*        Padding F(1:K,K) with zeros. */

	i__1 = k;
	for (j = 1; j <= i__1; ++j) {
	    i__2 = f_subscr(j, k);
	    f[i__2].r = 0.f, f[i__2].i = 0.f;
/* L40: */
	}

/*        Incremental updating of F:   
          F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)'   
                      *A(RK:M,K). */

	if (k > 1) {
	    i__1 = *m - rk + 1;
	    i__2 = k - 1;
	    i__3 = k;
	    q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i;
	    cgemv_("Conjugate transpose", &i__1, &i__2, &q__1, &a_ref(rk, 1), 
		    lda, &a_ref(rk, k), &c__1, &c_b1, &auxv[1], &c__1);

	    i__1 = k - 1;
	    cgemv_("No transpose", n, &i__1, &c_b2, &f_ref(1, 1), ldf, &auxv[
		    1], &c__1, &c_b2, &f_ref(1, k), &c__1);
	}

/*        Update the current row of A:   
          A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)'. */

	if (k < *n) {
	    i__1 = *n - k;
	    q__1.r = -1.f, q__1.i = 0.f;
	    cgemm_("No transpose", "Conjugate transpose", &c__1, &i__1, &k, &
		    q__1, &a_ref(rk, 1), lda, &f_ref(k + 1, 1), ldf, &c_b2, &
		    a_ref(rk, k + 1), lda);
	}

/*        Update partial column norms. */

	if (rk < lastrk) {
	    i__1 = *n;
	    for (j = k + 1; j <= i__1; ++j) {
		if (vn1[j] != 0.f) {
		    temp = c_abs(&a_ref(rk, j)) / vn1[j];
/* Computing MAX */
		    r__1 = 0.f, r__2 = (temp + 1.f) * (1.f - temp);
		    temp = dmax(r__1,r__2);
/* Computing 2nd power */
		    r__1 = vn1[j] / vn2[j];
		    temp2 = temp * .05f * (r__1 * r__1) + 1.f;
		    if (temp2 == 1.f) {
			vn2[j] = (real) lsticc;
			lsticc = j;
		    } else {
			vn1[j] *= sqrt(temp);
		    }
		}
/* L50: */
	    }
	}

	i__1 = a_subscr(rk, k);
	a[i__1].r = akk.r, a[i__1].i = akk.i;

/*        End of while loop. */

	goto L10;
    }
    *kb = k;
    rk = *offset + *kb;

/*     Apply the block reflector to the rest of the matrix:   
       A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) -   
                           A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)'.   

   Computing MIN */
    i__1 = *n, i__2 = *m - *offset;
    if (*kb < min(i__1,i__2)) {
	i__1 = *m - rk;
	i__2 = *n - *kb;
	q__1.r = -1.f, q__1.i = 0.f;
	cgemm_("No transpose", "Conjugate transpose", &i__1, &i__2, kb, &q__1,
		 &a_ref(rk + 1, 1), lda, &f_ref(*kb + 1, 1), ldf, &c_b2, &
		a_ref(rk + 1, *kb + 1), lda);
    }

/*     Recomputation of difficult columns. */

L60:
    if (lsticc > 0) {
	itemp = i_nint(&vn2[lsticc]);
	i__1 = *m - rk;
	vn1[lsticc] = scnrm2_(&i__1, &a_ref(rk + 1, lsticc), &c__1);
	vn2[lsticc] = vn1[lsticc];
	lsticc = itemp;
	goto L60;
    }

    return 0;

/*     End of CLAQPS */

} /* claqps_ */

#undef f_ref
#undef f_subscr
#undef a_ref
#undef a_subscr



.