159 $ IPIV2, WORK, LWORK, INFO )
169 INTEGER N, LDA, LTB, LWORK, INFO
172 INTEGER IPIV( * ), IPIV2( * )
173 COMPLEX A( LDA, * ), TB( * ), WORK( * )
179 parameter( czero = ( 0.0e+0, 0.0e+0 ),
180 $ cone = ( 1.0e+0, 0.0e+0 ) )
183 LOGICAL UPPER, TQUERY, WQUERY
184 INTEGER I, J, K, I1, I2, TD
185 INTEGER LDTB, NB, KB, JB, NT, IINFO
191 EXTERNAL lsame, ilaenv
205 upper = lsame( uplo,
'U' )
206 wquery = ( lwork.EQ.-1 )
207 tquery = ( ltb.EQ.-1 )
208 IF( .NOT.upper .AND. .NOT.lsame( uplo,
'L' ) )
THEN
210 ELSE IF( n.LT.0 )
THEN
212 ELSE IF( lda.LT.max( 1, n ) )
THEN
214 ELSE IF ( ltb .LT. 4*n .AND. .NOT.tquery )
THEN
216 ELSE IF ( lwork .LT. n .AND. .NOT.wquery )
THEN
221 CALL xerbla(
'CSYTRF_AA_2STAGE', -info )
227 nb = ilaenv( 1,
'CSYTRF_AA_2STAGE', uplo, n, -1, -1, -1 )
236 IF( tquery .OR. wquery )
THEN
249 IF( ldtb .LT. 3*nb+1 )
THEN
252 IF( lwork .LT. nb*n )
THEN
286 IF( i .EQ. (j-1) )
THEN
291 CALL cgemm(
'NoTranspose',
'NoTranspose',
293 $ cone, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
294 $ a( (i-1)*nb+1, j*nb+1 ), lda,
295 $ czero, work( i*nb+1 ), n )
303 CALL cgemm(
'NoTranspose',
'NoTranspose',
305 $ cone, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
307 $ a( (i-2)*nb+1, j*nb+1 ), lda,
308 $ czero, work( i*nb+1 ), n )
314 CALL clacpy(
'Upper', kb, kb, a( j*nb+1, j*nb+1 ), lda,
315 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
318 CALL cgemm(
'Transpose',
'NoTranspose',
320 $ -cone, a( 1, j*nb+1 ), lda,
322 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
324 CALL cgemm(
'Transpose',
'NoTranspose',
326 $ cone, a( (j-1)*nb+1, j*nb+1 ), lda,
327 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
328 $ czero, work( 1 ), n )
329 CALL cgemm(
'NoTranspose',
'NoTranspose',
331 $ -cone, work( 1 ), n,
332 $ a( (j-2)*nb+1, j*nb+1 ), lda,
333 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
340 tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
341 $ = tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
348 CALL ctrsm(
'L',
'U',
'T',
'N', kb, kb, cone,
349 $ a( (j-1)*nb+1, j*nb+1 ), lda,
350 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
351 CALL ctrsm(
'R',
'U',
'N',
'N', kb, kb, cone,
352 $ a( (j-1)*nb+1, j*nb+1 ), lda,
353 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
362 CALL cgemm(
'NoTranspose',
'NoTranspose',
364 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
365 $ a( (j-1)*nb+1, j*nb+1 ), lda,
366 $ czero, work( j*nb+1 ), n )
368 CALL cgemm(
'NoTranspose',
'NoTranspose',
370 $ cone, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
372 $ a( (j-2)*nb+1, j*nb+1 ), lda,
373 $ czero, work( j*nb+1 ), n )
378 CALL cgemm(
'Transpose',
'NoTranspose',
379 $ nb, n-(j+1)*nb, j*nb,
380 $ -cone, work( nb+1 ), n,
381 $ a( 1, (j+1)*nb+1 ), lda,
382 $ cone, a( j*nb+1, (j+1)*nb+1 ), lda )
388 CALL ccopy( n-(j+1)*nb,
389 $ a( j*nb+k, (j+1)*nb+1 ), lda,
390 $ work( 1+(k-1)*n ), 1 )
395 CALL cgetrf( n-(j+1)*nb, nb,
397 $ ipiv( (j+1)*nb+1 ), iinfo )
405 CALL ccopy( n-(j+1)*nb,
406 $ work( 1+(k-1)*n ), 1,
407 $ a( j*nb+k, (j+1)*nb+1 ), lda )
412 kb = min(nb, n-(j+1)*nb)
413 CALL claset(
'Full', kb, nb, czero, czero,
414 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
415 CALL clacpy(
'Upper', kb, nb,
417 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
419 CALL ctrsm(
'R',
'U',
'N',
'U', kb, nb, cone,
420 $ a( (j-1)*nb+1, j*nb+1 ), lda,
421 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
429 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
430 $ = tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
433 CALL claset(
'Lower', kb, nb, czero, cone,
434 $ a( j*nb+1, (j+1)*nb+1), lda )
440 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
443 i2 = ipiv( (j+1)*nb+k )
446 CALL cswap( k-1, a( (j+1)*nb+1, i1 ), 1,
447 $ a( (j+1)*nb+1, i2 ), 1 )
450 $
CALL cswap( i2-i1-1, a( i1, i1+1 ), lda,
454 $
CALL cswap( n-i2, a( i1, i2+1 ), lda,
455 $ a( i2, i2+1 ), lda )
458 a( i1, i1 ) = a( i2, i2 )
462 CALL cswap( j*nb, a( 1, i1 ), 1,
483 IF( i .EQ. (j-1) )
THEN
488 CALL cgemm(
'NoTranspose',
'Transpose',
490 $ cone, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
491 $ a( j*nb+1, (i-1)*nb+1 ), lda,
492 $ czero, work( i*nb+1 ), n )
495 IF( i .EQ. (j-1) )
THEN
500 CALL cgemm(
'NoTranspose',
'Transpose',
502 $ cone, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
504 $ a( j*nb+1, (i-2)*nb+1 ), lda,
505 $ czero, work( i*nb+1 ), n )
511 CALL clacpy(
'Lower', kb, kb, a( j*nb+1, j*nb+1 ), lda,
512 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
515 CALL cgemm(
'NoTranspose',
'NoTranspose',
517 $ -cone, a( j*nb+1, 1 ), lda,
519 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
521 CALL cgemm(
'NoTranspose',
'NoTranspose',
523 $ cone, a( j*nb+1, (j-1)*nb+1 ), lda,
524 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
525 $ czero, work( 1 ), n )
526 CALL cgemm(
'NoTranspose',
'Transpose',
528 $ -cone, work( 1 ), n,
529 $ a( j*nb+1, (j-2)*nb+1 ), lda,
530 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
537 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
538 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
545 CALL ctrsm(
'L',
'L',
'N',
'N', kb, kb, cone,
546 $ a( j*nb+1, (j-1)*nb+1 ), lda,
547 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
548 CALL ctrsm(
'R',
'L',
'T',
'N', kb, kb, cone,
549 $ a( j*nb+1, (j-1)*nb+1 ), lda,
550 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
557 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
558 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
568 CALL cgemm(
'NoTranspose',
'Transpose',
570 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
571 $ a( j*nb+1, (j-1)*nb+1 ), lda,
572 $ czero, work( j*nb+1 ), n )
574 CALL cgemm(
'NoTranspose',
'Transpose',
576 $ cone, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
578 $ a( j*nb+1, (j-2)*nb+1 ), lda,
579 $ czero, work( j*nb+1 ), n )
584 CALL cgemm(
'NoTranspose',
'NoTranspose',
585 $ n-(j+1)*nb, nb, j*nb,
586 $ -cone, a( (j+1)*nb+1, 1 ), lda,
588 $ cone, a( (j+1)*nb+1, j*nb+1 ), lda )
593 CALL cgetrf( n-(j+1)*nb, nb,
594 $ a( (j+1)*nb+1, j*nb+1 ), lda,
595 $ ipiv( (j+1)*nb+1 ), iinfo )
602 kb = min(nb, n-(j+1)*nb)
603 CALL claset(
'Full', kb, nb, czero, czero,
604 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
605 CALL clacpy(
'Upper', kb, nb,
606 $ a( (j+1)*nb+1, j*nb+1 ), lda,
607 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
609 CALL ctrsm(
'R',
'L',
'T',
'U', kb, nb, cone,
610 $ a( j*nb+1, (j-1)*nb+1 ), lda,
611 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
619 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb ) =
620 $ tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
623 CALL claset(
'Upper', kb, nb, czero, cone,
624 $ a( (j+1)*nb+1, j*nb+1 ), lda )
630 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
633 i2 = ipiv( (j+1)*nb+k )
636 CALL cswap( k-1, a( i1, (j+1)*nb+1 ), lda,
637 $ a( i2, (j+1)*nb+1 ), lda )
640 $
CALL cswap( i2-i1-1, a( i1+1, i1 ), 1,
641 $ a( i2, i1+1 ), lda )
644 $
CALL cswap( n-i2, a( i2+1, i1 ), 1,
648 a( i1, i1 ) = a( i2, i2 )
652 CALL cswap( j*nb, a( i1, 1 ), lda,
667 CALL cgbtrf( n, n, nb, nb, tb, ldtb, ipiv2, info )
subroutine xerbla(SRNAME, INFO)
XERBLA
subroutine ccopy(N, CX, INCX, CY, INCY)
CCOPY
subroutine cswap(N, CX, INCX, CY, INCY)
CSWAP
subroutine cgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
CGEMM
subroutine ctrsm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
CTRSM
subroutine cgbtrf(M, N, KL, KU, AB, LDAB, IPIV, INFO)
CGBTRF
subroutine cgetrf(M, N, A, LDA, IPIV, INFO)
CGETRF
subroutine claset(UPLO, M, N, ALPHA, BETA, A, LDA)
CLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
subroutine clacpy(UPLO, M, N, A, LDA, B, LDB)
CLACPY copies all or part of one two-dimensional array to another.
subroutine csytrf_aa_2stage(UPLO, N, A, LDA, TB, LTB, IPIV, IPIV2, WORK, LWORK, INFO)
CSYTRF_AA_2STAGE