157 $ IPIV2, WORK, LWORK, INFO )
167 INTEGER N, LDA, LTB, LWORK, INFO
170 INTEGER IPIV( * ), IPIV2( * )
171 COMPLEX A( LDA, * ), TB( * ), WORK( * )
177 parameter( zero = ( 0.0e+0, 0.0e+0 ),
178 $ one = ( 1.0e+0, 0.0e+0 ) )
181 LOGICAL UPPER, TQUERY, WQUERY
182 INTEGER I, J, K, I1, I2, TD
183 INTEGER LDTB, NB, KB, JB, NT, IINFO
190 EXTERNAL lsame, ilaenv, sroundup_lwork
199 INTRINSIC conjg, min, max
206 upper = lsame( uplo,
'U' )
207 wquery = ( lwork.EQ.-1 )
208 tquery = ( ltb.EQ.-1 )
209 IF( .NOT.upper .AND. .NOT.lsame( uplo,
'L' ) )
THEN
211 ELSE IF( n.LT.0 )
THEN
213 ELSE IF( lda.LT.max( 1, n ) )
THEN
215 ELSE IF( ltb.LT.max( 1, 4*n ) .AND. .NOT.tquery )
THEN
217 ELSE IF( lwork.LT.max( 1, n ) .AND. .NOT.wquery )
THEN
222 CALL xerbla(
'CHETRF_AA_2STAGE', -info )
228 nb = ilaenv( 1,
'CHETRF_AA_2STAGE', uplo, n, -1, -1, -1 )
231 tb( 1 ) = sroundup_lwork( max( 1, (3*nb+1)*n ) )
234 work( 1 ) = sroundup_lwork( max( 1, n*nb ) )
237 IF( tquery .OR. wquery )
THEN
250 IF( ldtb .LT. 3*nb+1 )
THEN
253 IF( lwork .LT. nb*n )
THEN
271 tb( 1 ) = cmplx( nb )
287 IF( i .EQ. (j-1) )
THEN
292 CALL cgemm(
'NoTranspose',
'NoTranspose',
294 $ one, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
295 $ a( (i-1)*nb+1, j*nb+1 ), lda,
296 $ zero, work( i*nb+1 ), n )
299 IF( i .EQ. (j-1) )
THEN
304 CALL cgemm(
'NoTranspose',
'NoTranspose',
306 $ one, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
308 $ a( (i-2)*nb+1, j*nb+1 ), lda,
309 $ zero, work( i*nb+1 ), n )
315 CALL clacpy(
'Upper', kb, kb, a( j*nb+1, j*nb+1 ), lda,
316 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
319 CALL cgemm(
'Conjugate transpose',
'NoTranspose',
321 $ -one, a( 1, j*nb+1 ), lda,
323 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
325 CALL cgemm(
'Conjugate transpose',
'NoTranspose',
327 $ one, a( (j-1)*nb+1, j*nb+1 ), lda,
328 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
329 $ zero, work( 1 ), n )
330 CALL cgemm(
'NoTranspose',
'NoTranspose',
332 $ -one, work( 1 ), n,
333 $ a( (j-2)*nb+1, j*nb+1 ), lda,
334 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
337 CALL chegst( 1,
'Upper', kb,
338 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1,
339 $ a( (j-1)*nb+1, j*nb+1 ), lda, iinfo )
345 tb( td+1 + (j*nb+i-1)*ldtb )
346 $ = real( tb( td+1 + (j*nb+i-1)*ldtb ) )
348 tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
349 $ = conjg( tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb ) )
359 CALL cgemm(
'NoTranspose',
'NoTranspose',
361 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
362 $ a( (j-1)*nb+1, j*nb+1 ), lda,
363 $ zero, work( j*nb+1 ), n )
365 CALL cgemm(
'NoTranspose',
'NoTranspose',
367 $ one, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
369 $ a( (j-2)*nb+1, j*nb+1 ), lda,
370 $ zero, work( j*nb+1 ), n )
375 CALL cgemm(
'Conjugate transpose',
'NoTranspose',
376 $ nb, n-(j+1)*nb, j*nb,
377 $ -one, work( nb+1 ), n,
378 $ a( 1, (j+1)*nb+1 ), lda,
379 $ one, a( j*nb+1, (j+1)*nb+1 ), lda )
385 CALL ccopy( n-(j+1)*nb,
386 $ a( j*nb+k, (j+1)*nb+1 ), lda,
387 $ work( 1+(k-1)*n ), 1 )
392 CALL cgetrf( n-(j+1)*nb, nb,
394 $ ipiv( (j+1)*nb+1 ), iinfo )
405 CALL ccopy( n-k-(j+1)*nb,
406 $ work( k+1+(k-1)*n ), 1,
407 $ a( j*nb+k, (j+1)*nb+k+1 ), lda )
411 CALL clacgv( k, work( 1+(k-1)*n ), 1 )
416 kb = min(nb, n-(j+1)*nb)
417 CALL claset(
'Full', kb, nb, zero, zero,
418 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
419 CALL clacpy(
'Upper', kb, nb,
421 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
423 CALL ctrsm(
'R',
'U',
'N',
'U', kb, nb, one,
424 $ a( (j-1)*nb+1, j*nb+1 ), lda,
425 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
433 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
434 $ = conjg( tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb ) )
437 CALL claset(
'Lower', kb, nb, zero, one,
438 $ a( j*nb+1, (j+1)*nb+1), lda )
444 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
447 i2 = ipiv( (j+1)*nb+k )
450 CALL cswap( k-1, a( (j+1)*nb+1, i1 ), 1,
451 $ a( (j+1)*nb+1, i2 ), 1 )
453 IF( i2.GT.(i1+1) )
THEN
454 CALL cswap( i2-i1-1, a( i1, i1+1 ), lda,
456 CALL clacgv( i2-i1-1, a( i1+1, i2 ), 1 )
458 CALL clacgv( i2-i1, a( i1, i1+1 ), lda )
461 $
CALL cswap( n-i2, a( i1, i2+1 ), lda,
462 $ a( i2, i2+1 ), lda )
465 a( i1, i1 ) = a( i2, i2 )
469 CALL cswap( j*nb, a( 1, i1 ), 1,
490 IF( i .EQ. (j-1) )
THEN
495 CALL cgemm(
'NoTranspose',
'Conjugate transpose',
497 $ one, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
498 $ a( j*nb+1, (i-1)*nb+1 ), lda,
499 $ zero, work( i*nb+1 ), n )
502 IF( i .EQ. (j-1) )
THEN
507 CALL cgemm(
'NoTranspose',
'Conjugate transpose',
509 $ one, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
511 $ a( j*nb+1, (i-2)*nb+1 ), lda,
512 $ zero, work( i*nb+1 ), n )
518 CALL clacpy(
'Lower', kb, kb, a( j*nb+1, j*nb+1 ), lda,
519 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
522 CALL cgemm(
'NoTranspose',
'NoTranspose',
524 $ -one, a( j*nb+1, 1 ), lda,
526 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
528 CALL cgemm(
'NoTranspose',
'NoTranspose',
530 $ one, a( j*nb+1, (j-1)*nb+1 ), lda,
531 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
532 $ zero, work( 1 ), n )
533 CALL cgemm(
'NoTranspose',
'Conjugate transpose',
535 $ -one, work( 1 ), n,
536 $ a( j*nb+1, (j-2)*nb+1 ), lda,
537 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
540 CALL chegst( 1,
'Lower', kb,
541 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1,
542 $ a( j*nb+1, (j-1)*nb+1 ), lda, iinfo )
548 tb( td+1 + (j*nb+i-1)*ldtb )
549 $ = real( tb( td+1 + (j*nb+i-1)*ldtb ) )
551 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
552 $ = conjg( tb( td+(k-i)+1 + (j*nb+i-1)*ldtb ) )
562 CALL cgemm(
'NoTranspose',
563 $
'Conjugate transpose',
565 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
566 $ a( j*nb+1, (j-1)*nb+1 ), lda,
567 $ zero, work( j*nb+1 ), n )
569 CALL cgemm(
'NoTranspose',
570 $
'Conjugate transpose',
572 $ one, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
574 $ a( j*nb+1, (j-2)*nb+1 ), lda,
575 $ zero, work( j*nb+1 ), n )
580 CALL cgemm(
'NoTranspose',
'NoTranspose',
581 $ n-(j+1)*nb, nb, j*nb,
582 $ -one, a( (j+1)*nb+1, 1 ), lda,
584 $ one, a( (j+1)*nb+1, j*nb+1 ), lda )
589 CALL cgetrf( n-(j+1)*nb, nb,
590 $ a( (j+1)*nb+1, j*nb+1 ), lda,
591 $ ipiv( (j+1)*nb+1 ), iinfo )
598 kb = min(nb, n-(j+1)*nb)
599 CALL claset(
'Full', kb, nb, zero, zero,
600 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
601 CALL clacpy(
'Upper', kb, nb,
602 $ a( (j+1)*nb+1, j*nb+1 ), lda,
603 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
605 CALL ctrsm(
'R',
'L',
'C',
'U', kb, nb, one,
606 $ a( j*nb+1, (j-1)*nb+1 ), lda,
607 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
615 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
616 $ = conjg( tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb ) )
619 CALL claset(
'Upper', kb, nb, zero, one,
620 $ a( (j+1)*nb+1, j*nb+1), lda )
626 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
629 i2 = ipiv( (j+1)*nb+k )
632 CALL cswap( k-1, a( i1, (j+1)*nb+1 ), lda,
633 $ a( i2, (j+1)*nb+1 ), lda )
635 IF( i2.GT.(i1+1) )
THEN
636 CALL cswap( i2-i1-1, a( i1+1, i1 ), 1,
637 $ a( i2, i1+1 ), lda )
638 CALL clacgv( i2-i1-1, a( i2, i1+1 ), lda )
640 CALL clacgv( i2-i1, a( i1+1, i1 ), 1 )
643 $
CALL cswap( n-i2, a( i2+1, i1 ), 1,
647 a( i1, i1 ) = a( i2, i2 )
651 CALL cswap( j*nb, a( i1, 1 ), lda,
666 CALL cgbtrf( n, n, nb, nb, tb, ldtb, ipiv2, info )