157 $ IPIV2, WORK, LWORK, INFO )
167 INTEGER N, LDA, LTB, LWORK, INFO
170 INTEGER IPIV( * ), IPIV2( * )
171 COMPLEX*16 A( LDA, * ), TB( * ), WORK( * )
176 COMPLEX*16 CZERO, CONE
177 parameter( czero = ( 0.0d+0, 0.0d+0 ),
178 $ cone = ( 1.0d+0, 0.0d+0 ) )
181 LOGICAL UPPER, TQUERY, WQUERY
182 INTEGER I, J, K, I1, I2, TD
183 INTEGER LDTB, NB, KB, JB, NT, IINFO
189 EXTERNAL lsame, ilaenv
204 upper = lsame( uplo,
'U' )
205 wquery = ( lwork.EQ.-1 )
206 tquery = ( ltb.EQ.-1 )
207 IF( .NOT.upper .AND. .NOT.lsame( uplo,
'L' ) )
THEN
209 ELSE IF( n.LT.0 )
THEN
211 ELSE IF( lda.LT.max( 1, n ) )
THEN
213 ELSE IF ( ltb .LT. 4*n .AND. .NOT.tquery )
THEN
215 ELSE IF ( lwork .LT. n .AND. .NOT.wquery )
THEN
220 CALL xerbla(
'ZSYTRF_AA_2STAGE', -info )
226 nb = ilaenv( 1,
'ZSYTRF_AA_2STAGE', uplo, n, -1, -1, -1 )
235 IF( tquery .OR. wquery )
THEN
248 IF( ldtb .LT. 3*nb+1 )
THEN
251 IF( lwork .LT. nb*n )
THEN
285 IF( i .EQ. (j-1) )
THEN
290 CALL zgemm(
'NoTranspose',
'NoTranspose',
292 $ cone, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
293 $ a( (i-1)*nb+1, j*nb+1 ), lda,
294 $ czero, work( i*nb+1 ), n )
297 IF( i .EQ. (j-1) )
THEN
302 CALL zgemm(
'NoTranspose',
'NoTranspose',
304 $ cone, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
306 $ a( (i-2)*nb+1, j*nb+1 ), lda,
307 $ czero, work( i*nb+1 ), n )
313 CALL zlacpy(
'Upper', kb, kb, a( j*nb+1, j*nb+1 ), lda,
314 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
317 CALL zgemm(
'Transpose',
'NoTranspose',
319 $ -cone, a( 1, j*nb+1 ), lda,
321 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
323 CALL zgemm(
'Transpose',
'NoTranspose',
325 $ cone, a( (j-1)*nb+1, j*nb+1 ), lda,
326 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
327 $ czero, work( 1 ), n )
328 CALL zgemm(
'NoTranspose',
'NoTranspose',
330 $ -cone, work( 1 ), n,
331 $ a( (j-2)*nb+1, j*nb+1 ), lda,
332 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
339 tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
340 $ = tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
347 CALL ztrsm(
'L',
'U',
'T',
'N', kb, kb, cone,
348 $ a( (j-1)*nb+1, j*nb+1 ), lda,
349 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
350 CALL ztrsm(
'R',
'U',
'N',
'N', kb, kb, cone,
351 $ a( (j-1)*nb+1, j*nb+1 ), lda,
352 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
361 CALL zgemm(
'NoTranspose',
'NoTranspose',
363 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
364 $ a( (j-1)*nb+1, j*nb+1 ), lda,
365 $ czero, work( j*nb+1 ), n )
367 CALL zgemm(
'NoTranspose',
'NoTranspose',
369 $ cone, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
371 $ a( (j-2)*nb+1, j*nb+1 ), lda,
372 $ czero, work( j*nb+1 ), n )
377 CALL zgemm(
'Transpose',
'NoTranspose',
378 $ nb, n-(j+1)*nb, j*nb,
379 $ -cone, work( nb+1 ), n,
380 $ a( 1, (j+1)*nb+1 ), lda,
381 $ cone, a( j*nb+1, (j+1)*nb+1 ), lda )
387 CALL zcopy( n-(j+1)*nb,
388 $ a( j*nb+k, (j+1)*nb+1 ), lda,
389 $ work( 1+(k-1)*n ), 1 )
394 CALL zgetrf( n-(j+1)*nb, nb,
396 $ ipiv( (j+1)*nb+1 ), iinfo )
404 CALL zcopy( n-(j+1)*nb,
405 $ work( 1+(k-1)*n ), 1,
406 $ a( j*nb+k, (j+1)*nb+1 ), lda )
411 kb = min(nb, n-(j+1)*nb)
412 CALL zlaset(
'Full', kb, nb, czero, czero,
413 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
414 CALL zlacpy(
'Upper', kb, nb,
416 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
418 CALL ztrsm(
'R',
'U',
'N',
'U', kb, nb, cone,
419 $ a( (j-1)*nb+1, j*nb+1 ), lda,
420 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
428 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
429 $ = tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
432 CALL zlaset(
'Lower', kb, nb, czero, cone,
433 $ a( j*nb+1, (j+1)*nb+1), lda )
439 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
442 i2 = ipiv( (j+1)*nb+k )
445 CALL zswap( k-1, a( (j+1)*nb+1, i1 ), 1,
446 $ a( (j+1)*nb+1, i2 ), 1 )
449 $
CALL zswap( i2-i1-1, a( i1, i1+1 ), lda,
453 $
CALL zswap( n-i2, a( i1, i2+1 ), lda,
454 $ a( i2, i2+1 ), lda )
457 a( i1, i1 ) = a( i2, i2 )
461 CALL zswap( j*nb, a( 1, i1 ), 1,
482 IF( i .EQ. (j-1) )
THEN
487 CALL zgemm(
'NoTranspose',
'Transpose',
489 $ cone, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
490 $ a( j*nb+1, (i-1)*nb+1 ), lda,
491 $ czero, work( i*nb+1 ), n )
494 IF( i .EQ. (j-1) )
THEN
499 CALL zgemm(
'NoTranspose',
'Transpose',
501 $ cone, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
503 $ a( j*nb+1, (i-2)*nb+1 ), lda,
504 $ czero, work( i*nb+1 ), n )
510 CALL zlacpy(
'Lower', kb, kb, a( j*nb+1, j*nb+1 ), lda,
511 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
514 CALL zgemm(
'NoTranspose',
'NoTranspose',
516 $ -cone, a( j*nb+1, 1 ), lda,
518 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
520 CALL zgemm(
'NoTranspose',
'NoTranspose',
522 $ cone, a( j*nb+1, (j-1)*nb+1 ), lda,
523 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
524 $ czero, work( 1 ), n )
525 CALL zgemm(
'NoTranspose',
'Transpose',
527 $ -cone, work( 1 ), n,
528 $ a( j*nb+1, (j-2)*nb+1 ), lda,
529 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
536 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
537 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
544 CALL ztrsm(
'L',
'L',
'N',
'N', kb, kb, cone,
545 $ a( j*nb+1, (j-1)*nb+1 ), lda,
546 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
547 CALL ztrsm(
'R',
'L',
'T',
'N', kb, kb, cone,
548 $ a( j*nb+1, (j-1)*nb+1 ), lda,
549 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
556 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
557 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
567 CALL zgemm(
'NoTranspose',
'Transpose',
569 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
570 $ a( j*nb+1, (j-1)*nb+1 ), lda,
571 $ czero, work( j*nb+1 ), n )
573 CALL zgemm(
'NoTranspose',
'Transpose',
575 $ cone, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
577 $ a( j*nb+1, (j-2)*nb+1 ), lda,
578 $ czero, work( j*nb+1 ), n )
583 CALL zgemm(
'NoTranspose',
'NoTranspose',
584 $ n-(j+1)*nb, nb, j*nb,
585 $ -cone, a( (j+1)*nb+1, 1 ), lda,
587 $ cone, a( (j+1)*nb+1, j*nb+1 ), lda )
592 CALL zgetrf( n-(j+1)*nb, nb,
593 $ a( (j+1)*nb+1, j*nb+1 ), lda,
594 $ ipiv( (j+1)*nb+1 ), iinfo )
601 kb = min(nb, n-(j+1)*nb)
602 CALL zlaset(
'Full', kb, nb, czero, czero,
603 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
604 CALL zlacpy(
'Upper', kb, nb,
605 $ a( (j+1)*nb+1, j*nb+1 ), lda,
606 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
608 CALL ztrsm(
'R',
'L',
'T',
'U', kb, nb, cone,
609 $ a( j*nb+1, (j-1)*nb+1 ), lda,
610 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
618 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb ) =
619 $ tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
622 CALL zlaset(
'Upper', kb, nb, czero, cone,
623 $ a( (j+1)*nb+1, j*nb+1 ), lda )
629 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
632 i2 = ipiv( (j+1)*nb+k )
635 CALL zswap( k-1, a( i1, (j+1)*nb+1 ), lda,
636 $ a( i2, (j+1)*nb+1 ), lda )
639 $
CALL zswap( i2-i1-1, a( i1+1, i1 ), 1,
640 $ a( i2, i1+1 ), lda )
643 $
CALL zswap( n-i2, a( i2+1, i1 ), 1,
647 a( i1, i1 ) = a( i2, i2 )
651 CALL zswap( j*nb, a( i1, 1 ), lda,
666 CALL zgbtrf( n, n, nb, nb, tb, ldtb, ipiv2, info )