157 $ IPIV2, WORK, LWORK, INFO )
167 INTEGER N, LDA, LTB, LWORK, INFO
170 INTEGER IPIV( * ), IPIV2( * )
171 DOUBLE PRECISION A( LDA, * ), TB( * ), WORK( * )
176 DOUBLE PRECISION ZERO, ONE
177 parameter( zero = 0.0d+0, one = 1.0d+0 )
180 LOGICAL UPPER, TQUERY, WQUERY
181 INTEGER I, J, K, I1, I2, TD
182 INTEGER LDTB, NB, KB, JB, NT, IINFO
188 EXTERNAL lsame, ilaenv
203 upper = lsame( uplo,
'U' )
204 wquery = ( lwork.EQ.-1 )
205 tquery = ( ltb.EQ.-1 )
206 IF( .NOT.upper .AND. .NOT.lsame( uplo,
'L' ) )
THEN
208 ELSE IF( n.LT.0 )
THEN
210 ELSE IF( lda.LT.max( 1, n ) )
THEN
212 ELSE IF( ltb.LT.max( 1, 4*n ) .AND. .NOT.tquery )
THEN
214 ELSE IF( lwork.LT.max( 1, n ) .AND. .NOT.wquery )
THEN
219 CALL xerbla(
'DSYTRF_AA_2STAGE', -info )
225 nb = ilaenv( 1,
'DSYTRF_AA_2STAGE', uplo, n, -1, -1, -1 )
228 tb( 1 ) = max( 1, (3*nb+1)*n )
231 work( 1 ) = max( 1, n*nb )
234 IF( tquery .OR. wquery )
THEN
247 IF( ldtb .LT. 3*nb+1 )
THEN
250 IF( lwork .LT. nb*n )
THEN
284 IF( i .EQ. (j-1) )
THEN
289 CALL dgemm(
'NoTranspose',
'NoTranspose',
291 $ one, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
292 $ a( (i-1)*nb+1, j*nb+1 ), lda,
293 $ zero, work( i*nb+1 ), n )
301 CALL dgemm(
'NoTranspose',
'NoTranspose',
303 $ one, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
305 $ a( (i-2)*nb+1, j*nb+1 ), lda,
306 $ zero, work( i*nb+1 ), n )
312 CALL dlacpy(
'Upper', kb, kb, a( j*nb+1, j*nb+1 ), lda,
313 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
316 CALL dgemm(
'Transpose',
'NoTranspose',
318 $ -one, a( 1, j*nb+1 ), lda,
320 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
322 CALL dgemm(
'Transpose',
'NoTranspose',
324 $ one, a( (j-1)*nb+1, j*nb+1 ), lda,
325 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
326 $ zero, work( 1 ), n )
327 CALL dgemm(
'NoTranspose',
'NoTranspose',
329 $ -one, work( 1 ), n,
330 $ a( (j-2)*nb+1, j*nb+1 ), lda,
331 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
334 CALL dsygst( 1,
'Upper', kb,
335 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1,
336 $ a( (j-1)*nb+1, j*nb+1 ), lda, iinfo )
343 tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
344 $ = tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
354 CALL dgemm(
'NoTranspose',
'NoTranspose',
356 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
357 $ a( (j-1)*nb+1, j*nb+1 ), lda,
358 $ zero, work( j*nb+1 ), n )
360 CALL dgemm(
'NoTranspose',
'NoTranspose',
362 $ one, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
364 $ a( (j-2)*nb+1, j*nb+1 ), lda,
365 $ zero, work( j*nb+1 ), n )
370 CALL dgemm(
'Transpose',
'NoTranspose',
371 $ nb, n-(j+1)*nb, j*nb,
372 $ -one, work( nb+1 ), n,
373 $ a( 1, (j+1)*nb+1 ), lda,
374 $ one, a( j*nb+1, (j+1)*nb+1 ), lda )
380 CALL dcopy( n-(j+1)*nb,
381 $ a( j*nb+k, (j+1)*nb+1 ), lda,
382 $ work( 1+(k-1)*n ), 1 )
387 CALL dgetrf( n-(j+1)*nb, nb,
389 $ ipiv( (j+1)*nb+1 ), iinfo )
397 CALL dcopy( n-(j+1)*nb,
398 $ work( 1+(k-1)*n ), 1,
399 $ a( j*nb+k, (j+1)*nb+1 ), lda )
404 kb = min(nb, n-(j+1)*nb)
405 CALL dlaset(
'Full', kb, nb, zero, zero,
406 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
407 CALL dlacpy(
'Upper', kb, nb,
409 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
411 CALL dtrsm(
'R',
'U',
'N',
'U', kb, nb, one,
412 $ a( (j-1)*nb+1, j*nb+1 ), lda,
413 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
421 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
422 $ = tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
425 CALL dlaset(
'Lower', kb, nb, zero, one,
426 $ a( j*nb+1, (j+1)*nb+1), lda )
432 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
435 i2 = ipiv( (j+1)*nb+k )
438 CALL dswap( k-1, a( (j+1)*nb+1, i1 ), 1,
439 $ a( (j+1)*nb+1, i2 ), 1 )
442 $
CALL dswap( i2-i1-1, a( i1, i1+1 ), lda,
446 $
CALL dswap( n-i2, a( i1, i2+1 ), lda,
447 $ a( i2, i2+1 ), lda )
450 a( i1, i1 ) = a( i2, i2 )
454 CALL dswap( j*nb, a( 1, i1 ), 1,
480 CALL dgemm(
'NoTranspose',
'Transpose',
482 $ one, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
483 $ a( j*nb+1, (i-1)*nb+1 ), lda,
484 $ zero, work( i*nb+1 ), n )
492 CALL dgemm(
'NoTranspose',
'Transpose',
494 $ one, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
496 $ a( j*nb+1, (i-2)*nb+1 ), lda,
497 $ zero, work( i*nb+1 ), n )
503 CALL dlacpy(
'Lower', kb, kb, a( j*nb+1, j*nb+1 ), lda,
504 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
507 CALL dgemm(
'NoTranspose',
'NoTranspose',
509 $ -one, a( j*nb+1, 1 ), lda,
511 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
513 CALL dgemm(
'NoTranspose',
'NoTranspose',
515 $ one, a( j*nb+1, (j-1)*nb+1 ), lda,
516 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
517 $ zero, work( 1 ), n )
518 CALL dgemm(
'NoTranspose',
'Transpose',
520 $ -one, work( 1 ), n,
521 $ a( j*nb+1, (j-2)*nb+1 ), lda,
522 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
525 CALL dsygst( 1,
'Lower', kb,
526 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1,
527 $ a( j*nb+1, (j-1)*nb+1 ), lda, iinfo )
534 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
535 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
545 CALL dgemm(
'NoTranspose',
'Transpose',
547 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
548 $ a( j*nb+1, (j-1)*nb+1 ), lda,
549 $ zero, work( j*nb+1 ), n )
551 CALL dgemm(
'NoTranspose',
'Transpose',
553 $ one, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
555 $ a( j*nb+1, (j-2)*nb+1 ), lda,
556 $ zero, work( j*nb+1 ), n )
561 CALL dgemm(
'NoTranspose',
'NoTranspose',
562 $ n-(j+1)*nb, nb, j*nb,
563 $ -one, a( (j+1)*nb+1, 1 ), lda,
565 $ one, a( (j+1)*nb+1, j*nb+1 ), lda )
570 CALL dgetrf( n-(j+1)*nb, nb,
571 $ a( (j+1)*nb+1, j*nb+1 ), lda,
572 $ ipiv( (j+1)*nb+1 ), iinfo )
579 kb = min(nb, n-(j+1)*nb)
580 CALL dlaset(
'Full', kb, nb, zero, zero,
581 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
582 CALL dlacpy(
'Upper', kb, nb,
583 $ a( (j+1)*nb+1, j*nb+1 ), lda,
584 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
586 CALL dtrsm(
'R',
'L',
'T',
'U', kb, nb, one,
587 $ a( j*nb+1, (j-1)*nb+1 ), lda,
588 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
596 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
597 $ = tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
600 CALL dlaset(
'Upper', kb, nb, zero, one,
601 $ a( (j+1)*nb+1, j*nb+1), lda )
607 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
610 i2 = ipiv( (j+1)*nb+k )
613 CALL dswap( k-1, a( i1, (j+1)*nb+1 ), lda,
614 $ a( i2, (j+1)*nb+1 ), lda )
617 $
CALL dswap( i2-i1-1, a( i1+1, i1 ), 1,
618 $ a( i2, i1+1 ), lda )
621 $
CALL dswap( n-i2, a( i2+1, i1 ), 1,
625 a( i1, i1 ) = a( i2, i2 )
629 CALL dswap( j*nb, a( i1, 1 ), lda,
644 CALL dgbtrf( n, n, nb, nb, tb, ldtb, ipiv2, info )