158
159
160
161
162
163 IMPLICIT NONE
164
165
166 CHARACTER UPLO
167 INTEGER N, LDA, LTB, LWORK, INFO
168
169
170 INTEGER IPIV( * ), IPIV2( * )
171 DOUBLE PRECISION A( LDA, * ), TB( * ), WORK( * )
172
173
174
175
176 DOUBLE PRECISION ZERO, ONE
177 parameter( zero = 0.0d+0, one = 1.0d+0 )
178
179
180 LOGICAL UPPER, TQUERY, WQUERY
181 INTEGER I, J, K, I1, I2, TD
182 INTEGER LDTB, NB, KB, JB, NT, IINFO
183 DOUBLE PRECISION PIV
184
185
186 LOGICAL LSAME
187 INTEGER ILAENV
189
190
194
195
196 INTRINSIC min, max
197
198
199
200
201
202 info = 0
203 upper =
lsame( uplo,
'U' )
204 wquery = ( lwork.EQ.-1 )
205 tquery = ( ltb.EQ.-1 )
206 IF( .NOT.upper .AND. .NOT.
lsame( uplo,
'L' ) )
THEN
207 info = -1
208 ELSE IF( n.LT.0 ) THEN
209 info = -2
210 ELSE IF( lda.LT.max( 1, n ) ) THEN
211 info = -4
212 ELSE IF( ltb.LT.max( 1, 4*n ) .AND. .NOT.tquery ) THEN
213 info = -6
214 ELSE IF( lwork.LT.max( 1, n ) .AND. .NOT.wquery ) THEN
215 info = -10
216 END IF
217
218 IF( info.NE.0 ) THEN
219 CALL xerbla(
'DSYTRF_AA_2STAGE', -info )
220 RETURN
221 END IF
222
223
224
225 nb =
ilaenv( 1,
'DSYTRF_AA_2STAGE', uplo, n, -1, -1, -1 )
226 IF( info.EQ.0 ) THEN
227 IF( tquery ) THEN
228 tb( 1 ) = max( 1, (3*nb+1)*n )
229 END IF
230 IF( wquery ) THEN
231 work( 1 ) = max( 1, n*nb )
232 END IF
233 END IF
234 IF( tquery .OR. wquery ) THEN
235 RETURN
236 END IF
237
238
239
240 IF( n.EQ.0 ) THEN
241 RETURN
242 ENDIF
243
244
245
246 ldtb = ltb/n
247 IF( ldtb .LT. 3*nb+1 ) THEN
248 nb = (ldtb-1)/3
249 END IF
250 IF( lwork .LT. nb*n ) THEN
251 nb = lwork/n
252 END IF
253
254
255
256 nt = (n+nb-1)/nb
257 td = 2*nb
258 kb = min(nb, n)
259
260
261
262 DO j = 1, kb
263 ipiv( j ) = j
264 END DO
265
266
267
268 tb( 1 ) = nb
269
270 IF( upper ) THEN
271
272
273
274
275
276 DO j = 0, nt-1
277
278
279
280 kb = min(nb, n-j*nb)
281 DO i = 1, j-1
282 IF( i .EQ. 1 ) THEN
283
284 IF( i .EQ. (j-1) ) THEN
285 jb = nb+kb
286 ELSE
287 jb = 2*nb
288 END IF
289 CALL dgemm(
'NoTranspose',
'NoTranspose',
290 $ nb, kb, jb,
291 $ one, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
292 $ a( (i-1)*nb+1, j*nb+1 ), lda,
293 $ zero, work( i*nb+1 ), n )
294 ELSE
295
296 IF( i .EQ. j-1) THEN
297 jb = 2*nb+kb
298 ELSE
299 jb = 3*nb
300 END IF
301 CALL dgemm(
'NoTranspose',
'NoTranspose',
302 $ nb, kb, jb,
303 $ one, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
304 $ ldtb-1,
305 $ a( (i-2)*nb+1, j*nb+1 ), lda,
306 $ zero, work( i*nb+1 ), n )
307 END IF
308 END DO
309
310
311
312 CALL dlacpy(
'Upper', kb, kb, a( j*nb+1, j*nb+1 ), lda,
313 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
314 IF( j.GT.1 ) THEN
315
316 CALL dgemm(
'Transpose',
'NoTranspose',
317 $ kb, kb, (j-1)*nb,
318 $ -one, a( 1, j*nb+1 ), lda,
319 $ work( nb+1 ), n,
320 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
321
322 CALL dgemm(
'Transpose',
'NoTranspose',
323 $ kb, nb, kb,
324 $ one, a( (j-1)*nb+1, j*nb+1 ), lda,
325 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
326 $ zero, work( 1 ), n )
327 CALL dgemm(
'NoTranspose',
'NoTranspose',
328 $ kb, kb, nb,
329 $ -one, work( 1 ), n,
330 $ a( (j-2)*nb+1, j*nb+1 ), lda,
331 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
332 END IF
333 IF( j.GT.0 ) THEN
334 CALL dsygst( 1,
'Upper', kb,
335 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1,
336 $ a( (j-1)*nb+1, j*nb+1 ), lda, iinfo )
337 END IF
338
339
340
341 DO i = 1, kb
342 DO k = i+1, kb
343 tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
344 $ = tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
345 END DO
346 END DO
347
348 IF( j.LT.nt-1 ) THEN
349 IF( j.GT.0 ) THEN
350
351
352
353 IF( j.EQ.1 ) THEN
354 CALL dgemm(
'NoTranspose',
'NoTranspose',
355 $ kb, kb, kb,
356 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
357 $ a( (j-1)*nb+1, j*nb+1 ), lda,
358 $ zero, work( j*nb+1 ), n )
359 ELSE
360 CALL dgemm(
'NoTranspose',
'NoTranspose',
361 $ kb, kb, nb+kb,
362 $ one, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
363 $ ldtb-1,
364 $ a( (j-2)*nb+1, j*nb+1 ), lda,
365 $ zero, work( j*nb+1 ), n )
366 END IF
367
368
369
370 CALL dgemm(
'Transpose',
'NoTranspose',
371 $ nb, n-(j+1)*nb, j*nb,
372 $ -one, work( nb+1 ), n,
373 $ a( 1, (j+1)*nb+1 ), lda,
374 $ one, a( j*nb+1, (j+1)*nb+1 ), lda )
375 END IF
376
377
378
379 DO k = 1, nb
380 CALL dcopy( n-(j+1)*nb,
381 $ a( j*nb+k, (j+1)*nb+1 ), lda,
382 $ work( 1+(k-1)*n ), 1 )
383 END DO
384
385
386
387 CALL dgetrf( n-(j+1)*nb, nb,
388 $ work, n,
389 $ ipiv( (j+1)*nb+1 ), iinfo )
390
391
392
393
394
395
396 DO k = 1, nb
397 CALL dcopy( n-(j+1)*nb,
398 $ work( 1+(k-1)*n ), 1,
399 $ a( j*nb+k, (j+1)*nb+1 ), lda )
400 END DO
401
402
403
404 kb = min(nb, n-(j+1)*nb)
405 CALL dlaset(
'Full', kb, nb, zero, zero,
406 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
407 CALL dlacpy(
'Upper', kb, nb,
408 $ work, n,
409 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
410 IF( j.GT.0 ) THEN
411 CALL dtrsm(
'R',
'U',
'N',
'U', kb, nb, one,
412 $ a( (j-1)*nb+1, j*nb+1 ), lda,
413 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
414 END IF
415
416
417
418
419 DO k = 1, nb
420 DO i = 1, kb
421 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
422 $ = tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
423 END DO
424 END DO
425 CALL dlaset(
'Lower', kb, nb, zero, one,
426 $ a( j*nb+1, (j+1)*nb+1), lda )
427
428
429
430 DO k = 1, kb
431
432 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
433
434 i1 = (j+1)*nb+k
435 i2 = ipiv( (j+1)*nb+k )
436 IF( i1.NE.i2 ) THEN
437
438 CALL dswap( k-1, a( (j+1)*nb+1, i1 ), 1,
439 $ a( (j+1)*nb+1, i2 ), 1 )
440
441 IF( i2.GT.(i1+1) )
442 $
CALL dswap( i2-i1-1, a( i1, i1+1 ), lda,
443 $ a( i1+1, i2 ), 1 )
444
445 IF( i2.LT.n )
446 $
CALL dswap( n-i2, a( i1, i2+1 ), lda,
447 $ a( i2, i2+1 ), lda )
448
449 piv = a( i1, i1 )
450 a( i1, i1 ) = a( i2, i2 )
451 a( i2, i2 ) = piv
452
453 IF( j.GT.0 ) THEN
454 CALL dswap( j*nb, a( 1, i1 ), 1,
455 $ a( 1, i2 ), 1 )
456 END IF
457 ENDIF
458 END DO
459 END IF
460 END DO
461 ELSE
462
463
464
465
466
467 DO j = 0, nt-1
468
469
470
471 kb = min(nb, n-j*nb)
472 DO i = 1, j-1
473 IF( i.EQ.1 ) THEN
474
475 IF( i .EQ. j-1) THEN
476 jb = nb+kb
477 ELSE
478 jb = 2*nb
479 END IF
480 CALL dgemm(
'NoTranspose',
'Transpose',
481 $ nb, kb, jb,
482 $ one, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
483 $ a( j*nb+1, (i-1)*nb+1 ), lda,
484 $ zero, work( i*nb+1 ), n )
485 ELSE
486
487 IF( i .EQ. j-1) THEN
488 jb = 2*nb+kb
489 ELSE
490 jb = 3*nb
491 END IF
492 CALL dgemm(
'NoTranspose',
'Transpose',
493 $ nb, kb, jb,
494 $ one, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
495 $ ldtb-1,
496 $ a( j*nb+1, (i-2)*nb+1 ), lda,
497 $ zero, work( i*nb+1 ), n )
498 END IF
499 END DO
500
501
502
503 CALL dlacpy(
'Lower', kb, kb, a( j*nb+1, j*nb+1 ), lda,
504 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
505 IF( j.GT.1 ) THEN
506
507 CALL dgemm(
'NoTranspose',
'NoTranspose',
508 $ kb, kb, (j-1)*nb,
509 $ -one, a( j*nb+1, 1 ), lda,
510 $ work( nb+1 ), n,
511 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
512
513 CALL dgemm(
'NoTranspose',
'NoTranspose',
514 $ kb, nb, kb,
515 $ one, a( j*nb+1, (j-1)*nb+1 ), lda,
516 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
517 $ zero, work( 1 ), n )
518 CALL dgemm(
'NoTranspose',
'Transpose',
519 $ kb, kb, nb,
520 $ -one, work( 1 ), n,
521 $ a( j*nb+1, (j-2)*nb+1 ), lda,
522 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
523 END IF
524 IF( j.GT.0 ) THEN
525 CALL dsygst( 1,
'Lower', kb,
526 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1,
527 $ a( j*nb+1, (j-1)*nb+1 ), lda, iinfo )
528 END IF
529
530
531
532 DO i = 1, kb
533 DO k = i+1, kb
534 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
535 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
536 END DO
537 END DO
538
539 IF( j.LT.nt-1 ) THEN
540 IF( j.GT.0 ) THEN
541
542
543
544 IF( j.EQ.1 ) THEN
545 CALL dgemm(
'NoTranspose',
'Transpose',
546 $ kb, kb, kb,
547 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
548 $ a( j*nb+1, (j-1)*nb+1 ), lda,
549 $ zero, work( j*nb+1 ), n )
550 ELSE
551 CALL dgemm(
'NoTranspose',
'Transpose',
552 $ kb, kb, nb+kb,
553 $ one, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
554 $ ldtb-1,
555 $ a( j*nb+1, (j-2)*nb+1 ), lda,
556 $ zero, work( j*nb+1 ), n )
557 END IF
558
559
560
561 CALL dgemm(
'NoTranspose',
'NoTranspose',
562 $ n-(j+1)*nb, nb, j*nb,
563 $ -one, a( (j+1)*nb+1, 1 ), lda,
564 $ work( nb+1 ), n,
565 $ one, a( (j+1)*nb+1, j*nb+1 ), lda )
566 END IF
567
568
569
570 CALL dgetrf( n-(j+1)*nb, nb,
571 $ a( (j+1)*nb+1, j*nb+1 ), lda,
572 $ ipiv( (j+1)*nb+1 ), iinfo )
573
574
575
576
577
578
579 kb = min(nb, n-(j+1)*nb)
580 CALL dlaset(
'Full', kb, nb, zero, zero,
581 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
582 CALL dlacpy(
'Upper', kb, nb,
583 $ a( (j+1)*nb+1, j*nb+1 ), lda,
584 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
585 IF( j.GT.0 ) THEN
586 CALL dtrsm(
'R',
'L',
'T',
'U', kb, nb, one,
587 $ a( j*nb+1, (j-1)*nb+1 ), lda,
588 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
589 END IF
590
591
592
593
594 DO k = 1, nb
595 DO i = 1, kb
596 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
597 $ = tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
598 END DO
599 END DO
600 CALL dlaset(
'Upper', kb, nb, zero, one,
601 $ a( (j+1)*nb+1, j*nb+1), lda )
602
603
604
605 DO k = 1, kb
606
607 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
608
609 i1 = (j+1)*nb+k
610 i2 = ipiv( (j+1)*nb+k )
611 IF( i1.NE.i2 ) THEN
612
613 CALL dswap( k-1, a( i1, (j+1)*nb+1 ), lda,
614 $ a( i2, (j+1)*nb+1 ), lda )
615
616 IF( i2.GT.(i1+1) )
617 $
CALL dswap( i2-i1-1, a( i1+1, i1 ), 1,
618 $ a( i2, i1+1 ), lda )
619
620 IF( i2.LT.n )
621 $
CALL dswap( n-i2, a( i2+1, i1 ), 1,
622 $ a( i2+1, i2 ), 1 )
623
624 piv = a( i1, i1 )
625 a( i1, i1 ) = a( i2, i2 )
626 a( i2, i2 ) = piv
627
628 IF( j.GT.0 ) THEN
629 CALL dswap( j*nb, a( i1, 1 ), lda,
630 $ a( i2, 1 ), lda )
631 END IF
632 ENDIF
633 END DO
634
635
636
637
638
639 END IF
640 END DO
641 END IF
642
643
644 CALL dgbtrf( n, n, nb, nb, tb, ldtb, ipiv2, info )
645
646 RETURN
647
648
649
subroutine xerbla(srname, info)
subroutine dcopy(n, dx, incx, dy, incy)
DCOPY
subroutine dgbtrf(m, n, kl, ku, ab, ldab, ipiv, info)
DGBTRF
subroutine dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
DGEMM
subroutine dgetrf(m, n, a, lda, ipiv, info)
DGETRF
subroutine dsygst(itype, uplo, n, a, lda, b, ldb, info)
DSYGST
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
subroutine dlacpy(uplo, m, n, a, lda, b, ldb)
DLACPY copies all or part of one two-dimensional array to another.
subroutine dlaset(uplo, m, n, alpha, beta, a, lda)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
logical function lsame(ca, cb)
LSAME
subroutine dswap(n, dx, incx, dy, incy)
DSWAP
subroutine dtrsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
DTRSM