LAPACK  3.10.1
LAPACK: Linear Algebra PACKage
zhegv_2stage.f
Go to the documentation of this file.
1 *> \brief \b ZHEGV_2STAGE
2 *
3 * @precisions fortran z -> c
4 *
5 * =========== DOCUMENTATION ===========
6 *
7 * Online html documentation available at
8 * http://www.netlib.org/lapack/explore-html/
9 *
10 *> \htmlonly
11 *> Download ZHEGV_2STAGE + dependencies
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhegv_2stage.f">
13 *> [TGZ]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhegv_2stage.f">
15 *> [ZIP]</a>
16 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhegv_2stage.f">
17 *> [TXT]</a>
18 *> \endhtmlonly
19 *
20 * Definition:
21 * ===========
22 *
23 * SUBROUTINE ZHEGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W,
24 * WORK, LWORK, RWORK, INFO )
25 *
26 * IMPLICIT NONE
27 *
28 * .. Scalar Arguments ..
29 * CHARACTER JOBZ, UPLO
30 * INTEGER INFO, ITYPE, LDA, LDB, LWORK, N
31 * ..
32 * .. Array Arguments ..
33 * DOUBLE PRECISION RWORK( * ), W( * )
34 * COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * )
35 * ..
36 *
37 *
38 *> \par Purpose:
39 * =============
40 *>
41 *> \verbatim
42 *>
43 *> ZHEGV_2STAGE computes all the eigenvalues, and optionally, the eigenvectors
44 *> of a complex generalized Hermitian-definite eigenproblem, of the form
45 *> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x.
46 *> Here A and B are assumed to be Hermitian and B is also
47 *> positive definite.
48 *> This routine use the 2stage technique for the reduction to tridiagonal
49 *> which showed higher performance on recent architecture and for large
50 *> sizes N>2000.
51 *> \endverbatim
52 *
53 * Arguments:
54 * ==========
55 *
56 *> \param[in] ITYPE
57 *> \verbatim
58 *> ITYPE is INTEGER
59 *> Specifies the problem type to be solved:
60 *> = 1: A*x = (lambda)*B*x
61 *> = 2: A*B*x = (lambda)*x
62 *> = 3: B*A*x = (lambda)*x
63 *> \endverbatim
64 *>
65 *> \param[in] JOBZ
66 *> \verbatim
67 *> JOBZ is CHARACTER*1
68 *> = 'N': Compute eigenvalues only;
69 *> = 'V': Compute eigenvalues and eigenvectors.
70 *> Not available in this release.
71 *> \endverbatim
72 *>
73 *> \param[in] UPLO
74 *> \verbatim
75 *> UPLO is CHARACTER*1
76 *> = 'U': Upper triangles of A and B are stored;
77 *> = 'L': Lower triangles of A and B are stored.
78 *> \endverbatim
79 *>
80 *> \param[in] N
81 *> \verbatim
82 *> N is INTEGER
83 *> The order of the matrices A and B. N >= 0.
84 *> \endverbatim
85 *>
86 *> \param[in,out] A
87 *> \verbatim
88 *> A is COMPLEX*16 array, dimension (LDA, N)
89 *> On entry, the Hermitian matrix A. If UPLO = 'U', the
90 *> leading N-by-N upper triangular part of A contains the
91 *> upper triangular part of the matrix A. If UPLO = 'L',
92 *> the leading N-by-N lower triangular part of A contains
93 *> the lower triangular part of the matrix A.
94 *>
95 *> On exit, if JOBZ = 'V', then if INFO = 0, A contains the
96 *> matrix Z of eigenvectors. The eigenvectors are normalized
97 *> as follows:
98 *> if ITYPE = 1 or 2, Z**H*B*Z = I;
99 *> if ITYPE = 3, Z**H*inv(B)*Z = I.
100 *> If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
101 *> or the lower triangle (if UPLO='L') of A, including the
102 *> diagonal, is destroyed.
103 *> \endverbatim
104 *>
105 *> \param[in] LDA
106 *> \verbatim
107 *> LDA is INTEGER
108 *> The leading dimension of the array A. LDA >= max(1,N).
109 *> \endverbatim
110 *>
111 *> \param[in,out] B
112 *> \verbatim
113 *> B is COMPLEX*16 array, dimension (LDB, N)
114 *> On entry, the Hermitian positive definite matrix B.
115 *> If UPLO = 'U', the leading N-by-N upper triangular part of B
116 *> contains the upper triangular part of the matrix B.
117 *> If UPLO = 'L', the leading N-by-N lower triangular part of B
118 *> contains the lower triangular part of the matrix B.
119 *>
120 *> On exit, if INFO <= N, the part of B containing the matrix is
121 *> overwritten by the triangular factor U or L from the Cholesky
122 *> factorization B = U**H*U or B = L*L**H.
123 *> \endverbatim
124 *>
125 *> \param[in] LDB
126 *> \verbatim
127 *> LDB is INTEGER
128 *> The leading dimension of the array B. LDB >= max(1,N).
129 *> \endverbatim
130 *>
131 *> \param[out] W
132 *> \verbatim
133 *> W is DOUBLE PRECISION array, dimension (N)
134 *> If INFO = 0, the eigenvalues in ascending order.
135 *> \endverbatim
136 *>
137 *> \param[out] WORK
138 *> \verbatim
139 *> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
140 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
141 *> \endverbatim
142 *>
143 *> \param[in] LWORK
144 *> \verbatim
145 *> LWORK is INTEGER
146 *> The length of the array WORK. LWORK >= 1, when N <= 1;
147 *> otherwise
148 *> If JOBZ = 'N' and N > 1, LWORK must be queried.
149 *> LWORK = MAX(1, dimension) where
150 *> dimension = max(stage1,stage2) + (KD+1)*N + N
151 *> = N*KD + N*max(KD+1,FACTOPTNB)
152 *> + max(2*KD*KD, KD*NTHREADS)
153 *> + (KD+1)*N + N
154 *> where KD is the blocking size of the reduction,
155 *> FACTOPTNB is the blocking used by the QR or LQ
156 *> algorithm, usually FACTOPTNB=128 is a good choice
157 *> NTHREADS is the number of threads used when
158 *> openMP compilation is enabled, otherwise =1.
159 *> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available
160 *>
161 *> If LWORK = -1, then a workspace query is assumed; the routine
162 *> only calculates the optimal size of the WORK array, returns
163 *> this value as the first entry of the WORK array, and no error
164 *> message related to LWORK is issued by XERBLA.
165 *> \endverbatim
166 *>
167 *> \param[out] RWORK
168 *> \verbatim
169 *> RWORK is DOUBLE PRECISION array, dimension (max(1, 3*N-2))
170 *> \endverbatim
171 *>
172 *> \param[out] INFO
173 *> \verbatim
174 *> INFO is INTEGER
175 *> = 0: successful exit
176 *> < 0: if INFO = -i, the i-th argument had an illegal value
177 *> > 0: ZPOTRF or ZHEEV returned an error code:
178 *> <= N: if INFO = i, ZHEEV failed to converge;
179 *> i off-diagonal elements of an intermediate
180 *> tridiagonal form did not converge to zero;
181 *> > N: if INFO = N + i, for 1 <= i <= N, then the leading
182 *> minor of order i of B is not positive definite.
183 *> The factorization of B could not be completed and
184 *> no eigenvalues or eigenvectors were computed.
185 *> \endverbatim
186 *
187 * Authors:
188 * ========
189 *
190 *> \author Univ. of Tennessee
191 *> \author Univ. of California Berkeley
192 *> \author Univ. of Colorado Denver
193 *> \author NAG Ltd.
194 *
195 *> \ingroup complex16HEeigen
196 *
197 *> \par Further Details:
198 * =====================
199 *>
200 *> \verbatim
201 *>
202 *> All details about the 2stage techniques are available in:
203 *>
204 *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
205 *> Parallel reduction to condensed forms for symmetric eigenvalue problems
206 *> using aggregated fine-grained and memory-aware kernels. In Proceedings
207 *> of 2011 International Conference for High Performance Computing,
208 *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
209 *> Article 8 , 11 pages.
210 *> http://doi.acm.org/10.1145/2063384.2063394
211 *>
212 *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
213 *> An improved parallel singular value algorithm and its implementation
214 *> for multicore hardware, In Proceedings of 2013 International Conference
215 *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
216 *> Denver, Colorado, USA, 2013.
217 *> Article 90, 12 pages.
218 *> http://doi.acm.org/10.1145/2503210.2503292
219 *>
220 *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
221 *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
222 *> calculations based on fine-grained memory aware tasks.
223 *> International Journal of High Performance Computing Applications.
224 *> Volume 28 Issue 2, Pages 196-209, May 2014.
225 *> http://hpc.sagepub.com/content/28/2/196
226 *>
227 *> \endverbatim
228 *
229 * =====================================================================
230  SUBROUTINE zhegv_2stage( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W,
231  $ WORK, LWORK, RWORK, INFO )
232 *
233  IMPLICIT NONE
234 *
235 * -- LAPACK driver routine --
236 * -- LAPACK is a software package provided by Univ. of Tennessee, --
237 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
238 *
239 * .. Scalar Arguments ..
240  CHARACTER JOBZ, UPLO
241  INTEGER INFO, ITYPE, LDA, LDB, LWORK, N
242 * ..
243 * .. Array Arguments ..
244  DOUBLE PRECISION RWORK( * ), W( * )
245  COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * )
246 * ..
247 *
248 * =====================================================================
249 *
250 * .. Parameters ..
251  COMPLEX*16 ONE
252  parameter( one = ( 1.0d+0, 0.0d+0 ) )
253 * ..
254 * .. Local Scalars ..
255  LOGICAL LQUERY, UPPER, WANTZ
256  CHARACTER TRANS
257  INTEGER NEIG, LWMIN, LHTRD, LWTRD, KD, IB
258 * ..
259 * .. External Functions ..
260  LOGICAL LSAME
261  INTEGER ILAENV2STAGE
262  EXTERNAL lsame, ilaenv2stage
263 * ..
264 * .. External Subroutines ..
265  EXTERNAL xerbla, zhegst, zpotrf, ztrmm, ztrsm,
266  $ zheev_2stage
267 * ..
268 * .. Intrinsic Functions ..
269  INTRINSIC max
270 * ..
271 * .. Executable Statements ..
272 *
273 * Test the input parameters.
274 *
275  wantz = lsame( jobz, 'V' )
276  upper = lsame( uplo, 'U' )
277  lquery = ( lwork.EQ.-1 )
278 *
279  info = 0
280  IF( itype.LT.1 .OR. itype.GT.3 ) THEN
281  info = -1
282  ELSE IF( .NOT.( lsame( jobz, 'N' ) ) ) THEN
283  info = -2
284  ELSE IF( .NOT.( upper .OR. lsame( uplo, 'L' ) ) ) THEN
285  info = -3
286  ELSE IF( n.LT.0 ) THEN
287  info = -4
288  ELSE IF( lda.LT.max( 1, n ) ) THEN
289  info = -6
290  ELSE IF( ldb.LT.max( 1, n ) ) THEN
291  info = -8
292  END IF
293 *
294  IF( info.EQ.0 ) THEN
295  kd = ilaenv2stage( 1, 'ZHETRD_2STAGE', jobz, n, -1, -1, -1 )
296  ib = ilaenv2stage( 2, 'ZHETRD_2STAGE', jobz, n, kd, -1, -1 )
297  lhtrd = ilaenv2stage( 3, 'ZHETRD_2STAGE', jobz, n, kd, ib, -1 )
298  lwtrd = ilaenv2stage( 4, 'ZHETRD_2STAGE', jobz, n, kd, ib, -1 )
299  lwmin = n + lhtrd + lwtrd
300  work( 1 ) = lwmin
301 *
302  IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
303  info = -11
304  END IF
305  END IF
306 *
307  IF( info.NE.0 ) THEN
308  CALL xerbla( 'ZHEGV_2STAGE ', -info )
309  RETURN
310  ELSE IF( lquery ) THEN
311  RETURN
312  END IF
313 *
314 * Quick return if possible
315 *
316  IF( n.EQ.0 )
317  $ RETURN
318 *
319 * Form a Cholesky factorization of B.
320 *
321  CALL zpotrf( uplo, n, b, ldb, info )
322  IF( info.NE.0 ) THEN
323  info = n + info
324  RETURN
325  END IF
326 *
327 * Transform problem to standard eigenvalue problem and solve.
328 *
329  CALL zhegst( itype, uplo, n, a, lda, b, ldb, info )
330  CALL zheev_2stage( jobz, uplo, n, a, lda, w,
331  $ work, lwork, rwork, info )
332 *
333  IF( wantz ) THEN
334 *
335 * Backtransform eigenvectors to the original problem.
336 *
337  neig = n
338  IF( info.GT.0 )
339  $ neig = info - 1
340  IF( itype.EQ.1 .OR. itype.EQ.2 ) THEN
341 *
342 * For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
343 * backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y
344 *
345  IF( upper ) THEN
346  trans = 'N'
347  ELSE
348  trans = 'C'
349  END IF
350 *
351  CALL ztrsm( 'Left', uplo, trans, 'Non-unit', n, neig, one,
352  $ b, ldb, a, lda )
353 *
354  ELSE IF( itype.EQ.3 ) THEN
355 *
356 * For B*A*x=(lambda)*x;
357 * backtransform eigenvectors: x = L*y or U**H *y
358 *
359  IF( upper ) THEN
360  trans = 'C'
361  ELSE
362  trans = 'N'
363  END IF
364 *
365  CALL ztrmm( 'Left', uplo, trans, 'Non-unit', n, neig, one,
366  $ b, ldb, a, lda )
367  END IF
368  END IF
369 *
370  work( 1 ) = lwmin
371 *
372  RETURN
373 *
374 * End of ZHEGV_2STAGE
375 *
376  END
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
subroutine ztrsm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
ZTRSM
Definition: ztrsm.f:180
subroutine ztrmm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
ZTRMM
Definition: ztrmm.f:177
subroutine zhegst(ITYPE, UPLO, N, A, LDA, B, LDB, INFO)
ZHEGST
Definition: zhegst.f:128
subroutine zhegv_2stage(ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, LWORK, RWORK, INFO)
ZHEGV_2STAGE
Definition: zhegv_2stage.f:232
subroutine zheev_2stage(JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, INFO)
ZHEEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matr...
Definition: zheev_2stage.f:189
subroutine zpotrf(UPLO, N, A, LDA, INFO)
ZPOTRF VARIANT: right looking block version of the algorithm, calling Level 3 BLAS.
Definition: zpotrf.f:102