LAPACK  3.10.0
LAPACK: Linear Algebra PACKage
cgejsv.f
Go to the documentation of this file.
1 *> \brief \b CGEJSV
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download CGEJSV + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cgejsv.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cgejsv.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cgejsv.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE CGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP,
22 * M, N, A, LDA, SVA, U, LDU, V, LDV,
23 * CWORK, LWORK, RWORK, LRWORK, IWORK, INFO )
24 *
25 * .. Scalar Arguments ..
26 * IMPLICIT NONE
27 * INTEGER INFO, LDA, LDU, LDV, LWORK, M, N
28 * ..
29 * .. Array Arguments ..
30 * COMPLEX A( LDA, * ), U( LDU, * ), V( LDV, * ), CWORK( LWORK )
31 * REAL SVA( N ), RWORK( LRWORK )
32 * INTEGER IWORK( * )
33 * CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV
34 * ..
35 *
36 *
37 *> \par Purpose:
38 * =============
39 *>
40 *> \verbatim
41 *>
42 *> CGEJSV computes the singular value decomposition (SVD) of a complex M-by-N
43 *> matrix [A], where M >= N. The SVD of [A] is written as
44 *>
45 *> [A] = [U] * [SIGMA] * [V]^*,
46 *>
47 *> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N
48 *> diagonal elements, [U] is an M-by-N (or M-by-M) unitary matrix, and
49 *> [V] is an N-by-N unitary matrix. The diagonal elements of [SIGMA] are
50 *> the singular values of [A]. The columns of [U] and [V] are the left and
51 *> the right singular vectors of [A], respectively. The matrices [U] and [V]
52 *> are computed and stored in the arrays U and V, respectively. The diagonal
53 *> of [SIGMA] is computed and stored in the array SVA.
54 *> \endverbatim
55 *>
56 *> Arguments:
57 *> ==========
58 *>
59 *> \param[in] JOBA
60 *> \verbatim
61 *> JOBA is CHARACTER*1
62 *> Specifies the level of accuracy:
63 *> = 'C': This option works well (high relative accuracy) if A = B * D,
64 *> with well-conditioned B and arbitrary diagonal matrix D.
65 *> The accuracy cannot be spoiled by COLUMN scaling. The
66 *> accuracy of the computed output depends on the condition of
67 *> B, and the procedure aims at the best theoretical accuracy.
68 *> The relative error max_{i=1:N}|d sigma_i| / sigma_i is
69 *> bounded by f(M,N)*epsilon* cond(B), independent of D.
70 *> The input matrix is preprocessed with the QRF with column
71 *> pivoting. This initial preprocessing and preconditioning by
72 *> a rank revealing QR factorization is common for all values of
73 *> JOBA. Additional actions are specified as follows:
74 *> = 'E': Computation as with 'C' with an additional estimate of the
75 *> condition number of B. It provides a realistic error bound.
76 *> = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings
77 *> D1, D2, and well-conditioned matrix C, this option gives
78 *> higher accuracy than the 'C' option. If the structure of the
79 *> input matrix is not known, and relative accuracy is
80 *> desirable, then this option is advisable. The input matrix A
81 *> is preprocessed with QR factorization with FULL (row and
82 *> column) pivoting.
83 *> = 'G': Computation as with 'F' with an additional estimate of the
84 *> condition number of B, where A=B*D. If A has heavily weighted
85 *> rows, then using this condition number gives too pessimistic
86 *> error bound.
87 *> = 'A': Small singular values are not well determined by the data
88 *> and are considered as noisy; the matrix is treated as
89 *> numerically rank deficient. The error in the computed
90 *> singular values is bounded by f(m,n)*epsilon*||A||.
91 *> The computed SVD A = U * S * V^* restores A up to
92 *> f(m,n)*epsilon*||A||.
93 *> This gives the procedure the licence to discard (set to zero)
94 *> all singular values below N*epsilon*||A||.
95 *> = 'R': Similar as in 'A'. Rank revealing property of the initial
96 *> QR factorization is used do reveal (using triangular factor)
97 *> a gap sigma_{r+1} < epsilon * sigma_r in which case the
98 *> numerical RANK is declared to be r. The SVD is computed with
99 *> absolute error bounds, but more accurately than with 'A'.
100 *> \endverbatim
101 *>
102 *> \param[in] JOBU
103 *> \verbatim
104 *> JOBU is CHARACTER*1
105 *> Specifies whether to compute the columns of U:
106 *> = 'U': N columns of U are returned in the array U.
107 *> = 'F': full set of M left sing. vectors is returned in the array U.
108 *> = 'W': U may be used as workspace of length M*N. See the description
109 *> of U.
110 *> = 'N': U is not computed.
111 *> \endverbatim
112 *>
113 *> \param[in] JOBV
114 *> \verbatim
115 *> JOBV is CHARACTER*1
116 *> Specifies whether to compute the matrix V:
117 *> = 'V': N columns of V are returned in the array V; Jacobi rotations
118 *> are not explicitly accumulated.
119 *> = 'J': N columns of V are returned in the array V, but they are
120 *> computed as the product of Jacobi rotations, if JOBT = 'N'.
121 *> = 'W': V may be used as workspace of length N*N. See the description
122 *> of V.
123 *> = 'N': V is not computed.
124 *> \endverbatim
125 *>
126 *> \param[in] JOBR
127 *> \verbatim
128 *> JOBR is CHARACTER*1
129 *> Specifies the RANGE for the singular values. Issues the licence to
130 *> set to zero small positive singular values if they are outside
131 *> specified range. If A .NE. 0 is scaled so that the largest singular
132 *> value of c*A is around SQRT(BIG), BIG=SLAMCH('O'), then JOBR issues
133 *> the licence to kill columns of A whose norm in c*A is less than
134 *> SQRT(SFMIN) (for JOBR = 'R'), or less than SMALL=SFMIN/EPSLN,
135 *> where SFMIN=SLAMCH('S'), EPSLN=SLAMCH('E').
136 *> = 'N': Do not kill small columns of c*A. This option assumes that
137 *> BLAS and QR factorizations and triangular solvers are
138 *> implemented to work in that range. If the condition of A
139 *> is greater than BIG, use CGESVJ.
140 *> = 'R': RESTRICTED range for sigma(c*A) is [SQRT(SFMIN), SQRT(BIG)]
141 *> (roughly, as described above). This option is recommended.
142 *> ===========================
143 *> For computing the singular values in the FULL range [SFMIN,BIG]
144 *> use CGESVJ.
145 *> \endverbatim
146 *>
147 *> \param[in] JOBT
148 *> \verbatim
149 *> JOBT is CHARACTER*1
150 *> If the matrix is square then the procedure may determine to use
151 *> transposed A if A^* seems to be better with respect to convergence.
152 *> If the matrix is not square, JOBT is ignored.
153 *> The decision is based on two values of entropy over the adjoint
154 *> orbit of A^* * A. See the descriptions of WORK(6) and WORK(7).
155 *> = 'T': transpose if entropy test indicates possibly faster
156 *> convergence of Jacobi process if A^* is taken as input. If A is
157 *> replaced with A^*, then the row pivoting is included automatically.
158 *> = 'N': do not speculate.
159 *> The option 'T' can be used to compute only the singular values, or
160 *> the full SVD (U, SIGMA and V). For only one set of singular vectors
161 *> (U or V), the caller should provide both U and V, as one of the
162 *> matrices is used as workspace if the matrix A is transposed.
163 *> The implementer can easily remove this constraint and make the
164 *> code more complicated. See the descriptions of U and V.
165 *> In general, this option is considered experimental, and 'N'; should
166 *> be preferred. This is subject to changes in the future.
167 *> \endverbatim
168 *>
169 *> \param[in] JOBP
170 *> \verbatim
171 *> JOBP is CHARACTER*1
172 *> Issues the licence to introduce structured perturbations to drown
173 *> denormalized numbers. This licence should be active if the
174 *> denormals are poorly implemented, causing slow computation,
175 *> especially in cases of fast convergence (!). For details see [1,2].
176 *> For the sake of simplicity, this perturbations are included only
177 *> when the full SVD or only the singular values are requested. The
178 *> implementer/user can easily add the perturbation for the cases of
179 *> computing one set of singular vectors.
180 *> = 'P': introduce perturbation
181 *> = 'N': do not perturb
182 *> \endverbatim
183 *>
184 *> \param[in] M
185 *> \verbatim
186 *> M is INTEGER
187 *> The number of rows of the input matrix A. M >= 0.
188 *> \endverbatim
189 *>
190 *> \param[in] N
191 *> \verbatim
192 *> N is INTEGER
193 *> The number of columns of the input matrix A. M >= N >= 0.
194 *> \endverbatim
195 *>
196 *> \param[in,out] A
197 *> \verbatim
198 *> A is COMPLEX array, dimension (LDA,N)
199 *> On entry, the M-by-N matrix A.
200 *> \endverbatim
201 *>
202 *> \param[in] LDA
203 *> \verbatim
204 *> LDA is INTEGER
205 *> The leading dimension of the array A. LDA >= max(1,M).
206 *> \endverbatim
207 *>
208 *> \param[out] SVA
209 *> \verbatim
210 *> SVA is REAL array, dimension (N)
211 *> On exit,
212 *> - For WORK(1)/WORK(2) = ONE: The singular values of A. During the
213 *> computation SVA contains Euclidean column norms of the
214 *> iterated matrices in the array A.
215 *> - For WORK(1) .NE. WORK(2): The singular values of A are
216 *> (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if
217 *> sigma_max(A) overflows or if small singular values have been
218 *> saved from underflow by scaling the input matrix A.
219 *> - If JOBR='R' then some of the singular values may be returned
220 *> as exact zeros obtained by "set to zero" because they are
221 *> below the numerical rank threshold or are denormalized numbers.
222 *> \endverbatim
223 *>
224 *> \param[out] U
225 *> \verbatim
226 *> U is COMPLEX array, dimension ( LDU, N ) or ( LDU, M )
227 *> If JOBU = 'U', then U contains on exit the M-by-N matrix of
228 *> the left singular vectors.
229 *> If JOBU = 'F', then U contains on exit the M-by-M matrix of
230 *> the left singular vectors, including an ONB
231 *> of the orthogonal complement of the Range(A).
232 *> If JOBU = 'W' .AND. (JOBV = 'V' .AND. JOBT = 'T' .AND. M = N),
233 *> then U is used as workspace if the procedure
234 *> replaces A with A^*. In that case, [V] is computed
235 *> in U as left singular vectors of A^* and then
236 *> copied back to the V array. This 'W' option is just
237 *> a reminder to the caller that in this case U is
238 *> reserved as workspace of length N*N.
239 *> If JOBU = 'N' U is not referenced, unless JOBT='T'.
240 *> \endverbatim
241 *>
242 *> \param[in] LDU
243 *> \verbatim
244 *> LDU is INTEGER
245 *> The leading dimension of the array U, LDU >= 1.
246 *> IF JOBU = 'U' or 'F' or 'W', then LDU >= M.
247 *> \endverbatim
248 *>
249 *> \param[out] V
250 *> \verbatim
251 *> V is COMPLEX array, dimension ( LDV, N )
252 *> If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of
253 *> the right singular vectors;
254 *> If JOBV = 'W', AND (JOBU = 'U' AND JOBT = 'T' AND M = N),
255 *> then V is used as workspace if the pprocedure
256 *> replaces A with A^*. In that case, [U] is computed
257 *> in V as right singular vectors of A^* and then
258 *> copied back to the U array. This 'W' option is just
259 *> a reminder to the caller that in this case V is
260 *> reserved as workspace of length N*N.
261 *> If JOBV = 'N' V is not referenced, unless JOBT='T'.
262 *> \endverbatim
263 *>
264 *> \param[in] LDV
265 *> \verbatim
266 *> LDV is INTEGER
267 *> The leading dimension of the array V, LDV >= 1.
268 *> If JOBV = 'V' or 'J' or 'W', then LDV >= N.
269 *> \endverbatim
270 *>
271 *> \param[out] CWORK
272 *> \verbatim
273 *> CWORK is COMPLEX array, dimension (MAX(2,LWORK))
274 *> If the call to CGEJSV is a workspace query (indicated by LWORK=-1 or
275 *> LRWORK=-1), then on exit CWORK(1) contains the required length of
276 *> CWORK for the job parameters used in the call.
277 *> \endverbatim
278 *>
279 *> \param[in] LWORK
280 *> \verbatim
281 *> LWORK is INTEGER
282 *> Length of CWORK to confirm proper allocation of workspace.
283 *> LWORK depends on the job:
284 *>
285 *> 1. If only SIGMA is needed ( JOBU = 'N', JOBV = 'N' ) and
286 *> 1.1 .. no scaled condition estimate required (JOBA.NE.'E'.AND.JOBA.NE.'G'):
287 *> LWORK >= 2*N+1. This is the minimal requirement.
288 *> ->> For optimal performance (blocked code) the optimal value
289 *> is LWORK >= N + (N+1)*NB. Here NB is the optimal
290 *> block size for CGEQP3 and CGEQRF.
291 *> In general, optimal LWORK is computed as
292 *> LWORK >= max(N+LWORK(CGEQP3),N+LWORK(CGEQRF), LWORK(CGESVJ)).
293 *> 1.2. .. an estimate of the scaled condition number of A is
294 *> required (JOBA='E', or 'G'). In this case, LWORK the minimal
295 *> requirement is LWORK >= N*N + 2*N.
296 *> ->> For optimal performance (blocked code) the optimal value
297 *> is LWORK >= max(N+(N+1)*NB, N*N+2*N)=N**2+2*N.
298 *> In general, the optimal length LWORK is computed as
299 *> LWORK >= max(N+LWORK(CGEQP3),N+LWORK(CGEQRF), LWORK(CGESVJ),
300 *> N*N+LWORK(CPOCON)).
301 *> 2. If SIGMA and the right singular vectors are needed (JOBV = 'V'),
302 *> (JOBU = 'N')
303 *> 2.1 .. no scaled condition estimate requested (JOBE = 'N'):
304 *> -> the minimal requirement is LWORK >= 3*N.
305 *> -> For optimal performance,
306 *> LWORK >= max(N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB,
307 *> where NB is the optimal block size for CGEQP3, CGEQRF, CGELQ,
308 *> CUNMLQ. In general, the optimal length LWORK is computed as
309 *> LWORK >= max(N+LWORK(CGEQP3), N+LWORK(CGESVJ),
310 *> N+LWORK(CGELQF), 2*N+LWORK(CGEQRF), N+LWORK(CUNMLQ)).
311 *> 2.2 .. an estimate of the scaled condition number of A is
312 *> required (JOBA='E', or 'G').
313 *> -> the minimal requirement is LWORK >= 3*N.
314 *> -> For optimal performance,
315 *> LWORK >= max(N+(N+1)*NB, 2*N,2*N+N*NB)=2*N+N*NB,
316 *> where NB is the optimal block size for CGEQP3, CGEQRF, CGELQ,
317 *> CUNMLQ. In general, the optimal length LWORK is computed as
318 *> LWORK >= max(N+LWORK(CGEQP3), LWORK(CPOCON), N+LWORK(CGESVJ),
319 *> N+LWORK(CGELQF), 2*N+LWORK(CGEQRF), N+LWORK(CUNMLQ)).
320 *> 3. If SIGMA and the left singular vectors are needed
321 *> 3.1 .. no scaled condition estimate requested (JOBE = 'N'):
322 *> -> the minimal requirement is LWORK >= 3*N.
323 *> -> For optimal performance:
324 *> if JOBU = 'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB,
325 *> where NB is the optimal block size for CGEQP3, CGEQRF, CUNMQR.
326 *> In general, the optimal length LWORK is computed as
327 *> LWORK >= max(N+LWORK(CGEQP3), 2*N+LWORK(CGEQRF), N+LWORK(CUNMQR)).
328 *> 3.2 .. an estimate of the scaled condition number of A is
329 *> required (JOBA='E', or 'G').
330 *> -> the minimal requirement is LWORK >= 3*N.
331 *> -> For optimal performance:
332 *> if JOBU = 'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB,
333 *> where NB is the optimal block size for CGEQP3, CGEQRF, CUNMQR.
334 *> In general, the optimal length LWORK is computed as
335 *> LWORK >= max(N+LWORK(CGEQP3),N+LWORK(CPOCON),
336 *> 2*N+LWORK(CGEQRF), N+LWORK(CUNMQR)).
337 *>
338 *> 4. If the full SVD is needed: (JOBU = 'U' or JOBU = 'F') and
339 *> 4.1. if JOBV = 'V'
340 *> the minimal requirement is LWORK >= 5*N+2*N*N.
341 *> 4.2. if JOBV = 'J' the minimal requirement is
342 *> LWORK >= 4*N+N*N.
343 *> In both cases, the allocated CWORK can accommodate blocked runs
344 *> of CGEQP3, CGEQRF, CGELQF, CUNMQR, CUNMLQ.
345 *>
346 *> If the call to CGEJSV is a workspace query (indicated by LWORK=-1 or
347 *> LRWORK=-1), then on exit CWORK(1) contains the optimal and CWORK(2) contains the
348 *> minimal length of CWORK for the job parameters used in the call.
349 *> \endverbatim
350 *>
351 *> \param[out] RWORK
352 *> \verbatim
353 *> RWORK is REAL array, dimension (MAX(7,LWORK))
354 *> On exit,
355 *> RWORK(1) = Determines the scaling factor SCALE = RWORK(2) / RWORK(1)
356 *> such that SCALE*SVA(1:N) are the computed singular values
357 *> of A. (See the description of SVA().)
358 *> RWORK(2) = See the description of RWORK(1).
359 *> RWORK(3) = SCONDA is an estimate for the condition number of
360 *> column equilibrated A. (If JOBA = 'E' or 'G')
361 *> SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1).
362 *> It is computed using CPOCON. It holds
363 *> N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
364 *> where R is the triangular factor from the QRF of A.
365 *> However, if R is truncated and the numerical rank is
366 *> determined to be strictly smaller than N, SCONDA is
367 *> returned as -1, thus indicating that the smallest
368 *> singular values might be lost.
369 *>
370 *> If full SVD is needed, the following two condition numbers are
371 *> useful for the analysis of the algorithm. They are provided for
372 *> a developer/implementer who is familiar with the details of
373 *> the method.
374 *>
375 *> RWORK(4) = an estimate of the scaled condition number of the
376 *> triangular factor in the first QR factorization.
377 *> RWORK(5) = an estimate of the scaled condition number of the
378 *> triangular factor in the second QR factorization.
379 *> The following two parameters are computed if JOBT = 'T'.
380 *> They are provided for a developer/implementer who is familiar
381 *> with the details of the method.
382 *> RWORK(6) = the entropy of A^* * A :: this is the Shannon entropy
383 *> of diag(A^* * A) / Trace(A^* * A) taken as point in the
384 *> probability simplex.
385 *> RWORK(7) = the entropy of A * A^*. (See the description of RWORK(6).)
386 *> If the call to CGEJSV is a workspace query (indicated by LWORK=-1 or
387 *> LRWORK=-1), then on exit RWORK(1) contains the required length of
388 *> RWORK for the job parameters used in the call.
389 *> \endverbatim
390 *>
391 *> \param[in] LRWORK
392 *> \verbatim
393 *> LRWORK is INTEGER
394 *> Length of RWORK to confirm proper allocation of workspace.
395 *> LRWORK depends on the job:
396 *>
397 *> 1. If only the singular values are requested i.e. if
398 *> LSAME(JOBU,'N') .AND. LSAME(JOBV,'N')
399 *> then:
400 *> 1.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
401 *> then: LRWORK = max( 7, 2 * M ).
402 *> 1.2. Otherwise, LRWORK = max( 7, N ).
403 *> 2. If singular values with the right singular vectors are requested
404 *> i.e. if
405 *> (LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) .AND.
406 *> .NOT.(LSAME(JOBU,'U').OR.LSAME(JOBU,'F'))
407 *> then:
408 *> 2.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
409 *> then LRWORK = max( 7, 2 * M ).
410 *> 2.2. Otherwise, LRWORK = max( 7, N ).
411 *> 3. If singular values with the left singular vectors are requested, i.e. if
412 *> (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND.
413 *> .NOT.(LSAME(JOBV,'V').OR.LSAME(JOBV,'J'))
414 *> then:
415 *> 3.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
416 *> then LRWORK = max( 7, 2 * M ).
417 *> 3.2. Otherwise, LRWORK = max( 7, N ).
418 *> 4. If singular values with both the left and the right singular vectors
419 *> are requested, i.e. if
420 *> (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND.
421 *> (LSAME(JOBV,'V').OR.LSAME(JOBV,'J'))
422 *> then:
423 *> 4.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
424 *> then LRWORK = max( 7, 2 * M ).
425 *> 4.2. Otherwise, LRWORK = max( 7, N ).
426 *>
427 *> If, on entry, LRWORK = -1 or LWORK=-1, a workspace query is assumed and
428 *> the length of RWORK is returned in RWORK(1).
429 *> \endverbatim
430 *>
431 *> \param[out] IWORK
432 *> \verbatim
433 *> IWORK is INTEGER array, of dimension at least 4, that further depends
434 *> on the job:
435 *>
436 *> 1. If only the singular values are requested then:
437 *> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') )
438 *> then the length of IWORK is N+M; otherwise the length of IWORK is N.
439 *> 2. If the singular values and the right singular vectors are requested then:
440 *> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') )
441 *> then the length of IWORK is N+M; otherwise the length of IWORK is N.
442 *> 3. If the singular values and the left singular vectors are requested then:
443 *> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') )
444 *> then the length of IWORK is N+M; otherwise the length of IWORK is N.
445 *> 4. If the singular values with both the left and the right singular vectors
446 *> are requested, then:
447 *> 4.1. If LSAME(JOBV,'J') the length of IWORK is determined as follows:
448 *> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') )
449 *> then the length of IWORK is N+M; otherwise the length of IWORK is N.
450 *> 4.2. If LSAME(JOBV,'V') the length of IWORK is determined as follows:
451 *> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') )
452 *> then the length of IWORK is 2*N+M; otherwise the length of IWORK is 2*N.
453 *>
454 *> On exit,
455 *> IWORK(1) = the numerical rank determined after the initial
456 *> QR factorization with pivoting. See the descriptions
457 *> of JOBA and JOBR.
458 *> IWORK(2) = the number of the computed nonzero singular values
459 *> IWORK(3) = if nonzero, a warning message:
460 *> If IWORK(3) = 1 then some of the column norms of A
461 *> were denormalized floats. The requested high accuracy
462 *> is not warranted by the data.
463 *> IWORK(4) = 1 or -1. If IWORK(4) = 1, then the procedure used A^* to
464 *> do the job as specified by the JOB parameters.
465 *> If the call to CGEJSV is a workspace query (indicated by LWORK = -1 and
466 *> LRWORK = -1), then on exit IWORK(1) contains the required length of
467 *> IWORK for the job parameters used in the call.
468 *> \endverbatim
469 *>
470 *> \param[out] INFO
471 *> \verbatim
472 *> INFO is INTEGER
473 *> < 0: if INFO = -i, then the i-th argument had an illegal value.
474 *> = 0: successful exit;
475 *> > 0: CGEJSV did not converge in the maximal allowed number
476 *> of sweeps. The computed values may be inaccurate.
477 *> \endverbatim
478 *
479 * Authors:
480 * ========
481 *
482 *> \author Univ. of Tennessee
483 *> \author Univ. of California Berkeley
484 *> \author Univ. of Colorado Denver
485 *> \author NAG Ltd.
486 *
487 *> \ingroup complexGEsing
488 *
489 *> \par Further Details:
490 * =====================
491 *>
492 *> \verbatim
493 *> CGEJSV implements a preconditioned Jacobi SVD algorithm. It uses CGEQP3,
494 *> CGEQRF, and CGELQF as preprocessors and preconditioners. Optionally, an
495 *> additional row pivoting can be used as a preprocessor, which in some
496 *> cases results in much higher accuracy. An example is matrix A with the
497 *> structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned
498 *> diagonal matrices and C is well-conditioned matrix. In that case, complete
499 *> pivoting in the first QR factorizations provides accuracy dependent on the
500 *> condition number of C, and independent of D1, D2. Such higher accuracy is
501 *> not completely understood theoretically, but it works well in practice.
502 *> Further, if A can be written as A = B*D, with well-conditioned B and some
503 *> diagonal D, then the high accuracy is guaranteed, both theoretically and
504 *> in software, independent of D. For more details see [1], [2].
505 *> The computational range for the singular values can be the full range
506 *> ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS
507 *> & LAPACK routines called by CGEJSV are implemented to work in that range.
508 *> If that is not the case, then the restriction for safe computation with
509 *> the singular values in the range of normalized IEEE numbers is that the
510 *> spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not
511 *> overflow. This code (CGEJSV) is best used in this restricted range,
512 *> meaning that singular values of magnitude below ||A||_2 / SLAMCH('O') are
513 *> returned as zeros. See JOBR for details on this.
514 *> Further, this implementation is somewhat slower than the one described
515 *> in [1,2] due to replacement of some non-LAPACK components, and because
516 *> the choice of some tuning parameters in the iterative part (CGESVJ) is
517 *> left to the implementer on a particular machine.
518 *> The rank revealing QR factorization (in this code: CGEQP3) should be
519 *> implemented as in [3]. We have a new version of CGEQP3 under development
520 *> that is more robust than the current one in LAPACK, with a cleaner cut in
521 *> rank deficient cases. It will be available in the SIGMA library [4].
522 *> If M is much larger than N, it is obvious that the initial QRF with
523 *> column pivoting can be preprocessed by the QRF without pivoting. That
524 *> well known trick is not used in CGEJSV because in some cases heavy row
525 *> weighting can be treated with complete pivoting. The overhead in cases
526 *> M much larger than N is then only due to pivoting, but the benefits in
527 *> terms of accuracy have prevailed. The implementer/user can incorporate
528 *> this extra QRF step easily. The implementer can also improve data movement
529 *> (matrix transpose, matrix copy, matrix transposed copy) - this
530 *> implementation of CGEJSV uses only the simplest, naive data movement.
531 *> \endverbatim
532 *
533 *> \par Contributor:
534 * ==================
535 *>
536 *> Zlatko Drmac (Zagreb, Croatia)
537 *
538 *> \par References:
539 * ================
540 *>
541 *> \verbatim
542 *>
543 *> [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I.
544 *> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342.
545 *> LAPACK Working note 169.
546 *> [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II.
547 *> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362.
548 *> LAPACK Working note 170.
549 *> [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR
550 *> factorization software - a case study.
551 *> ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28.
552 *> LAPACK Working note 176.
553 *> [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV,
554 *> QSVD, (H,K)-SVD computations.
555 *> Department of Mathematics, University of Zagreb, 2008, 2016.
556 *> \endverbatim
557 *
558 *> \par Bugs, examples and comments:
559 * =================================
560 *>
561 *> Please report all bugs and send interesting examples and/or comments to
562 *> drmac@math.hr. Thank you.
563 *>
564 * =====================================================================
565  SUBROUTINE cgejsv( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP,
566  $ M, N, A, LDA, SVA, U, LDU, V, LDV,
567  $ CWORK, LWORK, RWORK, LRWORK, IWORK, INFO )
568 *
569 * -- LAPACK computational routine --
570 * -- LAPACK is a software package provided by Univ. of Tennessee, --
571 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
572 *
573 * .. Scalar Arguments ..
574  IMPLICIT NONE
575  INTEGER INFO, LDA, LDU, LDV, LWORK, LRWORK, M, N
576 * ..
577 * .. Array Arguments ..
578  COMPLEX A( LDA, * ), U( LDU, * ), V( LDV, * ), CWORK( LWORK )
579  REAL SVA( N ), RWORK( LRWORK )
580  INTEGER IWORK( * )
581  CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV
582 * ..
583 *
584 * ===========================================================================
585 *
586 * .. Local Parameters ..
587  REAL ZERO, ONE
588  PARAMETER ( ZERO = 0.0e0, one = 1.0e0 )
589  COMPLEX CZERO, CONE
590  parameter( czero = ( 0.0e0, 0.0e0 ), cone = ( 1.0e0, 0.0e0 ) )
591 * ..
592 * .. Local Scalars ..
593  COMPLEX CTEMP
594  REAL AAPP, AAQQ, AATMAX, AATMIN, BIG, BIG1, COND_OK,
595  $ condr1, condr2, entra, entrat, epsln, maxprj, scalem,
596  $ sconda, sfmin, small, temp1, uscal1, uscal2, xsc
597  INTEGER IERR, N1, NR, NUMRANK, p, q, WARNING
598  LOGICAL ALMORT, DEFR, ERREST, GOSCAL, JRACC, KILL, LQUERY,
599  $ lsvec, l2aber, l2kill, l2pert, l2rank, l2tran, noscal,
600  $ rowpiv, rsvec, transp
601 *
602  INTEGER OPTWRK, MINWRK, MINRWRK, MINIWRK
603  INTEGER LWCON, LWLQF, LWQP3, LWQRF, LWUNMLQ, LWUNMQR, LWUNMQRM,
604  $ lwsvdj, lwsvdjv, lrwqp3, lrwcon, lrwsvdj, iwoff
605  INTEGER LWRK_CGELQF, LWRK_CGEQP3, LWRK_CGEQP3N, LWRK_CGEQRF,
606  $ LWRK_CGESVJ, LWRK_CGESVJV, LWRK_CGESVJU, LWRK_CUNMLQ,
607  $ lwrk_cunmqr, lwrk_cunmqrm
608 * ..
609 * .. Local Arrays
610  COMPLEX CDUMMY(1)
611  REAL RDUMMY(1)
612 *
613 * .. Intrinsic Functions ..
614  INTRINSIC abs, cmplx, conjg, alog, max, min, real, nint, sqrt
615 * ..
616 * .. External Functions ..
617  REAL SLAMCH, SCNRM2
618  INTEGER ISAMAX, ICAMAX
619  LOGICAL LSAME
620  EXTERNAL isamax, icamax, lsame, slamch, scnrm2
621 * ..
622 * .. External Subroutines ..
623  EXTERNAL slassq, ccopy, cgelqf, cgeqp3, cgeqrf, clacpy, clapmr,
626  $ xerbla
627 *
628  EXTERNAL cgesvj
629 * ..
630 *
631 * Test the input arguments
632 *
633  lsvec = lsame( jobu, 'U' ) .OR. lsame( jobu, 'F' )
634  jracc = lsame( jobv, 'J' )
635  rsvec = lsame( jobv, 'V' ) .OR. jracc
636  rowpiv = lsame( joba, 'F' ) .OR. lsame( joba, 'G' )
637  l2rank = lsame( joba, 'R' )
638  l2aber = lsame( joba, 'A' )
639  errest = lsame( joba, 'E' ) .OR. lsame( joba, 'G' )
640  l2tran = lsame( jobt, 'T' ) .AND. ( m .EQ. n )
641  l2kill = lsame( jobr, 'R' )
642  defr = lsame( jobr, 'N' )
643  l2pert = lsame( jobp, 'P' )
644 *
645  lquery = ( lwork .EQ. -1 ) .OR. ( lrwork .EQ. -1 )
646 *
647  IF ( .NOT.(rowpiv .OR. l2rank .OR. l2aber .OR.
648  $ errest .OR. lsame( joba, 'C' ) )) THEN
649  info = - 1
650  ELSE IF ( .NOT.( lsvec .OR. lsame( jobu, 'N' ) .OR.
651  $ ( lsame( jobu, 'W' ) .AND. rsvec .AND. l2tran ) ) ) THEN
652  info = - 2
653  ELSE IF ( .NOT.( rsvec .OR. lsame( jobv, 'N' ) .OR.
654  $ ( lsame( jobv, 'W' ) .AND. lsvec .AND. l2tran ) ) ) THEN
655  info = - 3
656  ELSE IF ( .NOT. ( l2kill .OR. defr ) ) THEN
657  info = - 4
658  ELSE IF ( .NOT. ( lsame(jobt,'T') .OR. lsame(jobt,'N') ) ) THEN
659  info = - 5
660  ELSE IF ( .NOT. ( l2pert .OR. lsame( jobp, 'N' ) ) ) THEN
661  info = - 6
662  ELSE IF ( m .LT. 0 ) THEN
663  info = - 7
664  ELSE IF ( ( n .LT. 0 ) .OR. ( n .GT. m ) ) THEN
665  info = - 8
666  ELSE IF ( lda .LT. m ) THEN
667  info = - 10
668  ELSE IF ( lsvec .AND. ( ldu .LT. m ) ) THEN
669  info = - 13
670  ELSE IF ( rsvec .AND. ( ldv .LT. n ) ) THEN
671  info = - 15
672  ELSE
673 * #:)
674  info = 0
675  END IF
676 *
677  IF ( info .EQ. 0 ) THEN
678 * .. compute the minimal and the optimal workspace lengths
679 * [[The expressions for computing the minimal and the optimal
680 * values of LCWORK, LRWORK are written with a lot of redundancy and
681 * can be simplified. However, this verbose form is useful for
682 * maintenance and modifications of the code.]]
683 *
684 * .. minimal workspace length for CGEQP3 of an M x N matrix,
685 * CGEQRF of an N x N matrix, CGELQF of an N x N matrix,
686 * CUNMLQ for computing N x N matrix, CUNMQR for computing N x N
687 * matrix, CUNMQR for computing M x N matrix, respectively.
688  lwqp3 = n+1
689  lwqrf = max( 1, n )
690  lwlqf = max( 1, n )
691  lwunmlq = max( 1, n )
692  lwunmqr = max( 1, n )
693  lwunmqrm = max( 1, m )
694 * .. minimal workspace length for CPOCON of an N x N matrix
695  lwcon = 2 * n
696 * .. minimal workspace length for CGESVJ of an N x N matrix,
697 * without and with explicit accumulation of Jacobi rotations
698  lwsvdj = max( 2 * n, 1 )
699  lwsvdjv = max( 2 * n, 1 )
700 * .. minimal REAL workspace length for CGEQP3, CPOCON, CGESVJ
701  lrwqp3 = 2 * n
702  lrwcon = n
703  lrwsvdj = n
704  IF ( lquery ) THEN
705  CALL cgeqp3( m, n, a, lda, iwork, cdummy, cdummy, -1,
706  $ rdummy, ierr )
707  lwrk_cgeqp3 = real( cdummy(1) )
708  CALL cgeqrf( n, n, a, lda, cdummy, cdummy,-1, ierr )
709  lwrk_cgeqrf = real( cdummy(1) )
710  CALL cgelqf( n, n, a, lda, cdummy, cdummy,-1, ierr )
711  lwrk_cgelqf = real( cdummy(1) )
712  END IF
713  minwrk = 2
714  optwrk = 2
715  miniwrk = n
716  IF ( .NOT. (lsvec .OR. rsvec ) ) THEN
717 * .. minimal and optimal sizes of the complex workspace if
718 * only the singular values are requested
719  IF ( errest ) THEN
720  minwrk = max( n+lwqp3, n**2+lwcon, n+lwqrf, lwsvdj )
721  ELSE
722  minwrk = max( n+lwqp3, n+lwqrf, lwsvdj )
723  END IF
724  IF ( lquery ) THEN
725  CALL cgesvj( 'L', 'N', 'N', n, n, a, lda, sva, n, v,
726  $ ldv, cdummy, -1, rdummy, -1, ierr )
727  lwrk_cgesvj = real( cdummy(1) )
728  IF ( errest ) THEN
729  optwrk = max( n+lwrk_cgeqp3, n**2+lwcon,
730  $ n+lwrk_cgeqrf, lwrk_cgesvj )
731  ELSE
732  optwrk = max( n+lwrk_cgeqp3, n+lwrk_cgeqrf,
733  $ lwrk_cgesvj )
734  END IF
735  END IF
736  IF ( l2tran .OR. rowpiv ) THEN
737  IF ( errest ) THEN
738  minrwrk = max( 7, 2*m, lrwqp3, lrwcon, lrwsvdj )
739  ELSE
740  minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj )
741  END IF
742  ELSE
743  IF ( errest ) THEN
744  minrwrk = max( 7, lrwqp3, lrwcon, lrwsvdj )
745  ELSE
746  minrwrk = max( 7, lrwqp3, lrwsvdj )
747  END IF
748  END IF
749  IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
750  ELSE IF ( rsvec .AND. (.NOT.lsvec) ) THEN
751 * .. minimal and optimal sizes of the complex workspace if the
752 * singular values and the right singular vectors are requested
753  IF ( errest ) THEN
754  minwrk = max( n+lwqp3, lwcon, lwsvdj, n+lwlqf,
755  $ 2*n+lwqrf, n+lwsvdj, n+lwunmlq )
756  ELSE
757  minwrk = max( n+lwqp3, lwsvdj, n+lwlqf, 2*n+lwqrf,
758  $ n+lwsvdj, n+lwunmlq )
759  END IF
760  IF ( lquery ) THEN
761  CALL cgesvj( 'L', 'U', 'N', n,n, u, ldu, sva, n, a,
762  $ lda, cdummy, -1, rdummy, -1, ierr )
763  lwrk_cgesvj = real( cdummy(1) )
764  CALL cunmlq( 'L', 'C', n, n, n, a, lda, cdummy,
765  $ v, ldv, cdummy, -1, ierr )
766  lwrk_cunmlq = real( cdummy(1) )
767  IF ( errest ) THEN
768  optwrk = max( n+lwrk_cgeqp3, lwcon, lwrk_cgesvj,
769  $ n+lwrk_cgelqf, 2*n+lwrk_cgeqrf,
770  $ n+lwrk_cgesvj, n+lwrk_cunmlq )
771  ELSE
772  optwrk = max( n+lwrk_cgeqp3, lwrk_cgesvj,n+lwrk_cgelqf,
773  $ 2*n+lwrk_cgeqrf, n+lwrk_cgesvj,
774  $ n+lwrk_cunmlq )
775  END IF
776  END IF
777  IF ( l2tran .OR. rowpiv ) THEN
778  IF ( errest ) THEN
779  minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon )
780  ELSE
781  minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj )
782  END IF
783  ELSE
784  IF ( errest ) THEN
785  minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon )
786  ELSE
787  minrwrk = max( 7, lrwqp3, lrwsvdj )
788  END IF
789  END IF
790  IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
791  ELSE IF ( lsvec .AND. (.NOT.rsvec) ) THEN
792 * .. minimal and optimal sizes of the complex workspace if the
793 * singular values and the left singular vectors are requested
794  IF ( errest ) THEN
795  minwrk = n + max( lwqp3,lwcon,n+lwqrf,lwsvdj,lwunmqrm )
796  ELSE
797  minwrk = n + max( lwqp3, n+lwqrf, lwsvdj, lwunmqrm )
798  END IF
799  IF ( lquery ) THEN
800  CALL cgesvj( 'L', 'U', 'N', n,n, u, ldu, sva, n, a,
801  $ lda, cdummy, -1, rdummy, -1, ierr )
802  lwrk_cgesvj = real( cdummy(1) )
803  CALL cunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,
804  $ ldu, cdummy, -1, ierr )
805  lwrk_cunmqrm = real( cdummy(1) )
806  IF ( errest ) THEN
807  optwrk = n + max( lwrk_cgeqp3, lwcon, n+lwrk_cgeqrf,
808  $ lwrk_cgesvj, lwrk_cunmqrm )
809  ELSE
810  optwrk = n + max( lwrk_cgeqp3, n+lwrk_cgeqrf,
811  $ lwrk_cgesvj, lwrk_cunmqrm )
812  END IF
813  END IF
814  IF ( l2tran .OR. rowpiv ) THEN
815  IF ( errest ) THEN
816  minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon )
817  ELSE
818  minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj )
819  END IF
820  ELSE
821  IF ( errest ) THEN
822  minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon )
823  ELSE
824  minrwrk = max( 7, lrwqp3, lrwsvdj )
825  END IF
826  END IF
827  IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
828  ELSE
829 * .. minimal and optimal sizes of the complex workspace if the
830 * full SVD is requested
831  IF ( .NOT. jracc ) THEN
832  IF ( errest ) THEN
833  minwrk = max( n+lwqp3, n+lwcon, 2*n+n**2+lwcon,
834  $ 2*n+lwqrf, 2*n+lwqp3,
835  $ 2*n+n**2+n+lwlqf, 2*n+n**2+n+n**2+lwcon,
836  $ 2*n+n**2+n+lwsvdj, 2*n+n**2+n+lwsvdjv,
837  $ 2*n+n**2+n+lwunmqr,2*n+n**2+n+lwunmlq,
838  $ n+n**2+lwsvdj, n+lwunmqrm )
839  ELSE
840  minwrk = max( n+lwqp3, 2*n+n**2+lwcon,
841  $ 2*n+lwqrf, 2*n+lwqp3,
842  $ 2*n+n**2+n+lwlqf, 2*n+n**2+n+n**2+lwcon,
843  $ 2*n+n**2+n+lwsvdj, 2*n+n**2+n+lwsvdjv,
844  $ 2*n+n**2+n+lwunmqr,2*n+n**2+n+lwunmlq,
845  $ n+n**2+lwsvdj, n+lwunmqrm )
846  END IF
847  miniwrk = miniwrk + n
848  IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
849  ELSE
850  IF ( errest ) THEN
851  minwrk = max( n+lwqp3, n+lwcon, 2*n+lwqrf,
852  $ 2*n+n**2+lwsvdjv, 2*n+n**2+n+lwunmqr,
853  $ n+lwunmqrm )
854  ELSE
855  minwrk = max( n+lwqp3, 2*n+lwqrf,
856  $ 2*n+n**2+lwsvdjv, 2*n+n**2+n+lwunmqr,
857  $ n+lwunmqrm )
858  END IF
859  IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
860  END IF
861  IF ( lquery ) THEN
862  CALL cunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,
863  $ ldu, cdummy, -1, ierr )
864  lwrk_cunmqrm = real( cdummy(1) )
865  CALL cunmqr( 'L', 'N', n, n, n, a, lda, cdummy, u,
866  $ ldu, cdummy, -1, ierr )
867  lwrk_cunmqr = real( cdummy(1) )
868  IF ( .NOT. jracc ) THEN
869  CALL cgeqp3( n,n, a, lda, iwork, cdummy,cdummy, -1,
870  $ rdummy, ierr )
871  lwrk_cgeqp3n = real( cdummy(1) )
872  CALL cgesvj( 'L', 'U', 'N', n, n, u, ldu, sva,
873  $ n, v, ldv, cdummy, -1, rdummy, -1, ierr )
874  lwrk_cgesvj = real( cdummy(1) )
875  CALL cgesvj( 'U', 'U', 'N', n, n, u, ldu, sva,
876  $ n, v, ldv, cdummy, -1, rdummy, -1, ierr )
877  lwrk_cgesvju = real( cdummy(1) )
878  CALL cgesvj( 'L', 'U', 'V', n, n, u, ldu, sva,
879  $ n, v, ldv, cdummy, -1, rdummy, -1, ierr )
880  lwrk_cgesvjv = real( cdummy(1) )
881  CALL cunmlq( 'L', 'C', n, n, n, a, lda, cdummy,
882  $ v, ldv, cdummy, -1, ierr )
883  lwrk_cunmlq = real( cdummy(1) )
884  IF ( errest ) THEN
885  optwrk = max( n+lwrk_cgeqp3, n+lwcon,
886  $ 2*n+n**2+lwcon, 2*n+lwrk_cgeqrf,
887  $ 2*n+lwrk_cgeqp3n,
888  $ 2*n+n**2+n+lwrk_cgelqf,
889  $ 2*n+n**2+n+n**2+lwcon,
890  $ 2*n+n**2+n+lwrk_cgesvj,
891  $ 2*n+n**2+n+lwrk_cgesvjv,
892  $ 2*n+n**2+n+lwrk_cunmqr,
893  $ 2*n+n**2+n+lwrk_cunmlq,
894  $ n+n**2+lwrk_cgesvju,
895  $ n+lwrk_cunmqrm )
896  ELSE
897  optwrk = max( n+lwrk_cgeqp3,
898  $ 2*n+n**2+lwcon, 2*n+lwrk_cgeqrf,
899  $ 2*n+lwrk_cgeqp3n,
900  $ 2*n+n**2+n+lwrk_cgelqf,
901  $ 2*n+n**2+n+n**2+lwcon,
902  $ 2*n+n**2+n+lwrk_cgesvj,
903  $ 2*n+n**2+n+lwrk_cgesvjv,
904  $ 2*n+n**2+n+lwrk_cunmqr,
905  $ 2*n+n**2+n+lwrk_cunmlq,
906  $ n+n**2+lwrk_cgesvju,
907  $ n+lwrk_cunmqrm )
908  END IF
909  ELSE
910  CALL cgesvj( 'L', 'U', 'V', n, n, u, ldu, sva,
911  $ n, v, ldv, cdummy, -1, rdummy, -1, ierr )
912  lwrk_cgesvjv = real( cdummy(1) )
913  CALL cunmqr( 'L', 'N', n, n, n, cdummy, n, cdummy,
914  $ v, ldv, cdummy, -1, ierr )
915  lwrk_cunmqr = real( cdummy(1) )
916  CALL cunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,
917  $ ldu, cdummy, -1, ierr )
918  lwrk_cunmqrm = real( cdummy(1) )
919  IF ( errest ) THEN
920  optwrk = max( n+lwrk_cgeqp3, n+lwcon,
921  $ 2*n+lwrk_cgeqrf, 2*n+n**2,
922  $ 2*n+n**2+lwrk_cgesvjv,
923  $ 2*n+n**2+n+lwrk_cunmqr,n+lwrk_cunmqrm )
924  ELSE
925  optwrk = max( n+lwrk_cgeqp3, 2*n+lwrk_cgeqrf,
926  $ 2*n+n**2, 2*n+n**2+lwrk_cgesvjv,
927  $ 2*n+n**2+n+lwrk_cunmqr,
928  $ n+lwrk_cunmqrm )
929  END IF
930  END IF
931  END IF
932  IF ( l2tran .OR. rowpiv ) THEN
933  minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon )
934  ELSE
935  minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon )
936  END IF
937  END IF
938  minwrk = max( 2, minwrk )
939  optwrk = max( optwrk, minwrk )
940  IF ( lwork .LT. minwrk .AND. (.NOT.lquery) ) info = - 17
941  IF ( lrwork .LT. minrwrk .AND. (.NOT.lquery) ) info = - 19
942  END IF
943 *
944  IF ( info .NE. 0 ) THEN
945 * #:(
946  CALL xerbla( 'CGEJSV', - info )
947  RETURN
948  ELSE IF ( lquery ) THEN
949  cwork(1) = optwrk
950  cwork(2) = minwrk
951  rwork(1) = minrwrk
952  iwork(1) = max( 4, miniwrk )
953  RETURN
954  END IF
955 *
956 * Quick return for void matrix (Y3K safe)
957 * #:)
958  IF ( ( m .EQ. 0 ) .OR. ( n .EQ. 0 ) ) THEN
959  iwork(1:4) = 0
960  rwork(1:7) = 0
961  RETURN
962  ENDIF
963 *
964 * Determine whether the matrix U should be M x N or M x M
965 *
966  IF ( lsvec ) THEN
967  n1 = n
968  IF ( lsame( jobu, 'F' ) ) n1 = m
969  END IF
970 *
971 * Set numerical parameters
972 *
973 *! NOTE: Make sure SLAMCH() does not fail on the target architecture.
974 *
975  epsln = slamch('Epsilon')
976  sfmin = slamch('SafeMinimum')
977  small = sfmin / epsln
978  big = slamch('O')
979 * BIG = ONE / SFMIN
980 *
981 * Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N
982 *
983 *(!) If necessary, scale SVA() to protect the largest norm from
984 * overflow. It is possible that this scaling pushes the smallest
985 * column norm left from the underflow threshold (extreme case).
986 *
987  scalem = one / sqrt(real(m)*real(n))
988  noscal = .true.
989  goscal = .true.
990  DO 1874 p = 1, n
991  aapp = zero
992  aaqq = one
993  CALL classq( m, a(1,p), 1, aapp, aaqq )
994  IF ( aapp .GT. big ) THEN
995  info = - 9
996  CALL xerbla( 'CGEJSV', -info )
997  RETURN
998  END IF
999  aaqq = sqrt(aaqq)
1000  IF ( ( aapp .LT. (big / aaqq) ) .AND. noscal ) THEN
1001  sva(p) = aapp * aaqq
1002  ELSE
1003  noscal = .false.
1004  sva(p) = aapp * ( aaqq * scalem )
1005  IF ( goscal ) THEN
1006  goscal = .false.
1007  CALL sscal( p-1, scalem, sva, 1 )
1008  END IF
1009  END IF
1010  1874 CONTINUE
1011 *
1012  IF ( noscal ) scalem = one
1013 *
1014  aapp = zero
1015  aaqq = big
1016  DO 4781 p = 1, n
1017  aapp = max( aapp, sva(p) )
1018  IF ( sva(p) .NE. zero ) aaqq = min( aaqq, sva(p) )
1019  4781 CONTINUE
1020 *
1021 * Quick return for zero M x N matrix
1022 * #:)
1023  IF ( aapp .EQ. zero ) THEN
1024  IF ( lsvec ) CALL claset( 'G', m, n1, czero, cone, u, ldu )
1025  IF ( rsvec ) CALL claset( 'G', n, n, czero, cone, v, ldv )
1026  rwork(1) = one
1027  rwork(2) = one
1028  IF ( errest ) rwork(3) = one
1029  IF ( lsvec .AND. rsvec ) THEN
1030  rwork(4) = one
1031  rwork(5) = one
1032  END IF
1033  IF ( l2tran ) THEN
1034  rwork(6) = zero
1035  rwork(7) = zero
1036  END IF
1037  iwork(1) = 0
1038  iwork(2) = 0
1039  iwork(3) = 0
1040  iwork(4) = -1
1041  RETURN
1042  END IF
1043 *
1044 * Issue warning if denormalized column norms detected. Override the
1045 * high relative accuracy request. Issue licence to kill nonzero columns
1046 * (set them to zero) whose norm is less than sigma_max / BIG (roughly).
1047 * #:(
1048  warning = 0
1049  IF ( aaqq .LE. sfmin ) THEN
1050  l2rank = .true.
1051  l2kill = .true.
1052  warning = 1
1053  END IF
1054 *
1055 * Quick return for one-column matrix
1056 * #:)
1057  IF ( n .EQ. 1 ) THEN
1058 *
1059  IF ( lsvec ) THEN
1060  CALL clascl( 'G',0,0,sva(1),scalem, m,1,a(1,1),lda,ierr )
1061  CALL clacpy( 'A', m, 1, a, lda, u, ldu )
1062 * computing all M left singular vectors of the M x 1 matrix
1063  IF ( n1 .NE. n ) THEN
1064  CALL cgeqrf( m, n, u,ldu, cwork, cwork(n+1),lwork-n,ierr )
1065  CALL cungqr( m,n1,1, u,ldu,cwork,cwork(n+1),lwork-n,ierr )
1066  CALL ccopy( m, a(1,1), 1, u(1,1), 1 )
1067  END IF
1068  END IF
1069  IF ( rsvec ) THEN
1070  v(1,1) = cone
1071  END IF
1072  IF ( sva(1) .LT. (big*scalem) ) THEN
1073  sva(1) = sva(1) / scalem
1074  scalem = one
1075  END IF
1076  rwork(1) = one / scalem
1077  rwork(2) = one
1078  IF ( sva(1) .NE. zero ) THEN
1079  iwork(1) = 1
1080  IF ( ( sva(1) / scalem) .GE. sfmin ) THEN
1081  iwork(2) = 1
1082  ELSE
1083  iwork(2) = 0
1084  END IF
1085  ELSE
1086  iwork(1) = 0
1087  iwork(2) = 0
1088  END IF
1089  iwork(3) = 0
1090  iwork(4) = -1
1091  IF ( errest ) rwork(3) = one
1092  IF ( lsvec .AND. rsvec ) THEN
1093  rwork(4) = one
1094  rwork(5) = one
1095  END IF
1096  IF ( l2tran ) THEN
1097  rwork(6) = zero
1098  rwork(7) = zero
1099  END IF
1100  RETURN
1101 *
1102  END IF
1103 *
1104  transp = .false.
1105 *
1106  aatmax = -one
1107  aatmin = big
1108  IF ( rowpiv .OR. l2tran ) THEN
1109 *
1110 * Compute the row norms, needed to determine row pivoting sequence
1111 * (in the case of heavily row weighted A, row pivoting is strongly
1112 * advised) and to collect information needed to compare the
1113 * structures of A * A^* and A^* * A (in the case L2TRAN.EQ..TRUE.).
1114 *
1115  IF ( l2tran ) THEN
1116  DO 1950 p = 1, m
1117  xsc = zero
1118  temp1 = one
1119  CALL classq( n, a(p,1), lda, xsc, temp1 )
1120 * CLASSQ gets both the ell_2 and the ell_infinity norm
1121 * in one pass through the vector
1122  rwork(m+p) = xsc * scalem
1123  rwork(p) = xsc * (scalem*sqrt(temp1))
1124  aatmax = max( aatmax, rwork(p) )
1125  IF (rwork(p) .NE. zero)
1126  $ aatmin = min(aatmin,rwork(p))
1127  1950 CONTINUE
1128  ELSE
1129  DO 1904 p = 1, m
1130  rwork(m+p) = scalem*abs( a(p,icamax(n,a(p,1),lda)) )
1131  aatmax = max( aatmax, rwork(m+p) )
1132  aatmin = min( aatmin, rwork(m+p) )
1133  1904 CONTINUE
1134  END IF
1135 *
1136  END IF
1137 *
1138 * For square matrix A try to determine whether A^* would be better
1139 * input for the preconditioned Jacobi SVD, with faster convergence.
1140 * The decision is based on an O(N) function of the vector of column
1141 * and row norms of A, based on the Shannon entropy. This should give
1142 * the right choice in most cases when the difference actually matters.
1143 * It may fail and pick the slower converging side.
1144 *
1145  entra = zero
1146  entrat = zero
1147  IF ( l2tran ) THEN
1148 *
1149  xsc = zero
1150  temp1 = one
1151  CALL slassq( n, sva, 1, xsc, temp1 )
1152  temp1 = one / temp1
1153 *
1154  entra = zero
1155  DO 1113 p = 1, n
1156  big1 = ( ( sva(p) / xsc )**2 ) * temp1
1157  IF ( big1 .NE. zero ) entra = entra + big1 * alog(big1)
1158  1113 CONTINUE
1159  entra = - entra / alog(real(n))
1160 *
1161 * Now, SVA().^2/Trace(A^* * A) is a point in the probability simplex.
1162 * It is derived from the diagonal of A^* * A. Do the same with the
1163 * diagonal of A * A^*, compute the entropy of the corresponding
1164 * probability distribution. Note that A * A^* and A^* * A have the
1165 * same trace.
1166 *
1167  entrat = zero
1168  DO 1114 p = 1, m
1169  big1 = ( ( rwork(p) / xsc )**2 ) * temp1
1170  IF ( big1 .NE. zero ) entrat = entrat + big1 * alog(big1)
1171  1114 CONTINUE
1172  entrat = - entrat / alog(real(m))
1173 *
1174 * Analyze the entropies and decide A or A^*. Smaller entropy
1175 * usually means better input for the algorithm.
1176 *
1177  transp = ( entrat .LT. entra )
1178 *
1179 * If A^* is better than A, take the adjoint of A. This is allowed
1180 * only for square matrices, M=N.
1181  IF ( transp ) THEN
1182 * In an optimal implementation, this trivial transpose
1183 * should be replaced with faster transpose.
1184  DO 1115 p = 1, n - 1
1185  a(p,p) = conjg(a(p,p))
1186  DO 1116 q = p + 1, n
1187  ctemp = conjg(a(q,p))
1188  a(q,p) = conjg(a(p,q))
1189  a(p,q) = ctemp
1190  1116 CONTINUE
1191  1115 CONTINUE
1192  a(n,n) = conjg(a(n,n))
1193  DO 1117 p = 1, n
1194  rwork(m+p) = sva(p)
1195  sva(p) = rwork(p)
1196 * previously computed row 2-norms are now column 2-norms
1197 * of the transposed matrix
1198  1117 CONTINUE
1199  temp1 = aapp
1200  aapp = aatmax
1201  aatmax = temp1
1202  temp1 = aaqq
1203  aaqq = aatmin
1204  aatmin = temp1
1205  kill = lsvec
1206  lsvec = rsvec
1207  rsvec = kill
1208  IF ( lsvec ) n1 = n
1209 *
1210  rowpiv = .true.
1211  END IF
1212 *
1213  END IF
1214 * END IF L2TRAN
1215 *
1216 * Scale the matrix so that its maximal singular value remains less
1217 * than SQRT(BIG) -- the matrix is scaled so that its maximal column
1218 * has Euclidean norm equal to SQRT(BIG/N). The only reason to keep
1219 * SQRT(BIG) instead of BIG is the fact that CGEJSV uses LAPACK and
1220 * BLAS routines that, in some implementations, are not capable of
1221 * working in the full interval [SFMIN,BIG] and that they may provoke
1222 * overflows in the intermediate results. If the singular values spread
1223 * from SFMIN to BIG, then CGESVJ will compute them. So, in that case,
1224 * one should use CGESVJ instead of CGEJSV.
1225  big1 = sqrt( big )
1226  temp1 = sqrt( big / real(n) )
1227 * >> for future updates: allow bigger range, i.e. the largest column
1228 * will be allowed up to BIG/N and CGESVJ will do the rest. However, for
1229 * this all other (LAPACK) components must allow such a range.
1230 * TEMP1 = BIG/REAL(N)
1231 * TEMP1 = BIG * EPSLN this should 'almost' work with current LAPACK components
1232  CALL slascl( 'G', 0, 0, aapp, temp1, n, 1, sva, n, ierr )
1233  IF ( aaqq .GT. (aapp * sfmin) ) THEN
1234  aaqq = ( aaqq / aapp ) * temp1
1235  ELSE
1236  aaqq = ( aaqq * temp1 ) / aapp
1237  END IF
1238  temp1 = temp1 * scalem
1239  CALL clascl( 'G', 0, 0, aapp, temp1, m, n, a, lda, ierr )
1240 *
1241 * To undo scaling at the end of this procedure, multiply the
1242 * computed singular values with USCAL2 / USCAL1.
1243 *
1244  uscal1 = temp1
1245  uscal2 = aapp
1246 *
1247  IF ( l2kill ) THEN
1248 * L2KILL enforces computation of nonzero singular values in
1249 * the restricted range of condition number of the initial A,
1250 * sigma_max(A) / sigma_min(A) approx. SQRT(BIG)/SQRT(SFMIN).
1251  xsc = sqrt( sfmin )
1252  ELSE
1253  xsc = small
1254 *
1255 * Now, if the condition number of A is too big,
1256 * sigma_max(A) / sigma_min(A) .GT. SQRT(BIG/N) * EPSLN / SFMIN,
1257 * as a precaution measure, the full SVD is computed using CGESVJ
1258 * with accumulated Jacobi rotations. This provides numerically
1259 * more robust computation, at the cost of slightly increased run
1260 * time. Depending on the concrete implementation of BLAS and LAPACK
1261 * (i.e. how they behave in presence of extreme ill-conditioning) the
1262 * implementor may decide to remove this switch.
1263  IF ( ( aaqq.LT.sqrt(sfmin) ) .AND. lsvec .AND. rsvec ) THEN
1264  jracc = .true.
1265  END IF
1266 *
1267  END IF
1268  IF ( aaqq .LT. xsc ) THEN
1269  DO 700 p = 1, n
1270  IF ( sva(p) .LT. xsc ) THEN
1271  CALL claset( 'A', m, 1, czero, czero, a(1,p), lda )
1272  sva(p) = zero
1273  END IF
1274  700 CONTINUE
1275  END IF
1276 *
1277 * Preconditioning using QR factorization with pivoting
1278 *
1279  IF ( rowpiv ) THEN
1280 * Optional row permutation (Bjoerck row pivoting):
1281 * A result by Cox and Higham shows that the Bjoerck's
1282 * row pivoting combined with standard column pivoting
1283 * has similar effect as Powell-Reid complete pivoting.
1284 * The ell-infinity norms of A are made nonincreasing.
1285  IF ( ( lsvec .AND. rsvec ) .AND. .NOT.( jracc ) ) THEN
1286  iwoff = 2*n
1287  ELSE
1288  iwoff = n
1289  END IF
1290  DO 1952 p = 1, m - 1
1291  q = isamax( m-p+1, rwork(m+p), 1 ) + p - 1
1292  iwork(iwoff+p) = q
1293  IF ( p .NE. q ) THEN
1294  temp1 = rwork(m+p)
1295  rwork(m+p) = rwork(m+q)
1296  rwork(m+q) = temp1
1297  END IF
1298  1952 CONTINUE
1299  CALL claswp( n, a, lda, 1, m-1, iwork(iwoff+1), 1 )
1300  END IF
1301 *
1302 * End of the preparation phase (scaling, optional sorting and
1303 * transposing, optional flushing of small columns).
1304 *
1305 * Preconditioning
1306 *
1307 * If the full SVD is needed, the right singular vectors are computed
1308 * from a matrix equation, and for that we need theoretical analysis
1309 * of the Businger-Golub pivoting. So we use CGEQP3 as the first RR QRF.
1310 * In all other cases the first RR QRF can be chosen by other criteria
1311 * (eg speed by replacing global with restricted window pivoting, such
1312 * as in xGEQPX from TOMS # 782). Good results will be obtained using
1313 * xGEQPX with properly (!) chosen numerical parameters.
1314 * Any improvement of CGEQP3 improves overall performance of CGEJSV.
1315 *
1316 * A * P1 = Q1 * [ R1^* 0]^*:
1317  DO 1963 p = 1, n
1318 * .. all columns are free columns
1319  iwork(p) = 0
1320  1963 CONTINUE
1321  CALL cgeqp3( m, n, a, lda, iwork, cwork, cwork(n+1), lwork-n,
1322  $ rwork, ierr )
1323 *
1324 * The upper triangular matrix R1 from the first QRF is inspected for
1325 * rank deficiency and possibilities for deflation, or possible
1326 * ill-conditioning. Depending on the user specified flag L2RANK,
1327 * the procedure explores possibilities to reduce the numerical
1328 * rank by inspecting the computed upper triangular factor. If
1329 * L2RANK or L2ABER are up, then CGEJSV will compute the SVD of
1330 * A + dA, where ||dA|| <= f(M,N)*EPSLN.
1331 *
1332  nr = 1
1333  IF ( l2aber ) THEN
1334 * Standard absolute error bound suffices. All sigma_i with
1335 * sigma_i < N*EPSLN*||A|| are flushed to zero. This is an
1336 * aggressive enforcement of lower numerical rank by introducing a
1337 * backward error of the order of N*EPSLN*||A||.
1338  temp1 = sqrt(real(n))*epsln
1339  DO 3001 p = 2, n
1340  IF ( abs(a(p,p)) .GE. (temp1*abs(a(1,1))) ) THEN
1341  nr = nr + 1
1342  ELSE
1343  GO TO 3002
1344  END IF
1345  3001 CONTINUE
1346  3002 CONTINUE
1347  ELSE IF ( l2rank ) THEN
1348 * .. similarly as above, only slightly more gentle (less aggressive).
1349 * Sudden drop on the diagonal of R1 is used as the criterion for
1350 * close-to-rank-deficient.
1351  temp1 = sqrt(sfmin)
1352  DO 3401 p = 2, n
1353  IF ( ( abs(a(p,p)) .LT. (epsln*abs(a(p-1,p-1))) ) .OR.
1354  $ ( abs(a(p,p)) .LT. small ) .OR.
1355  $ ( l2kill .AND. (abs(a(p,p)) .LT. temp1) ) ) GO TO 3402
1356  nr = nr + 1
1357  3401 CONTINUE
1358  3402 CONTINUE
1359 *
1360  ELSE
1361 * The goal is high relative accuracy. However, if the matrix
1362 * has high scaled condition number the relative accuracy is in
1363 * general not feasible. Later on, a condition number estimator
1364 * will be deployed to estimate the scaled condition number.
1365 * Here we just remove the underflowed part of the triangular
1366 * factor. This prevents the situation in which the code is
1367 * working hard to get the accuracy not warranted by the data.
1368  temp1 = sqrt(sfmin)
1369  DO 3301 p = 2, n
1370  IF ( ( abs(a(p,p)) .LT. small ) .OR.
1371  $ ( l2kill .AND. (abs(a(p,p)) .LT. temp1) ) ) GO TO 3302
1372  nr = nr + 1
1373  3301 CONTINUE
1374  3302 CONTINUE
1375 *
1376  END IF
1377 *
1378  almort = .false.
1379  IF ( nr .EQ. n ) THEN
1380  maxprj = one
1381  DO 3051 p = 2, n
1382  temp1 = abs(a(p,p)) / sva(iwork(p))
1383  maxprj = min( maxprj, temp1 )
1384  3051 CONTINUE
1385  IF ( maxprj**2 .GE. one - real(n)*epsln ) almort = .true.
1386  END IF
1387 *
1388 *
1389  sconda = - one
1390  condr1 = - one
1391  condr2 = - one
1392 *
1393  IF ( errest ) THEN
1394  IF ( n .EQ. nr ) THEN
1395  IF ( rsvec ) THEN
1396 * .. V is available as workspace
1397  CALL clacpy( 'U', n, n, a, lda, v, ldv )
1398  DO 3053 p = 1, n
1399  temp1 = sva(iwork(p))
1400  CALL csscal( p, one/temp1, v(1,p), 1 )
1401  3053 CONTINUE
1402  IF ( lsvec )THEN
1403  CALL cpocon( 'U', n, v, ldv, one, temp1,
1404  $ cwork(n+1), rwork, ierr )
1405  ELSE
1406  CALL cpocon( 'U', n, v, ldv, one, temp1,
1407  $ cwork, rwork, ierr )
1408  END IF
1409 *
1410  ELSE IF ( lsvec ) THEN
1411 * .. U is available as workspace
1412  CALL clacpy( 'U', n, n, a, lda, u, ldu )
1413  DO 3054 p = 1, n
1414  temp1 = sva(iwork(p))
1415  CALL csscal( p, one/temp1, u(1,p), 1 )
1416  3054 CONTINUE
1417  CALL cpocon( 'U', n, u, ldu, one, temp1,
1418  $ cwork(n+1), rwork, ierr )
1419  ELSE
1420  CALL clacpy( 'U', n, n, a, lda, cwork, n )
1421 *[] CALL CLACPY( 'U', N, N, A, LDA, CWORK(N+1), N )
1422 * Change: here index shifted by N to the left, CWORK(1:N)
1423 * not needed for SIGMA only computation
1424  DO 3052 p = 1, n
1425  temp1 = sva(iwork(p))
1426 *[] CALL CSSCAL( p, ONE/TEMP1, CWORK(N+(p-1)*N+1), 1 )
1427  CALL csscal( p, one/temp1, cwork((p-1)*n+1), 1 )
1428  3052 CONTINUE
1429 * .. the columns of R are scaled to have unit Euclidean lengths.
1430 *[] CALL CPOCON( 'U', N, CWORK(N+1), N, ONE, TEMP1,
1431 *[] $ CWORK(N+N*N+1), RWORK, IERR )
1432  CALL cpocon( 'U', n, cwork, n, one, temp1,
1433  $ cwork(n*n+1), rwork, ierr )
1434 *
1435  END IF
1436  IF ( temp1 .NE. zero ) THEN
1437  sconda = one / sqrt(temp1)
1438  ELSE
1439  sconda = - one
1440  END IF
1441 * SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1).
1442 * N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
1443  ELSE
1444  sconda = - one
1445  END IF
1446  END IF
1447 *
1448  l2pert = l2pert .AND. ( abs( a(1,1)/a(nr,nr) ) .GT. sqrt(big1) )
1449 * If there is no violent scaling, artificial perturbation is not needed.
1450 *
1451 * Phase 3:
1452 *
1453  IF ( .NOT. ( rsvec .OR. lsvec ) ) THEN
1454 *
1455 * Singular Values only
1456 *
1457 * .. transpose A(1:NR,1:N)
1458  DO 1946 p = 1, min( n-1, nr )
1459  CALL ccopy( n-p, a(p,p+1), lda, a(p+1,p), 1 )
1460  CALL clacgv( n-p+1, a(p,p), 1 )
1461  1946 CONTINUE
1462  IF ( nr .EQ. n ) a(n,n) = conjg(a(n,n))
1463 *
1464 * The following two DO-loops introduce small relative perturbation
1465 * into the strict upper triangle of the lower triangular matrix.
1466 * Small entries below the main diagonal are also changed.
1467 * This modification is useful if the computing environment does not
1468 * provide/allow FLUSH TO ZERO underflow, for it prevents many
1469 * annoying denormalized numbers in case of strongly scaled matrices.
1470 * The perturbation is structured so that it does not introduce any
1471 * new perturbation of the singular values, and it does not destroy
1472 * the job done by the preconditioner.
1473 * The licence for this perturbation is in the variable L2PERT, which
1474 * should be .FALSE. if FLUSH TO ZERO underflow is active.
1475 *
1476  IF ( .NOT. almort ) THEN
1477 *
1478  IF ( l2pert ) THEN
1479 * XSC = SQRT(SMALL)
1480  xsc = epsln / real(n)
1481  DO 4947 q = 1, nr
1482  ctemp = cmplx(xsc*abs(a(q,q)),zero)
1483  DO 4949 p = 1, n
1484  IF ( ( (p.GT.q) .AND. (abs(a(p,q)).LE.temp1) )
1485  $ .OR. ( p .LT. q ) )
1486 * $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) )
1487  $ a(p,q) = ctemp
1488  4949 CONTINUE
1489  4947 CONTINUE
1490  ELSE
1491  CALL claset( 'U', nr-1,nr-1, czero,czero, a(1,2),lda )
1492  END IF
1493 *
1494 * .. second preconditioning using the QR factorization
1495 *
1496  CALL cgeqrf( n,nr, a,lda, cwork, cwork(n+1),lwork-n, ierr )
1497 *
1498 * .. and transpose upper to lower triangular
1499  DO 1948 p = 1, nr - 1
1500  CALL ccopy( nr-p, a(p,p+1), lda, a(p+1,p), 1 )
1501  CALL clacgv( nr-p+1, a(p,p), 1 )
1502  1948 CONTINUE
1503 *
1504  END IF
1505 *
1506 * Row-cyclic Jacobi SVD algorithm with column pivoting
1507 *
1508 * .. again some perturbation (a "background noise") is added
1509 * to drown denormals
1510  IF ( l2pert ) THEN
1511 * XSC = SQRT(SMALL)
1512  xsc = epsln / real(n)
1513  DO 1947 q = 1, nr
1514  ctemp = cmplx(xsc*abs(a(q,q)),zero)
1515  DO 1949 p = 1, nr
1516  IF ( ( (p.GT.q) .AND. (abs(a(p,q)).LE.temp1) )
1517  $ .OR. ( p .LT. q ) )
1518 * $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) )
1519  $ a(p,q) = ctemp
1520  1949 CONTINUE
1521  1947 CONTINUE
1522  ELSE
1523  CALL claset( 'U', nr-1, nr-1, czero, czero, a(1,2), lda )
1524  END IF
1525 *
1526 * .. and one-sided Jacobi rotations are started on a lower
1527 * triangular matrix (plus perturbation which is ignored in
1528 * the part which destroys triangular form (confusing?!))
1529 *
1530  CALL cgesvj( 'L', 'N', 'N', nr, nr, a, lda, sva,
1531  $ n, v, ldv, cwork, lwork, rwork, lrwork, info )
1532 *
1533  scalem = rwork(1)
1534  numrank = nint(rwork(2))
1535 *
1536 *
1537  ELSE IF ( ( rsvec .AND. ( .NOT. lsvec ) .AND. ( .NOT. jracc ) )
1538  $ .OR.
1539  $ ( jracc .AND. ( .NOT. lsvec ) .AND. ( nr .NE. n ) ) ) THEN
1540 *
1541 * -> Singular Values and Right Singular Vectors <-
1542 *
1543  IF ( almort ) THEN
1544 *
1545 * .. in this case NR equals N
1546  DO 1998 p = 1, nr
1547  CALL ccopy( n-p+1, a(p,p), lda, v(p,p), 1 )
1548  CALL clacgv( n-p+1, v(p,p), 1 )
1549  1998 CONTINUE
1550  CALL claset( 'U', nr-1,nr-1, czero, czero, v(1,2), ldv )
1551 *
1552  CALL cgesvj( 'L','U','N', n, nr, v, ldv, sva, nr, a, lda,
1553  $ cwork, lwork, rwork, lrwork, info )
1554  scalem = rwork(1)
1555  numrank = nint(rwork(2))
1556 
1557  ELSE
1558 *
1559 * .. two more QR factorizations ( one QRF is not enough, two require
1560 * accumulated product of Jacobi rotations, three are perfect )
1561 *
1562  CALL claset( 'L', nr-1,nr-1, czero, czero, a(2,1), lda )
1563  CALL cgelqf( nr,n, a, lda, cwork, cwork(n+1), lwork-n, ierr)
1564  CALL clacpy( 'L', nr, nr, a, lda, v, ldv )
1565  CALL claset( 'U', nr-1,nr-1, czero, czero, v(1,2), ldv )
1566  CALL cgeqrf( nr, nr, v, ldv, cwork(n+1), cwork(2*n+1),
1567  $ lwork-2*n, ierr )
1568  DO 8998 p = 1, nr
1569  CALL ccopy( nr-p+1, v(p,p), ldv, v(p,p), 1 )
1570  CALL clacgv( nr-p+1, v(p,p), 1 )
1571  8998 CONTINUE
1572  CALL claset('U', nr-1, nr-1, czero, czero, v(1,2), ldv)
1573 *
1574  CALL cgesvj( 'L', 'U','N', nr, nr, v,ldv, sva, nr, u,
1575  $ ldu, cwork(n+1), lwork-n, rwork, lrwork, info )
1576  scalem = rwork(1)
1577  numrank = nint(rwork(2))
1578  IF ( nr .LT. n ) THEN
1579  CALL claset( 'A',n-nr, nr, czero,czero, v(nr+1,1), ldv )
1580  CALL claset( 'A',nr, n-nr, czero,czero, v(1,nr+1), ldv )
1581  CALL claset( 'A',n-nr,n-nr,czero,cone, v(nr+1,nr+1),ldv )
1582  END IF
1583 *
1584  CALL cunmlq( 'L', 'C', n, n, nr, a, lda, cwork,
1585  $ v, ldv, cwork(n+1), lwork-n, ierr )
1586 *
1587  END IF
1588 * .. permute the rows of V
1589 * DO 8991 p = 1, N
1590 * CALL CCOPY( N, V(p,1), LDV, A(IWORK(p),1), LDA )
1591 * 8991 CONTINUE
1592 * CALL CLACPY( 'All', N, N, A, LDA, V, LDV )
1593  CALL clapmr( .false., n, n, v, ldv, iwork )
1594 *
1595  IF ( transp ) THEN
1596  CALL clacpy( 'A', n, n, v, ldv, u, ldu )
1597  END IF
1598 *
1599  ELSE IF ( jracc .AND. (.NOT. lsvec) .AND. ( nr.EQ. n ) ) THEN
1600 *
1601  CALL claset( 'L', n-1,n-1, czero, czero, a(2,1), lda )
1602 *
1603  CALL cgesvj( 'U','N','V', n, n, a, lda, sva, n, v, ldv,
1604  $ cwork, lwork, rwork, lrwork, info )
1605  scalem = rwork(1)
1606  numrank = nint(rwork(2))
1607  CALL clapmr( .false., n, n, v, ldv, iwork )
1608 *
1609  ELSE IF ( lsvec .AND. ( .NOT. rsvec ) ) THEN
1610 *
1611 * .. Singular Values and Left Singular Vectors ..
1612 *
1613 * .. second preconditioning step to avoid need to accumulate
1614 * Jacobi rotations in the Jacobi iterations.
1615  DO 1965 p = 1, nr
1616  CALL ccopy( n-p+1, a(p,p), lda, u(p,p), 1 )
1617  CALL clacgv( n-p+1, u(p,p), 1 )
1618  1965 CONTINUE
1619  CALL claset( 'U', nr-1, nr-1, czero, czero, u(1,2), ldu )
1620 *
1621  CALL cgeqrf( n, nr, u, ldu, cwork(n+1), cwork(2*n+1),
1622  $ lwork-2*n, ierr )
1623 *
1624  DO 1967 p = 1, nr - 1
1625  CALL ccopy( nr-p, u(p,p+1), ldu, u(p+1,p), 1 )
1626  CALL clacgv( n-p+1, u(p,p), 1 )
1627  1967 CONTINUE
1628  CALL claset( 'U', nr-1, nr-1, czero, czero, u(1,2), ldu )
1629 *
1630  CALL cgesvj( 'L', 'U', 'N', nr,nr, u, ldu, sva, nr, a,
1631  $ lda, cwork(n+1), lwork-n, rwork, lrwork, info )
1632  scalem = rwork(1)
1633  numrank = nint(rwork(2))
1634 *
1635  IF ( nr .LT. m ) THEN
1636  CALL claset( 'A', m-nr, nr,czero, czero, u(nr+1,1), ldu )
1637  IF ( nr .LT. n1 ) THEN
1638  CALL claset( 'A',nr, n1-nr, czero, czero, u(1,nr+1),ldu )
1639  CALL claset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),ldu )
1640  END IF
1641  END IF
1642 *
1643  CALL cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
1644  $ ldu, cwork(n+1), lwork-n, ierr )
1645 *
1646  IF ( rowpiv )
1647  $ CALL claswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 )
1648 *
1649  DO 1974 p = 1, n1
1650  xsc = one / scnrm2( m, u(1,p), 1 )
1651  CALL csscal( m, xsc, u(1,p), 1 )
1652  1974 CONTINUE
1653 *
1654  IF ( transp ) THEN
1655  CALL clacpy( 'A', n, n, u, ldu, v, ldv )
1656  END IF
1657 *
1658  ELSE
1659 *
1660 * .. Full SVD ..
1661 *
1662  IF ( .NOT. jracc ) THEN
1663 *
1664  IF ( .NOT. almort ) THEN
1665 *
1666 * Second Preconditioning Step (QRF [with pivoting])
1667 * Note that the composition of TRANSPOSE, QRF and TRANSPOSE is
1668 * equivalent to an LQF CALL. Since in many libraries the QRF
1669 * seems to be better optimized than the LQF, we do explicit
1670 * transpose and use the QRF. This is subject to changes in an
1671 * optimized implementation of CGEJSV.
1672 *
1673  DO 1968 p = 1, nr
1674  CALL ccopy( n-p+1, a(p,p), lda, v(p,p), 1 )
1675  CALL clacgv( n-p+1, v(p,p), 1 )
1676  1968 CONTINUE
1677 *
1678 * .. the following two loops perturb small entries to avoid
1679 * denormals in the second QR factorization, where they are
1680 * as good as zeros. This is done to avoid painfully slow
1681 * computation with denormals. The relative size of the perturbation
1682 * is a parameter that can be changed by the implementer.
1683 * This perturbation device will be obsolete on machines with
1684 * properly implemented arithmetic.
1685 * To switch it off, set L2PERT=.FALSE. To remove it from the
1686 * code, remove the action under L2PERT=.TRUE., leave the ELSE part.
1687 * The following two loops should be blocked and fused with the
1688 * transposed copy above.
1689 *
1690  IF ( l2pert ) THEN
1691  xsc = sqrt(small)
1692  DO 2969 q = 1, nr
1693  ctemp = cmplx(xsc*abs( v(q,q) ),zero)
1694  DO 2968 p = 1, n
1695  IF ( ( p .GT. q ) .AND. ( abs(v(p,q)) .LE. temp1 )
1696  $ .OR. ( p .LT. q ) )
1697 * $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) )
1698  $ v(p,q) = ctemp
1699  IF ( p .LT. q ) v(p,q) = - v(p,q)
1700  2968 CONTINUE
1701  2969 CONTINUE
1702  ELSE
1703  CALL claset( 'U', nr-1, nr-1, czero, czero, v(1,2), ldv )
1704  END IF
1705 *
1706 * Estimate the row scaled condition number of R1
1707 * (If R1 is rectangular, N > NR, then the condition number
1708 * of the leading NR x NR submatrix is estimated.)
1709 *
1710  CALL clacpy( 'L', nr, nr, v, ldv, cwork(2*n+1), nr )
1711  DO 3950 p = 1, nr
1712  temp1 = scnrm2(nr-p+1,cwork(2*n+(p-1)*nr+p),1)
1713  CALL csscal(nr-p+1,one/temp1,cwork(2*n+(p-1)*nr+p),1)
1714  3950 CONTINUE
1715  CALL cpocon('L',nr,cwork(2*n+1),nr,one,temp1,
1716  $ cwork(2*n+nr*nr+1),rwork,ierr)
1717  condr1 = one / sqrt(temp1)
1718 * .. here need a second opinion on the condition number
1719 * .. then assume worst case scenario
1720 * R1 is OK for inverse <=> CONDR1 .LT. REAL(N)
1721 * more conservative <=> CONDR1 .LT. SQRT(REAL(N))
1722 *
1723  cond_ok = sqrt(sqrt(real(nr)))
1724 *[TP] COND_OK is a tuning parameter.
1725 *
1726  IF ( condr1 .LT. cond_ok ) THEN
1727 * .. the second QRF without pivoting. Note: in an optimized
1728 * implementation, this QRF should be implemented as the QRF
1729 * of a lower triangular matrix.
1730 * R1^* = Q2 * R2
1731  CALL cgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1),
1732  $ lwork-2*n, ierr )
1733 *
1734  IF ( l2pert ) THEN
1735  xsc = sqrt(small)/epsln
1736  DO 3959 p = 2, nr
1737  DO 3958 q = 1, p - 1
1738  ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),
1739  $ zero)
1740  IF ( abs(v(q,p)) .LE. temp1 )
1741 * $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) )
1742  $ v(q,p) = ctemp
1743  3958 CONTINUE
1744  3959 CONTINUE
1745  END IF
1746 *
1747  IF ( nr .NE. n )
1748  $ CALL clacpy( 'A', n, nr, v, ldv, cwork(2*n+1), n )
1749 * .. save ...
1750 *
1751 * .. this transposed copy should be better than naive
1752  DO 1969 p = 1, nr - 1
1753  CALL ccopy( nr-p, v(p,p+1), ldv, v(p+1,p), 1 )
1754  CALL clacgv(nr-p+1, v(p,p), 1 )
1755  1969 CONTINUE
1756  v(nr,nr)=conjg(v(nr,nr))
1757 *
1758  condr2 = condr1
1759 *
1760  ELSE
1761 *
1762 * .. ill-conditioned case: second QRF with pivoting
1763 * Note that windowed pivoting would be equally good
1764 * numerically, and more run-time efficient. So, in
1765 * an optimal implementation, the next call to CGEQP3
1766 * should be replaced with eg. CALL CGEQPX (ACM TOMS #782)
1767 * with properly (carefully) chosen parameters.
1768 *
1769 * R1^* * P2 = Q2 * R2
1770  DO 3003 p = 1, nr
1771  iwork(n+p) = 0
1772  3003 CONTINUE
1773  CALL cgeqp3( n, nr, v, ldv, iwork(n+1), cwork(n+1),
1774  $ cwork(2*n+1), lwork-2*n, rwork, ierr )
1775 ** CALL CGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1),
1776 ** $ LWORK-2*N, IERR )
1777  IF ( l2pert ) THEN
1778  xsc = sqrt(small)
1779  DO 3969 p = 2, nr
1780  DO 3968 q = 1, p - 1
1781  ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),
1782  $ zero)
1783  IF ( abs(v(q,p)) .LE. temp1 )
1784 * $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) )
1785  $ v(q,p) = ctemp
1786  3968 CONTINUE
1787  3969 CONTINUE
1788  END IF
1789 *
1790  CALL clacpy( 'A', n, nr, v, ldv, cwork(2*n+1), n )
1791 *
1792  IF ( l2pert ) THEN
1793  xsc = sqrt(small)
1794  DO 8970 p = 2, nr
1795  DO 8971 q = 1, p - 1
1796  ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),
1797  $ zero)
1798 * V(p,q) = - TEMP1*( V(q,p) / ABS(V(q,p)) )
1799  v(p,q) = - ctemp
1800  8971 CONTINUE
1801  8970 CONTINUE
1802  ELSE
1803  CALL claset( 'L',nr-1,nr-1,czero,czero,v(2,1),ldv )
1804  END IF
1805 * Now, compute R2 = L3 * Q3, the LQ factorization.
1806  CALL cgelqf( nr, nr, v, ldv, cwork(2*n+n*nr+1),
1807  $ cwork(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr, ierr )
1808 * .. and estimate the condition number
1809  CALL clacpy( 'L',nr,nr,v,ldv,cwork(2*n+n*nr+nr+1),nr )
1810  DO 4950 p = 1, nr
1811  temp1 = scnrm2( p, cwork(2*n+n*nr+nr+p), nr )
1812  CALL csscal( p, one/temp1, cwork(2*n+n*nr+nr+p), nr )
1813  4950 CONTINUE
1814  CALL cpocon( 'L',nr,cwork(2*n+n*nr+nr+1),nr,one,temp1,
1815  $ cwork(2*n+n*nr+nr+nr*nr+1),rwork,ierr )
1816  condr2 = one / sqrt(temp1)
1817 *
1818 *
1819  IF ( condr2 .GE. cond_ok ) THEN
1820 * .. save the Householder vectors used for Q3
1821 * (this overwrites the copy of R2, as it will not be
1822 * needed in this branch, but it does not overwritte the
1823 * Huseholder vectors of Q2.).
1824  CALL clacpy( 'U', nr, nr, v, ldv, cwork(2*n+1), n )
1825 * .. and the rest of the information on Q3 is in
1826 * WORK(2*N+N*NR+1:2*N+N*NR+N)
1827  END IF
1828 *
1829  END IF
1830 *
1831  IF ( l2pert ) THEN
1832  xsc = sqrt(small)
1833  DO 4968 q = 2, nr
1834  ctemp = xsc * v(q,q)
1835  DO 4969 p = 1, q - 1
1836 * V(p,q) = - TEMP1*( V(p,q) / ABS(V(p,q)) )
1837  v(p,q) = - ctemp
1838  4969 CONTINUE
1839  4968 CONTINUE
1840  ELSE
1841  CALL claset( 'U', nr-1,nr-1, czero,czero, v(1,2), ldv )
1842  END IF
1843 *
1844 * Second preconditioning finished; continue with Jacobi SVD
1845 * The input matrix is lower trinagular.
1846 *
1847 * Recover the right singular vectors as solution of a well
1848 * conditioned triangular matrix equation.
1849 *
1850  IF ( condr1 .LT. cond_ok ) THEN
1851 *
1852  CALL cgesvj( 'L','U','N',nr,nr,v,ldv,sva,nr,u, ldu,
1853  $ cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,rwork,
1854  $ lrwork, info )
1855  scalem = rwork(1)
1856  numrank = nint(rwork(2))
1857  DO 3970 p = 1, nr
1858  CALL ccopy( nr, v(1,p), 1, u(1,p), 1 )
1859  CALL csscal( nr, sva(p), v(1,p), 1 )
1860  3970 CONTINUE
1861 
1862 * .. pick the right matrix equation and solve it
1863 *
1864  IF ( nr .EQ. n ) THEN
1865 * :)) .. best case, R1 is inverted. The solution of this matrix
1866 * equation is Q2*V2 = the product of the Jacobi rotations
1867 * used in CGESVJ, premultiplied with the orthogonal matrix
1868 * from the second QR factorization.
1869  CALL ctrsm('L','U','N','N', nr,nr,cone, a,lda, v,ldv)
1870  ELSE
1871 * .. R1 is well conditioned, but non-square. Adjoint of R2
1872 * is inverted to get the product of the Jacobi rotations
1873 * used in CGESVJ. The Q-factor from the second QR
1874 * factorization is then built in explicitly.
1875  CALL ctrsm('L','U','C','N',nr,nr,cone,cwork(2*n+1),
1876  $ n,v,ldv)
1877  IF ( nr .LT. n ) THEN
1878  CALL claset('A',n-nr,nr,czero,czero,v(nr+1,1),ldv)
1879  CALL claset('A',nr,n-nr,czero,czero,v(1,nr+1),ldv)
1880  CALL claset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv)
1881  END IF
1882  CALL cunmqr('L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),
1883  $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr)
1884  END IF
1885 *
1886  ELSE IF ( condr2 .LT. cond_ok ) THEN
1887 *
1888 * The matrix R2 is inverted. The solution of the matrix equation
1889 * is Q3^* * V3 = the product of the Jacobi rotations (appplied to
1890 * the lower triangular L3 from the LQ factorization of
1891 * R2=L3*Q3), pre-multiplied with the transposed Q3.
1892  CALL cgesvj( 'L', 'U', 'N', nr, nr, v, ldv, sva, nr, u,
1893  $ ldu, cwork(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr,
1894  $ rwork, lrwork, info )
1895  scalem = rwork(1)
1896  numrank = nint(rwork(2))
1897  DO 3870 p = 1, nr
1898  CALL ccopy( nr, v(1,p), 1, u(1,p), 1 )
1899  CALL csscal( nr, sva(p), u(1,p), 1 )
1900  3870 CONTINUE
1901  CALL ctrsm('L','U','N','N',nr,nr,cone,cwork(2*n+1),n,
1902  $ u,ldu)
1903 * .. apply the permutation from the second QR factorization
1904  DO 873 q = 1, nr
1905  DO 872 p = 1, nr
1906  cwork(2*n+n*nr+nr+iwork(n+p)) = u(p,q)
1907  872 CONTINUE
1908  DO 874 p = 1, nr
1909  u(p,q) = cwork(2*n+n*nr+nr+p)
1910  874 CONTINUE
1911  873 CONTINUE
1912  IF ( nr .LT. n ) THEN
1913  CALL claset( 'A',n-nr,nr,czero,czero,v(nr+1,1),ldv )
1914  CALL claset( 'A',nr,n-nr,czero,czero,v(1,nr+1),ldv )
1915  CALL claset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv)
1916  END IF
1917  CALL cunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),
1918  $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
1919  ELSE
1920 * Last line of defense.
1921 * #:( This is a rather pathological case: no scaled condition
1922 * improvement after two pivoted QR factorizations. Other
1923 * possibility is that the rank revealing QR factorization
1924 * or the condition estimator has failed, or the COND_OK
1925 * is set very close to ONE (which is unnecessary). Normally,
1926 * this branch should never be executed, but in rare cases of
1927 * failure of the RRQR or condition estimator, the last line of
1928 * defense ensures that CGEJSV completes the task.
1929 * Compute the full SVD of L3 using CGESVJ with explicit
1930 * accumulation of Jacobi rotations.
1931  CALL cgesvj( 'L', 'U', 'V', nr, nr, v, ldv, sva, nr, u,
1932  $ ldu, cwork(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr,
1933  $ rwork, lrwork, info )
1934  scalem = rwork(1)
1935  numrank = nint(rwork(2))
1936  IF ( nr .LT. n ) THEN
1937  CALL claset( 'A',n-nr,nr,czero,czero,v(nr+1,1),ldv )
1938  CALL claset( 'A',nr,n-nr,czero,czero,v(1,nr+1),ldv )
1939  CALL claset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv)
1940  END IF
1941  CALL cunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),
1942  $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
1943 *
1944  CALL cunmlq( 'L', 'C', nr, nr, nr, cwork(2*n+1), n,
1945  $ cwork(2*n+n*nr+1), u, ldu, cwork(2*n+n*nr+nr+1),
1946  $ lwork-2*n-n*nr-nr, ierr )
1947  DO 773 q = 1, nr
1948  DO 772 p = 1, nr
1949  cwork(2*n+n*nr+nr+iwork(n+p)) = u(p,q)
1950  772 CONTINUE
1951  DO 774 p = 1, nr
1952  u(p,q) = cwork(2*n+n*nr+nr+p)
1953  774 CONTINUE
1954  773 CONTINUE
1955 *
1956  END IF
1957 *
1958 * Permute the rows of V using the (column) permutation from the
1959 * first QRF. Also, scale the columns to make them unit in
1960 * Euclidean norm. This applies to all cases.
1961 *
1962  temp1 = sqrt(real(n)) * epsln
1963  DO 1972 q = 1, n
1964  DO 972 p = 1, n
1965  cwork(2*n+n*nr+nr+iwork(p)) = v(p,q)
1966  972 CONTINUE
1967  DO 973 p = 1, n
1968  v(p,q) = cwork(2*n+n*nr+nr+p)
1969  973 CONTINUE
1970  xsc = one / scnrm2( n, v(1,q), 1 )
1971  IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1972  $ CALL csscal( n, xsc, v(1,q), 1 )
1973  1972 CONTINUE
1974 * At this moment, V contains the right singular vectors of A.
1975 * Next, assemble the left singular vector matrix U (M x N).
1976  IF ( nr .LT. m ) THEN
1977  CALL claset('A', m-nr, nr, czero, czero, u(nr+1,1), ldu)
1978  IF ( nr .LT. n1 ) THEN
1979  CALL claset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu)
1980  CALL claset('A',m-nr,n1-nr,czero,cone,
1981  $ u(nr+1,nr+1),ldu)
1982  END IF
1983  END IF
1984 *
1985 * The Q matrix from the first QRF is built into the left singular
1986 * matrix U. This applies to all cases.
1987 *
1988  CALL cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
1989  $ ldu, cwork(n+1), lwork-n, ierr )
1990 
1991 * The columns of U are normalized. The cost is O(M*N) flops.
1992  temp1 = sqrt(real(m)) * epsln
1993  DO 1973 p = 1, nr
1994  xsc = one / scnrm2( m, u(1,p), 1 )
1995  IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1996  $ CALL csscal( m, xsc, u(1,p), 1 )
1997  1973 CONTINUE
1998 *
1999 * If the initial QRF is computed with row pivoting, the left
2000 * singular vectors must be adjusted.
2001 *
2002  IF ( rowpiv )
2003  $ CALL claswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 )
2004 *
2005  ELSE
2006 *
2007 * .. the initial matrix A has almost orthogonal columns and
2008 * the second QRF is not needed
2009 *
2010  CALL clacpy( 'U', n, n, a, lda, cwork(n+1), n )
2011  IF ( l2pert ) THEN
2012  xsc = sqrt(small)
2013  DO 5970 p = 2, n
2014  ctemp = xsc * cwork( n + (p-1)*n + p )
2015  DO 5971 q = 1, p - 1
2016 * CWORK(N+(q-1)*N+p)=-TEMP1 * ( CWORK(N+(p-1)*N+q) /
2017 * $ ABS(CWORK(N+(p-1)*N+q)) )
2018  cwork(n+(q-1)*n+p)=-ctemp
2019  5971 CONTINUE
2020  5970 CONTINUE
2021  ELSE
2022  CALL claset( 'L',n-1,n-1,czero,czero,cwork(n+2),n )
2023  END IF
2024 *
2025  CALL cgesvj( 'U', 'U', 'N', n, n, cwork(n+1), n, sva,
2026  $ n, u, ldu, cwork(n+n*n+1), lwork-n-n*n, rwork, lrwork,
2027  $ info )
2028 *
2029  scalem = rwork(1)
2030  numrank = nint(rwork(2))
2031  DO 6970 p = 1, n
2032  CALL ccopy( n, cwork(n+(p-1)*n+1), 1, u(1,p), 1 )
2033  CALL csscal( n, sva(p), cwork(n+(p-1)*n+1), 1 )
2034  6970 CONTINUE
2035 *
2036  CALL ctrsm( 'L', 'U', 'N', 'N', n, n,
2037  $ cone, a, lda, cwork(n+1), n )
2038  DO 6972 p = 1, n
2039  CALL ccopy( n, cwork(n+p), n, v(iwork(p),1), ldv )
2040  6972 CONTINUE
2041  temp1 = sqrt(real(n))*epsln
2042  DO 6971 p = 1, n
2043  xsc = one / scnrm2( n, v(1,p), 1 )
2044  IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
2045  $ CALL csscal( n, xsc, v(1,p), 1 )
2046  6971 CONTINUE
2047 *
2048 * Assemble the left singular vector matrix U (M x N).
2049 *
2050  IF ( n .LT. m ) THEN
2051  CALL claset( 'A', m-n, n, czero, czero, u(n+1,1), ldu )
2052  IF ( n .LT. n1 ) THEN
2053  CALL claset('A',n, n1-n, czero, czero, u(1,n+1),ldu)
2054  CALL claset( 'A',m-n,n1-n, czero, cone,u(n+1,n+1),ldu)
2055  END IF
2056  END IF
2057  CALL cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
2058  $ ldu, cwork(n+1), lwork-n, ierr )
2059  temp1 = sqrt(real(m))*epsln
2060  DO 6973 p = 1, n1
2061  xsc = one / scnrm2( m, u(1,p), 1 )
2062  IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
2063  $ CALL csscal( m, xsc, u(1,p), 1 )
2064  6973 CONTINUE
2065 *
2066  IF ( rowpiv )
2067  $ CALL claswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 )
2068 *
2069  END IF
2070 *
2071 * end of the >> almost orthogonal case << in the full SVD
2072 *
2073  ELSE
2074 *
2075 * This branch deploys a preconditioned Jacobi SVD with explicitly
2076 * accumulated rotations. It is included as optional, mainly for
2077 * experimental purposes. It does perform well, and can also be used.
2078 * In this implementation, this branch will be automatically activated
2079 * if the condition number sigma_max(A) / sigma_min(A) is predicted
2080 * to be greater than the overflow threshold. This is because the
2081 * a posteriori computation of the singular vectors assumes robust
2082 * implementation of BLAS and some LAPACK procedures, capable of working
2083 * in presence of extreme values, e.g. when the singular values spread from
2084 * the underflow to the overflow threshold.
2085 *
2086  DO 7968 p = 1, nr
2087  CALL ccopy( n-p+1, a(p,p), lda, v(p,p), 1 )
2088  CALL clacgv( n-p+1, v(p,p), 1 )
2089  7968 CONTINUE
2090 *
2091  IF ( l2pert ) THEN
2092  xsc = sqrt(small/epsln)
2093  DO 5969 q = 1, nr
2094  ctemp = cmplx(xsc*abs( v(q,q) ),zero)
2095  DO 5968 p = 1, n
2096  IF ( ( p .GT. q ) .AND. ( abs(v(p,q)) .LE. temp1 )
2097  $ .OR. ( p .LT. q ) )
2098 * $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) )
2099  $ v(p,q) = ctemp
2100  IF ( p .LT. q ) v(p,q) = - v(p,q)
2101  5968 CONTINUE
2102  5969 CONTINUE
2103  ELSE
2104  CALL claset( 'U', nr-1, nr-1, czero, czero, v(1,2), ldv )
2105  END IF
2106 
2107  CALL cgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1),
2108  $ lwork-2*n, ierr )
2109  CALL clacpy( 'L', n, nr, v, ldv, cwork(2*n+1), n )
2110 *
2111  DO 7969 p = 1, nr
2112  CALL ccopy( nr-p+1, v(p,p), ldv, u(p,p), 1 )
2113  CALL clacgv( nr-p+1, u(p,p), 1 )
2114  7969 CONTINUE
2115 
2116  IF ( l2pert ) THEN
2117  xsc = sqrt(small/epsln)
2118  DO 9970 q = 2, nr
2119  DO 9971 p = 1, q - 1
2120  ctemp = cmplx(xsc * min(abs(u(p,p)),abs(u(q,q))),
2121  $ zero)
2122 * U(p,q) = - TEMP1 * ( U(q,p) / ABS(U(q,p)) )
2123  u(p,q) = - ctemp
2124  9971 CONTINUE
2125  9970 CONTINUE
2126  ELSE
2127  CALL claset('U', nr-1, nr-1, czero, czero, u(1,2), ldu )
2128  END IF
2129 
2130  CALL cgesvj( 'L', 'U', 'V', nr, nr, u, ldu, sva,
2131  $ n, v, ldv, cwork(2*n+n*nr+1), lwork-2*n-n*nr,
2132  $ rwork, lrwork, info )
2133  scalem = rwork(1)
2134  numrank = nint(rwork(2))
2135 
2136  IF ( nr .LT. n ) THEN
2137  CALL claset( 'A',n-nr,nr,czero,czero,v(nr+1,1),ldv )
2138  CALL claset( 'A',nr,n-nr,czero,czero,v(1,nr+1),ldv )
2139  CALL claset( 'A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv )
2140  END IF
2141 
2142  CALL cunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),
2143  $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
2144 *
2145 * Permute the rows of V using the (column) permutation from the
2146 * first QRF. Also, scale the columns to make them unit in
2147 * Euclidean norm. This applies to all cases.
2148 *
2149  temp1 = sqrt(real(n)) * epsln
2150  DO 7972 q = 1, n
2151  DO 8972 p = 1, n
2152  cwork(2*n+n*nr+nr+iwork(p)) = v(p,q)
2153  8972 CONTINUE
2154  DO 8973 p = 1, n
2155  v(p,q) = cwork(2*n+n*nr+nr+p)
2156  8973 CONTINUE
2157  xsc = one / scnrm2( n, v(1,q), 1 )
2158  IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
2159  $ CALL csscal( n, xsc, v(1,q), 1 )
2160  7972 CONTINUE
2161 *
2162 * At this moment, V contains the right singular vectors of A.
2163 * Next, assemble the left singular vector matrix U (M x N).
2164 *
2165  IF ( nr .LT. m ) THEN
2166  CALL claset( 'A', m-nr, nr, czero, czero, u(nr+1,1), ldu )
2167  IF ( nr .LT. n1 ) THEN
2168  CALL claset('A',nr, n1-nr, czero, czero, u(1,nr+1),ldu)
2169  CALL claset('A',m-nr,n1-nr, czero, cone,u(nr+1,nr+1),ldu)
2170  END IF
2171  END IF
2172 *
2173  CALL cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
2174  $ ldu, cwork(n+1), lwork-n, ierr )
2175 *
2176  IF ( rowpiv )
2177  $ CALL claswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 )
2178 *
2179 *
2180  END IF
2181  IF ( transp ) THEN
2182 * .. swap U and V because the procedure worked on A^*
2183  DO 6974 p = 1, n
2184  CALL cswap( n, u(1,p), 1, v(1,p), 1 )
2185  6974 CONTINUE
2186  END IF
2187 *
2188  END IF
2189 * end of the full SVD
2190 *
2191 * Undo scaling, if necessary (and possible)
2192 *
2193  IF ( uscal2 .LE. (big/sva(1))*uscal1 ) THEN
2194  CALL slascl( 'G', 0, 0, uscal1, uscal2, nr, 1, sva, n, ierr )
2195  uscal1 = one
2196  uscal2 = one
2197  END IF
2198 *
2199  IF ( nr .LT. n ) THEN
2200  DO 3004 p = nr+1, n
2201  sva(p) = zero
2202  3004 CONTINUE
2203  END IF
2204 *
2205  rwork(1) = uscal2 * scalem
2206  rwork(2) = uscal1
2207  IF ( errest ) rwork(3) = sconda
2208  IF ( lsvec .AND. rsvec ) THEN
2209  rwork(4) = condr1
2210  rwork(5) = condr2
2211  END IF
2212  IF ( l2tran ) THEN
2213  rwork(6) = entra
2214  rwork(7) = entrat
2215  END IF
2216 *
2217  iwork(1) = nr
2218  iwork(2) = numrank
2219  iwork(3) = warning
2220  IF ( transp ) THEN
2221  iwork(4) = 1
2222  ELSE
2223  iwork(4) = -1
2224  END IF
2225 
2226 *
2227  RETURN
2228 * ..
2229 * .. END OF CGEJSV
2230 * ..
2231  END
2232 *
subroutine slassq(n, x, incx, scl, sumsq)
SLASSQ updates a sum of squares represented in scaled form.
Definition: slassq.f90:126
subroutine slascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
SLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: slascl.f:143
subroutine classq(n, x, incx, scl, sumsq)
CLASSQ updates a sum of squares represented in scaled form.
Definition: classq.f90:126
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
subroutine ccopy(N, CX, INCX, CY, INCY)
CCOPY
Definition: ccopy.f:81
subroutine csscal(N, SA, CX, INCX)
CSSCAL
Definition: csscal.f:78
subroutine cswap(N, CX, INCX, CY, INCY)
CSWAP
Definition: cswap.f:81
subroutine ctrsm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
CTRSM
Definition: ctrsm.f:180
subroutine cgeqrf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
CGEQRF
Definition: cgeqrf.f:145
subroutine cgeqp3(M, N, A, LDA, JPVT, TAU, WORK, LWORK, RWORK, INFO)
CGEQP3
Definition: cgeqp3.f:159
subroutine cgesvj(JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, LDV, CWORK, LWORK, RWORK, LRWORK, INFO)
CGESVJ
Definition: cgesvj.f:351
subroutine cgelqf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
CGELQF
Definition: cgelqf.f:143
subroutine cgejsv(JOBA, JOBU, JOBV, JOBR, JOBT, JOBP, M, N, A, LDA, SVA, U, LDU, V, LDV, CWORK, LWORK, RWORK, LRWORK, IWORK, INFO)
CGEJSV
Definition: cgejsv.f:568
subroutine clacgv(N, X, INCX)
CLACGV conjugates a complex vector.
Definition: clacgv.f:74
subroutine claswp(N, A, LDA, K1, K2, IPIV, INCX)
CLASWP performs a series of row interchanges on a general rectangular matrix.
Definition: claswp.f:115
subroutine claset(UPLO, M, N, ALPHA, BETA, A, LDA)
CLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition: claset.f:106
subroutine clascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
CLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: clascl.f:143
subroutine clapmr(FORWRD, M, N, X, LDX, K)
CLAPMR rearranges rows of a matrix as specified by a permutation vector.
Definition: clapmr.f:104
subroutine clacpy(UPLO, M, N, A, LDA, B, LDB)
CLACPY copies all or part of one two-dimensional array to another.
Definition: clacpy.f:103
subroutine cunmlq(SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
CUNMLQ
Definition: cunmlq.f:168
subroutine cunmqr(SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
CUNMQR
Definition: cunmqr.f:168
subroutine cungqr(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
CUNGQR
Definition: cungqr.f:128
subroutine cpocon(UPLO, N, A, LDA, ANORM, RCOND, WORK, RWORK, INFO)
CPOCON
Definition: cpocon.f:121
subroutine sscal(N, SA, SX, INCX)
SSCAL
Definition: sscal.f:79