LAPACK  3.10.0
LAPACK: Linear Algebra PACKage
cgetrf.f
Go to the documentation of this file.
1 C> \brief \b CGETRF VARIANT: iterative version of Sivan Toledo's recursive LU algorithm
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 * Definition:
9 * ===========
10 *
11 * SUBROUTINE CGETRF( M, N, A, LDA, IPIV, INFO )
12 *
13 * .. Scalar Arguments ..
14 * INTEGER INFO, LDA, M, N
15 * ..
16 * .. Array Arguments ..
17 * INTEGER IPIV( * )
18 * COMPLEX A( LDA, * )
19 * ..
20 *
21 * Purpose
22 * =======
23 *
24 C>\details \b Purpose:
25 C>\verbatim
26 C>
27 C> CGETRF computes an LU factorization of a general M-by-N matrix A
28 C> using partial pivoting with row interchanges.
29 C>
30 C> The factorization has the form
31 C> A = P * L * U
32 C> where P is a permutation matrix, L is lower triangular with unit
33 C> diagonal elements (lower trapezoidal if m > n), and U is upper
34 C> triangular (upper trapezoidal if m < n).
35 C>
36 C> This code implements an iterative version of Sivan Toledo's recursive
37 C> LU algorithm[1]. For square matrices, this iterative versions should
38 C> be within a factor of two of the optimum number of memory transfers.
39 C>
40 C> The pattern is as follows, with the large blocks of U being updated
41 C> in one call to DTRSM, and the dotted lines denoting sections that
42 C> have had all pending permutations applied:
43 C>
44 C> 1 2 3 4 5 6 7 8
45 C> +-+-+---+-------+------
46 C> | |1| | |
47 C> |.+-+ 2 | |
48 C> | | | | |
49 C> |.|.+-+-+ 4 |
50 C> | | | |1| |
51 C> | | |.+-+ |
52 C> | | | | | |
53 C> |.|.|.|.+-+-+---+ 8
54 C> | | | | | |1| |
55 C> | | | | |.+-+ 2 |
56 C> | | | | | | | |
57 C> | | | | |.|.+-+-+
58 C> | | | | | | | |1|
59 C> | | | | | | |.+-+
60 C> | | | | | | | | |
61 C> |.|.|.|.|.|.|.|.+-----
62 C> | | | | | | | | |
63 C>
64 C> The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in
65 C> the binary expansion of the current column. Each Schur update is
66 C> applied as soon as the necessary portion of U is available.
67 C>
68 C> [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with
69 C> Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),
70 C> 1065-1081. http://dx.doi.org/10.1137/S0895479896297744
71 C>
72 C>\endverbatim
73 *
74 * Arguments:
75 * ==========
76 *
77 C> \param[in] M
78 C> \verbatim
79 C> M is INTEGER
80 C> The number of rows of the matrix A. M >= 0.
81 C> \endverbatim
82 C>
83 C> \param[in] N
84 C> \verbatim
85 C> N is INTEGER
86 C> The number of columns of the matrix A. N >= 0.
87 C> \endverbatim
88 C>
89 C> \param[in,out] A
90 C> \verbatim
91 C> A is COMPLEX array, dimension (LDA,N)
92 C> On entry, the M-by-N matrix to be factored.
93 C> On exit, the factors L and U from the factorization
94 C> A = P*L*U; the unit diagonal elements of L are not stored.
95 C> \endverbatim
96 C>
97 C> \param[in] LDA
98 C> \verbatim
99 C> LDA is INTEGER
100 C> The leading dimension of the array A. LDA >= max(1,M).
101 C> \endverbatim
102 C>
103 C> \param[out] IPIV
104 C> \verbatim
105 C> IPIV is INTEGER array, dimension (min(M,N))
106 C> The pivot indices; for 1 <= i <= min(M,N), row i of the
107 C> matrix was interchanged with row IPIV(i).
108 C> \endverbatim
109 C>
110 C> \param[out] INFO
111 C> \verbatim
112 C> INFO is INTEGER
113 C> = 0: successful exit
114 C> < 0: if INFO = -i, the i-th argument had an illegal value
115 C> > 0: if INFO = i, U(i,i) is exactly zero. The factorization
116 C> has been completed, but the factor U is exactly
117 C> singular, and division by zero will occur if it is used
118 C> to solve a system of equations.
119 C> \endverbatim
120 C>
121 *
122 * Authors:
123 * ========
124 *
125 C> \author Univ. of Tennessee
126 C> \author Univ. of California Berkeley
127 C> \author Univ. of Colorado Denver
128 C> \author NAG Ltd.
129 *
130 C> \date December 2016
131 *
132 C> \ingroup variantsGEcomputational
133 *
134 * =====================================================================
135  SUBROUTINE cgetrf( M, N, A, LDA, IPIV, INFO )
136 *
137 * -- LAPACK computational routine (version 3.X) --
138 * -- LAPACK is a software package provided by Univ. of Tennessee, --
139 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
140 *
141 * .. Scalar Arguments ..
142  INTEGER INFO, LDA, M, N
143 * ..
144 * .. Array Arguments ..
145  INTEGER IPIV( * )
146  COMPLEX A( LDA, * )
147 * ..
148 *
149 * =====================================================================
150 *
151 * .. Parameters ..
152  COMPLEX ONE, NEGONE
153  REAL ZERO
154  parameter( one = (1.0e+0, 0.0e+0) )
155  parameter( negone = (-1.0e+0, 0.0e+0) )
156  parameter( zero = 0.0e+0 )
157 * ..
158 * .. Local Scalars ..
159  REAL SFMIN, PIVMAG
160  COMPLEX TMP
161  INTEGER I, J, JP, NSTEP, NTOPIV, NPIVED, KAHEAD
162  INTEGER KSTART, IPIVSTART, JPIVSTART, KCOLS
163 * ..
164 * .. External Functions ..
165  REAL SLAMCH
166  INTEGER ICAMAX
167  LOGICAL SISNAN
168  EXTERNAL slamch, icamax, sisnan
169 * ..
170 * .. External Subroutines ..
171  EXTERNAL ctrsm, cscal, xerbla, claswp
172 * ..
173 * .. Intrinsic Functions ..
174  INTRINSIC max, min, iand, abs
175 * ..
176 * .. Executable Statements ..
177 *
178 * Test the input parameters.
179 *
180  info = 0
181  IF( m.LT.0 ) THEN
182  info = -1
183  ELSE IF( n.LT.0 ) THEN
184  info = -2
185  ELSE IF( lda.LT.max( 1, m ) ) THEN
186  info = -4
187  END IF
188  IF( info.NE.0 ) THEN
189  CALL xerbla( 'CGETRF', -info )
190  RETURN
191  END IF
192 *
193 * Quick return if possible
194 *
195  IF( m.EQ.0 .OR. n.EQ.0 )
196  $ RETURN
197 *
198 * Compute machine safe minimum
199 *
200  sfmin = slamch( 'S' )
201 *
202  nstep = min( m, n )
203  DO j = 1, nstep
204  kahead = iand( j, -j )
205  kstart = j + 1 - kahead
206  kcols = min( kahead, m-j )
207 *
208 * Find pivot.
209 *
210  jp = j - 1 + icamax( m-j+1, a( j, j ), 1 )
211  ipiv( j ) = jp
212 
213 * Permute just this column.
214  IF (jp .NE. j) THEN
215  tmp = a( j, j )
216  a( j, j ) = a( jp, j )
217  a( jp, j ) = tmp
218  END IF
219 
220 * Apply pending permutations to L
221  ntopiv = 1
222  ipivstart = j
223  jpivstart = j - ntopiv
224  DO WHILE ( ntopiv .LT. kahead )
225  CALL claswp( ntopiv, a( 1, jpivstart ), lda, ipivstart, j,
226  $ ipiv, 1 )
227  ipivstart = ipivstart - ntopiv;
228  ntopiv = ntopiv * 2;
229  jpivstart = jpivstart - ntopiv;
230  END DO
231 
232 * Permute U block to match L
233  CALL claswp( kcols, a( 1,j+1 ), lda, kstart, j, ipiv, 1 )
234 
235 * Factor the current column
236  pivmag = abs( a( j, j ) )
237  IF( pivmag.NE.zero .AND. .NOT.sisnan( pivmag ) ) THEN
238  IF( pivmag .GE. sfmin ) THEN
239  CALL cscal( m-j, one / a( j, j ), a( j+1, j ), 1 )
240  ELSE
241  DO i = 1, m-j
242  a( j+i, j ) = a( j+i, j ) / a( j, j )
243  END DO
244  END IF
245  ELSE IF( pivmag .EQ. zero .AND. info .EQ. 0 ) THEN
246  info = j
247  END IF
248 
249 * Solve for U block.
250  CALL ctrsm( 'Left', 'Lower', 'No transpose', 'Unit', kahead,
251  $ kcols, one, a( kstart, kstart ), lda,
252  $ a( kstart, j+1 ), lda )
253 * Schur complement.
254  CALL cgemm( 'No transpose', 'No transpose', m-j,
255  $ kcols, kahead, negone, a( j+1, kstart ), lda,
256  $ a( kstart, j+1 ), lda, one, a( j+1, j+1 ), lda )
257  END DO
258 
259 * Handle pivot permutations on the way out of the recursion
260  npived = iand( nstep, -nstep )
261  j = nstep - npived
262  DO WHILE ( j .GT. 0 )
263  ntopiv = iand( j, -j )
264  CALL claswp( ntopiv, a( 1, j-ntopiv+1 ), lda, j+1, nstep,
265  $ ipiv, 1 )
266  j = j - ntopiv
267  END DO
268 
269 * If short and wide, handle the rest of the columns.
270  IF ( m .LT. n ) THEN
271  CALL claswp( n-m, a( 1, m+kcols+1 ), lda, 1, m, ipiv, 1 )
272  CALL ctrsm( 'Left', 'Lower', 'No transpose', 'Unit', m,
273  $ n-m, one, a, lda, a( 1,m+kcols+1 ), lda )
274  END IF
275 
276  RETURN
277 *
278 * End of CGETRF
279 *
280  END
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
subroutine cscal(N, CA, CX, INCX)
CSCAL
Definition: cscal.f:78
subroutine cgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
CGEMM
Definition: cgemm.f:187
subroutine ctrsm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
CTRSM
Definition: ctrsm.f:180
subroutine cgetrf(M, N, A, LDA, IPIV, INFO)
CGETRF
Definition: cgetrf.f:108
subroutine claswp(N, A, LDA, K1, K2, IPIV, INCX)
CLASWP performs a series of row interchanges on a general rectangular matrix.
Definition: claswp.f:115