📄 sgelss.f
字号:
T1 = SECOND( ) CALL SORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ), $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) T2 = SECOND( ) TIMNG( ORMBR ) = TIMNG( ORMBR ) + ( T2-T1 )** Generate right bidiagonalizing vectors of R in A* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)* NB = ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) OPCNT( ORGBR ) = OPCNT( ORGBR ) + $ SOPLA2( 'SORGBR', 'P', N, N, N, 0, NB ) T1 = SECOND( ) CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), $ WORK( IWORK ), LWORK-IWORK+1, INFO ) T2 = SECOND( ) TIMNG( ORGBR ) = TIMNG( ORGBR ) + ( T2-T1 ) IWORK = IE + N** Perform bidiagonal QR iteration* multiply B by transpose of left singular vectors* compute right singular vectors in A* (Workspace: need BDSPAC)* OPS = 0 T1 = SECOND( ) CALL SBDSQR( 'U', N, N, 0, NRHS, S, WORK( IE ), A, LDA, VDUM, $ 1, B, LDB, WORK( IWORK ), INFO ) T2 = SECOND( ) TIMNG( BDSQR ) = TIMNG( BDSQR ) + ( T2-T1 ) OPCNT( BDSQR ) = OPCNT( BDSQR ) + OPS IF( INFO.NE.0 ) $ GO TO 70** Multiply B by reciprocals of singular values* OPCNT( GELSS ) = OPCNT( GELSS ) + REAL( 1 ) THR = MAX( RCOND*S( 1 ), SFMIN ) IF( RCOND.LT.ZERO ) THEN OPCNT( GELSS ) = OPCNT( GELSS ) + REAL( 1 ) THR = MAX( EPS*S( 1 ), SFMIN ) END IF RANK = 0 DO 10 I = 1, N IF( S( I ).GT.THR ) THEN OPCNT( GELSS ) = OPCNT( GELSS ) + REAL( NRHS + 3 ) CALL SRSCL( NRHS, S( I ), B( I, 1 ), LDB ) RANK = RANK + 1 ELSE CALL SLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) END IF 10 CONTINUE** Multiply B by right singular vectors* (Workspace: need N, prefer N*NRHS)* IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN OPCNT( GEMM ) = OPCNT( GEMM ) + $ SOPBL3( 'SGEMM ', N, NRHS, N ) T1 = SECOND( ) CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, A, LDA, B, LDB, ZERO, $ WORK, LDB ) T2 = SECOND( ) TIMNG( GEMM ) = TIMNG( GEMM ) + ( T2-T1 ) CALL SLACPY( 'G', N, NRHS, WORK, LDB, B, LDB ) ELSE IF( NRHS.GT.1 ) THEN CHUNK = LWORK / N DO 20 I = 1, NRHS, CHUNK BL = MIN( NRHS-I+1, CHUNK ) OPCNT( GEMM ) = OPCNT( GEMM ) + $ SOPBL3( 'SGEMM ', N, BL, N ) T1 = SECOND( ) CALL SGEMM( 'T', 'N', N, BL, N, ONE, A, LDA, B( 1, I ), $ LDB, ZERO, WORK, N ) T2 = SECOND( ) TIMNG( GEMM ) = TIMNG( GEMM ) + ( T2-T1 ) CALL SLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB ) 20 CONTINUE ELSE OPCNT( GEMV ) = OPCNT( GEMV ) + $ SOPBL2( 'SGEMV ', N, N, 0, 0 ) T1 = SECOND( ) CALL SGEMV( 'T', N, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 ) T2 = SECOND( ) TIMNG( GEMV ) = TIMNG( GEMV ) + ( T2-T1 ) CALL SCOPY( N, WORK, 1, B, 1 ) END IF* ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+ $ MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN** Path 2a - underdetermined, with many more columns than rows* and sufficient workspace for an efficient algorithm* LDWORK = M IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ), $ M*LDA+M+M*NRHS ) )LDWORK = LDA ITAU = 1 IWORK = M + 1** Compute A=L*Q* (Workspace: need 2*M, prefer M+M*NB)* NB = ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) OPCNT( GELQF ) = OPCNT( GELQF ) + $ SOPLA( 'SGELQF', M, N, 0, 0, NB ) T1 = SECOND( ) CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), $ LWORK-IWORK+1, INFO ) T2 = SECOND( ) TIMNG( GELQF ) = TIMNG( GELQF ) + ( T2-T1 ) IL = IWORK** Copy L to WORK(IL), zeroing out above it* CALL SLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK ) CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ), $ LDWORK ) IE = IL + LDWORK*M ITAUQ = IE + M ITAUP = ITAUQ + M IWORK = ITAUP + M** Bidiagonalize L in WORK(IL)* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)* NB = ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) OPCNT( GEBRD ) = OPCNT( GEBRD ) + $ SOPLA( 'SGEBRD', M, M, 0, 0, NB ) T1 = SECOND( ) CALL SGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ), $ WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ), $ LWORK-IWORK+1, INFO ) T2 = SECOND( ) TIMNG( GEBRD ) = TIMNG( GEBRD ) + ( T2-T1 )** Multiply B by transpose of left bidiagonalizing vectors of L* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)* NB = ILAENV( 1, 'SORMBR', 'QLT', M, NRHS, M, -1 ) OPCNT( ORMBR ) = OPCNT( ORMBR ) + $ SOPLA2( 'SORMBR', 'QLT', M, NRHS, M, 0, NB ) T1 = SECOND( ) CALL SORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK, $ WORK( ITAUQ ), B, LDB, WORK( IWORK ), $ LWORK-IWORK+1, INFO ) T2 = SECOND( ) TIMNG( ORMBR ) = TIMNG( ORMBR ) + ( T2-T1 )** Generate right bidiagonalizing vectors of R in WORK(IL)* (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB)* NB = ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) OPCNT( ORGBR ) = OPCNT( ORGBR ) + $ SOPLA2( 'SORGBR', 'P', M, M, M, 0, NB ) T1 = SECOND( ) CALL SORGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ), $ WORK( IWORK ), LWORK-IWORK+1, INFO ) T2 = SECOND( ) TIMNG( ORGBR ) = TIMNG( ORGBR ) + ( T2-T1 ) IWORK = IE + M** Perform bidiagonal QR iteration,* computing right singular vectors of L in WORK(IL) and* multiplying B by transpose of left singular vectors* (Workspace: need M*M+M+BDSPAC)* OPS = 0 T1 = SECOND( ) CALL SBDSQR( 'U', M, M, 0, NRHS, S, WORK( IE ), WORK( IL ), $ LDWORK, A, LDA, B, LDB, WORK( IWORK ), INFO ) T2 = SECOND( ) TIMNG( BDSQR ) = TIMNG( BDSQR ) + ( T2-T1 ) OPCNT( BDSQR ) = OPCNT( BDSQR ) + OPS IF( INFO.NE.0 ) $ GO TO 70** Multiply B by reciprocals of singular values* OPCNT( GELSS ) = OPCNT( GELSS ) + REAL( 1 ) THR = MAX( RCOND*S( 1 ), SFMIN ) IF( RCOND.LT.ZERO ) THEN OPCNT( GELSS ) = OPCNT( GELSS ) + REAL( 1 ) THR = MAX( EPS*S( 1 ), SFMIN ) END IF RANK = 0 DO 30 I = 1, M IF( S( I ).GT.THR ) THEN OPCNT( GELSS ) = OPCNT( GELSS ) + REAL( NRHS + 3 ) CALL SRSCL( NRHS, S( I ), B( I, 1 ), LDB ) RANK = RANK + 1 ELSE CALL SLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) END IF 30 CONTINUE IWORK = IE** Multiply B by right singular vectors of L in WORK(IL)* (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS)* IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN OPCNT( GEMM ) = OPCNT( GEMM ) + $ SOPBL3( 'SGEMM ', M, NRHS, M ) T1 = SECOND( ) CALL SGEMM( 'T', 'N', M, NRHS, M, ONE, WORK( IL ), LDWORK, $ B, LDB, ZERO, WORK( IWORK ), LDB ) T2 = SECOND( ) TIMNG( GEMM ) = TIMNG( GEMM ) + ( T2-T1 ) CALL SLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB ) ELSE IF( NRHS.GT.1 ) THEN CHUNK = ( LWORK-IWORK+1 ) / M DO 40 I = 1, NRHS, CHUNK BL = MIN( NRHS-I+1, CHUNK ) OPCNT( GEMM ) = OPCNT( GEMM ) + $ SOPBL3( 'SGEMM ', M, BL, M ) T1 = SECOND( ) CALL SGEMM( 'T', 'N', M, BL, M, ONE, WORK( IL ), LDWORK, $ B( 1, I ), LDB, ZERO, WORK( IWORK ), N ) T2 = SECOND( ) TIMNG( GEMM ) = TIMNG( GEMM ) + ( T2-T1 ) CALL SLACPY( 'G', M, BL, WORK( IWORK ), N, B( 1, I ), $ LDB ) 40 CONTINUE ELSE OPCNT( GEMV ) = OPCNT( GEMV ) + $ SOPBL2( 'SGEMV ', M, M, 0, 0 ) T1 = SECOND( ) CALL SGEMV( 'T', M, M, ONE, WORK( IL ), LDWORK, B( 1, 1 ), $ 1, ZERO, WORK( IWORK ), 1 ) T2 = SECOND( ) TIMNG( GEMV ) = TIMNG( GEMV ) + ( T2-T1 ) CALL SCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 ) END IF** Zero out below first M rows of B* CALL SLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB ) IWORK = ITAU + M** Multiply transpose(Q) by B* (Workspace: need M+NRHS, prefer M+NRHS*NB)* NB = ILAENV( 1, 'SORMLQ', 'LT', N, NRHS, M, -1 ) OPCNT( ORMLQ ) = OPCNT( ORMLQ ) + $ SOPLA( 'SORMLQ', N, NRHS, M, 0, NB ) T1 = SECOND( ) CALL SORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B, $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) T2 = SECOND( ) TIMNG( ORMLQ ) = TIMNG( ORMLQ ) + ( T2-T1 )* ELSE** Path 2 - remaining underdetermined cases* IE = 1 ITAUQ = IE + M ITAUP = ITAUQ + M IWORK = ITAUP + M** Bidiagonalize A* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)* NB = ILAENV( 1, 'SGEBRD', ' ', M, N, -1, -1 ) OPCNT( GEBRD ) = OPCNT( GEBRD ) + $ SOPLA( 'SGEBRD', M, N, 0, 0, NB ) T1 = SECOND( ) CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, $ INFO ) T2 = SECOND( ) TIMNG( GEBRD ) = TIMNG( GEBRD ) + ( T2-T1 )** Multiply B by transpose of left bidiagonalizing vectors* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)* NB = ILAENV( 1, 'SORMBR', 'QLT', M, NRHS, N, -1 ) OPCNT( ORMBR ) = OPCNT( ORMBR ) + $ SOPLA2( 'SORMBR', 'QLT', M, NRHS, N, 0, NB ) T1 = SECOND( ) CALL SORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ), $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) T2 = SECOND( ) TIMNG( ORMBR ) = TIMNG( ORMBR ) + ( T2-T1 )** Generate right bidiagonalizing vectors in A* (Workspace: need 4*M, prefer 3*M+M*NB)* NB = ILAENV( 1, 'SORGBR', 'P', M, N, M, -1 ) OPCNT( ORGBR ) = OPCNT( ORGBR ) + $ SOPLA2( 'SORGBR', 'P', M, N, M, 0, NB ) T1 = SECOND( ) CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), $ WORK( IWORK ), LWORK-IWORK+1, INFO ) T2 = SECOND( ) TIMNG( ORGBR ) = TIMNG( ORGBR ) + ( T2-T1 ) IWORK = IE + M** Perform bidiagonal QR iteration,* computing right singular vectors of A in A and* multiplying B by transpose of left singular vectors* (Workspace: need BDSPAC)* OPS = 0 T1 = SECOND( ) CALL SBDSQR( 'L', M, N, 0, NRHS, S, WORK( IE ), A, LDA, VDUM, $ 1, B, LDB, WORK( IWORK ), INFO ) T2 = SECOND( ) TIMNG( BDSQR ) = TIMNG( BDSQR ) + ( T2-T1 ) OPCNT( BDSQR ) = OPCNT( BDSQR ) + OPS IF( INFO.NE.0 ) $ GO TO 70** Multiply B by reciprocals of singular values* OPCNT( GELSS ) = OPCNT( GELSS ) + REAL( 1 ) THR = MAX( RCOND*S( 1 ), SFMIN ) IF( RCOND.LT.ZERO ) THEN OPCNT( GELSS ) = OPCNT( GELSS ) + REAL( 1 ) THR = MAX( EPS*S( 1 ), SFMIN ) END IF RANK = 0 DO 50 I = 1, M IF( S( I ).GT.THR ) THEN OPCNT( GELSS ) = OPCNT( GELSS ) + REAL( NRHS + 3 ) CALL SRSCL( NRHS, S( I ), B( I, 1 ), LDB ) RANK = RANK + 1 ELSE CALL SLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) END IF 50 CONTINUE** Multiply B by right singular vectors of A* (Workspace: need N, prefer N*NRHS)* IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN OPCNT( GEMM ) = OPCNT( GEMM ) + $ SOPBL3( 'SGEMM ', N, NRHS, M ) T1 = SECOND( ) CALL SGEMM( 'T', 'N', N, NRHS, M, ONE, A, LDA, B, LDB, ZERO, $ WORK, LDB ) T2 = SECOND( ) TIMNG( GEMM ) = TIMNG( GEMM ) + ( T2-T1 ) CALL SLACPY( 'F', N, NRHS, WORK, LDB, B, LDB ) ELSE IF( NRHS.GT.1 ) THEN CHUNK = LWORK / N DO 60 I = 1, NRHS, CHUNK BL = MIN( NRHS-I+1, CHUNK ) OPCNT( GEMM ) = OPCNT( GEMM ) + $ SOPBL3( 'SGEMM ', N, BL, M ) T1 = SECOND( ) CALL SGEMM( 'T', 'N', N, BL, M, ONE, A, LDA, B( 1, I ), $ LDB, ZERO, WORK, N ) T2 = SECOND( ) TIMNG( GEMM ) = TIMNG( GEMM ) + ( T2-T1 ) CALL SLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB ) 60 CONTINUE ELSE OPCNT( GEMV ) = OPCNT( GEMV ) + $ SOPBL2( 'SGEMV ', M, N, 0, 0 ) T1 = SECOND( ) CALL SGEMV( 'T', M, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 ) T2 = SECOND( ) TIMNG( GEMV ) = TIMNG( GEMV ) + ( T2-T1 ) CALL SCOPY( N, WORK, 1, B, 1 ) END IF END IF** Undo scaling* IF( IASCL.EQ.1 ) THEN OPCNT( GELSS ) = OPCNT( GELSS ) + REAL( N*NRHS + MINMN ) CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, $ INFO ) ELSE IF( IASCL.EQ.2 ) THEN OPCNT( GELSS ) = OPCNT( GELSS ) + REAL( N*NRHS + MINMN ) CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, $ INFO ) END IF IF( IBSCL.EQ.1 ) THEN OPCNT( GELSS ) = OPCNT( GELSS ) + REAL( N*NRHS ) CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) ELSE IF( IBSCL.EQ.2 ) THEN OPCNT( GELSS ) = OPCNT( GELSS ) + REAL( N*NRHS ) CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) END IF* 70 CONTINUE WORK( 1 ) = MAXWRK RETURN** End of SGELSS* END
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -