📄 sba_lapack.c
字号:
///////////////////////////////////////////////////////////////////////////////////// //// Linear algebra operations for the sba package//// Copyright (C) 2004-2008 Manolis Lourakis (lourakis at ics forth gr)//// Institute of Computer Science, Foundation for Research & Technology - Hellas//// Heraklion, Crete, Greece.//////// This program is free software; you can redistribute it and/or modify//// it under the terms of the GNU General Public License as published by//// the Free Software Foundation; either version 2 of the License, or//// (at your option) any later version.//////// This program is distributed in the hope that it will be useful,//// but WITHOUT ANY WARRANTY; without even the implied warranty of//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the//// GNU General Public License for more details.///////////////////////////////////////////////////////////////////////////////////////#include <stdio.h>#include <stdlib.h>#include <string.h>#include <math.h>#include <float.h>#include "compiler.h"#include "sba.h"#ifdef SBA_APPEND_UNDERSCORE_SUFFIX#define F77_FUNC(func) func ## _#else#define F77_FUNC(func) func #endif /* SBA_APPEND_UNDERSCORE_SUFFIX *//* declarations of LAPACK routines employed *//* QR decomposition */extern int F77_FUNC(dgeqrf)(int *m, int *n, double *a, int *lda, double *tau, double *work, int *lwork, int *info);extern int F77_FUNC(dorgqr)(int *m, int *n, int *k, double *a, int *lda, double *tau, double *work, int *lwork, int *info);/* solution of triangular system */extern int F77_FUNC(dtrtrs)(char *uplo, char *trans, char *diag, int *n, int *nrhs, double *a, int *lda, double *b, int *ldb, int *info);/* cholesky decomposition, linear system solution and matrix inversion */extern int F77_FUNC(dpotf2)(char *uplo, int *n, double *a, int *lda, int *info); /* unblocked cholesky */extern int F77_FUNC(dpotrf)(char *uplo, int *n, double *a, int *lda, int *info); /* block version of dpotf2 */extern int F77_FUNC(dpotrs)(char *uplo, int *n, int *nrhs, double *a, int *lda, double *b, int *ldb, int *info);extern int F77_FUNC(dpotri)(char *uplo, int *n, double *a, int *lda, int *info);/* LU decomposition, linear system solution and matrix inversion */extern int F77_FUNC(dgetrf)(int *m, int *n, double *a, int *lda, int *ipiv, int *info); /* blocked LU */extern int F77_FUNC(dgetf2)(int *m, int *n, double *a, int *lda, int *ipiv, int *info); /* unblocked LU */extern int F77_FUNC(dgetrs)(char *trans, int *n, int *nrhs, double *a, int *lda, int *ipiv, double *b, int *ldb, int *info);extern int F77_FUNC(dgetri)(int *n, double *a, int *lda, int *ipiv, double *work, int *lwork, int *info);/* SVD */extern int F77_FUNC(dgesvd)(char *jobu, char *jobvt, int *m, int *n, double *a, int *lda, double *s, double *u, int *ldu, double *vt, int *ldvt, double *work, int *lwork, int *info);/* lapack 3.0 routine, faster than dgesvd() */extern int F77_FUNC(dgesdd)(char *jobz, int *m, int *n, double *a, int *lda, double *s, double *u, int *ldu, double *vt, int *ldvt, double *work, int *lwork, int *iwork, int *info);/* Bunch-Kaufman factorization of a real symmetric matrix A, solution of linear systems and matrix inverse */extern int F77_FUNC(dsytrf)(char *uplo, int *n, double *a, int *lda, int *ipiv, double *work, int *lwork, int *info);extern int F77_FUNC(dsytrs)(char *uplo, int *n, int *nrhs, double *a, int *lda, int *ipiv, double *b, int *ldb, int *info);extern int F77_FUNC(dsytri)(char *uplo, int *n, double *a, int *lda, int *ipiv, double *work, int *info);/* * This function returns the solution of Ax = b * * The function is based on QR decomposition with explicit computation of Q: * If A=Q R with Q orthogonal and R upper triangular, the linear system becomes * Q R x = b or R x = Q^T b. * * A is mxm, b is mx1. Argument iscolmaj specifies whether A is * stored in column or row major order. Note that if iscolmaj==1 * this function modifies A! * * The function returns 0 in case of error, 1 if successfull * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */int sba_Axb_QR(double *A, double *B, double *x, int m, int iscolmaj){static double *buf=NULL;static int buf_sz=0, nb=0;double *a, *qtb, *r, *tau, *work;int a_sz, qtb_sz, r_sz, tau_sz, tot_sz;register int i, j;int info, worksz, nrhs=1;register double sum; if(A==NULL){ if(buf) free(buf); buf=NULL; buf_sz=0; return 1; } /* calculate required memory size */ a_sz=(iscolmaj)? 0 : m*m; qtb_sz=m; r_sz=m*m; /* only the upper triangular part really needed */ tau_sz=m; if(!nb){#ifndef SBA_LS_SCARCE_MEMORY double tmp; worksz=-1; // workspace query; optimal size is returned in tmp F77_FUNC(dgeqrf)((int *)&m, (int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&worksz, (int *)&info); nb=((int)tmp)/m; // optimal worksize is m*nb#else nb=1; // min worksize is m#endif /* SBA_LS_SCARCE_MEMORY */ } worksz=nb*m; tot_sz=a_sz + qtb_sz + r_sz + tau_sz + worksz; if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(double *)malloc(buf_sz*sizeof(double)); if(!buf){ fprintf(stderr, "memory allocation in sba_Axb_QR() failed!\n"); exit(1); } } if(!iscolmaj){ a=buf; /* store A (column major!) into a */ for(i=0; i<m; ++i) for(j=0; j<m; ++j) a[i+j*m]=A[i*m+j]; } else a=A; /* no copying required */ qtb=buf+a_sz; r=qtb+qtb_sz; tau=r+r_sz; work=tau+tau_sz; /* QR decomposition of A */ F77_FUNC(dgeqrf)((int *)&m, (int *)&m, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, "LAPACK error: illegal value for argument %d of dgeqrf in sba_Axb_QR()\n", -info); exit(1); } else{ fprintf(stderr, "Unknown LAPACK error %d for dgeqrf in sba_Axb_QR()\n", info); return 0; } } /* R is now stored in the upper triangular part of a; copy it in r so that dorgqr() below won't destroy it */ for(i=0; i<r_sz; ++i) r[i]=a[i]; /* compute Q using the elementary reflectors computed by the above decomposition */ F77_FUNC(dorgqr)((int *)&m, (int *)&m, (int *)&m, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info); if(info!=0){ if(info<0){ fprintf(stderr, "LAPACK error: illegal value for argument %d of dorgqr in sba_Axb_QR()\n", -info); exit(1); } else{ fprintf(stderr, "Unknown LAPACK error (%d) in sba_Axb_QR()\n", info); return 0; } } /* Q is now in a; compute Q^T b in qtb */ for(i=0; i<m; ++i){ for(j=0, sum=0.0; j<m; ++j) sum+=a[i*m+j]*B[j]; qtb[i]=sum; } /* solve the linear system R x = Q^t b */ F77_FUNC(dtrtrs)("U", "N", "N", (int *)&m, (int *)&nrhs, r, (int *)&m, qtb, (int *)&m, &info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, "LAPACK error: illegal value for argument %d of dtrtrs in sba_Axb_QR()\n", -info); exit(1); } else{ fprintf(stderr, "LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in sba_Axb_QR()\n", info); return 0; } } /* copy the result in x */ for(i=0; i<m; ++i) x[i]=qtb[i]; return 1;}/* * This function returns the solution of Ax = b * * The function is based on QR decomposition without computation of Q: * If A=Q R with Q orthogonal and R upper triangular, the linear system becomes * (A^T A) x = A^T b or (R^T Q^T Q R) x = A^T b or (R^T R) x = A^T b. * This amounts to solving R^T y = A^T b for y and then R x = y for x * Note that Q does not need to be explicitly computed * * A is mxm, b is mx1. Argument iscolmaj specifies whether A is * stored in column or row major order. Note that if iscolmaj==1 * this function modifies A! * * The function returns 0 in case of error, 1 if successfull * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */int sba_Axb_QRnoQ(double *A, double *B, double *x, int m, int iscolmaj){static double *buf=NULL;static int buf_sz=0, nb=0;double *a, *atb, *tau, *work;int a_sz, atb_sz, tau_sz, tot_sz;register int i, j;int info, worksz, nrhs=1;register double sum; if(A==NULL){ if(buf) free(buf); buf=NULL; buf_sz=0; return 1; } /* calculate required memory size */ a_sz=(iscolmaj)? 0 : m*m; atb_sz=m; tau_sz=m; if(!nb){#ifndef SBA_LS_SCARCE_MEMORY double tmp; worksz=-1; // workspace query; optimal size is returned in tmp F77_FUNC(dgeqrf)((int *)&m, (int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&worksz, (int *)&info); nb=((int)tmp)/m; // optimal worksize is m*nb#else nb=1; // min worksize is m#endif /* SBA_LS_SCARCE_MEMORY */ } worksz=nb*m; tot_sz=a_sz + atb_sz + tau_sz + worksz; if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(double *)malloc(buf_sz*sizeof(double)); if(!buf){ fprintf(stderr, "memory allocation in sba_Axb_QRnoQ() failed!\n"); exit(1); } } if(!iscolmaj){ a=buf; /* store A (column major!) into a */ for(i=0; i<m; ++i) for(j=0; j<m; ++j) a[i+j*m]=A[i*m+j]; } else a=A; /* no copying required */ atb=buf+a_sz; tau=atb+atb_sz; work=tau+tau_sz; /* compute A^T b in atb */ for(i=0; i<m; ++i){ for(j=0, sum=0.0; j<m; ++j) sum+=a[i*m+j]*B[j]; atb[i]=sum; } /* QR decomposition of A */ F77_FUNC(dgeqrf)((int *)&m, (int *)&m, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, "LAPACK error: illegal value for argument %d of dgeqrf in sba_Axb_QRnoQ()\n", -info); exit(1); } else{ fprintf(stderr, "Unknown LAPACK error %d for dgeqrf in sba_Axb_QRnoQ()\n", info); return 0; } } /* R is stored in the upper triangular part of a */ /* solve the linear system R^T y = A^t b */ F77_FUNC(dtrtrs)("U", "T", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, atb, (int *)&m, &info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, "LAPACK error: illegal value for argument %d of dtrtrs in sba_Axb_QRnoQ()\n", -info); exit(1); } else{ fprintf(stderr, "LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in sba_Axb_QRnoQ()\n", info); return 0; } } /* solve the linear system R x = y */ F77_FUNC(dtrtrs)("U", "N", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, atb, (int *)&m, &info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, "LAPACK error: illegal value for argument %d of dtrtrs in sba_Axb_QRnoQ()\n", -info); exit(1); } else{ fprintf(stderr, "LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in sba_Axb_QRnoQ()\n", info); return 0; } } /* copy the result in x */ for(i=0; i<m; ++i) x[i]=atb[i]; return 1;}/* * This function returns the solution of Ax=b * * The function assumes that A is symmetric & positive definite and employs * the Cholesky decomposition: * If A=U^T U with U upper triangular, the system to be solved becomes * (U^T U) x = b * This amounts to solving U^T y = b for y and then U x = y for x * * A is mxm, b is mx1. Argument iscolmaj specifies whether A is * stored in column or row major order. Note that if iscolmaj==1 * this function modifies A and B! * * The function returns 0 in case of error, 1 if successfull * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */int sba_Axb_Chol(double *A, double *B, double *x, int m, int iscolmaj){static double *buf=NULL;static int buf_sz=0;double *a, *b;int a_sz, b_sz, tot_sz;register int i, j;int info, nrhs=1; if(A==NULL){ if(buf) free(buf); buf=NULL; buf_sz=0; return 1; } /* calculate required memory size */ a_sz=(iscolmaj)? 0 : m*m; b_sz=(iscolmaj)? 0 : m; tot_sz=a_sz + b_sz; if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(double *)malloc(buf_sz*sizeof(double)); if(!buf){ fprintf(stderr, "memory allocation in sba_Axb_Chol() failed!\n"); exit(1); } } if(!iscolmaj){ a=buf; b=a+a_sz; /* store A into a and B into b; A is assumed to be symmetric, hence * the column and row major order representations are the same */ for(i=0; i<m; ++i){ a[i]=A[i]; b[i]=B[i]; } for(j=m*m; i<j; ++i) // copy remaining rows; note that i is not re-initialized a[i]=A[i]; } else{ /* no copying is necessary */ a=A; b=B;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -