📄 sqmatrix.c

📁 麻省理工的计算光子晶体的程序
💻 C
字号:
/* Copyright (C) 1999, 2000, 2001, 2002, Massachusetts Institute of Technology. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */#include <stdlib.h>#include <stdio.h>#include <math.h>#include "../config.h"#include <check.h>#include "matrices.h"#include "blasglue.h"/* Simple operations on sqmatrices.  Also, some not-so-simple operations,   like inversion and eigenvalue decomposition. *//* A = B */void sqmatrix_copy(sqmatrix A, sqmatrix B){     CHECK(A.p == B.p, "arrays not conformant");     blasglue_copy(A.p * A.p, B.data, 1, A.data, 1);}/* Resize A from its current size to a pxp matrix, assuming that   A was initially allocated to hold at least this big a matrix.   If preserve_data is nonzero, copies the existing data in A (or   a subset of it, if the matrix is shrinking) to the corresponding   entries of the resized matrix. */void sqmatrix_resize(sqmatrix *A, int p, short preserve_data){     CHECK(p <= A->alloc_p, "tried to resize beyond allocated limit");     if (preserve_data) {	  int i, j;	  	  if (p < A->p) {	       for (i = 0; i < p; ++i)		    for (j = 0; j < p; ++j)			 A->data[i*p + j] = A->data[i*A->p + j];	  }	  else {	       for (i = A->p-1; i >= 0; --i)		    for (j = A->p-1; j >= 0; --j)			 A->data[i*p + j] = A->data[i*A->p + j];	  }     }     A->p = p;}/* U contains the upper triangle of a Hermitian matrix; we copy this    to F and also fill in the lower triangle with the adjoint of the upper. */void sqmatrix_copy_upper2full(sqmatrix F, sqmatrix U){     int i, j;     CHECK(F.p == U.p, "arrays not conformant");     for (i = 0; i < U.p; ++i) {	  for (j = 0; j < i; ++j) {	       ASSIGN_CONJ(F.data[i*U.p + j], U.data[j*U.p + i]);	  }	  for (; j < U.p; ++j)	       F.data[i*U.p + j] = U.data[i*U.p + j];     }}/* Asym = (A + adjoint(A)) / 2.  Asym is thus Hermitian. */void sqmatrix_symmetrize(sqmatrix Asym, sqmatrix A){     int i, j;     CHECK(Asym.p == A.p, "arrays not conformant");     for (i = 0; i < A.p; ++i) 	  for (j = 0; j < A.p; ++j) {	       int ij = i * A.p + j, ji = j * A.p + i;	       ASSIGN_SCALAR(Asym.data[ij], 			     0.5 * (SCALAR_RE(A.data[ij]) +				    SCALAR_RE(A.data[ji])),			     0.5 * (SCALAR_IM(A.data[ij]) -				    SCALAR_IM(A.data[ji])));	  }}/* trace(U) */scalar sqmatrix_trace(sqmatrix U){     int i;     scalar trace = SCALAR_INIT_ZERO;     for (i = 0; i < U.p; ++i)	  ACCUMULATE_SUM(trace, U.data[i*U.p + i]);     return trace;}/* compute trace(adjoint(A) * B) */scalar sqmatrix_traceAtB(sqmatrix A, sqmatrix B){     scalar trace;     CHECK(A.p == B.p, "matrices not conformant");     trace = blasglue_dotc(A.p * A.p, A.data, 1, B.data, 1);     return trace;}/* A = B * C.  If bdagger != 0, then adjoint(B) is used; similarly for C.    A must be distinct from B and C.   Note that since the matrices   are stored in row-major order, the most efficient operation should   be B * adjoint(C), assuming the BLAS is sane.  i.e. if C is hermitian,   you should use cdagger = 1.  Conversely, the worst operation is   probably adjoint(B) * C. */void sqmatrix_AeBC(sqmatrix A, sqmatrix B, short bdagger,		   sqmatrix C, short cdagger){     CHECK(A.p == B.p && A.p == C.p, "matrices not conformant");     blasglue_gemm(bdagger ? 'C' : 'N', cdagger ? 'C' : 'N', A.p, A.p, A.p,                   1.0, B.data, B.p, C.data, C.p, 0.0, A.data, A.p);}/* A += a B * C.  bdagger, cdagger are as for sqmatrix_AeBC, above. */void sqmatrix_ApaBC(sqmatrix A, real a, sqmatrix B, short bdagger,		    sqmatrix C, short cdagger){     CHECK(A.p == B.p && A.p == C.p, "matrices not conformant");     blasglue_gemm(bdagger ? 'C' : 'N', cdagger ? 'C' : 'N', A.p, A.p, A.p,                   a, B.data, B.p, C.data, C.p, 1.0, A.data, A.p);}/* A += a B */void sqmatrix_ApaB(sqmatrix A, real a, sqmatrix B){     CHECK(A.p == B.p, "matrices not conformant");     blasglue_axpy(A.p * A.p, a, B.data, 1, A.data, 1);}/* compute A = a*A + b*B; A and B may be equal. */void sqmatrix_aApbB(real a, sqmatrix A, real b, sqmatrix B){     CHECK(A.p == B.p, "arrays not conformant");     if (a != 1.0)          blasglue_rscal(A.p * A.p, a, A.data, 1);     blasglue_axpy(A.p * A.p, b, B.data, 1, A.data, 1);}/* U <- 1/U.  U must be Hermitian and, if positive_definite != 0,   positive-definite (e.g. U = Yt*Y).  Work is a scratch matrix. */void sqmatrix_invert(sqmatrix U, short positive_definite,		     sqmatrix Work){     int i, j;     if (positive_definite) {	  /* factorize U: */	  lapackglue_potrf('U', U.p, U.data, U.p);	  	  /* QUESTION: would it be more efficient to stop here,	     returning the Cholesky factorization of U?  This	     could then be used to multiply by 1/U without	     ever calculating the inverse explicitely.  It	     would probably be more numerically stable, but	     how do the computational costs compare? */	  	  /* Compute 1/U (upper half only) */	  lapackglue_potri('U', U.p, U.data, U.p);     }     else {	  int *ipiv;	  CHK_MALLOC(ipiv, int, U.p);	  CHECK(Work.p * Work.p >= U.p, "scratch matrix is too small");	  /* factorize U: */	  lapackglue_hetrf('U', U.p, U.data, U.p,			   ipiv, Work.data, Work.p * Work.p);	  /* Compute 1/U (upper half only) */	  lapackglue_hetri('U', U.p, U.data, U.p, ipiv, Work.data);	  	  free(ipiv);     }	       /* Now, copy the conjugate of the upper half	onto the lower half of U */     for (i = 0; i < U.p; ++i)	  for (j = i + 1; j < U.p; ++j) {	       ASSIGN_CONJ(U.data[j * U.p + i], U.data[i * U.p + j]);	  }}/* U <- eigenvectors of U.  U must be Hermitian. eigenvals <- eigenvalues.   W is a work array.  The columns of adjoint(U') are the eigenvectors, so that   U == adjoint(U') D U', with D = diag(eigenvals).    The eigenvalues are returned in ascending order. */void sqmatrix_eigensolve(sqmatrix U, real *eigenvals, sqmatrix W){     real *work;     CHK_MALLOC(work, real, 3*U.p - 2);     if (W.p * W.p >= 3 * U.p - 1)	  lapackglue_heev('V', 'U', U.p, U.data, U.p, eigenvals,			  W.data, W.p * W.p, work);     else {	  scalar *morework;	  CHK_MALLOC(morework, scalar, 3 * U.p - 1);	  lapackglue_heev('V', 'U', U.p, U.data, U.p, eigenvals,			  morework, 3 * U.p - 1, work);	  free(morework);     }     free(work);}/* Compute Usqrt <- sqrt(U), where U must be Hermitian positive-definite.    W is a work array, and must be the same size as U.  Both U and   W are overwritten. */void sqmatrix_sqrt(sqmatrix Usqrt, sqmatrix U, sqmatrix W){     real *eigenvals;     CHECK(Usqrt.p == U.p && U.p == W.p, "matrices not conformant");     CHK_MALLOC(eigenvals, real, U.p);     sqmatrix_eigensolve(U, eigenvals, W);     {	  int i;	  	  /* Compute W = diag(sqrt(eigenvals)) * U; i.e. the rows of W	     become the rows of U times sqrt(corresponding eigenvalue) */	  for (i = 0; i < U.p; ++i) {	       CHECK(eigenvals[i] > 0, "non-positive eigenvalue");	       	       blasglue_copy(U.p, U.data + i*U.p, 1, W.data + i*U.p, 1);	       blasglue_rscal(U.p, sqrt(eigenvals[i]), W.data + i*U.p, 1);	  }     }     free(eigenvals);     /* compute Usqrt = Ut * W */     sqmatrix_AeBC(Usqrt, U, 1, W, 0);}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -