📄 az_gmres.c

📁 并行解法器,功能强大
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*==================================================================== * ------------------------ * | CVS File Information | * ------------------------ * * $RCSfile: az_gmres.c,v $ * * $Author: tuminaro $ * * $Date: 2000/06/02 16:46:55 $ * * $Revision: 1.40 $ * * $Name:  $ *====================================================================*/#ifndef lintstatic char rcsid[] = "$Id: az_gmres.c,v 1.40 2000/06/02 16:46:55 tuminaro Exp $";#endif/******************************************************************************* * Copyright 1995, Sandia Corporation.  The United States Government retains a * * nonexclusive license in this software as prescribed in AL 88-1 and AL 91-7. * * Export of this program may require a license from the United States         * * Government.                                                                 * ******************************************************************************/#include <stdlib.h>#include <stdio.h>#include <math.h>#include <float.h>#include "az_aztec.h"void AZ_pgmres (double b[], double x[],double weight[], int options[],	double params[], int proc_config[], double status[], AZ_MATRIX *Amat, 	AZ_PRECOND *precond, struct AZ_CONVERGE_STRUCT *convergence_info)/*******************************************************************************  This routine uses Saad's restarted Genralized Minimum Residual method to solve  the nonsymmetric matrix problem Ax = b.  IMPORTANT NOTE: While the 2-norm of the gmres residual is available, the  actual residual is not normally computed as part of the gmres algorithm. Thus,  if the user uses a convergence condition (see AZ_compute_global_scalars())  that is based on the 2-norm of the residual there is no need to compute the  residual (i.e. r_avail = AZ_FALSE). However, if another norm of r is  requested, AZ_compute_global_scalars() sets r_avail = AZ_TRUE and the  algorithm computes the residual.  Author:          John N. Shadid, SNL, 1421  =======  Return code:     void  ============  Parameter list:  ===============  b:               Right hand side of linear system.  x:               On input, contains the initial guess. On output contains the                   solution to the linear system.  weight:          Vector of weights for convergence norm #4.  options:         Determines specific solution method and other parameters.  params:          Drop tolerance and convergence tolerance info.  proc_config:     Machine configuration.  proc_config[AZ_node] is the node                   number.  proc_config[AZ_N_procs] is the number of processors.  status:          On output, indicates termination status:                    0:  terminated normally.                   -1:  maximum number of iterations taken without achieving                        convergence.                   -2:  Breakdown. The algorithm can not proceed due to                        numerical difficulties (usually a divide by zero).                   -3:  Internal residual differs from the computed residual due                        to a significant loss of precision.  Amat:            Structure used to represent the matrix (see file az_aztec.h                   and Aztec User's Guide).  precond:         Structure used to represent the preconditionner                   (see file az_aztec.h and Aztec User's Guide).*******************************************************************************/{  /* local variables */  register int k;  int          i, i1, k1, mm, ii;  int          N, converged, one = 1, iter = 1, r_avail = AZ_FALSE;  int          precond_flag, print_freq, proc, kspace, first_time = AZ_TRUE;  double     **v, **hh, *c, *s, *rs, *dots, *tmp, *temp;  double       *res, init_time = 0.0;  double       dble_tmp, r_2norm, first_2norm, epsilon;  double       rec_residual, scaled_r_norm, true_scaled_r=0.0;  double       actual_residual = -1.0;  double       doubleone = 1.0, minusone = -1.0, *dummy = (double *) 0;  int          *data_org, str_leng;  char         label[64],suffix[32], prefix[64];  /* condition number estimation variables */  double *svbig, *svsml, *vectmp;  double  big, cc, dble_tmp1, sestpr, small, ss;  int     ijob;  double *hhblock, *vblock;  int kspace_p1,kspace_p2,N_total;  int aligned_N_total;char *T = "T";char *T2 = "N";  /**************************** execution begins ******************************/  sprintf(suffix," in gmres%d",options[AZ_recursion_level]);/* set string that will be used */                                                           /* for manage_memory label      */  /* set prefix for printing */  str_leng = 0;  for (i = 0; i < 16; i++) prefix[str_leng++] = ' ';  for (i = 0 ; i < options[AZ_recursion_level]; i++ ) {     prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' ';     prefix[str_leng++] = ' '; prefix[str_leng++] = ' ';  }  prefix[str_leng] = '\0';  data_org = Amat->data_org;  /* pull needed values out of parameter arrays */  N            = data_org[AZ_N_internal] + data_org[AZ_N_border];  precond_flag = options[AZ_precond];  epsilon      = params[AZ_tol];  proc         = proc_config[AZ_node];  print_freq   = options[AZ_print_freq];  kspace       = options[AZ_kspace];  /* allocate memory for required vectors */  kspace_p2 = kspace + 2;  kspace_p1 = kspace + 1;  N_total   = N + data_org[AZ_N_external] + 1;			/* +1: make sure everybody allocates something */  /* Note: temp must be aligned on the Intel Paragon so  */  /* that the quad load inside the assembly dgemv works. */  sprintf(label,"general%s",suffix);  temp   = AZ_manage_memory((3*kspace_p2 + 5*kspace_p1 + N_total +                            (kspace+1)*kspace_p1)                            *sizeof(double),AZ_ALLOC,AZ_SYS,label, &i);  dots   = &(temp[  N_total]);  tmp    = &(dots[  kspace_p2]);  rs     = &(tmp[   kspace_p2]);  c      = &(rs[    kspace_p2]);  s      = &(c[     kspace_p1]);  svbig  = &(s[     kspace_p1]);  svsml  = &(svbig[ kspace_p1]);  vectmp = &(svsml[ kspace_p1]);  hhblock= &(vectmp[kspace_p1]);  sprintf(label,"ptrs%s",suffix);  v     = (double **) AZ_manage_memory(2*kspace_p2*sizeof(double *),AZ_ALLOC,                                       AZ_SYS, label, &i);  hh   = &(v[kspace_p2]);  aligned_N_total  = N_total;   /* The vectors in 'v' must be aligned on */  aligned_N_total += N_total%2; /* the Intel Paragon so that the quad    */                                /* load inside the assembly dgemv works. */  sprintf(label,"vblock%s",suffix);  vblock = AZ_manage_memory((kspace+1)*aligned_N_total*sizeof(double),AZ_ALLOC,			     AZ_SYS,label, &i);  for (k = 0; k < kspace+1; k++) {     hh[k] = &(hhblock[k*kspace_p1]);     v[k]  = &(vblock[k*aligned_N_total]);  }  AZ_compute_residual(b, x, v[0], proc_config, Amat);  /*   * Compute a few global scalars:   *     1) ||r||                corresponding to options[AZ_conv]   *     2) scaled ||r||         corresponding to options[AZ_conv]   *     3) r_2norm = <r,r>      corresponding to options[AZ_conv]   */  AZ_compute_global_scalars(Amat, x, b, v[0],                            weight, &rec_residual, &scaled_r_norm, options,                            data_org, proc_config, &r_avail, v[0], v[0],                            &r_2norm, convergence_info);  true_scaled_r = scaled_r_norm;  r_2norm   = sqrt(r_2norm);  converged = scaled_r_norm < epsilon;  if (r_avail) {    sprintf(label,"res%s",suffix);    res = AZ_manage_memory(N_total*sizeof(double),AZ_ALLOC,AZ_SYS,label,&i);  }  else res = (double *) NULL;    if ( (options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_last) &&       (options[AZ_output] != AZ_warnings) && (proc == 0) )    (void) fprintf(stdout, "%siter:    0           residual = %e\n",prefix,scaled_r_norm);  iter = 0;  while (!converged && iter < options[AZ_max_iter]) {    if (r_avail) dcopy_(&N, v[0], &one, res, &one);    /* v1 = r0/beta */    dble_tmp    = 1.0 / r_2norm;    first_2norm = r_2norm;    dscal_(&N, &dble_tmp, v[0], &one);    rs[0] = r_2norm;  /* initialize 1st rhs term of H system */    i     = 0;    while (i < kspace && !converged && iter < options[AZ_max_iter]) {      iter++;      i1 = i + 1;      /* v_i+1 = A M^-1 v_i */      dcopy_(&N, v[i], &one, temp, &one);      if (iter == 1) init_time = AZ_second();     if (precond_flag) precond->prec_function(temp,options,proc_config,params,Amat,precond);    if (iter == 1) status[AZ_first_precond] = AZ_second() - init_time;    Amat->matvec(temp, v[i1], Amat, proc_config);      /* Gram-Schmidt orthogonalization */      if (!options[AZ_orthog]) { /* classical. Actually, we do */                                 /* this twice. I forget the   */                                 /* initials that are used to  */                                 /* describe this: DKSG???     */         for (k = 0; k <= i; k++) hh[k][i] = 0.0;         for (ii = 0 ; ii < 2; ii++ ) {            if (N == 0) for (k = 0; k <= i; k++) dots[k] = 0.0;            dble_tmp = 0.0; mm = i+1;            dgemv_(T, &N, &mm, &doubleone, vblock, &aligned_N_total,                    v[i1], &one, &dble_tmp, dots, &one, 1 /* strlen(T) */);            AZ_gdot_vec(i1, dots, tmp, proc_config);            for (k = 0; k <= i; k++) hh[k][i] += dots[k];            dgemv_(T2, &N, &mm, &minusone, vblock, &aligned_N_total,                    dots, &one, &doubleone, v[i1], &one, 1 /* strlen(T2) */);         }      }      else {                    /* modified */        for (k = 0; k <= i; k++) {          hh[k][i] = dble_tmp = AZ_gdot(N, v[k], v[i1], proc_config);          dble_tmp = -dble_tmp;          daxpy_(&N, &dble_tmp, v[k], &one, v[i1], &one);        }      }      /* normalize vector */      hh[i1][i] = dble_tmp = sqrt(AZ_gdot(N, v[i1], v[i1], proc_config));      if (dble_tmp  > DBL_EPSILON*r_2norm)        dble_tmp  = 1.0 / dble_tmp;      else        dble_tmp = 0.0;      dscal_(&N, &dble_tmp, v[i1], &one);      /* update factorization of hh by plane rotation */      for (k = 1; k <= i; k++) {        k1        = k - 1;        dble_tmp  = hh[k1][i];        hh[k1][i] =  c[k1]*dble_tmp + s[k1]*hh[k][i];        hh[k][i]  = -s[k1]*dble_tmp + c[k1]*hh[k][i];      }      /* determine next plane rotation */
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -