📄 az_dd_overlap.c
字号:
/******************************************************************************* * Copyright 1995, Sandia Corporation. The United States Government retains a * * nonexclusive license in this software as prescribed in AL 88-1 and AL 91-7. * * Export of this program may require a license from the United States * * Government. * ******************************************************************************/#include <stdlib.h>#include <stdio.h>#include <math.h>#include "az_aztec.h"extern int AZ_sys_msg_type; #define ALLOCATE 1#define FREE_IT 2#define UPDATE_IT 3#define V_SET 4#define B_SET 5#define LASTUSED_SET 6#define ESTIMATED_SET 7 #define OUT_OF_BOUNDS 0#define IN_V 1#define IN_B 2 #define SET_VAL(a,b) AZ_allocate_or_free((void *) (a),(unsigned int)(b),V_SET)#define SET_BINDX(a,b) AZ_allocate_or_free((void *)(a),(unsigned int)(b),B_SET)#define SET_USAGE(a,b) AZ_allocate_or_free(NULL,(unsigned int)(a),LASTUSED_SET); \ AZ_allocate_or_free(NULL,(unsigned int) (b),ESTIMATED_SET)#define BV_ALLOC(a) AZ_allocate_or_free(NULL,(a),ALLOCATE)#define BV_FREE(a) AZ_allocate_or_free((void *) (a),(long int) NULL,FREE_IT)#define RESET_USAGE(a) AZ_allocate_or_free(NULL,(unsigned int)(a),LASTUSED_SET);extern void AZ_setup_sendlist(int , int *, int *, int *, int **, int *, int , int , int *);/******************************************************************************//******************************************************************************* Setup subroutine for overlapping domain decomposition preconditioner (currently for matrices with MSR format only). Author: Charles H. Tong (8117) ======= Return code: void ============ Parameter list: =============== Input : N_rows: number of matrix rows residing locally Rownum: an array containing the row numbers of the local rows bindx : an index array for the DMSR sparse matrix storage val: an array containing the nonzero entries of the matrix olap_size: the degree of overlap requested proc_config: Machine configuration. proc_config[AZ_node] is the node number. proc_config[AZ_N_procs] is the number of processors.Output : New_N_rows: the number of matrix rows for the enlarged matrix bindx: an index array for the DMSR storage of the enlarged matrix val: an array containing the nonzeros of the enlarged matrix*******************************************************************************/#ifndef TRUE#define TRUE 1#define FALSE 0#endifvoid AZ_setup_dd_olap_msr(int N_rows, int *New_N_rows, int *bindx, double *val, int olap_size, int *proc_config, int *data_org[], int **map3, int bindx_length,int name, int *prev_data_org, int estimated_requirements, struct context *context){ /* ----------------- variable and function declaration ---------------- */ /* local variables */ int i,j,k,jj,m,ndist,ind,ncnt,nzeros,ret_index,off_set,type; int nprocs,status,num_neigh,*proc_neigh; int enlarged_N, *enlarged_Rownum, *sorted_New_Rownum; int *send_leng, *actual_send_leng, **send_rownum, *send_proc; int *recv_leng, *actual_recv_leng, *recv_proc, *send_rowcnt; int *start_send; int N_ext_node, *ext_nodelist, *sorted_exts, *externals; int *Rownum, *tRownum, net_gain_N; int offset, max_per_proc; double *comm_buffer = NULL, *dptr = NULL, *dbl_grows; int comm_size = 0, *iptr = NULL; MPI_AZRequest request[AZ_MAX_NEIGHBORS]; /* Message handle */char str[80]; /* ------------------------- subroutine begins ------------------------ */ /* Initialize overlap memory scheme */ enlarged_N = bindx[0] - 1; nzeros = bindx[enlarged_N]; if (bindx_length < nzeros) { fprintf(stderr,"Not enough memory allocated for overlapping\n"); exit(1); } SET_VAL(val , bindx_length); SET_BINDX(bindx, bindx_length); if (estimated_requirements > nzeros) { SET_USAGE(nzeros,estimated_requirements); } else { SET_USAGE(nzeros,bindx_length);} /* tranform the local matrices to a global matrix */ /* by putting in fake numbers */ max_per_proc = AZ_gmax_int(N_rows,proc_config); max_per_proc++; offset = max_per_proc*proc_config[AZ_node]; /* determine the global row number corresponding to */ /* external variables using AZ_exchange_bdry(). */ Rownum = (int *) BV_ALLOC((N_rows+1) * sizeof(int)); externals = (int *) BV_ALLOC((prev_data_org[AZ_N_external]+1)*sizeof(int)); dbl_grows = (double *) BV_ALLOC((N_rows + prev_data_org[AZ_N_external] + 1)* sizeof(double) ); for (i = 0 ; i < N_rows; i++ ) { Rownum[i] = offset + i; dbl_grows[i] = (double) Rownum[i]; } AZ_exchange_bdry(dbl_grows, prev_data_org, proc_config); for (i = 0 ; i < prev_data_org[AZ_N_external] ; i++ ) externals[i] = (int) dbl_grows[i + N_rows]; /* change matrix columns to reflect global numbers */ for ( i = N_rows+1; i < bindx[N_rows] ; i++ ) { if ( bindx[i] < N_rows) bindx[i] += offset; else bindx[i] = externals[bindx[i] - N_rows]; } BV_FREE((char *) externals); BV_FREE((char *) dbl_grows); /* fetch processor information */ nprocs = proc_config[AZ_N_procs]; enlarged_Rownum = (int*) BV_ALLOC((enlarged_N+1) * sizeof(int)); if (enlarged_Rownum == NULL) { printf("Error in allocating memory for enlarged_Rownum.\n"); exit(-1); } for (i=0; i<enlarged_N; i++) enlarged_Rownum[i] = Rownum[i]; /* if no overlap is requested, just return the original matrix */ if (olap_size <= 0) { (*New_N_rows) = enlarged_N; return; } /* Allocate temporary storage space for further processing * - send_proc : a processor flag array to indicate send processors * - send_leng : data length (in doubles) for each send-to processor * - send_rownum : row numbers of matrix to the send-to processors * - send_rowcnt : number of matrix rows to the send-to processors * - recv_proc : a processor flag array to indicate recv-from processors * - recv_leng : data length (in doubles) for each recv-from processor * - proc_neigh : a compressed list of neighbors for communication * - actual_send_leng : compressed send_leng array * - actual_recv_leng : compressed recv_leng array * - start_send : pointing to subarrays for each send processor */ send_rownum = (int**) BV_ALLOC( nprocs *sizeof(int*) + (9* nprocs)*sizeof(int)); send_proc = (int *) &(send_rownum[nprocs]); send_leng = &(send_proc[nprocs]); send_rowcnt = &(send_leng[nprocs]); recv_proc = &(send_rowcnt[nprocs]); recv_leng = &(recv_proc[nprocs]); proc_neigh = &(recv_leng[nprocs]); actual_send_leng = &(proc_neigh[nprocs]); actual_recv_leng = &(actual_send_leng[nprocs]); start_send = &(actual_recv_leng[nprocs]); for (i=0; i<nprocs; i++) send_rownum[i] = NULL; /* duplicate the sorted Rownum list for use later in the loop */ sorted_New_Rownum = (int*) BV_ALLOC((enlarged_N+1) * sizeof(int)); if (sorted_New_Rownum == NULL) { printf("Error in allocating memory for sorted_New_Rownum.\n"); exit(-1); } for (i=0; i<enlarged_N; i++) sorted_New_Rownum[i] = Rownum[i]; /* enlarging the local matrix with a distance of one at a time using breadth first search */ for (ndist=0; ndist<olap_size; ndist++) { /* Starting with the enlarged system, compose the external node list. * Input : enlarged_N,sorted_New_Rownum,bindx * Output : N_ext_node, ext_nodelist */ PAZ_compose_external(enlarged_N,sorted_New_Rownum, bindx, &N_ext_node, &ext_nodelist); BV_FREE(sorted_New_Rownum); /* use the external node list to construct the send information * Input : N_ext_node, ext_nodelist, Rownum (must be sorted) * Output : send_proc, send_rowcnt, send_rownum */ AZ_setup_sendlist(N_ext_node, ext_nodelist, send_proc, send_rowcnt, send_rownum, proc_config, max_per_proc, N_rows,Rownum); /* Now generate receive information data */ for (i=0; i<nprocs; i++) recv_proc[i] = 0; for (i=0; i<N_ext_node; i++) recv_proc[ext_nodelist[i]/max_per_proc]++; BV_FREE(ext_nodelist); /* form a compressed processor list for communication */ num_neigh = 0; for (i=0; i<nprocs; i++) if (send_proc[i] > 0 || recv_proc[i] > 0) proc_neigh[num_neigh++] = i; /* compute the size that the linear system will */ /* be when the new rows are incorporated */ net_gain_N = 0; for (i = 0 ; i < num_neigh; i++ ) net_gain_N += recv_proc[proc_neigh[i]]; /* Pass new matrix size to the memory allocator */ nzeros = bindx[enlarged_N] + net_gain_N; if (bindx_length < nzeros) { fprintf(stderr,"Not enough memory allocated for overlapping\n"); exit(1); } RESET_USAGE(nzeros); /* Make room for the new matrix entries by shifting */ /* the off-diagonals by 'net_gain_N' storage locations */ for (i = bindx[enlarged_N]-1 ; i >= bindx[0] ; i--) { k = i + net_gain_N; bindx[k] = bindx[i]; val[k] = val[i]; } for (i=0; i<=enlarged_N; i++) bindx[i] += net_gain_N; /* Figure out the total number of off-diagonals */ /* that we need to send out to update the rows. */ k = 0; for (i = 0 ; i < num_neigh; i++) { jj = proc_neigh[i]; for (j = 0 ; j < send_rowcnt[jj] ; j++ ) { k += (bindx[send_rownum[jj][j]+1] - bindx[send_rownum[jj][j]]); } } if (k+1 > comm_size) { if (comm_buffer != NULL) { BV_FREE(comm_buffer); comm_buffer = NULL; } comm_size = k+1; comm_buffer = (double *) BV_ALLOC( comm_size * sizeof(double)); iptr = (int *) comm_buffer; dptr = (double *) comm_buffer; } /* expand the Rownum list: */ /* 1) allocate space */ /* 2) post receives */ /* 3) send messages */ /* 4) wait to receive */ tRownum = (int*) BV_ALLOC((net_gain_N + enlarged_N+1) * sizeof(int)); if (tRownum == NULL) { printf("Error in allocating memory for tRownum (%d).\n",enlarged_N); exit(-1); } for (i=0; i<enlarged_N; i++) tRownum[i] = enlarged_Rownum[i]; BV_FREE(enlarged_Rownum); enlarged_Rownum = tRownum; type =AZ_sys_msg_type; AZ_sys_msg_type =(AZ_sys_msg_type+1-AZ_MSG_TYPE) % AZ_NUM_MSGS + AZ_MSG_TYPE; off_set = enlarged_N; for (i = 0 ; i < num_neigh; i++ ) { jj = proc_neigh[i]; (void) mdwrap_iread((void *) &(enlarged_Rownum[off_set]), recv_proc[jj]*sizeof(int), &jj, &type, &(request[i])); off_set += recv_proc[jj]; } for (i = 0 ; i < num_neigh; i++) { jj = proc_neigh[i]; for (k = 0 ; k < send_rowcnt[jj] ; k++ ) iptr[k] = enlarged_Rownum[send_rownum[jj][k] ] ; mdwrap_write((void*)iptr,send_rowcnt[jj]*sizeof(int),jj,type,&status); } off_set = enlarged_N; for (i = 0 ; i < num_neigh; i++ ) { jj = proc_neigh[i]; mdwrap_wait((char*)&(enlarged_Rownum[off_set]), recv_proc[jj]*sizeof(int),&jj, &type, &status, request+i); off_set += recv_proc[jj]; } /* Obtain the number of columns in each new row */ type =AZ_sys_msg_type; AZ_sys_msg_type =(AZ_sys_msg_type+1-AZ_MSG_TYPE) % AZ_NUM_MSGS + AZ_MSG_TYPE; off_set = enlarged_N+1; for (i = 0 ; i < num_neigh; i++ ) { jj = proc_neigh[i]; (void) mdwrap_iread((void *) &(bindx[off_set]), recv_proc[jj]*sizeof(int), &jj, &type, &(request[i])); off_set += recv_proc[jj]; } for (i = 0 ; i < num_neigh; i++) { jj = proc_neigh[i]; for (k = 0 ; k < send_rowcnt[jj] ; k++ ) iptr[k] = bindx[send_rownum[jj][k] + 1] - bindx[send_rownum[jj][k]]; mdwrap_write((void*)iptr,send_rowcnt[jj]*sizeof(int),jj,type,&status); } off_set = enlarged_N + 1; for (i = 0 ; i < num_neigh; i++ ) { jj = proc_neigh[i]; mdwrap_wait((char*) &(bindx[off_set]), recv_proc[jj]*sizeof(int),&jj, &type, &status, request+i); off_set += recv_proc[jj]; } /* Now convert the 'row size' into the proper bindx entry */ for (i = enlarged_N+1 ; i <= enlarged_N+net_gain_N ; i++ ) bindx[i] += bindx[i-1]; /* Pass new matrix size (including off-diags) to the memory allocator */ nzeros = bindx[enlarged_N + net_gain_N]; if (bindx_length < nzeros) { fprintf(stderr,"Not enough memory allocated for overlapping\n"); exit(1); } RESET_USAGE(nzeros); /* Obtain the diagonal entry of the new rows */ type =AZ_sys_msg_type; AZ_sys_msg_type =(AZ_sys_msg_type+1-AZ_MSG_TYPE) % AZ_NUM_MSGS + AZ_MSG_TYPE; off_set = enlarged_N; for (i = 0 ; i < num_neigh; i++ ) { jj = proc_neigh[i]; (void) mdwrap_iread((void*)&(val[off_set]),recv_proc[jj]*sizeof(double), &jj, &type, &(request[i])); off_set += recv_proc[jj]; } for (i = 0 ; i < num_neigh; i++) { jj = proc_neigh[i]; for (k = 0 ; k < send_rowcnt[jj] ; k++ ) dptr[k] = val[send_rownum[jj][k]]; mdwrap_write((void*)dptr,send_rowcnt[jj]*sizeof(double),jj, type,&status); } off_set = enlarged_N; for (i = 0 ; i < num_neigh; i++ ) { jj = proc_neigh[i]; mdwrap_wait((char*) &(val[off_set]), recv_proc[jj]*sizeof(double), &jj, &type, &status, request+i); off_set += recv_proc[jj]; } /* Obtain the off-diagonal column indices */ type =AZ_sys_msg_type; AZ_sys_msg_type =(AZ_sys_msg_type+1-AZ_MSG_TYPE) % AZ_NUM_MSGS + AZ_MSG_TYPE; off_set = enlarged_N; for (i = 0 ; i < num_neigh; i++ ) { jj = proc_neigh[i]; actual_recv_leng[i] = bindx[off_set + recv_proc[jj]] - bindx[off_set]; off_set += recv_proc[jj]; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -