⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 p2_complib2.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequence database
💻 C
📖 第 1 页 / 共 4 页
字号:
/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the   U. of Virginia *//* $Name: fa35_03_06 $ - $Id: p2_complib2.c,v 1.6 2008/01/11 15:27:43 wrp Exp $ *//* * pcomplib.c : Parallel library search *  *	#define FIRSTNODE 0/1 (in msg.h) can be used to reserve one node *	for collecting results * * Parallel specific options (from doinit.c): *	-J # jump to query # *	-I   self-comparison, do (N choose 2) comparisons *	-T # number of workers *//* This version is modifed to read all files, query and database,   through the manager process. Workers will now receive their   database from the manager, rather than reading it themselves.  This   cuts down considerably on NFS traffic, simplifies searches of   multiple files, and allows use of clusters of slave nodes that do   not have NFS access*//* modified 5-November-2004 to ensure 15 byte (SEQ_PAD) NULL   padding   modified 12-December-2006 to ensure n0>0 before SEQ_PAD padding. */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include <time.h>#include <limits.h>#include <float.h>#include <math.h>#include <unistd.h>#include <sys/types.h>#include <signal.h>#include <sys/stat.h>#ifdef PVM_SRC#include "pvm3.h"char *mp_verstr="35.03, Jan 11, 2008 PVM";#endif#ifdef MPI_SRC#include "mpi.h"char *mp_verstr="35.03, Jan 11, 2008 MPI";#endif#include "msg.h"#include "defs.h"#include "mm_file.h"#include "structs.h"#include "param.h"#include "p_mw.h"#define XTERNAL#include "uascii.h"char pgmdir[MAX_FN];char workerpgm[MAX_FN];char managepgm[MAX_FN];#define XTERNAL#include "upam.h"#undef XTERNAL/********************************//* global variable declarations *//********************************/int nsfnum;	/* number of superfamily numbers */int sfnum[10];	/* superfamily number from types 0 and 5 */int nsfnum_n;int sfnum_n[10];/********************************//* extern variable declarations *//********************************/extern char *prog_func;		/* function label */extern char *verstr, *iprompt0, 
*iprompt1, *iprompt2, *refstr;/********************************//*extern function declarations  *//********************************/void libchoice(char *lname, int, struct mngmsg *); /* lib_sel.c */void libselect(char *lname, struct mngmsg *);	/* lib_sel.c */extern void closelib();/* check for DNA sequence (nxgetaa.c) */extern int scanseq(unsigned char *seq, int n, char *str);extern void re_ascii(int *qascii, int *sascii);extern int recode(unsigned char *seq, int n, int *qascii, int nsq);/* 1d to 2d pam (initxx.c) */extern void initpam2 (struct pstruct *ppst);/* initialize environment (doinit.c) */extern void h_init (struct pstruct *ppst, struct mngmsg *, char *);extern void s_abort (char *p,  char *p1);extern void query_parm (struct mngmsg *m_msp, struct pstruct *ppst);extern void last_init (struct mngmsg *, struct pstruct *, int);extern void initenv (int argc, char **argv, struct mngmsg *m_msg,		     struct pstruct *ppst, unsigned char **aa0);/* print hist, summaries, timing information */void prhist(FILE *, struct mngmsg, struct pstruct *, struct hist_str, int nstats, struct db_str,	    char *lib_range, char **info_gstring2, char **info_hstring_p);void printsum(FILE *);extern void ptime (FILE *, time_t);/* reset parameters if DNA sequence (initxx.c) */extern void resetp (struct mngmsg *, struct pstruct *);/* read a sequence (nmgetlib.c) */struct lmf_str *openlib(char *, int, int *, int, struct lmf_str *);#define QGETLIB (q_file_p->getlib)#define LGETLIB (l_file_p->getlib)/* these functions are in scaleswn.c */extern int process_hist(struct stat_str *sptr, int nstat,			struct mngmsg m_msg, struct pstruct *ppst,			struct hist_str *hist, void **pstat_void, int);extern voidscale_scores(struct beststr **bptr, int nbest, struct db_str db,	     struct pstruct *ppst, void *rs);extern double zs_to_E(double zs, int n1, int isdna, long, struct db_str ntt);extern double (*find_zp)(int score, double escore, int length, double comp, void *);void addhistz(double zscore, struct 
hist_str *);	/* scaleswn.c */void last_stats(const unsigned char *aa0, int n0,  		struct stat_str *sptr, int nstats,		struct beststr **bestp_arr, int nbest,		struct mngmsg m_msg, struct pstruct pst,		struct hist_str *histp, void *rs);void selectbestz(struct beststr **, int, int);void sortbest(struct beststr **, int, int);void sortbeste(struct beststr **, int);void showbest (FILE *fp, struct beststr **bptr, int nbest,	       int qlib, struct mngmsg *m_msg, struct pstruct *pst,	       struct db_str ntt, char **info_gstring2);void showalign (FILE *fp, 		struct beststr **bptr, int nbest,int qlib, struct mngmsg m_msg,		struct pstruct *pst, char **info_gstring2);int ann_scan(unsigned char *, int, struct mngmsg *, int);void init_ascii(int is_ext, int *sascii, int is_dna);void last_params(unsigned char *aa0, int n0, struct mngmsg *m_msg,		 struct pstruct *ppst, struct qmng_str *qm_msg);int nrand(int);int last_calc(struct beststr **bestp_arr, int nbest,	      struct mngmsg m_msg, struct pstruct *pst,	      struct qmng_str *qm_msp, void *rs_str);void do_stage2(struct beststr **bptr, int mshow,	       struct mngmsg m_msg, int, struct qmng_str *qm_msp0);#ifdef MPI_SRCextern void workcomp(int);#endif#ifdef PVM_SRCchar worknode[120];int pinums[MAXNOD],hosttid;int narch;struct pvmhostinfo *hostp;#endifFILE *outfd;			/* Output file */extern time_t s_time ();                 /* fetches time for timing *//* this information is global for fsigint() */time_t tstart, tscan, tprev, tdone;	/* Timing */time_t tdstart, tddone, time();int max_nodes, nnodes;			/* number of nodes */int node_map[MAXWRKR], node_id[MAXWRKR];int tot_speed,h_speed;int  qlib = 0;	/* number of sequences scanned */struct db_str ntt, qtt;extern int max_workers, worker_1, worker_n;int  wlsn [MAXWRKR + 1];	/* number of library sequences in worker */int  clsn [MAXWRKR + 1];	/* number of 1st library sequence in worker */int max_buf_cnt;#ifdef PVM_SRC#ifndef WORKERPGM#define WORKERPGM "c35.work"#endif#endifmain (int argc, 
char *argv[]){  unsigned char *aa00, *aa01, *aa0p0, *aa0p1;  unsigned char *aa1, *aa1ptr, *aa1prev;  int aa1i, *aa1i_arr;	/* integer offset of sequence in buffer */  char *info_gstring2p[2];  char info_gstring3[MAX_STR];                  /* string for label */  char *info_hstring_p[2];  char lib_range[MAX_SSTR];  int n1;  int *n1tot_ptr=NULL, *n1tot_cur;  int n1tot_cnt=0;  int n1tot_v;  long l_off;  char nodefile[240];  struct pstruct pst;  int i_score;  struct lmf_str *q_file_p;  struct lmf_str *l_file_p;  /* from manage code */  struct mngmsg m_msg0, m_msg1;	/* Message from host to manager */  struct mngmsg *m_msp0, *m_msp1;	/* alternating pointers */  struct qmng_str qm_msg0, qm_msg1;	/* stuff updated for each query */  char q_sqnam[4];   int sstart, sstop;      struct db_str lldb;	/* entry, length information for each search */  struct qmng_str *qm_msp0, *qm_msp1;	/* pointer to stuff updated */  int last_msg_b[10];	/* last set of numbers */  long curtype = ONETYPE;	/* current message type */  int nclib;  struct beststr *best,		/* array of best scores */                 **bptr;	/* array of pointers */  struct comstr bestr[BFR+1];	/* temporary structure array */  struct comstr2 bestr2[BFR2+1];	/* temporary structure array */  struct a_struct *aln_d_base=NULL;	/* alignment info for -m 9 */  int qres_bufsize;		/* buffer size for results */  struct stat_str *stats=NULL, *qstats=NULL;  int best_flag = 1;		/* bptr[] must be re-initialized */  int fast_flag = 0;		/* send new sequences before old displayed */  int nstats, nqstats, kstats, jstats;  int nbest, nres;		/* number of best scores */  double zbestcut = -BIGNUM;	/* z-value cutoff */  int lcnt;			/* counters */  int nopt;  int i, j, k, is, id, iw, ires, naa0 = 0;  FILE *fdata=NULL;		/* file for full results */  struct seq_record *seq_p;  struct seq_record *ldes;		/* descriptive lines for all lib sequences */  char *bline_buf, *bline_bufp;  char *bline_buf_mx;	/* buffer for blines */  char q_bline[256];  char 
t_bline[256];  int max_bline_b, bline_inc;  int *n1_arr, *m_seqnm_arr;  unsigned char *aa1_buf;  char tlibstr[11];		/* used only for fdata *.res files */    int node, snode, zero;	/* Number of nodes */  int bufid, numt, tid;  int ave_seq_len;  int max_sql;  int ntbuff, nseq, m_seqnm;  int iln, ocont, maxt;  long loffset;  int leng;			/* leng is length of the descriptive line */  fseek_t qseek,lseek;		/* seek into library of current sequence */  int qlcont,lcont;			/* continued sequence */  int n_proc, n_tmp;  char errstr[120];  int stats_done =0;			/* flag for z-value processing */  int tm_best, t_rbest, t_qrbest, t_best, t_n1;  double e_score, tm_escore, t_rescore, t_qrescore;  double zscore;			/* tmp value */  double k_H, k_comp;  char tmp_str[MAX_FN];  char info_pgm_abbr[MAX_SSTR];  char *bp;#ifdef MPI_SRC  MPI_Status mpi_status;#endif  void fsigint();    /* this is necessary because of an SGI Irix 64 issue */  info_gstring2p[0] = calloc(MAX_STR,sizeof(char));  info_gstring2p[1] = calloc(MAX_STR,sizeof(char));  info_hstring_p[0] = calloc(MAX_STR,sizeof(char));  info_hstring_p[1] = calloc(MAX_STR,sizeof(char));  signal(SIGHUP,SIG_IGN);  if (signal(SIGINT,SIG_IGN) != SIG_IGN) signal(SIGINT,fsigint);  if (signal(SIGQUIT,SIG_IGN) != SIG_IGN) signal(SIGQUIT,fsigint);/*  if (signal(SIGSEGV,SIG_IGN) != SIG_IGN) signal(SIGSEGV,fsigint); */  /* Initialization */#if defined(UNIX)  m_msg0.quiet = !isatty(1);#endif  /* BFR must be %6 = 0 for TFASTA */  if ((BFR%6) != 0) {    fprintf(stderr," BFR size %d not %%6=0 - recompile\n",BFR);    exit(1);  }#ifdef MPI_SRC  MPI_Init(&argc, &argv);  MPI_Comm_rank(MPI_COMM_WORLD,&tid);  if (tid > 0) {    workcomp(tid);     MPI_Finalize();    exit(0);  }#endif  printf("#");  for (i=0; i<argc; i++) {    if (strchr(argv[i],' ')) printf(" \"%s\"",argv[i]);    else printf(" %s",argv[i]);  }  printf("\n");#ifdef MPI_SRC  MPI_Comm_size(MPI_COMM_WORLD,&nnodes);  if (nnodes <= 1) {    fprintf(stderr," nnodes = %d; no workers 
available\n",nnodes);    exit(1);  }  else fprintf(stderr," have %d nodes\n",nnodes);  tot_speed = nnodes*100;#endif  h_init (&pst,&m_msg0, info_pgm_abbr);  initenv (argc, argv, &m_msg0, &pst, &aa00);#ifdef PVM_SRC  strncpy (workerpgm, WORKERPGM,sizeof(workerpgm)-1);  strncat(workerpgm, info_pgm_abbr, sizeof(workerpgm)-strlen(workerpgm)-1);  workerpgm[sizeof(workerpgm)-1] = '\0';#endif    strncpy(q_sqnam,"aa",sizeof(q_sqnam));  m_msg0.quiet = 1;  if (m_msg0.qdnaseq != SEQT_UNK &&       (m_msg0.qdnaseq == SEQT_DNA || m_msg0.qdnaseq == SEQT_RNA))    strncpy(q_sqnam,"nt",sizeof(q_sqnam));  m_msg0.pstat_void = NULL;  m_msg0.hist.hist_a = NULL;  m_msg1.pstat_void = NULL;  m_msg1.hist.hist_a = NULL;  fprintf (stderr, "Pcomp library processor\n");  fprintf (stderr, "Using %s\n", prog_func);    tstart = tscan = s_time();  tdstart = time(NULL);  #ifdef PVM_SRC  if ((hosttid=pvm_mytid())<0) {    pvm_perror("initialization");    fprintf(stderr,"can't initialize %s\n", argv[0]);    pvm_exit();    exit(1);  }    pvm_config(&nnodes,&narch,&hostp);  fprintf(stderr,"nnodes: %d, narch: %d\n",nnodes, narch);  max_nodes = nnodes;#ifdef DEBUG  pvm_catchout(stderr);#endif/*  if (nnodes < 2 ) nnodes = 4; */  if (max_workers > 0  && nnodes > max_workers) {    nnodes = max_workers+FIRSTNODE;    fprintf(stderr," workers reset from %d to %d\n",	    max_nodes,nnodes-FIRSTNODE);  }  else max_workers = nnodes;    strncpy(nodefile,pgmdir,sizeof(nodefile)-1);  strncat(nodefile,workerpgm,sizeof(nodefile)-strlen(nodefile)-1);  nodefile[sizeof(nodefile)-1] = '\0';  if (worker_1 > 0) {    /* remap configuration to specific nodes */    for (i=FIRSTNODE, j=worker_1; i<nnodes && j<=worker_n; i++,j++)      node_id[i]=j;    nnodes = i;    max_workers = i-FIRSTNODE;    fprintf(stderr," workers remapped from %d to %d\n",	    max_nodes,nnodes-FIRSTNODE);    max_nodes = nnodes;  }  else {    for (i=0; i< nnodes; i++) node_map[i]=node_id[i] = i;  }  if (nnodes < max_nodes) {    hostp++;	/* bump over host name 
for spawn */    rand_nodes(node_map,nnodes,max_nodes-1);    for (i=FIRSTNODE; i<nnodes; i++) {      numt+=pvm_spawn(nodefile,NULL,PvmTaskHost,hostp[node_map[i]].hi_name,		      1,&pinums[i]);    }  }  else {    /* i counts through nodes (machines) */    /* j counts through processes (multiple processes/node) */    /* node map maps the process (virtual node) to a physical node (machine) */    for (i=j=FIRSTNODE; i<nnodes && j < MAXWRKR; i++) {      n_proc = hostp[node_id[i]].hi_speed%100;      if (n_proc == 0) n_proc = 1;      if (n_proc > MAXWRKR/max_nodes) n_proc = MAXWRKR/max_nodes;      n_tmp =pvm_spawn(nodefile,NULL,PvmTaskHost,hostp[node_id[i]].hi_name,		       n_proc,&pinums[j]);      if (n_tmp < n_proc)	fprintf(stderr," spawn problem: %d\n", pinums[j]);      if (n_tmp > 0) {	for (k=j; k < j+n_tmp; k++) node_map[k]=node_id[i];	j += n_tmp;      }    }    nnodes = numt = j;  }  if (numt < nnodes) {    if (numt <= 0) {      pvm_perror("");      pvm_exit();      exit(1);    }    nnodes = numt;  }  for (tot_speed=0,i=FIRSTNODE; i<nnodes; i++) {    if (pinums[i]<0) {      fprintf(stderr," tids %d %8o\n",i,pinums[i]);      pvm_perror("");      pvm_exit();      exit(1);    }    else {      h_speed = hostp[node_map[tidtonode(pinums[i])]].hi_speed;      if (h_speed <= 0) h_speed = 100;      fprintf(stderr," tids %d %8o %s %5d\n",i,pinums[i],	      hostp[node_map[tidtonode(pinums[i])]].hi_name,	      h_speed);      tot_speed +=(hostp[node_map[tidtonode(pinums[i])]].hi_speed);    }  }  strncpy(worknode,nodefile,sizeof(worknode));  fprintf (stderr, "%3d worker programs loaded from %s\n",	   nnodes-FIRSTNODE,worknode);#endif    /* need to allocate two aa0 arrays so that the old is saved for alignments */  /* Allocate space for the query sequence */  if ((aa00 = (unsigned char *) malloc ((MAXTST + SEQ_PAD + 1)* sizeof (char))) == NULL)    s_abort ("Unable to allocate query sequence", "");

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -