📄 apam.c
字号:
/* pam.c 19-June-86 copyright (c) 1987 William R. Pearson read in the alphabet and pam matrix data designed for universal matcher This version reads BLAST format (square) PAM files*//* $Name: fa35_03_06 $ - $Id: apam.c,v 1.44 2008/01/25 12:51:42 wrp Exp $ */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include "defs.h"#include "param.h"#define XTERNAL#include "uascii.h"#include "upam.h"#undef XTERNALextern void alloc_pam (int d1, int d2, struct pstruct *ppst);voidpam_opts(char *smstr, struct pstruct *ppst) { char *bp; ppst->pam_ms = 0; ppst->pamoff = 0; if ((bp=strchr(smstr,'-'))!=NULL) { if (!strncmp(bp+1,"MS",2) || !strncmp(bp+1,"ms",2)) { ppst->pam_ms = 1; } else { ppst->pamoff=atoi(bp+1); } *bp = '\0'; } else if ((bp=strchr(smstr,'+'))!=NULL) { ppst->pamoff= -atoi(bp+1); *bp = '\0'; }}/* modified 13-Oct-2005 to accomodate assymetrical matrices */intinitpam (char *mfname, struct pstruct *ppst){ char line[512], *lp; int i, j, iaa, pval; int *hsq, nsq; int *sascii; char *sq; int ess_tmp, max_val, min_val; int have_es = 0; FILE *fmat; pam_opts(mfname, ppst); if ((fmat = fopen (mfname, "r")) == NULL) { printf ("***WARNING*** cannot open scoring matrix file %s\n", mfname); fprintf (stderr,"***WARNING*** cannot open scoring matrix file %s\n", mfname); return 0; }/* the size of the alphabet is determined in advance */ hsq = ppst->hsq; sq = ppst->sq; ppst->nt_align = (ppst->dnaseq == SEQT_DNA || ppst->dnaseq == SEQT_RNA);/* look for alphabet line, skipping the comments alphabet ends up in line[]*/ while (fgets (line, sizeof(line), fmat) != NULL && line[0]=='#'); /* decide whether this is a protein or DNA matrix */ if (ppst->nt_align) sascii = &nascii[0]; else sascii = &aascii[0];/* re-initialize sascii[] for matrix alphabet*/ /* save ',' value used by FASTS/FASTM/FASTF */ ess_tmp = sascii[','];/* clear out sascii */ for (i = 0; i <= AAMASK; i++) sascii[i] = NA;/* set end of line stop */ sascii[0] = sascii['\r'] = sascii['\n'] = EL; sascii[','] = ess_tmp;/* read the alphabet - determine alphabet nsq */ sq[0] = '\0'; for (i = 0, nsq = 1; line[i]; i++) { if (line[i] == '*') have_es = 1; if (line[i] > ' ') sq[nsq++] = toupper (line[i]); } sq[nsq]='\0'; nsq--;/* set end of sequence stop */ fprintf(stderr,"sq[%d]: %s\n",nsq,sq+1);/* initialize sascii */ for (iaa = 1; iaa <= nsq; iaa++) { sascii[sq[iaa]] = iaa; } if (ppst->dnaseq==SEQT_DNA) { sascii['U'] = sascii['T']; sascii['u'] = sascii['t']; } else if (ppst->dnaseq==SEQT_RNA) { sascii['T'] = sascii['U']; sascii['t'] = sascii['u']; }/* finished with sascii[] *//* setup hnt (ambiguous nt hash) values*/ hsq[0] = 0; for (iaa = 1; iaa <= nsq; iaa++) { hsq[iaa]=iaa; } if (ppst->nt_align) { /* DNA ambiguitities */ hsq[sascii['R']]=hsq[sascii['M']]=hsq[sascii['W']]=hsq[sascii['A']]; hsq[sascii['D']]=hsq[sascii['H']]=hsq[sascii['V']]=hsq[sascii['A']]; hsq[sascii['N']]=hsq[sascii['X']]=hsq[sascii['A']]; hsq[sascii['Y']]=hsq[sascii['S']]=hsq[sascii['B']]=hsq[sascii['C']]; hsq[sascii['K']]=hsq[sascii['G']]; } else /* protein ambiguities */ if (ppst->dnaseq == SEQT_UNK || ppst->dnaseq == SEQT_PROT || (ppst->nsq >= 20 && ppst->nsq <= 24)) { hsq[sascii['B']] = hsq[sascii['N']]; hsq[sascii['Z']] = hsq[sascii['E']]; hsq[sascii['X']] = hsq[sascii['A']]; } /* here if non-DNA, non-protein sequence */ else ppst->dnaseq = SEQT_OTHER;/* check for 2D pam - if not found, allocate it*/ if (!ppst->have_pam2) { alloc_pam (MAXSQ, MAXSQ, ppst); ppst->have_pam2 = 1; }/* read the scoring matrix values*/ max_val = -1; min_val = 1; for (j=0; j < nsq; j++) ppst->pam2[0][0][j] = -BIGNUM; for (iaa = 1; iaa <= nsq; iaa++) { /* read pam value line */ if (fgets(line,sizeof(line),fmat)==NULL) { fprintf (stderr," error reading pam line: %s\n",line); exit (1); } /* fprintf(stderr,"%d/%d %s",iaa,nsq,line); */ strtok(line," \t\n"); /* skip the letter (residue) */ ppst->pam2[0][i][0] = -BIGNUM; for (j = 1; j <= nsq; j++) { /* iaa limits to triangle */ lp=strtok(NULL," \t\n"); /* get the number string */ pval=ppst->pam2[0][iaa][j]=atoi(lp); /* convert to integer */ if (pval > max_val) max_val = pval; if (pval < min_val) min_val = pval; } } if (have_es==0) { sascii['*']=nsq; nsq++; sq[nsq]='*'; sq[nsq+1]='\0'; for (j=1; j<=nsq; j++) ppst->pam2[0][nsq][j]= -1; ppst->pam2[0][nsq][nsq]= max_val/2; } ppst->sqx[0]='\0'; /* initialize sqx[] */ for (i=1; i<= nsq; i++) { ppst->sqx[i] = sq[i]; ppst->sqx[i+nsq] = tolower(sq[i]); if (sascii[aa[i]] < NA && sq[i] >= 'A' && sq[i] <= 'Z') sascii[aa[i] - 'A' + 'a'] = sascii[aa[i]]+nsq; } ppst->nsq = nsq; /* save new nsq */ ppst->nsqx = nsq*2; /* save new nsqx */ ppst->pam_h = max_val; ppst->pam_l = min_val; strncpy (ppst->pamfile, mfname, MAX_FN); ppst->pamfile[MAX_FN-1]='\0'; if (ppst->pam_ms) { strncat(ppst->pamfile,"-MS",MAX_FN-strlen(ppst->pamfile)-1); } ppst->pamfile[MAX_FN-1]='\0'; fclose (fmat); return 1;}/* make a DNA scoring from +match/-mismatch values */void mk_n_pam(int *arr,int siz, int mat, int mis){ int i, j, k; /* current default match/mismatch values */ int max_mat = +5; int min_mis = -4; float f_val, f_scale; f_scale = (float)(mat - mis)/(float)(max_mat - min_mis); k = 0; for (i = 0; i<nnt-1; i++) for (j = 0; j <= i; j++ ) { if (arr[k] == max_mat) arr[k] = mat; else if (arr[k] == min_mis) arr[k] = mis; else if (arr[k] != -1) { f_val = (arr[k] - min_mis)*f_scale + 0.5; arr[k] = f_val + mis; } k++; }}struct std_pam_str { char abbrev[6]; char name[10]; int *pam; float scale; int gdel, ggap;};staticstruct std_pam_str std_pams[] = { {"P120", "PAM120", apam120, 0.346574, -20, -3}, {"P250", "PAM250", apam250, 0.231049, -12, -2}, {"P10", "MD10", a_md10, 0.346574, -27, -4}, {"M10", "MD10", a_md10, 0.346574, -27, -4}, {"MD10", "MD10", a_md10, 0.346574, -27, -4}, {"P20", "MD20", a_md20, 0.346574, -26, -4}, {"M20", "MD20", a_md20, 0.346574, -26, -4}, {"MD20", "MD20", a_md20, 0.346574, -26, -4}, {"P40", "MD40", a_md40, 0.346574, -25, -4}, {"M40", "MD40", a_md40, 0.346574, -25, -4}, {"MD40", "MD40", a_md40, 0.346574, -25, -4}, {"BL50", "BL50", abl50, 0.231049, -12, -2}, {"BL62", "BL62", abl62, 0.346574, -8, -1}, {"BP62", "BL62", abl62, 0.346574, -12, -1}, {"BL80", "BL80", abl80, 0.346574, -12, -2}, {"VT160","VT160", avt160, 0.231049, -12, -2}, {"OPT5","OPTIMA5", aopt5, 0.138629, -20, -2}, {"\0", "\0", NULL, 0.0, 0, 0}};intstandard_pam(char *smstr, struct pstruct *ppst, int del_set, int gap_set) { struct std_pam_str *std_pam_p; pam_opts(smstr, ppst); for (std_pam_p = std_pams; std_pam_p->abbrev[0]; std_pam_p++ ) { if (strcmp(smstr,std_pam_p->abbrev)==0) { pam = std_pam_p->pam; strncpy(ppst->pamfile,std_pam_p->name,MAX_FN); ppst->pamfile[MAX_FN-1]='\0'; if (ppst->pam_ms) { strncat(ppst->pamfile,"-MS",MAX_FN-strlen(ppst->pamfile)-1); } ppst->pamfile[MAX_FN-1]='\0';#ifdef OLD_FASTA_GAP if (!del_set) ppst->gdelval = std_pam_p->gdel;#else if (!del_set) ppst->gdelval = std_pam_p->gdel-std_pam_p->ggap;#endif if (!gap_set) ppst->ggapval = std_pam_p->ggap; ppst->pamscale = std_pam_p->scale; return 1; } } return 0;}/* ESS must match uascii.h */#define ESS 49/* build_xascii is only used for SEQT_UNK - it replaces the default input mapping (aascii[]) with a mapping that preserves any letter in either the aax[], ntx[], or othx[] alphabets, or in save_str[]. othx[] was added to support letters that are mapped, but are not (yet) in aax[], e.g. 'OoUu'. Because build_xascii makes a qascii[] that is all ascii characters with values > '@', these values must be replaced using either aascii[] or nascii[] and the initial query sequnce re-coded. */voidbuild_xascii(int *qascii, char *save_str) { int i, max_save; int comma_val, term_val; int save_arr[MAX_SSTR]; comma_val = qascii[',']; term_val = qascii['*']; /* preserve special characters */ for (i=0; i < MAX_SSTR && save_str[i]; i++ ) { save_arr[i] = qascii[save_str[i]]; } max_save = i; for (i=1; i<128; i++) { qascii[i]=NA; } /* range of values in aax, ntx is from 1..naax,nntx - do not zero-out qascii[0] - 9 Oct 2002 */ for (i=1; i<naax; i++) { qascii[aax[i]]=aax[i]; } for (i=1; i<nntx; i++) { qascii[ntx[i]]=ntx[i]; } /* put it letters that are not in other alphabets because they are mapped */ for (i=1; i<nothx; i++) { qascii[othx[i]]=othx[i]; } qascii['\n']=qascii['\r']=qascii[0] = EL; qascii[','] = comma_val; qascii['*'] = term_val; for (i=0; i < max_save; i++) { qascii[save_str[i]]=save_arr[i]; }}/* checks for lower case letters in *sq array; if not present, map lowercase to upper*/voidinit_ascii(int is_ext, int *sascii, int is_dna) { int isq, have_lc; char *sq, term_char; int nsq; if (is_dna==SEQT_UNK) return; term_char = sascii['*']; if (is_dna==SEQT_DNA || is_dna == SEQT_RNA) { if (is_ext) { sq = &ntx[0]; nsq = nntx; } else {sq = &nt[0]; nsq = nnt;} } else { if (is_ext) { sq = &aax[0]; nsq = naax; } else {sq = &aa[0]; nsq = naa;} }/* initialize sascii from sq[], checking for lower-case letters */ have_lc = 0; for (isq = 1; isq <= nsq; isq++) { sascii[sq[isq]] = isq; if (sq[isq] >= 'a' && sq[isq] <= 'z') have_lc = 1; } /* no lower case letters in alphabet, map lower case to upper */ if (have_lc != 1) { for (isq = 1; isq <= nsq; isq++) { if (sq[isq] >= 'A' && sq[isq] <= 'Z') sascii[sq[isq]-'A'+'a'] = isq; } if (is_dna==1) sascii['u'] = sascii['t']; } sascii['*']=term_char;}void print_pam(struct pstruct *ppst) { int i, nsq, ip; char *sq; fprintf(stderr," ext_sq_set: %d\n",ppst->ext_sq_set); nsq = ppst->nsq; ip = 0; sq = ppst->sq; fprintf(stderr," sq[%d]: %s\n",nsq, sq); if (ppst->ext_sq_set) { nsq = ppst->nsqx; ip = 1; sq = ppst->sqx; fprintf(stderr," sq[%d]: %s\n",nsq, sq); } for (i=1; i<=nsq; i++) { fprintf(stderr," %c:%c - %3d\n",sq[i], sq[i], ppst->pam2[ip][i][i]); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -