📄 hash_alph.h
字号:
/* * $Id: hash_alph.h 1339 2006-09-21 19:46:28Z tbailey $ * * $Log$ * Revision 1.1 2005/07/29 18:38:54 nadya * Initial revision * */#ifndef HASH_ALPH_H# define HASH_ALPH_H#include "config.h"/* alphabets*/#define DNA0 "ACGT" /* minimal DNA alphabet */#define DNAB "ABCDGHKMNRSTUVWY*-" /* BLAST DNA alphabet */#define PROTEIN0 "ACDEFGHIKLMNPQRSTVWY" /* minimal protein alphabet */#define PROTEINB "ABCDEFGHIKLMNPQRSTUVWXYZ*-" /* BLAST PROTEIN alphabet *//* alphabet substitutions */#ifndef DEFINE_GLOBALS extern char *dna_subst[]; extern char *prot_subst[];#else char *dna_subst[] = { "A", /* A */ "GTC", /* B */ "C", /* C */ "AGT", /* D */ "G", /* G */ "ATC", /* H */ "GT", /* K */ "AC", /* M */ DNA0, /* N */ "AG", /* R */ "GC", /* S */ "T", /* T */ "T", /* U */ "AGC", /* V */ "AT", /* W */ "TC", /* Y */ DNA0, /* * */ DNA0 /* - */ }; char *prot_subst[] = { "A", /* A */ "DN", /* B */ "C", /* C */ "D", /* D */ "E", /* E */ "F", /* F */ "G", /* G */ "H", /* H */ "I", /* I */ "K", /* K */ "L", /* L */ "M", /* M */ "N", /* N */ "P", /* P */ "Q", /* Q */ "R", /* R */ "S", /* S */ "T", /* T */ PROTEIN0, /* U */ "V", /* V */ "W", /* W */ PROTEIN0, /* X */ "Y", /* Y */ "EQ", /* Z */ PROTEIN0, /* * */ PROTEIN0 /* - */ };#endif/* length of DNAB alphabet*/EXTERN int dnablen;/* alphabet frequencies*/#ifndef DEFINE_GLOBALS extern double ntfreq[]; extern double nrfreq[]; extern double frame0[]; #else /* BLAST DNA alphabet: frequencies for NT 9/22/96 */ double ntfreq[] = { 0.281475655 /* A */, 0.000000649 /* B */, 0.221785822 /* C */, 0.000001389 /* D */, 0.228634607 /* G */, 0.000001612 /* H */, 0.000006323 /* K */, 0.000005848 /* M */, 0.000991686 /* N */, 0.000015904 /* R */, 0.000009514 /* S */, 0.267048106 /* T */, 0.0 /* U; doesn't occur in NT */, 0.000000968 /* V */, 0.000005846 /* W */, 0.000016070 /* Y */, 0.0 /* *; doesn't occur in NT */, 0.0 /* -; doesn't occur in NT */ }; /* BLAST protein alphabet: frequencies for NR 9/22/96 */ double nrfreq[] = { 0.073091885 /* A */, 0.000021047 /* B */, 0.018145453 /* C */, 0.051687956 /* D */, 0.062278511 /* E */, 0.040243411 /* F */, 0.069259642 /* G */, 0.022405456 /* H */, 0.056227000 /* I */, 0.058435042 /* K */, 0.091621836 /* L */, 0.023044274 /* M */, 0.046032137 /* N */, 0.050623807 /* P */, 0.040715284 /* Q */, 0.051846246 /* R */, 0.073729031 /* S */, 0.059352333 /* T */, 0.000000110 /* U */, 0.064298546 /* V */, 0.013328158 /* W */, 0.000941980 /* X */, 0.032649745 /* Y */, 0.000021111 /* Z */, 0.0 /* *; doesn't occur in NR */, 0.0 /* -; doesn't occur in NR */ }; /* Tranlated DNAB to PROTEINB */ /* shuffle_db -nostop < est > tmp1; getsize -f -x tmp1 est is the 11/11/97 EST database from ncbi Frequencies for U, *, - added by hand. */ double frame0[] = { 0.06995463 /* A */, 0.00000228 /* B */, 0.02398853 /* C */, 0.03950999 /* D */, 0.05199564 /* E */, 0.03955549 /* F */, 0.07363406 /* G */, 0.02911862 /* H */, 0.04092760 /* I */, 0.05187635 /* K */, 0.09566732 /* L */, 0.01747826 /* M */, 0.03216447 /* N */, 0.06506016 /* P */, 0.04180057 /* Q */, 0.06748940 /* R */, 0.08353402 /* S */, 0.05231376 /* T */, 0.0 /* U */, 0.05927076 /* V */, 0.01585647 /* W */, 0.02720962 /* X */, 0.02158787 /* Y */, 0.00000413 /* Z */, 0.0 /* *; doesn't occur in NT */, 0.0 /* -; doesn't occur in NT */ };#endif/* alphabet hashing functions setalph: set current alphabet to DNAB or PROTEINB 0 -> DNAB alphabet 1 -> PROTEINB alphabet 2 -> user specified alphabet hash: character to position in current alphabet unhash: position in alphabet to alphabet character dnabhash: DNAB character to position in alphabet protbhash: PROTEINB character to position in alphabet chash: hash contents of pointer (letter or codon) to position in current alphabet: !X -> hash single letter (DNAB or PROTEINB) X -> hash DNAB codon to PROTEINB IC -> hash inverse complement of letter/codon dhash: hash two letters to single (double-letter) index L1, L2- letters in alphabet A - length of alphabet*//*#define setalph(I) hindex = (I==0 ? dnabindex : (I==1 ? protbindex : cpindex))*/#define setalph(I) { \ hindex = (I==0 ? dnabindex : (I==1 ? protbindex : cpindex)); \ dnaindex = (I==0 ? dnabindex : (I==1 ? dnabindex : cpindex)); \ dna_comp = (I==0 ? dnab_comp : (I==1 ? dnab_comp : dna0_comp)); \}#define hash(L) hindex[(int)(L)]#define unhash(I) pcindex[(int)(I)]#define dnahash(L) dnaindex[(int)(L)]#define dnabhash(L) dnabindex[(int)(L)]#define protbhash(L) protbindex[(int)(L)]#define dna0hash(L) cpindex[(int)(L)]#define chash(X, IC, L) ( \ (X) ? \ ( (IC) ? \ hash_dnab2protb(comp_dnab(*(L+2)), comp_dnab(*(L+1)), comp_dnab(*(L))) \ : \ hash_dnab2protb(*(L), *(L+1), *(L+2)) \ ) \ : \ ( (IC) ? hash(comp_dna(*(L))) : hash(*(L)) ) \)#define dhash(L1, L2, A) ((L2) + ((A)+1) * L1)EXTERN int *hindex; /* character vs. position in alphabet */EXTERN int *dnaindex; /* DNA character vs. position in alphabet */EXTERN int cpindex[MAXASCII]; /* index: character vs. position in alphabet */EXTERN char pcindex[MAXASCII]; /* index: position in alphabet vs. character */EXTERN int dnabindex[MAXASCII]; /* DNAB character vs. position in alphabet */EXTERN int protbindex[MAXASCII];/* PROTEINB character vs. pos. in alphabet *//* converting to DNA complement comp_dna: convert from DNA letter to complementary DNA letter comp_dnab: convert from DNAB letter to complementary DNAB letter*/char *dna_comp; /* complementary letters *//* ABCDGHKMNRSTUVWY*- */DEXTERN(char *, dnab_comp, "TVGHCDMKNYSAABWRNN");/* ACGTX */DEXTERN(char *, dna0_comp, "TGCAX");#define comp_dna(X) dna_comp[dnahash(X)]#define comp_dnab(X) dna_comp[dnabhash(X)]/* Converting DNA to protein*//* genetic code Order is UUU, UUC, UUA, UUG, UCU, UCC, ..., GGG. U=T, so T=0, C=1, A=2, G=3.*/DEXTERN(char *, gene_code, "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG");#ifdef DEFINE_GLOBALS/* lookup table from letter to order in "TCAG"; other letters get 0 *//* A B C D E F G */ int tcag_index[MAXASCII] = {2,0,1,0,0,0,3}; /* TCAG index *//* lookup table from DNAB codon to PROTEINB position */ int *dnab2protb_index;#else extern int tcag_index[MAXASCII]; extern int *dnab2protb_index;#endif/* convert nucleic acid to index in alphabet "TCAG" */#define tcag_hash(R) (tcag_index[(R) - 'A'])/* convert codon (DNA0 alphabet) to protein letter (PROTEIN0 alphabet) */#define dna0_to_prot0(R1, R2, R3) \ gene_code[tcag_hash(R3)+(4*(tcag_hash(R2)+4*tcag_hash(R1)))]/* hash codon (DNAB alphabet) to protein letter position (PROTEINB alphabet) */#define hash_dnab2protb(R1, R2, R3) \ dnab2protb_index[dnabhash(R3)+\ ((dnablen)*(dnabhash(R2)+((dnablen)*dnabhash(R1))))]/* set things up for hashing */extern int setup_hash_alph( char *alphabet);/* unhash an integer-coded sequence */extern void r2seq( char *seq, char *res, int len);extern char *get_blast_alphabet( char *old_alph, /* old alphabet */ int *p[MAXASCII] /* permutation and substitution matrix */);extern void setup_hash_dnab2protb();extern void invcomp_dna( char *sequence, /* DNAB sequence */ long length /* length of sequence */);extern int *dhash_it( BOOLEAN xlate_dna, /* database is DNA and motifs protein */ int alen, /* length of alphabet */ char *sequence, /* sequence of sample */ long length /* length of sequence */);#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -