📄 mast-util.c
字号:
char *h = ((j==0) ? hdr : " "); /* current header */ dlen = PAGEWIDTH - hlen - 6; if (remain <= PAGEWIDTH - hlen) dlen = remain; fprintf(file, "%-*.*s%.*s", hlen, hlen, h, dlen, dia+j); j += dlen; /* continue printing until a good breaking point */ while (j < dia_len && dia[j-1] != '_' && dia[j-1] != ',') putc(dia[j++], file); putc('\n', file); }} /* print_diagram *//**********************************************************************//* score_it Score the sequence with each motif. Returns the array of scores: scores[i][j].score = score of motif i at position j scores[i][j].ic = score on - strand (set to FALSE here)*//**********************************************************************/static SCORE **score_it( BOOLEAN xlate_dna, /* database is DNA and motifs protein */ LO *los[], /* array of pointers to log-odds matrices */ int nmotifs, /* number of motifs */ char *sequence, /* sequence */ long length /* length of the sequence */){ int imotif; long j, k; SCORE **scores = NULL; /* scores[i][j].score motif i at offset j */ int *hash_seq = NULL; /* hashed sequence */ /* Hash sequence to index in alphabet. */ hash_seq = dhash_it(xlate_dna, los[0]->alen, sequence, length); /* Create scores array. */ Resize(scores, nmotifs, SCORE *); /* Score the sequence with each motif. */ for (imotif=0; imotif<nmotifs; imotif++) { /* motif */ LO *lo = los[imotif]; /* array of motifs */ LOGODDS2 logodds2 = lo->logodds2; /* motif matrix */ int r = (lo->w+1)/2; /* number of rows in the matrix */ int ws = lo->ws; /* width the motif in the sequence */ int inc1 = 1; /* increment to next site */ int inc2 = 2*(xlate_dna ? 3 : 1); /* increment to next hashed column */ /* create array of scores unless sequence shorter than motif */ scores[imotif] = NULL; if (ws > length) { continue; /* skip this motif */ } else { Resize(scores[imotif], length, SCORE); } /* Score each subsequence with the current motif. */ for (j=0; j<=length-ws; j+=inc1) { /* position */ int *h; /* pointer in hash_seq */ int score = 0; /* subsequence score */ /* motif score (positive motif) */ for (k=0, h=hash_seq+j; k<r; k++, h+=inc2) score += logodds2(k, *h); /* 3-class score */ if (lo->pair) { int neg = 0; /* subsequence score, neg motif */ double score3; /* 3-class bit score */ for (k=r, h=hash_seq+j; k<2*r; k++, h+=inc2) neg += logodds2(k, *h); score3 = score3class(score, neg, lo); score = bit_to_scaled(score3, 1, lo->scale3, lo->offset3); } scores[imotif][j].score = score; scores[imotif][j].ic = FALSE; } /* position */ } /* imotif */ myfree(hash_seq); return scores;} /* score_it *//**********************************************************************//* best_hit Determine whether a given motif occurrence has the lowest p-value for that motif.*//**********************************************************************/static BOOLEAN best_hit( int index, /* Position of the given motif. */ int motif, /* Motif number. */ int *hits, /* Motif indices. */ double *pvalues, /* Array of pvalues. */ long length /* Length of pvalue array. */){ long i; double lowest_pvalue = 1.0; int best_index = 0; for (i = 0; i < length; i++) { int m = abs(hits[i])-1; /* motif of hit */ if ((m == motif) && (pvalues[i] < lowest_pvalue)) { lowest_pvalue = pvalues[i]; best_index = i; } } return (best_index == index);} /* best_hit */ /**********************************************************************//* make_block Create a block string: [smf(p)] or <smf(p)> s strand (optional) m motif f frame (optional) (p) p-value (optional) *//**********************************************************************/extern void make_block( int m, /* motif number */ char *strand, /* strand */ int f, /* frame number; f=0 not translating DNA */ double thresh, /* strong motif threshold */ double p, /* p-value */ BOOLEAN print_p, /* print p-value in block if TRUE */ char *block /* put block string here */) { char left = p < thresh ? '[' : '<'; char right = p < thresh ? ']' : '>'; BOOLEAN xlate_dna = (f != 0); char *fnames = "abc"; /* frame 1=a, 2=b, 3=c*/ if (print_p) { /* print p-value */ char *bfmt = f ? "%c%s%d%c(%8.2e)%c" : "%c%s%d(%8.2e)%c"; if (xlate_dna) { /* str., motif, frame */ sprintf(block, bfmt, left, strand, m, fnames[f-1], p, right); } else { /* strand, motif */ sprintf(block, bfmt, left, strand, m, p, right); } } else { /* don't print p-value */ char *bfmt = f ? "%c%s%d%c%c" : "%c%s%d%c"; if (xlate_dna) { /* str., motif, frame */ sprintf(block, bfmt, left, strand, m, fnames[f-1], right); } else { /* strand, motif */ sprintf(block, bfmt, left, strand, m, right); } }} /* make_block *//**********************************************************************//* score_tile_diagram Score a sequence, tile it with motif occurrences and create a block diagram string. Returns the tiling containing the hits, their position p-values, and the p-value of the product of p-values for the best hits.*//**********************************************************************/extern TILING score_tile_diagram( char *sequence, /* sequence to score and tile */ long length, /* length of sequence */ LO *los[], /* array of pointers to lo matrices */ int nmotifs, /* number motifs read */ BOOLEAN dna, /* database is DNA */ STYPE stype, /* handling of different strands */ BOOLEAN xlate_dna, /* database is DNA and motifs protein */ BOOLEAN best_motifs, /* show only best motifs in diagrams */ BOOLEAN print_p, /* print p-value in block */ double **pv, /* p-value tables for each motif */ double m_thresh, /* maximum motif p-value to print */ double w_thresh, /* max. motif p-value for weak hits */ BOOLEAN use_seq_p, /* use sequence not position p-values */ BOOLEAN hit_list, /* create hit list instead of diagram */ char *name /* name of sequence; only used if hit_list=TRUE */){ SCORE **scores; /* scores for each motif vs seq. pos. */ TILING tiling; /* tiling and diagram of sequence */ /* score the sequence with each of the motifs */ scores = score_sequence(stype, xlate_dna, sequence, length, nmotifs, los); /* mark the non-overlapping motif positions */ tiling = tile_sequence(stype, pv, w_thresh, use_seq_p, los, nmotifs, length, scores); /* add the block diagram to the tiling structure */ tiling.diagram = create_diagram(dna, stype, xlate_dna, best_motifs, print_p, m_thresh, nmotifs, los, length, hit_list, name, tiling); free_2array(scores, nmotifs); return(tiling);} /* score_tile_diagram *//**********************************************************************//* qfast Calculate the p-value of the product of uniform [0,1] random variables.*//**********************************************************************/extern double qfast( int n, /* number of random variables in product */ double k /* product of random variables */){ int i = 1; double mlnk, term, phi; if (n == 0) return 1.0; /* worst possible p-value */ if (k == 0) return 0.0; /* p-value is 0 */ mlnk = -log(k); phi = term = k; for (i=1; i<n; i++) { term *= mlnk/i; phi += term; } return phi;} /* qfast *//**********************************************************************//* init_mast_background Determine the BLAST alphabet corresponding to the input alphabet. Set up the hashing functions for the alphabet(s). Read in the background frequencies for letters (or use non-redundant database frequencies if no file name given). The background frequency file should contain frequencies for the DNA0 or PROTEIN0 alphabet in the format required by read_markov model. Sets blast_alphabet : the corresponding BLAST alphabet p : permutation/substitution matrix from alphabet to blast_alphabet Returns the background frequencies for the BLAST alphabet.*//**********************************************************************/extern double *init_mast_background( char *bfile, /* name of background file */ char *alphabet, /* motif alphabet */ STYPE stype, /* handling of DNA strands */ BOOLEAN translate_dna, /* DNA sequences and protein motifs */ char **blast_alphabet, /* corresponding BLAST alphabet */ int *p[MAXASCII] /* permutation/substitution matrix */){ int i; int alen; double *back=NULL, *tmp; char *bfile_alpha; /* alphabet in bfile */ int order; /* order of background */ /* average reverse complement frequencies */ BOOLEAN rc = (stype==Separate || stype==Combine); /* Set up the hashing functions for mapping between letters or codons and alphabet positions. */ setup_hash_alph(DNAB); /* DNAB to position hashing */ setup_hash_alph(PROTEINB); /* PROTEINB to position hash */ /* Determine the alphabet used by the motifs. Make a permutation/substitution mapping from that alphabet to one of the BLAST alphabets. */ *blast_alphabet = get_blast_alphabet(alphabet, p); alen = strlen(*blast_alphabet); /* Set up to translate DNAB to PROTEINB if requested. */ if (translate_dna) { if (strcmp(*blast_alphabet, PROTEINB)) { /* motifs must be protein */ fprintf(stderr, "\nThe -dna switch requires protein motifs. "); fprintf(stderr, "Your motif(s) use a DNA alphabet.\n"); exit(1); } setup_hash_dnab2protb(); /* DNAB to PROTEINB hashing */ } /* Determine the type of motif alphabet and set it as current */ if (!strcmp(*blast_alphabet, DNAB)) { /* searching DNA with DNA motifs */ tmp = ntfreq; bfile_alpha = DNA0; setalph(0); /* DNAB alphabet */ } else { if (translate_dna) { /* searching DNA with protein motifs */ tmp = frame0; } else { /* searching protein with protein mtfs*/ tmp = nrfreq; rc = FALSE; /* no reverse complement! */ } bfile_alpha = PROTEIN0; setalph(1); /* PROTEINB alphabet */ } /* Read in the background probabilities from a Markov model file using the basic alphabet. Convert to the BLAST alphabet. */ Resize(back, alen, double); /* create results array */ if (bfile) { /* use bfile frequencies */ int alenb = strlen(bfile_alpha); /* length of '0' alphabet */ tmp = read_markov_model(bfile, NULL, bfile_alpha, FALSE, rc, &order); for (i=0; i<alen; i++) back[i] = 0; for (i=0; i<alenb; i++) back[hash(bfile_alpha[i])] = tmp[i]; } else { for (i=0; i<alen; i++) back[i] = tmp[i]; } return(back);} /* init_mast_background *//**********************************************************************//* free_tiling Free a tiling structure.*//**********************************************************************/extern void free_tiling( TILING tiling) { myfree(tiling.pvalues); myfree(tiling.diagram);} /* free_tiling *//**********************************************************************//* get_seq_comp Get the letter frequencies in a sequence. Doesn't take into account reverse complement.*//**********************************************************************/double *get_seq_comp( BOOLEAN xlate_dna, /* translate DNA */ char *sequence, /* ASCII sequence */ int alen /* length of alphabet */){ int i, n; double *freq = NULL; /* create the frequency array */ Resize(freq, alen, double); for (i=0; i<alen; i++) freq[i] = 0; /*for (i=0; i<alen; i++) freq[i] = 1;*/ /* add one prior */ /* count the number of letters of each type */ for (n=0; sequence[n]; n++) { i = chash(xlate_dna, FALSE, sequence+n); /* hash letter */ freq[i]++; } /* convert counts to frequencies */ for (i=0; i<alen; i++) freq[i] = n ? freq[i]/n : 1.0; /*for (i=0; i<alen; i++) freq[i] = n ? freq[i]/(n+alength) : 1.0;*/ /* prior */ /* return the frequency matrix */ return(freq);} /* get_seq_comp */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -