📄 logodds.c
字号:
/* * $Id: logodds.c 1339 2006-09-21 19:46:28Z tbailey $ * * $Log$ * Revision 1.3 2005/10/25 19:06:39 nadya * rm old macro for Header, all info is taken care of by Id and Log. * * Revision 1.2 2005/10/02 00:20:33 nadya * update command line with a proper path * * Revision 1.1.1.1 2005/07/29 17:16:24 nadya * Importing from meme-3.0.14, and adding configure/make * *//************************************************************************ ** MEME++ ** Copyright 1994, The Regents of the University of California ** Author: Timothy L. Bailey ** ************************************************************************/#include "macros.h"#include "logodds.h"/**********************************************************************//* read_log_odds Read in the next log-odds matrices from a file. Returns the number of matrices read. Updates los and f. Sets up alphabet hash tables. Ambiguous letters are given logodds scores equal to the weighted average of the scores of the possible true letters. If range > 0, 1) scale and round the log-odds matrices so that all entries are in range [0...range]. 2) Matrix entries will be rounded to log_10(range) significant digits. 3) Bit score values can be restored (with some loss of significance) score_bit = (score/scale) + (w*offset) See (install-path>/bin/make_logodds for format of logodds file.*//**********************************************************************/extern int read_log_odds( BOOLEAN translate_dna,/* DNA sequences and protein motifs */ char *filename, /* file name (output of make_logodds) */ char *alphabet, /* alphabet of log-odds matrices */ char *blast_alphabet, /* corresponding BLAST alphabet */ int *p[MAXASCII], /* alphabet permutation/substitution matrix */ int range, /* scale entries in logodds matrices to [0..range] */ LO *los[MAXLO+1], /* log-odds structures */ double *f /* null letter frequencies for alphabet (pointer) */){ int i, j, k; int nmotifs; /* number of motifs */ static FILE *fptr; /* file pointer */ LO *lo; /* log odds structure pointer */ int alen = strlen(blast_alphabet); /* length of BLAST alphabet */ /* open the log-odds file */ fptr = fopen(filename, "r"); if (fptr == NULL) { fprintf(stderr, "Cannot open file `%s'.\n", filename); exit(1); } /* read in all the log-odds matrices */ for (nmotifs=0; ; nmotifs++) { /* number of logodds matrix */ lo = NULL; Resize(lo, 1, LO); /* create a log odds structure */ los[nmotifs] = lo; /* put pointer to it in the array */ /* Too many logodds matrices? */ if (nmotifs > MAXLO) { fprintf(stderr, "Too many logodds matrices. Recompile with larger MAXLO.\n"); exit(1); } /* read in header line */ if ( (i=fscanf(fptr, "%d %d %le %d", &(lo->w), &(lo->alen), &(lo->ev), &(lo->pair))) != 4 ) { if (nmotifs == 0) { fprintf(stderr, "Error reading log-odds matrix file `%s'.\n", filename); exit(1); } else { break; } } /* create the score matrix and best matching sequence */ lo->logodds = NULL; Resize(lo->logodds, lo->w, double *); lo->best_seq = NULL; Resize(lo->best_seq, lo->w+1, char); lo->best_icseq = NULL; for (i=0; i < lo->w; i++) { double tmp; double best_score = LITTLE; /* best score in row */ char best_letter = 'X'; /* best letter in row */ double *row = NULL; Resize(row, alen, double); /* temporary row */ lo->logodds[i] = NULL; Resize(lo->logodds[i], lo->alen, double); /* row */ for (j=0; j < lo->alen; j++) { /* pos in old alph */ if ( fscanf(fptr, "%lf", &tmp) != 1) { /* read score */ if (i!=0 || j!=0) { fprintf(stderr, "Error reading log-odds matrix file `%s'.\n", filename); exit(1); } } lo->logodds[i][j] = tmp; } /* permute the columns of the row so they are in alphabetical order and calculate the scores for any missing, ambiguous letters; determine best matching letter for row */ for (j=0; j<alen; j++) { /* position in new alphabet */ double score = 0; double freq = 0; int old_p, new_p; /* no need for weighted average if only one possible letter */ if (p[j][1] < 0) { row[j] = lo->logodds[i][p[j][0]]; /* substitute single score */ } else { /* calculate weighted average of matching letter scores */ for (k=0; (old_p = p[j][k]) >= 0; k++) { /* p list */ new_p = hash(alphabet[old_p]); /* new position of letter */ score += lo->logodds[i][old_p] * f[new_p]; /* weighted sum */ freq += f[new_p]; /* total frequency */ } /* p list */ row[j] = score / freq; /* weighted average */ } if (p[j][1] < 0 && row[j] > best_score) {/* update best sgl.let score */ best_score = row[j]; /* best score */ best_letter = blast_alphabet[j]; /* best letter */ } } /* position in new alphabet */ /*printf("\n");*/ myfree(lo->logodds[i]); /* free old space */ lo->logodds[i] = row; /* replace row with permuted/substituted row */ lo->best_seq[i] = best_letter; /* build best matching seq */ } /* position in motif */ /* initialize lo flags */ lo->best_seq[i] = '\0'; /* terminate best matching sequence */ lo->alen = alen; /* set length of alphabet to new alphabet's */ lo->dna = !strcmp(blast_alphabet, DNAB); /* TRUE if DNA motif */ lo->name = nmotifs+1; /* name of motif */ if (lo->pair) lo->w /= 2; /* double matrix-- divide width in half */ lo->ws = lo->w * (translate_dna ? 3 : 1); /* motif width in sequence */ /* create reverse complement of best sequence if DNA motif */ Resize(lo->best_icseq, lo->w+1, char); if (lo->dna) { /* DNA motif: reverse comp. */ strcpy(lo->best_icseq, lo->best_seq); invcomp_dna(lo->best_icseq, lo->w); } else { /* protein motif: reverse */ for (i=0; i<lo->w; i++) lo->best_icseq[i] = lo->best_seq[lo->w-1-i]; lo->best_icseq[i] = '\0'; } } /* motif */ /* Convert motif matrices to integer so entries are in [0...range]. */ if (range>0) scale_lo(los, nmotifs, range); /* Create a double-letter log-odds matrix for efficiency. */ make_double_lo(los, nmotifs); return nmotifs; } /* read_log_odds *//**********************************************************************//* min_max Find the lowest and highest scores possible with a given log-odds matrix. Returns the single lowest entry in the log-odds matrix.*//**********************************************************************/EXTERN void min_max( LOGODDS logodds, /* log-odds matrix */ int w, /* width of motif */ int a, /* length of alphabet */ double *minimum, /* minimum score */ double *maximum /* minimum score */){ int i, j; double min_score = 0; double max_score = 0; for (i=0; i < w; i++) { double min = BIG; double max = LITTLE; for (j=0; j < a; j++) { min = MIN(min, logodds(i, j)); max = MAX(max, logodds(i, j)); } min_score += min; max_score += max; } *minimum = min_score; *maximum = max_score;} /* min_max *//***********************************************************************//* motif_corr Compute correlations between pairs of motifs. The correlation between two motifs is the maximum sum of Pearson's correlation coefficients for aligned columns divided by the width of the shorter motif. The maximum is found by trying all alignments of the two motifs. The correlations are saved in lower-diagonal form in the logodds array; each motif records correlations between it and lower-numbered motifs.*//***********************************************************************/extern void motif_corr( int nmotifs, /* number of motifs */ LO *los[] /* array of logodds structures */){ int i, j, k, l, m, n, o; double *means[MAXLO]; /* means of motif columns */ /* compute the motif column means */ for (i=0; i<nmotifs; i++) { int w = los[i]->w; /* width of motif i */ int alen = los[i]->alen; /* alphabet length */ means[i] = NULL; Resize(means[i], w, double); for (j=0; j<w; j++) { /* motif column */ means[i][j] = 0; for (k=0; k<alen; k++) means[i][j] += los[i]->logodds(j,k);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -