blast_psi_priv.c
来自「ncbi源码」· C语言 代码 · 共 1,546 行 · 第 1/4 页
C
1,546 行
/* * =========================================================================== * PRODUCTION $Log: blast_psi_priv.c,v $ * PRODUCTION Revision 1000.1 2004/06/01 18:07:34 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.6 * PRODUCTION * =========================================================================== */static char const rcsid[] = "$Id: blast_psi_priv.c,v 1000.1 2004/06/01 18:07:34 gouriano Exp $";/* =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: Alejandro Schaffer, ported by Christiam Camacho * *//** @file blast_psi_priv.c * Defintions for functions in private interface for Position Iterated BLAST * API. */#include "blast_psi_priv.h"#include "matrix_freq_ratios.h"/****************************************************************************//* Constants */const unsigned int kQueryIndex = 0;const double kEpsilon = 0.0001;const double kDefaultEvalueForPosition = 1.0;const int kPsiScaleFactor = 200;/****************************************************************************/void**_PSIAllocateMatrix(unsigned int ncols, unsigned int nrows, unsigned int data_type_sz){ void** retval = NULL; unsigned int i = 0; if ( !(retval = (void**) malloc(sizeof(void*) * ncols))) return NULL; for (i = 0; i < ncols; i++) { if ( !(retval[i] = (void*) calloc(nrows, data_type_sz))) { retval = _PSIDeallocateMatrix(retval, i); break; } } return retval;}void**_PSIDeallocateMatrix(void** matrix, unsigned int ncols){ unsigned int i = 0; if (!matrix) return NULL; for (i = 0; i < ncols; i++) { sfree(matrix[i]); } sfree(matrix); return NULL;}void_PSICopyMatrix(double** dest, const double** src, unsigned int ncols, unsigned int nrows){ unsigned int i = 0; unsigned int j = 0; for (i = 0; i < ncols; i++) { for (j = 0; j < nrows; j++) { dest[i][j] = src[i][j]; } }}/****************************************************************************/PsiAlignedBlock*_PSIAlignedBlockNew(Uint4 num_positions){ PsiAlignedBlock* retval = NULL; Uint4 i = 0; if ( !(retval = (PsiAlignedBlock*) calloc(1, sizeof(PsiAlignedBlock)))) { return NULL; } retval->pos_extnt = (SSeqRange*) calloc(num_positions, sizeof(SSeqRange)); if ( !retval->pos_extnt ) { return _PSIAlignedBlockFree(retval); } retval->size = (Uint4*) calloc(num_positions, sizeof(Uint4)); if ( !retval->size ) { return _PSIAlignedBlockFree(retval); } for (i = 0; i < num_positions; i++) { retval->pos_extnt[i].left = -1; retval->pos_extnt[i].right = num_positions; } return retval;}PsiAlignedBlock*_PSIAlignedBlockFree(PsiAlignedBlock* aligned_blocks){ if ( !aligned_blocks ) { return NULL; } if (aligned_blocks->pos_extnt) { sfree(aligned_blocks->pos_extnt); } if (aligned_blocks->size) { sfree(aligned_blocks->size); } sfree(aligned_blocks); return NULL;}PsiSequenceWeights*_PSISequenceWeightsNew(const PsiInfo* info, const BlastScoreBlk* sbp){ PsiSequenceWeights* retval = NULL; ASSERT(info); retval = (PsiSequenceWeights*) calloc(1, sizeof(PsiSequenceWeights)); if ( !retval ) { return NULL; } retval->row_sigma = (double*) calloc(info->num_seqs + 1, sizeof(double)); if ( !retval->row_sigma ) { return _PSISequenceWeightsFree(retval); } retval->norm_seq_weights = (double*) calloc(info->num_seqs + 1, sizeof(double)); if ( !retval->norm_seq_weights ) { return _PSISequenceWeightsFree(retval); } retval->sigma = (double*) calloc(info->num_seqs + 1, sizeof(double)); if ( !retval->sigma ) { return _PSISequenceWeightsFree(retval); } retval->match_weights = (double**) _PSIAllocateMatrix(info->query_sz + 1, PSI_ALPHABET_SIZE, sizeof(double)); retval->match_weights_size = info->query_sz + 1; if ( !retval->match_weights ) { return _PSISequenceWeightsFree(retval); } retval->std_prob = _PSIGetStandardProbabilities(sbp); if ( !retval->std_prob ) { return _PSISequenceWeightsFree(retval); } retval->info_content = (double*) calloc(info->query_sz, sizeof(double)); if ( !retval->info_content ) { return _PSISequenceWeightsFree(retval); } return retval;}PsiSequenceWeights*_PSISequenceWeightsFree(PsiSequenceWeights* seq_weights){ if ( !seq_weights ) { return NULL; } if (seq_weights->row_sigma) { sfree(seq_weights->row_sigma); } if (seq_weights->norm_seq_weights) { sfree(seq_weights->norm_seq_weights); } if (seq_weights->sigma) { sfree(seq_weights->sigma); } if (seq_weights->match_weights) { _PSIDeallocateMatrix((void**) seq_weights->match_weights, seq_weights->match_weights_size); } if (seq_weights->std_prob) { sfree(seq_weights->std_prob); } if (seq_weights->info_content) { sfree(seq_weights->info_content); } sfree(seq_weights); return NULL;}/****************************************************************************//* Function prototypes *//* Purges any aligned segments which are identical to the query sequence */static void_PSIPurgeIdenticalAlignments(PsiAlignmentData* alignment);/* Keeps only one copy of any aligned sequences which are >PSI_NEAR_IDENTICAL% * identical to one another */static void_PSIPurgeNearIdenticalAlignments(PsiAlignmentData* alignment);static void_PSIUpdatePositionCounts(PsiAlignmentData* alignment);/* FIXME: needs more descriptive name */static void_PSIPurgeSimilarAlignments(PsiAlignmentData* alignment, Uint4 seq_index1, Uint4 seq_index2, double max_percent_identity);/****************************************************************************//**************** PurgeMatches stage of PSSM creation ***********************/intPSIPurgeBiasedSegments(PsiAlignmentData* alignment){ if ( !alignment ) { return PSIERR_BADPARAM; } _PSIPurgeIdenticalAlignments(alignment); _PSIPurgeNearIdenticalAlignments(alignment); _PSIUpdatePositionCounts(alignment); return PSI_SUCCESS;}/** Remove those sequences which are identical to the query sequence * FIXME: Rename to _PSIPurgeSelfHits() ? */static void_PSIPurgeIdenticalAlignments(PsiAlignmentData* alignment){ Uint4 s = 0; /* index on sequences */ ASSERT(alignment); for (s = 0; s < alignment->dimensions->num_seqs + 1; s++) { _PSIPurgeSimilarAlignments(alignment, kQueryIndex, s, PSI_IDENTICAL); }}static void_PSIPurgeNearIdenticalAlignments(PsiAlignmentData* alignment){ Uint4 i = 0; Uint4 j = 0; ASSERT(alignment); for (i = 1; i < alignment->dimensions->num_seqs + 1; i++) { for (j = 1; (i + j) < alignment->dimensions->num_seqs + 1; j++) { _PSIPurgeSimilarAlignments(alignment, i, (i + j), PSI_NEAR_IDENTICAL); } }}/** Counts the number of sequences matching the query per query position * (columns of the multiple alignment) as well as the number of residues * present in each position of the query. * Should be called after multiple alignment data has been purged from biased * sequences. */static void_PSIUpdatePositionCounts(PsiAlignmentData* alignment){ Uint4 s = 0; /* index on aligned sequences */ Uint4 p = 0; /* index on positions */ ASSERT(alignment); for (s = kQueryIndex + 1; s < alignment->dimensions->num_seqs + 1; s++) { if ( !alignment->use_sequences[s] ) continue; for (p = 0; p < alignment->dimensions->query_sz; p++) { if (alignment->desc_matrix[s][p].used) { const Uint1 res = alignment->desc_matrix[s][p].letter; if (res >= PSI_ALPHABET_SIZE) { continue; } alignment->res_counts[p][res]++; alignment->match_seqs[p]++; } } }}/** This function compares the sequences in the alignment->desc_matrix * structure indexed by sequence_index1 and seq_index2. If it finds aligned * regions that have a greater percent identity than max_percent_identity, * it removes the sequence identified by seq_index2. */static void_PSIPurgeSimilarAlignments(PsiAlignmentData* alignment, Uint4 seq_index1, Uint4 seq_index2, double max_percent_identity){ Uint4 i = 0; /* Nothing to do if sequences are the same or not selected for further processing */ if ( seq_index1 == seq_index2 || !alignment->use_sequences[seq_index1] || !alignment->use_sequences[seq_index2] ) { return; } for (i = 0; i < alignment->dimensions->query_sz; i++) { const PsiDesc* seq1 = alignment->desc_matrix[seq_index1]; const PsiDesc* seq2 = alignment->desc_matrix[seq_index2]; /* starting index of the aligned region */ Uint4 align_start = i; /* length of the aligned region */ Uint4 align_length = 0; /* # of identical residues in aligned region */ Uint4 nidentical = 0; /* both positions in the sequences must be used */ if ( !(seq1[i].used && seq2[i].used) ) { continue; } /* examine the aligned region (FIXME: should we care about Xs?) */ while ( (i < alignment->dimensions->query_sz) && (seq1[i].used && seq2[i].used)) { if (seq1[i].letter == seq2[i].letter) nidentical++; align_length++; i++; } ASSERT(align_length != 0);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?