blast_psi_priv.c

来自「ncbi源码」· C语言 代码 · 共 1,546 行 · 第 1/4 页

C
1,546
字号
/* * =========================================================================== * PRODUCTION $Log: blast_psi_priv.c,v $ * PRODUCTION Revision 1000.1  2004/06/01 18:07:34  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.6 * PRODUCTION * =========================================================================== */static char const rcsid[] =    "$Id: blast_psi_priv.c,v 1000.1 2004/06/01 18:07:34 gouriano Exp $";/* =========================================================================== * *                            PUBLIC DOMAIN NOTICE *               National Center for Biotechnology Information * *  This software/database is a "United States Government Work" under the *  terms of the United States Copyright Act.  It was written as part of *  the author's official duties as a United States Government employee and *  thus cannot be copyrighted.  This software/database is freely available *  to the public for use. The National Library of Medicine and the U.S. *  Government have not placed any restriction on its use or reproduction. * *  Although all reasonable efforts have been taken to ensure the accuracy *  and reliability of the software and data, the NLM and the U.S. *  Government do not and cannot warrant the performance or results that *  may be obtained by using this software or data. The NLM and the U.S. *  Government disclaim all warranties, express or implied, including *  warranties of performance, merchantability or fitness for any particular *  purpose. * *  Please cite the author in any work or product based on this material. * * =========================================================================== * * Author:  Alejandro Schaffer, ported by Christiam Camacho * *//** @file blast_psi_priv.c * Defintions for functions in private interface for Position Iterated BLAST  * API. */#include "blast_psi_priv.h"#include "matrix_freq_ratios.h"/****************************************************************************//* Constants */const unsigned int kQueryIndex = 0;const double kEpsilon = 0.0001;const double kDefaultEvalueForPosition = 1.0;const int kPsiScaleFactor = 200;/****************************************************************************/void**_PSIAllocateMatrix(unsigned int ncols, unsigned int nrows,                    unsigned int data_type_sz){    void** retval = NULL;    unsigned int i = 0;    if ( !(retval = (void**) malloc(sizeof(void*) * ncols)))        return NULL;    for (i = 0; i < ncols; i++) {        if ( !(retval[i] = (void*) calloc(nrows, data_type_sz))) {            retval = _PSIDeallocateMatrix(retval, i);            break;        }    }    return retval;}void**_PSIDeallocateMatrix(void** matrix, unsigned int ncols){    unsigned int i = 0;    if (!matrix)        return NULL;    for (i = 0; i < ncols; i++) {        sfree(matrix[i]);    }    sfree(matrix);    return NULL;}void_PSICopyMatrix(double** dest, const double** src,                unsigned int ncols, unsigned int nrows){    unsigned int i = 0;    unsigned int j = 0;    for (i = 0; i < ncols; i++) {        for (j = 0; j < nrows; j++) {            dest[i][j] = src[i][j];        }    }}/****************************************************************************/PsiAlignedBlock*_PSIAlignedBlockNew(Uint4 num_positions){    PsiAlignedBlock* retval = NULL;    Uint4 i = 0;    if ( !(retval = (PsiAlignedBlock*) calloc(1, sizeof(PsiAlignedBlock)))) {        return NULL;    }    retval->pos_extnt = (SSeqRange*) calloc(num_positions, sizeof(SSeqRange));    if ( !retval->pos_extnt ) {        return _PSIAlignedBlockFree(retval);    }    retval->size = (Uint4*) calloc(num_positions, sizeof(Uint4));    if ( !retval->size ) {        return _PSIAlignedBlockFree(retval);    }    for (i = 0; i < num_positions; i++) {        retval->pos_extnt[i].left = -1;        retval->pos_extnt[i].right = num_positions;    }    return retval;}PsiAlignedBlock*_PSIAlignedBlockFree(PsiAlignedBlock* aligned_blocks){    if ( !aligned_blocks ) {        return NULL;    }    if (aligned_blocks->pos_extnt) {        sfree(aligned_blocks->pos_extnt);    }    if (aligned_blocks->size) {        sfree(aligned_blocks->size);    }    sfree(aligned_blocks);    return NULL;}PsiSequenceWeights*_PSISequenceWeightsNew(const PsiInfo* info, const BlastScoreBlk* sbp){    PsiSequenceWeights* retval = NULL;    ASSERT(info);    retval = (PsiSequenceWeights*) calloc(1, sizeof(PsiSequenceWeights));    if ( !retval ) {        return NULL;    }    retval->row_sigma = (double*) calloc(info->num_seqs + 1, sizeof(double));    if ( !retval->row_sigma ) {        return _PSISequenceWeightsFree(retval);    }    retval->norm_seq_weights = (double*) calloc(info->num_seqs + 1,                                                 sizeof(double));    if ( !retval->norm_seq_weights ) {        return _PSISequenceWeightsFree(retval);    }    retval->sigma = (double*) calloc(info->num_seqs + 1, sizeof(double));    if ( !retval->sigma ) {        return _PSISequenceWeightsFree(retval);    }    retval->match_weights = (double**) _PSIAllocateMatrix(info->query_sz + 1,                                                          PSI_ALPHABET_SIZE,                                                          sizeof(double));    retval->match_weights_size = info->query_sz + 1;    if ( !retval->match_weights ) {        return _PSISequenceWeightsFree(retval);    }    retval->std_prob = _PSIGetStandardProbabilities(sbp);    if ( !retval->std_prob ) {        return _PSISequenceWeightsFree(retval);    }    retval->info_content = (double*) calloc(info->query_sz, sizeof(double));    if ( !retval->info_content ) {        return _PSISequenceWeightsFree(retval);    }    return retval;}PsiSequenceWeights*_PSISequenceWeightsFree(PsiSequenceWeights* seq_weights){    if ( !seq_weights ) {        return NULL;    }    if (seq_weights->row_sigma) {        sfree(seq_weights->row_sigma);    }    if (seq_weights->norm_seq_weights) {        sfree(seq_weights->norm_seq_weights);    }    if (seq_weights->sigma) {        sfree(seq_weights->sigma);    }    if (seq_weights->match_weights) {        _PSIDeallocateMatrix((void**) seq_weights->match_weights,                             seq_weights->match_weights_size);    }    if (seq_weights->std_prob) {        sfree(seq_weights->std_prob);    }    if (seq_weights->info_content) {        sfree(seq_weights->info_content);    }    sfree(seq_weights);    return NULL;}/****************************************************************************//* Function prototypes *//* Purges any aligned segments which are identical to the query sequence */static void_PSIPurgeIdenticalAlignments(PsiAlignmentData* alignment);/* Keeps only one copy of any aligned sequences which are >PSI_NEAR_IDENTICAL% * identical to one another */static void_PSIPurgeNearIdenticalAlignments(PsiAlignmentData* alignment);static void_PSIUpdatePositionCounts(PsiAlignmentData* alignment);/* FIXME: needs more descriptive name */static void_PSIPurgeSimilarAlignments(PsiAlignmentData* alignment,                           Uint4 seq_index1,                           Uint4 seq_index2,                           double max_percent_identity);/****************************************************************************//**************** PurgeMatches stage of PSSM creation ***********************/intPSIPurgeBiasedSegments(PsiAlignmentData* alignment){    if ( !alignment ) {        return PSIERR_BADPARAM;    }    _PSIPurgeIdenticalAlignments(alignment);    _PSIPurgeNearIdenticalAlignments(alignment);    _PSIUpdatePositionCounts(alignment);    return PSI_SUCCESS;}/** Remove those sequences which are identical to the query sequence  * FIXME: Rename to _PSIPurgeSelfHits() ? */static void_PSIPurgeIdenticalAlignments(PsiAlignmentData* alignment){    Uint4 s = 0;        /* index on sequences */    ASSERT(alignment);    for (s = 0; s < alignment->dimensions->num_seqs + 1; s++) {        _PSIPurgeSimilarAlignments(alignment, kQueryIndex, s, PSI_IDENTICAL);    }}static void_PSIPurgeNearIdenticalAlignments(PsiAlignmentData* alignment){    Uint4 i = 0;    Uint4 j = 0;    ASSERT(alignment);    for (i = 1; i < alignment->dimensions->num_seqs + 1; i++) {         for (j = 1; (i + j) < alignment->dimensions->num_seqs + 1; j++) {            _PSIPurgeSimilarAlignments(alignment, i, (i + j),                                        PSI_NEAR_IDENTICAL);        }    }}/** Counts the number of sequences matching the query per query position * (columns of the multiple alignment) as well as the number of residues * present in each position of the query. * Should be called after multiple alignment data has been purged from biased * sequences. */static void_PSIUpdatePositionCounts(PsiAlignmentData* alignment){    Uint4 s = 0;        /* index on aligned sequences */    Uint4 p = 0;        /* index on positions */    ASSERT(alignment);    for (s = kQueryIndex + 1; s < alignment->dimensions->num_seqs + 1; s++) {        if ( !alignment->use_sequences[s] )            continue;        for (p = 0; p < alignment->dimensions->query_sz; p++) {            if (alignment->desc_matrix[s][p].used) {                const Uint1 res = alignment->desc_matrix[s][p].letter;                if (res >= PSI_ALPHABET_SIZE) {                    continue;                }                alignment->res_counts[p][res]++;                alignment->match_seqs[p]++;            }        }    }}/** This function compares the sequences in the alignment->desc_matrix * structure indexed by sequence_index1 and seq_index2. If it finds aligned  * regions that have a greater percent identity than max_percent_identity,  * it removes the sequence identified by seq_index2. */static void_PSIPurgeSimilarAlignments(PsiAlignmentData* alignment,                           Uint4 seq_index1,                           Uint4 seq_index2,                           double max_percent_identity){    Uint4 i = 0;    /* Nothing to do if sequences are the same or not selected for further       processing */    if ( seq_index1 == seq_index2 ||         !alignment->use_sequences[seq_index1] ||         !alignment->use_sequences[seq_index2] ) {        return;    }    for (i = 0; i < alignment->dimensions->query_sz; i++) {        const PsiDesc* seq1 = alignment->desc_matrix[seq_index1];        const PsiDesc* seq2 = alignment->desc_matrix[seq_index2];        /* starting index of the aligned region */        Uint4 align_start = i;            /* length of the aligned region */        Uint4 align_length = 0;           /* # of identical residues in aligned region */        Uint4 nidentical = 0;             /* both positions in the sequences must be used */        if ( !(seq1[i].used && seq2[i].used) ) {            continue;        }        /* examine the aligned region (FIXME: should we care about Xs?) */        while ( (i < alignment->dimensions->query_sz) &&                 (seq1[i].used && seq2[i].used)) {            if (seq1[i].letter == seq2[i].letter)                nidentical++;            align_length++;            i++;        }        ASSERT(align_length != 0);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?