📄 blast_kappa.c
字号:
/* * =========================================================================== * PRODUCTION $Log: blast_kappa.c,v $ * PRODUCTION Revision 1000.0 2004/06/01 18:13:48 gouriano * PRODUCTION PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.10 * PRODUCTION * =========================================================================== *//* $Id: blast_kappa.c,v 1000.0 2004/06/01 18:13:48 gouriano Exp $ * ========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Authors: Alejandro Schaffer, Mike Gertz (ported to algo/blast by Tom Madden) * *//** @file blast_kappa.c * Utilities for doing Smith-Waterman alignments and adjusting the scoring * system for each match in blastpgp */static char const rcsid[] = "$Id: blast_kappa.c,v 1000.0 2004/06/01 18:13:48 gouriano Exp $";#include <algo/blast/core/blast_def.h>#include <algo/blast/core/blast_hits.h>#include <algo/blast/core/blast_stat.h>#include <algo/blast/core/blast_kappa.h>#include <algo/blast/core/blast_util.h>#include <algo/blast/core/blast_gapalign.h>#include <algo/blast/core/blast_traceback.h>#include <algo/blast/core/blast_filter.h>#include "blast_psi_priv.h"#include "matrix_freq_ratios.h"#include "blast_gapalign_pri.h"#define EVALUE_STRETCH 5 /*by what factor might initially reported E-value exceed true Evalue*/#define PRO_TRUE_ALPHABET_SIZE 20#define scoreRange 10000#define XCHAR 21 /*character for low-complexity columns*/#define STARCHAR 25 /*character for stop codons*//*positions of true characters in protein alphabet*/Int4 trueCharPositions[20] = {1,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,22};/** Structure used for full Smith-Waterman results. */typedef struct SWResults { struct SWResults *next; /**< next object in list */ Uint1* seq; /**< match sequence. */ Int4 seqStart; /**< start of alignment on match */ Int4 seqEnd; /**< end of alignment on match */ Int4 queryStart; /**< start of alignment on query */ Int4 queryEnd; /**< end of alignment on query */ Int4 score; /**< score of alignment */ double eValue; /**< best expect value for this match record */ double eValueThisAlign; /**< expect value of this alignment. */ double Lambda; /**< Karlin-Altschul parameter. */ double logK; /**< log of Karlin-Altschul parameter */ Boolean isFirstAlignment; /**< TRUE if first alignment for this sequence */ Int4 subject_index; /**< ordinal ID of match sequence, needed to break ties on rare occasions */ BlastHSP* hsp; /**< Saves alignment informaiton for conversion to SeqAlign. */} SWResults;/** * Frees the linked-list of SWResults. Does not deallocate the BlastHSP * on the SWResults as that is saved elsewhere. * @param sw_results the head of the linked list to be freed [in] * @return NULL pointer */static SWResults* SWResultsFree(SWResults* sw_results){ SWResults *current, *next; next = current = sw_results; while (current) { next = current->next; sfree(current); current = next; } return NULL;}/** * SWResultsNew Create a new instance of the SWResults struct, initializing * it with values common to different kinds of searches * The parameters of this function correspond directly to fields * in the SWResults data structure. * @param sequence match sequence [in] * @param score score of match [in] * @param newEvalue expect value of this alignment [in] * @param bestEvalue lowest expect value of this match sequence [in] * @param isFirstAlignment TRUE if first alignment for this sequence [in] * @param lambda Karlin-Altschul parameter [in] * @param logK log of Karlin-Altschul parameter [in] * @param subject_index ordinal ID of match sequence [in] */static SWResults *SWResultsNew(Uint1* sequence, Int4 score, double newEvalue, double bestEvalue, Boolean isFirstAlignment, double lambda, double logK, Int4 subject_index){ SWResults *newSW; /* The newly created instance of SWResults */ newSW = (SWResults *) calloc(1, sizeof(SWResults)); if(newSW) { newSW->seq = sequence; newSW->score = score; newSW->eValue = bestEvalue; newSW->Lambda = lambda; newSW->logK = logK; newSW->eValueThisAlign = newEvalue; newSW->isFirstAlignment = isFirstAlignment; newSW->subject_index = subject_index; newSW->next = NULL; } return newSW;}/** * An instance of struct Kappa_MatchRecord represents all alignments * of a query sequence to a matching subject sequence. * * For a given query-subject pair, a Kappa_MatchRecord is created once it * is known that the eValue of the best alignment is small enough to be * significant. Then alignments of the two sequences are added to the * Kappa_MatchRecord one at a time, using one of the following two routines * * - Kappa_MatchRecordInsertHSP inserts the alignment represented * by a single HSP into the match record. * - Kappa_MatchRecordInsertSwAlign inserts an alignment computed by * the Smith-Waterman algorithm into the match record. * * Alignments should be specified in order of smallest (best) e-value to * largest (worst) e-value. * * The Kappa_MatchRecord::alignments field stores the alignments in * the reverse order, i.e. from largest (worst) e-value to smallest * (best) e-value. The reason the alignments are stored in reverse * order is that this order is consistent with the order that matches * are returned by a SWheap (see below), i.e. worst to best. */ struct Kappa_MatchRecord { double eValue; /**< best evalue of all alignments the record */ Int4 score; /**< best score of all alignments the record */ Uint1* sequence; /**< the subject sequence */ Int4 subject_index; /**< the index number of the subject sequence */ SWResults *alignments; /**< a list of query-subject alignments */};typedef struct Kappa_MatchRecord Kappa_MatchRecord;/** Initialize a Kappa_MatchRecord. Parameters to this function correspond * directly to fields of Kappa_MatchRecord. * @param self the record to be modified [in][out] * @param eValue expect value of this alignment [in] * @param score score of match [in] * @param sequence match sequence [in] * @param subject_index ordinal ID of sequence in database [in] */static voidKappa_MatchRecordInitialize(Kappa_MatchRecord * self, double eValue, Int4 score, Uint1* sequence, Int4 subject_index){ self->eValue = eValue; self->score = score; self->sequence = sequence; self->subject_index = subject_index; self->alignments = NULL;}/** The following procedure computes the number of identities in an * alignment of query_seq to the matching sequence stored in * SWAlign. The alignment is encoded in gap_info * @param SWAlign input structure holding HSP to be modified [in][out] * @param query_seq Query sequence used for calculation [in] */static Int2 SWAlignGetNumIdentical(SWResults *SWAlign, Uint1* query_seq){ Int4 num_ident; /*number of identities to return*/ Int4 align_length; /*aligned length, calculated but discarded. */ Blast_HSPGetNumIdentities(query_seq, SWAlign->seq, SWAlign->hsp, TRUE, &num_ident, &align_length); SWAlign->hsp->num_ident = num_ident; return 0;}/** * Insert an alignment represented by a seqAlign into the match * record. * @param self the match record to be modified [in][out] * @param hsp contains alignment and scoring information, * will be NULLed out [in][out] * @param lambda a statistical parameter used to evaluate the significance of the * match [in] * @param logK a statistical parameter used to evaluate the significance of the * match [in] * @param localScalingFactor the factor by which the scoring system has been * scaled in order to obtain greater precision [in] * @param query_seq Used to calculate percent identity [in] */static voidKappa_MatchRecordInsertHSP( Kappa_MatchRecord * self, BlastHSP* *hsp, double lambda, double logK, double localScalingFactor, Uint1* query_seq) { SWResults *newSW; /* A new SWResults object that represents the alignment to be inserted */ newSW = SWResultsNew(self->sequence, self->score, (*hsp)->evalue, self->eValue, (Boolean) (NULL == self->alignments), localScalingFactor * lambda, logK, self->subject_index); newSW->queryStart = (*hsp)->gap_info->start1; newSW->seqStart = (*hsp)->gap_info->start2; newSW->hsp = *hsp; *hsp = NULL; /* Information stored on SWResults now. */ SWAlignGetNumIdentical(newSW, query_seq); /* Calculate num identities, attach to HSP. */ newSW->next = self->alignments; self->alignments = newSW;}/** * Insert an alignment computed by the Smith-Waterman algorithm into * the match record. * @param self the match record to be modified [in][out] * @param newScore the score of the alignment [in] * @param newEvalue the expect value of the alignment [in] * @param lambda a statistical parameter used to evaluate the significance of the * match [in] * @param logK a statistical parameter used to evaluate the significance of the * match [in] * @param localScalingFactor the factor by which the scoring system has been * scaled in order to obtain greater precision [in] * @param matchStart start of the alignment in the subject [in] * @param matchAlignmentExtent length of the alignment in the subject [in] * @param queryStart start of the alignment in the query [in] * @param queryAlignmentExtent length of the alignment in the query [in] * @param reverseAlignScript Alignment information (script) returned by * the X-drop alignment algorithm [in] * @param query_seq Used to calculate percent identity [in] */static voidKappa_MatchRecordInsertSwAlign( Kappa_MatchRecord * self, Int4 newScore, double newEvalue, double lambda, double logK, double localScalingFactor, Int4 matchStart, Int4 matchAlignmentExtent, Int4 queryStart, Int4 queryAlignmentExtent, Int4 * reverseAlignScript, Uint1* query_seq) { SWResults *newSW; /* A new SWResults object that represents the alignment to be inserted */ GapEditBlock* editBlock=NULL; /* Contains representation of traceback. */ if(NULL == self->alignments) { /* This is the first sequence recorded for this match. Use the x-drop * score, "newScore", as the score for the sequence */ self->score = newScore; } newSW = SWResultsNew(self->sequence, self->score, newEvalue, self->eValue, (Boolean) (NULL == self->alignments), lambda * localScalingFactor, logK, self->subject_index); newSW->seqStart = matchStart; newSW->seqEnd = matchStart + matchAlignmentExtent; newSW->queryStart = queryStart; newSW->queryEnd = queryStart + queryAlignmentExtent; newSW->next = self->alignments; BLAST_TracebackToGapEditBlock(reverseAlignScript, queryAlignmentExtent, matchAlignmentExtent, queryStart, matchStart, &editBlock);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -