📄 blast_traceback.c
字号:
/* * =========================================================================== * PRODUCTION $Log: blast_traceback.c,v $ * PRODUCTION Revision 1000.5 2004/06/01 18:07:53 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.110 * PRODUCTION * =========================================================================== *//* $Id: blast_traceback.c,v 1000.5 2004/06/01 18:07:53 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's offical duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: Ilya Dondoshansky * *//** @file blast_traceback.c * Functions responsible for the traceback stage of the BLAST algorithm */static char const rcsid[] = "$Id: blast_traceback.c,v 1000.5 2004/06/01 18:07:53 gouriano Exp $";#include <algo/blast/core/blast_traceback.h>#include <algo/blast/core/blast_util.h>#include <algo/blast/core/link_hsps.h>#include <algo/blast/core/blast_setup.h>#include <algo/blast/core/blast_kappa.h>#include "blast_psi_priv.h"/* Comparison function for sorting HSPs by score. * Ties are broken based on subject sequence offsets. */static intscore_compare_hsps(const void* v1, const void* v2){ BlastHSP* h1,* h2; BlastHSP** hp1,** hp2; hp1 = (BlastHSP**) v1; hp2 = (BlastHSP**) v2; h1 = *hp1; h2 = *hp2; if (h1 == NULL || h2 == NULL) return 0; if (h1->score < h2->score) return 1; if (h1->score > h2->score) return -1; if( h1->subject.offset < h2->subject.offset ) return 1; if( h1->subject.offset > h2->subject.offset ) return -1; if( h1->subject.end < h2->subject.end ) return 1; if( h1->subject.end > h2->subject.end ) return -1; return 0;}/** TRUE if c is between a and b; f between d and f. Determines if the * coordinates are already in an HSP that has been evaluated. */#define CONTAINED_IN_HSP(a,b,c,d,e,f) (((a <= c && b >= c) && (d <= f && e >= f)) ? TRUE : FALSE)static void BLASTCheckHSPInclusion(BlastHSP* *hsp_array, Int4 hspcnt, Boolean is_ooframe){ Int4 index, index1; BlastHSP* hsp,* hsp1; for (index = 0; index < hspcnt; index++) { hsp = hsp_array[index]; if (hsp == NULL) continue; for (index1 = 0; index1 < index; index1++) { hsp1 = hsp_array[index1]; if (hsp1 == NULL) continue; if(is_ooframe) { if (SIGN(hsp1->query.frame) != SIGN(hsp->query.frame)) continue; } else { if (hsp->context != hsp1->context) continue; } /* Check of the start point of this HSP */ if (CONTAINED_IN_HSP(hsp1->query.offset, hsp1->query.end, hsp->query.offset, hsp1->subject.offset, hsp1->subject.end, hsp->subject.offset) == TRUE) { /* Check of the end point of this HSP */ if (CONTAINED_IN_HSP(hsp1->query.offset, hsp1->query.end, hsp->query.end, hsp1->subject.offset, hsp1->subject.end, hsp->subject.end) == TRUE) { /* Now checking correct strand */ if (SIGN(hsp1->query.frame) == SIGN(hsp->query.frame) && SIGN(hsp1->subject.frame) == SIGN(hsp->subject.frame)){ /* If we come here through all these if-s - this mean, that current HSP should be removed. */ if(hsp_array[index] != NULL) { hsp_array[index] = Blast_HSPFree(hsp_array[index]); break; } } } } } } return;}/* Window size used to scan HSP for highest score region, where gapped * extension starts. */#define HSP_MAX_WINDOW 11/** Function to check that the highest scoring region in an HSP still gives a * positive score. This value was originally calcualted by * GetStartForGappedAlignment but it may have changed due to the introduction * of ambiguity characters. Such a change can lead to 'strange' results from * ALIGN. * @param hsp An HSP structure [in] * @param query Query sequence buffer [in] * @param subject Subject sequence buffer [in] * @param sbp Scoring block containing matrix [in] * @return TRUE if region aroung starting offsets gives a positive score*/static BooleanBLAST_CheckStartForGappedAlignment (BlastHSP* hsp, Uint1* query, Uint1* subject, const BlastScoreBlk* sbp){ Int4 index1, score, start, end, width; Uint1* query_var,* subject_var; Boolean positionBased = (sbp->posMatrix != NULL); width = MIN((hsp->query.gapped_start-hsp->query.offset), HSP_MAX_WINDOW/2); start = hsp->query.gapped_start - width; end = MIN(hsp->query.end, hsp->query.gapped_start + width); /* Assures that the start of subject is above zero. */ if ((hsp->subject.gapped_start + start - hsp->query.gapped_start) < 0) start -= hsp->subject.gapped_start + (start - hsp->query.gapped_start); query_var = query + start; subject_var = subject + hsp->subject.gapped_start + (start - hsp->query.gapped_start); score=0; for (index1=start; index1<end; index1++) { if (!positionBased) score += sbp->matrix[*query_var][*subject_var]; else score += sbp->posMatrix[index1][*subject_var]; query_var++; subject_var++; } if (score <= 0) return FALSE; return TRUE;}static Int4 GetPatternLengthFromBlastHSP(BlastHSP* hsp){ return hsp->pattern_length;}static void SavePatternLengthInGapAlignStruct(Int4 length, BlastGapAlignStruct* gap_align){ /* Kludge: save length in an output structure member, to avoid introducing a new structure member. Probably should be changed??? */ gap_align->query_stop = length;}#define MAX_FULL_TRANSLATION 2100/** Translate the subject sequence into 6 frames, and create a mixed-frame * sequnce if out-of-frame gapping will be used. * @param subject_blk Subject sequence structure [in] * @param gen_code_string Genetic code to use for translation [in] * @param translation_buffer_ptr Pointer to buffer to hold the * translated sequence(s) [out] * @param frame_offsets_ptr Pointer to an array to hold offsets into the * translation buffer for each frame. Mixed-frame * sequence is to be returned, if NULL. [in] [out] * @param partial_translation_ptr Should partial translations be performed later * for each HSP instead of a full * translation? [out]*/static Int2SetUpSubjectTranslation(BLAST_SequenceBlk* subject_blk, const Uint1* gen_code_string, Uint1** translation_buffer_ptr, Int4** frame_offsets_ptr, Boolean* partial_translation_ptr){ Boolean partial_translation; Boolean is_ooframe = (frame_offsets_ptr == NULL); if (!gen_code_string) return -1; if (is_ooframe && subject_blk->oof_sequence) { /* If mixed-frame sequence is already available (two-sequences case), then no need to translate again */ *partial_translation_ptr = FALSE; return 0; } *partial_translation_ptr = partial_translation = (subject_blk->length > MAX_FULL_TRANSLATION); if (!partial_translation) { if (is_ooframe) { BLAST_GetAllTranslations(subject_blk->sequence_start, NCBI4NA_ENCODING, subject_blk->length, gen_code_string, NULL, NULL, &subject_blk->oof_sequence); } else { BLAST_GetAllTranslations(subject_blk->sequence_start, NCBI4NA_ENCODING, subject_blk->length, gen_code_string, translation_buffer_ptr, frame_offsets_ptr, NULL); } } return 0;}/** Performs the translation and coordinates adjustment, if only part of the * subject sequence is translated for gapped alignment. * @param subject_blk Subject sequence structure [in] * @param hsp The HSP information [in] * @param is_ooframe Return a mixed-frame sequence if TRUE [in] * @param gen_code_string Database genetic code [in] * @param translation_buffer_ptr Pointer to buffer holding the translation [out] * @param subject_ptr Pointer to sequence to be passed to the gapped * alignment [out] * @param subject_length_ptr Length of the translated sequence [in] * @param start_shift_ptr How far is the partial sequence shifted w.r.t. the * full sequence. [out]*/static void GetPartialSubjectTranslation(BLAST_SequenceBlk* subject_blk, BlastHSP* hsp, Boolean is_ooframe, const Uint1* gen_code_string, Uint1** translation_buffer_ptr, Uint1** subject_ptr, Int4* subject_length_ptr, Int4* start_shift_ptr){ Int4 translation_length; Uint1* translation_buffer = *translation_buffer_ptr; Uint1* subject; Int4 start_shift; Int4 nucl_shift; sfree(translation_buffer); if (!is_ooframe) { start_shift = MAX(0, 3*hsp->subject.offset - MAX_FULL_TRANSLATION); translation_length = MIN(3*hsp->subject.end + MAX_FULL_TRANSLATION, subject_blk->length) - start_shift; if (hsp->subject.frame > 0) { nucl_shift = start_shift; } else { nucl_shift = subject_blk->length - start_shift - translation_length; } GetPartialTranslation(subject_blk->sequence_start+nucl_shift, translation_length, hsp->subject.frame, gen_code_string, &translation_buffer, subject_length_ptr, NULL); /* Below, the start_shift will be used for the protein coordinates, so need to divide it by 3 */ start_shift /= CODON_LENGTH; } else { Int4 oof_start, oof_end; if (hsp->subject.frame > 0) { oof_start = 0; oof_end = subject_blk->length; } else { oof_start = subject_blk->length + 1; oof_end = 2*subject_blk->length + 1; } start_shift = MAX(oof_start, hsp->subject.offset - MAX_FULL_TRANSLATION); translation_length = MIN(hsp->subject.end + MAX_FULL_TRANSLATION, oof_end) - start_shift; if (hsp->subject.frame > 0) { nucl_shift = start_shift - oof_start; } else { nucl_shift = oof_end - start_shift - translation_length; } GetPartialTranslation(subject_blk->sequence_start+nucl_shift, translation_length, hsp->subject.frame, gen_code_string, NULL, subject_length_ptr, &translation_buffer); } hsp->subject.offset -= start_shift; hsp->subject.gapped_start -= start_shift; *translation_buffer_ptr = translation_buffer; *start_shift_ptr = start_shift; if (!is_ooframe) { subject = translation_buffer + 1; } else { subject = translation_buffer + CODON_LENGTH; } *subject_ptr = subject;}/** Check whether an HSP is already contain within another higher scoring HSP. * "Containment" is defined by the macro CONTAINED_IN_HSP. * the implicit assumption here is that HSP's are sorted by score * The main goal of this routine is to eliminate double gapped extensions of HSP's. * * @param hsp_array Full Array of all HSP's found so far. [in] * @param hsp HSP to be compared to other HSP's [in] * @param max_index compare above HSP to all HSP's in hsp_array up to max_index [in] * @param is_ooframe true if out-of-frame gapped alignment (blastx and tblastn only). [in] */static BooleanHSPContainedInHSPCheck(BlastHSP** hsp_array, BlastHSP* hsp, Int4 max_index, Boolean is_ooframe){ BlastHSP* hsp1; Boolean delete_hsp=FALSE; Boolean hsp_start_is_contained=FALSE; Boolean hsp_end_is_contained=FALSE; Int4 index; for (index=0; index<max_index; index++) { hsp_start_is_contained = FALSE; hsp_end_is_contained = FALSE; hsp1 = hsp_array[index]; if (hsp1 == NULL) continue; if(is_ooframe) { if (SIGN(hsp1->query.frame) != SIGN(hsp->query.frame)) continue; } else { if (hsp->context != hsp1->context) continue; } if (CONTAINED_IN_HSP(hsp1->query.offset, hsp1->query.end, hsp->query.offset, hsp1->subject.offset, hsp1->subject.end, hsp->subject.offset) == TRUE) { if (SIGN(hsp1->query.frame) == SIGN(hsp->query.frame) &&
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -