📄 blast_traceback.c
字号:
SIGN(hsp1->subject.frame) == SIGN(hsp->subject.frame)) hsp_start_is_contained = TRUE; } if (CONTAINED_IN_HSP(hsp1->query.offset, hsp1->query.end, hsp->query.end, hsp1->subject.offset, hsp1->subject.end, hsp->subject.end) == TRUE) { if (SIGN(hsp1->query.frame) == SIGN(hsp->query.frame) && SIGN(hsp1->subject.frame) == SIGN(hsp->subject.frame)) hsp_end_is_contained = TRUE; } if (hsp_start_is_contained && hsp_end_is_contained && hsp->score <= hsp1->score) { delete_hsp = TRUE; break; } } return delete_hsp;}/** Sets some values that will be in the "score" block of the SeqAlign. * The values set here are expect value and number of identical matches. * * @param query_info information about query. [in] * @param query query sequence as a raw string [in] * @param hsp HPS to operate on [in][out] * @param subject database sequence as a raw string [in] * @param program_number which program [in] * @param sbp the scoring information [in] * @param scoring_params Parameters for how to score matches. [in] * @param hit_options determines which scores to save. [in] */static BooleanHSPSetScores(BlastQueryInfo* query_info, Uint1* query, Uint1* subject, BlastHSP* hsp, Uint1 program_number, BlastScoreBlk* sbp, const BlastScoringParameters* score_params, const BlastHitSavingOptions* hit_options){ Boolean keep = TRUE; Int4 align_length = 0; double scale_factor = score_params->scale_factor; BlastScoringOptions *score_options = score_params->options; /* Calculate alignment length and number of identical letters. Do not get the number of identities if the query is not available */ if (query != NULL) { if (score_options->is_ooframe) { Blast_HSPGetOOFNumIdentities(query, subject, hsp, program_number, &hsp->num_ident, &align_length); } else { Blast_HSPGetNumIdentities(query, subject, hsp, score_options->gapped_calculation, &hsp->num_ident, &align_length); } } if ((hsp->num_ident * 100 < align_length * hit_options->percent_identity) || align_length < hit_options->min_hit_length) { keep = FALSE; } if (keep == TRUE) { if (program_number == blast_type_blastp || program_number == blast_type_rpsblast || program_number == blast_type_blastn) { Blast_KarlinBlk** kbp; if (score_options->gapped_calculation) kbp = sbp->kbp_gap; else kbp = sbp->kbp; if (hit_options->phi_align) { Blast_HSPPHIGetEvalue(hsp, sbp); } else { hsp->evalue = BLAST_KarlinStoE_simple(hsp->score, kbp[hsp->context], query_info->eff_searchsp_array[hsp->context]); } if (hsp->evalue > hit_options->expect_value) /* put in for comp. based stats. */ keep = FALSE; } /* remove any scaling of the calculated score */ hsp->score = (Int4) ((hsp->score+(0.5*scale_factor))/ scale_factor); /* only one alignment considered for blast[np]. */ /* This may be changed by LinkHsps for blastx or tblastn. */ hsp->num = 1; if ((program_number == blast_type_tblastn || program_number == blast_type_rpstblastn) && hit_options->longest_intron > 0) { hsp->evalue = BLAST_KarlinStoE_simple(hsp->score, sbp->kbp_gap[hsp->context], query_info->eff_searchsp_array[hsp->context]); } } return keep;}/** Adjusts offset if out-of-frame and negative frame, or if partial sequence used for extension. * @param hsp The hit to work on [in][out] * @param subject_blk information about database sequence [in] * @param is_ooframe true if out-of-frame (blastx or tblastn) used. [in] * @param start_shift amount of database sequence not used for extension. [in]*/static voidHSPAdjustSubjectOffset(BlastHSP* hsp, BLAST_SequenceBlk* subject_blk, Boolean is_ooframe, Int4 start_shift){ if (is_ooframe) { /* Adjust subject offsets for negative frames */ if (hsp->subject.frame < 0) { Int4 strand_start = subject_blk->length + 1; hsp->subject.offset -= strand_start; hsp->subject.end -= strand_start; hsp->subject.gapped_start -= strand_start; hsp->gap_info->start2 -= strand_start; } } /* Adjust subject offsets if shifted (partial) sequence was used for extension */ if (start_shift > 0) { hsp->subject.offset += start_shift; hsp->subject.end += start_shift; hsp->subject.gapped_start += start_shift; if (hsp->gap_info) hsp->gap_info->start2 += start_shift; } return;}/** Check whether an HSP is already contained within another higher scoring HSP. * This is done AFTER the gapped alignment has been performed * * @param hsp_array Full Array of all HSP's found so far. [in][out] * @param hsp HSP to be compared to other HSP's [in] * @param max_index compare above HSP to all HSP's in hsp_array up to max_index [in] */static BooleanHSPCheckForDegenerateAlignments(BlastHSP** hsp_array, BlastHSP* hsp, Int4 max_index){ BlastHSP* hsp2; Boolean keep=TRUE; Int4 index; for (index=0; index<max_index; index++) { hsp2 = hsp_array[index]; if (hsp2 == NULL) continue; /* Check if both HSP's start or end on the same diagonal (and are on same strands). */ if (((hsp->query.offset == hsp2->query.offset && hsp->subject.offset == hsp2->subject.offset) || (hsp->query.end == hsp2->query.end && hsp->subject.end == hsp2->subject.end)) && hsp->context == hsp2->context && hsp->subject.frame == hsp2->subject.frame) { if (hsp2->score > hsp->score) { keep = FALSE; break; } else { hsp_array[index] = Blast_HSPFree(hsp2); } } } return keep;}/* Comments in blast_traceback.h */Int2Blast_TracebackFromHSPList(Uint1 program_number, BlastHSPList* hsp_list, BLAST_SequenceBlk* query_blk, BLAST_SequenceBlk* subject_blk, BlastQueryInfo* query_info, BlastGapAlignStruct* gap_align, BlastScoreBlk* sbp, const BlastScoringParameters* score_params, const BlastExtensionOptions* ext_options, const BlastHitSavingParameters* hit_params, const Uint1* gen_code_string){ Int4 index; BlastHSP* hsp; Uint1* query,* subject; Int4 query_length, query_length_orig; Int4 subject_length=0; BlastHSP** hsp_array; Int4 q_start, s_start; BlastHitSavingOptions* hit_options = hit_params->options; BlastScoringOptions* score_options = score_params->options; Int4 context_offset; Uint1* translation_buffer = NULL; Int4* frame_offsets = NULL; Boolean partial_translation = FALSE; const Boolean k_is_ooframe = score_options->is_ooframe; const Boolean kGreedyTraceback = (ext_options->ePrelimGapExt == eGreedyExt); const Boolean kTranslateSubject = (program_number == blast_type_tblastn || program_number == blast_type_rpstblastn); if (hsp_list->hspcnt == 0) { return 0; } hsp_array = hsp_list->hsp_array; if (kTranslateSubject) { if (!gen_code_string) return -1; if (k_is_ooframe) { SetUpSubjectTranslation(subject_blk, gen_code_string, NULL, NULL, &partial_translation); subject = subject_blk->oof_sequence + CODON_LENGTH; /* Mixed-frame sequence spans all 6 frames, i.e. both strands of the nucleotide sequence. However its start will also be shifted by 3.*/ subject_length = 2*subject_blk->length - 1; } else { SetUpSubjectTranslation(subject_blk, gen_code_string, &translation_buffer, &frame_offsets, &partial_translation); /* subject and subject_length will be set later, for each HSP. */ } } else { /* Subject is not translated */ subject = subject_blk->sequence; subject_length = subject_blk->length; } for (index=0; index < hsp_list->hspcnt; index++) { hsp = hsp_array[index]; if (program_number == blast_type_blastx || program_number == blast_type_tblastx) { Int4 context = hsp->context - hsp->context % 3; context_offset = query_info->context_offsets[context]; query_length_orig = query_info->context_offsets[context+3] - context_offset - 1; if (k_is_ooframe) { query = query_blk->oof_sequence + CODON_LENGTH + context_offset; query_length = query_length_orig; } else { query = query_blk->sequence + query_info->context_offsets[hsp->context]; query_length = BLAST_GetQueryLength(query_info, hsp->context); } } else { query = query_blk->sequence + query_info->context_offsets[hsp->context]; query_length = query_length_orig = BLAST_GetQueryLength(query_info, hsp->context); } /* preliminary RPS blast alignments have not had the composition-based correction applied yet, so we cannot reliably check whether an HSP is contained within another */ if (program_number == blast_type_rpsblast || !HSPContainedInHSPCheck(hsp_array, hsp, index, k_is_ooframe)) { Int4 start_shift = 0; Int4 adjusted_s_length; Uint1* adjusted_subject; if (kTranslateSubject) { if (!k_is_ooframe && !partial_translation) { Int4 context = FrameToContext(hsp->subject.frame); subject = translation_buffer + frame_offsets[context] + 1; subject_length = frame_offsets[context+1] - frame_offsets[context] - 1; } else if (partial_translation) { GetPartialSubjectTranslation(subject_blk, hsp, k_is_ooframe, gen_code_string, &translation_buffer, &subject, &subject_length, &start_shift); } } if (!hit_options->phi_align && !k_is_ooframe && (((hsp->query.gapped_start == 0 && hsp->subject.gapped_start == 0) || !BLAST_CheckStartForGappedAlignment(hsp, query, subject, sbp)))) { Int4 max_offset = BlastGetStartForGappedAlignment(query, subject, sbp, hsp->query.offset, hsp->query.length, hsp->subject.offset, hsp->subject.length); q_start = max_offset; s_start = (hsp->subject.offset - hsp->query.offset) + max_offset; hsp->query.gapped_start = q_start; hsp->subject.gapped_start = s_start; } else { if(k_is_ooframe) { /* Code below should be investigated for possible optimization for OOF */ s_start = hsp->subject.gapped_start; q_start = hsp->query.gapped_start; gap_align->subject_start = 0; gap_align->query_start = 0; } else { q_start = hsp->query.gapped_start; s_start = hsp->subject.gapped_start; } } adjusted_s_length = subject_length; adjusted_subject = subject; /* Perform the gapped extension with traceback */ if (hit_options->phi_align) { Int4 pat_length = GetPatternLengthFromBlastHSP(hsp); SavePatternLengthInGapAlignStruct(pat_length, gap_align); PHIGappedAlignmentWithTraceback(program_number, query, subject, gap_align, score_params, q_start, s_start, query_length, subject_length); } else { if (!kTranslateSubject) { AdjustSubjectRange(&s_start, &adjusted_s_length, q_start, query_length, &start_shift); adjusted_subject = subject + start_shift; } if (kGreedyTraceback) { BLAST_GreedyGappedAlignment(query, adjusted_subject, query_length, adjusted_s_length, gap_align, score_params, q_start, s_start, FALSE, TRUE); } else { BLAST_GappedAlignmentWithTraceback(program_number, query, adjusted_subject, gap_align, score_params, q_start, s_start, query_length, adjusted_s_length); } } if (gap_align->score >= hit_params->cutoff_score) { Boolean keep=FALSE; Blast_HSPReset(gap_align->query_start, gap_align->query_stop, gap_align->subject_start, gap_align->subject_stop, gap_align->score, &(gap_align->edit_block), hsp); /* FIXME not pretty, should be wrapped as function, done earlier or part of Blast_HSPReset. */ if (hsp && hsp->gap_info) { hsp->gap_info->frame1 = hsp->query.frame; hsp->gap_info->frame2 = hsp->subject.frame; hsp->gap_info->original_length1 = query_length_orig; hsp->gap_info->original_length2 = subject_blk->length; if (program_number == blast_type_blastx) hsp->gap_info->translate1 = TRUE; if (program_number == blast_type_tblastn || program_number == blast_type_rpstblastn) hsp->gap_info->translate2 = TRUE; } if (kGreedyTraceback) { /* Low level greedy algorithm ignores ambiguities, so the score needs to be reevaluated. */ Blast_HSPReevaluateWithAmbiguities(hsp, query, adjusted_subject, hit_options, score_params, query_info, sbp); } keep = HSPSetScores(query_info, query, adjusted_subject, hsp, program_number, sbp, score_params, hit_options); HSPAdjustSubjectOffset(hsp, subject_blk, k_is_ooframe, start_shift); if (keep) keep = HSPCheckForDegenerateAlignments(hsp_array, hsp, index); if (!keep) { hsp_array[index] = Blast_HSPFree(hsp); } } else {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -