📄 blast_hits.c
字号:
/* * =========================================================================== * PRODUCTION $Log: blast_hits.c,v $ * PRODUCTION Revision 1000.7 2004/06/01 18:07:22 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.101 * PRODUCTION * =========================================================================== *//* $Id: blast_hits.c,v 1000.7 2004/06/01 18:07:22 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's offical duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: Ilya Dondoshansky * *//** @file blast_hits.c * BLAST functions for saving hits after the (preliminary) gapped alignment */static char const rcsid[] = "$Id: blast_hits.c,v 1000.7 2004/06/01 18:07:22 gouriano Exp $";#include <algo/blast/core/blast_options.h>#include <algo/blast/core/blast_extend.h>#include <algo/blast/core/blast_hits.h>#include <algo/blast/core/blast_util.h>/******************************************************************************** Functions manipulating BlastHSP's********************************************************************************/BlastHSP* Blast_HSPFree(BlastHSP* hsp){ if (!hsp) return NULL; hsp->gap_info = GapEditBlockDelete(hsp->gap_info); sfree(hsp); return NULL;}BlastHSP* Blast_HSPNew(void){ BlastHSP* new_hsp = (BlastHSP*) calloc(1, sizeof(BlastHSP)); return new_hsp;}/* Comments in blast_hits.h*/Int2Blast_HSPInit(Int4 query_start, Int4 query_end, Int4 subject_start, Int4 subject_end, Int4 query_gapped_start, Int4 subject_gapped_start, Int4 query_context, Int2 subject_frame, Int4 score, GapEditBlock* *gap_edit, BlastHSP* *ret_hsp){ BlastHSP* new_hsp = Blast_HSPNew(); *ret_hsp = NULL; if (new_hsp == NULL) return -1; new_hsp->query.offset = query_start; new_hsp->subject.offset = subject_start; new_hsp->query.length = query_end - query_start; new_hsp->subject.length = subject_end - subject_start; new_hsp->query.end = query_end; new_hsp->subject.end = subject_end; new_hsp->query.gapped_start = query_gapped_start; new_hsp->subject.gapped_start = subject_gapped_start; new_hsp->context = query_context; new_hsp->subject.frame = subject_frame; new_hsp->score = score; if (gap_edit && *gap_edit) { /* If this is non-NULL transfer ownership. */ new_hsp->gap_info = *gap_edit; *gap_edit = NULL; } *ret_hsp = new_hsp; return 0;}/* Comments in blast_hits.h*/Int2Blast_HSPReset(Int4 query_start, Int4 query_end, Int4 subject_start, Int4 subject_end, Int4 score, GapEditBlock* *gap_edit, BlastHSP* hsp){ if (hsp == NULL) return -1; hsp->score = score; hsp->query.offset = query_start; hsp->subject.offset = subject_start; hsp->query.length = query_end - query_start; hsp->subject.length = subject_end - subject_start; hsp->query.end = query_end; hsp->subject.end = subject_end; if (gap_edit && *gap_edit) { /* If this is non-NULL transfer ownership. */ hsp->gap_info = *gap_edit; *gap_edit = NULL; } return 0;}void Blast_HSPPHIGetEvalue(BlastHSP* hsp, BlastScoreBlk* sbp){ double paramC; double Lambda; Int8 pattern_space; if (!hsp || !sbp) return; paramC = sbp->kbp[0]->paramC; Lambda = sbp->kbp[0]->Lambda; pattern_space = sbp->effective_search_sp; hsp->evalue = pattern_space*paramC*(1+Lambda*hsp->score)* exp(-Lambda*hsp->score);}Boolean Blast_HSPReevaluateWithAmbiguities(BlastHSP* hsp, Uint1* query_start, Uint1* subject_start, const BlastHitSavingOptions* hit_options, const BlastScoringParameters* score_params, BlastQueryInfo* query_info, BlastScoreBlk* sbp){ BlastScoringOptions *score_options = score_params->options; Int4 sum, score, gap_open, gap_extend; Int4** matrix; Uint1* query,* subject; Uint1* new_q_start,* new_s_start,* new_q_end,* new_s_end; Int4 index; Int2 factor = 1; Uint1 mask = 0x0f; GapEditScript* esp,* last_esp = NULL,* prev_esp,* first_esp = NULL; Boolean delete_hsp; Int8 searchsp_eff; Int4 last_esp_num = 0; Int4 align_length; Blast_KarlinBlk* kbp; Boolean gapped_calculation = score_options->gapped_calculation; /* NB: this function is called only for BLASTn, so we know where the Karlin block is */ kbp = sbp->kbp_std[hsp->context]; searchsp_eff = query_info->eff_searchsp_array[hsp->context]; if (score_params->gap_open == 0 && score_params->gap_extend == 0) { if (score_params->reward % 2 == 1) factor = 2; gap_open = 0; gap_extend = (score_params->reward - 2*score_params->penalty) * factor / 2; } else { gap_open = score_params->gap_open; gap_extend = score_params->gap_extend; } matrix = sbp->matrix; query = query_start + hsp->query.offset; subject = subject_start + hsp->subject.offset; score = 0; sum = 0; new_q_start = new_q_end = query; new_s_start = new_s_end = subject; index = 0; if (!gapped_calculation) { for (index = 0; index < hsp->subject.length; ++index) { sum += factor*matrix[*query & mask][*subject]; query++; subject++; if (sum < 0) { if (BLAST_KarlinStoE_simple(score, kbp, searchsp_eff) > hit_options->expect_value) { /* Start from new offset */ new_q_start = query; new_s_start = subject; score = sum = 0; } else { /* Stop here */ break; } } else if (sum > score) { /* Remember this point as the best scoring end point */ score = sum; new_q_end = query; new_s_end = subject; } } } else { esp = hsp->gap_info->esp; prev_esp = NULL; last_esp = first_esp = esp; last_esp_num = 0; while (esp) { if (esp->op_type == eGapAlignSub) { sum += factor*matrix[*query & mask][*subject]; query++; subject++; index++; } else if (esp->op_type == eGapAlignDel) { sum -= gap_open + gap_extend * esp->num; subject += esp->num; index += esp->num; } else if (esp->op_type == eGapAlignIns) { sum -= gap_open + gap_extend * esp->num; query += esp->num; index += esp->num; } if (sum < 0) { if (BLAST_KarlinStoE_simple(score, kbp, searchsp_eff) > hit_options->expect_value) { /* Start from new offset */ new_q_start = query; new_s_start = subject; score = sum = 0; if (index < esp->num) { esp->num -= index; first_esp = esp; index = 0; } else { first_esp = esp->next; } /* Unlink the bad part of the esp chain so it can be freed later */ if (prev_esp) prev_esp->next = NULL; last_esp = NULL; } else { /* Stop here */ break; } } else if (sum > score) { /* Remember this point as the best scoring end point */ score = sum; last_esp = esp; last_esp_num = index; new_q_end = query; new_s_end = subject; } if (index >= esp->num) { index = 0; prev_esp = esp; esp = esp->next; } } /* loop on edit scripts */ } /* if (gapped_calculation) */ score /= factor; delete_hsp = FALSE; hsp->score = score; hsp->evalue = BLAST_KarlinStoE_simple(score, kbp, searchsp_eff); if (hsp->evalue > hit_options->expect_value) { delete_hsp = TRUE; } else { hsp->query.length = new_q_end - new_q_start; hsp->subject.length = new_s_end - new_s_start; hsp->query.offset = new_q_start - query_start; hsp->query.end = hsp->query.offset + hsp->query.length; hsp->subject.offset = new_s_start - subject_start; hsp->subject.end = hsp->subject.offset + hsp->subject.length; if (gapped_calculation) { /* Make corrections in edit block and free any parts that are no longer needed */ if (first_esp != hsp->gap_info->esp) { GapEditScriptDelete(hsp->gap_info->esp); hsp->gap_info->esp = first_esp; } if (last_esp->next != NULL) { GapEditScriptDelete(last_esp->next); last_esp->next = NULL; } last_esp->num = last_esp_num; } Blast_HSPGetNumIdentities(query_start, subject_start, hsp, gapped_calculation, &hsp->num_ident, &align_length); /* Check if this HSP passes the percent identity test */ if (((double)hsp->num_ident) / align_length * 100 < hit_options->percent_identity) delete_hsp = TRUE; } if (delete_hsp) { /* This HSP is now below the cutoff */ if (gapped_calculation && first_esp != NULL && first_esp != hsp->gap_info->esp) GapEditScriptDelete(first_esp); } return delete_hsp;} Int2Blast_HSPGetNumIdentities(Uint1* query, Uint1* subject, BlastHSP* hsp, Boolean is_gapped, Int4* num_ident_ptr, Int4* align_length_ptr){ Int4 i, num_ident, align_length, q_off, s_off; Uint1* q,* s; GapEditBlock* gap_info; GapEditScript* esp; gap_info = hsp->gap_info; if (!gap_info && is_gapped) return -1; q_off = hsp->query.offset; s_off = hsp->subject.offset; if (!subject || !query) return -1; q = &query[q_off]; s = &subject[s_off]; num_ident = 0; align_length = 0; if (!gap_info) { /* Ungapped case */ align_length = hsp->query.length; for (i=0; i<align_length; i++) { if (*q++ == *s++) num_ident++; } } else { for (esp = gap_info->esp; esp; esp = esp->next) { align_length += esp->num; switch (esp->op_type) { case eGapAlignSub: for (i=0; i<esp->num; i++) { if (*q++ == *s++) num_ident++; } break; case eGapAlignDel: s += esp->num; break; case eGapAlignIns: q += esp->num; break; default: s += esp->num; q += esp->num; break; } } } *align_length_ptr = align_length; *num_ident_ptr = num_ident; return 0;}Int2Blast_HSPGetOOFNumIdentities(Uint1* query, Uint1* subject, BlastHSP* hsp, Uint1 program, Int4* num_ident_ptr, Int4* align_length_ptr){ Int4 i, num_ident, align_length; Uint1* q,* s; GapEditScript* esp; if (!hsp->gap_info || !subject || !query) return -1; if (program == blast_type_tblastn || program == blast_type_rpstblastn) { q = &query[hsp->query.offset];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -