📄 link_hsps.c
字号:
/* * =========================================================================== * PRODUCTION $Log: link_hsps.c,v $ * PRODUCTION Revision 1000.3 2004/06/01 18:08:05 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.35 * PRODUCTION * =========================================================================== *//* $Id: link_hsps.c,v 1000.3 2004/06/01 18:08:05 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's offical duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: Ilya Dondoshansky * *//** @file link_hsps.c * Functions to link with use of sum statistics */static char const rcsid[] = "$Id: link_hsps.c,v 1000.3 2004/06/01 18:08:05 gouriano Exp $";#include <algo/blast/core/link_hsps.h>#include <algo/blast/core/blast_util.h>/** Methods used to "order" the HSP's. */#define BLAST_NUMBER_OF_ORDERING_METHODS 2typedef enum LinkOrderingMethod { BLAST_SMALL_GAPS = 0, BLAST_LARGE_GAPS} LinkOrderingMethod;/* Forward declaration */struct LinkHSPStruct;/** The following structure is used in "link_hsps" to decide between * two different "gapping" models. Here link is used to hook up * a chain of HSP's, num is the number of links, and sum is the sum score. * Once the best gapping model has been found, this information is * transferred up to the LinkHSPStruct. This structure should not be * used outside of the function link_hsps.*/typedef struct BlastHSPLink { struct LinkHSPStruct* link[BLAST_NUMBER_OF_ORDERING_METHODS]; /**< Best choice of HSP to link with */ Int2 num[BLAST_NUMBER_OF_ORDERING_METHODS]; /**< number of HSP in the ordering. */ Int4 sum[BLAST_NUMBER_OF_ORDERING_METHODS]; /**< Sum-Score of HSP. */ double xsum[BLAST_NUMBER_OF_ORDERING_METHODS]; /**< Sum-Score of HSP, multiplied by the appropriate Lambda. */ Int4 changed; /**< Has the link been changed since previous access? */} BlastHSPLink;/** Structure containing all information internal to the process of linking * HSPs. */typedef struct LinkHSPStruct { BlastHSP* hsp; /**< Specific HSP this structure corresponds to */ struct LinkHSPStruct* prev; /**< Previous HSP in a set, if any */ struct LinkHSPStruct* next; /**< Next HSP in a set, if any */ BlastHSPLink hsp_link; /**< Auxiliary structure for keeping track of sum scores, etc. */ Boolean linked_set; /**< Is this HSp part of a linked set? */ Boolean start_of_chain; /**< If TRUE, this HSP starts a chain along the "link" pointer. */ Int4 linked_to; /**< Where this HSP is linked to? */ Int4 sumscore; /**< Sumscore of a set of "linked" HSP's. */ Int2 ordering_method; /**< Which method (max or no max for gaps) was used for linking HSPs? */ Int4 q_offset_trim; /**< Start of trimmed hsp in query */ Int4 q_end_trim; /**< End of trimmed HSP in query */ Int4 s_offset_trim; /**< Start of trimmed hsp in subject */ Int4 s_end_trim; /**< End of trimmed HSP in subject */} LinkHSPStruct;#define WINDOW_SIZE 20static double SumHSPEvalue(Uint1 program_number, BlastScoreBlk* sbp, BlastQueryInfo* query_info, BLAST_SequenceBlk* subject, const BlastHitSavingParameters* hit_params, LinkHSPStruct* head_hsp, LinkHSPStruct* hsp, Int4* sumscore){ double gap_prob, gap_decay_rate, sum_evalue, score_prime; Int2 num; Int4 subject_eff_length, query_eff_length, length_adjustment; Int4 context = head_hsp->hsp->context; double eff_searchsp; gap_prob = hit_params->gap_prob; gap_decay_rate = hit_params->gap_decay_rate; num = head_hsp->hsp->num + hsp->hsp->num; length_adjustment = query_info->length_adjustments[context]; subject_eff_length = MAX((subject->length - length_adjustment), 1); if (program_number == blast_type_tblastn) subject_eff_length /= 3; subject_eff_length = MAX(subject_eff_length, 1); query_eff_length = MAX(BLAST_GetQueryLength(query_info, context) - length_adjustment, 1); *sumscore = MAX(hsp->hsp->score, hsp->sumscore) + MAX(head_hsp->hsp->score, head_hsp->sumscore); score_prime = *sumscore * sbp->kbp_gap[context]->Lambda; sum_evalue = BLAST_UnevenGapSumE(sbp->kbp_gap[context], 2*WINDOW_SIZE, hit_params->options->longest_intron + WINDOW_SIZE, num, score_prime, query_eff_length, subject_eff_length, BLAST_GapDecayDivisor(gap_decay_rate, num)); eff_searchsp = ((double) subject_eff_length) * query_eff_length; sum_evalue *= ((double) query_info->eff_searchsp_array[context]) / eff_searchsp; return sum_evalue;}/** Sort the HSP's by starting position of the query. Called by qsort. * The first function sorts in forward, the second in reverse order.*/static intfwd_compare_hsps(const void* v1, const void* v2){ BlastHSP* h1,* h2; LinkHSPStruct** hp1,** hp2; hp1 = (LinkHSPStruct**) v1; hp2 = (LinkHSPStruct**) v2; h1 = (*hp1)->hsp; h2 = (*hp2)->hsp; if (h1->context < h2->context) return -1; else if (h1->context > h2->context) return 1; if (h1->query.offset < h2->query.offset) return -1; if (h1->query.offset > h2->query.offset) return 1; /* Necessary in case both HSP's have the same query offset. */ if (h1->subject.offset < h2->subject.offset) return -1; if (h1->subject.offset > h2->subject.offset) return 1; return 0;}/** Sort the HSP's by starting position of the query. Called by qsort. * The first function sorts in forward, the second in reverse order.*/static intfwd_compare_hsps_transl(const void* v1, const void* v2){ BlastHSP* h1,* h2; LinkHSPStruct** hp1,** hp2; Int4 context1, context2; hp1 = (LinkHSPStruct**) v1; hp2 = (LinkHSPStruct**) v2; h1 = (*hp1)->hsp; h2 = (*hp2)->hsp; context1 = h1->context/3; context2 = h2->context/3; if (context1 < context2) return 1; else if (context1 > context2) return -1; if (h1->query.offset < h2->query.offset) return -1; if (h1->query.offset > h2->query.offset) return 1; /* Necessary in case both HSP's have the same query offset. */ if (h1->subject.offset < h2->subject.offset) return -1; if (h1->subject.offset > h2->subject.offset) return 1; return 0;}/* Comparison function based on end position in the query */static intend_compare_hsps(const void* v1, const void* v2){ BlastHSP* h1,* h2; LinkHSPStruct** hp1,** hp2; hp1 = (LinkHSPStruct**) v1; hp2 = (LinkHSPStruct**) v2; h1 = (*hp1)->hsp; h2 = (*hp2)->hsp; if (h1->context < h2->context) return 1; else if (h1->context > h2->context) return -1; if (h1->query.end < h2->query.end) return -1; if (h1->query.end > h2->query.end) return 1; /* Necessary in case both HSP's have the same query end. */ if (h1->subject.end < h2->subject.end) return -1; if (h1->subject.end > h2->subject.end) return 1; return 0;}static intsumscore_compare_hsps(const void* v1, const void* v2){ LinkHSPStruct* h1,* h2; LinkHSPStruct** hp1,** hp2; Int4 score1, score2; hp1 = (LinkHSPStruct**) v1; hp2 = (LinkHSPStruct**) v2; h1 = *hp1; h2 = *hp2; if (h1 == NULL || h2 == NULL) return 0; score1 = MAX(h1->sumscore, h1->hsp->score); score2 = MAX(h2->sumscore, h2->hsp->score); if (score1 < score2) return 1; if (score1 > score2) return -1; return 0;}/* The following function should be used only for new tblastn. Current implementation does not allow its use in two sequences BLAST */#define MAX_SPLICE_DIST 5static BooleanFindSpliceJunction(Uint1* subject_seq, BlastHSP* hsp1, BlastHSP* hsp2){ Boolean found = FALSE; Int4 overlap, length, i; Uint1* nt_seq; Uint1 g = 4, t = 8, a = 1; /* ncbi4na values for G, T, A respectively */ if (!subject_seq) return FALSE; overlap = hsp1->query.end - hsp2->query.offset; if (overlap >= 0) { length = 3*overlap + 2; nt_seq = &subject_seq[hsp1->subject.end - 3*overlap]; } else { length = MAX_SPLICE_DIST; nt_seq = &subject_seq[hsp1->subject.end]; } for (i=0; i<length-1; i++) { if (nt_seq[i] == g && nt_seq[i+1] == t) { found = TRUE; break; } } if (!found) return FALSE; else found = FALSE; if (overlap >= 0) nt_seq = &subject_seq[hsp2->subject.offset - 2]; else nt_seq = &subject_seq[hsp2->subject.offset - MAX_SPLICE_DIST]; for (i=0; i<length-1; i++) { if (nt_seq[i] == a && nt_seq[i+1] == g) { found = TRUE; break; } } return found;}/* Find an HSP with offset closest, but not smaller/larger than a given one. */static Int4 hsp_binary_search(LinkHSPStruct** hsp_array, Int4 size, Int4 offset, Boolean right){ Int4 index, begin, end, coord; begin = 0; end = size; while (begin < end) { index = (begin + end) / 2; if (right) coord = hsp_array[index]->hsp->query.offset; else coord = hsp_array[index]->hsp->query.end; if (coord >= offset) end = index; else begin = index + 1; } return end;}static intrev_compare_hsps(const void *v1, const void *v2){ BlastHSP* h1,* h2; LinkHSPStruct** hp1,** hp2;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -