📄 blast_hits.h
字号:
/* * =========================================================================== * PRODUCTION $Log: blast_hits.h,v $ * PRODUCTION Revision 1000.4 2004/06/01 18:03:32 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.41 * PRODUCTION * =========================================================================== *//* $Id: blast_hits.h,v 1000.4 2004/06/01 18:03:32 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: Ilya Dondoshansky * *//** @file blast_hits.h * Structures and API used for saving BLAST hits */#ifndef __BLAST_HITS__#define __BLAST_HITS__#ifdef __cplusplusextern "C" {#endif#include <algo/blast/core/blast_options.h>#include <algo/blast/core/gapinfo.h>#include <algo/blast/core/blast_seqsrc.h>/** One sequence segment within an HSP */typedef struct BlastSeg { Int2 frame; /**< Translation frame */ Int4 offset; /**< Start of hsp */ Int4 length; /**< Length of hsp */ Int4 end; /**< End of HSP */ Int4 gapped_start;/**< Where the gapped extension started. */} BlastSeg;/** Structure holding all information about an HSP */typedef struct BlastHSP { Int4 score; /**< This HSP's raw score */ Int4 num_ident; /**< Number of identical base pairs in this HSP */ double evalue; /**< This HSP's e-value */ BlastSeg query; /**< Query sequence info. */ BlastSeg subject; /**< Subject sequence info. */ Int4 context; /**< Context number of query */ GapEditBlock* gap_info; /**< ALL gapped alignment is here */ Int2 splice_junction; /**< Splice junction count in a linked set of HSPs. Each present splice junction counts as +1, absent as -1. */ Int4 num; /**< How many HSP's make up this (sum) segment? */ Uint4 pattern_length; /**< Length of pattern occurrence in this HSP, in PHI BLAST */} BlastHSP;/** The structure to hold all HSPs for a given sequence after the gapped * alignment. */typedef struct BlastHSPList { Int4 oid;/**< The ordinal id of the subject sequence this HSP list is for */ BlastHSP** hsp_array; /**< Array of pointers to individual HSPs */ Int4 hspcnt; /**< Number of HSPs saved */ Int4 allocated; /**< The allocated size of the hsp_array */ Int4 hsp_max; /**< The maximal number of HSPs allowed to be saved */ Boolean do_not_reallocate; /**< Is reallocation of the hsp_array allowed? */ Boolean traceback_done; /**< Has the traceback already been done on HSPs in this list? */} BlastHSPList;/** The structure to contain all BLAST results for one query sequence */typedef struct BlastHitList { Int4 hsplist_count; /**< Filled size of the HSP lists array */ Int4 hsplist_max; /**< Maximal allowed size of the HSP lists array */ double worst_evalue; /**< Highest of the best e-values among the HSP lists */ Int4 low_score; /**< The lowest of the best scores among the HSP lists */ Boolean heapified; /**< Is this hit list already heapified? */ BlastHSPList** hsplist_array; /**< Array of HSP lists for individual database hits */} BlastHitList;/** The structure to contain all BLAST results, for multiple queries */typedef struct BlastHSPResults { Int4 num_queries; /**< Number of query sequences */ BlastHitList** hitlist_array; /**< Array of results for individual query sequences */} BlastHSPResults;/* By how much should the chunks of a subject sequence overlap if it is too long and has to be split */#define DBSEQ_CHUNK_OVERLAP 100/********************************************************************************The following section has four sets of functions (or "APIs"), manipulating withthe following structures:1. BlastHSP, which is the basic unit to record one alignment. 2. BlastHSPList, which is a list of BlastHSP's for one database sequence. 3. BlastHitList, which contains all HSPList's for a given query. 4. BlastHSPResults, which is a list of BlastHitList's for multiple queries. The naming conventions for the functions are the following:1.) All routines start with "Blast_"2.) After "Blast_" comes the structure being manipulated, that should be either HSP (all capitals all the time!), HSPList (exactly this capitalization), HitList (capital H and L, all others lower-case), or HSPResults.3.) finally the task being done, e.g., "Free", "New", "Init".********************************************************************************//******************************************************************************** HSP API********************************************************************************//** Deallocate memory for an HSP structure */BlastHSP* Blast_HSPFree(BlastHSP* hsp);/** Allocate and zeros out memory for an HSP structure */BlastHSP* Blast_HSPNew(void);/** Allocates BlastHSP and inits with information from input. * structure * @param query_start Start of query alignment [in] * @param query_end End of query alignment [in] * @param subject_start Start of subject alignment [in] * @param subject_end End of subject alignment [in] * @param query_gapped_start Where gapped alignment started on query [in] * @param subject_gapped_start Where gapped alignment started on subject [in] * @param query_context The index of the query containing this HSP [in] * @param subject_frame Subject frame: -3..3 for translated sequence, * 1 for blastn, 0 for blastp [in] * @param score score of alignment [in] * @param gap_edit Will be transferred to HSP and nulled out * if a traceback was not calculated may be NULL [in] [out] * @param ret_hsp allocated and filled in BlastHSP [out] */Int2Blast_HSPInit(Int4 query_start, Int4 query_end, Int4 subject_start, Int4 subject_end, Int4 query_gapped_start, Int4 subject_gapped_start, Int4 query_context, Int2 subject_frame, Int4 score, GapEditBlock* *gap_edit, BlastHSP** ret_hsp);/** Modifies the HSP data after the final gapped alignment. * Input includes only data that likely needs modification. * @param query_start start of alignment on query [in] * @param query_end end of alignment on query [in] * @param subject_start start of alignment on subject [in] * @param subject_end end of alignment on subject [in] * @param program_number Which BLAST program is this done for? [in] * @param gap_edit traceback from final gapped alignment [in] [out] * @param hsp Original HSP from the preliminary stage [in] [out] */Int2Blast_HSPReset(Int4 query_start, Int4 query_end, Int4 subject_start, Int4 subject_end, Int4 score, GapEditBlock* *gap_edit, BlastHSP* hsp);/** Calculate e-value for an HSP found by PHI BLAST. * @param hsp An HSP found by PHI BLAST [in] * @param sbp Scoring block with statistical parameters [in] */void Blast_HSPPHIGetEvalue(BlastHSP* hsp, BlastScoreBlk* sbp);/** Reevaluate the HSP's score, e-value and percent identity after taking * into account the ambiguity information. Needed for blastn only, either * after a greedy gapped extension, or for ungapped search. * @param hsp The HSP structure [in] [out] * @param query_start Pointer to the start of the query sequence [in] * @param subject_start Pointer to the start of the subject sequence [in] * @param hit_options Hit saving options with e-value cut-off [in] * @param score_params Scoring parameters [in] * @param query_info Query information structure, containing effective search * space(s) [in] * @param sbp Score block with Karlin-Altschul parameters [in] * @return Should this HSP be deleted after the score reevaluation? */Boolean Blast_HSPReevaluateWithAmbiguities(BlastHSP* hsp, Uint1* query_start, Uint1* subject_start, const BlastHitSavingOptions* hit_options, const BlastScoringParameters* score_params, BlastQueryInfo* query_info, BlastScoreBlk* sbp);/** Calculate number of identities in an HSP. * @param query The query sequence [in] * @param subject The uncompressed subject sequence [in] * @param hsp All information about the HSP [in] * @param is_gapped Is this a gapped search? [in] * @param num_ident_ptr Number of identities [out] * @param align_length_ptr The alignment length, including gaps [out] */Int2Blast_HSPGetNumIdentities(Uint1* query, Uint1* subject, BlastHSP* hsp, Boolean is_gapped, Int4* num_ident_ptr, Int4* align_length_ptr);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -