📄 blast_options.h
字号:
/* * =========================================================================== * PRODUCTION $Log: blast_options.h,v $ * PRODUCTION Revision 1000.4 2004/06/01 18:03:39 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.76 * PRODUCTION * =========================================================================== *//* $Id: blast_options.h,v 1000.4 2004/06/01 18:03:39 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's offical duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: Tom Madden * *//** @file blast_options.h * Options to be used for different stages of the BLAST search. */#ifndef __BLASTOPTIONS__#define __BLASTOPTIONS__#include <algo/blast/core/blast_def.h>#include <algo/blast/core/blast_message.h>#include <algo/blast/core/blast_stat.h>#ifdef __cplusplusextern "C" {#endif/** Some default values (used when creating blast options block and for * command-line program defaults. When changing these defaults, please * remember to update the defaults in the command-line programs *//** "window" between hits to trigger an extension. */#define BLAST_WINDOW_SIZE_PROT 40 /**< default window (all protein searches) */#define BLAST_WINDOW_SIZE_NUCL 0 /**< default window size (blastn) */#define BLAST_WINDOW_SIZE_MEGABLAST 0 /**< default window size (contiguous megablast) */#define BLAST_WINDOW_SIZE_DISC 40 /**< default window size (discontiguous megablast) *//** length of word to trigger an extension. */#define BLAST_WORDSIZE_PROT 3 /**< default word size (all protein searches) */#define BLAST_WORDSIZE_NUCL 11 /**< default word size (blastn) */#define BLAST_WORDSIZE_MEGABLAST 28 /**< default word size (contiguous megablast; for discontig megablast the word size is explicitly overridden) */#define BLAST_VARWORD_NUCL 0 /**< blastn with variable wordsize */#define BLAST_VARWORD_MEGABLAST 1 /**< megablast with variable wordsize *//** Protein gap costs are the defaults for the BLOSUM62 scoring matrix. * More gap costs are listed in BLASTOptionSetGapParams *//** cost for the existence of a gap.*/#define BLAST_GAP_OPEN_PROT 11 /**< default gap open penalty (all protein searches) */#define BLAST_GAP_OPEN_NUCL 5 /**< default gap open penalty (blastn) */#define BLAST_GAP_OPEN_MEGABLAST 0 /**< default gap open penalty (megablast with greedy gapped alignment) *//** cost to extend a gap. */#define BLAST_GAP_EXTN_PROT 1 /**< default gap open penalty (all protein searches) */#define BLAST_GAP_EXTN_NUCL 2 /**< default gap open penalty (blastn) */#define BLAST_GAP_EXTN_MEGABLAST 0 /**< default gap open penalty (megablast) with greedy gapped alignment) *//** neighboring word score thresholds; a threshold of zero * means that only query and subject words that match exactly * will go into the BLAST lookup table when it is generated */#define BLAST_WORD_THRESHOLD_BLASTP 11 /**< default neighboring threshold (blastp/rpsblast) */#define BLAST_WORD_THRESHOLD_BLASTN 0 /**< default threshold (blastn) */#define BLAST_WORD_THRESHOLD_BLASTX 12 /**< default threshold (blastx) */#define BLAST_WORD_THRESHOLD_TBLASTN 13 /**< default neighboring threshold (tblastn/rpstblastn) */#define BLAST_WORD_THRESHOLD_TBLASTX 13 /**< default threshold (tblastx) */#define BLAST_WORD_THRESHOLD_MEGABLAST 0 /**< default threshold (megablast) *//** default dropoff for ungapped extension; ungapped extensions * will stop when the score for the extension has dropped from * the current best score by at least this much */#define BLAST_UNGAPPED_X_DROPOFF_PROT 7 /**< ungapped dropoff score for all searches except blastn */#define BLAST_UNGAPPED_X_DROPOFF_NUCL 20 /**< ungapped dropoff score for blastn (and megablast) *//** default dropoff for preliminary gapped extensions */#define BLAST_GAP_X_DROPOFF_PROT 15 /**< default dropoff (all protein- based gapped extensions) */#define BLAST_GAP_X_DROPOFF_NUCL 30 /**< default dropoff for non-greedy nucleotide gapped extensions */#define BLAST_GAP_X_DROPOFF_GREEDY 30 /**< default dropoff for greedy nucleotide gapped extensions */#define BLAST_GAP_X_DROPOFF_TBLASTX 0 /**< default dropoff for tblastx *//** default bit score that will trigger gapped extension */#define BLAST_GAP_TRIGGER_PROT 22.0 /**< default bit score that will trigger a gapped extension for all protein- based searches */#define BLAST_GAP_TRIGGER_NUCL 25.0 /**< default bit score that will trigger a gapped extension for blastn *//** default dropoff for the final gapped extension with traceback */#define BLAST_GAP_X_DROPOFF_FINAL_PROT 25 /**< default dropoff (all protein- based gapped extensions) */#define BLAST_GAP_X_DROPOFF_FINAL_NUCL 50 /**< default dropoff for nucleotide gapped extensions) */#define BLAST_GAP_X_DROPOFF_FINAL_TBLASTX 0 /**< default dropoff for tblastx *//** default reward and penalty (only applies to blastn/megablast) */#define BLAST_PENALTY -3 /**< default nucleotide mismatch score */#define BLAST_REWARD 1 /**< default nucleotide match score */#define BLAST_EXPECT_VALUE 10.0 /**< by default, alignments whose expect value exceeds this number are discarded *//** Types of the lookup table */#define MB_LOOKUP_TABLE 1 /**< megablast lookup table (includes both contiguous and discontiguous megablast) */#define NA_LOOKUP_TABLE 2 /**< blastn lookup table */#define AA_LOOKUP_TABLE 3 /**< standard protein (blastp) lookup table */#define PHI_AA_LOOKUP 4 /**< protein lookup table specialized for phi-blast */#define PHI_NA_LOOKUP 5 /**< nucleotide lookup table for phi-blast */#define RPS_LOOKUP_TABLE 6 /**< RPS lookup table (rpsblast and rpstblastn) *//** Defaults for PSI-BLAST options */#define PSI_INCLUSION_ETHRESH 0.002#define PSI_PSEUDO_COUNT_CONST 9/** Default genetic code for query and/or database */#define BLAST_GENETIC_CODE 1 /**< Use the standard genetic code for converting groups of three nucleotide bases to protein letters *//** Default parameters for linking HSPs */#define BLAST_GAP_PROB 0.5#define BLAST_GAP_PROB_GAPPED 1.0#define BLAST_GAP_DECAY_RATE 0.5 #define BLAST_GAP_DECAY_RATE_GAPPED 0.1#define BLAST_GAP_SIZE 50/** Options needed to construct a lookup table * Also needed: query sequence and query length. */typedef struct LookupTableOptions { Int4 threshold; /**< Score threshold for putting words in a lookup table */ Int4 lut_type; /**< What kind of lookup table to construct? E.g. blastn allows for traditional and megablast style lookup table */ Int2 word_size; /**< Determines the size of the lookup table */ Int4 alphabet_size; /**< Size of the alphabet */ Uint1 mb_template_length; /**< Length of the discontiguous words */ Uint1 mb_template_type; /**< Type of a discontiguous word template */ Int4 max_positions; /**< Max number of positions per word (MegaBlast only); no restriction if 0 */ Uint1 scan_step; /**< Step at which database sequence should be parsed */ char* phi_pattern; /**< PHI-BLAST pattern */ Int4 max_num_patterns; /**< Maximal number of patterns allowed for PHI-BLAST */ Boolean use_pssm; /**< Use a PSSM rather than a (protein) query to construct lookup table */} LookupTableOptions;/** Options required for setting up the query sequence */typedef struct QuerySetUpOptions { char* filter_string; /**< Parseable string that determines the filtering options */ Uint1 strand_option; /**< In blastn: which strand to search: 1 = forward; 2 = reverse; 3 = both */ Int4 genetic_code; /**< Genetic code to use for translation, [t]blastx only */} QuerySetUpOptions;/** specifies the data structures used for bookkeeping * during computation of ungapped extensions */typedef enum SeedContainerType { eDiagArray, /**< use diagonal structures */ eMbStacks, /**< use stacks (megablast only) */ eMaxContainerType /**< maximum value for this enumeration */} SeedContainerType;/** when performing mini-extensions on hits from the * blastn or megablast lookup table, this determines * the direction in which the mini-extension is attempted */typedef enum SeedExtensionMethod { eRight, /**< extend only to the right */ eRightAndLeft, /**< extend to left and right (used with AG method) */ eMaxSeedExtensionMethod /**< maximum value for this enumeration */} SeedExtensionMethod;/** Options needed for initial word finding and processing */typedef struct BlastInitialWordOptions { Int4 window_size; /**< Maximal allowed distance between 2 hits in case 2 hits are required to trigger the extension */ SeedContainerType container_type; /**< How to store offset pairs for initial seeds? */ SeedExtensionMethod extension_method; /**< How should exact matches be extended? */ Boolean variable_wordsize; /**< Should the partial bytes be examined for determining whether exact match is long enough? */ Boolean ungapped_extension; /**< Should the ungapped extension be performed? */ double x_dropoff; /**< X-dropoff value (in bits) for the ungapped extension */} BlastInitialWordOptions;/** Expect values corresponding to the default cutoff * scores for ungapped alignments */#define UNGAPPED_CUTOFF_E_BLASTN 0.05 /**< default ungapped evalue (blastn) */#define UNGAPPED_CUTOFF_E_BLASTP 1e-300 /**< default ungapped evalue (blastp) */#define UNGAPPED_CUTOFF_E_BLASTX 1.0 /**< default ungapped evalue (blastx) */#define UNGAPPED_CUTOFF_E_TBLASTN 1.0 /**< default ungapped evalue (tblastn) */#define UNGAPPED_CUTOFF_E_TBLASTX 1e-300 /**< default ungapped evalue (tblastx) *//** Parameter block that contains a pointer to BlastInitialWordOptions * and parsed values for those options that require it * (in this case x_dropoff). */typedef struct BlastInitialWordParameters { BlastInitialWordOptions* options; /**< The original (unparsed) options. */ Int4 x_dropoff; /**< Raw X-dropoff value for the ungapped extension */ Int4 cutoff_score; /**< Cutoff score for saving ungapped hits. */} BlastInitialWordParameters; /** The algorithm to be used for preliminary * gapped extensions */typedef enum EBlastPrelimGapExt { eDynProgExt, /**< standard affine gapping */ eGreedyExt, /**< Greedy extension (megaBlast) */ eGreedyWithTracebackExt /**< Greedy extension with Traceback calculated. */} EBlastPrelimGapExt;/** The algorithm to be used for final gapped * extensions with traceback */typedef enum EBlastTbackExt { eDynProgTbck, /**< standard affine gapping */ eGreedyTbck, /**< Greedy extension (megaBlast) */ eSmithWatermanTbck /**< Smith-waterman finds optimal scores, then ALIGN_EX to find alignment. */} EBlastTbackExt;/** Options used for gapped extension * These include: * a. Penalties for various types of gapping; * b. Drop-off values for the extension algorithms tree exploration; * c. Parameters identifying what kind of extension algorithm(s) should * be used. */typedef struct BlastExtensionOptions { double gap_x_dropoff; /**< X-dropoff value for gapped extension (in bits) */ double gap_x_dropoff_final;/**< X-dropoff value for the final gapped extension (in bits) */ double gap_trigger; /**< Score in bits for starting gapped extension */ EBlastPrelimGapExt ePrelimGapExt; /**< type of preliminary gapped extension (normally) for calculating score. */ EBlastTbackExt eTbackExt; /**< type of traceback extension. */ Boolean compositionBasedStats; /**< if TRUE use composition-based stats. */ Boolean skip_traceback; /**< @deprecated Is traceback information needed in results? */} BlastExtensionOptions;/** Computed values used as parameters for gapped alignments */typedef struct BlastExtensionParameters { BlastExtensionOptions* options; /**< The original (unparsed) options. */ Int4 gap_x_dropoff; /**< X-dropoff value for gapped extension (raw) */ Int4 gap_x_dropoff_final;/**< X-dropoff value for the final gapped extension (raw) */ Int4 gap_trigger; /**< Minimal raw score for starting gapped extension */} BlastExtensionParameters;/** Options used when evaluating and saving hits * These include: * a. Restrictions on the number of hits to be saved;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -