/* Code-viewer page banner (not part of the original source):
 * file "blast_lookup.h"; font size: */
/*
 * ===========================================================================
 * PRODUCTION $Log: blast_lookup.h,v $
 * PRODUCTION Revision 1000.2  2004/06/01 18:03:34  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.9
 * PRODUCTION
 * ===========================================================================
 */

/* $Id: blast_lookup.h,v 1000.2 2004/06/01 18:03:34 gouriano Exp $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 */

/** @file blast_lookup.h
 *  @todo FIXME: Need description (protein/rps lookup structures & word finding
 *  routines?)
 */

#include <algo/blast/core/blast_def.h>
#include <algo/blast/core/blast_options.h>
#include <algo/blast/core/blast_rps.h>
#include <algo/blast/core/lookup_wrap.h>

#ifndef BLAST_LOOKUP__H
#define BLAST_LOOKUP__H

#ifdef __cplusplus
extern "C" {
#endif

/* some defines for the pv_array, as this changes from 32-bit to 64-bit
 * systems. */
#if defined(LONG_BIT) && LONG_BIT==64

#define PV_ARRAY_TYPE Uint8  /* The pv_array 'native' type. */
#define PV_ARRAY_BYTES 8     /* number of BYTES in 'native' type. */
#define PV_ARRAY_BTS 6       /* bits-to-shift from lookup_index to pv_array index. */
#define PV_ARRAY_MASK 63     /* amount to mask off. */

#else

#define PV_ARRAY_TYPE Uint4  /* The pv_array 'native' type. */
#define PV_ARRAY_BYTES 4     /* number of BYTES in 'native' type. */
#define PV_ARRAY_BTS 5       /* bits-to-shift from lookup_index to pv_array index. */
#define PV_ARRAY_MASK 31     /* amount to mask off. */

#endif

/* Set the presence-vector bit that corresponds to lookup index `index`.
 *
 * The word is selected with (index >> PV_ARRAY_BTS) and the bit within the
 * word with (index & PV_ARRAY_MASK).  The constant 1 is converted to
 * PV_ARRAY_TYPE before shifting: shifting a plain `int` 1 by 31 (or by up to
 * 63 in the 64-bit configuration) is undefined behaviour and could never
 * reach the upper half of a 64-bit word.
 *
 * Note: `index` is evaluated twice; do not pass an expression with side
 * effects. */
#define PV_SET(lookup, index) \
    ( (lookup)->pv[(index) >> PV_ARRAY_BTS] |= \
          ((PV_ARRAY_TYPE)1 << ((index) & PV_ARRAY_MASK)) )

/* Test the presence-vector bit that corresponds to lookup index `index`.
 *
 * Evaluates to a non-zero PV_ARRAY_TYPE value (the isolated bit, not
 * normalised to 1) when the bit is set, and to zero otherwise.
 *
 * Note: `index` is evaluated twice; do not pass an expression with side
 * effects. */
#define PV_TEST(lookup, index) \
    ( (lookup)->pv[(index) >> PV_ARRAY_BTS] & \
          ((PV_ARRAY_TYPE)1 << ((index) & PV_ARRAY_MASK)) )

/* Number of bits to shift in lookup index calculation when scanning compressed
 * nucleotide sequence */
#define FULL_BYTE_SHIFT 8

/* structure defining one cell of the compacted lookup table */
/* stores the number of hits and
   up to three hits if the total number of hits is <= 3
     or
   a pointer to more hits if the total number of hits is > 3
 */
#define HITS_ON_BACKBONE 3

typedef struct LookupBackboneCell {
    Int4 num_used;       /* num valid positions */

    union {
        Int4 overflow_cursor;
        Int4 entries[HITS_ON_BACKBONE];
    } payload;

} LookupBackboneCell;

typedef struct LookupTable {
    Int4 threshold;        /* the score threshold for neighboring words */
    Int4 neighbor_matches; /* the number of neighboring words found while
                              indexing the queries, used for
                              informational/debugging purposes */
    Int4 exact_matches;    /* the number of exact matches found while
                              indexing the queries, used for
                              informational/debugging purposes */
    Int4 mask;             /* part of index to mask off, that is, top
                              (wordsize*charsize) bits should be
                              discarded. */
    Int4 word_length;      /* Length in bases of the full word match
                              required to trigger extension */
    Int4 wordsize;         /* number of full bytes in a full word */
    Int4 reduced_wordsize; /* number of bytes in a word stored in the LT */
    Int4 charsize;         /* number of bits for a base/residue */
    Int4 scan_step;        /* number of bases between successive words */
    Int4 alphabet_size;    /* number of letters in the alphabet */
    Int4 backbone_size;    /* number of cells in the backbone */
    Int4 longest_chain;    /* length of the longest chain on the backbone */
    Int4 ** thin_backbone; /* the "thin" backbone. for each index cell,
                              maintain a pointer to a dynamically-allocated
                              chain of hits. */
    LookupBackboneCell * thick_backbone; /* the "thick" backbone. after
                              queries are indexed, compact the backbone to
                              put at most HITS_ON_BACKBONE hits on the
                              backbone, otherwise point to some overflow
                              storage */
    Int4 * overflow;       /* the overflow array for the compacted
                              lookup table */
    Int4 overflow_size;    /* Number of elements in the overflow array
                              (above). */
    PV_ARRAY_TYPE *pv;     /* presence vector. a bit vector indicating
                              which cells are occupied */
    Uint1* neighbors;      /* neighboring word array */
    Int4 neighbors_length; /* length of neighboring word array */
    Boolean use_pssm;      /* if True use PSSM rather than (protein)
                              sequence to construct lookup table. */
} LookupTable;

/** Create a mapping from word w to the supplied query offset
 *
 * @param lookup the lookup table [in]
 * @param w pointer to the beginning of the word [in]
 * @param query_offset the offset in the query where the word occurs [in]
 * @return Zero.
 */
Int4 BlastAaLookupAddWordHit(LookupTable* lookup,
                             Uint1* w,
                             Int4 query_offset);

/** Convert the chained lookup table to the pv_array and thick_backbone.
 *
 * @param lookup the lookup table [in]
 * @return Zero.
 */
Int4 _BlastAaLookupFinalize(LookupTable* lookup);

/**
 * Scans the subject sequence from "offset" to the end of the sequence.
 * Copies at most array_size hits.
 * Returns the number of hits found.
 * If there isn't enough room to copy all the hits, return early, and update
 * "offset".
 *
 * @param lookup_wrap the lookup table [in]
 * @param subject the subject sequence [in]
 * @param offset the offset in the subject at which to begin scanning [in/out]
 * @param query_offsets array to which hits will be copied [out]
 * @param subject_offsets array to which hits will be copied [out]
 * @param array_size length of the offset arrays [in]
 * @return The number of hits found.
 */
Int4 BlastAaScanSubject(const LookupTableWrap* lookup_wrap, /* in: the LUT */
                        const BLAST_SequenceBlk *subject,
                        Int4* offset,
                        Uint4 * NCBI_RESTRICT query_offsets, /* out: pointer to the array to which hits will be copied */
                        Uint4 * NCBI_RESTRICT subject_offsets, /* out : pointer to the array where offsets will be stored */
                        Int4 array_size);

/**
 * Scans the RPS query sequence from "offset" to the end of the sequence.
 * Copies at most array_size hits.
 * Returns the number of hits found.
 * If there isn't enough room to copy all the hits, return early, and update
 * "offset".
 *
 * @param lookup_wrap the lookup table [in]
 * @param sequence the subject sequence [in]
 * @param offset the offset in the subject at which to begin scanning [in/out]
 * @param table_offsets array to which hits will be copied [out]
 * @param sequence_offsets array to which hits will be copied [out]
 * @param array_size length of the offset arrays [in]
 * @return The number of hits found.
 */
Int4 BlastRPSScanSubject(const LookupTableWrap* lookup_wrap, /* in: the LUT */
                         const BLAST_SequenceBlk *sequence,
                         Int4* offset,
                         Uint4 * table_offsets, /* out : pointer to the array where offsets will be stored */
                         Uint4 * sequence_offsets, /* out: pointer to the array to which hits will be copied */
                         Int4 array_size);

#ifdef __cplusplus
}
#endif

#endif /* BLAST_LOOKUP__H */
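/* Code-viewer keyboard-shortcut help (not part of the original source):
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Toggle theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */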