📄 blast_lookup.h
字号:
/** Create a new protein lookup table. * @param opt pointer to lookup table options structure [in] * @param lut handle to lookup table structure [in/modified] */ Int4 BlastAaLookupNew(const LookupTableOptions* opt, LookupTable* * lut);/** Create a new lookup table. * @param opt pointer to lookup table options structure [in] * @param lut handle to lookup table [in/modified] * @param is_protein boolean indicating protein or nucleotide [in] */ Int4 LookupTableNew(const LookupTableOptions* opt, LookupTable* * lut, Boolean is_protein);/** Free the lookup table. */LookupTable* LookupTableDestruct(LookupTable* lookup);/** Index an array of queries. * * @param lookup the lookup table [in/modified] * @param matrix the substitution matrix [in] * @param query the array of queries to index * @param unmasked_regions an array of ListNode*s, each of which points to a (list of) integer pair(s) which specify the unmasked region(s) of the query [in] * @param num_queries the number of queries [in] * @return Zero. */Int4 BlastAaLookupIndexQueries(LookupTable* lookup, Int4 ** matrix, BLAST_SequenceBlk* query, ListNode* unmasked_regions, Int4 num_queries);/** Index a single query. * * @param lookup the lookup table [in/modified] * @param matrix the substitution matrix [in] * @param query the array of queries to index * @param unmasked_regions a ListNode* which points to a (list of) integer pair(s) which specify the unmasked region(s) of the query [in] * @param query_bias number added to each offset put into lookup table (only used for RPS blast database creation, otherwise 0) [in] * @return Zero. */Int4 _BlastAaLookupIndexQuery(LookupTable* lookup, Int4 ** matrix, BLAST_SequenceBlk* query, ListNode* unmasked_regions, Int4 query_bias);/** Create a sequence containing all possible words as subsequences. * * @param lookup the lookup table [in] * @return Zero. */Int4 MakeAllWordSequence(LookupTable* lookup);/** * Find the words in the neighborhood of w, that is, those whose * score is greater than t. * * For typical searches against a database, a sequence containing * all possible words (as created by MakeAllWordSequence() is used. * * For blast-two-sequences type applications, it is not necessary to * find all neighboring words; it is sufficient to use the words * occurring in the subject sequence. * * @param lookup the lookup table [in/modified] * @param matrix the substitution matrix [in] * @param query the query sequence [in] * @param offset the offset of the word * @param query_bias number added to each offset put into lookup table (only used for RPS blast database creation, otherwise 0) [in] * @return Zero. */Int4 AddNeighboringWords(LookupTable* lookup, Int4 ** matrix, BLAST_SequenceBlk* query, Int4 offset, Int4 query_bias);/* RPS blast structures and functions */#define RPS_HITS_PER_CELL 3typedef struct RPSBackboneCell { Int4 num_used; Int4 entries[RPS_HITS_PER_CELL];} RPSBackboneCell;typedef struct RPSLookupTable { Int4 wordsize; /* number of full bytes in a full word */ Int4 longest_chain; /* length of the longest chain on the backbone */ Int4 mask; /* part of index to mask off, that is, top (wordsize*charsize) bits should be discarded. */ Int4 alphabet_size; /* number of letters in the alphabet */ Int4 charsize; /* number of bits for a base/residue */ Int4 backbone_size; /* number of cells in the backbone */ RPSBackboneCell * rps_backbone; /* the lookup table used for RPS blast */ Int4 ** rps_pssm; /* Pointer to memory-mapped RPS Blast profile file */ Int4 * rps_seq_offsets; /* array of start offsets for each RPS DB seq. */ RPSAuxInfo* rps_aux_info; /* RPS Blast auxiliary information */ Int4 * overflow; /* the overflow array for the compacted lookup table */ Int4 overflow_size; /* Number of elements in the overflow array (above). */ PV_ARRAY_TYPE *pv; /* presence vector. a bit vector indicating which cells are occupied */} RPSLookupTable; /** Create a new RPS blast lookup table. * @param rps_info pointer to structure with RPS setup information [in] * @param lut handle to lookup table [in/modified] */ Int4 RPSLookupTableNew(const RPSInfo *rps_info, RPSLookupTable* * lut);/** Free the lookup table. */RPSLookupTable* RPSLookupTableDestruct(RPSLookupTable* lookup);/********************************* * * Nucleotide functions * *********************************//* Macro to test the presence vector array value for a lookup table index */#define NA_PV_TEST(pv_array, index, pv_array_bts) (pv_array[(index)>>pv_array_bts]&(((PV_ARRAY_TYPE) 1)<<((index)&PV_ARRAY_MASK)))/** Scan the compressed subject sequence, returning all word hits, using the * old BLASTn approach - looking up words at every byte (4 bases) of the * sequence. Lookup table is presumed to have a traditional BLASTn structure. * @param lookup_wrap Pointer to the (wrapper to) lookup table [in] * @param subject The (compressed) sequence to be scanned for words [in] * @param start_offset The offset into the sequence in actual coordinates [in] * @param q_offsets Array of query positions where words are found [out] * @param s_offsets Array of subject positions where words are found [out] * @param max_hits The allocated size of the above arrays - how many offsets * can be returned [in] * @param end_offset Where the scanning should stop [in], has stopped [out]*/Int4 BlastNaScanSubject(const LookupTableWrap* lookup_wrap, const BLAST_SequenceBlk* subject, Int4 start_offset, Uint4* NCBI_RESTRICT q_offsets, Uint4* NCBI_RESTRICT s_offsets, Int4 max_hits, Int4* end_offset);/** Scan the compressed subject sequence, returning all word hits, using the * arbitrary stride. Lookup table is presumed to have a traditional BLASTn * structure. * @param lookup_wrap Pointer to the (wrapper to) lookup table [in] * @param subject The (compressed) sequence to be scanned for words [in] * @param start_offset The offset into the sequence in actual coordinates [in] * @param q_offsets Array of query positions where words are found [out] * @param s_offsets Array of subject positions where words are found [out] * @param max_hits The allocated size of the above arrays - how many offsets * can be returned [in] * @param end_offset Where the scanning should stop [in], has stopped [out]*/Int4 BlastNaScanSubject_AG(const LookupTableWrap* lookup_wrap, const BLAST_SequenceBlk* subject, Int4 start_offset, Uint4* NCBI_RESTRICT q_offsets, Uint4* NCBI_RESTRICT s_offsets, Int4 max_hits, Int4* end_offset);/** Fill the lookup table for a given query sequence or partial sequence. * @param lookup Pointer to the lookup table structure [in] [out] * @param query The query sequence [in] * @param location What locations on the query sequence to index? [in] */Int4 BlastNaLookupIndexQuery(LookupTable* lookup, BLAST_SequenceBlk* query, ListNode* location);#ifdef __cplusplus}#endif#endif /* BLAST_LOOKUP__H */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -