⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 blast_inline.h

📁 ncbi源码
💻 H
字号:
/*
 * ===========================================================================
 * PRODUCTION $Log: blast_inline.h,v $
 * PRODUCTION Revision 1000.0  2004/06/01 18:13:43  gouriano
 * PRODUCTION PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.2
 * PRODUCTION
 * ===========================================================================
 */

/* $Id: blast_inline.h,v 1000.0 2004/06/01 18:13:43 gouriano Exp $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's offical duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 */

/** @file blast_inline.h
 * Static inline helpers for nucleotide lookup: computation of (discontiguous)
 * word lookup indices, and short comparisons / mini extensions between an
 * uncompressed query and a compressed subject sequence.
 */

/* This header defines static functions, so it must not be processed twice
 * within one translation unit. */
#ifndef ALGO_BLAST_CORE__BLAST_INLINE__H
#define ALGO_BLAST_CORE__BLAST_INLINE__H

#include <algo/blast/core/mb_lookup.h>
#include <algo/blast/core/blast_util.h>

/** Given a word packed into an integer, compute a discontiguous word lookup
 *  index.
 * @param subject Pointer to the next byte of the sequence after the end of
 *        the word (needed when word template is longer than 16 bases) [in]
 * @param word A piece of the sequence packed into an integer [in]
 * @param template_type What type of discontiguous word template to use [in]
 * @return The lookup table index of the discontiguous word; 0 when
 *         template_type is not one of the handled templates [out]
 */
static NCBI_INLINE Int4 ComputeDiscontiguousIndex(Uint1* subject, Int4 word,
                  Uint1 template_type)
{
   Int4 index;
   Int4 extra_code;

   switch (template_type) {
   /* 16-base templates: the index comes from the packed word alone. */
   case TEMPL_11_16:
      index = GET_WORD_INDEX_11_16(word);
      break;
   case TEMPL_12_16:
      index = GET_WORD_INDEX_12_16(word);
      break;
   case TEMPL_11_16_OPT:
      index = GET_WORD_INDEX_11_16_OPT(word);
      break;
   case TEMPL_12_16_OPT:
      index = GET_WORD_INDEX_12_16_OPT(word);
      break;
   /* Longer templates: an extra code read from the bytes at 'subject' is
      OR-ed into the index derived from the packed word. */
   case TEMPL_11_18:
      extra_code = (Int4) GET_EXTRA_CODE_PACKED_4_18(subject);
      index = (GET_WORD_INDEX_11_18(word) | extra_code);
      break;
   case TEMPL_12_18:
      extra_code = (Int4) GET_EXTRA_CODE_PACKED_4_18(subject);
      index = (GET_WORD_INDEX_12_18(word) | extra_code);
      break;
   case TEMPL_11_18_OPT:
      extra_code = (Int4) GET_EXTRA_CODE_PACKED_4_18_OPT(subject);
      index = (GET_WORD_INDEX_11_18_OPT(word) | extra_code);
      break;
   case TEMPL_12_18_OPT:
      extra_code = (Int4) GET_EXTRA_CODE_PACKED_4_18_OPT(subject);
      index = (GET_WORD_INDEX_12_18_OPT(word) | extra_code);
      break;
   case TEMPL_11_21:
      extra_code = (Int4) GET_EXTRA_CODE_PACKED_4_21(subject);
      index = (GET_WORD_INDEX_11_21(word) | extra_code);
      break;
   case TEMPL_12_21:
      extra_code = (Int4) GET_EXTRA_CODE_PACKED_4_21(subject);
      index = (GET_WORD_INDEX_12_21(word) | extra_code);
      break;
   case TEMPL_11_21_OPT:
      extra_code = (Int4) GET_EXTRA_CODE_PACKED_4_21_OPT(subject);
      index = (GET_WORD_INDEX_11_21_OPT(word) | extra_code);
      break;
   case TEMPL_12_21_OPT:
      extra_code = (Int4) GET_EXTRA_CODE_PACKED_4_21_OPT(subject);
      index = (GET_WORD_INDEX_12_21_OPT(word) | extra_code);
      break;
   default:
      extra_code = 0;
      index = 0;
      break;
   }

#ifdef USE_HASH_TABLE
   /* NOTE(review): hash_buf, crc, hash_shift and hash_mask are not declared
      in this function; confirm where they are expected to come from before
      enabling USE_HASH_TABLE. */
   hash_buf = (Uint1*)&index;
   CRC32(crc, hash_buf);
   index = (crc>>hash_shift) & hash_mask;
#endif

   return index;
}

/** Compute the lookup table index for the first word template, given a word
 * position, template type and previous value of the word, in case of
 * one-base (2 bit) database scanning.
 * @param word_start Pointer to the start of a word in the sequence [in]
 * @param word The word packed into an integer value [in]
 * @param sequence_bit By how many bits the real word start is shifted within
 *        a compressed sequence byte [in]
 * @param template_type What discontiguous word template to use for index
 *        computation [in]
 * @return The lookup index for the discontiguous word; 0 when template_type
 *         is not one of the handled templates.
 */
static NCBI_INLINE Int4 ComputeDiscontiguousIndex_1b(const Uint1* word_start,
                      Int4 word, Uint1 sequence_bit, Uint1 template_type)
{
   Int4 index;
   Uint1* subject = (Uint1 *) word_start;
   Uint1 bit;
   Int4 extra_code, tmpval;

   /* Prepare auxiliary variables for extra code calculation */
   tmpval = 0;
   extra_code = 0;
   /* The bits in an integer byte are counted in a reverse order than in a
      sequence byte */
   bit = 6 - sequence_bit;

   switch (template_type) {
   /* 16-base templates: the index comes from the packed word alone. */
   case TEMPL_11_16:
      index = GET_WORD_INDEX_11_16(word);
      break;
   case TEMPL_12_16:
      index = GET_WORD_INDEX_12_16(word);
      break;
   case TEMPL_11_16_OPT:
      index = GET_WORD_INDEX_11_16_OPT(word);
      break;
   case TEMPL_12_16_OPT:
      index = GET_WORD_INDEX_12_16_OPT(word);
      break;
   /* Longer templates: the GET_EXTRA_CODE_PACKED_* macros receive
      extra_code (and the scratch variable tmpval) by name; the resulting
      extra_code is OR-ed into the word index. */
   case TEMPL_11_18:
      GET_EXTRA_CODE_PACKED_18(subject, bit, tmpval, extra_code);
      index = (GET_WORD_INDEX_11_18(word) | extra_code);
      break;
   case TEMPL_12_18:
      GET_EXTRA_CODE_PACKED_18(subject, bit, tmpval, extra_code);
      index = (GET_WORD_INDEX_12_18(word) | extra_code);
      break;
   case TEMPL_11_18_OPT:
      GET_EXTRA_CODE_PACKED_18_OPT(subject, bit, tmpval, extra_code);
      index = (GET_WORD_INDEX_11_18_OPT(word) | extra_code);
      break;
   case TEMPL_12_18_OPT:
      GET_EXTRA_CODE_PACKED_18_OPT(subject, bit, tmpval, extra_code);
      index = (GET_WORD_INDEX_12_18_OPT(word) | extra_code);
      break;
   case TEMPL_11_21:
      GET_EXTRA_CODE_PACKED_21(subject, bit, tmpval, extra_code);
      index = (GET_WORD_INDEX_11_21(word) | extra_code);
      break;
   case TEMPL_12_21:
      GET_EXTRA_CODE_PACKED_21(subject, bit, tmpval, extra_code);
      index = (GET_WORD_INDEX_12_21(word) | extra_code);
      break;
   case TEMPL_11_21_OPT:
      GET_EXTRA_CODE_PACKED_21_OPT(subject, bit, tmpval, extra_code);
      index = (GET_WORD_INDEX_11_21_OPT(word) | extra_code);
      break;
   case TEMPL_12_21_OPT:
      GET_EXTRA_CODE_PACKED_21_OPT(subject, bit, tmpval, extra_code);
      index = (GET_WORD_INDEX_12_21_OPT(word) | extra_code);
      break;
   default:
      extra_code = 0;
      index = 0;
      break;
   }

#ifdef USE_HASH_TABLE
   /* NOTE(review): hash_buf, crc, hash_shift and hash_mask are not declared
      in this function; confirm where they are expected to come from before
      enabling USE_HASH_TABLE. */
   hash_buf = (Uint1*)&index;
   CRC32(crc, hash_buf);
   index = (crc>>hash_shift) & hash_mask;
#endif

   return index;
}

/* Forward declarations of the generic index helpers defined below. */
static NCBI_INLINE void  _ComputeIndex(Int4 wordsize,
				  Int4 charsize,
				  Int4 mask,
				  const Uint1* word,
				  Int4* index);

static NCBI_INLINE void  _ComputeIndexIncremental(Int4 wordsize,
					     Int4 charsize,
					     Int4 mask,
					     const Uint1* word,
					     Int4* index);

/** Given a word, compute its index value from scratch.
 *
 * @param wordsize length of the word, in residues [in]
 * @param charsize length of one residue, in bits [in]
 * @param mask value used to mask the index so that only the bottom
 *        wordsize * charsize bits remain [in]
 * @param word pointer to the beginning of the word [in]
 * @param index the computed index value [out]
 */
static NCBI_INLINE void  _ComputeIndex(Int4 wordsize,
				  Int4 charsize,
				  Int4 mask,
				  const Uint1* word,
				  Int4* index)
{
   Int4 i;

   *index = 0;

   /* Append one residue at a time, masking after every step. */
   for (i = 0; i < wordsize; i++) {
      *index = ((*index << charsize) | word[i]) & mask;
   }

   return;
}

/** Given a word, compute its index value, reusing a previously
 *  computed index value.
 *
 * @param wordsize length of the word, in residues; only the last residue,
 *        word[wordsize - 1], is read and appended to the index [in]
 * @param charsize length of one residue, in bits [in]
 * @param mask value used to mask the index so that only the bottom
 *        wordsize * charsize bits remain [in]
 * @param word pointer to the beginning of the word [in]
 * @param index the computed index value [in/out]
 */
static NCBI_INLINE void  _ComputeIndexIncremental(Int4 wordsize,
					     Int4 charsize,
					     Int4 mask,
					     const Uint1* word,
					     Int4* index)
{
   *index = ((*index << charsize) | word[wordsize - 1]) & mask;
   return;
}

/** Given a starting position of a word in a compressed nucleotide sequence,
 *  compute this word's lookup table index.
 * @param length Number of bytes of the sequence to pack into the index [in]
 * @param word Pointer to the first byte of the word [in]
 * @param index The computed index: the bytes word[0..length-1], each shifted
 *        in by FULL_BYTE_SHIFT bits [out]
 * @return Pointer to the byte following the last one consumed
 *         (word + length).
 */
static NCBI_INLINE Uint1* BlastNaLookupInitIndex(Int4 length,
		          const Uint1* word, Int4* index)
{
   Int4 i;

   *index = 0;
   for (i = 0; i < length; ++i) {
      *index = ((*index)<<FULL_BYTE_SHIFT) | word[i];
   }
   return (Uint1 *) (word + length);
}

/** Recompute the word index given its previous value and the new location
 *  of the last byte of the word.
 * @param scan_shift Number of bits by which the previous index is shifted
 *        left [in]
 * @param mask Mask applied to the shifted index [in]
 * @param word Pointer to the byte that is OR-ed into the low bits [in]
 * @param index Previous value of the index [in]
 * @return The new index value.
 */
static NCBI_INLINE Int4 BlastNaLookupComputeIndex(Int4 scan_shift, Int4 mask,
		      const Uint1* word, Int4 index)
{
   return (((index)<<scan_shift) & mask) | *(word);
}

/** Given a word computed from full bytes of a compressed sequence,
 *  shift it by 0-3 bases.
 * @param s Pointer to the sequence byte whose top 'bit' bits are brought
 *        into the low end of the index [in]
 * @param index Index computed from full bytes [in]
 * @param mask Mask applied to the shifted index [in]
 * @param bit Number of bits to shift by [in]
 * @return The adjusted index value.
 */
static NCBI_INLINE Int4 BlastNaLookupAdjustIndex(Uint1* s, Int4 index,
                      Int4 mask, Uint1 bit)
{
   return (((index)<<bit) & mask) | ((*s)>>(FULL_BYTE_SHIFT-bit));
}

/** Bits of a sequence byte that must all be zero for the byte to be accepted
 *  by Na_LookupComputeIndex (everything above the two lowest bits). */
#define BLAST2NA_MASK 0xfc

/** Compute the lookup index for a word in an uncompressed sequence, without
 *  using any previous index information.
 * @param lookup Pointer to the traditional BLASTn lookup table structure [in]
 * @param word Pointer to the start of the word [in]
 * @param index The lookup index; set to 0 on failure [out]
 * @return 0 on success, -1 if any byte of the word has a bit set in
 *         BLAST2NA_MASK.
 */
static NCBI_INLINE Int2
Na_LookupComputeIndex(LookupTable* lookup, Uint1* word, Int4* index)
{
   Int4 i;
   Int4 wordsize = lookup->reduced_wordsize*COMPRESSION_RATIO; /* i.e. 8 or 4 */

   *index = 0;
   for (i = 0; i < wordsize; ++i) {
      if ((word[i] & BLAST2NA_MASK) != 0) {
         /* The byte does not fit into 2 bits: no index can be computed. */
         *index = 0;
         return -1;
      } else {
         *index = (((*index)<<lookup->charsize) & lookup->mask) | word[i];
      }
   }
   return 0;
}

/** Pack 4 sequence bytes into a one byte integer, assuming sequence contains
 *  no ambiguities.
 *  The argument is parenthesized so that pointer expressions such as
 *  PACK_WORD(p + 4) expand correctly; note that it is evaluated four times,
 *  so it must not have side effects.
 */
#define	PACK_WORD(q) (((q)[0]<<6) + ((q)[1]<<4) + ((q)[2]<<2) + (q)[3])

/** Compare a given number of bytes of a compressed subject sequence with
 *  the non-compressed query sequence.
 * @param q Pointer to the first byte to be compared in the query sequence [in]
 * @param s Pointer to the first byte to be compared in the subject
 *        sequence [in]
 * @param extra_bytes Number of compressed bytes to compare [in]
 * @return TRUE if sequences are identical, FALSE if mismatch is found.
 */
static NCBI_INLINE Boolean BlastNaCompareExtraBytes(Uint1* q, Uint1* s,
                                               Int4 extra_bytes)
{
   Int4 index;

   for (index = 0; index < extra_bytes; ++index) {
      /* One compressed subject byte corresponds to COMPRESSION_RATIO
         query bytes. */
      if (*s++ != PACK_WORD(q)) {
         return FALSE;
      }
      q += COMPRESSION_RATIO;
   }
   return TRUE;
}

/** Perform mini extension (up to max_left <= 4 bases) to the left;
 * @param q Pointer to the query base right after the ones to be extended [in]
 * @param s Pointer to a byte in the compressed subject sequence that is to be
 *        tested for extension [in]
 * @param max_left Maximal number of bases to compare [in]
 * @return Number of matched bases
 */
static NCBI_INLINE Uint1
BlastNaMiniExtendLeft(Uint1* q, const Uint1* s, Uint1 max_left)
{
   Uint1 left = 0;

   /* Walk the query backwards while unpacking subject bases 0, 1, 2, ... */
   for (left = 0; left < max_left; ++left) {
      if (NCBI2NA_UNPACK_BASE(*s, left) != *--q) {
         break;
      }
   }
   return left;
}

/** Perform mini extension (up to max_right <= 4 bases) to the right;
 * @param q Pointer to the start of the extension in the query [in]
 * @param s Pointer to a byte in the compressed subject sequence that is to be
 *        tested for extension [in]
 * @param max_right Maximal number of bases to compare [in]
 * @return Number of matched bases
 */
static NCBI_INLINE Uint1
BlastNaMiniExtendRight(Uint1* q, const Uint1* s, Uint1 max_right)
{
   Uint1 right;
   Uint1 index = 3;

   /* Walk the query forwards while unpacking subject bases 3, 2, 1, 0. */
   for (right = 0; right < max_right; ++right, --index) {
      if (NCBI2NA_UNPACK_BASE(*s, index) != *q++) {
         break;
      }
   }
   return right;
}

#endif /* ALGO_BLAST_CORE__BLAST_INLINE__H */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -