⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mb_lookup.c

📁 ncbi源码
💻 C
📖 第 1 页 / 共 2 页
字号:
                     case TEMPL_12_18:                        ecode2 = (GET_WORD_INDEX_12_18_OPT(ecode)) |                           (GET_EXTRA_CODE_18_OPT(seq));                        break;                     case TEMPL_11_21:                        ecode2 = (GET_WORD_INDEX_11_21_OPT(ecode)) |                           (GET_EXTRA_CODE_21_OPT(seq));                        break;                     case TEMPL_12_21:                        ecode2 = (GET_WORD_INDEX_12_21_OPT(ecode)) |                           (GET_EXTRA_CODE_21_OPT(seq));                        break;                     default:                        ecode2 = 0; break;                     }                     #ifdef USE_HASH_TABLE                                          hash_buf = (Uint1*)&ecode2;                     CRC32(crc, hash_buf);                     ecode2 = (crc>>hash_shift) & hash_mask;#endif                     if (mb_lt->hashtable[ecode1] == 0 ||                          mb_lt->hashtable2[ecode2] == 0)                        mb_lt->num_unique_pos_added++;                     mb_lt->next_pos2[index] = mb_lt->hashtable2[ecode2];                     mb_lt->hashtable2[ecode2] = index;                  } else {                     if (mb_lt->hashtable[ecode1] == 0)                        mb_lt->num_unique_pos_added++;                  }                  mb_lt->next_pos[index] = mb_lt->hashtable[ecode1];                  mb_lt->hashtable[ecode1] = index;               }            }         }      }   }   mb_lt->pv_array_bts = pv_array_bts = PV_ARRAY_BTS + pv_shift; #ifdef QUESTION_PV_ARRAY_USE   if (mb_lt->num_unique_pos_added <        (PV_ARRAY_FACTOR*(mb_lt->hashsize>>pv_shift))) {#endif      pv_size = mb_lt->hashsize>>pv_array_bts;      /* Size measured in PV_ARRAY_TYPE's */      pv_array = calloc(PV_ARRAY_BYTES, pv_size);      for (index=0; index<mb_lt->hashsize; index++) {         if (mb_lt->hashtable[index] != 0 ||             (two_templates && mb_lt->hashtable2[index] != 0))            pv_array[index>>pv_array_bts] |=                (((PV_ARRAY_TYPE) 1)<<(index&PV_ARRAY_MASK));      }      mb_lt->pv_array = pv_array;#ifdef QUESTION_PV_ARRAY_USE   }#endif   /* Now remove the hash entries that have too many positions */#if 0   if (lookup_options->max_positions>0) {#endif      for (pv_index = 0; pv_index < pv_size; ++pv_index) {         if (pv_array[pv_index]) {            for (ecode = pv_index<<pv_array_bts;                  ecode < (pv_index+1)<<pv_array_bts; ++ecode) {               for (index=mb_lt->hashtable[ecode], pcount=0;                     index>0; index=mb_lt->next_pos[index], pcount++);               if (two_templates) {                  for (index=mb_lt->hashtable2[ecode], pcount1=0;                        index>0; index=mb_lt->next_pos2[index], pcount1++);               }               if ((pcount=MAX(pcount,pcount1))>lookup_options->max_positions) {                  mb_lt->hashtable[ecode] = 0;                  if (two_templates)                     mb_lt->hashtable2[ecode] = 0;#ifdef ERR_POST_EX_DEFINED                  ErrPostEx(SEV_WARNING, 0, 0, "%lx - %d", ecode, pcount);#endif                  masked_word_count++;               }               longest_chain = MAX(longest_chain, pcount);            }         }      }      if (masked_word_count) {#ifdef ERR_POST_EX_DEFINED	 ErrPostEx(SEV_WARNING, 0, 0, "Masked %d %dmers", masked_word_count,		   4*width);#endif      }#if 0   }#endif   mb_lt->longest_chain = longest_chain;   *mb_lt_ptr = mb_lt;   return 0;}Int4 MB_AG_ScanSubject(const LookupTableWrap* lookup_wrap,       const BLAST_SequenceBlk* subject, Int4 start_offset,       Uint4* q_offsets, Uint4* s_offsets, Int4 max_hits,         Int4* end_offset){   MBLookupTable* mb_lt = (MBLookupTable*) lookup_wrap->lut;   Uint1* s;   Uint1* abs_start;   Int4  index=0, s_off;      Int4 q_off;   PV_ARRAY_TYPE *pv_array = mb_lt->pv_array;   Int4 total_hits = 0;   Int4 compressed_wordsize, compressed_scan_step, word_size;   Boolean full_byte_scan = mb_lt->full_byte_scan;   Uint1 pv_array_bts = mb_lt->pv_array_bts;      /* Since the test for number of hits here is done after adding them,       subtract the longest chain length from the allowed offset array size. */   max_hits -= mb_lt->longest_chain;   abs_start = subject->sequence;   s = abs_start + start_offset/COMPRESSION_RATIO;   compressed_scan_step = mb_lt->scan_step / COMPRESSION_RATIO;   compressed_wordsize = mb_lt->compressed_wordsize;   word_size = compressed_wordsize*COMPRESSION_RATIO;   index = 0;      /* NB: s in this function always points to the start of the word!    */   if (full_byte_scan) {      Uint1* s_end = abs_start + subject->length/COMPRESSION_RATIO -          compressed_wordsize;      for ( ; s <= s_end; s += compressed_scan_step) {         BlastNaLookupInitIndex(compressed_wordsize, s, &index);                  if (NA_PV_TEST(pv_array, index, pv_array_bts)) {            q_off = mb_lt->hashtable[index];            s_off = (s - abs_start)*COMPRESSION_RATIO;            if (q_off && (total_hits >= max_hits))               break;            while (q_off) {               q_offsets[total_hits] = q_off - 1;               s_offsets[total_hits++] = s_off;               q_off = mb_lt->next_pos[q_off];            }         }      }      *end_offset = (s - abs_start)*COMPRESSION_RATIO;   } else {      Int4 last_offset = subject->length - word_size;      Uint1 bit;      Int4 adjusted_index;      for (s_off = start_offset; s_off <= last_offset;            s_off += mb_lt->scan_step) {         s = abs_start + (s_off / COMPRESSION_RATIO);         bit = 2*(s_off % COMPRESSION_RATIO);         /* Compute index for a word made of full bytes */         s = BlastNaLookupInitIndex(compressed_wordsize, s, &index);         /* Adjust the word index by the base within a byte */         adjusted_index = BlastNaLookupAdjustIndex(s, index, mb_lt->mask,                                                   bit);         if (NA_PV_TEST(pv_array, adjusted_index, pv_array_bts)) {            q_off = mb_lt->hashtable[adjusted_index];            if (q_off && (total_hits >= max_hits))               break;            while (q_off) {               q_offsets[total_hits] = q_off - 1;               s_offsets[total_hits++] = s_off;                q_off = mb_lt->next_pos[q_off];            }         }      }      *end_offset = s_off;   }   return total_hits;}Int4 MB_ScanSubject(const LookupTableWrap* lookup,       const BLAST_SequenceBlk* subject, Int4 start_offset,        Uint4* q_offsets, Uint4* s_offsets, Int4 max_hits,       Int4* end_offset) {   Uint1* abs_start,* s_end;   Uint1* s;   Int4 hitsfound = 0;   Uint4 query_offset, subject_offset;   Int4 index;   MBLookupTable* mb_lt = (MBLookupTable*) lookup->lut;   Uint4* q_ptr = q_offsets,* s_ptr = s_offsets;   PV_ARRAY_TYPE *pv_array = mb_lt->pv_array;   Uint1 pv_array_bts = mb_lt->pv_array_bts;   Int4 compressed_wordsize = mb_lt->compressed_wordsize;#ifdef DEBUG_LOG   FILE *logfp0 = fopen("new0.log", "a");#endif      /* Since the test for number of hits here is done after adding them,       subtract the longest chain length from the allowed offset array size.   */   max_hits -= mb_lt->longest_chain;   abs_start = subject->sequence;   s = abs_start + start_offset/COMPRESSION_RATIO;   s_end = abs_start + (*end_offset)/COMPRESSION_RATIO;   s = BlastNaLookupInitIndex(mb_lt->compressed_wordsize, s, &index);   /* In the following code, s points to the byte right after the end of       the word. */   while (s <= s_end) {      if (NA_PV_TEST(pv_array, index, pv_array_bts)) {         query_offset = mb_lt->hashtable[index];         subject_offset =             ((s - abs_start) - compressed_wordsize)*COMPRESSION_RATIO;         if (query_offset && (hitsfound >= max_hits))            break;         while (query_offset) {#ifdef DEBUG_LOG            fprintf(logfp0, "%ld\t%ld\t%ld\n", query_offset,                     subject_offset, index);#endif            *(q_ptr++) = query_offset - 1;            *(s_ptr++) = subject_offset;            ++hitsfound;            query_offset = mb_lt->next_pos[query_offset];         }      }      /* Compute the next value of the lookup index          (shifting sequence by a full byte) */      index = BlastNaLookupComputeIndex(FULL_BYTE_SHIFT, mb_lt->mask,                                         s++, index);   }   *end_offset =      ((s - abs_start) - compressed_wordsize)*COMPRESSION_RATIO;#ifdef DEBUG_LOG   fclose(logfp0);#endif   return hitsfound;}Int4 MB_DiscWordScanSubject(const LookupTableWrap* lookup,        const BLAST_SequenceBlk* subject, Int4 start_offset,       Uint4* q_offsets, Uint4* s_offsets, Int4 max_hits,        Int4* end_offset){   Uint1* s;   Uint1* s_start,* abs_start,* s_end;   Int4 hitsfound = 0;   Uint4 query_offset, subject_offset;   Int4 word, index, index2=0;   MBLookupTable* mb_lt = (MBLookupTable*) lookup->lut;   Uint4* q_ptr = q_offsets,* s_ptr = s_offsets;   Boolean full_byte_scan = mb_lt->full_byte_scan;   Boolean two_templates = mb_lt->two_templates;   Uint1 template_type = mb_lt->template_type;   Uint1 second_template_type = mb_lt->second_template_type;   PV_ARRAY_TYPE *pv_array = mb_lt->pv_array;   Uint1 pv_array_bts = mb_lt->pv_array_bts;   Int4 compressed_wordsize = mb_lt->compressed_wordsize;#ifdef DEBUG_LOG   FILE *logfp0 = fopen("new0.log", "a");#endif         /* Since the test for number of hits here is done after adding them,       subtract the longest chain length from the allowed offset array size. */   max_hits -= mb_lt->longest_chain;   abs_start = subject->sequence;   s_start = abs_start + start_offset/COMPRESSION_RATIO;   s_end = abs_start + (*end_offset)/COMPRESSION_RATIO;   s = BlastNaLookupInitIndex(mb_lt->compressed_wordsize, s_start, &word);   /* s now points to the byte right after the end of the current word */   if (full_byte_scan) {     while (s <= s_end) {       index = ComputeDiscontiguousIndex(s, word, template_type);       if (two_templates) {          index2 = ComputeDiscontiguousIndex(s, word, second_template_type);       }       if (NA_PV_TEST(pv_array, index, pv_array_bts)) {          query_offset = mb_lt->hashtable[index];          subject_offset =              ((s - abs_start) - compressed_wordsize)*COMPRESSION_RATIO;          if (query_offset && (hitsfound >= max_hits))             break;          while (query_offset) {#ifdef DEBUG_LOG             fprintf(logfp0, "%ld\t%ld\t%ld\n", query_offset,                      subject_offset, index);#endif             *(q_ptr++) = query_offset - 1;             *(s_ptr++) = subject_offset;             ++hitsfound;             query_offset = mb_lt->next_pos[query_offset];          }       }       if (two_templates && NA_PV_TEST(pv_array, index2, pv_array_bts)) {          query_offset = mb_lt->hashtable2[index2];          subject_offset =              ((s - abs_start) - compressed_wordsize)*COMPRESSION_RATIO;          if (query_offset && (hitsfound >= max_hits))             break;          while (query_offset) {             q_offsets[hitsfound] = query_offset - 1;             s_offsets[hitsfound++] = subject_offset;             query_offset = mb_lt->next_pos2[query_offset];          }       }       word = BlastNaLookupComputeIndex(FULL_BYTE_SHIFT,                            mb_lt->mask, s++, word);     }   } else {      Int4 scan_shift = 2*mb_lt->scan_step;      Uint1 bit = 2*(start_offset % COMPRESSION_RATIO);      Int4 adjusted_word;      while (s <= s_end) {         /* Adjust the word index by the base within a byte */         adjusted_word = BlastNaLookupAdjustIndex(s, word, mb_lt->mask,                                         bit);            index = ComputeDiscontiguousIndex_1b(s, adjusted_word, bit,                    template_type);                if (two_templates)            index2 = ComputeDiscontiguousIndex_1b(s, adjusted_word, bit,                        second_template_type);         if (NA_PV_TEST(pv_array, index, pv_array_bts)) {            query_offset = mb_lt->hashtable[index];            subject_offset =                ((s - abs_start) - compressed_wordsize)*COMPRESSION_RATIO               + bit/2;            if (query_offset && (hitsfound >= max_hits))               break;            while (query_offset) {#ifdef DEBUG_LOG               fprintf(logfp0, "%ld\t%ld\t%ld\n", query_offset,                        subject_offset, index);#endif               *(q_ptr++) = query_offset - 1;               *(s_ptr++) = subject_offset;               ++hitsfound;               query_offset = mb_lt->next_pos[query_offset];            }         }         if (two_templates && NA_PV_TEST(pv_array, index2, pv_array_bts)) {            query_offset = mb_lt->hashtable2[index2];            subject_offset =                ((s - abs_start) - compressed_wordsize)*COMPRESSION_RATIO               + bit/2;            if (query_offset && (hitsfound >= max_hits))               break;            while (query_offset) {               q_offsets[hitsfound] = query_offset - 1;               s_offsets[hitsfound++] = subject_offset;               query_offset = mb_lt->next_pos2[query_offset];            }         }         bit += scan_shift;         if (bit >= FULL_BYTE_SHIFT) {            /* Advance to the next full byte */            bit -= FULL_BYTE_SHIFT;            word = BlastNaLookupComputeIndex(FULL_BYTE_SHIFT,                                 mb_lt->mask, s++, word);         }      }   }   *end_offset =      ((s - abs_start) - compressed_wordsize)*COMPRESSION_RATIO;#ifdef DEBUG_LOG   fclose(logfp0);#endif   return hitsfound;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -