📄 mb_lookup.c
字号:
case TEMPL_12_18: ecode2 = (GET_WORD_INDEX_12_18_OPT(ecode)) | (GET_EXTRA_CODE_18_OPT(seq)); break; case TEMPL_11_21: ecode2 = (GET_WORD_INDEX_11_21_OPT(ecode)) | (GET_EXTRA_CODE_21_OPT(seq)); break; case TEMPL_12_21: ecode2 = (GET_WORD_INDEX_12_21_OPT(ecode)) | (GET_EXTRA_CODE_21_OPT(seq)); break; default: ecode2 = 0; break; } #ifdef USE_HASH_TABLE hash_buf = (Uint1*)&ecode2; CRC32(crc, hash_buf); ecode2 = (crc>>hash_shift) & hash_mask;#endif if (mb_lt->hashtable[ecode1] == 0 || mb_lt->hashtable2[ecode2] == 0) mb_lt->num_unique_pos_added++; mb_lt->next_pos2[index] = mb_lt->hashtable2[ecode2]; mb_lt->hashtable2[ecode2] = index; } else { if (mb_lt->hashtable[ecode1] == 0) mb_lt->num_unique_pos_added++; } mb_lt->next_pos[index] = mb_lt->hashtable[ecode1]; mb_lt->hashtable[ecode1] = index; } } } } } mb_lt->pv_array_bts = pv_array_bts = PV_ARRAY_BTS + pv_shift; #ifdef QUESTION_PV_ARRAY_USE if (mb_lt->num_unique_pos_added < (PV_ARRAY_FACTOR*(mb_lt->hashsize>>pv_shift))) {#endif pv_size = mb_lt->hashsize>>pv_array_bts; /* Size measured in PV_ARRAY_TYPE's */ pv_array = calloc(PV_ARRAY_BYTES, pv_size); for (index=0; index<mb_lt->hashsize; index++) { if (mb_lt->hashtable[index] != 0 || (two_templates && mb_lt->hashtable2[index] != 0)) pv_array[index>>pv_array_bts] |= (((PV_ARRAY_TYPE) 1)<<(index&PV_ARRAY_MASK)); } mb_lt->pv_array = pv_array;#ifdef QUESTION_PV_ARRAY_USE }#endif /* Now remove the hash entries that have too many positions */#if 0 if (lookup_options->max_positions>0) {#endif for (pv_index = 0; pv_index < pv_size; ++pv_index) { if (pv_array[pv_index]) { for (ecode = pv_index<<pv_array_bts; ecode < (pv_index+1)<<pv_array_bts; ++ecode) { for (index=mb_lt->hashtable[ecode], pcount=0; index>0; index=mb_lt->next_pos[index], pcount++); if (two_templates) { for (index=mb_lt->hashtable2[ecode], pcount1=0; index>0; index=mb_lt->next_pos2[index], pcount1++); } if ((pcount=MAX(pcount,pcount1))>lookup_options->max_positions) { mb_lt->hashtable[ecode] = 0; if (two_templates) mb_lt->hashtable2[ecode] = 0;#ifdef ERR_POST_EX_DEFINED ErrPostEx(SEV_WARNING, 0, 0, "%lx - %d", ecode, pcount);#endif masked_word_count++; } longest_chain = MAX(longest_chain, pcount); } } } if (masked_word_count) {#ifdef ERR_POST_EX_DEFINED ErrPostEx(SEV_WARNING, 0, 0, "Masked %d %dmers", masked_word_count, 4*width);#endif }#if 0 }#endif mb_lt->longest_chain = longest_chain; *mb_lt_ptr = mb_lt; return 0;}Int4 MB_AG_ScanSubject(const LookupTableWrap* lookup_wrap, const BLAST_SequenceBlk* subject, Int4 start_offset, Uint4* q_offsets, Uint4* s_offsets, Int4 max_hits, Int4* end_offset){ MBLookupTable* mb_lt = (MBLookupTable*) lookup_wrap->lut; Uint1* s; Uint1* abs_start; Int4 index=0, s_off; Int4 q_off; PV_ARRAY_TYPE *pv_array = mb_lt->pv_array; Int4 total_hits = 0; Int4 compressed_wordsize, compressed_scan_step, word_size; Boolean full_byte_scan = mb_lt->full_byte_scan; Uint1 pv_array_bts = mb_lt->pv_array_bts; /* Since the test for number of hits here is done after adding them, subtract the longest chain length from the allowed offset array size. */ max_hits -= mb_lt->longest_chain; abs_start = subject->sequence; s = abs_start + start_offset/COMPRESSION_RATIO; compressed_scan_step = mb_lt->scan_step / COMPRESSION_RATIO; compressed_wordsize = mb_lt->compressed_wordsize; word_size = compressed_wordsize*COMPRESSION_RATIO; index = 0; /* NB: s in this function always points to the start of the word! */ if (full_byte_scan) { Uint1* s_end = abs_start + subject->length/COMPRESSION_RATIO - compressed_wordsize; for ( ; s <= s_end; s += compressed_scan_step) { BlastNaLookupInitIndex(compressed_wordsize, s, &index); if (NA_PV_TEST(pv_array, index, pv_array_bts)) { q_off = mb_lt->hashtable[index]; s_off = (s - abs_start)*COMPRESSION_RATIO; if (q_off && (total_hits >= max_hits)) break; while (q_off) { q_offsets[total_hits] = q_off - 1; s_offsets[total_hits++] = s_off; q_off = mb_lt->next_pos[q_off]; } } } *end_offset = (s - abs_start)*COMPRESSION_RATIO; } else { Int4 last_offset = subject->length - word_size; Uint1 bit; Int4 adjusted_index; for (s_off = start_offset; s_off <= last_offset; s_off += mb_lt->scan_step) { s = abs_start + (s_off / COMPRESSION_RATIO); bit = 2*(s_off % COMPRESSION_RATIO); /* Compute index for a word made of full bytes */ s = BlastNaLookupInitIndex(compressed_wordsize, s, &index); /* Adjust the word index by the base within a byte */ adjusted_index = BlastNaLookupAdjustIndex(s, index, mb_lt->mask, bit); if (NA_PV_TEST(pv_array, adjusted_index, pv_array_bts)) { q_off = mb_lt->hashtable[adjusted_index]; if (q_off && (total_hits >= max_hits)) break; while (q_off) { q_offsets[total_hits] = q_off - 1; s_offsets[total_hits++] = s_off; q_off = mb_lt->next_pos[q_off]; } } } *end_offset = s_off; } return total_hits;}Int4 MB_ScanSubject(const LookupTableWrap* lookup, const BLAST_SequenceBlk* subject, Int4 start_offset, Uint4* q_offsets, Uint4* s_offsets, Int4 max_hits, Int4* end_offset) { Uint1* abs_start,* s_end; Uint1* s; Int4 hitsfound = 0; Uint4 query_offset, subject_offset; Int4 index; MBLookupTable* mb_lt = (MBLookupTable*) lookup->lut; Uint4* q_ptr = q_offsets,* s_ptr = s_offsets; PV_ARRAY_TYPE *pv_array = mb_lt->pv_array; Uint1 pv_array_bts = mb_lt->pv_array_bts; Int4 compressed_wordsize = mb_lt->compressed_wordsize;#ifdef DEBUG_LOG FILE *logfp0 = fopen("new0.log", "a");#endif /* Since the test for number of hits here is done after adding them, subtract the longest chain length from the allowed offset array size. */ max_hits -= mb_lt->longest_chain; abs_start = subject->sequence; s = abs_start + start_offset/COMPRESSION_RATIO; s_end = abs_start + (*end_offset)/COMPRESSION_RATIO; s = BlastNaLookupInitIndex(mb_lt->compressed_wordsize, s, &index); /* In the following code, s points to the byte right after the end of the word. */ while (s <= s_end) { if (NA_PV_TEST(pv_array, index, pv_array_bts)) { query_offset = mb_lt->hashtable[index]; subject_offset = ((s - abs_start) - compressed_wordsize)*COMPRESSION_RATIO; if (query_offset && (hitsfound >= max_hits)) break; while (query_offset) {#ifdef DEBUG_LOG fprintf(logfp0, "%ld\t%ld\t%ld\n", query_offset, subject_offset, index);#endif *(q_ptr++) = query_offset - 1; *(s_ptr++) = subject_offset; ++hitsfound; query_offset = mb_lt->next_pos[query_offset]; } } /* Compute the next value of the lookup index (shifting sequence by a full byte) */ index = BlastNaLookupComputeIndex(FULL_BYTE_SHIFT, mb_lt->mask, s++, index); } *end_offset = ((s - abs_start) - compressed_wordsize)*COMPRESSION_RATIO;#ifdef DEBUG_LOG fclose(logfp0);#endif return hitsfound;}Int4 MB_DiscWordScanSubject(const LookupTableWrap* lookup, const BLAST_SequenceBlk* subject, Int4 start_offset, Uint4* q_offsets, Uint4* s_offsets, Int4 max_hits, Int4* end_offset){ Uint1* s; Uint1* s_start,* abs_start,* s_end; Int4 hitsfound = 0; Uint4 query_offset, subject_offset; Int4 word, index, index2=0; MBLookupTable* mb_lt = (MBLookupTable*) lookup->lut; Uint4* q_ptr = q_offsets,* s_ptr = s_offsets; Boolean full_byte_scan = mb_lt->full_byte_scan; Boolean two_templates = mb_lt->two_templates; Uint1 template_type = mb_lt->template_type; Uint1 second_template_type = mb_lt->second_template_type; PV_ARRAY_TYPE *pv_array = mb_lt->pv_array; Uint1 pv_array_bts = mb_lt->pv_array_bts; Int4 compressed_wordsize = mb_lt->compressed_wordsize;#ifdef DEBUG_LOG FILE *logfp0 = fopen("new0.log", "a");#endif /* Since the test for number of hits here is done after adding them, subtract the longest chain length from the allowed offset array size. */ max_hits -= mb_lt->longest_chain; abs_start = subject->sequence; s_start = abs_start + start_offset/COMPRESSION_RATIO; s_end = abs_start + (*end_offset)/COMPRESSION_RATIO; s = BlastNaLookupInitIndex(mb_lt->compressed_wordsize, s_start, &word); /* s now points to the byte right after the end of the current word */ if (full_byte_scan) { while (s <= s_end) { index = ComputeDiscontiguousIndex(s, word, template_type); if (two_templates) { index2 = ComputeDiscontiguousIndex(s, word, second_template_type); } if (NA_PV_TEST(pv_array, index, pv_array_bts)) { query_offset = mb_lt->hashtable[index]; subject_offset = ((s - abs_start) - compressed_wordsize)*COMPRESSION_RATIO; if (query_offset && (hitsfound >= max_hits)) break; while (query_offset) {#ifdef DEBUG_LOG fprintf(logfp0, "%ld\t%ld\t%ld\n", query_offset, subject_offset, index);#endif *(q_ptr++) = query_offset - 1; *(s_ptr++) = subject_offset; ++hitsfound; query_offset = mb_lt->next_pos[query_offset]; } } if (two_templates && NA_PV_TEST(pv_array, index2, pv_array_bts)) { query_offset = mb_lt->hashtable2[index2]; subject_offset = ((s - abs_start) - compressed_wordsize)*COMPRESSION_RATIO; if (query_offset && (hitsfound >= max_hits)) break; while (query_offset) { q_offsets[hitsfound] = query_offset - 1; s_offsets[hitsfound++] = subject_offset; query_offset = mb_lt->next_pos2[query_offset]; } } word = BlastNaLookupComputeIndex(FULL_BYTE_SHIFT, mb_lt->mask, s++, word); } } else { Int4 scan_shift = 2*mb_lt->scan_step; Uint1 bit = 2*(start_offset % COMPRESSION_RATIO); Int4 adjusted_word; while (s <= s_end) { /* Adjust the word index by the base within a byte */ adjusted_word = BlastNaLookupAdjustIndex(s, word, mb_lt->mask, bit); index = ComputeDiscontiguousIndex_1b(s, adjusted_word, bit, template_type); if (two_templates) index2 = ComputeDiscontiguousIndex_1b(s, adjusted_word, bit, second_template_type); if (NA_PV_TEST(pv_array, index, pv_array_bts)) { query_offset = mb_lt->hashtable[index]; subject_offset = ((s - abs_start) - compressed_wordsize)*COMPRESSION_RATIO + bit/2; if (query_offset && (hitsfound >= max_hits)) break; while (query_offset) {#ifdef DEBUG_LOG fprintf(logfp0, "%ld\t%ld\t%ld\n", query_offset, subject_offset, index);#endif *(q_ptr++) = query_offset - 1; *(s_ptr++) = subject_offset; ++hitsfound; query_offset = mb_lt->next_pos[query_offset]; } } if (two_templates && NA_PV_TEST(pv_array, index2, pv_array_bts)) { query_offset = mb_lt->hashtable2[index2]; subject_offset = ((s - abs_start) - compressed_wordsize)*COMPRESSION_RATIO + bit/2; if (query_offset && (hitsfound >= max_hits)) break; while (query_offset) { q_offsets[hitsfound] = query_offset - 1; s_offsets[hitsfound++] = subject_offset; query_offset = mb_lt->next_pos2[query_offset]; } } bit += scan_shift; if (bit >= FULL_BYTE_SHIFT) { /* Advance to the next full byte */ bit -= FULL_BYTE_SHIFT; word = BlastNaLookupComputeIndex(FULL_BYTE_SHIFT, mb_lt->mask, s++, word); } } } *end_offset = ((s - abs_start) - compressed_wordsize)*COMPRESSION_RATIO;#ifdef DEBUG_LOG fclose(logfp0);#endif return hitsfound;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -