📄 blast_extend.c
字号:
/* Check for extra bytes if required for longer words. */ if (extra_bytes_needed && !BlastNaCompareExtraBytes(q+reduced_word_length, s+compressed_wordsize, extra_bytes_needed)) continue; /* mini extension to the left */ max_bases = MIN(COMPRESSION_RATIO, MIN(q_offsets[i], s_offsets[i])); left = BlastNaMiniExtendLeft(q, s-1, max_bases); /* mini extension to the right */ max_bases = MIN(COMPRESSION_RATIO, MIN(subject->length - s_offsets[i] - lookup->wordsize, query->length - q_offsets[i] - word_size)); right = 0; if (max_bases > 0) { s += lookup->wordsize; q += word_size; right = BlastNaMiniExtendRight(q, s, max_bases); } if (left + right >= extra_bases) { /* Check if this diagonal has already been explored. */ if (BlastnExtendInitialHit(query, subject, 0, word_params, matrix, ewp, q_offsets[i], s_offsets[i] + word_size + right, s_offsets[i], init_hitlist)) ++hits_extended; } } start_offset = next_start; } BlastNaExtendWordExit(ewp, subject->length); Blast_UngappedStatsUpdate(ungapped_stats, total_hits, hits_extended, init_hitlist->total); return 0;} /** Extend an exact match in both directions up to the provided * maximal length. * @param q_start Pointer to the start of the extension in query [in] * @param s_start Pointer to the start of the extension in subject [in] * @param max_bases_left At most how many bases to extend to the left [in] * @param max_bases_right At most how many bases to extend to the right [in] * @param max_length The length of the required exact match [in] * @param extend_partial_byte Should partial byte extension be perfomed?[in] * @param extended_right How far has extension succeeded to the right? [out] * @return TRUE if extension successful */static Boolean BlastNaExactMatchExtend(Uint1* q_start, Uint1* s_start, Uint4 max_bases_left, Uint4 max_bases_right, Uint4 max_length, Boolean extend_partial_byte, Uint4* extended_right){ Uint4 length; Uint1* q,* s; *extended_right = 0; length = 0; /* Extend to the right; start from the firstt byte (it must be the first one that's guaranteed to match by the lookup table hit). */ q = q_start; s = s_start; while (length < max_length && max_bases_right >= COMPRESSION_RATIO) { if (*s != PACK_WORD(q)) break; length += COMPRESSION_RATIO; ++s; q += COMPRESSION_RATIO; max_bases_right -= COMPRESSION_RATIO; } if (extend_partial_byte) { if (max_bases_right > 0) { length += BlastNaMiniExtendRight(q, s, (Uint1) MIN(max_bases_right, COMPRESSION_RATIO)); } } *extended_right = length; if (length >= max_length) return TRUE; if (max_bases_left < max_length - length) return FALSE; else max_bases_left = max_length - length; /* Extend to the left; start with the byte just before the first. */ q = q_start - COMPRESSION_RATIO; s = s_start - 1; while (length < max_length && max_bases_left >= COMPRESSION_RATIO) { if (*s != PACK_WORD(q)) break; length += COMPRESSION_RATIO; --s; q -= COMPRESSION_RATIO; max_bases_left -= COMPRESSION_RATIO; } if (length >= max_length) return TRUE; if (extend_partial_byte && max_bases_left > 0) { length += BlastNaMiniExtendLeft(q+COMPRESSION_RATIO, s, (Uint1) MIN(max_bases_left, COMPRESSION_RATIO)); } return (length >= max_length);}/** Extend the lookup table exact match hit in both directions and * update the diagonal structure. * @param q_offsets Array of query offsets [in] * @param s_offsets Array of subject offsets [in] * @param num_hits Size of the above arrays [in] * @param word_params Parameters for word extension [in] * @param lookup_wrap Lookup table wrapper structure [in] * @param query Query sequence data [in] * @param subject Subject sequence data [in] * @param matrix Scoring matrix for ungapped extension [in] * @param ewp Word extension structure containing information about the * extent of already processed hits on each diagonal [in] * @param init_hitlist Structure to keep the extended hits. * Must be allocated outside of this function [in] [out] * @return Number of hits extended. */static Int4 BlastNaExtendRightAndLeft(Uint4* q_offsets, Uint4* s_offsets, Int4 num_hits, const BlastInitialWordParameters* word_params, LookupTableWrap* lookup_wrap, BLAST_SequenceBlk* query, BLAST_SequenceBlk* subject, Int4** matrix, Blast_ExtendWord* ewp, BlastInitHitList* init_hitlist){ Int4 index; Uint4 query_length = query->length; Uint4 subject_length = subject->length; Uint1* q_start = query->sequence; Uint1* s_start = subject->sequence; Uint4 word_length = 0; Uint4 q_off, s_off; Uint4 max_bases_left, max_bases_right; Uint4 extended_right; Uint4 shift; Uint1* q, *s; Uint4 min_step = 0; Boolean do_ungapped_extension = word_params->options->ungapped_extension; Boolean variable_wordsize = (Boolean) word_params->options->variable_wordsize; Int4 hits_extended = 0; if (lookup_wrap->lut_type == MB_LOOKUP_TABLE) { MBLookupTable* lut = (MBLookupTable*)lookup_wrap->lut; word_length = lut->word_length; if(!do_ungapped_extension && !lut->discontiguous) min_step = lut->scan_step; } else { LookupTable* lut = (LookupTable*)lookup_wrap->lut; word_length = lut->word_length; } for (index = 0; index < num_hits; ++index) { /* Adjust offsets to the start of the next full byte in the subject sequence */ shift = (COMPRESSION_RATIO - s_offsets[index]%COMPRESSION_RATIO) % COMPRESSION_RATIO; q_off = q_offsets[index] + shift; s_off = s_offsets[index] + shift; s = s_start + s_off/COMPRESSION_RATIO; q = q_start + q_off; max_bases_left = MIN(word_length, MIN(q_off, s_off)); max_bases_right = MIN(word_length, MIN(query_length-q_off, subject_length-s_off)); if (BlastNaExactMatchExtend(q, s, max_bases_left, max_bases_right, word_length, (Boolean) !variable_wordsize, &extended_right)) { /* Check if this diagonal has already been explored and save the hit if needed. */ if (BlastnExtendInitialHit(query, subject, min_step, word_params, matrix, ewp, q_offsets[index], s_off + extended_right, s_offsets[index], init_hitlist)) ++hits_extended; } } return hits_extended;}/* Description in blast_extend.h */Int2 MB_WordFinder(BLAST_SequenceBlk* subject, BLAST_SequenceBlk* query, LookupTableWrap* lookup_wrap, Int4** matrix, const BlastInitialWordParameters* word_params, Blast_ExtendWord* ewp, Uint4* q_offsets, Uint4* s_offsets, Int4 max_hits, BlastInitHitList* init_hitlist, BlastUngappedStats* ungapped_stats){ const BlastInitialWordOptions* word_options = word_params->options; /* Pointer to the beginning of the first word of the subject sequence */ MBLookupTable* mb_lt = (MBLookupTable*) lookup_wrap->lut; Int4 hitsfound=0; Int4 total_hits=0, index; Int4 start_offset, next_start, last_start, last_end; Int4 subject_length = subject->length; Boolean ag_blast; Int4 hits_extended = 0; ag_blast = (Boolean) (word_options->extension_method == eRightAndLeft); start_offset = 0; if (mb_lt->discontiguous) { last_start = subject_length - mb_lt->template_length; last_end = last_start + mb_lt->word_length; } else { last_end = subject_length; if (ag_blast) { last_start = last_end - COMPRESSION_RATIO*mb_lt->compressed_wordsize; } else { last_start = last_end - mb_lt->word_length; } } /* start_offset points to the beginning of the word */ while ( start_offset <= last_start ) { /* Set the last argument's value to the end of the last word, without the extra bases for the discontiguous case */ next_start = last_end; if (mb_lt->discontiguous) { hitsfound = MB_DiscWordScanSubject(lookup_wrap, subject, start_offset, q_offsets, s_offsets, max_hits, &next_start); } else if (!ag_blast) { hitsfound = MB_ScanSubject(lookup_wrap, subject, start_offset, q_offsets, s_offsets, max_hits, &next_start); } else { hitsfound = MB_AG_ScanSubject(lookup_wrap, subject, start_offset, q_offsets, s_offsets, max_hits, &next_start); } if (ag_blast) { hits_extended += BlastNaExtendRightAndLeft(q_offsets, s_offsets, hitsfound, word_params, lookup_wrap, query, subject, matrix, ewp, init_hitlist); } else { for (index = 0; index < hitsfound; ++index) { if (MB_ExtendInitialHit(query, subject, lookup_wrap, word_params, matrix, ewp, q_offsets[index], s_offsets[index], init_hitlist)) ++hits_extended; } } /* next_start returned from the ScanSubject points to the beginning of the word */ start_offset = next_start; total_hits += hitsfound; } MB_ExtendWordExit(ewp, subject_length); Blast_UngappedStatsUpdate(ungapped_stats, total_hits, hits_extended, init_hitlist->total); return 0;}/* Description in blast_extend.h */Int2 BlastNaWordFinder_AG(BLAST_SequenceBlk* subject, BLAST_SequenceBlk* query, LookupTableWrap* lookup_wrap, Int4** matrix, const BlastInitialWordParameters* word_params, Blast_ExtendWord* ewp, Uint4* q_offsets, Uint4* s_offsets, Int4 max_hits, BlastInitHitList* init_hitlist, BlastUngappedStats* ungapped_stats){ Int4 hitsfound, total_hits = 0; Int4 start_offset, end_offset, next_start; LookupTable* lookup = (LookupTable*) lookup_wrap->lut; Int4 hits_extended = 0; start_offset = 0; end_offset = subject->length - COMPRESSION_RATIO*lookup->reduced_wordsize; /* start_offset points to the beginning of the word; end_offset is the beginning of the last word */ while (start_offset <= end_offset) { hitsfound = BlastNaScanSubject_AG(lookup_wrap, subject, start_offset, q_offsets, s_offsets, max_hits, &next_start); total_hits += hitsfound; hits_extended += BlastNaExtendRightAndLeft(q_offsets, s_offsets, hitsfound, word_params, lookup_wrap, query, subject, matrix, ewp, init_hitlist); start_offset = next_start; } BlastNaExtendWordExit(ewp, subject->length); Blast_UngappedStatsUpdate(ungapped_stats, total_hits, hits_extended, init_hitlist->total); return 0;} /** Deallocate memory for the diagonal table structure */static BLAST_DiagTable* BlastDiagTableFree(BLAST_DiagTable* diag_table){ if (diag_table) { sfree(diag_table->diag_array); sfree(diag_table); } return NULL;}/** Deallocate memory for the stack table structure */static MB_StackTable* MBStackTableFree(MB_StackTable* stack_table){ Int4 index; if (!stack_table) return NULL; for (index = 0; index < stack_table->num_stacks; ++index) sfree(stack_table->estack[index]); sfree(stack_table->estack); sfree(stack_table->stack_index); sfree(stack_table->stack_size); sfree(stack_table); return NULL;}Blast_ExtendWord* BlastExtendWordFree(Blast_ExtendWord* ewp){ BlastDiagTableFree(ewp->diag_table); MBStackTableFree(ewp->stack_table); sfree(ewp); return NULL;}void BlastSaveInitHsp(BlastInitHitList* ungapped_hsps, Int4 q_start, Int4 s_start, Int4 q_off, Int4 s_off, Int4 len, Int4 score){ BlastUngappedData* ungapped_data = NULL; ungapped_data = (BlastUngappedData*) malloc(sizeof(BlastUngappedData)); ungapped_data->q_start = q_start; ungapped_data->s_start = s_start; ungapped_data->length = len; ungapped_data->score = score; ungapped_data->frame = 0; BLAST_SaveInitialHit(ungapped_hsps, q_off, s_off, ungapped_data); return;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -