📄 blast_extend.c
字号:
/* Set hit length back to 0 after ungapped extension failure */ estack[index].length = 0; } } else { /* First hit in the 2-hit case or a direct extension of the previous hit - update the last hit information only */ if (new_hit) estack[index].length = 0; else estack[index].length += step; estack[index].level = s_off; } /* In case the size of this stack changed */ stack_table->stack_index[index1] = stack_top + 1; return hit_ready; } else if (step <= scan_step || (step <= window && estack[index].length >= word_extra_length)) { /* Hit from a different diagonal, and it can be continued */ index++; } else { /* Hit from a different diagonal that does not continue: remove it from the stack */ estack[index] = estack[stack_top]; --stack_top; } } /* Need an extra slot on the stack for this hit */ if (++stack_top >= stack_table->stack_size[index1]) { /* Stack about to overflow - reallocate memory */ MB_Stack* ptr; if (!(ptr = (MB_Stack*)realloc(estack, 2*stack_table->stack_size[index1]*sizeof(MB_Stack)))) { return FALSE; } else { stack_table->stack_size[index1] *= 2; estack = stack_table->estack[index1] = ptr; } } /* Start a new hit */ estack[stack_top].diag = s_off - q_off; estack[stack_top].level = s_off; estack[stack_top].length = 0; stack_table->stack_index[index1] = stack_top + 1; /* Save the hit if it already qualifies */ if (!two_hits && (word_extra_length == 0)) { hit_ready = TRUE; if (do_ungapped_extension) { /* Perform ungapped extension */ BlastnWordUngappedExtend(query, subject, matrix, q_off, s_off, word_params->cutoff_score, -word_params->x_dropoff, &ungapped_data); estack[stack_top].level = ungapped_data->length - 1 + ungapped_data->s_start; estack[stack_top].length = ungapped_data->length; } else { ungapped_data = NULL; } if (!ungapped_data || ungapped_data->score >= word_params->cutoff_score) { BLAST_SaveInitialHit(init_hitlist, q_off, s_off, ungapped_data); } else { sfree(ungapped_data); /* Set hit length back to 0 after ungapped extension failure */ estack[index].length = 0; } } } return hit_ready;}/** Update the word extension structure after scanning of each subject sequence * @param ewp The structure holding word extension information [in] [out] * @param subject_length The length of the subject sequence that has just been * processed [in] */static Int2 BlastNaExtendWordExit(Blast_ExtendWord* ewp, Int4 subject_length){ BLAST_DiagTable* diag_table; Int4 diag_array_length, i; if (!ewp) return -1; diag_table = ewp->diag_table; if (diag_table->offset >= INT4_MAX/2) { diag_array_length = diag_table->diag_array_length; for(i=0;i<diag_array_length;i++) { diag_table->diag_array[i].diag_level=0; diag_table->diag_array[i].last_hit = -diag_table->window; } } if (diag_table->offset < INT4_MAX/2) { diag_table->offset += subject_length + diag_table->window; } else { diag_table->offset = 0; } return 0;}/** Update the MegaBlast word extension structure after scanning of each * subject sequence. * @param ewp The structure holding word extension information [in] [out] * @param subject_length The length of the subject sequence that has just been * processed [in] */static Int2 MB_ExtendWordExit(Blast_ExtendWord* ewp, Int4 subject_length){ if (!ewp) return -1; if (ewp->diag_table) { return BlastNaExtendWordExit(ewp, subject_length); } else { memset(ewp->stack_table->stack_index, 0, ewp->stack_table->num_stacks*sizeof(Int4)); } return 0;}/* Description in blast_extend.h */BooleanBlastnWordUngappedExtend(BLAST_SequenceBlk* query, BLAST_SequenceBlk* subject, Int4** matrix, Int4 q_off, Int4 s_off, Int4 cutoff, Int4 X, BlastUngappedData** ungapped_data){ Uint1* q; Int4 sum, score; Uint1 ch; Uint1* subject0,* sf,* q_beg,* q_end,* s_end,* s,* start; Int2 remainder, base; Int4 q_avail, s_avail; base = 3 - (s_off % 4); subject0 = subject->sequence; q_avail = query->length - q_off; s_avail = subject->length - s_off; q = q_beg = q_end = query->sequence + q_off; s = s_end = subject0 + s_off/COMPRESSION_RATIO; if (q_off < s_off) { start = subject0 + (s_off-q_off)/COMPRESSION_RATIO; remainder = 3 - ((s_off-q_off)%COMPRESSION_RATIO); } else { start = subject0; remainder = 3; } /* Find where positive scoring starts & ends within the word hit */ score = 0; sum = 0; /* extend to the left */ while ((s > start) || (s == start && base < remainder)) { if (base == 3) { s--; base = 0; } else { ++base; } ch = *s; if ((sum += matrix[*--q][NCBI2NA_UNPACK_BASE(ch, base)]) > 0) { q_beg = q; score += sum; sum = 0; } else if (sum < X) { break; } } if (score >= cutoff && !ungapped_data) return FALSE; if (ungapped_data) { *ungapped_data = (BlastUngappedData*) malloc(sizeof(BlastUngappedData)); (*ungapped_data)->q_start = q_beg - query->sequence; (*ungapped_data)->s_start = s_off - (q_off - (*ungapped_data)->q_start); } if (q_avail < s_avail) { sf = subject0 + (s_off + q_avail)/COMPRESSION_RATIO; remainder = 3 - ((s_off + q_avail)%COMPRESSION_RATIO); } else { sf = subject0 + (subject->length)/COMPRESSION_RATIO; remainder = 3 - ((subject->length)%COMPRESSION_RATIO); } /* extend to the right */ q = q_end; s = s_end; sum = 0; base = 3 - (s_off % COMPRESSION_RATIO); while (s < sf || (s == sf && base > remainder)) { ch = *s; if ((sum += matrix[*q++][NCBI2NA_UNPACK_BASE(ch, base)]) > 0) { q_end = q; score += sum; sum = 0; } else if (sum < X) break; if (base == 0) { base = 3; s++; } else base--; } if (ungapped_data) { (*ungapped_data)->length = q_end - q_beg; (*ungapped_data)->score = score; (*ungapped_data)->frame = 0; } return (score < cutoff);}/** Perform ungapped extension given an offset pair, and save the initial * hit information if the hit qualifies. This function assumes that the * exact match has already been extended to the word size parameter. * @param query The query sequence [in] * @param subject The subject sequence [in] * @param min_step Distance at which new word hit lies within the previously * extended hit. Non-zero only when ungapped extension is not * performed, e.g. for contiguous megablast. [in] * @param word_params The parameters related to initial word extension [in] * @param matrix the substitution matrix for ungapped extension [in] * @param ewp The structure containing word extension information [in] * @param q_off The offset in the query sequence [in] * @param s_end The offset in the subject sequence where this hit ends [in] * @param s_off The offset in the subject sequence [in] * @param init_hitlist The structure containing information about all * initial hits [in] [out] * @return Has this hit been extended? */static BooleanBlastnExtendInitialHit(BLAST_SequenceBlk* query, BLAST_SequenceBlk* subject, Uint4 min_step, const BlastInitialWordParameters* word_params, Int4** matrix, Blast_ExtendWord* ewp, Int4 q_off, Int4 s_end, Int4 s_off, BlastInitHitList* init_hitlist){ Int4 diag, real_diag; Int4 s_pos, last_hit; BLAST_DiagTable* diag_table; BlastUngappedData* ungapped_data; const BlastInitialWordOptions* word_options = word_params->options; Int4 window_size = word_options->window_size; Boolean hit_ready; Boolean new_hit = FALSE, second_hit = FALSE; Int4 step; DiagStruct* diag_array_elem; diag_table = ewp->diag_table; diag = s_off + diag_table->diag_array_length - q_off; real_diag = diag & diag_table->diag_mask; diag_array_elem = &diag_table->diag_array[real_diag]; s_pos = s_end + diag_table->offset; last_hit = diag_array_elem->last_hit; step = s_pos - last_hit; if (step <= 0) /* This is a hit on a diagonal that has already been explored further down */ return 0; if (window_size == 0 || (diag_array_elem->diag_level == 1)) { /* Single hit version or previous hit was already a second hit */ new_hit = (step > (Int4)min_step); } else { /* Previous hit was the first hit */ new_hit = (step > window_size); second_hit = (step > 0 && step <= window_size); } hit_ready = ((window_size == 0) && new_hit) || second_hit; if (hit_ready) { if (word_options->ungapped_extension) { /* Perform ungapped extension */ BlastnWordUngappedExtend(query, subject, matrix, q_off, s_off, word_params->cutoff_score, -word_params->x_dropoff, &ungapped_data); diag_array_elem->last_hit = ungapped_data->length + ungapped_data->s_start + diag_table->offset; } else { ungapped_data = NULL; diag_array_elem->last_hit = s_pos; } if (!ungapped_data || ungapped_data->score >= word_params->cutoff_score) { BLAST_SaveInitialHit(init_hitlist, q_off, s_off, ungapped_data); /* Set diag_level to 1, indicating that this hit has already been saved */ diag_array_elem->diag_level = 1; } else { sfree(ungapped_data); /* Set diag_level to 0, indicating that any hit after this will be new */ diag_array_elem->diag_level = 0; } } else { /* First hit in the 2-hit case or a direct extension of the previous hit - update the last hit information only */ diag_array_elem->last_hit = s_pos; if (new_hit) diag_array_elem->diag_level = 0; } return hit_ready;}/* Description in blast_extend.h */Int2 BlastNaWordFinder(BLAST_SequenceBlk* subject, BLAST_SequenceBlk* query, LookupTableWrap* lookup_wrap, Int4** matrix, const BlastInitialWordParameters* word_params, Blast_ExtendWord* ewp, Uint4* q_offsets, Uint4* s_offsets, Int4 max_hits, BlastInitHitList* init_hitlist, BlastUngappedStats* ungapped_stats){ LookupTable* lookup = (LookupTable*) lookup_wrap->lut; Uint1* s_start = subject->sequence; Uint1* abs_start = s_start; Int4 i; Uint1* s; Uint1* q_start = query->sequence; Int4 hitsfound, total_hits = 0; Int4 hits_extended = 0; Uint4 word_size, compressed_wordsize, reduced_word_length; Uint4 extra_bytes_needed; Uint2 extra_bases, left, right; Uint1* q; Int4 start_offset, last_start, next_start, last_end; Uint1 max_bases; word_size = COMPRESSION_RATIO*lookup->wordsize; last_start = subject->length - word_size; start_offset = 0; compressed_wordsize = lookup->reduced_wordsize; extra_bytes_needed = lookup->wordsize - compressed_wordsize; reduced_word_length = COMPRESSION_RATIO*compressed_wordsize; extra_bases = lookup->word_length - word_size; last_end = subject->length - word_size; while (start_offset <= last_start) { /* Pass the last word ending offset */ next_start = last_end; hitsfound = BlastNaScanSubject(lookup_wrap, subject, start_offset, q_offsets, s_offsets, max_hits, &next_start); total_hits += hitsfound; for (i = 0; i < hitsfound; ++i) { /* Here it is guaranteed that subject offset is divisible by 4, because we only extend to the right, so scanning stride must be equal to 4. */ s = abs_start + (s_offsets[i])/COMPRESSION_RATIO; q = q_start + q_offsets[i];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -