首页 › 资源下载 › 生物技术 › ncbi源码 › 源码查看
blast_extend.c

来自「ncbi源码」· C语言代码 · 共 1,141 行 · 第 1/3 页
1,141 行
                  /* Set hit length back to 0 after ungapped extension                      failure */                  estack[index].length = 0;               }            } else {               /* First hit in the 2-hit case or a direct extension of the                   previous hit - update the last hit information only */               if (new_hit)                  estack[index].length = 0;               else                  estack[index].length += step;               estack[index].level = s_off;            }            /* In case the size of this stack changed */            stack_table->stack_index[index1] = stack_top + 1;	             return hit_ready;         } else if (step <= scan_step || (step <= window &&                       estack[index].length >= word_extra_length)) {            /* Hit from a different diagonal, and it can be continued */            index++;         } else {            /* Hit from a different diagonal that does not continue: remove               it from the stack */            estack[index] = estack[stack_top];            --stack_top;         }      }      /* Need an extra slot on the stack for this hit */      if (++stack_top >= stack_table->stack_size[index1]) {         /* Stack about to overflow - reallocate memory */         MB_Stack* ptr;         if (!(ptr = (MB_Stack*)realloc(estack,                     2*stack_table->stack_size[index1]*sizeof(MB_Stack)))) {            return FALSE;         } else {            stack_table->stack_size[index1] *= 2;            estack = stack_table->estack[index1] = ptr;         }      }      /* Start a new hit */      estack[stack_top].diag = s_off - q_off;      estack[stack_top].level = s_off;      estack[stack_top].length = 0;      stack_table->stack_index[index1] = stack_top + 1;      /* Save the hit if it already qualifies */      if (!two_hits && (word_extra_length == 0)) {         hit_ready = TRUE;         if (do_ungapped_extension) {            /* Perform ungapped extension */            BlastnWordUngappedExtend(query, subject, matrix, q_off, s_off,                word_params->cutoff_score, -word_params->x_dropoff,                &ungapped_data);            estack[stack_top].level = ungapped_data->length - 1 +                ungapped_data->s_start;            estack[stack_top].length = ungapped_data->length;         } else {            ungapped_data = NULL;         }         if (!ungapped_data ||              ungapped_data->score >= word_params->cutoff_score) {            BLAST_SaveInitialHit(init_hitlist, q_off, s_off,                                  ungapped_data);         } else {            sfree(ungapped_data);            /* Set hit length back to 0 after ungapped extension                failure */            estack[index].length = 0;         }      }   }   return hit_ready;}/** Update the word extension structure after scanning of each subject sequence * @param ewp The structure holding word extension information [in] [out] * @param subject_length The length of the subject sequence that has just been *        processed [in] */static Int2 BlastNaExtendWordExit(Blast_ExtendWord* ewp, Int4 subject_length){   BLAST_DiagTable* diag_table;   Int4 diag_array_length, i;   if (!ewp)      return -1;   diag_table = ewp->diag_table;         if (diag_table->offset >= INT4_MAX/2) {      diag_array_length = diag_table->diag_array_length;      for(i=0;i<diag_array_length;i++) {	 diag_table->diag_array[i].diag_level=0;	 diag_table->diag_array[i].last_hit = -diag_table->window;      }   }         if (diag_table->offset < INT4_MAX/2)	{      diag_table->offset += subject_length + diag_table->window;   } else {      diag_table->offset = 0;   }   return 0;}/** Update the MegaBlast word extension structure after scanning of each  *  subject sequence. * @param ewp The structure holding word extension information [in] [out] * @param subject_length The length of the subject sequence that has just been *        processed [in] */static Int2 MB_ExtendWordExit(Blast_ExtendWord* ewp, Int4 subject_length){   if (!ewp)      return -1;   if (ewp->diag_table) {      return BlastNaExtendWordExit(ewp, subject_length);   } else {       memset(ewp->stack_table->stack_index, 0,              ewp->stack_table->num_stacks*sizeof(Int4));   }   return 0;}/* Description in blast_extend.h */BooleanBlastnWordUngappedExtend(BLAST_SequenceBlk* query,    BLAST_SequenceBlk* subject, Int4** matrix,    Int4 q_off, Int4 s_off, Int4 cutoff, Int4 X,    BlastUngappedData** ungapped_data){   Uint1* q;   Int4 sum, score;   Uint1 ch;   Uint1* subject0,* sf,* q_beg,* q_end,* s_end,* s,* start;   Int2 remainder, base;   Int4 q_avail, s_avail;      base = 3 - (s_off % 4);      subject0 = subject->sequence;   q_avail = query->length - q_off;   s_avail = subject->length - s_off;   q = q_beg = q_end = query->sequence + q_off;   s = s_end = subject0 + s_off/COMPRESSION_RATIO;   if (q_off < s_off) {      start = subject0 + (s_off-q_off)/COMPRESSION_RATIO;      remainder = 3 - ((s_off-q_off)%COMPRESSION_RATIO);   } else {      start = subject0;      remainder = 3;   }      /* Find where positive scoring starts & ends within the word hit */   score = 0;   sum = 0;   /* extend to the left */   while ((s > start) || (s == start && base < remainder)) {      if (base == 3) {         s--;         base = 0;      } else {         ++base;      }      ch = *s;      if ((sum += matrix[*--q][NCBI2NA_UNPACK_BASE(ch, base)]) > 0) {         q_beg = q;         score += sum;         sum = 0;      } else if (sum < X) {         break;      }   }      if (score >= cutoff && !ungapped_data)       return FALSE;      if (ungapped_data) {      *ungapped_data = (BlastUngappedData*)          malloc(sizeof(BlastUngappedData));      (*ungapped_data)->q_start = q_beg - query->sequence;      (*ungapped_data)->s_start =          s_off - (q_off - (*ungapped_data)->q_start);   }   if (q_avail < s_avail) {      sf = subject0 + (s_off + q_avail)/COMPRESSION_RATIO;      remainder = 3 - ((s_off + q_avail)%COMPRESSION_RATIO);   } else {      sf = subject0 + (subject->length)/COMPRESSION_RATIO;      remainder = 3 - ((subject->length)%COMPRESSION_RATIO);   }	   /* extend to the right */   q = q_end;   s = s_end;   sum = 0;   base = 3 - (s_off % COMPRESSION_RATIO);      while (s < sf || (s == sf && base > remainder)) {      ch = *s;      if ((sum += matrix[*q++][NCBI2NA_UNPACK_BASE(ch, base)]) > 0) {         q_end = q;         score += sum;         sum = 0;      } else if (sum < X)         break;      if (base == 0) {         base = 3;         s++;      } else         base--;   }      if (ungapped_data) {      (*ungapped_data)->length = q_end - q_beg;      (*ungapped_data)->score = score;      (*ungapped_data)->frame = 0;   }      return (score < cutoff);}/** Perform ungapped extension given an offset pair, and save the initial  * hit information if the hit qualifies. This function assumes that the * exact match has already been extended to the word size parameter. * @param query The query sequence [in] * @param subject The subject sequence [in] * @param min_step Distance at which new word hit lies within the previously *                 extended hit. Non-zero only when ungapped extension is not *                 performed, e.g. for contiguous megablast. [in] * @param word_params The parameters related to initial word extension [in] * @param matrix the substitution matrix for ungapped extension [in] * @param ewp The structure containing word extension information [in] * @param q_off The offset in the query sequence [in] * @param s_end The offset in the subject sequence where this hit ends [in] * @param s_off The offset in the subject sequence [in] * @param init_hitlist The structure containing information about all  *                     initial hits [in] [out] * @return Has this hit been extended?  */static BooleanBlastnExtendInitialHit(BLAST_SequenceBlk* query,    BLAST_SequenceBlk* subject, Uint4 min_step,   const BlastInitialWordParameters* word_params,    Int4** matrix, Blast_ExtendWord* ewp, Int4 q_off, Int4 s_end,   Int4 s_off, BlastInitHitList* init_hitlist){   Int4 diag, real_diag;   Int4 s_pos, last_hit;   BLAST_DiagTable*     diag_table;   BlastUngappedData* ungapped_data;   const BlastInitialWordOptions* word_options = word_params->options;   Int4 window_size = word_options->window_size;   Boolean hit_ready;   Boolean new_hit = FALSE, second_hit = FALSE;   Int4 step;   DiagStruct* diag_array_elem;   diag_table = ewp->diag_table;   diag = s_off + diag_table->diag_array_length - q_off;   real_diag = diag & diag_table->diag_mask;   diag_array_elem = &diag_table->diag_array[real_diag];   s_pos = s_end + diag_table->offset;   last_hit = diag_array_elem->last_hit;   step = s_pos - last_hit;   if (step <= 0)      /* This is a hit on a diagonal that has already been explored          further down */      return 0;   if (window_size == 0 ||       (diag_array_elem->diag_level == 1)) {      /* Single hit version or previous hit was already a second hit */      new_hit = (step > (Int4)min_step);   } else {      /* Previous hit was the first hit */      new_hit = (step > window_size);      second_hit = (step > 0 && step <= window_size);   }   hit_ready = ((window_size == 0) && new_hit) || second_hit;   if (hit_ready) {      if (word_options->ungapped_extension) {         /* Perform ungapped extension */         BlastnWordUngappedExtend(query, subject, matrix, q_off, s_off,             word_params->cutoff_score, -word_params->x_dropoff,             &ungapped_data);               diag_array_elem->last_hit =             ungapped_data->length + ungapped_data->s_start             + diag_table->offset;      } else {         ungapped_data = NULL;         diag_array_elem->last_hit = s_pos;      }      if (!ungapped_data ||           ungapped_data->score >= word_params->cutoff_score) {         BLAST_SaveInitialHit(init_hitlist, q_off, s_off, ungapped_data);         /* Set diag_level to 1, indicating that this hit has already been             saved */         diag_array_elem->diag_level = 1;      } else {         sfree(ungapped_data);         /* Set diag_level to 0, indicating that any hit after this will             be new */         diag_array_elem->diag_level = 0;      }   } else {      /* First hit in the 2-hit case or a direct extension of the previous          hit - update the last hit information only */      diag_array_elem->last_hit = s_pos;      if (new_hit)         diag_array_elem->diag_level = 0;   }   return hit_ready;}/* Description in blast_extend.h */Int2 BlastNaWordFinder(BLAST_SequenceBlk* subject, 		       BLAST_SequenceBlk* query,		       LookupTableWrap* lookup_wrap,		       Int4** matrix,		       const BlastInitialWordParameters* word_params,		       Blast_ExtendWord* ewp,		       Uint4* q_offsets,		       Uint4* s_offsets,		       Int4 max_hits,		       BlastInitHitList* init_hitlist,              BlastUngappedStats* ungapped_stats){   LookupTable* lookup = (LookupTable*) lookup_wrap->lut;   Uint1* s_start = subject->sequence;   Uint1* abs_start = s_start;   Int4 i;   Uint1* s;   Uint1* q_start = query->sequence;   Int4 hitsfound, total_hits = 0;   Int4 hits_extended = 0;   Uint4 word_size, compressed_wordsize, reduced_word_length;   Uint4 extra_bytes_needed;   Uint2 extra_bases, left, right;   Uint1* q;   Int4 start_offset, last_start, next_start, last_end;   Uint1 max_bases;   word_size = COMPRESSION_RATIO*lookup->wordsize;   last_start = subject->length - word_size;   start_offset = 0;   compressed_wordsize = lookup->reduced_wordsize;   extra_bytes_needed = lookup->wordsize - compressed_wordsize;   reduced_word_length = COMPRESSION_RATIO*compressed_wordsize;   extra_bases = lookup->word_length - word_size;   last_end = subject->length - word_size;   while (start_offset <= last_start) {      /* Pass the last word ending offset */      next_start = last_end;      hitsfound = BlastNaScanSubject(lookup_wrap, subject, start_offset,                      q_offsets, s_offsets, max_hits, &next_start);             total_hits += hitsfound;      for (i = 0; i < hitsfound; ++i) {         /* Here it is guaranteed that subject offset is divisible by 4,            because we only extend to the right, so scanning stride must be            equal to 4. */         s = abs_start + (s_offsets[i])/COMPRESSION_RATIO;         q = q_start + q_offsets[i];
blast_extend.c - 源码说明

本页面展示了「ncbi源码」中的 blast_extend.c 源码文件，采用 C语言编程语言编写，共 1,141 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与ncbi相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?