📄 blast_extend.c
字号:
/* * =========================================================================== * PRODUCTION $Log: blast_extend.c,v $ * PRODUCTION Revision 1000.2 2004/06/01 18:07:13 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.64 * PRODUCTION * =========================================================================== *//* $Id: blast_extend.c,v 1000.2 2004/06/01 18:07:13 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's offical duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: Ilya Dondoshansky * *//** @file blast_extend.c * Functions to initialize structures used for BLAST extension */static char const rcsid[] = "$Id: blast_extend.c,v 1000.2 2004/06/01 18:07:13 gouriano Exp $";#include <algo/blast/core/blast_extend.h>#include <algo/blast/core/blast_options.h>#include <algo/blast/core/blast_lookup.h>#include <algo/blast/core/mb_lookup.h>#include <algo/blast/core/blast_util.h> /* for NCBI2NA_UNPACK_BASE macros */#include "blast_inline.h"#define MIN_INIT_HITLIST_SIZE 100/* Description in blast_extend.h */BlastInitHitList* BLAST_InitHitListNew(void){ BlastInitHitList* init_hitlist = (BlastInitHitList*) calloc(1, sizeof(BlastInitHitList)); init_hitlist->allocated = MIN_INIT_HITLIST_SIZE; init_hitlist->init_hsp_array = (BlastInitHSP*) malloc(MIN_INIT_HITLIST_SIZE*sizeof(BlastInitHSP)); return init_hitlist;}void BlastInitHitListReset(BlastInitHitList* init_hitlist){ Int4 index; for (index = 0; index < init_hitlist->total; ++index) sfree(init_hitlist->init_hsp_array[index].ungapped_data); init_hitlist->total = 0;}BlastInitHitList* BLAST_InitHitListFree(BlastInitHitList* init_hitlist){ BlastInitHitListReset(init_hitlist); sfree(init_hitlist->init_hsp_array); sfree(init_hitlist); return NULL;}/** Allocates memory for the BLAST_DiagTable*. This function also * sets many of the parametes such as diag_array_length etc. * @param qlen Length of the query [in] * @param multiple_hits Specifies whether multiple hits method is used [in] * @param window_size The max. distance between two hits that are extended [in] * @return The allocated BLAST_DiagTable structure*/static BLAST_DiagTable*BLAST_DiagTableNew (Int4 qlen, Boolean multiple_hits, Int4 window_size){ BLAST_DiagTable* diag_table; Int4 diag_array_length; diag_table= (BLAST_DiagTable*) calloc(1, sizeof(BLAST_DiagTable)); if (diag_table) { diag_array_length = 1; /* What power of 2 is just longer than the query? */ while (diag_array_length < (qlen+window_size)) { diag_array_length = diag_array_length << 1; } /* These are used in the word finders to shift and mask rather than dividing and taking the remainder. */ diag_table->diag_array_length = diag_array_length; diag_table->diag_mask = diag_array_length-1; diag_table->multiple_hits = multiple_hits; diag_table->offset = window_size; diag_table->window = window_size; } return diag_table;}/* Description in blast_extend.h */Int2 BlastExtendWordNew(Uint4 query_length, const BlastInitialWordOptions* word_options, Uint4 subject_length, Blast_ExtendWord** ewp_ptr){ Blast_ExtendWord* ewp; Int4 index, i; *ewp_ptr = ewp = (Blast_ExtendWord*) calloc(1, sizeof(Blast_ExtendWord)); if (!ewp) { return -1; } if (word_options->container_type == eMbStacks) { double search_space; Int4 stack_size, num_stacks; MB_StackTable* stack_table; ewp->stack_table = stack_table = (MB_StackTable*) malloc(sizeof(MB_StackTable)); search_space = ((double) query_length) * subject_length; num_stacks = MIN(1 + (Int4) (sqrt(search_space)/100), 500); stack_size = 5000/num_stacks; stack_table->stack_index = (Int4*) calloc(num_stacks, sizeof(Int4)); stack_table->stack_size = (Int4*) malloc(num_stacks*sizeof(Int4)); stack_table->estack = (MB_Stack**) malloc(num_stacks*sizeof(MB_Stack*)); for (index=0; index<num_stacks; index++) { stack_table->estack[index] = (MB_Stack*) malloc(stack_size*sizeof(MB_Stack)); stack_table->stack_size[index] = stack_size; } stack_table->num_stacks = num_stacks; } else { Boolean multiple_hits = (word_options->window_size > 0); Int4* buffer; BLAST_DiagTable* diag_table; ewp->diag_table = diag_table = BLAST_DiagTableNew(query_length, multiple_hits, word_options->window_size); /* Allocate the buffer to be used for diagonal array. */ buffer = (Int4*) calloc(diag_table->diag_array_length, sizeof(DiagStruct)); if (buffer == NULL) { sfree(ewp); return -1; } diag_table->diag_array= (DiagStruct*) buffer; for(i=0;i<diag_table->diag_array_length;i++){ diag_table->diag_array[i].diag_level=0; diag_table->diag_array[i].last_hit = -diag_table->window; } } *ewp_ptr = ewp; return 0;}Boolean BLAST_SaveInitialHit(BlastInitHitList* init_hitlist, Int4 q_off, Int4 s_off, BlastUngappedData* ungapped_data) { BlastInitHSP* match_array; Int4 num, num_avail; num = init_hitlist->total; num_avail = init_hitlist->allocated; match_array = init_hitlist->init_hsp_array; if (num>=num_avail) { if (init_hitlist->do_not_reallocate) return FALSE; num_avail *= 2; match_array = (BlastInitHSP*) realloc(match_array, num_avail*sizeof(BlastInitHSP)); if (!match_array) { init_hitlist->do_not_reallocate = TRUE; return FALSE; } else { init_hitlist->allocated = num_avail; init_hitlist->init_hsp_array = match_array; } } match_array[num].q_off = q_off; match_array[num].s_off = s_off; match_array[num].ungapped_data = ungapped_data; init_hitlist->total++; return TRUE;}/** Traditional Mega BLAST initial word extension * @param query The query sequence [in] * @param subject The subject sequence [in] * @param lookup Lookup table structure [in] * @param word_params The parameters related to initial word extension [in] * @param matrix the substitution matrix for ungapped extension [in] * @param ewp The structure containing word extension information [in] * @param q_off The offset in the query sequence [in] * @param s_off The offset in the subject sequence [in] * @param init_hitlist The structure containing information about all * initial hits [in] [out] * @return Has this hit been extended? */static BooleanMB_ExtendInitialHit(BLAST_SequenceBlk* query, BLAST_SequenceBlk* subject, LookupTableWrap* lookup, const BlastInitialWordParameters* word_params, Int4** matrix, Blast_ExtendWord* ewp, Int4 q_off, Int4 s_off, BlastInitHitList* init_hitlist) { Int4 index, index1, step; MBLookupTable* mb_lt = (MBLookupTable*) lookup->lut; MB_Stack* estack; Int4 diag, stack_top; Int4 window, word_extra_length, scan_step; Boolean new_hit, hit_ready = FALSE, two_hits, do_ungapped_extension; BLAST_DiagTable* diag_table = ewp->diag_table; MB_StackTable* stack_table = ewp->stack_table; BlastUngappedData* ungapped_data = NULL; const BlastInitialWordOptions* word_options = word_params->options; Int4 s_pos; window = word_options->window_size; word_extra_length = mb_lt->word_length - COMPRESSION_RATIO*mb_lt->compressed_wordsize; scan_step = mb_lt->scan_step; two_hits = (window > 0); do_ungapped_extension = word_options->ungapped_extension; if (diag_table) { DiagStruct* diag_array = diag_table->diag_array; DiagStruct* diag_array_elem; Int4 diag_mask; Int4 offset, s_pos; diag_mask = diag_table->diag_mask; offset = diag_table->offset; s_pos = s_off + offset; diag = (s_off + diag_table->diag_array_length - q_off) & diag_mask; diag_array_elem = &diag_array[diag]; step = s_pos - diag_array_elem->last_hit; if (step <= 0) return FALSE; if (!two_hits) { /* Single hit version */ new_hit = (step > scan_step); hit_ready = (new_hit && (word_extra_length == 0)) || (!new_hit && (step + diag_array_elem->diag_level == word_extra_length)); } else { /* Two hit version */ if (diag_array_elem->diag_level > word_extra_length) { /* Previous hit already saved */ new_hit = (step > scan_step); } else { new_hit = (step > window); hit_ready = (diag_array_elem->diag_level == word_extra_length) && !new_hit; } } if (hit_ready) { if (do_ungapped_extension) { /* Perform ungapped extension */ BlastnWordUngappedExtend(query, subject, matrix, q_off, s_off, word_params->cutoff_score, -word_params->x_dropoff, &ungapped_data); s_pos = ungapped_data->length - 1 + ungapped_data->s_start + diag_table->offset; diag_array_elem->diag_level += s_pos - diag_array_elem->last_hit; diag_array_elem->last_hit = s_pos; } else { ungapped_data = NULL; diag_array_elem->diag_level += step; diag_array_elem->last_hit = s_pos; } if (!ungapped_data || ungapped_data->score >= word_params->cutoff_score) { BLAST_SaveInitialHit(init_hitlist, q_off, s_off, ungapped_data); } else { sfree(ungapped_data); /* Set diag_level to 0, indicating that any hit after this will be new */ diag_array_elem->diag_level = 0; } } else if (step > 0) { /* First hit in the 2-hit case or a direct extension of the previous hit - update the last hit information only */ if (new_hit) diag_array_elem->diag_level = 0; else diag_array_elem->diag_level += step; diag_array_elem->last_hit = s_pos; } } else { /* Use stacks instead of the diagonal array */ index1 = (s_off - q_off) % stack_table->num_stacks; if (index1<0) index1 += stack_table->num_stacks; estack = stack_table->estack[index1]; stack_top = stack_table->stack_index[index1] - 1; for (index = 0; index <= stack_top; ) { step = s_off - estack[index].level; if (estack[index].diag == s_off - q_off) { if (step <= 0) { stack_table->stack_index[index1] = stack_top + 1; return FALSE; } if (!two_hits) { /* Single hit version */ new_hit = (step > scan_step); hit_ready = (!new_hit && (step + estack[index].length == word_extra_length)) || (new_hit && (word_extra_length == 0)); } else { /* Two hit version */ if (estack[index].length > word_extra_length) { /* Previous hit already saved */ new_hit = (step > scan_step); } else { new_hit = (step > window); hit_ready = (estack[index].length == word_extra_length) && !new_hit; } } if (hit_ready) { if (do_ungapped_extension) { /* Perform ungapped extension */ BlastnWordUngappedExtend(query, subject, matrix, q_off, s_off, word_params->cutoff_score, -word_params->x_dropoff, &ungapped_data); s_pos = ungapped_data->length - 1 + ungapped_data->s_start; estack[index].length += s_pos - estack[index].level; estack[index].level = s_pos; } else { ungapped_data = NULL; estack[index].length += step; estack[index].level = s_off; } if (!ungapped_data || ungapped_data->score >= word_params->cutoff_score) { BLAST_SaveInitialHit(init_hitlist, q_off, s_off, ungapped_data); } else { sfree(ungapped_data);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -