⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 blast_engine.c

📁 ncbi源码
💻 C
📖 第 1 页 / 共 2 页
字号:
/* * =========================================================================== * PRODUCTION $Log: blast_engine.c,v $ * PRODUCTION Revision 1000.6  2004/06/01 18:07:09  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.135 * PRODUCTION * =========================================================================== *//* $Id: blast_engine.c,v 1000.6 2004/06/01 18:07:09 gouriano Exp $ * =========================================================================== * *                            PUBLIC DOMAIN NOTICE *               National Center for Biotechnology Information * *  This software/database is a "United States Government Work" under the *  terms of the United States Copyright Act.  It was written as part of *  the author's offical duties as a United States Government employee and *  thus cannot be copyrighted.  This software/database is freely available *  to the public for use. The National Library of Medicine and the U.S. *  Government have not placed any restriction on its use or reproduction. * *  Although all reasonable efforts have been taken to ensure the accuracy *  and reliability of the software and data, the NLM and the U.S. *  Government do not and cannot warrant the performance or results that *  may be obtained by using this software or data. The NLM and the U.S. *  Government disclaim all warranties, express or implied, including *  warranties of performance, merchantability or fitness for any particular *  purpose. * *  Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: Ilya Dondoshansky * *//** @file blast_engine.c * High level BLAST functions */static char const rcsid[] =     "$Id: blast_engine.c,v 1000.6 2004/06/01 18:07:09 gouriano Exp $";#include <algo/blast/core/blast_engine.h>#include <algo/blast/core/lookup_wrap.h>#include <algo/blast/core/aa_ungapped.h>#include <algo/blast/core/blast_util.h>#include <algo/blast/core/blast_setup.h>#include <algo/blast/core/blast_gapalign.h>#include <algo/blast/core/blast_traceback.h>#include <algo/blast/core/phi_extend.h>#include <algo/blast/core/link_hsps.h>/** Deallocates all memory in BlastCoreAuxStruct */static BlastCoreAuxStruct* BlastCoreAuxStructFree(BlastCoreAuxStruct* aux_struct){   BlastExtendWordFree(aux_struct->ewp);   BLAST_InitHitListFree(aux_struct->init_hitlist);   Blast_HSPListFree(aux_struct->hsp_list);   sfree(aux_struct->query_offsets);   sfree(aux_struct->subject_offsets);      sfree(aux_struct);   return NULL;}/** Adjust HSP coordinates for out-of-frame gapped extension. * @param program One of blastx or tblastn [in] * @param init_hitlist List of hits after ungapped extension [in] * @param query_info Query information containing context offsets; *                   needed for blastx only [in] * @param subject_frame Frame of the subject sequence; tblastn only [in] * @param subject_length Length of the original nucleotide subject sequence; *                       tblastn only [in] * @param offset Shift in the subject sequence protein coordinates [in] */static void TranslateHSPsToDNAPCoord(Uint1 program,         BlastInitHitList* init_hitlist, BlastQueryInfo* query_info,        Int2 subject_frame, Int4 subject_length, Int4 offset){   BlastInitHSP* init_hsp;   Int4 index, context, frame;   Int4* context_offsets = query_info->context_offsets;      for (index = 0; index < init_hitlist->total; ++index) {      init_hsp = &init_hitlist->init_hsp_array[index];      if (program == blast_type_blastx) {         context =             BSearchInt4(init_hsp->q_off, context_offsets,                             query_info->last_context+1);         frame = context % 3;               init_hsp->q_off =             (init_hsp->q_off - context_offsets[context]) * CODON_LENGTH +             context_offsets[context-frame] + frame;         init_hsp->ungapped_data->q_start =             (init_hsp->ungapped_data->q_start - context_offsets[context])             * CODON_LENGTH + context_offsets[context-frame] + frame;      } else {         init_hsp->s_off += offset;         init_hsp->ungapped_data->s_start += offset;         if (subject_frame > 0) {            init_hsp->s_off =                (init_hsp->s_off * CODON_LENGTH) + subject_frame - 1;            init_hsp->ungapped_data->s_start =                (init_hsp->ungapped_data->s_start * CODON_LENGTH) +                subject_frame - 1;         } else {            init_hsp->s_off = (init_hsp->s_off * CODON_LENGTH) +                subject_length - subject_frame;            init_hsp->ungapped_data->s_start =                (init_hsp->ungapped_data->s_start * CODON_LENGTH) +                subject_length - subject_frame;         }      }   }   return;}/** The core of the BLAST search: comparison between the (concatenated) * query against one subject sequence. Translation of the subject sequence * into 6 frames is done inside, if necessary. If subject sequence is  * too long, it can be split into several chunks.  */static Int2BLAST_SearchEngineCore(Uint1 program_number, BLAST_SequenceBlk* query,    BlastQueryInfo* query_info, BLAST_SequenceBlk* subject,    LookupTableWrap* lookup, BlastGapAlignStruct* gap_align,    BlastScoringParameters* score_params,    BlastInitialWordParameters* word_params,    BlastExtensionParameters* ext_params,    BlastHitSavingParameters* hit_params,    const BlastDatabaseOptions* db_options,   BlastDiagnostics* diagnostics,   BlastCoreAuxStruct* aux_struct,   BlastHSPList** hsp_list_out){   BlastInitHitList* init_hitlist = aux_struct->init_hitlist;   BlastHSPList* hsp_list = aux_struct->hsp_list;   Uint1* translation_buffer = NULL;   Int4* frame_offsets = NULL;   BlastHitSavingOptions* hit_options = hit_params->options;   BlastScoringOptions* score_options = score_params->options;   BlastHSPList* combined_hsp_list = NULL;   Int2 status = 0;   Int4 context, first_context, last_context;   Int4 orig_length = subject->length;   Uint1* orig_sequence = subject->sequence;   Int4 **matrix;   Int4 hsp_num_max;   BlastUngappedStats* ungapped_stats = NULL;   BlastGappedStats* gapped_stats = NULL;   const Boolean k_translated_subject = (program_number == blast_type_tblastn                         || program_number == blast_type_tblastx                         || program_number == blast_type_rpstblastn);   if (k_translated_subject) {      first_context = 0;      last_context = 5;      if (score_options->is_ooframe) {         BLAST_GetAllTranslations(orig_sequence, NCBI2NA_ENCODING,            orig_length, db_options->gen_code_string, &translation_buffer,            &frame_offsets, &subject->oof_sequence);         subject->oof_sequence_allocated = TRUE;      } else {         BLAST_GetAllTranslations(orig_sequence, NCBI2NA_ENCODING,            orig_length, db_options->gen_code_string, &translation_buffer,            &frame_offsets, NULL);      }   } else if (program_number == blast_type_blastn) {      first_context = 1;      last_context = 1;   } else {      first_context = 0;      last_context = 0;   }   *hsp_list_out = NULL;   if (gap_align->positionBased)      matrix = gap_align->sbp->posMatrix;   else      matrix = gap_align->sbp->matrix;   hsp_num_max = (hit_options->hsp_num_max ? hit_options->hsp_num_max : INT4_MAX);   if (diagnostics) {      ungapped_stats = diagnostics->ungapped_stat;      gapped_stats = diagnostics->gapped_stat;   }   /* Loop over frames of the subject sequence */   for (context=first_context; context<=last_context; context++) {      Int4 chunk; /* loop variable below. */      Int4 num_chunks; /* loop variable below. */      Int4 offset = 0; /* Used as offset into subject sequence (if chunked) */      Int4 total_subject_length; /* Length of subject sequence used when split. */      if (k_translated_subject) {         subject->frame = BLAST_ContextToFrame(blast_type_blastx, context);         subject->sequence =             translation_buffer + frame_offsets[context] + 1;         subject->length =            frame_offsets[context+1] - frame_offsets[context] - 1;      } else {         subject->frame = context;      }           /* Split subject sequence into chunks if it is too long */      num_chunks = (subject->length - DBSEQ_CHUNK_OVERLAP) /          (MAX_DBSEQ_LEN - DBSEQ_CHUNK_OVERLAP) + 1;      total_subject_length = subject->length;            for (chunk = 0; chunk < num_chunks; ++chunk) {         if (chunk > 0) {            offset += subject->length - DBSEQ_CHUNK_OVERLAP;            if (program_number == blast_type_blastn) {               subject->sequence +=                   (subject->length - DBSEQ_CHUNK_OVERLAP)/COMPRESSION_RATIO;            } else {               subject->sequence += (subject->length - DBSEQ_CHUNK_OVERLAP);            }         }         subject->length = MIN(total_subject_length - offset,                                MAX_DBSEQ_LEN);                  BlastInitHitListReset(init_hitlist);                  aux_struct->WordFinder(subject, query, lookup,             matrix, word_params, aux_struct->ewp, aux_struct->query_offsets,             aux_struct->subject_offsets, GetOffsetArraySize(lookup),             init_hitlist, ungapped_stats);                     if (init_hitlist->total == 0)            continue;                  if (score_options->gapped_calculation) {            Int4 prot_length = 0;            if (score_options->is_ooframe) {               /* Convert query offsets in all HSPs into the mixed-frame                    coordinates */               TranslateHSPsToDNAPCoord(program_number, init_hitlist,                   query_info, subject->frame, orig_length, offset);               if (k_translated_subject) {                  prot_length = subject->length;                  subject->length = 2*orig_length + 1;               }            }            /** NB: If queries are concatenated, HSP offsets must be adjusted             * inside the following function call, so coordinates are             * relative to the individual contexts (i.e. queries, strands or             * frames). Contexts should also be filled in HSPs when they              * are saved.            */            aux_struct->GetGappedScore(program_number, query, query_info,                subject, gap_align, score_params, ext_params, hit_params,                init_hitlist, &hsp_list, gapped_stats);            if (score_options->is_ooframe && k_translated_subject)               subject->length = prot_length;         } else {            BLAST_GetUngappedHSPList(init_hitlist, query_info, subject,                                     hit_options, &hsp_list);         }         if (hsp_list->hspcnt == 0)            continue;                  /* The subject ordinal id is not yet filled in this HSP list */         hsp_list->oid = subject->oid;                  /* Assign frames in all HSPs. */         Blast_HSPListSetFrames(program_number, hsp_list,                                 score_options->is_ooframe);                  Blast_HSPListAdjustOffsets(hsp_list, offset);         /* Allow merging of HSPs either if traceback is already             available, or if it is an ungapped search */         Blast_HSPListsMerge(hsp_list, &combined_hsp_list, hsp_num_max, offset,            (Boolean) (hsp_list->traceback_done || !score_options->gapped_calculation));      } /* End loop on chunks of subject sequence */            Blast_HSPListAppend(combined_hsp_list, hsp_list_out, hsp_num_max);      combined_hsp_list = Blast_HSPListFree(combined_hsp_list);   } /* End loop on frames */   /* Restore the original contents of the subject block */   subject->length = orig_length;   subject->sequence = orig_sequence;   hsp_list = *hsp_list_out;   if (hit_params->do_sum_stats == TRUE) {      status = BLAST_LinkHsps(program_number, hsp_list, query_info,                              subject, gap_align->sbp, hit_params,                               score_options->gapped_calculation);   } else if (hit_options->phi_align) {      /* These e-values will not be accurate yet, since we don't know         the number of pattern occurrencies in the database. That         is arbitrarily set to 1 at this time. */      Blast_HSPListPHIGetEvalues(hsp_list, gap_align->sbp);   } else {      /* Calculate e-values for all HSPs. Skip this step         for RPS blast, since calculating the E values          requires precomputation that has not been done yet */      if (program_number != blast_type_rpsblast &&          program_number != blast_type_rpstblastn)         status = Blast_HSPListGetEvalues(program_number, query_info,                      hsp_list, score_options->gapped_calculation,                      gap_align->sbp);   }      /* Discard HSPs that don't pass the e-value test */   status = Blast_HSPListReapByEvalue(hsp_list, hit_options);   if (gapped_stats && hsp_list && hsp_list->hspcnt > 0) {      ++gapped_stats->num_seqs_passed;      gapped_stats->good_extensions += hsp_list->hspcnt;   }   if (translation_buffer) {       sfree(translation_buffer);   }   if (frame_offsets) {       sfree(frame_offsets);   }   return status;}static Int2 FillReturnCutoffsInfo(BlastRawCutoffs* return_cutoffs,    BlastScoringParameters* score_params,    BlastInitialWordParameters* word_params,    BlastExtensionParameters* ext_params){   /* since the cutoff score here will be used for display      putposes, strip out any internal scaling of the scores */   Int4 scale_factor = (Int4)score_params->scale_factor;   if (!return_cutoffs)      return -1;   return_cutoffs->x_drop_ungapped = word_params->x_dropoff / scale_factor;   return_cutoffs->x_drop_gap = ext_params->gap_x_dropoff / scale_factor;   return_cutoffs->x_drop_gap_final = ext_params->gap_x_dropoff_final /                                                         scale_factor;   return_cutoffs->gap_trigger = ext_params->gap_trigger / scale_factor;   return 0;}/** Setup of the auxiliary BLAST structures;  * also calculates internally used parameters from options.  * @param program_number blastn, blastp, blastx, etc. [in] * @param seq_src Sequence source information, with callbacks to get  *             sequences, their lengths, etc. [in] * @param scoring_options options for scoring. [in] * @param eff_len_options  used to calculate effective lengths. [in] * @param lookup_wrap Lookup table, already constructed. [in] * @param word_options options for initial word finding. [in] * @param ext_options options for gapped extension. [in] * @param hit_options options for saving hits. [in] * @param query The query sequence block [in] * @param query_info The query information block [in] * @param sbp Contains scoring information. [in] * @param gap_align Gapped alignment information and allocated memory [out] * @param score_params Parameters for scoring [out] * @param word_params Parameters for initial word processing [out] * @param ext_params Parameters for gapped extension [out] * @param hit_params Parameters for saving hits [out] * @param eff_len_params Parameters for calculating effective lengths [out] * @param aux_struct_ptr Placeholder joining various auxiliary memory  *                       structures [out] */static Int2 BLAST_SetUpAuxStructures(Uint1 program_number,   const BlastSeqSrc* seq_src,   const BlastScoringOptions* scoring_options,   const BlastEffectiveLengthsOptions* eff_len_options,   LookupTableWrap* lookup_wrap,	   const BlastInitialWordOptions* word_options,   const BlastExtensionOptions* ext_options,   const BlastHitSavingOptions* hit_options,   BLAST_SequenceBlk* query, BlastQueryInfo* query_info,    BlastScoreBlk* sbp, 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -