⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 blast_engine.c

📁 ncbi源码
💻 C
📖 第 1 页 / 共 2 页
字号:
   BlastGapAlignStruct** gap_align,    BlastScoringParameters** score_params,   BlastInitialWordParameters** word_params,   BlastExtensionParameters** ext_params,   BlastHitSavingParameters** hit_params,   BlastEffectiveLengthsParameters** eff_len_params,                            BlastCoreAuxStruct** aux_struct_ptr){   Int2 status = 0;   BlastCoreAuxStruct* aux_struct;   Boolean blastp = (lookup_wrap->lut_type == AA_LOOKUP_TABLE ||                         lookup_wrap->lut_type == RPS_LOOKUP_TABLE);   Boolean mb_lookup = (lookup_wrap->lut_type == MB_LOOKUP_TABLE);   Boolean phi_lookup = (lookup_wrap->lut_type == PHI_AA_LOOKUP ||                         lookup_wrap->lut_type == PHI_NA_LOOKUP);   Boolean ag_blast = (word_options->extension_method == eRightAndLeft);   Int4 offset_array_size = GetOffsetArraySize(lookup_wrap);   Uint4 avg_subj_length;   ASSERT(seq_src);   *aux_struct_ptr = aux_struct = (BlastCoreAuxStruct*)      calloc(1, sizeof(BlastCoreAuxStruct));   avg_subj_length = BLASTSeqSrcGetAvgSeqLen(seq_src);        if ((status = BlastExtendWordNew(query->length, word_options,                     avg_subj_length, &aux_struct->ewp)) != 0)      return status;   if ((status = BLAST_GapAlignSetUp(program_number, seq_src,                     scoring_options, eff_len_options, ext_options,                     hit_options, query_info, sbp, score_params,                    ext_params, hit_params, eff_len_params, gap_align)) != 0)      return status;         BlastInitialWordParametersNew(program_number, word_options,       *hit_params, *ext_params, sbp, query_info,       avg_subj_length, word_params);   if (mb_lookup) {      aux_struct->WordFinder = MB_WordFinder;   } else if (phi_lookup) {      aux_struct->WordFinder = PHIBlastWordFinder;   } else if (blastp) {      aux_struct->WordFinder = BlastAaWordFinder;   } else if (ag_blast) {      aux_struct->WordFinder = BlastNaWordFinder_AG;   } else {      aux_struct->WordFinder = BlastNaWordFinder;   }      aux_struct->query_offsets =       (Uint4*) malloc(offset_array_size * sizeof(Uint4));   aux_struct->subject_offsets =       (Uint4*) malloc(offset_array_size * sizeof(Uint4));      aux_struct->init_hitlist = BLAST_InitHitListNew();   /* Pick which gapped alignment algorithm to use. */   if (phi_lookup)      aux_struct->GetGappedScore = PHIGetGappedScore;   else if (ext_options->ePrelimGapExt == eDynProgExt)      aux_struct->GetGappedScore = BLAST_GetGappedScore;   else      aux_struct->GetGappedScore = BLAST_MbGetGappedScore;   aux_struct->hsp_list = Blast_HSPListNew(hit_options->hsp_num_max);   return status;}#define BLAST_DB_CHUNK_SIZE 1024Int4 BLAST_RPSSearchEngine(Uint1 program_number,    BLAST_SequenceBlk* query, BlastQueryInfo* query_info,   const BlastSeqSrc* seq_src,  BlastScoreBlk* sbp,   const BlastScoringOptions* score_options,    LookupTableWrap* lookup_wrap,   const BlastInitialWordOptions* word_options,    const BlastExtensionOptions* ext_options,    const BlastHitSavingOptions* hit_options,   const BlastEffectiveLengthsOptions* eff_len_options,   const PSIBlastOptions* psi_options,    const BlastDatabaseOptions* db_options,   BlastHSPResults* results, BlastDiagnostics* diagnostics){   BlastCoreAuxStruct* aux_struct = NULL;   BlastHSPList* hsp_list;   BlastScoringParameters* score_params = NULL;   BlastInitialWordParameters* word_params = NULL;   BlastExtensionParameters* ext_params = NULL;   BlastHitSavingParameters* hit_params = NULL;   BlastEffectiveLengthsParameters* eff_len_params = NULL;   BlastGapAlignStruct* gap_align;   Int2 status = 0;   Int8 dbsize;   Int4 num_db_seqs;   Uint4 avg_subj_length = 0;   RPSLookupTable *lookup = (RPSLookupTable *)lookup_wrap->lut;   BlastQueryInfo concat_db_info;   BLAST_SequenceBlk concat_db;   RPSAuxInfo *rps_info;   BlastHSPResults prelim_results;   Uint1 *orig_query_seq = NULL;   BlastRawCutoffs* raw_cutoffs = NULL;   if (program_number != blast_type_rpsblast &&       program_number != blast_type_rpstblastn)      return -1;   if (results->num_queries != 1)      return -2;   if ((status =       BLAST_SetUpAuxStructures(program_number, seq_src,          score_options, eff_len_options, lookup_wrap, word_options,          ext_options, hit_options, query, query_info, sbp,          &gap_align, &score_params, &word_params, &ext_params,          &hit_params, &eff_len_params, &aux_struct)) != 0)      return status;   /* modify scoring and gap alignment structures for      use with RPS blast. */   rps_info = lookup->rps_aux_info;   gap_align->positionBased = TRUE;   gap_align->sbp->posMatrix = lookup->rps_pssm;   /* determine the total number of residues in the db.      This figure must also include one trailing NULL for      each DB sequence */   num_db_seqs = BLASTSeqSrcGetNumSeqs(seq_src);   dbsize = BLASTSeqSrcGetTotLen(seq_src) + num_db_seqs;   if (dbsize > INT4_MAX)      return -3;   /* If this is a translated search, pack the query into      ncbi2na format (exclude the starting sentinel); otherwise       just use the input query */   if (program_number == blast_type_rpstblastn) {       orig_query_seq = query->sequence_start;       query->sequence_start++;       if (BLAST_PackDNA(query->sequence_start, query->length,                         NCBI4NA_ENCODING, &query->sequence) != 0)           return -4;   }   /* Concatenate all of the DB sequences together, and pretend      this is a large multiplexed sequence. Note that because the      scoring is position-specific, the actual sequence data is      not needed */   memset(&concat_db, 0, sizeof(concat_db)); /* fill in SequenceBlk */   concat_db.length = (Int4) dbsize;   memset(&concat_db_info, 0, sizeof(concat_db_info)); /* fill in QueryInfo */   concat_db_info.num_queries = num_db_seqs;   concat_db_info.first_context = 0;   concat_db_info.last_context = num_db_seqs - 1;   concat_db_info.context_offsets = lookup->rps_seq_offsets;   prelim_results.num_queries = num_db_seqs;   prelim_results.hitlist_array = (BlastHitList **)calloc(num_db_seqs,                                             sizeof(BlastHitList *));   /* Change the table of diagonals that will be used for the      search; we need a diag table that can fit the entire      concatenated DB */   avg_subj_length = (Uint4) (dbsize / num_db_seqs);   BlastExtendWordFree(aux_struct->ewp);   BlastExtendWordNew(concat_db.length, word_options, 			avg_subj_length, &aux_struct->ewp);   /* Run the search; the input query is what gets scanned      and the concatenated DB is the sequence associated with      the score matrix. This essentially means that 'query'      and 'subject' have opposite conventions for the search.           Note that while scores can be calculated for any alignment      found, we have not set up any Karlin parameters or effective      search space sizes for the concatenated DB. This means that      E-values cannot be calculated after hits are found. */   BLAST_SearchEngineCore(program_number, &concat_db, &concat_db_info,          query, lookup_wrap, gap_align, score_params,          word_params, ext_params, hit_params, db_options,          diagnostics, aux_struct, &hsp_list);   /* Fill the cutoff values in the diagnostics structure */   if (diagnostics->cutoffs)      raw_cutoffs = diagnostics->cutoffs;   FillReturnCutoffsInfo(raw_cutoffs, score_params, word_params, ext_params);   /* save the resulting list of HSPs. 'query' and 'subject' are      still reversed */   if (hsp_list && hsp_list->hspcnt > 0) {      /* Save the HSPs into a hit list */      Blast_HSPResultsSaveHitList(program_number, &prelim_results, hsp_list, hit_params);   }   /* Change the results from a single hsplist with many       contexts to many hsplists each with a single context.      'query' and 'subject' offsets are still reversed. */   Blast_HSPResultsRPSUpdate(results, &prelim_results);   /* for a translated search, throw away the packed version      of the query and replace with the original (excluding the      starting sentinel) */   if (program_number == blast_type_rpstblastn) {       sfree(query->sequence);       query->sequence = query->sequence_start;   }   /* Do the traceback. After this call, query and       subject have reverted to their traditional meanings. */   BLAST_RPSTraceback(program_number, results, &concat_db,             &concat_db_info, query, query_info, gap_align,             score_params, ext_params, hit_params, db_options,             rps_info->karlin_k);   /* The traceback calculated the E values, so it's safe      to sort the results now */   Blast_HSPResultsSortByEvalue(results);   /* free the internal structures used */   /* Do not destruct score block here */   if (program_number == blast_type_rpstblastn) {      query->sequence_start = orig_query_seq;      query->sequence = orig_query_seq + 1;   }   sfree(prelim_results.hitlist_array);   gap_align->sbp->posMatrix = NULL;   gap_align->positionBased = FALSE;   gap_align->sbp = NULL;   BLAST_GapAlignStructFree(gap_align);   BlastCoreAuxStructFree(aux_struct);   sfree(score_params);   sfree(hit_params);   sfree(ext_params);   sfree(word_params);   sfree(eff_len_params);   return status;}Int4 BLAST_SearchEngine(Uint1 program_number,    BLAST_SequenceBlk* query, BlastQueryInfo* query_info,   const BlastSeqSrc* seq_src,  BlastScoreBlk* sbp,   const BlastScoringOptions* score_options,    LookupTableWrap* lookup_wrap,   const BlastInitialWordOptions* word_options,    const BlastExtensionOptions* ext_options,    const BlastHitSavingOptions* hit_options,   const BlastEffectiveLengthsOptions* eff_len_options,   const PSIBlastOptions* psi_options,    const BlastDatabaseOptions* db_options,   BlastHSPResults* results, BlastDiagnostics* diagnostics){   BlastCoreAuxStruct* aux_struct = NULL;   BlastHSPList* hsp_list;    BlastScoringParameters* score_params = NULL;   BlastInitialWordParameters* word_params = NULL;   BlastExtensionParameters* ext_params = NULL;   BlastHitSavingParameters* hit_params = NULL;   BlastEffectiveLengthsParameters* eff_len_params = NULL;   BlastGapAlignStruct* gap_align;   GetSeqArg seq_arg;   Int2 status = 0;   BlastSeqSrcIterator* itr = NULL;   Int8 db_length = 0;   BlastRawCutoffs* raw_cutoffs = NULL;      if ((status =        BLAST_SetUpAuxStructures(program_number, seq_src,          score_options, eff_len_options, lookup_wrap, word_options,           ext_options, hit_options, query, query_info, sbp,           &gap_align, &score_params, &word_params, &ext_params,           &hit_params, &eff_len_params, &aux_struct)) != 0)      return status;   memset((void*) &seq_arg, 0, sizeof(seq_arg));   /* Encoding is set so there are no sentinel bytes, and protein/nucleotide      sequences are retieved in ncbistdaa/ncbi2na encodings respectively. */   seq_arg.encoding = BLASTP_ENCODING;    /* Initialize the iterator */   if ( !(itr = BlastSeqSrcIteratorNew(BLAST_DB_CHUNK_SIZE))) {      /** NEED AN ERROR MESSAGE HERE? */      return -1;   }      db_length = BLASTSeqSrcGetTotLen(seq_src);   /* iterate over all subject sequences */   while ( (seq_arg.oid = BlastSeqSrcIteratorNext(seq_src, itr))            != BLAST_SEQSRC_EOF) {      BlastSequenceBlkClean(seq_arg.seq);      if (BLASTSeqSrcGetSequence(seq_src, (void*) &seq_arg) < 0)          continue;      if (db_length == 0) {         /* This is not a database search, hence need to recalculate and save            the effective search spaces and length adjustments for all             queries based on the length of the current single subject             sequence. */         if ((status = BLAST_OneSubjectUpdateParameters(program_number,                           seq_arg.seq->length, score_options, query_info,                           sbp, ext_params, hit_params, word_params,                           eff_len_params)) != 0)            return status;      }      /* Calculate cutoff scores for linking HSPs */      if (hit_params->do_sum_stats) {         CalculateLinkHSPCutoffs(program_number, query_info, sbp,                                  hit_params, ext_params, db_length, seq_arg.seq->length);       }      BLAST_SearchEngineCore(program_number, query, query_info,         seq_arg.seq, lookup_wrap, gap_align, score_params, word_params,          ext_params, hit_params, db_options, diagnostics, aux_struct,          &hsp_list);       if (hsp_list && hsp_list->hspcnt > 0) {         if (program_number == blast_type_blastn) {            status =                Blast_HSPListReevaluateWithAmbiguities(hsp_list, query,                   seq_arg.seq, hit_options, query_info, sbp, score_params,                   seq_src);         }         /* Save the HSPs into a hit list */         Blast_HSPResultsSaveHitList(program_number, results, hsp_list, hit_params);      }         /*BlastSequenceBlkClean(subject);*/   }      itr = BlastSeqSrcIteratorFree(itr);   BlastSequenceBlkFree(seq_arg.seq);   /* Fill the cutoff values in the diagnostics structure */   if (diagnostics->cutoffs)      raw_cutoffs = diagnostics->cutoffs;   FillReturnCutoffsInfo(raw_cutoffs, score_params, word_params, ext_params);   if (hit_options->phi_align) {      /* Save the product of effective occurrencies of pattern in query and         occurrencies of pattern in database */      Int8 db_hits = 1;      if (diagnostics && diagnostics->ungapped_stat)         db_hits = diagnostics->ungapped_stat->lookup_hits;      gap_align->sbp->effective_search_sp *= db_hits;   }   /* Now sort the hit lists for all queries, but only if this is a database      search. */   if (db_length > 0)      Blast_HSPResultsSortByEvalue(results);   if (!ext_options->skip_traceback && score_options->gapped_calculation) {      status =          BLAST_ComputeTraceback(program_number, results, query, query_info,            seq_src, gap_align, score_params, ext_params, hit_params,            eff_len_params, db_options, psi_options);   }   /* Do not destruct score block here */   gap_align->sbp = NULL;   BLAST_GapAlignStructFree(gap_align);   BlastCoreAuxStructFree(aux_struct);   sfree(score_params);   sfree(hit_params);   sfree(ext_params);   sfree(word_params);   sfree(eff_len_params);   return status;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -