📄 blast_engine.c
字号:
BlastGapAlignStruct** gap_align, BlastScoringParameters** score_params, BlastInitialWordParameters** word_params, BlastExtensionParameters** ext_params, BlastHitSavingParameters** hit_params, BlastEffectiveLengthsParameters** eff_len_params, BlastCoreAuxStruct** aux_struct_ptr){ Int2 status = 0; BlastCoreAuxStruct* aux_struct; Boolean blastp = (lookup_wrap->lut_type == AA_LOOKUP_TABLE || lookup_wrap->lut_type == RPS_LOOKUP_TABLE); Boolean mb_lookup = (lookup_wrap->lut_type == MB_LOOKUP_TABLE); Boolean phi_lookup = (lookup_wrap->lut_type == PHI_AA_LOOKUP || lookup_wrap->lut_type == PHI_NA_LOOKUP); Boolean ag_blast = (word_options->extension_method == eRightAndLeft); Int4 offset_array_size = GetOffsetArraySize(lookup_wrap); Uint4 avg_subj_length; ASSERT(seq_src); *aux_struct_ptr = aux_struct = (BlastCoreAuxStruct*) calloc(1, sizeof(BlastCoreAuxStruct)); avg_subj_length = BLASTSeqSrcGetAvgSeqLen(seq_src); if ((status = BlastExtendWordNew(query->length, word_options, avg_subj_length, &aux_struct->ewp)) != 0) return status; if ((status = BLAST_GapAlignSetUp(program_number, seq_src, scoring_options, eff_len_options, ext_options, hit_options, query_info, sbp, score_params, ext_params, hit_params, eff_len_params, gap_align)) != 0) return status; BlastInitialWordParametersNew(program_number, word_options, *hit_params, *ext_params, sbp, query_info, avg_subj_length, word_params); if (mb_lookup) { aux_struct->WordFinder = MB_WordFinder; } else if (phi_lookup) { aux_struct->WordFinder = PHIBlastWordFinder; } else if (blastp) { aux_struct->WordFinder = BlastAaWordFinder; } else if (ag_blast) { aux_struct->WordFinder = BlastNaWordFinder_AG; } else { aux_struct->WordFinder = BlastNaWordFinder; } aux_struct->query_offsets = (Uint4*) malloc(offset_array_size * sizeof(Uint4)); aux_struct->subject_offsets = (Uint4*) malloc(offset_array_size * sizeof(Uint4)); aux_struct->init_hitlist = BLAST_InitHitListNew(); /* Pick which gapped alignment algorithm to use. */ if (phi_lookup) aux_struct->GetGappedScore = PHIGetGappedScore; else if (ext_options->ePrelimGapExt == eDynProgExt) aux_struct->GetGappedScore = BLAST_GetGappedScore; else aux_struct->GetGappedScore = BLAST_MbGetGappedScore; aux_struct->hsp_list = Blast_HSPListNew(hit_options->hsp_num_max); return status;}#define BLAST_DB_CHUNK_SIZE 1024Int4 BLAST_RPSSearchEngine(Uint1 program_number, BLAST_SequenceBlk* query, BlastQueryInfo* query_info, const BlastSeqSrc* seq_src, BlastScoreBlk* sbp, const BlastScoringOptions* score_options, LookupTableWrap* lookup_wrap, const BlastInitialWordOptions* word_options, const BlastExtensionOptions* ext_options, const BlastHitSavingOptions* hit_options, const BlastEffectiveLengthsOptions* eff_len_options, const PSIBlastOptions* psi_options, const BlastDatabaseOptions* db_options, BlastHSPResults* results, BlastDiagnostics* diagnostics){ BlastCoreAuxStruct* aux_struct = NULL; BlastHSPList* hsp_list; BlastScoringParameters* score_params = NULL; BlastInitialWordParameters* word_params = NULL; BlastExtensionParameters* ext_params = NULL; BlastHitSavingParameters* hit_params = NULL; BlastEffectiveLengthsParameters* eff_len_params = NULL; BlastGapAlignStruct* gap_align; Int2 status = 0; Int8 dbsize; Int4 num_db_seqs; Uint4 avg_subj_length = 0; RPSLookupTable *lookup = (RPSLookupTable *)lookup_wrap->lut; BlastQueryInfo concat_db_info; BLAST_SequenceBlk concat_db; RPSAuxInfo *rps_info; BlastHSPResults prelim_results; Uint1 *orig_query_seq = NULL; BlastRawCutoffs* raw_cutoffs = NULL; if (program_number != blast_type_rpsblast && program_number != blast_type_rpstblastn) return -1; if (results->num_queries != 1) return -2; if ((status = BLAST_SetUpAuxStructures(program_number, seq_src, score_options, eff_len_options, lookup_wrap, word_options, ext_options, hit_options, query, query_info, sbp, &gap_align, &score_params, &word_params, &ext_params, &hit_params, &eff_len_params, &aux_struct)) != 0) return status; /* modify scoring and gap alignment structures for use with RPS blast. */ rps_info = lookup->rps_aux_info; gap_align->positionBased = TRUE; gap_align->sbp->posMatrix = lookup->rps_pssm; /* determine the total number of residues in the db. This figure must also include one trailing NULL for each DB sequence */ num_db_seqs = BLASTSeqSrcGetNumSeqs(seq_src); dbsize = BLASTSeqSrcGetTotLen(seq_src) + num_db_seqs; if (dbsize > INT4_MAX) return -3; /* If this is a translated search, pack the query into ncbi2na format (exclude the starting sentinel); otherwise just use the input query */ if (program_number == blast_type_rpstblastn) { orig_query_seq = query->sequence_start; query->sequence_start++; if (BLAST_PackDNA(query->sequence_start, query->length, NCBI4NA_ENCODING, &query->sequence) != 0) return -4; } /* Concatenate all of the DB sequences together, and pretend this is a large multiplexed sequence. Note that because the scoring is position-specific, the actual sequence data is not needed */ memset(&concat_db, 0, sizeof(concat_db)); /* fill in SequenceBlk */ concat_db.length = (Int4) dbsize; memset(&concat_db_info, 0, sizeof(concat_db_info)); /* fill in QueryInfo */ concat_db_info.num_queries = num_db_seqs; concat_db_info.first_context = 0; concat_db_info.last_context = num_db_seqs - 1; concat_db_info.context_offsets = lookup->rps_seq_offsets; prelim_results.num_queries = num_db_seqs; prelim_results.hitlist_array = (BlastHitList **)calloc(num_db_seqs, sizeof(BlastHitList *)); /* Change the table of diagonals that will be used for the search; we need a diag table that can fit the entire concatenated DB */ avg_subj_length = (Uint4) (dbsize / num_db_seqs); BlastExtendWordFree(aux_struct->ewp); BlastExtendWordNew(concat_db.length, word_options, avg_subj_length, &aux_struct->ewp); /* Run the search; the input query is what gets scanned and the concatenated DB is the sequence associated with the score matrix. This essentially means that 'query' and 'subject' have opposite conventions for the search. Note that while scores can be calculated for any alignment found, we have not set up any Karlin parameters or effective search space sizes for the concatenated DB. This means that E-values cannot be calculated after hits are found. */ BLAST_SearchEngineCore(program_number, &concat_db, &concat_db_info, query, lookup_wrap, gap_align, score_params, word_params, ext_params, hit_params, db_options, diagnostics, aux_struct, &hsp_list); /* Fill the cutoff values in the diagnostics structure */ if (diagnostics->cutoffs) raw_cutoffs = diagnostics->cutoffs; FillReturnCutoffsInfo(raw_cutoffs, score_params, word_params, ext_params); /* save the resulting list of HSPs. 'query' and 'subject' are still reversed */ if (hsp_list && hsp_list->hspcnt > 0) { /* Save the HSPs into a hit list */ Blast_HSPResultsSaveHitList(program_number, &prelim_results, hsp_list, hit_params); } /* Change the results from a single hsplist with many contexts to many hsplists each with a single context. 'query' and 'subject' offsets are still reversed. */ Blast_HSPResultsRPSUpdate(results, &prelim_results); /* for a translated search, throw away the packed version of the query and replace with the original (excluding the starting sentinel) */ if (program_number == blast_type_rpstblastn) { sfree(query->sequence); query->sequence = query->sequence_start; } /* Do the traceback. After this call, query and subject have reverted to their traditional meanings. */ BLAST_RPSTraceback(program_number, results, &concat_db, &concat_db_info, query, query_info, gap_align, score_params, ext_params, hit_params, db_options, rps_info->karlin_k); /* The traceback calculated the E values, so it's safe to sort the results now */ Blast_HSPResultsSortByEvalue(results); /* free the internal structures used */ /* Do not destruct score block here */ if (program_number == blast_type_rpstblastn) { query->sequence_start = orig_query_seq; query->sequence = orig_query_seq + 1; } sfree(prelim_results.hitlist_array); gap_align->sbp->posMatrix = NULL; gap_align->positionBased = FALSE; gap_align->sbp = NULL; BLAST_GapAlignStructFree(gap_align); BlastCoreAuxStructFree(aux_struct); sfree(score_params); sfree(hit_params); sfree(ext_params); sfree(word_params); sfree(eff_len_params); return status;}Int4 BLAST_SearchEngine(Uint1 program_number, BLAST_SequenceBlk* query, BlastQueryInfo* query_info, const BlastSeqSrc* seq_src, BlastScoreBlk* sbp, const BlastScoringOptions* score_options, LookupTableWrap* lookup_wrap, const BlastInitialWordOptions* word_options, const BlastExtensionOptions* ext_options, const BlastHitSavingOptions* hit_options, const BlastEffectiveLengthsOptions* eff_len_options, const PSIBlastOptions* psi_options, const BlastDatabaseOptions* db_options, BlastHSPResults* results, BlastDiagnostics* diagnostics){ BlastCoreAuxStruct* aux_struct = NULL; BlastHSPList* hsp_list; BlastScoringParameters* score_params = NULL; BlastInitialWordParameters* word_params = NULL; BlastExtensionParameters* ext_params = NULL; BlastHitSavingParameters* hit_params = NULL; BlastEffectiveLengthsParameters* eff_len_params = NULL; BlastGapAlignStruct* gap_align; GetSeqArg seq_arg; Int2 status = 0; BlastSeqSrcIterator* itr = NULL; Int8 db_length = 0; BlastRawCutoffs* raw_cutoffs = NULL; if ((status = BLAST_SetUpAuxStructures(program_number, seq_src, score_options, eff_len_options, lookup_wrap, word_options, ext_options, hit_options, query, query_info, sbp, &gap_align, &score_params, &word_params, &ext_params, &hit_params, &eff_len_params, &aux_struct)) != 0) return status; memset((void*) &seq_arg, 0, sizeof(seq_arg)); /* Encoding is set so there are no sentinel bytes, and protein/nucleotide sequences are retieved in ncbistdaa/ncbi2na encodings respectively. */ seq_arg.encoding = BLASTP_ENCODING; /* Initialize the iterator */ if ( !(itr = BlastSeqSrcIteratorNew(BLAST_DB_CHUNK_SIZE))) { /** NEED AN ERROR MESSAGE HERE? */ return -1; } db_length = BLASTSeqSrcGetTotLen(seq_src); /* iterate over all subject sequences */ while ( (seq_arg.oid = BlastSeqSrcIteratorNext(seq_src, itr)) != BLAST_SEQSRC_EOF) { BlastSequenceBlkClean(seq_arg.seq); if (BLASTSeqSrcGetSequence(seq_src, (void*) &seq_arg) < 0) continue; if (db_length == 0) { /* This is not a database search, hence need to recalculate and save the effective search spaces and length adjustments for all queries based on the length of the current single subject sequence. */ if ((status = BLAST_OneSubjectUpdateParameters(program_number, seq_arg.seq->length, score_options, query_info, sbp, ext_params, hit_params, word_params, eff_len_params)) != 0) return status; } /* Calculate cutoff scores for linking HSPs */ if (hit_params->do_sum_stats) { CalculateLinkHSPCutoffs(program_number, query_info, sbp, hit_params, ext_params, db_length, seq_arg.seq->length); } BLAST_SearchEngineCore(program_number, query, query_info, seq_arg.seq, lookup_wrap, gap_align, score_params, word_params, ext_params, hit_params, db_options, diagnostics, aux_struct, &hsp_list); if (hsp_list && hsp_list->hspcnt > 0) { if (program_number == blast_type_blastn) { status = Blast_HSPListReevaluateWithAmbiguities(hsp_list, query, seq_arg.seq, hit_options, query_info, sbp, score_params, seq_src); } /* Save the HSPs into a hit list */ Blast_HSPResultsSaveHitList(program_number, results, hsp_list, hit_params); } /*BlastSequenceBlkClean(subject);*/ } itr = BlastSeqSrcIteratorFree(itr); BlastSequenceBlkFree(seq_arg.seq); /* Fill the cutoff values in the diagnostics structure */ if (diagnostics->cutoffs) raw_cutoffs = diagnostics->cutoffs; FillReturnCutoffsInfo(raw_cutoffs, score_params, word_params, ext_params); if (hit_options->phi_align) { /* Save the product of effective occurrencies of pattern in query and occurrencies of pattern in database */ Int8 db_hits = 1; if (diagnostics && diagnostics->ungapped_stat) db_hits = diagnostics->ungapped_stat->lookup_hits; gap_align->sbp->effective_search_sp *= db_hits; } /* Now sort the hit lists for all queries, but only if this is a database search. */ if (db_length > 0) Blast_HSPResultsSortByEvalue(results); if (!ext_options->skip_traceback && score_options->gapped_calculation) { status = BLAST_ComputeTraceback(program_number, results, query, query_info, seq_src, gap_align, score_params, ext_params, hit_params, eff_len_params, db_options, psi_options); } /* Do not destruct score block here */ gap_align->sbp = NULL; BLAST_GapAlignStructFree(gap_align); BlastCoreAuxStructFree(aux_struct); sfree(score_params); sfree(hit_params); sfree(ext_params); sfree(word_params); sfree(eff_len_params); return status;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -