📄 blast_app.cpp
字号:
if (args["xgap"].AsDouble()) { opt.SetGapXDropoff(args["xgap"].AsDouble()); } if (args["xfinal"].AsDouble()) { opt.SetGapXDropoffFinal(args["xfinal"].AsDouble()); } if (args["evalue"].AsDouble()) { opt.SetEvalueThreshold(args["evalue"].AsDouble()); } if (args["searchsp"].AsDouble()) { opt.SetEffectiveSearchSpace((Int8) args["searchsp"].AsDouble()); } else if (bssp) { opt.SetDbLength(BLASTSeqSrcGetTotLen(bssp)); opt.SetDbSeqNum(BLASTSeqSrcGetNumSeqs(bssp)); } if (args["hitlist"].AsInteger()) { opt.SetHitlistSize(args["hitlist"].AsInteger()); /* Hitlist size for preliminary alignments is increased, unless no traceback is performed. */ if (args["ungapped"].AsBoolean() || args["greedy"].AsInteger() == 1) { opt.SetPrelimHitlistSize(args["hitlist"].AsInteger()); } else { opt.SetPrelimHitlistSize(MIN(2*args["hitlist"].AsInteger(), args["hitlist"].AsInteger() + 50)); } } if (args["perc"].AsDouble()) { opt.SetPercentIdentity(args["perc"].AsDouble()); } if (args["gencode"].AsInteger()) { opt.SetQueryGeneticCode(args["gencode"].AsInteger()); } if ((program == "tblastn" || program == "tblastx") && args["dbgencode"].AsInteger() != BLAST_GENETIC_CODE) { opt.SetDbGeneticCode(args["dbgencode"].AsInteger()); } if (args["maxintron"].AsInteger()) { opt.SetLongestIntronLength(args["maxintron"].AsInteger()); } if (args["frameshift"].AsInteger()) { opt.SetFrameShiftPenalty(args["frameshift"].AsInteger()); opt.SetOutOfFrameMode(); } if (args["pattern"]) { opt.SetPHIPattern(args["pattern"].AsString().c_str(), (program == "blastn")); } if (args["pssm"]) {#if 0 FILE* pssmfp = fopen(args["pssm"].AsInputFile(), "r"); // Read the pssm string and fill the posMatrix bool use_pssm = TRUE; fclose(pssmfp);#endif } return;}static CDisplaySeqalign::TranslatedFrames Context2TranslatedFrame(int context){ switch (context) { case 1: return CDisplaySeqalign::ePlusStrand1; case 2: return CDisplaySeqalign::ePlusStrand2; case 3: return CDisplaySeqalign::ePlusStrand3; case 4: return CDisplaySeqalign::eMinusStrand1; case 5: return CDisplaySeqalign::eMinusStrand2; case 6: return CDisplaySeqalign::eMinusStrand3; default: return CDisplaySeqalign::eFrameNotSet; }}#define NUM_FRAMES 6static TSeqLocInfoVectorBlastMaskLoc2CSeqLoc(const BlastMaskLoc* mask, const TSeqLocVector& slp, EProgram program){ TSeqLocInfoVector retval; int frame, num_frames; bool translated_query; int index; translated_query = (program == eBlastx || program == eTblastx); num_frames = (translated_query ? NUM_FRAMES : 1); TSeqLocInfo mask_info_list; for (index = 0; index < (int)slp.size(); ++index) { mask_info_list.clear(); if (!mask) { retval.push_back(mask_info_list); continue; } for ( ; mask && mask->index < index*num_frames; mask = mask->next); BlastSeqLoc* loc; CDisplaySeqalign::SeqlocInfo* seqloc_info = new CDisplaySeqalign::SeqlocInfo; for ( ; mask && mask->index < (index+1)*num_frames; mask = mask->next) { frame = (translated_query ? (mask->index % num_frames) + 1 : 0); for (loc = mask->loc_list; loc; loc = loc->next) { seqloc_info->frame = Context2TranslatedFrame(frame); CRef<CSeq_loc> seqloc(new CSeq_loc()); seqloc->SetInt().SetFrom(((SSeqRange*) loc->ptr)->left); seqloc->SetInt().SetTo(((SSeqRange*) loc->ptr)->right); seqloc->SetInt().SetId(*(const_cast<CSeq_id*>(&sequence::GetId(*slp[index].seqloc, slp[index].scope)))); seqloc_info->seqloc = seqloc; mask_info_list.push_back(seqloc_info); } } retval.push_back(mask_info_list); } return retval;}void CBlastApplication::FormatResults(CDbBlast& blaster, TSeqAlignVector& seqalignv){ CArgs args = GetArgs(); if (args["asnout"]) { auto_ptr<CObjectOStream> asnout( CObjectOStream::Open(args["asnout"].AsString(), eSerial_AsnText)); unsigned int query_index; for (query_index = 0; query_index < seqalignv.size(); ++query_index) { if (!seqalignv[query_index]->IsSet()) continue; *asnout << *seqalignv[query_index]; } } if (args["out"]) { EProgram program = blaster.SetOptions().GetProgram(); char* dbname = const_cast<char*>(args["db"].AsString().c_str()); bool db_is_na = (program == eBlastn || program == eTblastn || program == eTblastx); /* Revert RPS program names to their conventional counterparts, to avoid confusing the C toolkit formatter */ if (program == eRPSBlast) program = eBlastp; if (program == eRPSTblastn) program = eTblastn; CBlastFormatOptions* format_options = new CBlastFormatOptions(program, args["out"].AsOutputFile()); format_options->SetAlignments(args["align"].AsInteger()); format_options->SetDescriptions(args["descr"].AsInteger()); format_options->SetAlignView(args["format"].AsInteger()); format_options->SetHtml(args["html"].AsBoolean()); #ifdef C_FORMATTING if (dbname) { BLAST_PrintOutputHeader(format_options, args["greedy"].AsBoolean(), dbname, db_is_na); }#endif RegisterBlastDbLoader(dbname, db_is_na); /* Format the results */ TSeqLocInfoVector maskv = BlastMaskLoc2CSeqLoc(blaster.GetFilteredQueryRegions(), blaster.GetQueries(), program); if (BLAST_FormatResults(seqalignv, program, blaster.GetQueries(), maskv, format_options, blaster.GetOptions().GetOutOfFrameMode())) { NCBI_THROW(CBlastException, eInternal, "Error in formatting results"); } #ifdef C_FORMATTING PrintOutputFooter(program, format_options, score_options, m_sbp, lookup_options, word_options, ext_options, hit_options, blaster.GetQueryInfo(), dbname, blaster.GetDiagnostics());#endif }}static Int2 BLAST_FillRPSInfo( RPSInfo **ppinfo, Nlm_MemMap **rps_mmap, Nlm_MemMap **rps_pssm_mmap, const char* dbname ){ char pathname[PATH_MAX]; char filename[PATH_MAX]; RPSInfo *info; FILE *auxfile; Int4 i; Int4 seq_size; Int4 num_db_seqs; Nlm_MemMapPtr lut_mmap; Nlm_MemMapPtr pssm_mmap; char buffer[PATH_MAX]; info = (RPSInfo *)malloc(sizeof(RPSInfo)); if (info == NULL) ErrPostEx(SEV_FATAL, 1, 0, "Memory allocation failed"); /* construct the full path to the DB file. Look in the local directory, then BLASTDB environment variable (if any), then .ncbirc */ sprintf(filename, "%s.loo", dbname); if (FileLength(filename) > 0) { strcpy(pathname, dbname); } else {#ifdef OS_UNIX if (getenv("BLASTDB")) Nlm_GetAppParam("NCBI", "BLAST", "BLASTDB", getenv("BLASTDB"), pathname, PATH_MAX); else#endif Nlm_GetAppParam ("NCBI", "BLAST", "BLASTDB", BLASTDB_DIR, pathname, PATH_MAX); sprintf(filename, "%s%s%s", pathname, DIRDELIMSTR, dbname); strcpy(pathname, filename); } sprintf(filename, "%s.loo", pathname); lut_mmap = Nlm_MemMapInit(filename); if (lut_mmap == NULL) ErrPostEx(SEV_FATAL, 1, 0, "Cannot map RPS BLAST lookup file"); info->lookup_header = (RPSLookupFileHeader *)lut_mmap->mmp_begin; sprintf(filename, "%s.rps", pathname); pssm_mmap = Nlm_MemMapInit(filename); if (pssm_mmap == NULL) ErrPostEx(SEV_FATAL, 1, 0, "Cannot map RPS BLAST profile file"); info->profile_header = (RPSProfileHeader *)pssm_mmap->mmp_begin; num_db_seqs = info->profile_header->num_profiles; sprintf(filename, "%s.aux", pathname); auxfile = FileOpen(filename, "r"); if (auxfile == NULL) ErrPostEx(SEV_FATAL, 1, 0,"Cannot open RPS BLAST parameters file"); fscanf(auxfile, "%s", buffer); info->aux_info.orig_score_matrix = strdup(buffer); fscanf(auxfile, "%d", &info->aux_info.gap_open_penalty); fscanf(auxfile, "%d", &info->aux_info.gap_extend_penalty); fscanf(auxfile, "%le", &info->aux_info.ungapped_k); fscanf(auxfile, "%le", &info->aux_info.ungapped_h); fscanf(auxfile, "%d", &info->aux_info.max_db_seq_length); fscanf(auxfile, "%d", &info->aux_info.db_length); fscanf(auxfile, "%lf", &info->aux_info.scale_factor); info->aux_info.karlin_k = (double *)malloc(num_db_seqs * sizeof(double)); for (i = 0; i < num_db_seqs && !feof(auxfile); i++) { fscanf(auxfile, "%d", &seq_size); /* not used */ fscanf(auxfile, "%le", &info->aux_info.karlin_k[i]); } if (i < num_db_seqs) ErrPostEx(SEV_FATAL, 1, 0, "Missing Karlin parameters"); FileClose(auxfile); *ppinfo = info; *rps_mmap = lut_mmap; *rps_pssm_mmap = pssm_mmap; return 0;}int CBlastApplication::Run(void){ Uint1 program_number; int status = 0; // Process command line args const CArgs& args = GetArgs(); BlastProgram2Number(args["program"].AsString().c_str(), &program_number); EProgram program = static_cast<EProgram>(program_number); Int4 strand_number = args["strand"].AsInteger(); ENa_strand strand; Int4 from = args["qstart"].AsInteger(); Int4 to = args["qend"].AsInteger(); Int4 first_oid = 0; Int4 last_oid = 0; if (program == eBlastn || program == eBlastx || program == eRPSTblastn) { if (strand_number == 1) strand = eNa_strand_plus; else if (strand_number == 2) strand = eNa_strand_minus; else strand = eNa_strand_both; } else { strand = eNa_strand_unknown; } InitScope(); int id_counter = 0; // Read the query(ies) from input file; perform the setup TSeqLocVector query_loc = BLASTGetSeqLocFromStream(args["query"].AsInputFile(), *m_ObjMgr, strand, from, to, &id_counter, args["lcase"].AsBoolean()); if (args["dbrange"]) { const char* delimiters = " ,:;"; char* range_str = strdup(args["dbrange"].AsString().c_str()); first_oid = atoi(strtok(range_str, delimiters)); last_oid = atoi(strtok(NULL, delimiters)); sfree(range_str); } bool db_is_na = (program == eBlastn || program == eTblastn || program == eTblastx); BlastSeqSrc* seq_src = SeqDbSrcInit(args["db"].AsString().c_str(), !db_is_na, first_oid, last_oid, NULL); CBlastOptionsHandle* opts = CBlastOptionsFactory::Create(program); Nlm_MemMapPtr rps_mmap = NULL; Nlm_MemMapPtr rps_pssm_mmap = NULL; RPSInfo *rps_info = NULL; // Need to set up the RPS database information structure too if (program == eRPSBlast || program == eRPSTblastn) { if (BLAST_FillRPSInfo(&rps_info, &rps_mmap, &rps_pssm_mmap, args["db"].AsString().c_str()) != 0) { NCBI_THROW(CBlastException, eBadParameter, "Cannot initialize RPS BLAST database"); } } ProcessCommandLineArgs(opts, seq_src, rps_info); CDbBlast blaster(query_loc, seq_src, *opts, rps_info); TSeqAlignVector seqalignv = blaster.Run(); BlastSeqSrcFree(seq_src); if (rps_info) { Nlm_MemMapFini(rps_mmap); Nlm_MemMapFini(rps_pssm_mmap); sfree(rps_info->aux_info.karlin_k); sfree(rps_info->aux_info.orig_score_matrix); sfree(rps_info); } FormatResults(blaster, seqalignv); return status;}void CBlastApplication::Exit(void){ SetDiagStream(0);}int main(int argc, const char* argv[] /*, const char* envp[]*/){ return CBlastApplication().AppMain(argc, argv, 0, eDS_Default, 0);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -