📄 splign_app.cpp
字号:
!blast_output.front()->Get().empty() && !blast_output.front()->Get().front()-> GetSegs().GetDisc().Get().empty()) { CNcbiOstrstream oss; const CSeq_align_set::Tdata &sas = blast_output.front()->Get().front()->GetSegs().GetDisc().Get(); ITERATE(CSeq_align_set::Tdata, sa_iter, sas) { CHit hit (**sa_iter); oss << hit << endl; } string str = CNcbiOstrstreamToString(oss); CNcbiIstrstream* iss = new CNcbiIstrstream (str.c_str()); return iss; } else { return 0; }}int CSplignApp::Run(){ const CArgs& args = GetArgs(); // check that modes aren't mixed bool is_query = args["query"]; bool is_subj = args["subj"]; bool is_index = args["index"]; bool is_hits = args["hits"]; if(is_query ^ is_subj) { NCBI_THROW(CSplignAppException, eBadParameter, "Both query and subj must be specified in pairwise mode." ); } if(is_query && (is_hits || is_index)) { NCBI_THROW(CSplignAppException, eBadParameter, "Do not use -hits or -index in pairwise mode " "(i.e. with -query and -subj)." ); } if(!is_query && !is_hits) { NCBI_THROW(CSplignAppException, eBadParameter, "Either -query or -hits must be specified (but not both)"); } if(is_hits ^ is_index) { NCBI_THROW(CSplignAppException, eBadParameter, "When in batch mode, specify both -hits and -index"); } bool mode_pairwise = is_query; // open log stream m_logstream.open( args["log"].AsString().c_str() ); // open asn output stream, if any auto_ptr<ofstream> asn_ofs (0); if(args["asn"]) { const string filename = args["asn"].AsString(); asn_ofs.reset(new ofstream (filename.c_str())); if(!*asn_ofs) { NCBI_THROW(CSplignAppException, eCannotOpenFile, "Cannot open output asn file." ); } } // aligner setup string quality;#ifndef GENOME_PIPELINE quality = kQuality_high;#else quality = args["quality"].AsString();#endif CRef<CSplicedAligner> aligner ( quality == kQuality_high ? static_cast<CSplicedAligner*> (new CSplicedAligner16): static_cast<CSplicedAligner*> (new CSplicedAligner32) );#if GENOME_PIPELINE aligner->SetWm(args["Wm"].AsInteger()); aligner->SetWms(args["Wms"].AsInteger()); aligner->SetWg(args["Wg"].AsInteger()); aligner->SetWs(args["Ws"].AsInteger()); for(size_t i = 0, n = aligner->GetSpliceTypeCount(); i < n; ++i) { string arg_name ("Wi"); arg_name += NStr::IntToString(i); aligner->SetWi(i, args[arg_name.c_str()].AsInteger()); }#endif // setup sequence loader CRef<CSplignSeqAccessor> seq_loader; if(mode_pairwise) { const string query_filename (args["query"].AsString()); const string subj_filename (args["subj"].AsString()); CSeqLoaderPairwise* pseqloader = new CSeqLoaderPairwise(query_filename, subj_filename); seq_loader.Reset(pseqloader); } else { CSeqLoader* pseqloader = new CSeqLoader; pseqloader->Open(args["index"].AsString()); seq_loader.Reset(pseqloader); } // splign object setup CSplign splign; splign.SetPolyaDetection(!args["nopolya"]); splign.SetMinExonIdentity(args["min_idty"].AsDouble()); splign.SetCompartmentPenalty(args["compartment_penalty"].AsDouble()); splign.SetMinQueryCoverage(args["min_query_cov"].AsDouble()); splign.SetStrand(args["strand"].AsString() == "plus"); splign.SetEndGapDetection(!(args["noendgaps"])); splign.SetAligner(aligner); splign.SetSeqAccessor(seq_loader); splign.SetStartModelId(1); // splign formatter object CSplignFormatter formatter (splign); // prepare input hit stream auto_ptr<istream> hit_stream; if(mode_pairwise) { CSeqLoaderPairwise* pseq_loader_pw = static_cast<CSeqLoaderPairwise*>(seq_loader.GetNonNullPointer()); hit_stream.reset(x_GetPairwiseHitStream(*pseq_loader_pw)); } else { const string filename (args["hits"].AsString()); hit_stream.reset(new ifstream (filename.c_str())); } // iterate over input hits vector<CHit> hits; while(x_GetNextPair(hit_stream.get(), &hits) ) { if(hits.size() == 0) { continue; } const string query (hits[0].m_Query); const string subj (hits[0].m_Subj); splign.Run(&hits); formatter.SetSeqIds(query, subj); cout << formatter.AsText(); if(asn_ofs.get()) { CRef<CSeq_align_set> sa_set = formatter.AsSeqAlignSet(); auto_ptr<CObjectOStream> os(CObjectOStream::Open(eSerial_AsnText, *asn_ofs)); *os << *sa_set; } ITERATE(vector<CSplign::SAlignedCompartment>, ii, splign.GetResult()) { x_LogStatus(ii->m_id, query, subj, ii->m_error, ii->m_msg); } if(splign.GetResult().size() == 0) { x_LogStatus(0, query, subj, true, "No compartment found"); } } return 0;}END_NCBI_SCOPE USING_NCBI_SCOPE;// make Splign index filevoid MakeIDX( istream* inp_istr, const size_t file_index, ostream* out_ostr ){ istream * inp = inp_istr? inp_istr: &cin; ostream * out = out_ostr? out_ostr: &cout; inp->unsetf(IOS_BASE::skipws); char c0 = '\n', c; while(inp->good()) { c = inp->get(); if(c0 == '\n' && c == '>') { CT_OFF_TYPE pos = inp->tellg() - CT_POS_TYPE(1); string s; *inp >> s; *out << s << '\t' << file_index << '\t' << pos << endl; } c0 = c; }}int main(int argc, const char* argv[]) { // pre-scan for mkidx for(int i = 1; i < argc; ++i) { if(0 == strcmp(argv[i], "-mkidx")) { if(i + 1 == argc) { char err_msg [] = "ERROR: No FastA files specified to index. " "Please specify one or more FastA files after -mkidx. " "Your system may support wildcards " "to specify multiple files."; cerr << err_msg << endl; return 1; } else { ++i; } vector<string> fasta_filenames; for(; i < argc; ++i) { fasta_filenames.push_back(argv[i]); // test ifstream ifs (argv[i]); if(ifs.fail()) { cerr << "ERROR: Unable to open " << argv[i] << endl; return 1; } } // write the list of files const size_t files_count = fasta_filenames.size(); cout << "# This file was generated by Splign." << endl; cout << "$$$FI" << endl; for(size_t k = 0; k < files_count; ++k) { cout << fasta_filenames[k] << '\t' << k << endl; } cout << "$$$SI" << endl; for(size_t k = 0; k < files_count; ++k) { ifstream ifs (fasta_filenames[k].c_str()); MakeIDX(&ifs, k, &cout); } return 0; } } return CSplignApp().AppMain(argc, argv, 0, eDS_Default, 0);}/* * =========================================================================== * $Log: splign_app.cpp,v $ * Revision 1000.6 2004/06/01 18:05:26 gouriano * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.24 * * Revision 1.24 2004/05/21 21:41:02 gorelenk * Added PCH ncbi_pch.hpp * * Revision 1.23 2004/05/18 21:43:40 kapustin * Code cleanup * * Revision 1.22 2004/05/10 16:40:12 kapustin * Support a pairwise mode * * Revision 1.21 2004/05/04 15:23:45 ucko * Split splign code out of xalgoalign into new xalgosplign. * * Revision 1.20 2004/04/30 15:00:47 kapustin * Support ASN formatting * * Revision 1.19 2004/04/26 19:26:22 kapustin * Count models from one * * Revision 1.18 2004/04/26 15:38:46 kapustin * Add model_id as a CSplign member * * Revision 1.17 2004/04/23 14:33:32 kapustin * *** empty log message *** * * Revision 1.15 2004/02/19 22:57:55 ucko * Accommodate stricter implementations of CT_POS_TYPE. * * Revision 1.14 2004/02/18 16:04:53 kapustin * Do not print PolyA and gap contents * * Revision 1.13 2003/12/15 20:16:58 kapustin * GetNextQuery() ->GetNextPair() * * Revision 1.12 2003/12/09 13:21:47 ucko * +<memory> for auto_ptr * * Revision 1.11 2003/12/04 20:08:22 kapustin * Remove endgaps argument * * Revision 1.10 2003/12/04 19:26:37 kapustin * Account for zero-length segment vector * * Revision 1.9 2003/12/03 19:47:02 kapustin * Increase exon search scope at the ends * * Revision 1.8 2003/11/26 16:13:21 kapustin * Determine subject after filtering for more accurate log reporting * * Revision 1.7 2003/11/21 16:04:18 kapustin * Enable RLE for Poly(A) tail. * * Revision 1.6 2003/11/20 17:58:20 kapustin * Make the code msvc6.0-friendly * * Revision 1.5 2003/11/20 14:38:10 kapustin * Add -nopolya flag to suppress Poly(A) detection. * * Revision 1.4 2003/11/10 19:20:26 kapustin * Generate encoded alignment transcript by default * * Revision 1.3 2003/11/05 20:32:11 kapustin * Include source information into the index * * Revision 1.2 2003/10/31 19:43:15 kapustin * Format and compatibility update * * Revision 1.1 2003/10/30 19:37:20 kapustin * Initial toolkit revision * * =========================================================================== */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -