📄 nwa.cpp
字号:
/*
 * ===========================================================================
 * PRODUCTION $Log: nwa.cpp,v $
 * PRODUCTION Revision 1000.2  2004/06/01 18:05:08  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.27
 * PRODUCTION
 * ===========================================================================
 */
/* $Id: nwa.cpp,v 1000.2 2004/06/01 18:05:08 gouriano Exp $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author:  Yuri Kapustin
 *
 * File Description:  xalgoalign application
 *
 */

#include <ncbi_pch.hpp>
#include <algo/align/mm_aligner.hpp>
#include <algo/align/nw_spliced_aligner16.hpp>
#include <algo/align/nw_spliced_aligner32.hpp>
#include <algo/align/nw_formatter.hpp>

#include "nwa.hpp"

// Spliced aligner class whose static defaults populate the intron-related
// command-line arguments below.
#define SPLALIGNER CSplicedAligner32

BEGIN_NCBI_SCOPE


/// Describe the command line accepted by the application.
///
/// Hides the standard logfile/conffile/version arguments, registers every
/// application-specific key and flag together with its default value, attaches
/// the value constraints for "matrix", "esf" and "pattern", and finally hands
/// the completed description object to SetupArgDescriptions().
void CAppNWA::Init()
{
    HideStdArgs(fHideLogfile | fHideConffile | fHideVersion);

    auto_ptr<CArgDescriptions> argdescr(new CArgDescriptions);
    argdescr->SetUsageContext(GetArguments().GetProgramName(),
                              "Demo application using xalgoalign library");

    argdescr->AddDefaultKey
        ("matrix", "matrix", "scoring matrix",
         CArgDescriptions::eString, "nucl");

    argdescr->AddFlag("spliced",
                      "Spliced mRna/EST-to-Genomic alignment "
                      "(consider specifying -esf zzxx)" );

    argdescr->AddOptionalKey("pattern", "pattern",
                             "Use HSPs to guide spliced alignment",
                             CArgDescriptions::eInteger);

    // input sequences (mandatory)
    argdescr->AddKey
        ("seq1", "seq1", "the first input sequence in fasta file",
         CArgDescriptions::eString);
    argdescr->AddKey
        ("seq2", "seq2", "the second input sequence in fasta file",
         CArgDescriptions::eString);

    argdescr->AddDefaultKey
        ("esf", "esf",
         "End-space free alignment. Format: lrLR where each character "
         "can be z (free end) or x (regular end) representing "
         "left and right ends. First sequence's ends are specified first.",
         CArgDescriptions::eString, "xxxx");

    // scoring weights; defaults are taken from the aligner classes
    argdescr->AddDefaultKey
        ("Wm", "match", "match bonus (nucleotide sequences)",
         CArgDescriptions::eInteger,
         NStr::IntToString(CNWAligner::GetDefaultWm()).c_str());

    argdescr->AddDefaultKey
        ("Wms", "mismatch", "mismatch penalty (nucleotide sequences)",
         CArgDescriptions::eInteger,
         NStr::IntToString(CNWAligner::GetDefaultWms()).c_str());

    argdescr->AddDefaultKey
        ("Wg", "gap", "gap opening penalty",
         CArgDescriptions::eInteger,
         NStr::IntToString(CNWAligner::GetDefaultWg()).c_str());

    argdescr->AddDefaultKey
        ("Ws", "space", "gap extension (space) penalty",
         CArgDescriptions::eInteger,
         NStr::IntToString(CNWAligner::GetDefaultWs()).c_str());

    argdescr->AddDefaultKey
        ("Wi0", "intron0", "type 0 (GT/AG) intron weight",
         CArgDescriptions::eInteger,
         NStr::IntToString(SPLALIGNER::GetDefaultWi(0)).c_str());

    argdescr->AddDefaultKey
        ("Wi1", "intron1", "type 1 (GC/AG) intron weight",
         CArgDescriptions::eInteger,
         NStr::IntToString(SPLALIGNER::GetDefaultWi(1)).c_str());

    argdescr->AddDefaultKey
        ("Wi2", "intron2", "type 2 (AT/AC) intron weight",
         CArgDescriptions::eInteger,
         NStr::IntToString(SPLALIGNER::GetDefaultWi(2)).c_str());

    const int intron_min_size = SPLALIGNER::GetDefaultIntronMinSize();
    argdescr->AddDefaultKey
        ("IntronMinSize", "IntronMinSize", "intron minimum size",
         CArgDescriptions::eInteger,
         NStr::IntToString(intron_min_size).c_str());

    argdescr->AddFlag("mm",
                      "Limit memory use to linear (Myers and Miller method)");

    argdescr->AddFlag("mt", "Use multiple threads");

    // supported output formats
    argdescr->AddOptionalKey
        ("o1", "o1", "Filename for type 1 output",
         CArgDescriptions::eString);

    argdescr->AddOptionalKey
        ("o2", "o2", "Filename for type 2 output",
         CArgDescriptions::eString);

    argdescr->AddOptionalKey
        ("ofasta", "ofasta",
         "Generate gapped FastA output for the aligner sequences",
         CArgDescriptions::eString);

    argdescr->AddOptionalKey
        ("oasn", "oasn", "ASN.1 output filename",
         CArgDescriptions::eString);

    argdescr->AddOptionalKey
        ("oexons", "exons",
         "Exon table output filename (spliced alignments only)",
         CArgDescriptions::eString);

    // value constraints
    // NOTE(review): the constraint objects are allocated with plain new and
    // never deleted here; presumably SetConstraint() takes ownership - confirm.
    CArgAllow_Strings* paa_st = new CArgAllow_Strings;
    paa_st->Allow("nucl")->Allow("blosum62");
    argdescr->SetConstraint("matrix", paa_st);

    // every 4-character combination of 'x' and 'z'
    CArgAllow_Strings* paa_esf = new CArgAllow_Strings;
    paa_esf->Allow("xxxx")->Allow("xxxz")->Allow("xxzx")->Allow("xxzz");
    paa_esf->Allow("xzxx")->Allow("xzxz")->Allow("xzzx")->Allow("xzzz");
    paa_esf->Allow("zxxx")->Allow("zxxz")->Allow("zxzx")->Allow("zxzz");
    paa_esf->Allow("zzxx")->Allow("zzxz")->Allow("zzzx")->Allow("zzzz");
    argdescr->SetConstraint("esf", paa_esf);

    CArgAllow* paa0 = new CArgAllow_Integers(5,1000);
    argdescr->SetConstraint("pattern", paa0);

    SetupArgDescriptions(argdescr.release());
}


/// Application entry point: runs the pairwise alignment and returns 0.
/// Failures are reported through exceptions thrown by x_RunOnPair().
int CAppNWA::Run()
{
    x_RunOnPair();
    return 0;
}


/// Open a file for writing.
///
/// @param filename
///   Path of the file to open.
/// @return
///   Owning pointer to the opened output stream.
/// @throws CAppNWAException (eCannotWriteFile)
///   If the stream is in a failed state right after construction.
auto_ptr<ofstream> open_ofstream (const string& filename)
{
    auto_ptr<ofstream> pofs0 ( new ofstream (filename.c_str()) );
    if(!*pofs0) {
        // trailing space keeps the file name separated from the message text
        NCBI_THROW(CAppNWAException,
                   eCannotWriteFile,
                   "Cannot write to file " + filename);
    }
    return pofs0;
}


void CAppNWA::x_RunOnPair() const
    THROWS((CAppNWAException, CAlgoAlignException))
{
    const CArgs& args = GetArgs();

    // analyze parameters
    const bool bMM = args["mm"];
    const bool bMT = args["mt"];
    const bool bMrna2Dna = args["spliced"];
    const bool bGuides = args["pattern"];

    bool output_type1 ( args["o1"] );
    bool output_type2 ( args["o2"] );
    bool output_asn ( args["oasn"] );
    bool output_fasta ( args["ofasta"] );
    bool output_exons ( args["oexons"] );

    // reject mutually inconsistent option combinations up front
    if(bMrna2Dna && args["matrix"].AsString() != "nucl") {
        NCBI_THROW(CAppNWAException,
                   eInconsistentParameters,
                   "Spliced alignment assumes nucleotide sequences "
                   "(matrix = nucl)");
    }

    if(output_exons && !bMrna2Dna) {
        NCBI_THROW(CAppNWAException,
                   eInconsistentParameters,
                   "Exon output can only be requested in mRna2Dna mode");
    }

    if(bMrna2Dna && bMM) {
        NCBI_THROW(CAppNWAException,
                   eInconsistentParameters,
                   "Linear memory approach is not yet supported for the "
                   "spliced alignment algorithm");
    }

    if(!bMrna2Dna && bGuides) {
        NCBI_THROW(CAppNWAException,
                   eInconsistentParameters,
                   "Guides are only supported in spliced mode" );
    }

    if(bMT && !bMM) {
        NCBI_THROW(CAppNWAException,
                   eInconsistentParameters,
                   "Multiple thread mode is currently supported "
                   "for Myers-Miller method only (-mm flag)");
    }

#ifndef NCBI_THREADS
    if(bMT) {
        NCBI_THROW(CAppNWAException,
                   eNotSupported,
                   "This application was built without multithreading support. "
                   "To run in multiple threads, please re-configure and rebuild"
                   " with proper option.");
    }
#endif

    // read input sequences
    vector<char> v1, v2;
    string seqname1, seqname2;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -