⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 convert_seq.cpp

📁 ncbi源码
💻 CPP
字号:
/*
 * ===========================================================================
 * PRODUCTION $Log: convert_seq.cpp,v $
 * PRODUCTION Revision 1000.1  2004/06/01 18:30:25  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.5
 * PRODUCTION
 * ===========================================================================
 */
/*  $Id: convert_seq.cpp,v 1000.1 2004/06/01 18:30:25 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author:  Aaron Ucko, NCBI
*
* File Description:
*   Program to convert biological sequences between the formats the
*   C++ Toolkit supports.
*
* ===========================================================================
*/

#include <ncbi_pch.hpp>
#include <corelib/ncbiapp.hpp>

#include <serial/iterator.hpp>
#include <serial/objistr.hpp>
#include <serial/objostr.hpp>
#include <serial/serial.hpp>

#include <objects/seq/Bioseq.hpp>
#include <objects/seqset/Seq_entry.hpp>

#include <objtools/data_loaders/genbank/gbloader.hpp>
#include <objmgr/object_manager.hpp>
#include <objmgr/scope.hpp>
#include <objmgr/util/sequence.hpp>

#include <objtools/flat/flat_gbseq_formatter.hpp>
#include <objtools/flat/flat_gff_formatter.hpp>
#include <objtools/flat/flat_table_formatter.hpp>
#include <objtools/readers/fasta.hpp>
#include <objtools/readers/gff_reader.hpp>
#include <objtools/readers/readfeat.hpp>
#include <objtools/readers/agp_read.hpp>

#include <stdexcept>

// On Mac OS X 10.3, FixMath.h defines ff as a one-argument macro(!)
#ifdef ff
#  undef ff
#endif

USING_NCBI_SCOPE;
USING_SCOPE(objects);


/// Command-line application that reads one sequence object in the format
/// named by -infmt and writes it back out in the format named by -outfmt.
class CConversionApp : public CNcbiApplication
{
public:
    /// Declare the command-line arguments understood by the application.
    void Init(void);
    /// Set up the object manager and scope, then read and write the entry.
    int  Run (void);

private:
    /// Map "ddbj" / "embl" to the matching flat-file database flavour;
    /// every other name yields eDB_NCBI.
    static IFlatFormatter::EDatabase GetFlatFormat  (const string& name);
    /// Map "asn" / "asnb" / "xml" to the matching serial data format;
    /// every other name yields eSerial_None.
    static ESerialDataFormat         GetSerialFormat(const string& name);

    /// Obtain the input entry according to the "in", "infmt" and "type"
    /// arguments.
    CConstRef<CSeq_entry> Read (const CArgs& args);
    /// Emit the entry according to the "out", "outfmt" and "type" arguments.
    void                  Write(const CSeq_entry& entry, const CArgs& args);

    CRef<CObjectManager> m_ObjMgr;  ///< Created in Run()
    CRef<CScope>         m_Scope;   ///< Created in Run(); used by Read/Write
};


void CConversionApp::Init(void)
{
    auto_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
    arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
                              "Convert biological sequences between formats",
                              false);

    // NOTE: the "&(*new CArgAllow_Strings, ...)" expressions below rely on
    // CArgAllow_Strings providing a comma operator that collects the listed
    // values and yields the constraint object itself.
    arg_desc->AddDefaultKey("type", "AsnType", "Type of object to convert",
                            CArgDescriptions::eString, "Seq-entry");
    arg_desc->SetConstraint("type", &(*new CArgAllow_Strings,
                                      "Bioseq", "Bioseq-set", "Seq-entry"));

    arg_desc->AddDefaultKey("in", "InputFile", "File to read the object from",
                            CArgDescriptions::eInputFile, "-");
    arg_desc->AddKey("infmt", "Format", "Input format",
                     CArgDescriptions::eString);
    arg_desc->SetConstraint
        ("infmt", &(*new CArgAllow_Strings,
                    "ID", "asn", "asnb", "xml", "fasta", "gff", "tbl", "agp"));

    arg_desc->AddDefaultKey("out", "OutputFile", "File to write the object to",
                            CArgDescriptions::eOutputFile, "-");
    arg_desc->AddKey("outfmt", "Format", "Output format",
                     CArgDescriptions::eString);
    arg_desc->SetConstraint
        ("outfmt", &(*new CArgAllow_Strings,
                     "asn", "asnb", "xml", "ddbj", "embl", "genbank", "fasta",
                     "gff", "tbl", "gbseq/xml", "gbseq/asn", "gbseq/asnb"));

    SetupArgDescriptions(arg_desc.release());
}


int CConversionApp::Run(void)
{
    const CArgs& args = GetArgs();

    m_ObjMgr.Reset(new CObjectManager);
    m_ObjMgr->RegisterDataLoader(*new CGBDataLoader("ID"),
                                 CObjectManager::eDefault);

    m_Scope.Reset(new CScope(*m_ObjMgr));
    m_Scope->AddDefaults();

    CConstRef<CSeq_entry> entry = Read(args);
    // Entries read from a file are added to the scope explicitly; for
    // "ID" input the entry was itself obtained through the scope in Read().
    if (args["infmt"].AsString() != "ID") {
        m_Scope->AddTopLevelSeqEntry(const_cast<CSeq_entry&>(*entry));
    }
    Write(*entry, args);
    return 0;
}


ESerialDataFormat CConversionApp::GetSerialFormat(const string& name)
{
    if (name == "asn") {
        return eSerial_AsnText;
    } else if (name == "asnb") {
        return eSerial_AsnBinary;
    } else if (name == "xml") {
        return eSerial_Xml;
    } else {
        return eSerial_None;
    }
}


IFlatFormatter::EDatabase CConversionApp::GetFlatFormat(const string& name)
{
    if (name == "ddbj") {
        return IFlatFormatter::eDB_DDBJ;
    } else if (name == "embl") {
        return IFlatFormatter::eDB_EMBL;
    } else {
        return IFlatFormatter::eDB_NCBI;
    }
}


CConstRef<CSeq_entry> CConversionApp::Read(const CArgs& args)
{
    const string& infmt = args["infmt"].AsString();
    const string& type  = args["type" ].AsString();

    if (infmt == "ID") {
        // Here the "in" argument is a Seq-id string rather than a file name.
        CSeq_id        id(args["in"].AsString());
        CBioseq_Handle h = m_Scope->GetBioseqHandle(id);
        if ( !h ) {
            // Do not ask an invalid handle for its top-level entry.
            throw runtime_error("Unable to resolve sequence ID "
                                + args["in"].AsString());
        }
        return CConstRef<CSeq_entry>(&h.GetTopLevelSeqEntry());
    } else if (infmt == "fasta") {
        return ReadFasta(args["in"].AsInputFile());
    } else if (infmt == "gff") {
        return CGFFReader().Read(args["in"].AsInputFile(),
                                 CGFFReader::fGBQuals);
    } else if (infmt == "agp") {
        CRef<CBioseq_set> bss = AgpRead(args["in"].AsInputFile());
        if (bss->GetSeq_set().size() == 1) {
            // A lone entry is returned directly instead of inside a set.
            return bss->GetSeq_set().front();
        } else {
            CRef<CSeq_entry> entry(new CSeq_entry);
            entry->SetSet(*bss);
            return entry;
        }
    } else if (infmt == "tbl") {
        CRef<CSeq_annot> annot = CFeature_table_reader::ReadSequinFeatureTable
            (args["in"].AsInputFile());
        CRef<CSeq_entry> entry(new CSeq_entry);
        if (type == "Bioseq") {
            // Wrap the annotation in a virtual Bioseq identified by the
            // first Seq-id found inside the annotation.
            CBioseq& seq = entry->SetSeq();
            for (CTypeIterator<CSeq_id> it(*annot);  it;  ++it) {
                seq.SetId().push_back(CRef<CSeq_id>(&*it));
                BREAK(it);
            }
            seq.SetInst().SetRepr(CSeq_inst::eRepr_virtual);
            seq.SetInst().SetMol(CSeq_inst::eMol_not_set);
            seq.SetAnnot().push_back(annot);
        } else {
            entry->SetSet().SetAnnot().push_back(annot);
        }
        return entry;
    } else {
        // Remaining formats ("asn", "asnb", "xml") are serialized objects
        // whose top-level type is selected by the "type" argument.
        CRef<CSeq_entry> entry(new CSeq_entry);
        auto_ptr<CObjectIStream> in
            (CObjectIStream::Open(GetSerialFormat(infmt),
                                  args["in"].AsString(),
                                  eSerial_StdWhenDash));
        if (type == "Bioseq") {
            *in >> entry->SetSeq();
        } else if (type == "Bioseq-set") {
            *in >> entry->SetSet();
        } else {
            *in >> *entry;
        }
        return entry;
    }
}


void CConversionApp::Write(const CSeq_entry& entry, const CArgs& args)
{
    const string& outfmt = args["outfmt"].AsString();
    const string& type   = args["type"  ].AsString();

    if (outfmt == "genbank"  ||  outfmt == "embl"  ||  outfmt == "ddbj") {
        CFlatTextOStream ftos(args["out"].AsOutputFile());
        auto_ptr<IFlatFormatter> ff
            (CFlatTextFormatter::New(ftos, *m_Scope,
                                     IFlatFormatter::eMode_Entrez,
                                     GetFlatFormat(outfmt)));
        ff->Format(entry, *ff);
    } else if (outfmt == "fasta") {
        CFastaOstream out(args["out"].AsOutputFile());
        for (CTypeConstIterator<CBioseq> it(entry);  it;  ++it) {
            out.Write(m_Scope->GetBioseqHandle(*it));
        }
    } else if (outfmt == "gff") {
        CFlatTextOStream ftos(args["out"].AsOutputFile());
        CFlatGFFFormatter ff(ftos, *m_Scope, IFlatFormatter::eMode_Dump,
                             CFlatGFFFormatter::fGTFCompat
                             | CFlatGFFFormatter::fShowSeq);
        ff.Format(entry, ff);
    } else if (outfmt == "tbl") {
        CFlatTextOStream ftos(args["out"].AsOutputFile());
        CFlatTableFormatter ff(ftos, *m_Scope);
        ff.Format(entry, ff);
    } else if (NStr::StartsWith(outfmt, "gbseq/")) {
        CFlatGBSeqFormatter ff(*m_Scope, IFlatFormatter::eMode_Entrez);
        ff.Format(entry, ff);
        // The text after the 6-character "gbseq/" prefix names the
        // serial format used for the resulting GBSet.
        auto_ptr<CObjectOStream> out
            (CObjectOStream::Open(GetSerialFormat(outfmt.substr(6)),
                                  args["out"].AsString(),
                                  eSerial_StdWhenDash));
        *out << ff.GetGBSet();
    } else {
        auto_ptr<CObjectOStream> out
            (CObjectOStream::Open(GetSerialFormat(outfmt),
                                  args["out"].AsString(),
                                  eSerial_StdWhenDash));
        if (type == "Bioseq") {
            if (entry.IsSet()) {
                ERR_POST(Warning
                         << "Possible truncation in conversion to Bioseq");
                // Emit the first Bioseq contained in the set.  Taking
                // *GetSeq_set().front() would serialize a Seq-entry rather
                // than a Bioseq and is undefined for an empty set.
                CTypeConstIterator<CBioseq> first_seq(entry);
                if (first_seq) {
                    *out << *first_seq;
                } else {
                    throw runtime_error
                        ("No Bioseq available for conversion to Bioseq");
                }
            } else {
                *out << entry.GetSeq();
            }
        } else if (type == "Bioseq-set") {
            if (entry.IsSet()) {
                *out << entry.GetSet();
            } else {
                // Wrap the single sequence entry in a temporary set.
                CBioseq_set bss;
                bss.SetSeq_set().push_back
                    (CRef<CSeq_entry>(const_cast<CSeq_entry*>(&entry)));
                *out << bss;
            }
        } else {
            *out << entry;
        }
    }
}


int main(int argc, const char* argv[])
{
    // Execute main application function
    return CConversionApp().AppMain(argc, argv, 0, eDS_Default, 0);
}


/*
* ===========================================================================
*
* $Log: convert_seq.cpp,v $
* Revision 1000.1  2004/06/01 18:30:25  gouriano
* PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.5
*
* Revision 1.5  2004/05/21 21:41:40  gorelenk
* Added PCH ncbi_pch.hpp
*
* Revision 1.4  2004/05/03 18:01:34  ucko
* Kill unwanted definition of ff as a macro, if present (as on Mac OS 10.3)
*
* Revision 1.3  2004/02/27 20:07:10  jcherry
* Added agp as input format
*
* Revision 1.2  2004/01/05 17:59:32  vasilche
* Moved genbank loader and its readers sources to new location in objtools.
* Genbank is now in library libncbi_xloader_genbank.
* Id1 reader is now in library libncbi_xreader_id1.
* OBJMGR_LIBS macro updated correspondingly.
*
* Old headers temporarily will contain redirection to new location
* for compatibility:
* objmgr/gbloader.hpp > objtools/data_loaders/genbank/gbloader.hpp
* objmgr/reader_id1.hpp > objtools/data_loaders/genbank/readers/id1/reader_id1.hpp
*
* Revision 1.1  2003/12/03 20:58:40  ucko
* Add new universal sequence converter app.
*
*
* ===========================================================================
*/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -