alnmrg.cpp

来自「ncbi源码」· C++ 代码 · 共 504 行 · 第 1/2 页

CPP
504
字号
/*
 * ===========================================================================
 * PRODUCTION $Log: alnmrg.cpp,v $
 * PRODUCTION Revision 1000.3  2004/06/01 19:40:52  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.20
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: alnmrg.cpp,v 1000.3 2004/06/01 19:40:52 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author:  Kamen Todorov, NCBI
*
* File Description:
*   Alignment merger. Demonstration of CAlnMix usage.
*
* ===========================================================================
*/

#include <ncbi_pch.hpp>
#include <corelib/ncbiapp.hpp>
#include <corelib/ncbiargs.hpp>
#include <corelib/ncbienv.hpp>

#include <serial/iterator.hpp>
#include <serial/objistr.hpp>
#include <serial/objostr.hpp>
#include <serial/serial.hpp>

#include <objects/seq/Bioseq.hpp>
#include <objects/seqloc/Seq_id.hpp>
#include <objects/seqloc/Textseq_id.hpp>
#include <objects/seqset/Seq_entry.hpp>
#include <objects/seqalign/Seq_align.hpp>
#include <objects/seqalign/Seq_align_set.hpp>
#include <objects/seqalign/Std_seg.hpp>
#include <objects/seq/Seq_annot.hpp>
#include <objects/submit/Seq_submit.hpp>

#include <objtools/data_loaders/genbank/gbloader.hpp>
#include <objmgr/object_manager.hpp>
#include <objmgr/scope.hpp>
#include <objmgr/seq_vector.hpp>

#include <objtools/alnmgr/alnmix.hpp>

USING_SCOPE(ncbi);
USING_SCOPE(objects);


// Alignment merger demo application (see "File Description" above).
//
// No access specifier precedes the member functions, so they are private by
// the language default for `class`; the explicit `private:` label below only
// separates the data members visually.
class CAlnMrgApp : public CNcbiApplication
{
    virtual void     Init                (void);
    virtual int      Run                 (void);
    CScope&          GetScope            (void)             const;
    void             SetOptions          (void);
    void             LoadInputAlignments (void);
    void             PrintMergedAlignment(void);
    void             View4               (int screen_width);

private:
    // Flags forwarded to CAlnMix; m_AddFlags is passed to CAlnMix::Add()
    // in LoadInputAlignments().
    CAlnMix::TMergeFlags         m_MergeFlags;
    CAlnMix::TAddFlags           m_AddFlags;

    // Created on demand by GetScope(); `mutable` because GetScope() is const.
    mutable CRef<CObjectManager> m_ObjMgr;
    mutable CRef<CScope>         m_Scope;

    // Members are destroyed in reverse declaration order, so m_Mix is
    // released before m_Scope and m_ObjMgr.
    // NOTE(review): presumably that is the reason for the ordering
    // requirement below -- confirm.
    CRef<CAlnMix>                m_Mix; // must appear AFTER m_ObjMgr!
};


// Describe the command-line interface of the application.
//
// Every option is registered with AddDefaultKey(), i.e. each one has a
// default value.  The finished description object is handed over to
// SetupArgDescriptions() via release().
void CAlnMrgApp::Init(void)
{
    // Create command-line argument descriptions class
    auto_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);

    // Specify USAGE context
    arg_desc->SetUsageContext
        (GetArguments().GetProgramBasename(),
         "Alignment merger demo program");

    // Describe the expected command-line arguments
    arg_desc->AddDefaultKey
        ("in", "input_file_name",
         "Name of file to read from (standard input by default)",
         CArgDescriptions::eInputFile, "-", CArgDescriptions::fPreOpen);

    arg_desc->AddDefaultKey
        ("binout", "out_file_name",
         "Binary output",
         CArgDescriptions::eOutputFile, "/dev/null",
         CArgDescriptions::fPreOpen);

    arg_desc->AddDefaultKey
        ("b", "bin_obj_type",
         "This forced the input file to be read in binary ASN.1 mode\n"
         "and specifies the type of the top-level ASN.1 object.\n",
         CArgDescriptions::eString, "");

    arg_desc->AddDefaultKey
        ("log", "log_file_name",
         "Name of log file to write to",
         CArgDescriptions::eOutputFile, "-", CArgDescriptions::fPreOpen);

    arg_desc->AddDefaultKey
        ("gen2est", "bool",
         "Perform Gen2EST Merge",
         CArgDescriptions::eBoolean, "f");

    arg_desc->AddDefaultKey
        ("gapjoin", "bool",
         "Consolidate segments of equal lens with a gap on the query sequence",
         CArgDescriptions::eBoolean, "f");

    arg_desc->AddDefaultKey
        ("mingap", "bool",
         "Consolidate all segments with a gap on the query sequence",
         CArgDescriptions::eBoolean, "f");

    arg_desc->AddDefaultKey
        ("minusstrand", "bool",
         "Minus strand on the refseq when merging.",
         CArgDescriptions::eBoolean, "f");

    arg_desc->AddDefaultKey
        ("fillunaln", "bool",
         "Fill unaligned regions.",
         CArgDescriptions::eBoolean, "f");

    arg_desc->AddDefaultKey
        ("calcscore", "bool",
         "Calculate each aligned seq pair score and use it when merging."
         "(Don't stitch off ObjMgr for this).",
         CArgDescriptions::eBoolean, "f");

    arg_desc->AddDefaultKey
        ("noobjmgr", "bool",
        // ObjMgr is used to identify sequences and obtain a bioseqhandle.
        // Also used to calc scores and determine the type of molecule
         "Skip ObjMgr in identifying sequences, calculating scores, etc.",
         CArgDescriptions::eBoolean, "f");

    arg_desc->AddDefaultKey
        ("queryseqmergeonly", "bool",
         "Merge the query seq only, keep subject seqs on separate rows "
         "(even if the same seq).",
         CArgDescriptions::eBoolean, "f");

    arg_desc->AddDefaultKey
        ("truncateoverlaps", "bool",
         "Truncate overlaps",
         CArgDescriptions::eBoolean, "f");

    arg_desc->AddDefaultKey
        ("forcetranslation", "bool",
         "Force translation of nucleotides",
         CArgDescriptions::eBoolean, "f");

    // Setup arg.descriptions for this application
    SetupArgDescriptions(arg_desc.release());
}


// Return the application's scope, creating it on first use.
//
// The first call creates the object manager, registers a CGBDataLoader with
// it as a default loader, creates the scope on top of that manager and adds
// the defaults to it.  Later calls return the same cached scope.
CScope& CAlnMrgApp::GetScope(void) const
{
    if (!m_Scope) {
        m_ObjMgr = new CObjectManager;

        m_ObjMgr->RegisterDataLoader
            (*new CGBDataLoader("ID", NULL, 2),
             CObjectManager::eDefault);

        m_Scope = new CScope(*m_ObjMgr);
        m_Scope->AddDefaults();
    }
    return *m_Scope;
}


void CAlnMrgApp::LoadInputAlignments(void)
{
    CArgs args = GetArgs();

    CNcbiIstream& is = args["in"].AsInputFile();

    // get the asn type of the top-level object
    string asn_type = args["b"].AsString();
    bool binary = asn_type.length();
    auto_ptr<CObjectIStream> in
        (CObjectIStream::Open(binary?eSerial_AsnBinary:eSerial_AsnText, is));
    if ( !binary ) {
        // auto-detection is possible in ASN.1 text mode
        asn_type = in->ReadFileHeader();
    }

    CTypesIterator i;
    CType<CSeq_align>::AddTo(i);

    CNcbiOstream& os = args["binout"].AsOutputFile();
    auto_ptr<CObjectOStream> binout
        (CObjectOStream::Open(eSerial_AsnBinary, os));

    if (asn_type == "Seq-entry") {
        CRef<CSeq_entry> se(new CSeq_entry);
        in->Read(Begin(*se), CObjectIStream::eNoFileHeader);
        *binout << *se;
        GetScope().AddTopLevelSeqEntry(*se);
        for (i = Begin(*se); i; ++i) {
            if (CType<CSeq_align>::Match(i)) {
                m_Mix->Add(*(CType<CSeq_align>::Get(i)), m_AddFlags);
            }
        }
    } else if (asn_type == "Seq-submit") {
        CRef<CSeq_submit> ss(new CSeq_submit);
        in->Read(Begin(*ss), CObjectIStream::eNoFileHeader);
        *binout << *ss;
        CType<CSeq_entry>::AddTo(i);
        int tse_cnt = 0;
        for (i = Begin(*ss); i; ++i) {
            if (CType<CSeq_align>::Match(i)) {
                m_Mix->Add(*(CType<CSeq_align>::Get(i)), m_AddFlags);
            } else if (CType<CSeq_entry>::Match(i)) {
                if ( !(tse_cnt++) ) {
                    //GetScope().AddTopLevelSeqEntry
                        (*(CType<CSeq_entry>::Get(i)));
                }
            }
        }
    } else if (asn_type == "Seq-align") {
        CRef<CSeq_align> sa(new CSeq_align);
        in->Read(Begin(*sa), CObjectIStream::eNoFileHeader);
        *binout << *sa;
        for (i = Begin(*sa); i; ++i) {

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?