gbseq_formatter.cpp

来自「ncbi源码」· C++ 代码 · 共 632 行 · 第 1/2 页

CPP
632
字号
/*
 * ===========================================================================
 * PRODUCTION $Log: gbseq_formatter.cpp,v $
 * PRODUCTION Revision 1000.1  2004/06/01 19:44:34  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.4
 * PRODUCTION
 * ===========================================================================
 */
/*  $Id: gbseq_formatter.cpp,v 1000.1 2004/06/01 19:44:34 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author:  Aaron Ucko, NCBI
*          Mati Shomrat
*
* File Description:
*   GBseq formatting
*
*/
#include <ncbi_pch.hpp>
#include <corelib/ncbistd.hpp>
#include <serial/objostr.hpp>

#include <objects/gbseq/GBSet.hpp>
#include <objects/gbseq/GBSeq.hpp>
#include <objects/gbseq/GBReference.hpp>
#include <objects/gbseq/GBKeyword.hpp>
#include <objects/gbseq/GBSeqid.hpp>
#include <objects/gbseq/GBFeature.hpp>
#include <objects/gbseq/GBInterval.hpp>
#include <objects/gbseq/GBQualifier.hpp>
#include <objects/seq/Seqdesc.hpp>

#include <objmgr/scope.hpp>
#include <objmgr/impl/scope_info.hpp>
#include <objmgr/seqdesc_ci.hpp>
#include <objmgr/util/sequence.hpp>
#include <objmgr/impl/synonyms.hpp>

#include <objtools/format/text_ostream.hpp>
#include <objtools/format/gbseq_formatter.hpp>
#include <objtools/format/items/locus_item.hpp>
#include <objtools/format/items/defline_item.hpp>
#include <objtools/format/items/accession_item.hpp>
#include <objtools/format/items/version_item.hpp>
#include <objtools/format/items/keywords_item.hpp>
#include <objtools/format/items/source_item.hpp>
#include <objtools/format/items/reference_item.hpp>
#include <objtools/format/items/comment_item.hpp>
#include <objtools/format/items/feature_item.hpp>
#include <objtools/format/items/sequence_item.hpp>
#include <objtools/format/items/segment_item.hpp>
#include <objtools/format/items/contig_item.hpp>
#include "utils.hpp"


BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)


/////////////////////////////////////////////////////////////////////////////
// static functions

/// Normalize whitespace in a string, in place.
///
/// The string is split on space, newline, carriage return, tab and
/// backspace characters, re-joined with single spaces, and finally has
/// its leading/trailing spaces truncated.
///
/// @param str
///   String to clean up (modified in place).
/// @param location
///   When true, every ", " is additionally replaced by "," after the
///   re-join step.
static void s_GBSeqStringCleanup(string& str, bool location = false)
{
    list<string> l;
    NStr::Split(str, " \n\r\t\b", l);
    str = NStr::Join(l, " ");
    if ( location ) {
        str = NStr::Replace(str, ", ", ",");
    }
    str = NStr::TruncateSpaces(str);
}


/// Clean up a qualifier value, in place.
///
/// Every double quote is replaced by a space, then the value goes through
/// s_GBSeqStringCleanup().
///
/// @param val
///   Qualifier value to clean up (modified in place).
static void s_GBSeqQualCleanup(string& val)
{
    val = NStr::Replace(val, "\"", " ");
    s_GBSeqStringCleanup(val);
    /*
    if ( NStr::EndsWith(val, ".") ) {
        val.erase(val.length() - 1);
    }
    */
}


/////////////////////////////////////////////////////////////////////////////
// Public

// constructor
CGBSeqFormatter::CGBSeqFormatter(void)
{
}


// destructor
CGBSeqFormatter::~CGBSeqFormatter(void)
{
}


/// Begin the output: write the file header, then open the GBSet.
void CGBSeqFormatter::Start(IFlatTextOStream& text_os)
{
    x_WriteFileHeader(text_os);

    x_StartWriteGBSet(text_os);
}


/// Begin a section: allocate a fresh CGBSeq that the Format*() methods
/// fill in.
void CGBSeqFormatter::StartSection(const CStartSectionItem&, IFlatTextOStream&)
{
    m_GBSeq.Reset(new CGBSeq);
    _ASSERT(m_GBSeq);
}


/// End a section: write the accumulated CGBSeq and release it.
void CGBSeqFormatter::EndSection(const CEndSectionItem&, IFlatTextOStream& text_os)
{
    x_WriteGBSeq(text_os);

    m_GBSeq.Reset();
    _ASSERT(!m_GBSeq);
}


/// End the output: close the GBSet opened by Start().
void CGBSeqFormatter::End(IFlatTextOStream& text_os)
{
    x_EndWriteGBSet(text_os);
}


/////////////////////////////////////////////////////////////////////////////
//
// Locus
//

/// Map a CSeq_inst strand value to the corresponding GBSeq strandedness.
///
/// @return
///   single/double/mixed-stranded for ss/ds/mixed; not_set for "other",
///   "not_set" and any unrecognized value.
CGBSeq::TStrandedness s_GBSeqStrandedness(CSeq_inst::TStrand strand)
{
    switch ( strand ) {
    case CSeq_inst::eStrand_ss:
        return CGBSeq::eStrandedness_single_stranded;
    case CSeq_inst::eStrand_ds:
        return CGBSeq::eStrandedness_double_stranded;
    case CSeq_inst::eStrand_mixed:
        return CGBSeq::eStrandedness_mixed_stranded;
    case CSeq_inst::eStrand_other:
    case CSeq_inst::eStrand_not_set:
    default:
        break;
    }

    return CGBSeq::eStrandedness_not_set;
}


/// Map a CMolInfo biomol value to the corresponding GBSeq moltype.
///
/// @return
///   The mapped moltype; nucleic_acid for "unknown" and for any biomol
///   value that has no explicit case below.
CGBSeq::TMoltype s_GBSeqMoltype(CMolInfo::TBiomol biomol)
{
    switch ( biomol ) {
    case CMolInfo::eBiomol_unknown:
        return CGBSeq::eMoltype_nucleic_acid;
    case CMolInfo::eBiomol_genomic:
    case CMolInfo::eBiomol_other_genetic:
    case CMolInfo::eBiomol_genomic_mRNA:
        return CGBSeq::eMoltype_dna;
    case CMolInfo::eBiomol_pre_RNA:
    case CMolInfo::eBiomol_cRNA:
    case CMolInfo::eBiomol_transcribed_RNA:
        return CGBSeq::eMoltype_rna;
    case CMolInfo::eBiomol_mRNA:
        return CGBSeq::eMoltype_mrna;
    case CMolInfo::eBiomol_rRNA:
        return CGBSeq::eMoltype_rrna;
    case CMolInfo::eBiomol_tRNA:
        return CGBSeq::eMoltype_trna;
    // NOTE(review): snRNA -> urna and scRNA -> snrna are kept exactly as
    // found; confirm these two mappings against the GBSeq specification.
    case CMolInfo::eBiomol_snRNA:
        return CGBSeq::eMoltype_urna;
    case CMolInfo::eBiomol_scRNA:
        return CGBSeq::eMoltype_snrna;
    case CMolInfo::eBiomol_peptide:
        return CGBSeq::eMoltype_peptide;
    case CMolInfo::eBiomol_snoRNA:
        return CGBSeq::eMoltype_snorna;
    default:
        break;
    }

    return CGBSeq::eMoltype_nucleic_acid;
}


/// Map a CSeq_inst topology to the GBSeq topology.
///
/// @return
///   circular for eTopology_circular; linear for every other value.
CGBSeq::TTopology s_GBSeqTopology(CSeq_inst::TTopology topology)
{
    if ( topology == CSeq_inst::eTopology_circular ) {
        return CGBSeq::eTopology_circular;
    }
    return CGBSeq::eTopology_linear;
}


/// Get the update or create date of a bioseq as a string.
///
/// @param bsh
///   Bioseq whose descriptors are searched.
/// @param choice
///   Must be CSeqdesc::e_Update_date or CSeqdesc::e_Create_date
///   (asserted in debug builds).
/// @return
///   The first matching date descriptor converted with DateToString(),
///   or "01-JAN-1900" when the bioseq has no such descriptor.
string s_GetDate(const CBioseq_Handle& bsh, CSeqdesc::E_Choice choice)
{
    _ASSERT(choice == CSeqdesc::e_Update_date  ||
            choice == CSeqdesc::e_Create_date);

    CSeqdesc_CI desc(bsh, choice);
    if ( desc ) {
        string result;
        if ( desc->IsUpdate_date() ) {
            DateToString(desc->GetUpdate_date(), result);
        } else {
            DateToString(desc->GetCreate_date(), result);
        }
        return result;
    }

    // Placeholder date used when no descriptor is found.
    return "01-JAN-1900";
}


/// Fill in the locus-level fields of the current CGBSeq: locus name,
/// length, strandedness, moltype, topology, division, update/create dates
/// and the FASTA string of each of the bioseq's ids.
void CGBSeqFormatter::FormatLocus
(const CLocusItem& locus,
 IFlatTextOStream&)
{
    _ASSERT(m_GBSeq);
    CBioseqContext& ctx = *locus.GetContext();

    m_GBSeq->SetLocus(locus.GetName());
    m_GBSeq->SetLength(locus.GetLength());
    m_GBSeq->SetStrandedness(s_GBSeqStrandedness(locus.GetStrand()));
    m_GBSeq->SetMoltype(s_GBSeqMoltype(locus.GetBiomol()));
    m_GBSeq->SetTopology(s_GBSeqTopology(locus.GetTopology()));
    m_GBSeq->SetDivision(locus.GetDivision());
    m_GBSeq->SetUpdate_date(s_GetDate(ctx.GetHandle(), CSeqdesc::e_Update_date));
    m_GBSeq->SetCreate_date(s_GetDate(ctx.GetHandle(), CSeqdesc::e_Create_date));
    ITERATE (CBioseq::TId, it, ctx.GetBioseqIds()) {
        m_GBSeq->SetOther_seqids().push_back(CGBSeqid((*it)->AsFastaString()));
    }
}


/////////////////////////////////////////////////////////////////////////////
// Definition

/// Set the definition of the current CGBSeq from the defline item,
/// dropping one trailing period if present.
void CGBSeqFormatter::FormatDefline
(const CDeflineItem& defline,
 IFlatTextOStream&)
{
    _ASSERT(m_GBSeq);
    m_GBSeq->SetDefinition(defline.GetDefline());
    if ( NStr::EndsWith(m_GBSeq->GetDefinition(), ".") ) {
        m_GBSeq->SetDefinition().resize(m_GBSeq->GetDefinition().length() - 1);
    }
}


/////////////////////////////////////////////////////////////////////////////
// Accession

/// Set the primary accession of the current CGBSeq and append every extra
/// accession of the item as a secondary accession.
void CGBSeqFormatter::FormatAccession
(const CAccessionItem& acc,
 IFlatTextOStream&)
{
    // Same precondition check as FormatLocus() / FormatDefline().
    _ASSERT(m_GBSeq);
    m_GBSeq->SetPrimary_accession(acc.GetAccession());
    ITERATE (CAccessionItem::TExtra_accessions, it, acc.GetExtraAccessions()) {
        m_GBSeq->SetSecondary_accessions().push_back(CGBSecondary_accn(*it));
    }
}


/////////////////////////////////////////////////////////////////////////////
// Version

/// Set the accession.version field of the current CGBSeq.
void CGBSeqFormatter::FormatVersion
(const CVersionItem& version,
 IFlatTextOStream&)
{
    // Same precondition check as FormatLocus() / FormatDefline().
    _ASSERT(m_GBSeq);
    m_GBSeq->SetAccession_version(version.GetAccession());
}


/////////////////////////////////////////////////////////////////////////////
// Segment

/// Set the segment field of the current CGBSeq to "<num> of <count>".
void CGBSeqFormatter::FormatSegment
(const CSegmentItem& seg,
 IFlatTextOStream&)
{
    // Same precondition check as FormatLocus() / FormatDefline().
    _ASSERT(m_GBSeq);
    CNcbiOstrstream segment_line;

    segment_line << seg.GetNum() << " of " << seg.GetCount();

    m_GBSeq->SetSegment(CNcbiOstrstreamToString(segment_line));
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?