gbseq_formatter.cpp
来自「ncbi源码」· C++ 代码 · 共 632 行 · 第 1/2 页
CPP
632 行
/*
 * ===========================================================================
 * PRODUCTION $Log: gbseq_formatter.cpp,v $
 * PRODUCTION Revision 1000.1  2004/06/01 19:44:34  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.4
 * PRODUCTION
 * ===========================================================================
 */
/*  $Id: gbseq_formatter.cpp,v 1000.1 2004/06/01 19:44:34 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author:  Aaron Ucko, NCBI
*          Mati Shomrat
*
* File Description:
*   GBseq formatting
*/
#include <ncbi_pch.hpp>
#include <corelib/ncbistd.hpp>
#include <serial/objostr.hpp>

#include <objects/gbseq/GBSet.hpp>
#include <objects/gbseq/GBSeq.hpp>
#include <objects/gbseq/GBReference.hpp>
#include <objects/gbseq/GBKeyword.hpp>
#include <objects/gbseq/GBSeqid.hpp>
#include <objects/gbseq/GBFeature.hpp>
#include <objects/gbseq/GBInterval.hpp>
#include <objects/gbseq/GBQualifier.hpp>
#include <objects/seq/Seqdesc.hpp>

#include <objmgr/scope.hpp>
#include <objmgr/impl/scope_info.hpp>
#include <objmgr/seqdesc_ci.hpp>
#include <objmgr/util/sequence.hpp>
#include <objmgr/impl/synonyms.hpp>

#include <objtools/format/text_ostream.hpp>
#include <objtools/format/gbseq_formatter.hpp>
#include <objtools/format/items/locus_item.hpp>
#include <objtools/format/items/defline_item.hpp>
#include <objtools/format/items/accession_item.hpp>
#include <objtools/format/items/version_item.hpp>
#include <objtools/format/items/keywords_item.hpp>
#include <objtools/format/items/source_item.hpp>
#include <objtools/format/items/reference_item.hpp>
#include <objtools/format/items/comment_item.hpp>
#include <objtools/format/items/feature_item.hpp>
#include <objtools/format/items/sequence_item.hpp>
#include <objtools/format/items/segment_item.hpp>
#include <objtools/format/items/contig_item.hpp>
#include "utils.hpp"


BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)


/////////////////////////////////////////////////////////////////////////////
// static functions

// Normalize whitespace in 'str' in place.
//
// The string is split on any of: space, newline, carriage return, tab,
// backspace; the pieces are re-joined with a single space, and the result is
// passed through NStr::TruncateSpaces.  When 'location' is true, every
// ", " is additionally rewritten to "," before the final truncation.
//
// NOTE(review): whether runs of consecutive delimiters collapse to a single
// space depends on the default merge behaviour of NStr::Split - confirm
// against the toolkit version in use.
static void s_GBSeqStringCleanup(string& str, bool location = false)
{
    list<string> l;
    NStr::Split(str, " \n\r\t\b", l);
    str = NStr::Join(l, " ");
    if ( location ) {
        str = NStr::Replace(str, ", ", ",");
    }
    str = NStr::TruncateSpaces(str);
}


// Clean up a qualifier value in place: every double-quote character is
// replaced by a space, then the generic whitespace cleanup above is applied.
static void s_GBSeqQualCleanup(string& val)
{
    val = NStr::Replace(val, "\"", " ");
    s_GBSeqStringCleanup(val);
    // Stripping of a trailing period is intentionally left disabled:
    /*
    if ( NStr::EndsWith(val, ".") ) {
        val.erase(val.length() - 1);
    }
    */
}


/////////////////////////////////////////////////////////////////////////////
// Public

// constructor
CGBSeqFormatter::CGBSeqFormatter(void)
{
}


// destructor
CGBSeqFormatter::~CGBSeqFormatter(void)
{
}


// Begin output: emit the file header, then open the enclosing GBSet.
void CGBSeqFormatter::Start(IFlatTextOStream& text_os)
{
    x_WriteFileHeader(text_os);
    x_StartWriteGBSet(text_os);
}


// Begin a section: allocate a fresh CGBSeq that the Format* methods fill in.
void CGBSeqFormatter::StartSection(const CStartSectionItem&, IFlatTextOStream&)
{
    m_GBSeq.Reset(new CGBSeq);
    _ASSERT(m_GBSeq);
}


// End a section: write out the accumulated CGBSeq and release it.
void CGBSeqFormatter::EndSection(const CEndSectionItem&, IFlatTextOStream& text_os)
{
    x_WriteGBSeq(text_os);
    m_GBSeq.Reset();
    _ASSERT(!m_GBSeq);
}


// Finish output: close the enclosing GBSet.
void CGBSeqFormatter::End(IFlatTextOStream& text_os)
{
    x_EndWriteGBSet(text_os);
}


/////////////////////////////////////////////////////////////////////////////
// Locus
//

// Map a CSeq_inst strand value to the corresponding GBSeq strandedness.
// 'other', 'not_set' and any unrecognized value map to eStrandedness_not_set.
CGBSeq::TStrandedness s_GBSeqStrandedness(CSeq_inst::TStrand strand)
{
    switch ( strand ) {
    case CSeq_inst::eStrand_ss:
        return CGBSeq::eStrandedness_single_stranded;
    case CSeq_inst::eStrand_ds:
        return CGBSeq::eStrandedness_double_stranded;
    case CSeq_inst::eStrand_mixed:
        return CGBSeq::eStrandedness_mixed_stranded;
    case CSeq_inst::eStrand_other:
    case CSeq_inst::eStrand_not_set:
    default:
        break;
    }
    return CGBSeq::eStrandedness_not_set;
}


// Map a CMolInfo biomol value to the corresponding GBSeq molecule type.
// Unknown and any unlisted value map to eMoltype_nucleic_acid.
CGBSeq::TMoltype s_GBSeqMoltype(CMolInfo::TBiomol biomol)
{
    switch ( biomol ) {
    case CMolInfo::eBiomol_unknown:
        return CGBSeq::eMoltype_nucleic_acid;
    case CMolInfo::eBiomol_genomic:
    case CMolInfo::eBiomol_other_genetic:
    case CMolInfo::eBiomol_genomic_mRNA:
        return CGBSeq::eMoltype_dna;
    case CMolInfo::eBiomol_pre_RNA:
    case CMolInfo::eBiomol_cRNA:
    case CMolInfo::eBiomol_transcribed_RNA:
        return CGBSeq::eMoltype_rna;
    case CMolInfo::eBiomol_mRNA:
        return CGBSeq::eMoltype_mrna;
    case CMolInfo::eBiomol_rRNA:
        return CGBSeq::eMoltype_rrna;
    case CMolInfo::eBiomol_tRNA:
        return CGBSeq::eMoltype_trna;
    // NOTE(review): snRNA -> urna and scRNA -> snrna look asymmetric;
    // presumably this mirrors the legacy flat-file molecule names - confirm
    // against the GBSeq specification before changing.
    case CMolInfo::eBiomol_snRNA:
        return CGBSeq::eMoltype_urna;
    case CMolInfo::eBiomol_scRNA:
        return CGBSeq::eMoltype_snrna;
    case CMolInfo::eBiomol_peptide:
        return CGBSeq::eMoltype_peptide;
    case CMolInfo::eBiomol_snoRNA:
        return CGBSeq::eMoltype_snorna;
    default:
        break;
    }
    return CGBSeq::eMoltype_nucleic_acid;
}


// Map a CSeq_inst topology to the GBSeq topology: circular stays circular,
// every other value is reported as linear.
CGBSeq::TTopology s_GBSeqTopology(CSeq_inst::TTopology topology)
{
    if ( topology == CSeq_inst::eTopology_circular ) {
        return CGBSeq::eTopology_circular;
    }
    return CGBSeq::eTopology_linear;
}


// Return the update or create date of 'bsh' as a string.
//
// 'choice' must be CSeqdesc::e_Update_date or CSeqdesc::e_Create_date
// (asserted).  The first descriptor of that kind yielded by CSeqdesc_CI is
// formatted with DateToString; if there is none, the placeholder
// "01-JAN-1900" is returned.
string s_GetDate(const CBioseq_Handle& bsh, CSeqdesc::E_Choice choice)
{
    _ASSERT(choice == CSeqdesc::e_Update_date  ||
            choice == CSeqdesc::e_Create_date);

    CSeqdesc_CI desc(bsh, choice);
    if ( desc ) {
        string result;
        if ( desc->IsUpdate_date() ) {
            DateToString(desc->GetUpdate_date(), result);
        } else {
            DateToString(desc->GetCreate_date(), result);
        }
        return result;
    }

    return "01-JAN-1900";
}


// Fill in the locus-level fields of the current GBSeq: name, length,
// strandedness, molecule type, topology, division, update/create dates, and
// one other-seqid entry (FASTA-style string) per id of the bioseq.
void CGBSeqFormatter::FormatLocus(const CLocusItem& locus, IFlatTextOStream&)
{
    _ASSERT(m_GBSeq);
    CBioseqContext& ctx = *locus.GetContext();

    m_GBSeq->SetLocus(locus.GetName());
    m_GBSeq->SetLength(locus.GetLength());
    m_GBSeq->SetStrandedness(s_GBSeqStrandedness(locus.GetStrand()));
    m_GBSeq->SetMoltype(s_GBSeqMoltype(locus.GetBiomol()));
    m_GBSeq->SetTopology(s_GBSeqTopology(locus.GetTopology()));
    m_GBSeq->SetDivision(locus.GetDivision());
    m_GBSeq->SetUpdate_date(s_GetDate(ctx.GetHandle(), CSeqdesc::e_Update_date));
    m_GBSeq->SetCreate_date(s_GetDate(ctx.GetHandle(), CSeqdesc::e_Create_date));
    ITERATE (CBioseq::TId, it, ctx.GetBioseqIds()) {
        m_GBSeq->SetOther_seqids().push_back(CGBSeqid((*it)->AsFastaString()));
    }
}


/////////////////////////////////////////////////////////////////////////////
// Definition

// Store the definition line in the current GBSeq, dropping a single trailing
// period if one is present.
void CGBSeqFormatter::FormatDefline(const CDeflineItem& defline, IFlatTextOStream&)
{
    _ASSERT(m_GBSeq);
    m_GBSeq->SetDefinition(defline.GetDefline());
    if ( NStr::EndsWith(m_GBSeq->GetDefinition(), ".") ) {
        m_GBSeq->SetDefinition().resize(m_GBSeq->GetDefinition().length() - 1);
    }
}


/////////////////////////////////////////////////////////////////////////////
// Accession

// Store the primary accession and append every extra accession of 'acc' to
// the secondary-accessions list of the current GBSeq.
void CGBSeqFormatter::FormatAccession(const CAccessionItem& acc, IFlatTextOStream&)
{
    // Same precondition check as FormatLocus / FormatDefline: a section
    // must have been started before any item is formatted.
    _ASSERT(m_GBSeq);
    m_GBSeq->SetPrimary_accession(acc.GetAccession());
    ITERATE (CAccessionItem::TExtra_accessions, it, acc.GetExtraAccessions()) {
        m_GBSeq->SetSecondary_accessions().push_back(CGBSecondary_accn(*it));
    }
}


/////////////////////////////////////////////////////////////////////////////
// Version

// Store the accession reported by the version item as the accession-version
// of the current GBSeq.
void CGBSeqFormatter::FormatVersion(const CVersionItem& version, IFlatTextOStream&)
{
    // Same precondition check as FormatLocus / FormatDefline.
    _ASSERT(m_GBSeq);
    m_GBSeq->SetAccession_version(version.GetAccession());
}


/////////////////////////////////////////////////////////////////////////////
// Segment

// Store the segment information of the current GBSeq as "<num> of <count>".
void CGBSeqFormatter::FormatSegment(const CSegmentItem& seg, IFlatTextOStream&)
{
    // Same precondition check as FormatLocus / FormatDefline.
    _ASSERT(m_GBSeq);
    CNcbiOstrstream segment_line;

    segment_line << seg.GetNum() << " of " << seg.GetCount();

    m_GBSeq->SetSegment(CNcbiOstrstreamToString(segment_line));
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?