genbank_formatter.cpp
来自「ncbi源码」· C++ 代码 · 共 757 行 · 第 1/2 页
CPP
757 行
/*
 * ===========================================================================
 * PRODUCTION $Log: genbank_formatter.cpp,v $
 * PRODUCTION Revision 1000.2 2004/06/01 19:44:37 gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.14
 * PRODUCTION
 * ===========================================================================
 */

/* $Id: genbank_formatter.cpp,v 1000.2 2004/06/01 19:44:37 gouriano Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
* National Center for Biotechnology Information
*
* This software/database is a "United States Government Work" under the
* terms of the United States Copyright Act. It was written as part of
* the author's official duties as a United States Government employee and
* thus cannot be copyrighted. This software/database is freely available
* to the public for use. The National Library of Medicine and the U.S.
* Government have not placed any restriction on its use or reproduction.
*
* Although all reasonable efforts have been taken to ensure the accuracy
* and reliability of the software and data, the NLM and the U.S.
* Government do not and cannot warrant the performance or results that
* may be obtained by using this software or data. The NLM and the U.S.
* Government disclaim all warranties, express or implied, including
* warranties of performance, merchantability or fitness for any particular
* purpose.
*
* Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author: Aaron Ucko, NCBI
* Mati Shomrat
*
* File Description:
*
*
*/
#include <ncbi_pch.hpp>
#include <corelib/ncbistd.hpp>

#include <objects/seqloc/Seq_loc.hpp>
#include <objects/biblio/Author.hpp>
#include <objects/general/Person_id.hpp>
#include <objmgr/util/sequence.hpp>

#include <objtools/format/text_ostream.hpp>
#include <objtools/format/genbank_formatter.hpp>
#include <objtools/format/items/locus_item.hpp>
#include <objtools/format/items/defline_item.hpp>
#include <objtools/format/items/accession_item.hpp>
#include <objtools/format/items/version_item.hpp>
#include <objtools/format/items/dbsource_item.hpp>
#include <objtools/format/items/segment_item.hpp>
#include <objtools/format/items/keywords_item.hpp>
#include <objtools/format/items/source_item.hpp>
#include <objtools/format/items/reference_item.hpp>
#include <objtools/format/items/comment_item.hpp>
#include <objtools/format/items/feature_item.hpp>
#include <objtools/format/items/basecount_item.hpp>
#include <objtools/format/items/sequence_item.hpp>
#include <objtools/format/items/wgs_item.hpp>
#include <objtools/format/items/primary_item.hpp>
#include <objtools/format/items/contig_item.hpp>
#include <objtools/format/items/genome_item.hpp>
#include <objtools/format/items/origin_item.hpp>
#include <objtools/format/context.hpp>
#include "utils.hpp"


BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)


// Sets the regular indent to 12 spaces and the feature indent to 21 spaces.
CGenbankFormatter::CGenbankFormatter(void)
{
    SetIndent(string(12, ' '));
    SetFeatIndent(string(21, ' '));
}


/////////////////////////////////////////////////////////////////////////////
//
// END SECTION

// Emits the section terminator: a paragraph consisting of the single
// line "//".
void CGenbankFormatter::EndSection
(const CEndSectionItem&,
 IFlatTextOStream& text_os)
{
    list<string> l;
    l.push_back("//");
    text_os.AddParagraph(l);
}


/////////////////////////////////////////////////////////////////////////////
//
// Locus
//
// NB: The old locus line format is no longer supported for GenBank.
// (DDBJ will still show the old line format)

// Locus line format as specified in the GenBank release notes:
//
// Positions  Contents
// ---------  --------
// 01-05      'LOCUS'
// 06-12      spaces
// 13-28      Locus name
// 29-29      space
// 30-40      Length of sequence, right-justified
// 41-41      space
// 42-43      bp
// 44-44      space
// 45-47      spaces, ss- (single-stranded), ds- (double-stranded), or
//            ms- (mixed-stranded)
// 48-53      NA, DNA, RNA, tRNA (transfer RNA), rRNA (ribosomal RNA),
//            mRNA (messenger RNA), uRNA (small nuclear RNA), snRNA,
//            snoRNA. Left justified.
// 54-55      space
// 56-63      'linear' followed by two spaces, or 'circular'
// 64-64      space
// 65-67      The division code (see Section 3.3 in GenBank release notes)
// 68-68      space
// 69-79      Date, in the form dd-MMM-yyyy (e.g., 15-MAR-1991)

// Builds the LOCUS line from the locus item and adds it to text_os as one
// paragraph.  The fixed-width string literals below are sized to match the
// column table above.
void CGenbankFormatter::FormatLocus
(const CLocusItem& locus,
 IFlatTextOStream& text_os)
{
    // Columns 45-47: every entry must be exactly three characters wide.
    // NOTE(review): indexed directly by locus.GetStrand(); assumes the value
    // is always in [0, 3] -- confirm against CLocusItem.
    static const string strands[] = { "   ", "ss-", "ds-", "ms-" };

    const CBioseqContext& ctx = *locus.GetContext();

    list<string> l;
    CNcbiOstrstream locus_line;

    // Length units: "aa" for proteins; for everything else "bp", except
    // "rc" when the context is both a WGS master and RSWGSNuc.
    string units = "bp";
    if ( !ctx.IsProt() ) {
        if ( ctx.IsWGSMaster()  &&  ctx.IsRSWGSNuc() ) {
            units = "rc";
        }
    } else {
        units = "aa";
    }

    // Columns 56-63: both alternatives are eight characters wide.
    string topology =
        (locus.GetTopology() == CSeq_inst::eTopology_circular) ?
        "circular" : "linear  ";

    // Columns 13-29: locus name, left justified, plus separator.
    locus_line.setf(IOS_BASE::left, IOS_BASE::adjustfield);
    locus_line << setw(16) << locus.GetName() << ' ';

    // Columns 30-47: right-justified length, units and strandedness.
    locus_line.setf(IOS_BASE::right, IOS_BASE::adjustfield);
    locus_line
        << setw(11) << locus.GetLength()
        << ' '
        << units
        << ' '
        << strands[locus.GetStrand()];

    // Columns 48-79: molecule type (left justified), two-space separator
    // (columns 54-55), topology, division and date.
    locus_line.setf(IOS_BASE::left, IOS_BASE::adjustfield);
    locus_line
        << setw(6) << s_GenbankMol[locus.GetBiomol()]
        << "  "
        << topology
        << ' '
        << locus.GetDivision()
        << ' '
        << locus.GetDate();

    Wrap(l, GetWidth(), "LOCUS", CNcbiOstrstreamToString(locus_line));

    text_os.AddParagraph(l);
}


/////////////////////////////////////////////////////////////////////////////
//
// Definition

// Wraps the definition line under the DEFINITION keyword and emits it as a
// paragraph.
void CGenbankFormatter::FormatDefline
(const CDeflineItem& defline,
 IFlatTextOStream& text_os)
{
    list<string> l;
    Wrap(l, "DEFINITION", defline.GetDefline());
    text_os.AddParagraph(l);
}


/////////////////////////////////////////////////////////////////////////////
//
// Accession

// Emits the ACCESSION paragraph.  When a region is set on the item,
// " REGION: <from>..<to>" is appended.  Nothing is emitted if the resulting
// line is empty.
void CGenbankFormatter::FormatAccession
(const CAccessionItem& acc,
 IFlatTextOStream& text_os)
{
    // NOTE(review): the ' ' argument is presumably the separator placed
    // between accessions -- confirm against x_FormatAccession.
    string acc_line = x_FormatAccession(acc, ' ');
    if ( acc.IsSetRegion() ) {
        acc_line += " REGION: ";
        acc_line += NStr::Int8ToString(acc.GetRegion().GetFrom());
        acc_line += "..";
        acc_line += NStr::Int8ToString(acc.GetRegion().GetTo());
    }
    if ( !acc_line.empty() ) {
        list<string> l;
        Wrap(l, "ACCESSION", acc_line);
        text_os.AddParagraph(l);
    }
}


/////////////////////////////////////////////////////////////////////////////
//
// Version

// Emits the VERSION paragraph.  With an empty accession only the bare
// keyword is written; otherwise the accession is followed by "GI:<gi>" when
// the GI is positive.
void CGenbankFormatter::FormatVersion
(const CVersionItem& version,
 IFlatTextOStream& text_os)
{
    list<string> l;
    CNcbiOstrstream version_line;

    if ( version.GetAccession().empty() ) {
        l.push_back("VERSION");
    } else {
        version_line << version.GetAccession();
        if ( version.GetGi() > 0 ) {
            // NOTE(review): other literals in this file lost repeated spaces
            // when the text was extracted; verify whether the separator
            // before "GI:" should be wider than one space.
            version_line << " GI:" << version.GetGi();
        }
        Wrap(l, GetWidth(), "VERSION", CNcbiOstrstreamToString(version_line));
    }

    text_os.AddParagraph(l);
}


/////////////////////////////////////////////////////////////////////////////
//
// Keywords

// Emits the KEYWORDS paragraph; line construction is delegated to
// x_GetKeywords.
void CGenbankFormatter::FormatKeywords
(const CKeywordsItem& keys,
 IFlatTextOStream& text_os)
{
    list<string> l;
    x_GetKeywords(keys, "KEYWORDS", l);
    text_os.AddParagraph(l);
}


/////////////////////////////////////////////////////////////////////////////
//
// Segment

// Emits the SEGMENT paragraph in the form "<num> of <count>".
void CGenbankFormatter::FormatSegment
(const CSegmentItem& seg,
 IFlatTextOStream& text_os)
{
    list<string> l;
    CNcbiOstrstream segment_line;

    segment_line << seg.GetNum() << " of " << seg.GetCount();

    Wrap(l, "SEGMENT", CNcbiOstrstreamToString(segment_line));
    text_os.AddParagraph(l);
}


/////////////////////////////////////////////////////////////////////////////
//
// Source

// SOURCE + ORGANISM

// Emits the SOURCE line(s) followed by the ORGANISM line(s) as a single
// paragraph.
void CGenbankFormatter::FormatSource
(const CSourceItem& source,
 IFlatTextOStream& text_os)
{
    list<string> l;
    x_FormatSourceLine(l, source);
    x_FormatOrganismLine(l, source);
    text_os.AddParagraph(l);
}


// Appends the SOURCE line to l: organelle immediately followed by the
// taxname, then -- if a common name is present -- the common name in
// parentheses (introduced by "anamorph: " when the item uses an anamorph).
void CGenbankFormatter::x_FormatSourceLine
(list<string>& l,
 const CSourceItem& source) const
{
    CNcbiOstrstream source_line;

    string prefix = source.IsUsingAnamorph() ? " (anamorph: " : " (";

    source_line << source.GetOrganelle() << source.GetTaxname();
    if ( !source.GetCommon().empty() ) {
        source_line << prefix << source.GetCommon() << ")";
    }

    Wrap(l, GetWidth(), "SOURCE", CNcbiOstrstreamToString(source_line));
}


// Appends the ORGANISM sub-paragraph to l: the taxname under the ORGANISM
// keyword, then the lineage terminated by a period under an empty keyword.
void CGenbankFormatter::x_FormatOrganismLine
(list<string>& l,
 const CSourceItem& source) const
{
    Wrap(l, GetWidth(), "ORGANISM", source.GetTaxname(), eSubp);
    Wrap(l, GetWidth(), kEmptyStr, source.GetLineage() + '.', eSubp);
}


/////////////////////////////////////////////////////////////////////////////
//
// REFERENCE

// The REFERENCE field consists of five parts: the keyword REFERENCE, and
// the subkeywords AUTHORS, TITLE (optional), JOURNAL, MEDLINE (optional),
// PUBMED (optional), and REMARK (optional).

// Builds one reference paragraph by calling the per-subfield helpers in
// output order, then emits it.
void CGenbankFormatter::FormatReference
(const CReferenceItem& ref,
 IFlatTextOStream& text_os)
{
    CBioseqContext& ctx = *ref.GetContext();

    list<string> l;

    x_Reference(l, ref, ctx);
    x_Authors(l, ref, ctx);
    x_Consortium(l, ref, ctx);
    x_Title(l, ref, ctx);
    x_Journal(l, ref, ctx);
    x_Medline(l, ref, ctx);
    x_Pubmed(l, ref, ctx);
    x_Remark(l, ref, ctx);

    text_os.AddParagraph(l);
}


// The REFERENCE line contains the number of the particular reference and
// (in parentheses) the range of bases in the sequence entry reported in
// this citation.
void CGenbankFormatter::x_Reference
(list<string>& l,
 const CReferenceItem& ref,
 CBioseqContext& ctx) const
{
    CNcbiOstrstream ref_line;

    // print serial number
    // Single-digit serials get one extra space of padding.
    // NOTE(review): the width of the padding was reconstructed (both
    // branches were identical in the extracted text) -- confirm against
    // the upstream file.
    ref_line << ref.GetSerial() << (ref.GetSerial() < 10 ? "  " : " ");

    // print sites or range
    CPubdesc::TReftype reftype = ref.GetReftype();

    if ( reftype == CPubdesc::eReftype_sites  ||
         reftype == CPubdesc::eReftype_feats ) {
        ref_line << "(sites)";
    } else if ( reftype == CPubdesc::eReftype_no_target ) {
        // nothing is printed after the serial number
    } else {
        // Use the reference's own location when it has one, otherwise the
        // location of the whole context.
        const CSeq_loc* loc = ref.GetLoc() != 0 ?
            ref.GetLoc() : &ctx.GetLocation();
        x_FormatRefLocation(ref_line, *loc, " to ", "; ", ctx);
    }
    Wrap(l, GetWidth(), "REFERENCE", CNcbiOstrstreamToString(ref_line));
}


void CGenbankFormatter::x_Authors
(list<string>& l,
 const CReferenceItem& ref,
 CBioseqContext& ctx) const
{
    string auth = CReferenceItem::GetAuthString(ref.GetAuthors());
    if ( !NStr::EndsWith(auth, ".") ) {
        auth += ".";
    }
    // (the remainder of this definition lies beyond the end of this excerpt)
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?