genbank_formatter.cpp

来自「ncbi源码」· C++ 代码 · 共 757 行 · 第 1/2 页

CPP
757
字号
/*
 * ===========================================================================
 * PRODUCTION $Log: genbank_formatter.cpp,v $
 * PRODUCTION Revision 1000.2  2004/06/01 19:44:37  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.14
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: genbank_formatter.cpp,v 1000.2 2004/06/01 19:44:37 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author:  Aaron Ucko, NCBI
*          Mati Shomrat
*
* File Description:
*           
*
*/
#include <ncbi_pch.hpp>
#include <corelib/ncbistd.hpp>

#include <objects/seqloc/Seq_loc.hpp>
#include <objects/biblio/Author.hpp>
#include <objects/general/Person_id.hpp>
#include <objmgr/util/sequence.hpp>

#include <objtools/format/text_ostream.hpp>
#include <objtools/format/genbank_formatter.hpp>
#include <objtools/format/items/locus_item.hpp>
#include <objtools/format/items/defline_item.hpp>
#include <objtools/format/items/accession_item.hpp>
#include <objtools/format/items/version_item.hpp>
#include <objtools/format/items/dbsource_item.hpp>
#include <objtools/format/items/segment_item.hpp>
#include <objtools/format/items/keywords_item.hpp>
#include <objtools/format/items/source_item.hpp>
#include <objtools/format/items/reference_item.hpp>
#include <objtools/format/items/comment_item.hpp>
#include <objtools/format/items/feature_item.hpp>
#include <objtools/format/items/basecount_item.hpp>
#include <objtools/format/items/sequence_item.hpp>
#include <objtools/format/items/wgs_item.hpp>
#include <objtools/format/items/primary_item.hpp>
#include <objtools/format/items/contig_item.hpp>
#include <objtools/format/items/genome_item.hpp>
#include <objtools/format/items/origin_item.hpp>
#include <objtools/format/context.hpp>
#include "utils.hpp"


BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)


// Sets the two indents used by this formatter: a 12-space general indent
// and a 21-space feature indent.
CGenbankFormatter::CGenbankFormatter(void)
{
    SetIndent(string(12, ' '));
    SetFeatIndent(string(21, ' '));
}


///////////////////////////////////////////////////////////////////////////
//
// END SECTION

// Emits the section terminator: a paragraph consisting of the single
// line "//".
void CGenbankFormatter::EndSection
(const CEndSectionItem&,
 IFlatTextOStream& text_os)
{
    list<string> l;
    l.push_back("//");
    text_os.AddParagraph(l);
}


///////////////////////////////////////////////////////////////////////////
//
// Locus
//
// NB: The old locus line format is no longer supported for GenBank.
// (DDBJ will still show the old line format)

// Locus line format as specified in the GenBank release notes:
//
// Positions  Contents
// ---------  --------
// 01-05      'LOCUS'
// 06-12      spaces
// 13-28      Locus name
// 29-29      space
// 30-40      Length of sequence, right-justified
// 41-41      space
// 42-43      bp
// 44-44      space
// 45-47      spaces, ss- (single-stranded), ds- (double-stranded), or
//            ms- (mixed-stranded)
// 48-53      NA, DNA, RNA, tRNA (transfer RNA), rRNA (ribosomal RNA), 
//            mRNA (messenger RNA), uRNA (small nuclear RNA), snRNA,
//            snoRNA. Left justified.
// 54-55      space
// 56-63      'linear' followed by two spaces, or 'circular'
// 64-64      space
// 65-67      The division code (see Section 3.3 in GenBank release notes)
// 68-68      space
// 69-79      Date, in the form dd-MMM-yyyy (e.g., 15-MAR-1991)

// Builds the LOCUS line from the locus item (name, length, units, strand,
// molecule type, topology, division, date) and adds it to text_os as one
// paragraph.
void CGenbankFormatter::FormatLocus
(const CLocusItem& locus, 
 IFlatTextOStream& text_os)
{
    static const string strands[]  = { "   ", "ss-", "ds-", "ms-" };
    static const size_t kNumStrands = sizeof(strands) / sizeof(strands[0]);

    const CBioseqContext& ctx = *locus.GetContext();

    list<string> l;
    CNcbiOstrstream locus_line;

    // Length units: "aa" for proteins; otherwise "bp", except "rc" when the
    // context reports both IsWGSMaster() and IsRSWGSNuc().
    string units = "bp";
    if ( !ctx.IsProt() ) {
        if ( ctx.IsWGSMaster()  &&  ctx.IsRSWGSNuc() ) {
            units = "rc";
        }
    } else {
        units = "aa";
    }

    // "linear" is padded to the same 8-character width as "circular".
    string topology = (locus.GetTopology() == CSeq_inst::eTopology_circular) ?
                "circular" : "linear  ";

    // The strand table has only four entries; a strand value outside that
    // range would read past the end of the array, so fall back to the blank
    // (3-space) entry instead.
    const size_t strand_idx = static_cast<size_t>(locus.GetStrand());
    const string& strand =
        (strand_idx < kNumStrands) ? strands[strand_idx] : strands[0];

    // Name is left-justified in a 16-character field.
    locus_line.setf(IOS_BASE::left, IOS_BASE::adjustfield);
    locus_line << setw(16) << locus.GetName() << ' ';

    // Length is right-justified in an 11-character field.
    locus_line.setf(IOS_BASE::right, IOS_BASE::adjustfield);
    locus_line
        << setw(11) << locus.GetLength()
        << ' '
        << units
        << ' '
        << strand;

    // Molecule type is left-justified in a 6-character field.
    locus_line.setf(IOS_BASE::left, IOS_BASE::adjustfield);
    locus_line
        << setw(6) << s_GenbankMol[locus.GetBiomol()]
        << "  "
        << topology
        << ' '
        << locus.GetDivision()
        << ' '
        << locus.GetDate();

    Wrap(l, GetWidth(), "LOCUS", CNcbiOstrstreamToString(locus_line));
    text_os.AddParagraph(l);
}


///////////////////////////////////////////////////////////////////////////
//
// Definition

// Wraps the definition line under the DEFINITION tag and adds it to
// text_os as one paragraph.
void CGenbankFormatter::FormatDefline
(const CDeflineItem& defline,
 IFlatTextOStream& text_os)
{
    list<string> l;
    Wrap(l, "DEFINITION", defline.GetDefline());
    text_os.AddParagraph(l);
}


///////////////////////////////////////////////////////////////////////////
//
// Accession

// Emits the ACCESSION paragraph. When the item has a region set,
// " REGION: <from>..<to>" is appended to the accession text. Nothing is
// emitted if the resulting line is empty.
void CGenbankFormatter::FormatAccession
(const CAccessionItem& acc, 
 IFlatTextOStream& text_os)
{
    string acc_line = x_FormatAccession(acc, ' ');
    if ( acc.IsSetRegion() ) {
        acc_line += " REGION: ";
        acc_line += NStr::Int8ToString(acc.GetRegion().GetFrom());
        acc_line += "..";
        acc_line += NStr::Int8ToString(acc.GetRegion().GetTo());
    }
    if ( !acc_line.empty() ) {
        list<string> l;
        Wrap(l, "ACCESSION", acc_line);
        text_os.AddParagraph(l);
    }
}


///////////////////////////////////////////////////////////////////////////
//
// Version

// Emits the VERSION paragraph. With an empty accession only the bare
// "VERSION" keyword is written; otherwise the accession is written,
// followed by "  GI:<gi>" when the GI is positive.
void CGenbankFormatter::FormatVersion
(const CVersionItem& version,
 IFlatTextOStream& text_os)
{
    list<string> l;
    CNcbiOstrstream version_line;

    if ( version.GetAccession().empty() ) {
        l.push_back("VERSION");
    } else {
        version_line << version.GetAccession();
        if ( version.GetGi() > 0 ) {
            version_line << "  GI:" << version.GetGi();
        }
        Wrap(l, GetWidth(), "VERSION", CNcbiOstrstreamToString(version_line));
    }

    text_os.AddParagraph(l);
}


///////////////////////////////////////////////////////////////////////////
//
// Keywords

// Emits the KEYWORDS paragraph; the lines are produced by x_GetKeywords.
void CGenbankFormatter::FormatKeywords
(const CKeywordsItem& keys,
 IFlatTextOStream& text_os)
{
    list<string> l;
    x_GetKeywords(keys, "KEYWORDS", l);
    text_os.AddParagraph(l);
}


///////////////////////////////////////////////////////////////////////////
//
// Segment

// Emits the SEGMENT paragraph in the form "<num> of <count>".
void CGenbankFormatter::FormatSegment
(const CSegmentItem& seg,
 IFlatTextOStream& text_os)
{
    list<string> l;
    CNcbiOstrstream segment_line;

    segment_line << seg.GetNum() << " of " << seg.GetCount();

    Wrap(l, "SEGMENT", CNcbiOstrstreamToString(segment_line));
    text_os.AddParagraph(l);
}


///////////////////////////////////////////////////////////////////////////
//
// Source

// SOURCE + ORGANISM

// Emits the SOURCE line(s) followed by the ORGANISM line(s) as a single
// paragraph.
void CGenbankFormatter::FormatSource
(const CSourceItem& source,
 IFlatTextOStream& text_os)
{
    list<string> l;
    x_FormatSourceLine(l, source);
    x_FormatOrganismLine(l, source);
    text_os.AddParagraph(l);
}


// Appends the SOURCE line to l: organelle text followed by the taxname and,
// when a common name is present, the common name in parentheses (prefixed
// with "anamorph: " when the item reports IsUsingAnamorph()).
void CGenbankFormatter::x_FormatSourceLine
(list<string>& l,
 const CSourceItem& source) const
{
    CNcbiOstrstream source_line;

    string prefix = source.IsUsingAnamorph() ? " (anamorph: " : " (";

    source_line << source.GetOrganelle() << source.GetTaxname();
    if ( !source.GetCommon().empty() ) {
        source_line << prefix << source.GetCommon() << ")";
    }

    Wrap(l, GetWidth(), "SOURCE", CNcbiOstrstreamToString(source_line));
}


// Appends the ORGANISM sub-paragraph to l: the taxname under the ORGANISM
// tag, then the lineage (with a trailing '.') under an empty tag.
void CGenbankFormatter::x_FormatOrganismLine
(list<string>& l,
 const CSourceItem& source) const
{
    Wrap(l, GetWidth(), "ORGANISM", source.GetTaxname(), eSubp);
    Wrap(l, GetWidth(), kEmptyStr, source.GetLineage() + '.', eSubp);
}


///////////////////////////////////////////////////////////////////////////
//
// REFERENCE

// The REFERENCE field consists of the keyword REFERENCE and the
// subkeywords AUTHORS, TITLE (optional), JOURNAL, MEDLINE (optional),
// PUBMED (optional), and REMARK (optional).
// This formatter additionally runs a consortium step (x_Consortium)
// between the authors and the title.
void CGenbankFormatter::FormatReference
(const CReferenceItem& ref,
 IFlatTextOStream& text_os)
{
    CBioseqContext& ctx = *ref.GetContext();

    list<string> l;

    // The helpers append to l in the order the parts appear in the output.
    x_Reference(l, ref, ctx);
    x_Authors(l, ref, ctx);
    x_Consortium(l, ref, ctx);
    x_Title(l, ref, ctx);
    x_Journal(l, ref, ctx);
    x_Medline(l, ref, ctx);
    x_Pubmed(l, ref, ctx);
    x_Remark(l, ref, ctx);

    text_os.AddParagraph(l);
}


// The REFERENCE line contains the number of the particular reference and
// (in parentheses) the range of bases in the sequence entry reported in
// this citation.
void CGenbankFormatter::x_Reference
(list<string>& l,
 const CReferenceItem& ref,
 CBioseqContext& ctx) const
{
    CNcbiOstrstream ref_line;

    // print serial number
    // (serials below 10 get two trailing spaces, all others one)
    ref_line << ref.GetSerial() << (ref.GetSerial() < 10 ? "  " : " ");

    // print sites or range
    CPubdesc::TReftype reftype = ref.GetReftype();

    if ( reftype == CPubdesc::eReftype_sites  ||
         reftype == CPubdesc::eReftype_feats ) {
        ref_line << "(sites)";
    } else if ( reftype == CPubdesc::eReftype_no_target ) {
        // nothing is printed after the serial number for this reference type
    } else {
        // Use the reference's own location when it has one, otherwise the
        // location of the whole context.
        const CSeq_loc* loc = ref.GetLoc() != 0 ? ref.GetLoc() : &ctx.GetLocation();
        x_FormatRefLocation(ref_line, *loc, " to ", "; ", ctx);
    }
    Wrap(l, GetWidth(), "REFERENCE", CNcbiOstrstreamToString(ref_line));
}


void CGenbankFormatter::x_Authors
(list<string>& l,
 const CReferenceItem& ref,
 CBioseqContext& ctx) const
{
    string auth = CReferenceItem::GetAuthString(ref.GetAuthors());
    if ( !NStr::EndsWith(auth, ".") ) {
        auth += ".";
    }

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?