flat_embl_formatter.cpp
来自「ncbi源码」· C++ 代码 · 共 428 行
CPP
428 行
/*
 * ===========================================================================
 * PRODUCTION $Log: flat_embl_formatter.cpp,v $
 * PRODUCTION Revision 1000.1  2004/06/01 19:43:01  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.10
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: flat_embl_formatter.cpp,v 1000.1 2004/06/01 19:43:01 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author:  Aaron Ucko, NCBI
*
* File Description:
*   new (early 2003) flat-file generator -- EMBL(PEPT) output class
*
* ===========================================================================
*/

#include <ncbi_pch.hpp>
#include <objtools/flat/flat_embl_formatter.hpp>
#include <objtools/flat/flat_items.hpp>

#include <objects/seq/Seq_ext.hpp>
#include <objects/seq/Seq_hist.hpp>
#include <objects/seqloc/Textseq_id.hpp>
#include <objmgr/seq_vector.hpp>

#include <algorithm>

BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)

// NOTE(review): this text was recovered from a copy in which line breaks
// (and possibly runs of spaces) were collapsed.  String literals below are
// reproduced exactly as found; if column alignment of the output matters,
// confirm the literal padding against the upstream repository.


// Pads TAG according to WHERE, then line-wraps BODY to the stream's width
// using the padded tag as the prefix, appending the resulting lines to L.
// Returns L so calls can be chained.  HTML-aware wrapping is requested when
// DoHTML() is true.
inline
list<string>& CFlatEMBLFormatter::Wrap(list<string>& l, const string& tag,
                                       const string& body, EPadContext where)
{
    NStr::TWrapFlags flags = DoHTML() ? NStr::fWrap_HTMLPre : 0;
    string tag2;
    Pad(tag, tag2, where);
    NStr::Wrap(body, m_Stream->GetWidth(), l, flags, tag2);
    return l;
}


// Emits the "XX" spacer paragraph.  The one-line paragraph is built on
// first use and cached in m_XX for subsequent calls.
inline
void CFlatEMBLFormatter::x_AddXX(void)
{
    if (m_XX.empty()) {
        string tmp;
        m_XX.push_back(Pad("XX", tmp, ePara));
    }
    m_Stream->AddParagraph(m_XX);
}


// Emits the entry header: the ID line, the AC and SV lines, two DT lines
// (update date first, then creation date) and the DE line, with XX spacer
// paragraphs between the groups.
void CFlatEMBLFormatter::FormatHead(const CFlatHead& head)
{
    list<string> l;

    {{
        CNcbiOstrstream id_line;
        string          locus;
        TParent::Pad(head.GetLocus(), locus, 9);
        id_line << locus << " standard; ";
        if (head.GetTopology() == CSeq_inst::eTopology_circular) {
            id_line << "circular ";
        }
        id_line << head.GetMolString() << "; "
                << Upcase(head.GetDivision()) << "; "
                << m_Context->GetLength() << ' '
                << Upcase(m_Context->GetUnits()) << '.';
        Wrap(l, "ID", CNcbiOstrstreamToString(id_line));
        m_Stream->AddParagraph(l, &head);
        l.clear();
    }}
    x_AddXX();

    {{
        // Primary accession followed by any secondary ones, ';'-separated
        // and ';'-terminated.
        string acc = m_Context->GetPrimaryID().GetSeqIdString(false);
        ITERATE (list<string>, it, head.GetSecondaryIDs()) {
            acc += "; " + *it;
        }
        Wrap(l, "AC", acc + ';');
        // AC and SV share one paragraph, so the spacer between them is
        // pushed directly rather than via x_AddXX().
        l.push_back("XX ");
        Wrap(l, "SV", m_Context->GetAccession());
        m_Stream->AddParagraph(l, &head, &m_Context->GetPrimaryID());
        l.clear();
    }}
    x_AddXX();

    {{
        string date;
        FormatDate(head.GetUpdateDate(), date);
        Wrap(l, "DT", date);
        m_Stream->AddParagraph(l, &head, &head.GetUpdateDate());
        l.clear();

        date.erase();
        FormatDate(head.GetCreateDate(), date);
        Wrap(l, "DT", date);
        m_Stream->AddParagraph(l, &head, &head.GetCreateDate());
        l.clear();
    }}
    x_AddXX();

    Wrap(l, "DE", head.GetDefinition());
    m_Stream->AddParagraph(l, &head);
    // DBSOURCE for EMBLPEPT?
}


// Emits the KW paragraph: keywords sorted, each followed by ';' except the
// last, which gets '.'.  With no keywords at all a lone "." is emitted.
void CFlatEMBLFormatter::FormatKeywords(const CFlatKeywords& keys)
{
    x_AddXX();

    const list<string>& raw = keys.GetKeywords();
    vector<string>      v(raw.begin(), raw.end());
    sort(v.begin(), v.end());

    list<string> l, kw;
    ITERATE (vector<string>, it, v) {
        // Address comparison identifies the final element of the vector.
        kw.push_back(*it + (&*it == &v.back() ? '.' : ';'));
    }
    if (kw.empty()) {
        kw.push_back(".");
    }

    string tag;
    NStr::WrapList(kw, m_Stream->GetWidth(), " ", l, 0,
                   Pad("KW", tag, ePara));
    m_Stream->AddParagraph(l, &keys);
}


// Emits "SEGMENT <num> of <count>" as a CC line.
void CFlatEMBLFormatter::FormatSegment(const CFlatSegment& seg)
{
    x_AddXX();
    list<string> l;
    // Done as comment (no corresponding line type)
    Wrap(l, "CC",
         "SEGMENT " + NStr::IntToString(seg.GetNum()) + " of "
         + NStr::IntToString(seg.GetCount()));
    m_Stream->AddParagraph(l, &seg);
}


// Emits the OS line (formal name, plus the common name in parentheses when
// one is present) and the OC line (lineage terminated with '.').
void CFlatEMBLFormatter::FormatSource(const CFlatSource& source)
{
    x_AddXX();
    list<string> l;
    {{
        string name = source.GetFormalName();
        if ( !source.GetCommonName().empty() ) {
            name += " (" + source.GetCommonName() + ')';
        }
        Wrap(l, "OS", name);
    }}
    Wrap(l, "OC", source.GetLineage() + '.');
    m_Stream->AddParagraph(l, &source, &source.GetDescriptor());
    // XXX -- OG (Organelle)?
}


// Emits one reference as a single paragraph: RN, RC, RP, one RX line per
// MEDLINE UID (hyperlinked when producing HTML), RA, RT and RL.
void CFlatEMBLFormatter::FormatReference(const CFlatReference& ref)
{
    x_AddXX();
    list<string> l;
    Wrap(l, "RN", '[' + NStr::IntToString(ref.GetSerial()) + ']');
    Wrap(l, "RC", ref.GetRemark(), eSubp);
    // Wrap(l, "RC", "CONSORTIUM: " + ref.GetConsortium(), eSubp);
    Wrap(l, "RP", ref.GetRange(*m_Context), eSubp);

    ITERATE (set<int>, it, ref.GetMUIDs()) {
        if (DoHTML()) {
            Wrap(l, "RX",
                 "MEDLINE; <a href=\"" + ref.GetMedlineURL(*it) + "\">"
                 + NStr::IntToString(*it) + "</a>.",
                 eSubp);
        } else {
            Wrap(l, "RX", "MEDLINE; " + NStr::IntToString(*it) + '.',
                 eSubp);
        }
    }

    {{
        // Commas inside each author entry become spaces; entries are then
        // ','-separated with the last one terminated by ';'.
        const list<string>& raw_authors = ref.GetAuthors();
        list<string>        authors;
        ITERATE (list<string>, it, raw_authors) {
            authors.push_back(NStr::Replace(*it, ",", " ")
                              + (&*it == &raw_authors.back() ? ';' : ','));
        }
        string tag;
        NStr::WrapList(authors, m_Stream->GetWidth(), " ", l, 0,
                       Pad("RA", tag, ePara));
    }}

    {{
        string title, journal;
        ref.GetTitles(title, journal, *m_Context);
        Wrap(l, "RT", title + ';', eSubp);
        Wrap(l, "RL", journal, eSubp);
    }}

    m_Stream->AddParagraph(l, &ref, &ref.GetPubdesc());
}


// Emits a non-empty comment as CC lines, after tilde expansion and with a
// trailing '.' appended if it does not already end in one.  Empty comments
// produce no output at all (not even the XX spacer).
void CFlatEMBLFormatter::FormatComment(const CFlatComment& comment)
{
    if (comment.GetComment().empty()) {
        return;
    }
    x_AddXX();
    string comment2 = ExpandTildes(comment.GetComment(), eTilde_newline);
    if ( !NStr::EndsWith(comment2, ".") ) {
        comment2 += '.';
    }
    list<string> l;
    Wrap(l, "CC", comment2);
    m_Stream->AddParagraph(l, &comment);
}


// Emits the AH header line followed by one AS line per piece.
void CFlatEMBLFormatter::FormatPrimary(const CFlatPrimary& primary)
{
    x_AddXX();
    list<string> l;
    Wrap(l, "AH", primary.GetHeader());
    ITERATE (CFlatPrimary::TPieces, it, primary.GetPieces()) {
        string s;
        Wrap(l, "AS", it->Format(s));
    }
    m_Stream->AddParagraph
        (l, &primary,
         &m_Context->GetHandle().GetBioseqCore()->GetInst().GetHist());
}


// Emits the feature-table header ("Key" / "Location/Qualifiers") followed
// by a bare FH line.
void CFlatEMBLFormatter::FormatFeatHeader(const CFlatFeatHeader& fh)
{
    list<string> l;
    x_AddXX();
    Wrap(l, "Key", "Location/Qualifiers", eFeatHead);
    l.push_back("FH ");
    m_Stream->AddParagraph(l, &fh);
}


// Emits the SQ line: total length with units and, for non-protein
// sequences, the A/C/G/T/other counts.
void CFlatEMBLFormatter::FormatDataHeader(const CFlatDataHeader& dh)
{
    x_AddXX();
    list<string>    l;
    CNcbiOstrstream oss;
    oss << "Sequence " << m_Context->GetLength() << ' '
        << Upcase(m_Context->GetUnits()) << ';';
    if ( !m_Context->IsProt() ) {
        TSeqPos a, c, g, t, other;
        dh.GetCounts(a, c, g, t, other);
        oss << ' ' << a << " A; " << c << " C; " << g << " G; "
            << t << " T; " << other << " other;";
    }
    Wrap(l, "SQ", CNcbiOstrstreamToString(oss));
    m_Stream->AddParagraph(l, &dh);
}


// Emits the sequence itself, lower-cased, 60 residues per line in groups
// of ten, each line ending with the running residue count.  Output is
// produced in chunks of kChunkSize residues, one paragraph per chunk.
void CFlatEMBLFormatter::FormatData(const CFlatData& data)
{
    static const TSeqPos kChunkSize = 1200; // 20 lines

    string     buf;
    CSeqVector v = m_Context->GetHandle().GetSequenceView
        (data.GetLoc(), CBioseq_Handle::eViewConstructed,
         CBioseq_Handle::eCoding_Iupac);

    for (TSeqPos pos = 0;  pos < v.size();  pos += kChunkSize) {
        list<string>    lines;
        CNcbiOstrstream oss;
        // Always > 0 here because pos < v.size().
        TSeqPos         len = min(kChunkSize, v.size() - pos);
        v.GetSeqData(pos, pos + len, buf);

        oss << " ";
        for (TSeqPos i = 0;  i < len;  ++i) {
            if (i > 0  &&  i % 60 == 0) {
                // Close the previous (full) line with its running count.
                oss << ' ' << setw(9) << pos + i << "\n ";
            }
            if (i % 10 == 0) {
                oss << ' ';
            }
            // Go through unsigned char: passing a negative char to
            // tolower() is undefined.
            oss.put(static_cast<char>
                    (tolower(static_cast<unsigned char>(buf[i]))));
        }

        // Close the final line of the chunk.  The loop above only numbers
        // a line when the *next* one starts, so the last line must always
        // be numbered here -- including when it is a full 60-residue line
        // (previously that case was skipped, leaving the line without its
        // count).  A short line is first padded out to the count column;
        // each missing residue accounts for 11/10 columns because of the
        // group separators.
        const TSeqPos tail = len % 60;
        if (tail != 0) {
            oss << string((60 - tail) * 11 / 10 + 1, ' ');
        } else {
            oss << ' ';
        }
        oss << setw(9) << pos + len;

        NStr::Split(CNcbiOstrstreamToString(oss), "\n", lines);
        // should use narrower location
        m_Stream->AddParagraph(lines, &data, &data.GetLoc());
    }
}


// Emits the contig location string as CO lines.
void CFlatEMBLFormatter::FormatContig(const CFlatContig& contig)
{
    x_AddXX();
    list<string> l;
    Wrap(l, "CO", CFlatLoc(contig.GetLoc(), *m_Context).GetString());
    m_Stream->AddParagraph
        (l, &contig,
         &m_Context->GetHandle().GetBioseqCore()->GetInst().GetExt());
}


// Emits the WGS accession range as a CC line; in HTML mode the range is
// wrapped in an Entrez search link.
void CFlatEMBLFormatter::FormatWGSRange(const CFlatWGSRange& range)
{
    // Done as comment (no corresponding line type)
    x_AddXX();
    list<string> l;
    if (DoHTML()) {
        Wrap(l, "CC",
             "WGS: <a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?"
             "db=Nucleotide&cmd=Search&term=" + range.GetFirstID() + ':'
             + range.GetLastID() + "[ACCN]\">"
             + range.GetFirstID() + "-" + range.GetLastID() + "</a>");
    } else {
        Wrap(l, "CC",
             "WGS: " + range.GetFirstID() + "-" + range.GetLastID());
    }
    m_Stream->AddParagraph(l, &range, &range.GetUserObject());
}


// Emits "GENOME: <accession>" (plus the molecule type in parentheses when
// non-empty) as a CC line.
void CFlatEMBLFormatter::FormatGenomeInfo(const CFlatGenomeInfo& g)
{
    // Done as comment (no corresponding line type)
    x_AddXX();
    list<string> l;
    string s = "GENOME: " + g.GetAccession();
    if ( !g.GetMoltype().empty()) {
        s += " (" + g.GetMoltype() + ')';
    }
    Wrap(l, "CC", s);
    m_Stream->AddParagraph(l, &g, &g.GetUserObject());
}


// Pads S into OUT for the given context by delegating to TParent::Pad:
// width 5 for ordinary and sub-paragraph tags, width 21 with an "FH " or
// "FT " prefix for feature header / feature lines.  For any other context
// OUT is returned untouched.
string& CFlatEMBLFormatter::Pad(const string& s, string& out,
                                EPadContext where)
{
    switch (where) {
    case ePara:
    case eSubp:
        return TParent::Pad(s, out, 5);
    case eFeatHead:
        return TParent::Pad(s, out, 21, "FH ");
    case eFeat:
        return TParent::Pad(s, out, 21, "FT ");
    default:
        return out;
    }
}


END_SCOPE(objects)
END_NCBI_SCOPE

/*
* ===========================================================================
*
* $Log: flat_embl_formatter.cpp,v $
* Revision 1000.1  2004/06/01 19:43:01  gouriano
* PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.10
*
* Revision 1.10  2004/05/21 21:42:53  gorelenk
* Added PCH ncbi_pch.hpp
*
* Revision 1.9  2003/10/11 19:29:02  ucko
* Made the XX paragraph a normal member to avoid trouble when statically
* linked into multiple plugins.  (The old code wasn't threadsafe anyway.)
*
* Revision 1.8  2003/06/02 16:06:42  dicuccio
* Rearranged src/objects/ subtree.  This includes the following shifts:
*     - src/objects/asn2asn --> arc/app/asn2asn
*     - src/objects/testmedline --> src/objects/ncbimime/test
*     - src/objects/objmgr --> src/objmgr
*     - src/objects/util --> src/objmgr/util
*     - src/objects/alnmgr --> src/objtools/alnmgr
*     - src/objects/flat --> src/objtools/flat
*     - src/objects/validator --> src/objtools/validator
*     - src/objects/cddalignview --> src/objtools/cddalignview
* In addition, libseq now includes six of the objects/seq... libs, and libmmdb
* replaces the three libmmdb? libs.
*
* Revision 1.7  2003/04/10 20:08:22  ucko
* Arrange to pass the item as an argument to IFlatTextOStream::AddParagraph
*
* Revision 1.6  2003/03/31 19:52:06  ucko
* Fixed typo that made last commit ineffective.
*
* Revision 1.5  2003/03/31 16:25:14  ucko
* Kludge: move the static "XX" paragraph to file scope, as it otherwise
* becomes a common symbol on Darwin, preventing inclusion in shared libs.
*
* Revision 1.4  2003/03/29 04:14:23  ucko
* Move private inline methods from .hpp to .cpp.
*
* Revision 1.3  2003/03/21 18:49:17  ucko
* Turn most structs into (accessor-requiring) classes; replace some
* formerly copied fields with pointers to the original data.
*
* Revision 1.2  2003/03/11 15:37:51  kuznets
* iterate -> ITERATE
*
* Revision 1.1  2003/03/10 16:39:09  ucko
* Initial check-in of new flat-file generator
*
*
* ===========================================================================
*/
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?