embl_formatter.cpp

来自「ncbi源码」· C++ 代码 · 共 592 行

CPP
592
字号
/*
 * ===========================================================================
 * PRODUCTION $Log: embl_formatter.cpp,v $
 * PRODUCTION Revision 1000.2  2004/06/01 19:44:07  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.7
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: embl_formatter.cpp,v 1000.2 2004/06/01 19:44:07 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author:  Aaron Ucko, NCBI
*          Mati Shomrat
*
* File Description:
*           Implementation of CEmblFormatter, which writes flat-file items
*           as EMBL-style paragraphs (ID, AC, SV, DT, DE, KW lines separated
*           by "XX" spacer lines and terminated by "//").  Several item
*           kinds (source, reference, comment, features, base count,
*           sequence) are not implemented yet; their bodies are kept below
*           as commented-out drafts.
*
*/

#include <ncbi_pch.hpp>
#include <corelib/ncbistd.hpp>

#include <objtools/format/text_ostream.hpp>
#include <objtools/format/items/locus_item.hpp>
#include <objtools/format/items/defline_item.hpp>
#include <objtools/format/items/version_item.hpp>
#include <objtools/format/items/date_item.hpp>
#include <objtools/format/items/keywords_item.hpp>
#include <objtools/format/items/source_item.hpp>
#include <objtools/format/embl_formatter.hpp>
#include <objtools/format/context.hpp>
#include "utils.hpp"


BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)


// NB: For more complete documentation on the EMBL format see EMBL's user
// manual (http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html)


// Sets a five-space indent and pre-builds the one-line "XX" spacer
// paragraph that x_AddXX() emits between record sections.
CEmblFormatter::CEmblFormatter(void)
{
    SetIndent(string(5, ' '));
    //SetFeatIndent(string(21, ' '));

    string tmp;
    m_XX.push_back(Pad("XX", tmp, ePara));
}


///////////////////////////////////////////////////////////////////////////
//
// END SECTION

// Writes the "//" line that closes a section.
void CEmblFormatter::EndSection
(const CEndSectionItem&,
 IFlatTextOStream& text_os)
{
    list<string> l;
    l.push_back("//");
    text_os.AddParagraph(l);
}


///////////////////////////////////////////////////////////////////////////
//
// ID (EMBL's locus line)
//
// General format:
//      ID   entryname  dataclass; molecule; division; sequencelength BP.
//
// Entryname: stable identifier.
// Dataclass: The second item on the ID line indicates the data class of the entry.
// Molecule Type: The third item on the line is the type of molecule as stored.
// Database division: This indicates to which division the entry belongs.
// Sequence length: The last item on the ID line is the length of the sequence.

// Builds the ID line from the locus item and writes it as one paragraph.
//
// The molecule label comes either from the local EMBL table or from
// s_GenbankMol, depending on the UseEmblMolType() configuration flag.
void CEmblFormatter::FormatLocus
(const CLocusItem& locus,
 IFlatTextOStream& text_os)
{
    // EMBL molecule labels, indexed by the locus item's biomol value.
    static const string embl_mol [14] = {
        "xxx", "DNA", "RNA", "RNA", "RNA", "RNA", "RNA",
        "RNA", "AA ", "DNA", "DNA", "RNA", "RNA", "RNA"
    };
    static const size_t kNumEmblMol = sizeof(embl_mol) / sizeof(embl_mol[0]);

    const CBioseqContext& ctx = *locus.GetContext();

    list<string> l;
    CNcbiOstrstream id_line;

    string hup = ctx.IsHup() ? " confidential" : " standard";

    // The trailing blank keeps the topology and the molecule type apart
    // ("circular DNA"); without it the two words ran together.
    string topology = (locus.GetTopology() == CSeq_inst::eTopology_circular) ?
                "circular " : kEmptyStr;

    // Biomol values outside the table fall back to the "xxx" placeholder
    // instead of reading past the end of the array.
    const size_t biomol   = static_cast<size_t>(locus.GetBiomol());
    const size_t embl_idx = (biomol < kNumEmblMol) ? biomol : 0;

    // NOTE(review): s_GenbankMol is defined outside this file, so its size
    // is not visible here and its index is left unchecked -- confirm.
    const string& mol = ctx.Config().UseEmblMolType() ?
        embl_mol[embl_idx] : s_GenbankMol[locus.GetBiomol()];

    id_line.setf(IOS_BASE::left, IOS_BASE::adjustfield);
    id_line
        << setw(9) << locus.GetName()
        << hup << "; "
        << topology << mol << "; "
        << locus.GetDivision() << "; "
        << locus.GetLength() << " BP.";

    Wrap(l, GetWidth(), "ID", CNcbiOstrstreamToString(id_line));
    text_os.AddParagraph(l);
}


///////////////////////////////////////////////////////////////////////////
//
// AC

// Writes an "XX" spacer followed by the AC paragraph; the accession text
// is produced by x_FormatAccession() with ';' as its second argument.
void CEmblFormatter::FormatAccession
(const CAccessionItem& acc,
 IFlatTextOStream& text_os)
{
    string acc_line = x_FormatAccession(acc, ';');

    x_AddXX(text_os);

    list<string> l;
    Wrap(l, "AC", acc_line);
    text_os.AddParagraph(l);
}


///////////////////////////////////////////////////////////////////////////
//
// SV

// Writes an "XX" spacer followed by the SV paragraph, unless the item asks
// to be skipped.  The value is "g" + GI when the GI is positive and empty
// otherwise.
void CEmblFormatter::FormatVersion
(const CVersionItem& version,
 IFlatTextOStream& text_os)
{
    if ( version.Skip() ) {
        return;
    }

    x_AddXX(text_os);

    list<string> l;
    CNcbiOstrstream version_line;

    if ( version.GetGi() > 0 ) {
        version_line << "g" << version.GetGi();
    }

    Wrap(l, "SV", CNcbiOstrstreamToString(version_line));
    text_os.AddParagraph(l);
}


///////////////////////////////////////////////////////////////////////////
//
// DT

// Writes an "XX" spacer followed by two DT lines in one paragraph:
// the create date, then the update date.
//
// A missing or empty create date is written as "01-JAN-1900".  When there
// is no update date, the second line repeats the text of the first.
void CEmblFormatter::FormatDate
(const CDateItem& date,
 IFlatTextOStream& text_os)
{
    string date_str;
    list<string> l;

    x_AddXX(text_os);

    // Create Date
    const CDate* dp = date.GetCreateDate();
    if ( dp != 0 ) {
        DateToString(*dp, date_str);
    }

    if ( date_str.empty() ) {
        date_str = "01-JAN-1900";
    }
    Wrap(l, "DT", date_str);

    // Update Date
    dp = date.GetUpdateDate();
    if ( dp != 0 ) {
        date_str.erase();
        DateToString(*dp, date_str);
    }

    Wrap(l, "DT", date_str);
    text_os.AddParagraph(l);
}


///////////////////////////////////////////////////////////////////////////
//
// DE

// Writes an "XX" spacer followed by the DE paragraph holding the
// definition line, unless the item asks to be skipped.
void CEmblFormatter::FormatDefline
(const CDeflineItem& defline,
 IFlatTextOStream& text_os)
{
    if ( defline.Skip() ) {
        return;
    }

    x_AddXX(text_os);

    list<string> l;
    Wrap(l, "DE", defline.GetDefline());
    text_os.AddParagraph(l);
}


///////////////////////////////////////////////////////////////////////////
//
// KW

// Writes an "XX" spacer followed by the KW paragraph built by
// x_GetKeywords(), unless the item asks to be skipped.
void CEmblFormatter::FormatKeywords
(const CKeywordsItem& keys,
 IFlatTextOStream& text_os)
{
    if ( keys.Skip() ) {
        return;
    }

    x_AddXX(text_os);

    list<string> l;
    x_GetKeywords(keys, "KW", l);
    text_os.AddParagraph(l);
}


///////////////////////////////////////////////////////////////////////////
//
// Source

// SOURCE + ORGANISM

// Collects the source-related lines from the three helpers below and
// writes them as one paragraph, unless the item asks to be skipped.
// All three helpers are currently no-ops, so the paragraph is empty.
void CEmblFormatter::FormatSource
(const CSourceItem& source,
 IFlatTextOStream& text_os)
{
    if ( source.Skip() ) {
        return;
    }

    list<string> l;
    x_OrganismSource(l, source);
    x_OrganisClassification(l, source);
    x_Organelle(l, source);
    text_os.AddParagraph(l);
}


// Not implemented: the draft body is kept below, commented out.
void CEmblFormatter::x_OrganismSource
(list<string>& l,
 const CSourceItem& source) const
{
    /*
    CNcbiOstrstream source_line;

    string prefix = source.IsUsingAnamorph() ? " (anamorph: " : " (";

    source_line << source.GetTaxname();
    if ( !source.GetCommon().empty() ) {
        source_line << prefix << source.GetCommon() << ")";
    }

    Wrap(l, GetWidth(), "SOURCE", CNcbiOstrstreamToString(source_line));
    */
}


// Not implemented: the draft body is kept below, commented out.
void CEmblFormatter::x_OrganisClassification
(list<string>& l,
 const CSourceItem& source) const
{
    //Wrap(l, GetWidth(), "ORGANISM", source.GetTaxname(), eSubp);
    //Wrap(l, GetWidth(), kEmptyStr, source.GetLineage() + '.', eSubp);
}


// Not implemented: adds nothing to the list.
void CEmblFormatter::x_Organelle
(list<string>& l,
 const CSourceItem& source) const
{
}


///////////////////////////////////////////////////////////////////////////
//
// REFERENCE

// The REFERENCE field consists of five parts: the keyword REFERENCE, and
// the subkeywords AUTHORS, TITLE (optional), JOURNAL, MEDLINE (optional),
// PUBMED (optional), and REMARK (optional).

// Not implemented: writes nothing.  The draft body is kept below,
// commented out.
void CEmblFormatter::FormatReference
(const CReferenceItem& ref,
 IFlatTextOStream& text_os)
{
    /*
    CFlatContext& ctx = const_cast<CFlatContext&>(ref.GetContext()); // !!!

    list<string> l;

    x_Reference(l, ref, ctx);
    x_Authors(l, ref, ctx);
    x_Consortium(l, ref, ctx);
    x_Title(l, ref, ctx);
    x_Journal(l, ref, ctx);
    x_Medline(l, ref, ctx);
    x_Pubmed(l, ref, ctx);
    x_Remark(l, ref, ctx);

    text_os.AddParagraph(l);
    */
}


/*
// The REFERENCE line contains the number of the particular reference and
// (in parentheses) the range of bases in the sequence entry reported in
// this citation.
void CEmblFormatter::x_Reference
(list<string>& l,
 const CReferenceItem& ref,
 CFlatContext& ctx)
{
    CNcbiOstrstream ref_line;

    // print serial number
    ref_line << ref.GetSerial() << (ref.GetSerial() < 10 ? "  " : " ");

    // print sites or range
    CPubdesc::TReftype reftype = ref.GetReftype();

    if ( reftype == CPubdesc::eReftype_sites  ||
         reftype == CPubdesc::eReftype_feats ) {
        ref_line << "(sites)";
    } else if ( reftype == CPubdesc::eReftype_no_target ) {
    } else {
        const CSeq_loc* loc = ref.GetLoc() != 0 ? ref.GetLoc() : ctx.GetLocation();
        x_FormatRefLocation(ref_line, *loc, " to ", "; ",
            ctx.IsProt(), ctx.GetScope());
    }
    Wrap(l, GetWidth(), "REFERENCE", CNcbiOstrstreamToString(ref_line));
}


void CEmblFormatter::x_Authors
(list<string>& l,
 const CReferenceItem& ref,
 CFlatContext& ctx) const
{
    Wrap(l, "AUTHORS", CReferenceItem::GetAuthString(ref.GetAuthors()), eSubp);
}


void CEmblFormatter::x_Consortium
(list<string>& l,
 const CReferenceItem& ref,
 CFlatContext& ctx) const
{
    Wrap(l, GetWidth(), "CONSRTM", ref.GetConsortium(), eSubp);
}


void CEmblFormatter::x_Title
(list<string>& l,
 const CReferenceItem& ref,
 CFlatContext& ctx) const
{
    // !!! kludge - fix it
    string title, journal;
    ref.GetTitles(title, journal, ctx);
    Wrap(l, "TITLE",   title,   eSubp);
}


void CEmblFormatter::x_Journal
(list<string>& l,
 const CReferenceItem& ref,
 CFlatContext& ctx) const
{
    // !!! kludge - fix it
    string title, journal;
    ref.GetTitles(title, journal, ctx);
    Wrap(l, "JOURNAL", journal, eSubp);
}


void CEmblFormatter::x_Medline
(list<string>& l,
 const CReferenceItem& ref,
 CFlatContext& ctx) const
{
    Wrap(l, GetWidth(), "MEDLINE", NStr::IntToString(ref.GetMUID()), eSubp);
}


void CEmblFormatter::x_Pubmed
(list<string>& l,
 const CReferenceItem& ref,
 CFlatContext& ctx) const
{
    Wrap(l, GetWidth(), " PUBMED", NStr::IntToString(ref.GetPMID()), eSubp);
}


void CEmblFormatter::x_Remark
(list<string>& l,
 const CReferenceItem& ref,
 CFlatContext& ctx) const
{
    Wrap(l, GetWidth(), "REMARK", ref.GetRemark(), eSubp);
}
*/


///////////////////////////////////////////////////////////////////////////
//
// COMMENT

// Not implemented: writes nothing.  The draft body is kept below,
// commented out.
void CEmblFormatter::FormatComment
(const CCommentItem& comment,
 IFlatTextOStream& text_os)
{
    /*
    list<string> l;

    if ( !comment.IsFirst() ) {
        Wrap(l, kEmptyStr, comment.GetComment(), eSubp);
    } else {
        Wrap(l, "COMMENT", comment.GetComment());
    }

    text_os.AddParagraph(l);
    */
}


///////////////////////////////////////////////////////////////////////////
//
// FEATURES

// Features Header

// Not implemented: writes nothing.  The draft body is kept below,
// commented out.
void CEmblFormatter::FormatFeatHeader
(const CFeatHeaderItem& fh,
 IFlatTextOStream& text_os)
{
    /*
    list<string> l;

    Wrap(l, "FEATURES", "Location/Qualifiers", eFeatHead);

    text_os.AddParagraph(l);
    */
}


// Not implemented: writes nothing.  The draft body is kept below,
// commented out.
void CEmblFormatter::FormatFeature
(const CFeatureItemBase& f,
 IFlatTextOStream& text_os)
{
    /*
    const CFlatFeature& feat = *f.Format();
    list<string>        l;
    Wrap(l, feat.GetKey(), feat.GetLoc().GetString(), eFeat);
    ITERATE (vector<CRef<CFlatQual> >, it, feat.GetQuals()) {
        string qual = '/' + (*it)->GetName(), value = (*it)->GetValue();
        switch ((*it)->GetStyle()) {
        case CFlatQual::eEmpty:                    value.erase();  break;
        case CFlatQual::eQuoted:   qual += "=\"";  value += '"';   break;
        case CFlatQual::eUnquoted: qual += '=';                    break;
        }
        // Call NStr::Wrap directly to avoid unwanted line breaks right
        // before the start of the value (in /translation, e.g.)
        NStr::Wrap(value, GetWidth(), l,
                   / *DoHTML() ? NStr::fWrap_HTMLPre : * /0, GetFeatIndent(),
                   GetFeatIndent() + qual);
    }
    text_os.AddParagraph(l);
    */
}


///////////////////////////////////////////////////////////////////////////
//
// BASE COUNT

// Not implemented: writes nothing.  The draft body is kept below,
// commented out.
void CEmblFormatter::FormatBasecount
(const CBaseCountItem& bc,
 IFlatTextOStream& text_os)
{
    /*
    list<string> l;

    CNcbiOstrstream bc_line;

    bc_line
        << right << setw(7) << bc.GetA() << " a"
        << right << setw(7) << bc.GetC() << " c"
        << right << setw(7) << bc.GetG() << " g"
        << right << setw(7) << bc.GetT() << " t";
    if ( bc.GetOther() > 0 ) {
        bc_line << right << setw(7) << bc.GetOther() << " others";
    }
    Wrap(l, "BASE COUNT", CNcbiOstrstreamToString(bc_line));
    text_os.AddParagraph(l);
    */
}


///////////////////////////////////////////////////////////////////////////
//
// SEQUENCE

// Not implemented: writes nothing.  The draft body is kept below,
// commented out.
void CEmblFormatter::FormatSequence
(const CSequenceItem& seq,
 IFlatTextOStream& text_os)
{
    /*
    list<string> l;
    CNcbiOstrstream seq_line;

    const CSeqVector& vec = seq.GetSequence();

    TSeqPos base_count = seq.GetFrom();
    CSeqVector::const_iterator iter = vec.begin();
    while ( iter ) {
        seq_line << setw(9) << right << base_count;
        for ( TSeqPos count = 0; count < 60  &&  iter; ++count, ++iter, ++base_count ) {
            if ( count % 10 == 0 ) {
                seq_line << ' ';
            }
            seq_line << (char)tolower(*iter);
        }
        seq_line << '\n';
    }

    if ( seq.IsFirst() ) {
        l.push_back("ORIGIN      ");
    }
    NStr::Split(CNcbiOstrstreamToString(seq_line), "\n", l);
    text_os.AddParagraph(l);
    */
}


// Pads the line tag `s` into `out` according to where it appears and
// returns `out`.
//
//   ePara, eSubp : x_Pad(s, out, 5)
//   eFeatHead    : x_Pad(s, out, 21, "FH   ")
//   eFeat        : x_Pad(s, out, 21, "FT   ")
//   anything else: `out` is returned untouched
string& CEmblFormatter::Pad(const string& s, string& out,
                                EPadContext where) const
{
    switch (where) {
    case ePara:  case eSubp:  return x_Pad(s, out, 5);
    case eFeatHead:           return x_Pad(s, out, 21, "FH   ");
    case eFeat:               return x_Pad(s, out, 21, "FT   ");
    default:                  return out;
    }
}


// Writes the "XX" spacer paragraph prepared by the constructor.
void CEmblFormatter::x_AddXX(IFlatTextOStream& text_os) const
{
    text_os.AddParagraph(m_XX);
}


END_SCOPE(objects)
END_NCBI_SCOPE

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?