gather_items.cpp

来自「ncbi源码」· C++ 代码 · 共 1,327 行 · 第 1/3 页

CPP
1,327
字号
/*
 * ===========================================================================
 * PRODUCTION $Log: gather_items.cpp,v $
 * PRODUCTION Revision 1000.2  2004/06/01 19:44:32  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.21
 * PRODUCTION
 * ===========================================================================
 */
/*  $Id: gather_items.cpp,v 1000.2 2004/06/01 19:44:32 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author:  Aaron Ucko, NCBI
*          Mati Shomrat, NCBI
*
* File Description:
*   
*
* ===========================================================================
*/
#include <ncbi_pch.hpp>
#include <corelib/ncbistd.hpp>

#include <objects/seq/Bioseq.hpp>
#include <objects/seq/Seq_inst.hpp>
#include <objects/seq/Seq_hist.hpp>
#include <objects/seq/Seq_hist_rec.hpp>
#include <objects/seq/Seqdesc.hpp>
#include <objects/seq/Seq_descr.hpp>
#include <objects/seqset/Seq_entry.hpp>
#include <objects/general/User_object.hpp>
#include <objects/general/User_field.hpp>
#include <objects/general/Object_id.hpp>
#include <objects/seqblock/GB_block.hpp>
#include <objects/seqfeat/BioSource.hpp>

#include <objmgr/scope.hpp>
#include <objmgr/bioseq_handle.hpp>
#include <objmgr/bioseq_ci.hpp>
#include <objmgr/seq_entry_handle.hpp>
#include <objmgr/seq_entry_ci.hpp>
#include <objmgr/seq_map.hpp>
#include <objmgr/seqdesc_ci.hpp>
#include <objmgr/feat_ci.hpp>
#include <objmgr/util/sequence.hpp>
#include <objmgr/seq_loc_mapper.hpp>

#include <algorithm>

#include <objtools/format/item_ostream.hpp>
#include <objtools/format/flat_expt.hpp>
#include <objtools/format/items/locus_item.hpp>
#include <objtools/format/items/defline_item.hpp>
#include <objtools/format/items/accession_item.hpp>
#include <objtools/format/items/version_item.hpp>
#include <objtools/format/items/keywords_item.hpp>
#include <objtools/format/items/source_item.hpp>
#include <objtools/format/items/reference_item.hpp>
#include <objtools/format/items/comment_item.hpp>
#include <objtools/format/items/basecount_item.hpp>
#include <objtools/format/items/sequence_item.hpp>
#include <objtools/format/items/feature_item.hpp>
#include <objtools/format/items/segment_item.hpp>
#include <objtools/format/items/ctrl_items.hpp>
#include <objtools/format/gather_items.hpp>
#include <objtools/format/genbank_gather.hpp>
#include <objtools/format/embl_gather.hpp>
#include <objtools/format/gff_gather.hpp>
#include <objtools/format/ftable_gather.hpp>
#include <objtools/format/context.hpp>
#include "utils.hpp"


BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)
USING_SCOPE(sequence);


/////////////////////////////////////////////////////////////////////////////
//
// Public:

// "virtual constructor"
//
// Creates the gatherer matching the requested output format:
//   GenBank, GBSeq -> CGenbankGatherer
//   EMBL           -> CEmblGatherer
//   GFF            -> CGFFGatherer
//   FTable         -> CFtableGatherer
// DDBJ and any other value are reported through NCBI_THROW as
// CFlatException / eNotSupported.
// The object is allocated with plain 'new' and returned as a raw pointer.
CFlatGatherer* CFlatGatherer::New(CFlatFileConfig::TFormat format)
{
    switch ( format ) {
    case CFlatFileConfig::eFormat_GenBank:
    case CFlatFileConfig::eFormat_GBSeq:
        //case CFlatFileGenerator<>::eFormat_Index:
        return new CGenbankGatherer;

    case CFlatFileConfig::eFormat_EMBL:
        return new CEmblGatherer;

    case CFlatFileConfig::eFormat_GFF:
        return new CGFFGatherer;

    case CFlatFileConfig::eFormat_FTable:
        return new CFtableGatherer;

    case CFlatFileConfig::eFormat_DDBJ:
    default:
        NCBI_THROW(CFlatException, eNotSupported,
            "This format is currently not supported");
    }

    // Kept so that every path syntactically ends in a return statement.
    return 0;
}


// Entry point: remembers the output stream and the flat-file context,
// then emits a CStartItem, gathers the context's top-level entry, and
// finally emits a CEndItem.
void CFlatGatherer::Gather(CFlatFileContext& ctx, CFlatItemOStream& os) const
{
    m_ItemOS.Reset(&os);
    m_Context.Reset(&ctx);

    os << new CStartItem();
    x_GatherSeqEntry(ctx.GetEntry());
    os << new CEndItem();
}


CFlatGatherer::~CFlatGatherer(void)
{
}


/////////////////////////////////////////////////////////////////////////////
//
// Protected:

// Walks a seq-entry.
// If the entry is a set whose class is one of genbank, mut_set, pop_set,
// phy_set, eco_set, wgs_set or gen_prod_set, each child entry is gathered
// recursively and nothing else is done for the set itself.
// Otherwise every bioseq yielded by CBioseq_CI (eLevel_Mains) that passes
// x_DisplayBioseq() is gathered.
void CFlatGatherer::x_GatherSeqEntry(const CSeq_entry_Handle& entry) const
{
    if ( entry.IsSet()  &&  entry.GetSet().IsSetClass() ) {
        CBioseq_set::TClass clss = entry.GetSet().GetClass();
        if ( clss == CBioseq_set::eClass_genbank  ||
             clss == CBioseq_set::eClass_mut_set  ||
             clss == CBioseq_set::eClass_pop_set  ||
             clss == CBioseq_set::eClass_phy_set  ||
             clss == CBioseq_set::eClass_eco_set  ||
             clss == CBioseq_set::eClass_wgs_set  ||
             clss == CBioseq_set::eClass_gen_prod_set ) {
            for ( CSeq_entry_CI it(entry); it; ++it ) {
                x_GatherSeqEntry(*it);
            }
            return;
        }
    }

    // visit each bioseq in the entry (excluding segments)
    CBioseq_CI seq_iter(entry, CSeq_inst::eMol_not_set,
        CBioseq_CI::eLevel_Mains);
    for ( ; seq_iter; ++seq_iter ) {
        if ( x_DisplayBioseq(entry, *seq_iter) ) {
            x_GatherBioseq(*seq_iter);
        }
    }
}


// Decides whether a bioseq should be displayed.
// Returns false when the best id is a local id and the configuration
// suppresses local ids.  Otherwise returns true only for a nucleotide
// with the nucleotide view enabled, or a protein with the protein view
// enabled.
// Note: the 'entry' argument is not used by this implementation.
bool CFlatGatherer::x_DisplayBioseq
(const CSeq_entry_Handle& entry,
 const CBioseq_Handle& seq) const
{
    const CFlatFileConfig& cfg = Config();

    const CSeq_id& id = GetId(seq, eGetId_Best);
    if ( id.IsLocal()  &&  cfg.SuppressLocalId() ) {
        return false;
    }

    if ( (CSeq_inst::IsNa(seq.GetInst_Mol())  &&  cfg.IsViewNuc())  ||
         (CSeq_inst::IsAa(seq.GetInst_Mol())  &&  cfg.IsViewProt()) ) {
        return true;
    }

    return false;
}


// True when the handle is valid and its Seq-inst representation is set
// to eRepr_seg.
bool s_IsSegmented(const CBioseq_Handle& seq)
{
    return seq  &&
           seq.IsSetInst()  &&
           seq.IsSetInst_Repr()  &&
           seq.GetInst_Repr() == CSeq_inst::eRepr_seg;
}


// a default implementation for GenBank /  DDBJ formats
void CFlatGatherer::x_GatherBioseq(const CBioseq_Handle& seq) const
{
    bool segmented = s_IsSegmented(seq);
    const CFlatFileConfig& cfg = Config();

    // Do multiple sections (segmented style) if:
    // a. the bioseq is segmented
    // b. style is normal or segmented (not master)
    // c. user didn't specify a location
    // d. not FTable format
    if ( segmented                                        &&
         (cfg.IsStyleNormal()  ||  cfg.IsStyleSegment())  &&
         (m_Context->GetLocation() == 0)                  &&
         !cfg.IsFormatFTable() ) {
        x_DoMultipleSections(seq);
    } else {
        // display as a single bioseq (single section)
        m_Current.Reset(new CBioseqContext(seq, *m_Context));
        m_Context->AddSection(m_Current);
        x_DoSingleSection(*m_Current);
    }
}


// Emits one section per referenced part of a segmented bioseq.
// Iterates the resolved sequence map of 'seq' and, for every reference
// whose bioseq can be obtained from the same TSE, creates a
// CBioseqContext sharing one CMasterContext, registers it as a section
// and gathers it.  References that yield no handle are skipped.
void CFlatGatherer::x_DoMultipleSections(const CBioseq_Handle& seq) const
{
    CRef<CMasterContext> mctx(new CMasterContext(seq));

    CScope* scope = &seq.GetScope();
    const CSeqMap& seqmap = seq.GetSeqMap();

    CSeqMap::TSegment_CI it = seqmap.BeginResolved(scope, 1, CSeqMap::fFindRef);
    while ( it ) {
        CSeq_id_Handle id = it.GetRefSeqid();
        CBioseq_Handle part = scope->GetBioseqHandleFromTSE(id, seq);
        if ( part ) {
            m_Current.Reset(new CBioseqContext(part, *m_Context, mctx));
            m_Context->AddSection(m_Current);
            x_DoSingleSection(*m_Current);
        }
        ++it;
    }
}


/////////////////////////////////////////////////////////////////////////////
//
// REFERENCES

// Returns true when the pubdesc must be skipped.
// A pubdesc whose comment starts with "GeneRIF" (case-insensitive) is
// skipped when GeneRIFs are hidden; one whose comment does not is skipped
// when only (or only the latest) GeneRIFs are requested.
// A pubdesc without a comment is never filtered here.
bool s_FilterPubdesc(const CPubdesc& pubdesc, CBioseqContext& ctx)
{
    if ( pubdesc.CanGetComment() ) {
        const string& comment = pubdesc.GetComment();
        bool is_gene_rif = NStr::StartsWith(comment, "GeneRIF", NStr::eNocase);

        const CFlatFileConfig& cfg = ctx.Config();
        if ( (cfg.HideGeneRIFs()  &&  is_gene_rif)  ||
             ((cfg.OnlyGeneRIFs()  ||  cfg.LatestGeneRIFs())  &&  !is_gene_rif) ) {
            return true;
        }
    }

    return false;
}


// Collects reference items for the current bioseq context from Pub
// descriptors (subject to s_FilterPubdesc) and from Pub features on the
// context's location, lets CReferenceItem::Rearrange() process the list,
// and then sends every reference to the item stream.
void CFlatGatherer::x_GatherReferences(void) const
{
    CBioseqContext::TReferences& refs = m_Current->SetReferences();

    // gather references from descriptors
    for (CSeqdesc_CI it(m_Current->GetHandle(), CSeqdesc::e_Pub); it; ++it) {
        const CPubdesc& pubdesc = it->GetPub();
        if ( s_FilterPubdesc(pubdesc, *m_Current) ) {
            continue;
        }

        refs.push_back(CBioseqContext::TRef(new CReferenceItem(*it, *m_Current)));
    }

    // gather references from features
    SAnnotSelector sel(CSeqFeatData::e_Pub);
    sel.SetCombineMethod(SAnnotSelector::eCombine_All);
    CFeat_CI it(m_Current->GetScope(), m_Current->GetLocation(), sel);
    for ( ; it; ++it) {
        refs.push_back(CBioseqContext::TRef(new CReferenceItem(it->GetData().GetPub(),
                                        *m_Current, &it->GetLocation())));
    }
    CReferenceItem::Rearrange(refs, *m_Current);
    ITERATE (CBioseqContext::TReferences, ref, refs) {
        *m_ItemOS << *ref;
    }
}


/////////////////////////////////////////////////////////////////////////////
//
// COMMENTS

// Runs every comment collector for the current bioseq context in a fixed
// order, accumulating the results in m_Comments, and then flushes them
// to the item stream.
void CFlatGatherer::x_GatherComments(void) const
{
    CBioseqContext& ctx = *m_Current;

    // Gather comments related to the seq-id
    x_IdComments(ctx);
    x_RefSeqComments(ctx);

    if ( CCommentItem::NsAreGaps(ctx.GetHandle(), ctx) ) {
        x_AddComment(new CCommentItem(CCommentItem::kNsAreGaps, ctx));
    }

    x_HistoryComments(ctx);
    x_WGSComment(ctx);
    if ( ctx.ShowGBBSource() ) {
        x_GBBSourceComment(ctx);
    }
    x_DescComments(ctx);
    x_MaplocComments(ctx);
    x_RegionComments(ctx);
    x_HTGSComments(ctx);
    x_FeatComments(ctx);

    x_FlushComments();
}


// Wraps 'comment' in a CRef and queues it in m_Comments unless the item
// reports Skip().  The CRef is created in both cases.
void CFlatGatherer::x_AddComment(CCommentItem* comment) const
{
    CRef<CCommentItem> com(comment);
    if ( !com->Skip() ) {
        m_Comments.push_back(com);
    }
}


// Builds a CGsdbComment for the given dbtag and queues it through
// x_AddComment(), which applies the same Skip() check the other comment
// items get.
void CFlatGatherer::x_AddGSDBComment
(const CDbtag& dbtag,
 CBioseqContext& ctx) const
{
    x_AddComment(new CGsdbComment(dbtag, ctx));
}


// Sends all queued comments to the item stream and empties the queue.
// Does nothing when no comments are queued.
void CFlatGatherer::x_FlushComments(void) const
{
    if ( m_Comments.empty() ) {
        return;
    }
    // add a period to the last comment (if not local id)
    if ( dynamic_cast<CLocalIdComment*>(&*m_Comments.back()) == 0 ) {
        m_Comments.back()->AddPeriod();
    }

    // add a period to a GSDB comment (if exist and not last)
    TCommentVec::iterator last = m_Comments.end();
    --last;

    NON_CONST_ITERATE (TCommentVec, it, m_Comments) {
        CGsdbComment* gsdb = dynamic_cast<CGsdbComment*>(it->GetPointerOrNull());
        if ( gsdb != 0   &&  it != last ) {
            gsdb->AddPeriod();
        }
        *m_ItemOS << *it;
    }

    m_Comments.clear();
}


// Looks through the User descriptors of 'bsh' for an object of type
// "GenomeBuild" and returns its build number:
//  - the non-empty string value of field "NcbiAnnotation", if that field
//    exists; otherwise
//  - the part following the "NCBI build " prefix of the non-empty string
//    field "Annotation".
// The "Annotation" field is consulted only when "NcbiAnnotation" is
// absent.  Returns kEmptyStr when no descriptor yields a value.
string s_GetGenomeBuildNumber(const CBioseq_Handle& bsh)
{
    for (CSeqdesc_CI it(bsh, CSeqdesc::e_User);  it;  ++it) {
        const CUser_object& uo = it->GetUser();
        if ( uo.IsSetType()  &&  uo.GetType().IsStr()  &&
             uo.GetType().GetStr() == "GenomeBuild" ) {
            if ( uo.HasField("NcbiAnnotation") ) {
                const CUser_field& uf = uo.GetField("NcbiAnnotation");
                if ( uf.CanGetData()  &&  uf.GetData().IsStr()  &&
                     !uf.GetData().GetStr().empty() ) {
                    return uf.GetData().GetStr();
                }
            } else if ( uo.HasField("Annotation") ) {
                const CUser_field& uf = uo.GetField("Annotation");
                if ( uf.CanGetData()  &&  uf.GetData().IsStr()  &&
                     !uf.GetData().GetStr().empty() ) {
                    static const string prefix = "NCBI build ";
                    if ( NStr::StartsWith(uf.GetData().GetStr(), prefix) ) {
                        return uf.GetData().GetStr().substr(prefix.length());
                    }
                }
            }
        }
    }

    return kEmptyStr;
}


// True when at least one User descriptor of 'bsh' has a ref-track status
// other than eRefTrackStatus_Unknown.
bool s_HasRefTrackStatus(const CBioseq_Handle& bsh) {
    for (CSeqdesc_CI it(bsh, CSeqdesc::e_User);  it;  ++it) {
        CCommentItem::TRefTrackStatus status =
            CCommentItem::GetRefTrackStatus(it->GetUser());
        if ( status != CCommentItem::eRefTrackStatus_Unknown ) {
            return true;
        }
    }

    return false;
}


void CFlatGatherer::x_IdComments(CBioseqContext& ctx) const
{
    const CObject_id* local_id = 0;

    string genome_build_number = s_GetGenomeBuildNumber(ctx.GetHandle());
    bool has_ref_track_status = s_HasRefTrackStatus(ctx.GetHandle());

    ITERATE( CBioseq::TId, id_iter, ctx.GetBioseqIds() ) {
        const CSeq_id& id = **id_iter;

        switch ( id.Which() ) {
        case CSeq_id::e_Other:
            {{
                if ( ctx.IsRSCompleteGenomic() ) {
                    if ( !genome_build_number.empty() ) {
                        x_AddComment(new CGenomeAnnotComment(ctx, genome_build_number));
                    }

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?