gather_items.cpp
来自「ncbi源码」· C++ 代码 · 共 1,327 行 · 第 1/3 页
CPP
1,327 行
/*
 * ===========================================================================
 * PRODUCTION $Log: gather_items.cpp,v $
 * PRODUCTION Revision 1000.2  2004/06/01 19:44:32  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.21
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: gather_items.cpp,v 1000.2 2004/06/01 19:44:32 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author:  Aaron Ucko, NCBI
*          Mati Shomrat, NCBI
*
* File Description:
*
*
* ===========================================================================
*/
#include <ncbi_pch.hpp>
#include <corelib/ncbistd.hpp>

#include <objects/seq/Bioseq.hpp>
#include <objects/seq/Seq_inst.hpp>
#include <objects/seq/Seq_hist.hpp>
#include <objects/seq/Seq_hist_rec.hpp>
#include <objects/seq/Seqdesc.hpp>
#include <objects/seq/Seq_descr.hpp>
#include <objects/seqset/Seq_entry.hpp>
#include <objects/general/User_object.hpp>
#include <objects/general/User_field.hpp>
#include <objects/general/Object_id.hpp>
#include <objects/seqblock/GB_block.hpp>
#include <objects/seqfeat/BioSource.hpp>

#include <objmgr/scope.hpp>
#include <objmgr/bioseq_handle.hpp>
#include <objmgr/bioseq_ci.hpp>
#include <objmgr/seq_entry_handle.hpp>
#include <objmgr/seq_entry_ci.hpp>
#include <objmgr/seq_map.hpp>
#include <objmgr/seqdesc_ci.hpp>
#include <objmgr/feat_ci.hpp>
#include <objmgr/util/sequence.hpp>
#include <objmgr/seq_loc_mapper.hpp>

#include <algorithm>

#include <objtools/format/item_ostream.hpp>
#include <objtools/format/flat_expt.hpp>
#include <objtools/format/items/locus_item.hpp>
#include <objtools/format/items/defline_item.hpp>
#include <objtools/format/items/accession_item.hpp>
#include <objtools/format/items/version_item.hpp>
#include <objtools/format/items/keywords_item.hpp>
#include <objtools/format/items/source_item.hpp>
#include <objtools/format/items/reference_item.hpp>
#include <objtools/format/items/comment_item.hpp>
#include <objtools/format/items/basecount_item.hpp>
#include <objtools/format/items/sequence_item.hpp>
#include <objtools/format/items/feature_item.hpp>
#include <objtools/format/items/segment_item.hpp>
#include <objtools/format/items/ctrl_items.hpp>
#include <objtools/format/gather_items.hpp>
#include <objtools/format/genbank_gather.hpp>
#include <objtools/format/embl_gather.hpp>
#include <objtools/format/gff_gather.hpp>
#include <objtools/format/ftable_gather.hpp>
#include <objtools/format/context.hpp>
#include "utils.hpp"


BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)
USING_SCOPE(sequence);


/////////////////////////////////////////////////////////////////////////////
//
// Public:

// "virtual constructor"
//
// Creates the gatherer that matches the requested output format:
//   GenBank, GBSeq -> CGenbankGatherer
//   EMBL           -> CEmblGatherer
//   GFF            -> CGFFGatherer
//   FTable         -> CFtableGatherer
// DDBJ and every other value raise CFlatException (eNotSupported).
//
// The object is allocated with a plain `new` and returned as a raw pointer.
// NOTE(review): the caller is presumably expected to take ownership (e.g.
// by storing the result in a CRef) -- confirm against the call sites.
CFlatGatherer* CFlatGatherer::New(CFlatFileConfig::TFormat format)
{
    switch ( format ) {
    case CFlatFileConfig::eFormat_GenBank:
    case CFlatFileConfig::eFormat_GBSeq:
        //case CFlatFileGenerator<>::eFormat_Index:
        return new CGenbankGatherer;

    case CFlatFileConfig::eFormat_EMBL:
        return new CEmblGatherer;

    case CFlatFileConfig::eFormat_GFF:
        return new CGFFGatherer;

    case CFlatFileConfig::eFormat_FTable:
        return new CFtableGatherer;

    case CFlatFileConfig::eFormat_DDBJ:
    default:
        NCBI_THROW(CFlatException, eNotSupported,
            "This format is currently not supported");
    }

    // Every case above either returns or throws; this statement is
    // presumably kept only to satisfy compilers that expect a return value.
    return 0;
}


// Entry point of the gathering process.
//
// Remembers the output stream and the flat-file context in m_ItemOS and
// m_Context, then emits a CStartItem, gathers everything reachable from
// the context's Seq-entry and finally emits a CEndItem.
void CFlatGatherer::Gather(CFlatFileContext& ctx, CFlatItemOStream& os) const
{
    m_ItemOS.Reset(&os);
    m_Context.Reset(&ctx);

    os << new CStartItem();
    x_GatherSeqEntry(ctx.GetEntry());
    os << new CEndItem();
}


CFlatGatherer::~CFlatGatherer(void)
{
}


/////////////////////////////////////////////////////////////////////////////
//
// Protected:

// Gathers the bioseqs contained in a Seq-entry.
//
// When the entry is a Bioseq-set whose class is one of genbank, mut_set,
// pop_set, phy_set, eco_set, wgs_set or gen_prod_set, each child entry is
// processed recursively and nothing else is done for the set itself.
// Otherwise every bioseq enumerated by CBioseq_CI (eLevel_Mains) that
// passes x_DisplayBioseq() is handed to x_GatherBioseq().
void CFlatGatherer::x_GatherSeqEntry(const CSeq_entry_Handle& entry) const
{
    if ( entry.IsSet()  &&  entry.GetSet().IsSetClass() ) {
        CBioseq_set::TClass clss = entry.GetSet().GetClass();
        if ( clss == CBioseq_set::eClass_genbank  ||
             clss == CBioseq_set::eClass_mut_set  ||
             clss == CBioseq_set::eClass_pop_set  ||
             clss == CBioseq_set::eClass_phy_set  ||
             clss == CBioseq_set::eClass_eco_set  ||
             clss == CBioseq_set::eClass_wgs_set  ||
             clss == CBioseq_set::eClass_gen_prod_set ) {
            for ( CSeq_entry_CI it(entry); it; ++it ) {
                x_GatherSeqEntry(*it);
            }
            return;
        }
    }

    // visit each bioseq in the entry (excluding segments)
    CBioseq_CI seq_iter(entry, CSeq_inst::eMol_not_set,
        CBioseq_CI::eLevel_Mains);
    for ( ; seq_iter; ++seq_iter ) {
        if ( x_DisplayBioseq(entry, *seq_iter) ) {
            x_GatherBioseq(*seq_iter);
        }
    }
}


// Decides whether a bioseq should appear in the output.
//
// Returns false when the bioseq's best id is a local id and the
// configuration suppresses local ids.  Otherwise returns true for a
// nucleotide when the nucleotide view is enabled, or for a protein when
// the protein view is enabled, and false in every other case.
// The `entry` argument is not used by this implementation.
bool CFlatGatherer::x_DisplayBioseq
(const CSeq_entry_Handle& entry,
 const CBioseq_Handle& seq) const
{
    const CFlatFileConfig& cfg = Config();

    const CSeq_id& id = GetId(seq, eGetId_Best);
    if ( id.IsLocal()  &&  cfg.SuppressLocalId() ) {
        return false;
    }

    if ( (CSeq_inst::IsNa(seq.GetInst_Mol())  &&  cfg.IsViewNuc())  ||
         (CSeq_inst::IsAa(seq.GetInst_Mol())  &&  cfg.IsViewProt()) ) {
        return true;
    }

    return false;
}


// Returns true when the handle is valid, has its Seq-inst and
// representation set, and the representation is eRepr_seg.
bool s_IsSegmented(const CBioseq_Handle& seq)
{
    return seq  &&
           seq.IsSetInst()  &&
           seq.IsSetInst_Repr()  &&
           seq.GetInst_Repr() == CSeq_inst::eRepr_seg;
}


// a default implementation for GenBank / DDBJ formats
//
// Emits either one section per segment (x_DoMultipleSections) or a single
// section for the whole bioseq (x_DoSingleSection), depending on the
// bioseq and on the configuration; see the conditions listed below.
void CFlatGatherer::x_GatherBioseq(const CBioseq_Handle& seq) const
{
    bool segmented = s_IsSegmented(seq);
    const CFlatFileConfig& cfg = Config();

    // Do multiple sections (segmented style) if:
    // a. the bioseq is segmented
    // b. style is normal or segmented (not master)
    // c. user didn't specify a location
    // d. not FTable format
    if ( segmented                                        &&
         (cfg.IsStyleNormal()  ||  cfg.IsStyleSegment())  &&
         (m_Context->GetLocation() == 0)                  &&
         !cfg.IsFormatFTable() ) {
        x_DoMultipleSections(seq);
    } else {
        // display as a single bioseq (single section)
        m_Current.Reset(new CBioseqContext(seq, *m_Context));
        m_Context->AddSection(m_Current);
        x_DoSingleSection(*m_Current);
    }
}


// Emits one section for each segment of a segmented bioseq.
//
// A single CMasterContext built from `seq` is shared by all sections.
// The sequence map of `seq` is walked with a resolved-segment iterator;
// each referenced Seq-id is looked up with GetBioseqHandleFromTSE()
// relative to `seq`, and segments for which that lookup yields no handle
// are skipped.
// NOTE(review): the BeginResolved() arguments (1, CSeqMap::fFindRef)
// presumably restrict the walk to one resolution level and to reference
// segments -- confirm against the CSeqMap documentation.
void CFlatGatherer::x_DoMultipleSections(const CBioseq_Handle& seq) const
{
    CRef<CMasterContext> mctx(new CMasterContext(seq));

    CScope* scope = &seq.GetScope();
    const CSeqMap& seqmap = seq.GetSeqMap();

    CSeqMap::TSegment_CI it = seqmap.BeginResolved(scope, 1, CSeqMap::fFindRef);
    while ( it ) {
        CSeq_id_Handle id = it.GetRefSeqid();
        CBioseq_Handle part = scope->GetBioseqHandleFromTSE(id, seq);
        if ( part ) {
            m_Current.Reset(new CBioseqContext(part, *m_Context, mctx));
            m_Context->AddSection(m_Current);
            x_DoSingleSection(*m_Current);
        }
        ++it;
    }
}


/////////////////////////////////////////////////////////////////////////////
//
// REFERENCES

// Tells whether a publication descriptor must be left out of the output.
//
// Only publications that carry a comment can be filtered.  A comment that
// starts with "GeneRIF" (case-insensitive) marks a GeneRIF publication.
// Returns true when GeneRIFs are hidden and this is one, or when only /
// latest GeneRIFs are requested and this is not one; false otherwise.
bool s_FilterPubdesc(const CPubdesc& pubdesc, CBioseqContext& ctx)
{
    if ( pubdesc.CanGetComment() ) {
        const string& comment = pubdesc.GetComment();
        bool is_gene_rif = NStr::StartsWith(comment, "GeneRIF", NStr::eNocase);

        const CFlatFileConfig& cfg = ctx.Config();
        if ( (cfg.HideGeneRIFs()  &&  is_gene_rif)  ||
             ((cfg.OnlyGeneRIFs()  ||  cfg.LatestGeneRIFs())  &&  !is_gene_rif) ) {
            return true;
        }
    }

    return false;
}


// Collects the reference items of the current bioseq and writes them out.
//
// References come from two sources: Pub descriptors (subject to
// s_FilterPubdesc) and Pub features located on the current location.
// The combined list is stored in the current context, passed through
// CReferenceItem::Rearrange() and then sent to the item stream in the
// resulting order.
void CFlatGatherer::x_GatherReferences(void) const
{
    CBioseqContext::TReferences& refs = m_Current->SetReferences();

    // gather references from descriptors
    for (CSeqdesc_CI it(m_Current->GetHandle(), CSeqdesc::e_Pub); it; ++it) {
        const CPubdesc& pubdesc = it->GetPub();
        if ( s_FilterPubdesc(pubdesc, *m_Current) ) {
            continue;
        }

        refs.push_back(CBioseqContext::TRef(new CReferenceItem(*it, *m_Current)));
    }

    // gather references from features
    SAnnotSelector sel(CSeqFeatData::e_Pub);
    sel.SetCombineMethod(SAnnotSelector::eCombine_All);
    CFeat_CI it(m_Current->GetScope(), m_Current->GetLocation(), sel);
    for ( ; it; ++it) {
        refs.push_back(CBioseqContext::TRef(new CReferenceItem(it->GetData().GetPub(),
                                            *m_Current, &it->GetLocation())));
    }

    CReferenceItem::Rearrange(refs, *m_Current);
    ITERATE (CBioseqContext::TReferences, ref, refs) {
        *m_ItemOS << *ref;
    }
}


/////////////////////////////////////////////////////////////////////////////
//
// COMMENTS

// Collects all comment items of the current bioseq, in a fixed order, into
// m_Comments and then flushes them to the item stream.
void CFlatGatherer::x_GatherComments(void) const
{
    CBioseqContext& ctx = *m_Current;

    // Gather comments related to the seq-id
    x_IdComments(ctx);
    x_RefSeqComments(ctx);

    if ( CCommentItem::NsAreGaps(ctx.GetHandle(), ctx) ) {
        x_AddComment(new CCommentItem(CCommentItem::kNsAreGaps, ctx));
    }

    x_HistoryComments(ctx);
    x_WGSComment(ctx);
    if ( ctx.ShowGBBSource() ) {
        x_GBBSourceComment(ctx);
    }
    x_DescComments(ctx);
    x_MaplocComments(ctx);
    x_RegionComments(ctx);
    x_HTGSComments(ctx);
    x_FeatComments(ctx);

    x_FlushComments();
}


// Queues a comment item for output unless the item reports Skip().
//
// The raw pointer is wrapped in a CRef immediately, so the item is managed
// by that CRef whether or not it ends up in m_Comments.
void CFlatGatherer::x_AddComment(CCommentItem* comment) const
{
    CRef<CCommentItem> com(comment);
    if ( !com->Skip() ) {
        m_Comments.push_back(com);
    }
}


// Builds a CGsdbComment from the given db-tag and queues it for output
// unless the new item reports Skip().
void CFlatGatherer::x_AddGSDBComment
(const CDbtag& dbtag,
 CBioseqContext& ctx) const
{
    CRef<CCommentItem> gsdb_comment(new CGsdbComment(dbtag, ctx));
    if ( !gsdb_comment->Skip() ) {
        m_Comments.push_back(gsdb_comment);
    }
}


// Writes all queued comments to the item stream and empties the queue.
//
// Before writing, a period is appended to the last comment unless it is a
// CLocalIdComment, and to every CGsdbComment that is not the last one.
// Does nothing when no comments are queued.
void CFlatGatherer::x_FlushComments(void) const
{
    if ( m_Comments.empty() ) {
        return;
    }

    // add a period to the last comment (if not local id)
    if ( dynamic_cast<CLocalIdComment*>(&*m_Comments.back()) == 0 ) {
        m_Comments.back()->AddPeriod();
    }

    // add a period to a GSDB comment (if exist and not last)
    TCommentVec::iterator last = m_Comments.end();
    --last;

    NON_CONST_ITERATE (TCommentVec, it, m_Comments) {
        CGsdbComment* gsdb = dynamic_cast<CGsdbComment*>(it->GetPointerOrNull());
        if ( gsdb != 0  &&  it != last ) {
            gsdb->AddPeriod();
        }
        *m_ItemOS << *it;
    }

    m_Comments.clear();
}


// Extracts the genome build number from the bioseq's User descriptors.
//
// Only user objects whose type is the string "GenomeBuild" are examined:
//   - if the object has an "NcbiAnnotation" field, a non-empty string
//     value of that field is returned as is;
//   - otherwise, if it has an "Annotation" field whose non-empty string
//     value starts with "NCBI build ", the text after that prefix is
//     returned.
// An object that has "NcbiAnnotation" is never checked for "Annotation",
// even when the former yields nothing.  Returns kEmptyStr when no
// descriptor provides a value.
string s_GetGenomeBuildNumber(const CBioseq_Handle& bsh)
{
    for (CSeqdesc_CI it(bsh, CSeqdesc::e_User); it; ++it) {
        const CUser_object& uo = it->GetUser();
        if ( uo.IsSetType()  &&  uo.GetType().IsStr()  &&
             uo.GetType().GetStr() == "GenomeBuild" ) {
            if ( uo.HasField("NcbiAnnotation") ) {
                const CUser_field& uf = uo.GetField("NcbiAnnotation");
                if ( uf.CanGetData()  &&  uf.GetData().IsStr()  &&
                     !uf.GetData().GetStr().empty() ) {
                    return uf.GetData().GetStr();
                }
            } else if ( uo.HasField("Annotation") ) {
                const CUser_field& uf = uo.GetField("Annotation");
                if ( uf.CanGetData()  &&  uf.GetData().IsStr()  &&
                     !uf.GetData().GetStr().empty() ) {
                    static const string prefix = "NCBI build ";
                    if ( NStr::StartsWith(uf.GetData().GetStr(), prefix) ) {
                        return uf.GetData().GetStr().substr(prefix.length());
                    }
                }
            }
        }
    }

    return kEmptyStr;
}


// Returns true when at least one User descriptor of the bioseq yields a
// RefTrack status other than eRefTrackStatus_Unknown.
bool s_HasRefTrackStatus(const CBioseq_Handle& bsh) {
    for (CSeqdesc_CI it(bsh, CSeqdesc::e_User); it; ++it) {
        CCommentItem::TRefTrackStatus status =
            CCommentItem::GetRefTrackStatus(it->GetUser());
        if ( status != CCommentItem::eRefTrackStatus_Unknown ) {
            return true;
        }
    }

    return false;
}


// NOTE(review): the definition below is truncated in this copy of the
// file; its remaining statements and closing braces are not present here.
void CFlatGatherer::x_IdComments(CBioseqContext& ctx) const
{
    const CObject_id* local_id = 0;

    string genome_build_number = s_GetGenomeBuildNumber(ctx.GetHandle());
    bool has_ref_track_status = s_HasRefTrackStatus(ctx.GetHandle());

    ITERATE( CBioseq::TId, id_iter, ctx.GetBioseqIds() ) {
        const CSeq_id& id = **id_iter;

        switch ( id.Which() ) {
        case CSeq_id::e_Other:
            {{
                if ( ctx.IsRSCompleteGenomic() ) {
                    if ( !genome_build_number.empty() ) {
                        x_AddComment(new CGenomeAnnotComment(ctx, genome_build_number));
                    }
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?