flat_formatter.cpp
来自「ncbi源码」· C++ 代码 · 共 406 行
CPP
406 行
/*
 * ===========================================================================
 * PRODUCTION $Log: flat_formatter.cpp,v $
 * PRODUCTION Revision 1000.2  2004/06/01 19:43:08  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.9
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: flat_formatter.cpp,v 1000.2 2004/06/01 19:43:08 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author:  Aaron Ucko, NCBI
*
* File Description:
*   new (early 2003) flat-file generator -- base formatter class
*
* ===========================================================================
*/

#include <ncbi_pch.hpp>
#include <objtools/flat/flat_items.hpp>

#include <objects/seq/Bioseq.hpp>
#include <objects/seq/Seg_ext.hpp>
#include <objects/seq/Seq_ext.hpp>
#include <objects/seqset/Bioseq_set.hpp>
#include <objects/seqset/Seq_entry.hpp>

#include <objmgr/scope.hpp>
#include <objmgr/seqdesc_ci.hpp>
#include <objmgr/util/sequence.hpp>

BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)


// Format every sequence reachable from a Seq-entry.
//
// entry  Seq-entry to format; either a single Bioseq or a Bioseq-set.
// out    Stream receiving the generated flat-file items.
// flags  Filter flags; a single Bioseq is skipped when fSkipProteins
//        (for amino-acid sequences) or fSkipNucleotides (otherwise) is set.
// ctx    Context to use.  When null, a fresh context is created and its
//        flags are set from entry with SetFlags(entry, true); otherwise
//        SetFlags(entry, false) is applied to the supplied context.
//
// For a set, each member is formatted recursively with its own copy of the
// context; "parts" sets nested inside a segmented set are skipped.
void IFlatFormatter::Format(const CSeq_entry& entry, IFlatItemOStream& out,
                            IFlatFormatter::TFilterFlags flags,
                            CFlatContext* ctx)
{
    CRef<CFlatContext> ctx0;
    if (ctx == 0) {
        ctx0.Reset(new CFlatContext);
        ctx = ctx0;
        ctx->SetFlags(entry, true);
    } else {
        ctx->SetFlags(entry, false);
    }

    if (entry.IsSeq()) {
        const CBioseq& seq = entry.GetSeq();
        if (flags & (seq.IsAa() ? fSkipProteins : fSkipNucleotides)) {
            return;
        }
        Format(seq, out, ctx);
    } else {
        const CBioseq_set& bss = entry.GetSet();
        ITERATE (CBioseq_set::TSeq_set, it, bss.GetSeq_set()) {
            if (ctx->InSegSet()  &&  (*it)->IsSet()
                &&  (*it)->GetSet().GetClass() == CBioseq_set::eClass_parts) {
                // skip internal parts sets -- covered indirectly
                continue;
            }
            // Each child gets its own copy so that changes made while
            // formatting it do not leak into its siblings.
            CRef<CFlatContext> ctx2(new CFlatContext(*ctx));
            Format(**it, out, flags, ctx2);
            if (ctx->GetSegmentCount() > 0) {
                ++ctx->m_SegmentNum;
            }
        }
    }
}


// Format a single Bioseq.
//
// seq  Sequence to format.
// out  Stream receiving the generated flat-file items.
// ctx  Context to use.  When null, a fresh context is created and, if the
//      sequence has a parent entry, its flags are set from that entry.
//
// Segmented sequences are first offered to x_FormatSegments(); if that
// handles them nothing further is emitted here.  Otherwise the items are
// emitted in this order: forehead, head, keywords, (segment), source,
// references, comment, (primary), feature header, source features, and then
// either a WGS range, genome info, or the ordinary features / contig /
// sequence data, followed by the tail.
void IFlatFormatter::Format(const CBioseq& seq, IFlatItemOStream& out,
                            CFlatContext* ctx)
{
    CRef<CFlatContext> ctx0;
    if (ctx == 0) {
        ctx0.Reset(new CFlatContext);
        ctx = ctx0;
        if (seq.GetParentEntry()) {
            ctx->SetFlags(*seq.GetParentEntry(), true);
        }
    }

    // XXX - also count deltas containing external references
    bool contig = false; // put in ctx instead?
    if (seq.GetInst().GetRepr() == CSeq_inst::eRepr_seg) {
        if (x_FormatSegments(seq, out, *ctx)) {
            return;
        } else if (m_Style != eStyle_Master
                   ||  (m_Flags & fShowContigInMaster)) {
            contig = true;
        }
    }

    ctx->m_Formatter = const_cast<IFlatFormatter*>(this);
    ctx->m_Handle    = m_Scope->GetBioseqHandle(seq);
    ctx->m_References.clear();
    ctx->m_Mol       = seq.GetInst().GetMol();
    ctx->m_IsProt    = seq.IsAa();
    if ( !ctx->m_Location ) {
        // No explicit location was requested: cover the whole sequence.
        CRef<CSeq_loc> loc(new CSeq_loc);
        loc->SetWhole().Assign(*ctx->m_Handle.GetSeqId());
        ctx->m_Location = loc;
    }

    out << new CFlatForehead(*ctx);
    out << new CFlatHead(*ctx);
    out << new CFlatKeywords(*ctx);
    if (ctx->GetSegmentCount()) {
        out << new CFlatSegment(*ctx);
    }
    out << new CFlatSource(*ctx);
    x_FormatReferences(*ctx, out);
    out << new CFlatComment(*ctx);
    if (ctx->IsTPA()) { // also some types of refseq...
        out << new CFlatPrimary(*ctx);
    }
    out << new CFlatFeatHeader;
    x_FormatFeatures(*ctx, out, true);
    if (ctx->IsWGSMaster()) {
        out << new CFlatWGSRange(*ctx);
    } else if (ctx->IsRefSeqGenome()) { // NS_
        out << new CFlatGenomeInfo(*ctx);
    } else {
        if ( !contig  ||  (m_Flags & fShowContigFeatures)
            ||  m_Style == eStyle_Master) {
            x_FormatFeatures(*ctx, out, false);
        }
        if (contig) {
            out << new CFlatContig(*ctx);
        }
        if ( !contig  ||  m_Style == eStyle_Master) {
            out << new CFlatDataHeader(*ctx);
            out << new CFlatData(*ctx);
        }
    }
    out << new CFlatTail;
}


// Format the sequence referenced by a location.
//
// loc            Location to format; stored in the context as m_Location.
// adjust_coords  Stored in the context as m_AdjustCoords.  When false, the
//                location is asserted (debug builds) to lie on one Bioseq.
// out            Stream receiving the generated flat-file items.
// ctx            Context to use.  When null, a fresh context is created and,
//                if the resolved sequence has a parent entry, its flags are
//                set from that entry.
void IFlatFormatter::Format(const CSeq_loc& loc, bool adjust_coords,
                            IFlatItemOStream& out, CFlatContext* ctx)
{
    if ( !adjust_coords ) {
        _ASSERT(sequence::IsOneBioseq(loc)); // otherwise, should split...
    }
    CBioseq_Handle h = m_Scope->GetBioseqHandle(loc);
    CRef<CFlatContext> ctx0;
    if (ctx == 0) {
        ctx0.Reset(new CFlatContext);
        ctx = ctx0;
        // Guard against a sequence without a parent entry, exactly as the
        // CBioseq overload does, instead of dereferencing blindly.
        if (h.GetBioseqCore()->GetParentEntry()) {
            ctx->SetFlags(*h.GetBioseqCore()->GetParentEntry(), true);
        }
    }
    ctx->m_Location.Reset(&loc);
    ctx->m_AdjustCoords = adjust_coords;
    Format(h.GetBioseq(), out, ctx);
}


// Return a copy of s with each '~' rewritten according to style.
//
// eTilde_tilde    The string is returned unchanged.
// eTilde_space    A tilde is kept when directly followed by a digit, or by
//                 ' ' or '(' and then a digit; otherwise it becomes a space.
// eTilde_newline  "~~" collapses to a single '~'; a lone '~' becomes '\n'.
// (other)         Tildes are kept as they are.
string IFlatFormatter::ExpandTildes(const string& s, ETildeStyle style)
{
    if (style == eTilde_tilde) {
        return s;
    }

    SIZE_TYPE start = 0, tilde, length = s.size();
    string result;
    while (start < length  &&  (tilde = s.find('~', start)) != NPOS) {
        result += s.substr(start, tilde - start);
        start = tilde + 1;
        // Character after the tilde, or 0 when the tilde ends the string.
        char next = start < length ? s[start] : 0;
        switch (style) {
        case eTilde_space:
            // isdigit() requires a value representable as unsigned char;
            // plain char may be negative for non-ASCII bytes.
            if ((start < length
                 &&  isdigit(static_cast<unsigned char>(next)))
                ||  (start + 1 < length  &&  (next == ' '  ||  next == '(')
                     &&  isdigit(static_cast<unsigned char>(s[start + 1])))) {
                result += '~';
            } else {
                result += ' ';
            }
            break;

        case eTilde_newline:
            if (next == '~') {
                result += '~';
                ++start; // consume the second tilde of the pair
            } else {
                result += '\n';
            }
            break;

        default: // just keep it, for lack of better ideas
            result += '~';
            break;
        }
    }
    result += s.substr(start);
    return result;
}


// Format a segmented Bioseq one segment at a time.
//
// Returns false, without emitting anything, unless the style is
// eStyle_Segment, or the style is eStyle_Normal and ctx.InSegSet() holds.
// Otherwise sets ctx.m_SegmentCount to the number of non-null segments,
// formats each non-null segment with its own copy of ctx (numbering them
// from 1 via ctx.m_SegmentNum), and returns true.
bool IFlatFormatter::x_FormatSegments(const CBioseq& seq,
                                      IFlatItemOStream& out, CFlatContext& ctx)
{
    // Proceed iff either the style is segmented or the style is
    // normal and we have near segments
    if (m_Style != eStyle_Segment
        &&  (m_Style != eStyle_Normal  ||  !ctx.InSegSet())) {
        return false; // just treat as a normal sequence
    }

    const CSeg_ext::Tdata& segs = seq.GetInst().GetExt().GetSeg().Get();

    // The total must be known before any segment is formatted, because
    // every per-segment context is copied from ctx.
    ctx.m_SegmentCount = 0;
    ITERATE (CSeg_ext::Tdata, it, segs) {
        if ( !(*it)->IsNull() ) {
            ++ctx.m_SegmentCount;
        }
    }

    ctx.m_SegmentNum = 1;
    ITERATE (CSeg_ext::Tdata, it, segs) {
        if ( !(*it)->IsNull() ) {
            // Only allocate a context copy for segments actually formatted.
            CRef<CFlatContext> ctx2(new CFlatContext(ctx));
            Format(**it, true, out, ctx2);
            ++ctx.m_SegmentNum;
        }
    }
    return true;
}


// Collect, sort and emit the references for the context's sequence.
//
// References are appended to ctx.m_References from the Pub descriptors on
// ctx.GetHandle() (with no location) and from the Pub features found on
// ctx.GetLocation() (with the feature's location), then sorted with
// CFlatReference::Sort() and written to out in the resulting order.
void IFlatFormatter::x_FormatReferences(CFlatContext& ctx,
                                        IFlatItemOStream& out)
{
    typedef CRef<CFlatReference> TRefRef;

    for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_Pub);  it;  ++it) {
        ctx.m_References.push_back
            (TRefRef(new CFlatReference(it->GetPub(), 0, ctx)));
    }
    for (CFeat_CI it(ctx.GetHandle().GetScope(), ctx.GetLocation(),
                     CSeqFeatData::e_Pub);
         it;  ++it) {
        ctx.m_References.push_back
            (TRefRef(new CFlatReference(it->GetData().GetPub(),
                                        &it->GetLocation(), ctx)));
    }

    CFlatReference::Sort(ctx.m_References, ctx);
    ITERATE (vector<TRefRef>, it, ctx.m_References) {
        out << *it;
    }
}


// Order feature references by the features they point to (both references
// are dereferenced, so neither may be null).  Used by list::merge() in
// x_FormatFeatures().
inline
static bool operator <(const CConstRef<IFlattishFeature>& f1,
                       const CConstRef<IFlattishFeature>& f2)
{
    return *f1 < *f2;
}


// Emit feature items for the context's sequence.
//
// ctx     Context supplying the sequence handle and location.
// out     Stream receiving the generated items.
// source  When true, only source features are emitted: one for each Org or
//         Source descriptor, then one for each Org or Biosrc feature.
//         When false, all features other than Pub, Org and Biosrc are
//         emitted; for proteins (ctx.IsProt()) the features located by
//         product are gathered first and merged with the ordinary ones.
void IFlatFormatter::x_FormatFeatures(CFlatContext& ctx,
                                      IFlatItemOStream& out, bool source)
{
    CScope& scope = ctx.GetHandle().GetScope();
    typedef CConstRef<IFlattishFeature> TFFRef;
    list<TFFRef> by_product;  // features found via their product
    list<TFFRef> by_location; // ordinary features, held back for merging

    // XXX -- should select according to flags; may require merging/re-sorting.
    // (Generally needs lots of additional logic, basically...)
    if (source) {
        for (CSeqdesc_CI it(ctx.GetHandle());  it;  ++it) {
            switch (it->Which()) {
            case CSeqdesc::e_Org:
                out << new CFlattishSourceFeature(it->GetOrg(), ctx);
                break;
            case CSeqdesc::e_Source:
                out << new CFlattishSourceFeature(it->GetSource(), ctx);
                break;
            default:
                break;
            }
        }
    } else if (ctx.IsProt()) { // broaden condition?
        for (CFeat_CI it(scope, ctx.GetLocation(), CSeqFeatData::e_not_set,
                         SAnnotSelector::eOverlap_Intervals,
                         SAnnotSelector::eResolve_All, CFeat_CI::e_Product);
             it;  ++it) {
            by_product.push_back(TFFRef(new CFlattishFeature
                                        (*it, ctx, &it->GetProduct(), true)));
        }
    }

    for (CFeat_CI it(scope, ctx.GetLocation(), CSeqFeatData::e_not_set,
                     SAnnotSelector::eOverlap_Intervals,
                     SAnnotSelector::eResolve_All);
         it;  ++it) {
        switch (it->GetData().Which()) {
        case CSeqFeatData::e_Pub: // done as REFERENCEs
            break;
        case CSeqFeatData::e_Org:
        case CSeqFeatData::e_Biosrc:
            if (source) {
                out << new CFlattishSourceFeature(*it, ctx);
            }
            break;
        default:
            if ( !source ) {
                if (by_product.empty()) { // no merging to worry about
                    out << new CFlattishFeature(*it, ctx);
                } else {
                    by_location.push_back
                        (TFFRef(new CFlattishFeature(*it, ctx)));
                }
            }
            break;
        }
    }

    if ( !by_product.empty() ) {
        // merge() uses the operator< defined above; it presumes both lists
        // are already ordered -- NOTE(review): confirm CFeat_CI ordering.
        by_product.merge(by_location);
        ITERATE (list<TFFRef>, it, by_product) {
            out << *it;
        }
    }
}


END_SCOPE(objects)
END_NCBI_SCOPE

/*
* ===========================================================================
*
* $Log: flat_formatter.cpp,v $
* Revision 1000.2  2004/06/01 19:43:08  gouriano
* PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.9
*
* Revision 1.9  2004/05/21 21:42:53  gorelenk
* Added PCH ncbi_pch.hpp
*
* Revision 1.8  2004/04/05 15:56:15  grichenk
* Redesigned CAnnotTypes_CI: moved all data and data collecting
* functions to CAnnotDataCollector. CAnnotTypes_CI is no more
* inherited from SAnnotSelector.
*
* Revision 1.7  2003/12/02 19:21:26  ucko
* Fix a potential infinite loop in tilde expansion.
*
* Revision 1.6  2003/06/02 16:06:42  dicuccio
* Rearranged src/objects/ subtree.  This includes the following shifts:
*     - src/objects/asn2asn --> arc/app/asn2asn
*     - src/objects/testmedline --> src/objects/ncbimime/test
*     - src/objects/objmgr --> src/objmgr
*     - src/objects/util --> src/objmgr/util
*     - src/objects/alnmgr --> src/objtools/alnmgr
*     - src/objects/flat --> src/objtools/flat
*     - src/objects/validator --> src/objtools/validator
*     - src/objects/cddalignview --> src/objtools/cddalignview
* In addition, libseq now includes six of the objects/seq... libs, and libmmdb
* replaces the three libmmdb? libs.
*
* Revision 1.5  2003/03/21 18:49:17  ucko
* Turn most structs into (accessor-requiring) classes; replace some
* formerly copied fields with pointers to the original data.
*
* Revision 1.4  2003/03/18 21:56:06  grichenk
* Removed obsolete class CAnnot_CI
*
* Revision 1.3  2003/03/11 15:37:51  kuznets
* iterate -> ITERATE
*
* Revision 1.2  2003/03/10 22:02:14  ucko
* Rename s_FFLess to operator <, due to bogus MSVC pickiness.
*
* Revision 1.1  2003/03/10 16:39:09  ucko
* Initial check-in of new flat-file generator
*
*
* ===========================================================================
*/
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?