flat_items.cpp
来自「ncbi源码」· C++ 代码 · 共 376 行
CPP
376 行
/* * =========================================================================== * PRODUCTION $Log: flat_items.cpp,v $ * PRODUCTION Revision 1000.1 2004/06/01 19:43:20 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.6 * PRODUCTION * =========================================================================== *//* $Id: flat_items.cpp,v 1000.1 2004/06/01 19:43:20 gouriano Exp $* ===========================================================================** PUBLIC DOMAIN NOTICE* National Center for Biotechnology Information** This software/database is a "United States Government Work" under the* terms of the United States Copyright Act. It was written as part of* the author's official duties as a United States Government employee and* thus cannot be copyrighted. This software/database is freely available* to the public for use. The National Library of Medicine and the U.S.* Government have not placed any restriction on its use or reproduction.** Although all reasonable efforts have been taken to ensure the accuracy* and reliability of the software and data, the NLM and the U.S.* Government do not and cannot warrant the performance or results that* may be obtained by using this software or data. The NLM and the U.S.* Government disclaim all warranties, express or implied, including* warranties of performance, merchantability or fitness for any particular* purpose.** Please cite the author in any work or product based on this material.** ===========================================================================** Author: Aaron Ucko, NCBI** File Description:* new (early 2003) flat-file generator -- most item types** ===========================================================================*/#include <ncbi_pch.hpp>#include <objtools/flat/flat_items.hpp>#include <serial/iterator.hpp>#include <objects/general/Dbtag.hpp>#include <objects/general/Object_id.hpp>#include <objects/general/User_field.hpp>#include <objects/seq/Bioseq.hpp>#include <objects/seq/Seq_hist.hpp>#include <objects/seqalign/Seq_align.hpp>#include <objects/seqblock/EMBL_block.hpp>#include <objects/seqblock/GB_block.hpp>#include <objects/seqblock/PIR_block.hpp>#include <objects/seqblock/PRF_block.hpp>#include <objects/seqblock/SP_block.hpp>#include <objects/seqfeat/BioSource.hpp>#include <objects/seqfeat/BinomialOrgName.hpp>#include <objects/seqfeat/OrgName.hpp>#include <objects/seqfeat/PartialOrgName.hpp>#include <objects/seqfeat/Org_ref.hpp>#include <objects/seqfeat/TaxElement.hpp>#include <objtools/alnmgr/alnmap.hpp>#include <objmgr/bioseq_handle.hpp>#include <objmgr/seqdesc_ci.hpp>#include <objmgr/seq_vector.hpp>BEGIN_NCBI_SCOPEBEGIN_SCOPE(objects)CFlatKeywords::CFlatKeywords(const CFlatContext& ctx){ for (CSeqdesc_CI it(ctx.GetHandle()); it; ++it) { switch (it->Which()) { // Grr, MSVC won't let me handle everything with a single template. case CSeqdesc::e_Pir: if (it->GetPir().IsSetKeywords()) { m_Keywords.insert(m_Keywords.end(), it->GetPir().GetKeywords().begin(), it->GetPir().GetKeywords().end()); } break; case CSeqdesc::e_Genbank: if (it->GetGenbank().IsSetKeywords()) { m_Keywords.insert(m_Keywords.end(), it->GetGenbank().GetKeywords().begin(), it->GetGenbank().GetKeywords().end()); } break; case CSeqdesc::e_Sp: if (it->GetSp().IsSetKeywords()) { m_Keywords.insert(m_Keywords.end(), it->GetSp().GetKeywords().begin(), it->GetSp().GetKeywords().end()); } break; case CSeqdesc::e_Embl: if (it->GetEmbl().IsSetKeywords()) { m_Keywords.insert(m_Keywords.end(), it->GetEmbl().GetKeywords().begin(), it->GetEmbl().GetKeywords().end()); } break; case CSeqdesc::e_Prf: if (it->GetPrf().IsSetKeywords()) { m_Keywords.insert(m_Keywords.end(), it->GetPrf().GetKeywords().begin(), it->GetPrf().GetKeywords().end()); } break; default: break; } }}CFlatSource::CFlatSource(const CFlatContext& ctx){ for (CSeqdesc_CI it(ctx.GetHandle()); it; ++it) { switch (it->Which()) { case CSeqdesc::e_Org: case CSeqdesc::e_Source: { if ( !m_Descriptor ) { m_Descriptor.Reset(&*it); } const COrg_ref& org = (it->IsOrg() ? it->GetOrg() : it->GetSource().GetOrg()); if (org.IsSetOrgname()) { // done first so taxname can override m_FormalName org.GetOrgname().GetFlatName(m_FormalName, &m_Lineage); } if (org.IsSetTaxname()) { m_FormalName = org.GetTaxname(); } if (org.IsSetCommon()) { m_CommonName = org.GetCommon(); } m_TaxID = org.GetTaxId(); break; } default: break; } }}CFlatComment::CFlatComment(const CFlatContext& ctx){ string delim; for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_Comment); it; ++it) { m_Comment += delim + it->GetComment(); delim = '\n'; } for (CFeat_CI it(ctx.GetHandle().GetScope(), ctx.GetLocation(), CSeqFeatData::e_Comment); it; ++it) { if (it->IsSetComment()) { // ought to be, but just in case... m_Comment += delim + it->GetComment(); delim = '\n'; } } }CFlatPrimary::CFlatPrimary(const CFlatContext& ctx) : m_IsRefSeq(ctx.IsRefSeq()){ const CSeq_inst& inst = ctx.GetHandle().GetBioseqCore()->GetInst(); if ( !inst.IsSetHist() || !inst.GetHist().IsSetAssembly()) { return; } ITERATE (CSeq_hist::TAssembly, it, inst.GetHist().GetAssembly()) { if ( !(*it)->GetSegs().IsDenseg() ) { // complain continue; } CAlnMap aln((*it)->GetSegs().GetDenseg()); // XXX - more sanity checks (2 rows, first is seq, running forward) // XXX - should honor m_Location SPiece p; p.m_Span = aln.GetSeqRange(0); p.m_PrimaryID = &ctx.GetPreferredSynonym(aln.GetSeqId(1)); p.m_PrimarySpan = aln.GetSeqRange(1); p.m_Complemented = aln.IsNegativeStrand(1); m_Pieces.push_back(p); }}const char* CFlatPrimary::GetHeader(void) const{ if (m_IsRefSeq) { return "REFSEQ_SPAN PRIMARY_IDENTIFIER PRIMARY_SPAN COMP"; } else { return "TPA_SPAN PRIMARY_IDENTIFIER PRIMARY_SPAN COMP"; }}string& CFlatPrimary::SPiece::Format(string& s) const{ s += NStr::IntToString(m_Span.GetFrom()) + '-' + NStr::IntToString(m_Span.GetTo()); s.resize(20, ' '); s += m_PrimaryID->GetSeqIdString(true); s.resize(39, ' '); s += NStr::IntToString(m_PrimarySpan.GetFrom()) + '-' + NStr::IntToString(m_PrimarySpan.GetTo()); s.resize(59, ' '); s += m_Complemented ? 'c' : ' '; return s;}void CFlatDataHeader::GetCounts(TSeqPos& a, TSeqPos& c, TSeqPos& g, TSeqPos& t, TSeqPos& other) const{ if ( !m_IsProt && !m_As && !m_Cs && !m_Gs && !m_Ts && !m_Others ) { CSeqVector v = m_Handle.GetSequenceView (*m_Loc, CBioseq_Handle::eViewConstructed); // TSeqPos counts[numeric_limits<CSeqVector::TResidue>::max() + 1]; TSeqPos counts[256]; memset(counts, 0, sizeof(counts)); string buf; // avoids slow CSeqVector::operator[] static const TSeqPos chunk_size = 4096; for (TSeqPos start = 0; start < v.size(); start += chunk_size) { TSeqPos count = min(chunk_size, v.size() - start); v.GetSeqData(start, start + count, buf); for (TSeqPos i = 0; i < count; ++i) { ++counts[buf[i]]; } } m_As = counts[1]; m_Cs = counts[2]; m_Gs = counts[4]; m_Ts = counts[8]; m_Others = v.size() - m_As - m_Cs - m_Gs - m_Ts; } a = m_As; c = m_Cs; g = m_Gs; t = m_Ts; other = m_Others;}CFlatWGSRange::CFlatWGSRange(const CFlatContext& ctx){ for (CSeqdesc_CI desc(ctx.GetHandle(), CSeqdesc::e_User); desc; ++desc) { const CUser_object& uo = desc->GetUser(); if ( !uo.GetType().IsStr() || NStr::CompareNocase(uo.GetType().GetStr(), "WGSProjects")) { continue; } ITERATE (CUser_object::TData, it, uo.GetData()) { if ( !(*it)->GetLabel().IsStr() ) { // complain? continue; } const string& label = (*it)->GetLabel().GetStr(); if ( !NStr::CompareNocase(label, "WGS_accession_first") ) { // san-check m_First = (*it)->GetData().GetStr(); } else if ( !NStr::CompareNocase(label, "WGS_accession_last") ) { // san-check m_Last = (*it)->GetData().GetStr(); } } if ( !m_UO ) { m_UO.Reset(&uo); } } if (m_First.empty() || m_Last.empty()) { // complain? // reconstruct from other info SIZE_TYPE digit_pos = ctx.GetAccession().find_first_of("0123456789"); string prefix = ctx.GetAccession().substr(0, digit_pos + 2); m_First = prefix + "000001"; CNcbiOstrstream oss; oss << prefix << setw(6) << setfill('0') << ctx.GetHandle().GetBioseqCore()->GetInst().GetLength(); m_Last = CNcbiOstrstreamToString(oss); }}CFlatGenomeInfo::CFlatGenomeInfo(const CFlatContext& ctx){ for (CSeqdesc_CI desc(ctx.GetHandle(), CSeqdesc::e_User); desc; ++desc) { const CUser_object& uo = desc->GetUser(); if ( !uo.GetType().IsStr() || NStr::CompareNocase(uo.GetType().GetStr(), "GenomeProject")) { continue; } ITERATE (CUser_object::TData, it, uo.GetData()) { if ( !(*it)->GetLabel().IsStr() ) { // complain? continue; } const string& label = (*it)->GetLabel().GetStr(); if ( !NStr::CompareNocase(label, "accession") ) { // san-check m_Accession = &(*it)->GetData().GetStr(); } else if ( !NStr::CompareNocase(label, "Moltype") ) { // san-check m_Moltype = &(*it)->GetData().GetStr(); } } if ( !m_UO ) { m_UO.Reset(&uo); } } // complain if no accession found? (moltype seems optional)}END_SCOPE(objects)END_NCBI_SCOPE/** ===========================================================================** $Log: flat_items.cpp,v $* Revision 1000.1 2004/06/01 19:43:20 gouriano* PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.6** Revision 1.6 2004/05/21 21:42:53 gorelenk* Added PCH ncbi_pch.hpp** Revision 1.5 2003/06/02 16:06:42 dicuccio* Rearranged src/objects/ subtree. This includes the following shifts:* - src/objects/asn2asn --> arc/app/asn2asn* - src/objects/testmedline --> src/objects/ncbimime/test* - src/objects/objmgr --> src/objmgr* - src/objects/util --> src/objmgr/util* - src/objects/alnmgr --> src/objtools/alnmgr* - src/objects/flat --> src/objtools/flat* - src/objects/validator --> src/objtools/validator* - src/objects/cddalignview --> src/objtools/cddalignview* In addition, libseq now includes six of the objects/seq... libs, and libmmdb* replaces the three libmmdb? libs.** Revision 1.4 2003/03/21 18:49:17 ucko* Turn most structs into (accessor-requiring) classes; replace some* formerly copied fields with pointers to the original data.** Revision 1.3 2003/03/11 15:37:51 kuznets* iterate -> ITERATE** Revision 1.2 2003/03/10 22:04:43 ucko* Expand out x_AddKeys manually because MSVC wouldn't.** Revision 1.1 2003/03/10 16:39:09 ucko* Initial check-in of new flat-file generator*** ===========================================================================*/
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?