flat_head.cpp
来自「ncbi源码」· C++ 代码 · 共 759 行 · 第 1/2 页
CPP
759 行
/*
 * ===========================================================================
 * PRODUCTION $Log: flat_head.cpp,v $
 * PRODUCTION Revision 1000.2 2004/06/01 19:43:17 gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.8
 * PRODUCTION
 * ===========================================================================
 */
/*  $Id: flat_head.cpp,v 1000.2 2004/06/01 19:43:17 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author:  Aaron Ucko, NCBI
*
* File Description:
*   New (early 2003) flat-file generator -- representation of "header"
*   data, which translates into a format-dependent sequence of paragraphs.
*
* ===========================================================================
*/

#include <ncbi_pch.hpp>
#include <objtools/flat/flat_head.hpp>
#include <corelib/ncbiutil.hpp>
#include <serial/iterator.hpp>

#include <objects/general/Dbtag.hpp>
#include <objects/general/Object_id.hpp>
#include <objects/seq/Bioseq.hpp>
#include <objects/seqblock/GB_block.hpp>
#include <objects/seqblock/PDB_block.hpp>
#include <objects/seqblock/PDB_replace.hpp>
#include <objects/seqblock/PIR_block.hpp>
#include <objects/seqblock/PRF_ExtraSrc.hpp>
#include <objects/seqblock/PRF_block.hpp>
#include <objects/seqblock/SP_block.hpp>
#include <objects/seqfeat/OrgName.hpp>
#include <objects/seqloc/PDB_mol_id.hpp>
#include <objects/seqloc/PDB_seq_id.hpp>
#include <objects/seqloc/Textseq_id.hpp>

#include <objmgr/bioseq_handle.hpp>
#include <objmgr/feat_ci.hpp>
#include <objmgr/scope.hpp>
#include <objmgr/seqdesc_ci.hpp>
#include <objmgr/util/sequence.hpp>

BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)


// Fold one more date into the running create/update dates:
// m_UpdateDate ends up as the latest date seen so far and m_CreateDate as
// the earliest.  The first date offered initializes both.
// The address of `date` is what gets stored, so no copy is made here.
// NOTE(review): presumably the members are reference-counting smart
// pointers (they expose Empty()) -- confirm against flat_head.hpp.
inline
void CFlatHead::x_AddDate(const CDate& date)
{
    if (m_UpdateDate.Empty()
        ||  date.Compare(*m_UpdateDate) == CDate::eCompare_after) {
        m_UpdateDate = &date;
    }
    if (m_CreateDate.Empty()
        ||  date.Compare(*m_CreateDate) == CDate::eCompare_before) {
        m_CreateDate = &date;
    }
}


// Build the header for the sequence described by `ctx`.
//
// Besides filling in this object's own members (locus, other/secondary
// IDs, strandedness, topology, definition, divisions, dates, DB source),
// the constructor also writes several fields of `ctx` itself: primary ID,
// accession, GI, the RefSeq / TPA / WGS-master / RefSeq-genome flags,
// length and biomol.
CFlatHead::CFlatHead(CFlatContext& ctx)
    : m_Strandedness(CSeq_inst::eStrand_not_set),
      m_Topology    (CSeq_inst::eTopology_not_set),
      m_GBDivision  (0),
      m_Context     (&ctx)
{
    CScope&                     scope = ctx.GetHandle().GetScope();
    CBioseq_Handle::TBioseqCore seq   = ctx.GetHandle().GetBioseqCore();

    ctx.m_PrimaryID = FindBestChoice(seq->GetId(), CSeq_id::Score);
    ctx.m_Accession = ctx.GetPrimaryID().GetSeqIdString(true);

    // Locus: prefer the text ID's name, then its accession, then fall back
    // to the unversioned ID string.
    {{
        const CTextseq_id* tsid = ctx.GetPrimaryID().GetTextseq_Id();
        if (tsid  &&  tsid->IsSetName()) {
            m_Locus = tsid->GetName();
        } else if (tsid  &&  tsid->IsSetAccession()) {
            m_Locus = tsid->GetAccession();
        } else {
            // complain?
            m_Locus = ctx.GetPrimaryID().GetSeqIdString(false);
        }
    }}

    // Walk every ID on the sequence: collect the non-primary ones and set
    // the context flags that depend on ID type or accession class.
    ITERATE (CBioseq::TId, it, seq->GetId()) {
        if (*it != &ctx.GetPrimaryID()) {
            m_OtherIDs.push_back(*it);
        }
        switch ((*it)->Which()) {
        case CSeq_id::e_Gi:
            ctx.m_GI = (*it)->GetGi();
            break;
        case CSeq_id::e_Other:
            ctx.m_IsRefSeq = true;
            break;
        case CSeq_id::e_Tpg: case CSeq_id::e_Tpe: case CSeq_id::e_Tpd:
            ctx.m_IsTPA = true;
            break;
        default:
            break;
        }

        CSeq_id::EAccessionInfo ai     = (*it)->IdentifyAccession();
        // GetTextseq_Id() is null-checked above for the primary ID, so it
        // can return null; guard it here too rather than dereferencing
        // blindly.
        const CTextseq_id*      acc_id = (*it)->GetTextseq_Id();
        if ((ai & CSeq_id::eAcc_division_mask) == CSeq_id::eAcc_wgs
            &&  acc_id
            &&  NStr::EndsWith(acc_id->GetAccession(), "000000")) {
            ctx.m_IsWGSMaster = true;
        } else if (ai == CSeq_id::eAcc_refseq_genome) {
            ctx.m_IsRefSeqGenome = true;
        }
    }

    {{
        const CSeq_inst& inst = seq->GetInst();
        if (inst.IsSetStrand()) {
            m_Strandedness = inst.GetStrand();
        }
        m_Topology = inst.GetTopology();
    }}

    ctx.m_Length = sequence::GetLength(ctx.GetLocation(), &scope);

    // The definition line always ends with a period.
    m_Definition = sequence::GetTitle(ctx.GetHandle());
    if ( !NStr::EndsWith(m_Definition, ".") ) {
        m_Definition += '.';
    }

    if (ctx.IsProt()) { // populate m_SourceIDs
        x_AddDBSource();
    }

    // Harvest division, secondary accessions, dates and biomol from the
    // sequence's descriptors.
    for (CSeqdesc_CI it(ctx.GetHandle());  it;  ++it) {
        switch (it->Which()) {
            // bother translating old GIBB-* data?
        case CSeqdesc::e_Org:
        case CSeqdesc::e_Source:
            // Only used when no division has been recorded yet.
            if ( !m_GBDivision ) {
                // iterate to deal with hybrids
                for (CTypeConstIterator<COrgName> orgn(*it);  orgn;  ++orgn) {
                    if (orgn->IsSetDiv()) {
                        m_GBDivision = &orgn->GetDiv();
                        BREAK(orgn);
                    }
                }
            }
            break;

        case CSeqdesc::e_Genbank:
        {
            const CGB_block& gb = it->GetGenbank();
            if (gb.IsSetExtra_accessions()) {
                m_SecondaryIDs.insert(m_SecondaryIDs.end(),
                                      gb.GetExtra_accessions().begin(),
                                      gb.GetExtra_accessions().end());
            }
            if (gb.IsSetEntry_date()) {
                x_AddDate(gb.GetEntry_date());
            }
            // Unlike the Org/Source case, this overwrites any division
            // recorded earlier.
            if (gb.IsSetDiv()) {
                m_GBDivision = &gb.GetDiv();
            }
            break;
        }

        case CSeqdesc::e_Sp:
        {
            const CSP_block& sp = it->GetSp();
            if (sp.IsSetExtra_acc()) {
                m_SecondaryIDs.insert(m_SecondaryIDs.end(),
                                      sp.GetExtra_acc().begin(),
                                      sp.GetExtra_acc().end());
            }
            if (sp.IsSetCreated()) {
                x_AddDate(sp.GetCreated());
            }
            if (sp.IsSetSequpd()) {
                x_AddDate(sp.GetSequpd());
            }
            if (sp.IsSetAnnotupd()) {
                x_AddDate(sp.GetAnnotupd());
            }
            break;
        }

        case CSeqdesc::e_Embl:
        {
            const CEMBL_block& embl = it->GetEmbl();
            if (embl.IsSetDiv()) {
                m_EMBLDivision = embl.GetDiv();
            }
            x_AddDate(embl.GetCreation_date()); // mandatory field
            x_AddDate(embl.GetUpdate_date());   // mandatory field
            if (embl.IsSetExtra_acc()) {
                m_SecondaryIDs.insert(m_SecondaryIDs.end(),
                                      embl.GetExtra_acc().begin(),
                                      embl.GetExtra_acc().end());
            }
            break;
        }

        case CSeqdesc::e_Create_date:
            x_AddDate(it->GetCreate_date());
            break;

        case CSeqdesc::e_Update_date:
            x_AddDate(it->GetUpdate_date());
            break;

        case CSeqdesc::e_Pdb:
        {
            const CPDB_block& pdb = it->GetPdb();
            x_AddDate(pdb.GetDeposition()); // mandatory field
            // replacement history -> secondary IDs?
            break;
        }

        case CSeqdesc::e_Molinfo:
        {
            const CMolInfo& mi = it->GetMolinfo();
            if (mi.IsSetBiomol()) {
                ctx.m_Biomol = mi.GetBiomol();
            }
            // Explicit break: this case used to fall through into
            // `default`, which only breaks, so behavior is unchanged.
            break;
        }

        default:
            break;
        }
    }
}


// Return a string literal naming the molecule type for the header line.
//
// When the formatter's database is EMBL and its mode is at most
// eMode_Entrez, every biomol maps to one of "DNA", "RNA" or "AA "
// (trailing space).  Otherwise more specific names such as "mRNA" or
// "snoRNA" are used and peptides yield " AA" (leading space).  In both
// branches an unlisted biomol falls back to the context's GetMol() value.
const char* CFlatHead::GetMolString(void) const
{
    const IFlatFormatter& f = m_Context->GetFormatter();
    if (f.GetDatabase() == IFlatFormatter::eDB_EMBL
        &&  f.GetMode() <= IFlatFormatter::eMode_Entrez) {
        switch (m_Context->GetBiomol()) {
        case CMolInfo::eBiomol_genomic:
        case CMolInfo::eBiomol_other_genetic:
        case CMolInfo::eBiomol_genomic_mRNA:
            return "DNA";

        case CMolInfo::eBiomol_pre_RNA:
        case CMolInfo::eBiomol_mRNA:
        case CMolInfo::eBiomol_rRNA:
        case CMolInfo::eBiomol_tRNA:
        case CMolInfo::eBiomol_snRNA:
        case CMolInfo::eBiomol_scRNA:
        case CMolInfo::eBiomol_cRNA:
        case CMolInfo::eBiomol_snoRNA:
        case CMolInfo::eBiomol_transcribed_RNA:
            return "RNA";

        case CMolInfo::eBiomol_peptide:
            return "AA ";

        default:
            switch (m_Context->GetMol()) {
            case CSeq_inst::eMol_dna:  return "DNA";
            case CSeq_inst::eMol_rna:  return "RNA";
            case CSeq_inst::eMol_aa:   return "AA ";
            default:                   return "xxx";
            }
        }
    } else {
        switch (m_Context->GetBiomol()) {
        case CMolInfo::eBiomol_genomic:          return "DNA";
        case CMolInfo::eBiomol_pre_RNA:          return "RNA";
        case CMolInfo::eBiomol_mRNA:             return "mRNA";
        case CMolInfo::eBiomol_rRNA:             return "rRNA";
        case CMolInfo::eBiomol_tRNA:             return "tRNA";
        case CMolInfo::eBiomol_snRNA:            return "uRNA";
        case CMolInfo::eBiomol_scRNA:            return "scRNA";
        case CMolInfo::eBiomol_peptide:          return " AA";
        case CMolInfo::eBiomol_other_genetic:    return "DNA";
        case CMolInfo::eBiomol_genomic_mRNA:     return "DNA";
        case CMolInfo::eBiomol_cRNA:             return "RNA";
        case CMolInfo::eBiomol_snoRNA:           return "snoRNA";
        case CMolInfo::eBiomol_transcribed_RNA:  return "RNA";
        default:
            switch (m_Context->GetMol()) {
            case CSeq_inst::eMol_dna:  return "DNA";
            case CSeq_inst::eMol_rna:  return "RNA";
            case CSeq_inst::eMol_aa:   return " AA";
            // NOTE(review): this literal is a single space in the copy
            // under review; a run of spaces may have been collapsed when
            // the file was extracted -- confirm against the repository.
            default:                   return " ";
            }
        }
    }
}


// Scoring function handed to FindBestChoice() by x_AddDBSource() to pick
// the ID a protein's DBSOURCE is derived from.
// NOTE(review): e_not_set gets kMax_Int, so presumably the lowest score
// wins -- confirm against FindBestChoice() in corelib/ncbiutil.hpp.
inline
static int s_ScoreForDBSource(const CRef<CSeq_id>& x) {
    switch (x->Which()) {
    case CSeq_id::e_not_set:                        return kMax_Int;
    case CSeq_id::e_Gi:                             return 31;
    case CSeq_id::e_Giim:                           return 30;
    case CSeq_id::e_Local: case CSeq_id::e_General: return 20;
    case CSeq_id::e_Other:                          return 18;
    case CSeq_id::e_Gibbmt:                         return 16;
    case CSeq_id::e_Gibbsq: case CSeq_id::e_Patent: return 15;
    case CSeq_id::e_Pdb:                            return 12;
    default:                                        return 10;
    }
}


// Fill m_DBSource for the context's sequence, dispatching on the type of
// the ID chosen by s_ScoreForDBSource; "UNKNOWN" is recorded when no ID is
// found or a general ID's database does not start with "PID".
// NOTE(review): only the first part of this definition is visible in the
// copy under review; the code below is reproduced unchanged up to that
// point.
void CFlatHead::x_AddDBSource(void)
{
    CBioseq_Handle::TBioseqCore seq = m_Context->GetHandle().GetBioseqCore();
    const CSeq_id* id = FindBestChoice(seq->GetId(), s_ScoreForDBSource);

    if ( !id ) {
        m_DBSource.push_back("UNKNOWN");
        return;
    }

    switch (id->Which()) {
    case CSeq_id::e_Pir:
        m_DBSource.push_back(x_FormatDBSourceID(*id));
        x_AddPIRBlock();
        break;

    case CSeq_id::e_Swissprot:
        m_DBSource.push_back(x_FormatDBSourceID(*id));
        x_AddSPBlock();
        break;

    case CSeq_id::e_Prf:
        m_DBSource.push_back(x_FormatDBSourceID(*id));
        x_AddPRFBlock();
        break;

    case CSeq_id::e_Pdb:
        m_DBSource.push_back(x_FormatDBSourceID(*id));
        x_AddPDBBlock();
        break;

    case CSeq_id::e_General:
        if ( !NStr::StartsWith(id->GetGeneral().GetDb(), "PID") ) {
            m_DBSource.push_back("UNKNOWN");
            break;
        }
        // otherwise, fall through
    case CSeq_id::e_Gibbsq: case CSeq_id::e_Gibbmt: case CSeq_id::e_Giim:
    case CSeq_id::e_Genbank: case CSeq_id::e_Embl: case CSeq_id::e_Other:
    case CSeq_id::e_Gi: case CSeq_id::e_Ddbj:
    case CSeq_id::e_Tpg: case CSeq_id::e_Tpe: case CSeq_id::e_Tpd:
    {
        set<CBioseq_Handle> sources;
        CScope&             scope = m_Context->GetHandle().GetScope();
        for (CFeat_CI it(scope, m_Context->GetLocation(),
                         CSeqFeatData::e_not_set,
                         SAnnotSelector::eOverlap_Intervals,
                         SAnnotSelector::eResolve_TSE, CFeat_CI::e_Product);
             it;  ++it) {
            for (CTypeConstIterator<CSeq_id> id2(it->GetLocation());
                 id2;  ++id2) {
                sources.insert(scope.GetBioseqHandle(*id2));
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?