flat_feature.cpp
来自「ncbi源码」· C++ 代码 · 共 822 行 · 第 1/2 页
CPP
822 行
/*
 * ===========================================================================
 * PRODUCTION $Log: flat_feature.cpp,v $
 * PRODUCTION Revision 1000.1 2004/06/01 19:43:05 gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.11
 * PRODUCTION
 * ===========================================================================
 */

/* $Id: flat_feature.cpp,v 1000.1 2004/06/01 19:43:05 gouriano Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
* National Center for Biotechnology Information
*
* This software/database is a "United States Government Work" under the
* terms of the United States Copyright Act. It was written as part of
* the author's official duties as a United States Government employee and
* thus cannot be copyrighted. This software/database is freely available
* to the public for use. The National Library of Medicine and the U.S.
* Government have not placed any restriction on its use or reproduction.
*
* Although all reasonable efforts have been taken to ensure the accuracy
* and reliability of the software and data, the NLM and the U.S.
* Government do not and cannot warrant the performance or results that
* may be obtained by using this software or data. The NLM and the U.S.
* Government disclaim all warranties, express or implied, including
* warranties of performance, merchantability or fitness for any particular
* purpose.
*
* Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author: Aaron Ucko, NCBI
*
* File Description:
* new (early 2003) flat-file generator -- representation of features
* (mainly of interest to implementors)
*
* ===========================================================================
*/

#include <ncbi_pch.hpp>
#include <objtools/flat/flat_quals.hpp>
#include <objtools/flat/flat_gbseq_formatter.hpp>

#include <serial/iterator.hpp>

#include <objects/seq/Bioseq.hpp>
#include <objects/seqfeat/seqfeat__.hpp>

#include <objmgr/scope.hpp>
#include <objmgr/util/sequence.hpp>

#include <algorithm>

BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)


/// Build (once) and return the formatted representation of this feature.
///
/// If m_FF is already set it is returned unchanged.  Otherwise a new
/// CFlatFeature is created from GetKey(), a CFlatLoc built from *m_Loc and
/// *m_Context, and *m_Feat; qualifiers are then collected by x_AddQuals()
/// and rendered by x_FormatQuals().
///
/// @return
///   The (possibly cached) CFlatFeature held in m_FF.
CRef<CFlatFeature> IFlattishFeature::Format(void) const
{
    // extremely rough cut for now -- qualifiers still in progress!
    if (m_FF) {
        return m_FF;
    }

    m_FF.Reset(new CFlatFeature(GetKey(),
                                *new CFlatLoc(*m_Loc, *m_Context),
                                *m_Feat));
    x_AddQuals();
    x_FormatQuals();
    return m_FF;
}


/// Collect the qualifiers that apply to the feature as a whole.
///
/// Records the feature subtype in m_Type, then adds, in order:
///  - the feature comment (eFQ_seqfeat_note);
///  - product-derived qualifiers (coded_by, or translation/transcription
///    plus db_xref / protein_id);
///  - qualifiers taken from the best overlapping gene feature, unless this
///    feature is itself a gene;
///  - imported GenBank qualifiers, title, citation, experimental evidence,
///    db_xrefs, pseudo flag and exception text, each only when set;
///  - type-specific qualifiers for gene, coding-region and protein features
///    via the corresponding x_AddQuals overload.
void CFlattishFeature::x_AddQuals(void) const
{
    CScope&             scope = m_Context->GetHandle().GetScope();
    const CSeqFeatData& data  = m_Feat->GetData();

    m_Type = data.GetSubtype();

    // add various generic qualifiers...
    if (m_Feat->IsSetComment()) {
        x_AddQual(eFQ_seqfeat_note, new CFlatStringQV(m_Feat->GetComment()));
    }

    if (m_Feat->IsSetProduct()) {
        if (m_IsProduct) {
            x_AddQual(eFQ_coded_by, new CFlatSeqLocQV(m_Feat->GetLocation()));
        } else {
            CBioseq_Handle prod = scope.GetBioseqHandle(m_Feat->GetProduct());
            if ( !m_Context->IsProt() ) {
                // NOTE(review): prod is used without checking that the
                // lookup succeeded -- confirm GetBioseqHandle() cannot hand
                // back an unusable handle for this product.
                EFeatureQualifier slot
                    = ((prod.GetBioseqCore()->GetInst().GetMol()
                        == CSeq_inst::eMol_aa)
                       ? eFQ_translation : eFQ_transcription);
                x_AddQual(slot, new CFlatSeqDataQV(m_Feat->GetProduct()));
            }
            try {
                const CSeq_id& id = sequence::GetId(m_Feat->GetProduct(),
                                                    &scope);
                if (id.IsGi()) {
                    // cheat slightly
                    x_AddQual(eFQ_db_xref,
                              new CFlatStringQV
                              ("GI:" + NStr::IntToString(id.GetGi())));
                }
                if (data.IsCdregion()) {
                    x_AddQual(eFQ_protein_id, new CFlatSeqIdQV(id));
                }
            } catch (sequence::CNotUnique&) {
                // Deliberately ignored: the ID-derived qualifiers are
                // simply omitted when GetId() throws CNotUnique.
            }
        }
    }

    if ( !data.IsGene() ) {
        CConstRef<CSeq_feat> gene_feat
            = sequence::GetBestOverlappingFeat(m_Feat->GetLocation(),
                                               CSeqFeatData::e_Gene,
                                               sequence::eOverlap_Simple,
                                               scope);
        if (gene_feat) {
            const CGene_ref& gene = gene_feat->GetData().GetGene();
            string label;
            gene.GetLabel(&label);
            if ( !label.empty() ) {
                // XXX - should expand certain SGML entities
                x_AddQual(eFQ_gene, new CFlatStringQV(label));
            }
            if (gene.IsSetDb()  &&  !data.IsCdregion()  &&  !data.IsRna() ) {
                x_AddQual(eFQ_gene_xref, new CFlatXrefQV(gene.GetDb()));
            }
            // Skip the locus tag when it is what GetLabel() already gave us.
            if (gene.IsSetLocus_tag()  &&  gene.GetLocus_tag() != label) {
                x_AddQual(eFQ_locus_tag,
                          new CFlatStringQV(gene.GetLocus_tag()));
            }
        }
    }

    if (m_Feat->IsSetQual()) {
        x_ImportQuals(m_Feat->GetQual());
    }
    if (m_Feat->IsSetTitle()) {
        x_AddQual(eFQ_label, new CFlatLabelQV(m_Feat->GetTitle()));
    }
    if (m_Feat->IsSetCit()) {
        x_AddQual(eFQ_citation, new CFlatPubSetQV(m_Feat->GetCit()));
    }
    if (m_Feat->IsSetExp_ev()) {
        x_AddQual(eFQ_evidence, new CFlatExpEvQV(m_Feat->GetExp_ev()));
    }
    if (m_Feat->IsSetDbxref()) {
        x_AddQual(eFQ_db_xref, new CFlatXrefQV(m_Feat->GetDbxref()));
    }
    if (m_Feat->IsSetPseudo()) {
        x_AddQual(eFQ_pseudo, new CFlatBoolQV(m_Feat->GetPseudo()));
    }
    if (m_Feat->IsSetExcept_text()) {
        x_AddQual(eFQ_exception, new CFlatStringQV(m_Feat->GetExcept_text()));
    }

    switch (data.Which()) {
    case CSeqFeatData::e_Gene:
        x_AddQuals(data.GetGene());
        break;
    case CSeqFeatData::e_Cdregion:
        x_AddQuals(data.GetCdregion());
        break;
    case CSeqFeatData::e_Prot:
        x_AddQuals(data.GetProt());
        break;
    // ...
    default:
        break;
    }
}


/// Add the qualifiers specific to a gene feature.
///
/// The first non-empty value among locus, description and synonyms (in that
/// order) is stored under eFQ_gene; later non-empty description / synonym
/// values go to eFQ_gene_desc / eFQ_gene_syn.  Non-empty allele and map
/// location, and any set db xrefs and locus tag, are added as well.
///
/// @param gene
///   Gene reference to read the qualifiers from.
void CFlattishFeature::x_AddQuals(const CGene_ref& gene) const
{
    bool got_name = false;  // true once eFQ_gene has been filled

    if (gene.IsSetLocus()  &&  !gene.GetLocus().empty() ) {
        x_AddQual(eFQ_gene, new CFlatStringQV(gene.GetLocus()));
        got_name = true;
    }
    if (gene.IsSetDesc()  &&  !gene.GetDesc().empty() ) {
        x_AddQual(got_name ? eFQ_gene_desc : eFQ_gene,
                  new CFlatStringQV(gene.GetDesc()));
        got_name = true;
    }
    if (gene.IsSetSyn()  &&  !gene.GetSyn().empty() ) {
        ITERATE (CGene_ref::TSyn, it, gene.GetSyn()) {
            if ( !it->empty() ) {
                x_AddQual(got_name ? eFQ_gene_syn : eFQ_gene,
                          new CFlatStringQV(*it));
                got_name = true;
            }
        }
    }
    if (gene.IsSetAllele()  &&  !gene.GetAllele().empty() ) {
        x_AddQual(eFQ_gene_allele, new CFlatStringQV(gene.GetAllele()));
    }
    if (gene.IsSetMaploc()  &&  !gene.GetMaploc().empty() ) {
        x_AddQual(eFQ_gene_map, new CFlatStringQV(gene.GetMaploc()));
    }
    if (gene.IsSetDb()) {
        x_AddQual(eFQ_gene_xref, new CFlatXrefQV(gene.GetDb()));
    }
    if (gene.IsSetLocus_tag()) {
        x_AddQual(eFQ_locus_tag, new CFlatStringQV(gene.GetLocus_tag()));
    }
}


/// Add the qualifiers specific to a coding-region feature.
///
/// Does nothing when m_IsProduct is set or when the feature has no product.
/// Otherwise adds the label of the best protein feature on the product
/// (eFQ_cds_product), the reading frame (eFQ_codon_start), the genetic code
/// id (eFQ_transl_table), one eFQ_codon entry for each position where the
/// feature's own ncbieaa table differs from the table returned by
/// CGen_code_table::GetNcbieaa() for that id, and any code breaks
/// (eFQ_transl_except).
///
/// @param cds
///   Coding-region data to read the qualifiers from.
void CFlattishFeature::x_AddQuals(const CCdregion& cds) const
{
    if (m_IsProduct) {
        return; // We don't need directions when we have the sequence!
    } else if ( !m_Feat->IsSetProduct() ) {
        // warn?
        return;
    }

    CScope& scope = m_Context->GetHandle().GetScope();
    CConstRef<CSeq_feat> prod
        = sequence::GetBestOverlappingFeat(m_Feat->GetProduct(),
                                           CSeqFeatData::e_Prot,
                                           sequence::eOverlap_Contains,
                                           scope);
    if (prod) {
        string label;
        prod->GetData().GetProt().GetLabel(&label);
        x_AddQual(eFQ_cds_product, new CFlatStringQV(label));
    }

    if (cds.IsSetFrame()) {
        x_AddQual(eFQ_codon_start, new CFlatIntQV(cds.GetFrame()));
    }

    if (cds.IsSetCode()) {
        int id = cds.GetCode().GetId();
        if (id == 255) { // none found, so substitute default
            id = 1;
        }
        // Table 1 is only reported when the formatter is a
        // CFlatGBSeqFormatter; any other table is always reported.
        if (id != 1
            ||  dynamic_cast<CFlatGBSeqFormatter*>
                (&m_Context->GetFormatter())) {
            x_AddQual(eFQ_transl_table, new CFlatIntQV(id));
        }

        const string& ncbieaa = cds.GetCode().GetNcbieaa();
        if ( !ncbieaa.empty() ) {
            const string& std_ncbieaa = CGen_code_table::GetNcbieaa(id);
            if ( !std_ncbieaa.empty() ) {
                // Compare only positions present in both tables so that a
                // shorter standard table is never indexed out of range.
                for (unsigned int i = 0;
                     i < ncbieaa.size()  &&  i < std_ncbieaa.size();
                     ++i) {
                    if (ncbieaa[i] != std_ncbieaa[i]) {
                        x_AddQual(eFQ_codon,
                                  new CFlatCodonQV(i, ncbieaa[i]));
                    }
                }
            }
        }
    }

    if (cds.IsSetCode_break()) {
        x_AddQual(eFQ_transl_except,
                  new CFlatCodeBreakQV(cds.GetCode_break()));
    }
}


/// Add the qualifiers specific to a protein feature.
///
/// The first non-empty protein name is stored under eFQ_product and every
/// further non-empty name under eFQ_prot_names.  The remaining handling
/// (description, synonyms, allele, map location, xrefs, locus tag) is
/// compiled out with "#if 0".
///
/// @param prot
///   Protein reference to read the qualifiers from.
void CFlattishFeature::x_AddQuals(const CProt_ref& prot) const
{
    bool got_name = false;  // true once eFQ_product has been filled

    if (prot.IsSetName()  &&  !prot.GetName().empty() ) {
        ITERATE (CProt_ref::TName, it, prot.GetName()) {
            if ( !it->empty() ) {
                x_AddQual(got_name ? eFQ_prot_names : eFQ_product,
                          new CFlatStringQV(*it));
                got_name = true;
            }
        }
    }
#if 0
    if (prot.IsSetDesc()  &&  !prot.GetDesc().empty() ) {
        x_AddQual(got_name ? eFQ_prot_desc : eFQ_prot,
                  new CFlatStringQV(prot.GetDesc()));
        got_name = true;
    }
    if (prot.IsSetSyn()  &&  !prot.GetSyn().empty() ) {
        typedef CProt_ref::TSyn::const_iterator TProtSyn_CI;
        TProtSyn_CI first = prot.GetSyn().begin(),
                    last  = prot.GetSyn().end();
        if ( !got_name  &&  first != last) {
            x_AddQual(eFQ_prot, new CFlatStringQV(*first));
            got_name = true;
            ++first;
        }
        while (first != last) {
            x_AddQual(eFQ_prot_syn, new CFlatStringQV(*first));
            ++first;
        }
    }
    if (prot.IsSetAllele()  &&  !prot.GetAllele().empty() ) {
        x_AddQual(eFQ_prot_allele, new CFlatStringQV(prot.GetAllele()));
    }
    if (prot.IsSetMaploc()  &&  !prot.GetMaploc().empty() ) {
        x_AddQual(eFQ_prot_map, new CFlatStringQV(prot.GetMaploc()));
    }
    if (prot.IsSetDb()) {
        x_AddQual(eFQ_prot_xref, new CFlatXrefQV(prot.GetDb()));
    }
    if (prot.IsSetLocus_tag()) {
        x_AddQual(eFQ_locus_tag, new CFlatStringQV(prot.GetLocus_tag()));
    }
#endif
}


/// One row of the table of importable qualifier names: the textual name and
/// the slot it maps to.  The conversion to string is what lets rows be
/// compared against a plain string in x_ImportQuals()'s lower_bound() call.
struct SLegalImport {
    const char*       m_Name;
    EFeatureQualifier m_Value;

    operator string(void) const { return m_Name; }
};


/// Add the feature's free-form qualifiers.
///
/// Each qualifier name is looked up case-insensitively in a sorted table of
/// legal names.  Unknown names are stored under eFQ_illegal_qual as
/// CFlatIllegalQV; labels as CFlatLabelQV; a fixed set of slots as unquoted
/// strings; everything else as ordinary CFlatStringQV values.
///
/// @param quals
///   Qualifier list of the feature being formatted.
void CFlattishFeature::x_ImportQuals(const CSeq_feat::TQual& quals) const
{
    static const SLegalImport kLegalImports[] = {
        // Must be in case-insensitive alphabetical order!
#define DO_IMPORT(x) { #x, eFQ_##x }
        DO_IMPORT(allele),
        DO_IMPORT(bound_moiety),
        DO_IMPORT(clone),
        DO_IMPORT(codon),
        DO_IMPORT(cons_splice),
        DO_IMPORT(direction),
        DO_IMPORT(EC_number),
        DO_IMPORT(frequency),
        DO_IMPORT(function),
        DO_IMPORT(insertion_seq),
        DO_IMPORT(label),
        DO_IMPORT(map),
        DO_IMPORT(mod_base),
        DO_IMPORT(number),
        DO_IMPORT(organism),
        DO_IMPORT(PCR_conditions),
        DO_IMPORT(phenotype),
        { "product", eFQ_product_quals },
        DO_IMPORT(replace),
        DO_IMPORT(rpt_family),
        DO_IMPORT(rpt_type),
        DO_IMPORT(rpt_unit),
        DO_IMPORT(standard_name),
        DO_IMPORT(transposon),
        DO_IMPORT(usedin)
#undef DO_IMPORT
    };
    static const SLegalImport* const kLegalImportsEnd
        = kLegalImports + sizeof(kLegalImports)/sizeof(SLegalImport);

    ITERATE (CSeq_feat::TQual, it, quals) {
        const string&       name = (*it)->GetQual();
        const SLegalImport* li   = lower_bound(kLegalImports,
                                               kLegalImportsEnd,
                                               name, PNocase());
        // lower_bound() only yields the insertion point, so confirm that
        // the row found really carries this name.
        EFeatureQualifier   slot = eFQ_illegal_qual;
        if (li != kLegalImportsEnd
            &&  !NStr::CompareNocase(li->m_Name,name)) {
            slot = li->m_Value;
        }
        switch (slot) {
        case eFQ_codon:
        case eFQ_cons_splice:
        case eFQ_direction:
        case eFQ_mod_base:
        case eFQ_number:
        case eFQ_rpt_type:
        case eFQ_rpt_unit:
        case eFQ_usedin:
            // XXX -- each of these should really get its own class
            // (to verify correct syntax)
            x_AddQual(slot, new CFlatStringQV((*it)->GetVal(),
                                              CFlatQual::eUnquoted));
            break;
        case eFQ_label:
            x_AddQual(slot, new CFlatLabelQV((*it)->GetVal()));
            break;
        case eFQ_illegal_qual:
            x_AddQual(slot, new CFlatIllegalQV(**it));
            break;
        default:
            // XXX - should split off EC_number and replace
            // (to verify correct syntax)
            x_AddQual(slot, new CFlatStringQV((*it)->GetVal()));
            break;
        }
    }
}


void CFlattishFeature::x_FormatQuals(void) const
{
    m_FF->SetQuals().reserve(m_Quals.size());

#define DO_QUAL(x) x_FormatQual(eFQ_##x, #x)
    DO_QUAL(partial);
    DO_QUAL(gene);

    DO_QUAL(locus_tag);

    DO_QUAL(product);

    x_FormatQual(eFQ_prot_EC_number, "EC_number");
    x_FormatQual(eFQ_prot_activity, "function");

    DO_QUAL(standard_name);
    DO_QUAL(coded_by);
    DO_QUAL(derived_from);

    x_FormatQual(eFQ_prot_name, "name");
    DO_QUAL(region_name);
    DO_QUAL(bond_type);
    DO_QUAL(site_type);
    DO_QUAL(sec_str_type);
    DO_QUAL(heterogen);

#define DO_NOTE(x) x_FormatNoteQual(eFQ_##x, #x)
    DO_NOTE(gene_desc);
    // NOTE(review): the definition of x_FormatQuals() continues past the
    // end of this excerpt.
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?