feature_item.cpp
来自「ncbi源码」· C++ 代码 · 共 2,018 行 · 第 1/5 页
CPP
2,018 行
/* * =========================================================================== * PRODUCTION $Log: feature_item.cpp,v $ * PRODUCTION Revision 1000.2 2004/06/01 19:44:14 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.26 * PRODUCTION * =========================================================================== *//* $Id: feature_item.cpp,v 1000.2 2004/06/01 19:44:14 gouriano Exp $* ===========================================================================** PUBLIC DOMAIN NOTICE* National Center for Biotechnology Information** This software/database is a "United States Government Work" under the* terms of the United States Copyright Act. It was written as part of* the author's official duties as a United States Government employee and* thus cannot be copyrighted. This software/database is freely available* to the public for use. The National Library of Medicine and the U.S.* Government have not placed any restriction on its use or reproduction.** Although all reasonable efforts have been taken to ensure the accuracy* and reliability of the software and data, the NLM and the U.S.* Government do not and cannot warrant the performance or results that* may be obtained by using this software or data. The NLM and the U.S.* Government disclaim all warranties, express or implied, including* warranties of performance, merchantability or fitness for any particular* purpose.** Please cite the author in any work or product based on this material.** ===========================================================================** Author: Aaron Ucko, NCBI* Mati Shomrat** File Description:* new (early 2003) flat-file generator -- representation of features* (mainly of interest to implementors)** ===========================================================================*/#include <ncbi_pch.hpp>#include <corelib/ncbistd.hpp>#include <serial/iterator.hpp>#include <objects/seq/Bioseq.hpp>#include <objects/seq/Heterogen.hpp>#include <objects/seqfeat/Org_ref.hpp>#include <objects/seqfeat/OrgName.hpp>#include <objects/seqfeat/OrgMod.hpp>#include <objects/seqfeat/Code_break.hpp>#include <objects/seqfeat/Genetic_code.hpp>#include <objects/seqfeat/Genetic_code_table.hpp>#include <objects/seqfeat/Imp_feat.hpp>#include <objects/seqfeat/RNA_ref.hpp>#include <objects/seqfeat/Trna_ext.hpp>#include <objects/seqloc/Seq_loc.hpp>#include <objects/seqloc/Seq_point.hpp>#include <objects/seqloc/Seq_interval.hpp>#include <objects/seqloc/Packed_seqpnt.hpp>#include <objects/seqloc/Textseq_id.hpp>#include <objects/general/Object_id.hpp>#include <objmgr/scope.hpp>#include <objmgr/seqdesc_ci.hpp>#include <objmgr/seq_vector.hpp>#include <objmgr/seq_loc_mapper.hpp>#include <objmgr/util/sequence.hpp>#include <objmgr/util/feature.hpp>#include <util/static_set.hpp>#include <util/static_map.hpp>#include <util/sequtil/sequtil.hpp>#include <util/sequtil/sequtil_convert.hpp>#include <algorithm>#include <objtools/format/formatter.hpp>#include <objtools/format/items/feature_item.hpp>#include <objtools/format/context.hpp>#include <objtools/format/items/qualifiers.hpp>#include "utils.hpp"// On Mac OS X 10.3, FixMath.h defines ff as a one-argument macro(!)#ifdef ff# undef ff#endifBEGIN_NCBI_SCOPEBEGIN_SCOPE(objects)USING_SCOPE(sequence);// -- static functionsconst CGb_qual* s_GetQual(const CSeq_feat& feat, const string& qual){ ITERATE(CSeq_feat::TQual, it, feat.GetQual()) { if ( (*it)->CanGetQual() && NStr::CompareNocase((*it)->GetQual(), qual) == 0 && (*it)->CanGetVal() && !(*it)->GetVal().empty() ) { return *it; } } return 0;}static bool s_ValidId(const CSeq_id& id){ return id.IsGenbank() || id.IsEmbl() || id.IsDdbj() || id.IsOther() || id.IsPatent() || id.IsTpg() || id.IsTpe() || id.IsTpd();}static bool s_CheckQuals_cdregion(const CSeq_feat& feat, CBioseqContext& ctx){ if ( !ctx.Config().CheckCDSProductId() ) { return true; } CScope& scope = ctx.GetScope(); // non-pseudo CDS must have /product bool pseudo = feat.CanGetPseudo() && feat.GetPseudo(); if ( !pseudo ) { const CGene_ref* grp = feat.GetGeneXref(); if ( grp == 0 ) { CConstRef<CSeq_feat> gene = GetOverlappingGene(feat.GetLocation(), scope); if ( gene ) { pseudo = gene->CanGetPseudo() && gene->GetPseudo(); if ( !pseudo ) { grp = &(gene->GetData().GetGene()); } } } if ( !pseudo && grp != 0 ) { pseudo = grp->GetPseudo(); } } bool just_stop = false; if ( feat.CanGetLocation() ) { const CSeq_loc& loc = feat.GetLocation(); if ( loc.IsPartialLeft() && !loc.IsPartialRight() ) { if ( GetLength(loc, &scope) <= 5 ) { just_stop = true; } } } if ( pseudo || just_stop ) { return true; } // make sure the product has a valid accession if ( feat.CanGetProduct() ) { const CSeq_id* id = 0; try { id = &(GetId(feat.GetProduct(), &ctx.GetScope())); } catch ( CException& ) {} if ( id != 0 ) { if ( (id->IsGi() && id->GetGi() > 0) || id->IsLocal() ) { CBioseq_Handle prod = scope.GetBioseqHandle(*id); if ( prod ) { ITERATE (CBioseq_Handle::TId, it, prod.GetId()) { if ( s_ValidId(*it->GetSeqId()) ) { const CTextseq_id* tsip = it->GetSeqId()->GetTextseq_Id(); if ( tsip != 0 && tsip->IsSetAccession() && ValidateAccession(tsip->GetAccession()) ) { return true; } } } } else if ( id->IsGi() && id->GetGi() > 0 ) { // RELEASE_MODE requires that /protein_id is an accession // !!! need to be able to retrieve accession from server withou // retrieving the entire bioseq return true; } } else if ( s_ValidId(*id) ) { const CTextseq_id* tsip = id->GetTextseq_Id(); if ( tsip != 0 && tsip->CanGetAccession() && ValidateAccession(tsip->GetAccession()) ) { return true; } } } } else { // no product if ( feat.CanGetExcept() && feat.GetExcept() && feat.CanGetExcept_text() ) { if ( NStr::Find(feat.GetExcept_text(), "rearrangement required for product") != NPOS ) { return true; } } } return false;}// conflict and old_sequence require a publication printable on the segmentstatic bool s_HasPub(const CSeq_feat& feat, CBioseqContext& ctx){ if ( !feat.CanGetCit() ) { return false; } ITERATE(CBioseqContext::TReferences, it, ctx.GetReferences()) { if ( (*it)->Matches(feat.GetCit()) ) { return true; } } return false;}static bool s_CheckQuals_conflict(const CSeq_feat& feat, CBioseqContext& ctx){ if ( !feat.CanGetCit() ) { // RefSeq allows conflict with accession in comment instead of sfp->cit if ( ctx.IsRefSeq() && feat.CanGetComment() && !feat.GetComment().empty() ) { return true; } } else { return s_HasPub(feat, ctx); } return false;}static bool s_CheckQuals_old_seq(const CSeq_feat& feat, CBioseqContext& ctx){ return s_HasPub(feat, ctx);}static bool s_CheckQuals_gene(const CSeq_feat& feat){ // gene requires /gene or /locus_tag, but desc or syn can be mapped to /gene const CSeqFeatData::TGene& gene = feat.GetData().GetGene(); if ( (gene.CanGetLocus() && !gene.GetLocus().empty()) || (gene.CanGetLocus_tag() && !gene.GetLocus_tag().empty()) || (gene.CanGetDesc() && !gene.GetDesc().empty()) || (!gene.GetSyn().empty() && !gene.GetSyn().front().empty()) ) { return true; } return false;}static bool s_CheckQuals_bind(const CSeq_feat& feat){ // protein_bind or misc_binding require eFQ_bound_moiety return s_GetQual(feat, "bound_moiety") != 0;}static bool s_CheckQuals_mod_base(const CSeq_feat& feat){ // modified_base requires eFQ_mod_base return s_GetQual(feat, "mod_base") != 0;}static bool s_CheckMandatoryQuals(const CSeq_feat& feat, CBioseqContext& ctx){ switch ( feat.GetData().GetSubtype() ) { case CSeqFeatData::eSubtype_cdregion: { return s_CheckQuals_cdregion(feat, ctx); } case CSeqFeatData::eSubtype_conflict: { return s_CheckQuals_conflict(feat, ctx); } case CSeqFeatData::eSubtype_old_sequence: { return s_CheckQuals_old_seq(feat, ctx); } case CSeqFeatData::eSubtype_gene: { return s_CheckQuals_gene(feat); } case CSeqFeatData::eSubtype_protein_bind: case CSeqFeatData::eSubtype_misc_binding: { return s_CheckQuals_bind(feat); } case CSeqFeatData::eSubtype_modified_base: { return s_CheckQuals_mod_base(feat); } default: break; } return true;}static bool s_SkipFeature(const CSeq_feat& feat, CBioseqContext& ctx){ CSeqFeatData::E_Choice type = feat.GetData().Which(); CSeqFeatData::ESubtype subtype = feat.GetData().GetSubtype(); if ( subtype == CSeqFeatData::eSubtype_pub || subtype == CSeqFeatData::eSubtype_non_std_residue || subtype == CSeqFeatData::eSubtype_biosrc || subtype == CSeqFeatData::eSubtype_rsite || subtype == CSeqFeatData::eSubtype_seq ) { return true; } const CFlatFileConfig& cfg = ctx.Config(); // check feature customization flags if ( cfg.ValidateFeats() && (subtype == CSeqFeatData::eSubtype_bad || subtype == CSeqFeatData::eSubtype_virion) ) { return true; } if ( ctx.IsNuc() && subtype == CSeqFeatData::eSubtype_het ) { return true; } if ( cfg.HideImpFeats() && type == CSeqFeatData::e_Imp ) { return true; } if ( cfg.HideSNPFeatures() && subtype == CSeqFeatData::eSubtype_variation ) { return true; } if ( cfg.HideExonFeatures() && subtype == CSeqFeatData::eSubtype_exon ) { return true; } if ( cfg.HideIntronFeatures() && subtype == CSeqFeatData::eSubtype_intron ) { return true; } if ( cfg.HideRemoteImpFeats() && type == CSeqFeatData::e_Imp ) { if ( subtype == CSeqFeatData::eSubtype_variation || subtype == CSeqFeatData::eSubtype_exon || subtype == CSeqFeatData::eSubtype_intron || subtype == CSeqFeatData::eSubtype_misc_feature ) { return true; } } // skip genes in DDBJ format if ( cfg.IsFormatDDBJ() && type == CSeqFeatData::e_Gene ) { return true; } // To Do: // !!! supress full length comment features // if RELEASE mode, make sure we have all info to create mandatory quals. if ( cfg.NeedRequiredQuals() ) { return !s_CheckMandatoryQuals(feat, ctx); } return false;}static const string s_BondList[] = { "", "disulfide", "thiolester", "xlink", "thioether", "unclassified"};static const string s_SiteList[] = { "", "active", "binding", "cleavage", "inhibit", "modified", "glycosylation", "myristoylation", "mutagenized", "metal-binding", "phosphorylation", "acetylation", "amidation", "methylation", "hydroxylation", "sulfatation",
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?