validerror_feat.cpp
来自「ncbi源码」· C++ 代码 · 共 1,990 行 · 第 1/5 页
CPP
1,990 行
/* * =========================================================================== * PRODUCTION $Log: validerror_feat.cpp,v $ * PRODUCTION Revision 1000.2 2004/06/01 19:48:04 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.56 * PRODUCTION * =========================================================================== *//* $Id: validerror_feat.cpp,v 1000.2 2004/06/01 19:48:04 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: Jonathan Kans, Clifford Clausen, Aaron Ucko...... * * File Description: * validation of Seq_feat * ....... * */#include <ncbi_pch.hpp>#include <corelib/ncbistd.hpp>#include <corelib/ncbistr.hpp>#include "validatorp.hpp"#include "utilities.hpp"#include <serial/serialbase.hpp>#include <objmgr/bioseq_handle.hpp>#include <objmgr/seq_entry_handle.hpp>#include <objmgr/feat_ci.hpp>#include <objmgr/seqdesc_ci.hpp>#include <objmgr/seq_vector.hpp>#include <objmgr/scope.hpp>#include <objmgr/util/sequence.hpp>#include <objmgr/util/feature.hpp>#include <objects/seqfeat/Seq_feat.hpp>#include <objects/seqfeat/BioSource.hpp>#include <objects/seqfeat/Cdregion.hpp>#include <objects/seqfeat/Code_break.hpp>#include <objects/seqfeat/Gb_qual.hpp>#include <objects/seqfeat/Genetic_code.hpp>#include <objects/seqfeat/Genetic_code_table.hpp>#include <objects/seqfeat/Imp_feat.hpp>#include <objects/seqfeat/Org_ref.hpp>#include <objects/seqfeat/Prot_ref.hpp>#include <objects/seqfeat/RNA_ref.hpp>#include <objects/seqfeat/SubSource.hpp>#include <objects/seqfeat/Trna_ext.hpp>#include <objects/seqloc/Seq_loc.hpp>#include <objects/seqloc/Seq_interval.hpp>#include <objects/seqloc/Seq_point.hpp>#include <objects/seqloc/Textseq_id.hpp>#include <objects/seqset/Seq_entry.hpp>#include <objects/seqset/Bioseq_set.hpp>#include <objects/seq/MolInfo.hpp>#include <objects/seq/Bioseq.hpp>#include <objects/pub/Pub.hpp>#include <objects/pub/Pub_set.hpp>#include <objects/general/Dbtag.hpp>#include <algorithm>#include <string>BEGIN_NCBI_SCOPEBEGIN_SCOPE(objects)BEGIN_SCOPE(validator)using namespace sequence;// =============================================================================// Public// =============================================================================CValidError_feat::CValidError_feat(CValidError_imp& imp) : CValidError_base(imp), m_NumGenes(0), m_NumGeneXrefs(0){}CValidError_feat::~CValidError_feat(void){}void CValidError_feat::ValidateSeqFeat(const CSeq_feat& feat){ if ( !feat.CanGetLocation() ) { PostErr(eDiag_Critical, eErr_SEQ_FEAT_MissingLocation, "The feature is missing a location", feat); } CBioseq_Handle bsh = m_Scope->GetBioseqHandle(feat.GetLocation()); m_Imp.ValidateSeqLoc(feat.GetLocation(), bsh, "Location", feat); if ( feat.CanGetProduct() ) { ValidateSeqFeatProduct(feat.GetProduct(), feat); } ValidateFeatPartialness(feat); ValidateExcept(feat); ValidateSeqFeatData(feat.GetData(), feat); if (feat.CanGetDbxref ()) { m_Imp.ValidateDbxref (feat.GetDbxref (), feat); } if ( feat.CanGetComment() ) { ValidateFeatComment(feat.GetComment(), feat); } if ( feat.CanGetCit() ) { ValidateFeatCit(feat.GetCit(), feat); } const CGene_ref* gene_xref = feat.GetGeneXref(); if ( gene_xref != 0 && !gene_xref->IsSuppressed() ) { ++m_NumGeneXrefs; }}// =============================================================================// Private// =============================================================================// static member initializationsconst string s_PlastidTxt[] = { "", "", "chloroplast", "chromoplast", "", "", "plastid", "", "", "", "", "", "cyanelle", "", "", "", "apicoplast", "leucoplast", "proplastid", ""};static string s_LegalRepeatTypes[] = { "tandem", "inverted", "flanking", "terminal", "direct", "dispersed", "other"};static string s_LegalConsSpliceStrings[] = { "(5'site:YES, 3'site:YES)", "(5'site:YES, 3'site:NO)", "(5'site:YES, 3'site:ABSENT)", "(5'site:NO, 3'site:YES)", "(5'site:NO, 3'site:NO)", "(5'site:NO, 3'site:ABSENT)", "(5'site:ABSENT, 3'site:YES)", "(5'site:ABSENT, 3'site:NO)", "(5'site:ABSENT, 3'site:ABSENT)"};static bool s_IsLocRefSeqMrna(const CSeq_loc& loc, CScope& scope){ CBioseq_Handle bsh = scope.GetBioseqHandle(loc); if ( bsh ) { ITERATE (CBioseq::TId, it, bsh.GetBioseqCore()->GetId()) { if ( (*it)->IdentifyAccession() == CSeq_id::eAcc_refseq_mrna ) { return true; } } } return false;}static bool s_IsLocGEDL(const CSeq_loc& loc, CScope& scope){ CBioseq_Handle bsh = scope.GetBioseqHandle(loc); if ( bsh ) { ITERATE (CBioseq::TId, it, bsh.GetBioseqCore()->GetId()) { CSeq_id::EAccessionInfo acc_info = (*it)->IdentifyAccession(); if ( acc_info == CSeq_id::eAcc_gb_embl_ddbj || acc_info == CSeq_id::eAcc_local ) { return true; } } } return false;}// private member functions:void CValidError_feat::ValidateSeqFeatData(const CSeqFeatData& data, const CSeq_feat& feat){ switch ( data.Which () ) { case CSeqFeatData::e_Gene: // Validate CGene_ref ValidateGene(data.GetGene (), feat); break; case CSeqFeatData::e_Cdregion: // Validate CCdregion ValidateCdregion(data.GetCdregion (), feat); break; case CSeqFeatData::e_Prot: // Validate CProt_ref ValidateProt(data.GetProt (), feat); break; case CSeqFeatData::e_Rna: // Validate CRNA_ref ValidateRna(data.GetRna (), feat); break; case CSeqFeatData::e_Pub: // Validate CPubdesc m_Imp.ValidatePubdesc(data.GetPub (), feat); break; case CSeqFeatData::e_Imp: // Validate CPubdesc ValidateImp(data.GetImp (), feat); break; case CSeqFeatData::e_Biosrc: // Validate CBioSource ValidateFeatBioSource(data.GetBiosrc(), feat); break; case CSeqFeatData::e_Org: case CSeqFeatData::e_Region: case CSeqFeatData::e_Seq: case CSeqFeatData::e_Comment: case CSeqFeatData::e_Bond: case CSeqFeatData::e_Site: case CSeqFeatData::e_Rsite: case CSeqFeatData::e_User: case CSeqFeatData::e_Txinit: case CSeqFeatData::e_Num: case CSeqFeatData::e_Psec_str: case CSeqFeatData::e_Non_std_residue: case CSeqFeatData::e_Het: break; default: PostErr(eDiag_Error, eErr_SEQ_FEAT_InvalidType, "Invalid SeqFeat type [" + NStr::IntToString(data.Which ()) + "]", feat); break; } if ( !data.IsGene() ) { ValidateGeneXRef(feat); } else { ValidateOperon(feat); }}void CValidError_feat::ValidateSeqFeatProduct(const CSeq_loc& prod, const CSeq_feat& feat){ CBioseq_Handle bsh = m_Scope->GetBioseqHandle(feat.GetProduct()); m_Imp.ValidateSeqLoc(feat.GetProduct(), bsh, "Product", feat); if ( IsOneBioseq(prod, m_Scope) ) { const CSeq_id& sid = GetId(prod, m_Scope); switch ( sid.Which() ) { case CSeq_id::e_Genbank: case CSeq_id::e_Embl: case CSeq_id::e_Ddbj: case CSeq_id::e_Tpg: case CSeq_id::e_Tpe: case CSeq_id::e_Tpd: { const CTextseq_id* tsid = sid.GetTextseq_Id(); if ( tsid != NULL ) { if ( !tsid->CanGetAccession() && tsid->CanGetName() ) { if ( m_Imp.IsNucAcc(tsid->GetName()) ) { PostErr(eDiag_Warning, eErr_SEQ_FEAT_BadProductSeqId, "Feature product should not use " "Textseq-id 'name' slot", feat); } } } } break; default: break; } }}bool CValidError_feat::IsPlastid(int genome){ if ( genome == CBioSource::eGenome_chloroplast || genome == CBioSource::eGenome_chromoplast || genome == CBioSource::eGenome_plastid || genome == CBioSource::eGenome_cyanelle || genome == CBioSource::eGenome_apicoplast || genome == CBioSource::eGenome_leucoplast || genome == CBioSource::eGenome_proplastid ) { return true; } return false;}bool CValidError_feat::IsOverlappingGenePseudo(const CSeq_feat& feat){ const CGene_ref* grp = feat.GetGeneXref(); if ( grp ) { return (grp->CanGetPseudo() && grp->GetPseudo()); } // !!! DEBUG { // For testing purposes. Remove when test is done. if ( m_Imp.AvoidPerfBottlenecks() ) { return false; } // } // check overlapping gene CConstRef<CSeq_feat> overlap = GetOverlappingGene(feat.GetLocation(), *m_Scope); if ( overlap ) { if ( (overlap->CanGetPseudo() && overlap->GetPseudo()) || (overlap->GetData().GetGene().CanGetPseudo() && overlap->GetData().GetGene().GetPseudo()) ) { return true; } } return false;}bool CValidError_feat::SuppressCheck(const string& except_text){ static string exceptions[] = { "ribosomal slippage", "artificial frameshift", "nonconsensus splice site" }; for ( size_t i = 0; i < sizeof(exceptions) / sizeof(string); ++i ) { if ( NStr::FindNoCase(except_text, exceptions[i] ) != string::npos ) return true; } return false;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?