validerror_feat.cpp

来自「ncbi源码」· C++ 代码 · 共 1,990 行 · 第 1/5 页

CPP
1,990
字号
/*
 * ===========================================================================
 * PRODUCTION $Log: validerror_feat.cpp,v $
 * PRODUCTION Revision 1000.2  2004/06/01 19:48:04  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.56
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: validerror_feat.cpp,v 1000.2 2004/06/01 19:48:04 gouriano Exp $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author:  Jonathan Kans, Clifford Clausen, Aaron Ucko......
 *
 * File Description:
 *   validation of Seq_feat
 *   .......
 *
 */
#include <ncbi_pch.hpp>
#include <corelib/ncbistd.hpp>
#include <corelib/ncbistr.hpp>

#include "validatorp.hpp"
#include "utilities.hpp"

#include <serial/serialbase.hpp>

#include <objmgr/bioseq_handle.hpp>
#include <objmgr/seq_entry_handle.hpp>
#include <objmgr/feat_ci.hpp>
#include <objmgr/seqdesc_ci.hpp>
#include <objmgr/seq_vector.hpp>
#include <objmgr/scope.hpp>
#include <objmgr/util/sequence.hpp>
#include <objmgr/util/feature.hpp>

#include <objects/seqfeat/Seq_feat.hpp>
#include <objects/seqfeat/BioSource.hpp>
#include <objects/seqfeat/Cdregion.hpp>
#include <objects/seqfeat/Code_break.hpp>
#include <objects/seqfeat/Gb_qual.hpp>
#include <objects/seqfeat/Genetic_code.hpp>
#include <objects/seqfeat/Genetic_code_table.hpp>
#include <objects/seqfeat/Imp_feat.hpp>
#include <objects/seqfeat/Org_ref.hpp>
#include <objects/seqfeat/Prot_ref.hpp>
#include <objects/seqfeat/RNA_ref.hpp>
#include <objects/seqfeat/SubSource.hpp>
#include <objects/seqfeat/Trna_ext.hpp>

#include <objects/seqloc/Seq_loc.hpp>
#include <objects/seqloc/Seq_interval.hpp>
#include <objects/seqloc/Seq_point.hpp>
#include <objects/seqloc/Textseq_id.hpp>

#include <objects/seqset/Seq_entry.hpp>
#include <objects/seqset/Bioseq_set.hpp>

#include <objects/seq/MolInfo.hpp>
#include <objects/seq/Bioseq.hpp>

#include <objects/pub/Pub.hpp>
#include <objects/pub/Pub_set.hpp>

#include <objects/general/Dbtag.hpp>

#include <algorithm>
#include <string>


BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)
BEGIN_SCOPE(validator)
using namespace sequence;


// =============================================================================
//                                     Public
// =============================================================================


// Builds the feature validator on top of the shared implementation object
// and starts both gene counters at zero.
CValidError_feat::CValidError_feat(CValidError_imp& imp) :
    CValidError_base(imp),
    m_NumGenes(0),
    m_NumGeneXrefs(0)
{
}


CValidError_feat::~CValidError_feat(void)
{
}


// Runs the per-feature checks on a single Seq-feat, in this order:
// location, product (if present), partialness, exception, feature data,
// dbxrefs (if present), comment (if present), citation (if present).
// Finally counts the feature's gene xref when it has one that is not
// suppressed.
//
// A feature without a location gets a critical MissingLocation error and
// is not examined any further, because the remaining steps start from
// feat.GetLocation().
void CValidError_feat::ValidateSeqFeat(const CSeq_feat& feat)
{
    if ( !feat.CanGetLocation() ) {
        PostErr(eDiag_Critical, eErr_SEQ_FEAT_MissingLocation,
            "The feature is missing a location", feat);
        // Do not fall through: the code below calls GetLocation(), which
        // must not be used on a feature whose location cannot be obtained.
        return;
    }

    CBioseq_Handle bsh = m_Scope->GetBioseqHandle(feat.GetLocation());
    m_Imp.ValidateSeqLoc(feat.GetLocation(), bsh, "Location", feat);

    if ( feat.CanGetProduct() ) {
        ValidateSeqFeatProduct(feat.GetProduct(), feat);
    }

    ValidateFeatPartialness(feat);

    ValidateExcept(feat);

    ValidateSeqFeatData(feat.GetData(), feat);

    if (feat.CanGetDbxref ()) {
        m_Imp.ValidateDbxref (feat.GetDbxref (), feat);
    }

    if ( feat.CanGetComment() ) {
        ValidateFeatComment(feat.GetComment(), feat);
    }

    if ( feat.CanGetCit() ) {
        ValidateFeatCit(feat.GetCit(), feat);
    }

    // Keep a running count of explicit (non-suppressed) gene xrefs.
    const CGene_ref* gene_xref = feat.GetGeneXref();
    if ( gene_xref != 0  &&  !gene_xref->IsSuppressed() ) {
        ++m_NumGeneXrefs;
    }
}


// =============================================================================
//                                     Private
// =============================================================================


// static member initializations

// Plastid names with empty placeholders in between.
// NOTE(review): the positions look like they line up with the numeric
// CBioSource genome codes -- confirm against the enum before reordering.
const string s_PlastidTxt[] = {
  "",
  "",
  "chloroplast",
  "chromoplast",
  "",
  "",
  "plastid",
  "",
  "",
  "",
  "",
  "",
  "cyanelle",
  "",
  "",
  "",
  "apicoplast",
  "leucoplast",
  "proplastid",
  ""
};


static string s_LegalRepeatTypes[] = {
  "tandem", "inverted", "flanking", "terminal",
  "direct", "dispersed", "other"
};


static string s_LegalConsSpliceStrings[] = {
  "(5'site:YES, 3'site:YES)",
  "(5'site:YES, 3'site:NO)",
  "(5'site:YES, 3'site:ABSENT)",
  "(5'site:NO, 3'site:YES)",
  "(5'site:NO, 3'site:NO)",
  "(5'site:NO, 3'site:ABSENT)",
  "(5'site:ABSENT, 3'site:YES)",
  "(5'site:ABSENT, 3'site:NO)",
  "(5'site:ABSENT, 3'site:ABSENT)"
};


// Returns true when the bioseq that `loc` resolves to in `scope` carries at
// least one Seq-id whose accession is identified as a RefSeq mRNA.
// Returns false when the bioseq cannot be resolved.
static bool s_IsLocRefSeqMrna(const CSeq_loc& loc, CScope& scope)
{
    CBioseq_Handle bsh = scope.GetBioseqHandle(loc);
    if ( bsh ) {
        ITERATE (CBioseq::TId, it, bsh.GetBioseqCore()->GetId()) {
            if ( (*it)->IdentifyAccession() == CSeq_id::eAcc_refseq_mrna ) {
                return true;
            }
        }
    }

    return false;
}


// Returns true when the bioseq that `loc` resolves to in `scope` carries at
// least one Seq-id identified as eAcc_gb_embl_ddbj or eAcc_local.
// Returns false when the bioseq cannot be resolved.
static bool s_IsLocGEDL(const CSeq_loc& loc, CScope& scope)
{
    CBioseq_Handle bsh = scope.GetBioseqHandle(loc);
    if ( bsh ) {
        ITERATE (CBioseq::TId, it, bsh.GetBioseqCore()->GetId()) {
            CSeq_id::EAccessionInfo acc_info = (*it)->IdentifyAccession();
            if ( acc_info == CSeq_id::eAcc_gb_embl_ddbj  ||
                 acc_info == CSeq_id::eAcc_local ) {
                return true;
            }
        }
    }

    return false;
}


// private member functions:

// Dispatches on the feature-data choice to the matching type-specific
// validator. Choices listed without a handler are accepted as-is; any other
// choice value is reported as InvalidType. Afterwards, gene features get the
// operon check and every other feature gets the gene-xref check.
void CValidError_feat::ValidateSeqFeatData
(const CSeqFeatData& data,
 const CSeq_feat& feat)
{
    switch ( data.Which () ) {
    case CSeqFeatData::e_Gene:
        // Validate CGene_ref
        ValidateGene(data.GetGene (), feat);
        break;
    case CSeqFeatData::e_Cdregion:
        // Validate CCdregion
        ValidateCdregion(data.GetCdregion (), feat);
        break;
    case CSeqFeatData::e_Prot:
        // Validate CProt_ref
        ValidateProt(data.GetProt (), feat);
        break;
    case CSeqFeatData::e_Rna:
        // Validate CRNA_ref
        ValidateRna(data.GetRna (), feat);
        break;
    case CSeqFeatData::e_Pub:
        // Validate CPubdesc
        m_Imp.ValidatePubdesc(data.GetPub (), feat);
        break;
    case CSeqFeatData::e_Imp:
        // Validate CImp_feat
        ValidateImp(data.GetImp (), feat);
        break;
    case CSeqFeatData::e_Biosrc:
        // Validate CBioSource
        ValidateFeatBioSource(data.GetBiosrc(), feat);
        break;

    // Known choices that have no dedicated validation here.
    case CSeqFeatData::e_Org:
    case CSeqFeatData::e_Region:
    case CSeqFeatData::e_Seq:
    case CSeqFeatData::e_Comment:
    case CSeqFeatData::e_Bond:
    case CSeqFeatData::e_Site:
    case CSeqFeatData::e_Rsite:
    case CSeqFeatData::e_User:
    case CSeqFeatData::e_Txinit:
    case CSeqFeatData::e_Num:
    case CSeqFeatData::e_Psec_str:
    case CSeqFeatData::e_Non_std_residue:
    case CSeqFeatData::e_Het:
        break;

    default:
        PostErr(eDiag_Error, eErr_SEQ_FEAT_InvalidType,
            "Invalid SeqFeat type [" +
            NStr::IntToString(data.Which ()) +
            "]", feat);
        break;
    }

    if ( !data.IsGene() ) {
        ValidateGeneXRef(feat);
    } else {
        ValidateOperon(feat);
    }
}


// Validates the feature's product location and, when the product refers to
// exactly one bioseq identified by a GenBank/EMBL/DDBJ/TPG/TPE/TPD Seq-id,
// warns if the Textseq-id has no accession but its 'name' slot holds
// something m_Imp.IsNucAcc() recognizes.
void CValidError_feat::ValidateSeqFeatProduct
(const CSeq_loc& prod,
 const CSeq_feat& feat)
{
    CBioseq_Handle bsh = m_Scope->GetBioseqHandle(feat.GetProduct());
    m_Imp.ValidateSeqLoc(feat.GetProduct(), bsh, "Product", feat);

    if ( IsOneBioseq(prod, m_Scope) ) {
        const CSeq_id& sid = GetId(prod, m_Scope);

        switch ( sid.Which() ) {
        case CSeq_id::e_Genbank:
        case CSeq_id::e_Embl:
        case CSeq_id::e_Ddbj:
        case CSeq_id::e_Tpg:
        case CSeq_id::e_Tpe:
        case CSeq_id::e_Tpd:
            {
                const CTextseq_id* tsid = sid.GetTextseq_Id();
                if ( tsid != NULL ) {
                    if ( !tsid->CanGetAccession()  &&  tsid->CanGetName() ) {
                        if ( m_Imp.IsNucAcc(tsid->GetName()) ) {
                            PostErr(eDiag_Warning, eErr_SEQ_FEAT_BadProductSeqId,
                                "Feature product should not use "
                                "Textseq-id 'name' slot", feat);
                        }
                    }
                }
            }
            break;

        default:
            break;
        }
    }
}


// Returns true when `genome` equals one of the CBioSource genome codes for
// chloroplast, chromoplast, plastid, cyanelle, apicoplast, leucoplast or
// proplastid; false for any other value.
bool CValidError_feat::IsPlastid(int genome)
{
    if ( genome == CBioSource::eGenome_chloroplast  ||
         genome == CBioSource::eGenome_chromoplast  ||
         genome == CBioSource::eGenome_plastid      ||
         genome == CBioSource::eGenome_cyanelle     ||
         genome == CBioSource::eGenome_apicoplast   ||
         genome == CBioSource::eGenome_leucoplast   ||
         genome == CBioSource::eGenome_proplastid  ) {
         return true;
    }

    return false;
}


// Reports whether the gene associated with `feat` is flagged as pseudo.
// A gene xref on the feature takes precedence: its pseudo flag is the answer
// and no overlap search is made. Otherwise the overlapping gene feature is
// looked up by location, and either its own pseudo flag or the pseudo flag
// of its Gene-ref yields true. Returns false when no gene is found.
bool CValidError_feat::IsOverlappingGenePseudo(const CSeq_feat& feat)
{
    const CGene_ref* grp = feat.GetGeneXref();
    if ( grp  ) {
        return (grp->CanGetPseudo()  &&  grp->GetPseudo());
    }

    // !!! DEBUG {
    // For testing purposes. Remove when test is done.
    if ( m_Imp.AvoidPerfBottlenecks() ) {
        return false;
    }
    // }

    // check overlapping gene
    CConstRef<CSeq_feat> overlap =
        GetOverlappingGene(feat.GetLocation(), *m_Scope);
    if ( overlap ) {
        if ( (overlap->CanGetPseudo()  &&  overlap->GetPseudo())  ||
             (overlap->GetData().GetGene().CanGetPseudo()  &&
              overlap->GetData().GetGene().GetPseudo()) ) {
            return true;
        }
    }

    return false;
}


bool CValidError_feat::SuppressCheck(const string& except_text)
{
    static string exceptions[] = {
        "ribosomal slippage",
        "artificial frameshift",
        "nonconsensus splice site"
    };

    for ( size_t i = 0; i < sizeof(exceptions) / sizeof(string); ++i ) {
        if ( NStr::FindNoCase(except_text, exceptions[i] ) != string::npos )
            return true;
    }
    return false;
    // NOTE(review): the closing brace of SuppressCheck is not part of this
    // excerpt; the definition continues past the visible text.

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?