validerror_bioseqset.cpp

来自「ncbi源码」· C++ 代码 · 共 493 行 · 第 1/2 页

CPP
493
字号
/* * =========================================================================== * PRODUCTION $Log: validerror_bioseqset.cpp,v $ * PRODUCTION Revision 1000.2  2004/06/01 19:47:54  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.14 * PRODUCTION * =========================================================================== *//*  $Id: validerror_bioseqset.cpp,v 1000.2 2004/06/01 19:47:54 gouriano Exp $ * =========================================================================== * *                            PUBLIC DOMAIN NOTICE *               National Center for Biotechnology Information * *  This software/database is a "United States Government Work" under the *  terms of the United States Copyright Act.  It was written as part of *  the author's official duties as a United States Government employee and *  thus cannot be copyrighted.  This software/database is freely available *  to the public for use. The National Library of Medicine and the U.S. *  Government have not placed any restriction on its use or reproduction. * *  Although all reasonable efforts have been taken to ensure the accuracy *  and reliability of the software and data, the NLM and the U.S. *  Government do not and cannot warrant the performance or results that *  may be obtained by using this software or data. The NLM and the U.S. *  Government disclaim all warranties, express or implied, including *  warranties of performance, merchantability or fitness for any particular *  purpose. * *  Please cite the author in any work or product based on this material. * * =========================================================================== * * Author:  Jonathan Kans, Clifford Clausen, Aaron Ucko...... * * File Description: *   validation of bioseq_set  *   ....... * */#include <ncbi_pch.hpp>#include <corelib/ncbistd.hpp>#include "validatorp.hpp"#include <objmgr/util/sequence.hpp>#include <serial/enumvalues.hpp>#include <serial/iterator.hpp>#include <objects/seqset/Seq_entry.hpp>#include <objects/seqset/Bioseq_set.hpp>#include <objects/seq/Bioseq.hpp>#include <objects/seq/MolInfo.hpp>#include <objects/seqfeat/Seq_feat.hpp>#include <objects/seqfeat/BioSource.hpp>#include <objects/seqfeat/Org_ref.hpp>#include <objects/seqfeat/RNA_ref.hpp>#include <objmgr/feat_ci.hpp>#include <objmgr/bioseq_handle.hpp>BEGIN_NCBI_SCOPEBEGIN_SCOPE(objects)BEGIN_SCOPE(validator)using namespace sequence;// =============================================================================//                                     Public// =============================================================================CValidError_bioseqset::CValidError_bioseqset(CValidError_imp& imp) :    CValidError_base(imp){}CValidError_bioseqset::~CValidError_bioseqset(void){}void CValidError_bioseqset::ValidateBioseqSet(const CBioseq_set& seqset){    int protcnt = 0;    int nuccnt  = 0;    int segcnt  = 0;        // Validate Set Contents    CTypeConstIterator<CBioseq> seqit(ConstBegin(seqset));    for (; seqit; ++seqit) {                if ( seqit->IsAa() ) {            protcnt++;        } else {            nuccnt++;        }                if (seqit->GetInst().GetRepr() == CSeq_inst::eRepr_seg) {            segcnt++;        }    }        switch ( seqset.GetClass() ) {    case CBioseq_set::eClass_nuc_prot:        ValidateNucProtSet(seqset, nuccnt, protcnt);        break;            case CBioseq_set::eClass_segset:        ValidateSegSet(seqset, segcnt);        break;            case CBioseq_set::eClass_parts:        ValidatePartsSet(seqset);        break;            case CBioseq_set::eClass_pop_set:        ValidatePopSet(seqset);        break;            case CBioseq_set::eClass_gen_prod_set:        ValidateGenProdSet(seqset);        break;    case CBioseq_set::eClass_other:        PostErr(eDiag_Critical, eErr_SEQ_PKG_GenomicProductPackagingProblem,             "Genomic product set class incorrectly set to other", seqset);        break;    default:        if ( nuccnt == 0  &&  protcnt == 0 )  {            PostErr(eDiag_Error, eErr_SEQ_PKG_EmptySet,                 "No Bioseqs in this set", seqset);        }        break;    }}// =============================================================================//                                     Private// =============================================================================bool CValidError_bioseqset::IsMrnaProductInGPS(const CBioseq& seq){    if ( m_Imp.IsGPS() ) {        CFeat_CI mrna(            m_Scope->GetBioseqHandle(seq),             0, 0,            CSeqFeatData::e_Rna,            SAnnotSelector::eOverlap_Intervals,            SAnnotSelector::eResolve_TSE,            CFeat_CI::e_Product);        return (bool)mrna;    }    return true;}void CValidError_bioseqset::ValidateNucProtSet(const CBioseq_set& seqset, int nuccnt,  int protcnt){    if ( nuccnt == 0 ) {        PostErr(eDiag_Error, eErr_SEQ_PKG_NucProtProblem,                 "No nucleotides in nuc-prot set", seqset);    }    if ( protcnt == 0 ) {        PostErr(eDiag_Error, eErr_SEQ_PKG_NucProtProblem,                 "No proteins in nuc-prot set", seqset);    }    ITERATE( list< CRef<CSeq_entry> >, se_list_it, seqset.GetSeq_set() ) {        if ( (**se_list_it).IsSeq() ) {            const CBioseq& seq = (**se_list_it).GetSeq();            if ( seq.IsNa()  &&  !IsMrnaProductInGPS(seq) ) {                PostErr(eDiag_Warning,                    eErr_SEQ_PKG_GenomicProductPackagingProblem,                    "Nucleotide bioseq should be product of mRNA "                    "feature on contig, but is not",                    seq);            }        }        if ( !(**se_list_it).IsSet() )            continue;        const CBioseq_set& set = (**se_list_it).GetSet();        if ( set.GetClass() != CBioseq_set::eClass_segset ) {            const CEnumeratedTypeValues* tv =                 CBioseq_set::GetTypeInfo_enum_EClass();            const string& set_class = tv->FindName(set.GetClass(), true);            PostErr(eDiag_Error, eErr_SEQ_PKG_NucProtNotSegSet,                     "Nuc-prot Bioseq-set contains wrong Bioseq-set, "                     "its class is \"" + set_class + "\"", set);            break;        }    }}void CValidError_bioseqset::ValidateSegSet(const CBioseq_set& seqset, int segcnt){    if ( segcnt == 0 ) {        PostErr(eDiag_Error, eErr_SEQ_PKG_SegSetProblem,            "No segmented Bioseq in segset", seqset);    }    CSeq_inst::EMol     mol = CSeq_inst::eMol_not_set;    CSeq_inst::EMol     seq_inst_mol;        ITERATE ( CBioseq_set::TSeq_set, se_list_it, seqset.GetSeq_set() ) {        if ( (*se_list_it)->IsSeq() ) {            const CSeq_inst& seq_inst = (*se_list_it)->GetSeq().GetInst();                        if ( mol == CSeq_inst::eMol_not_set ||                 mol == CSeq_inst::eMol_other ) {                mol = seq_inst.GetMol();            } else if ( (seq_inst_mol = seq_inst.GetMol()) != CSeq_inst::eMol_other) {                if ( seq_inst.IsNa() != CSeq_inst::IsNa(mol) ) {                    PostErr(eDiag_Critical, eErr_SEQ_PKG_SegSetMixedBioseqs,                        "Segmented set contains mixture of nucleotides"                        "and proteins", seqset);                    break;                }            }        } else if ( (*se_list_it)->IsSet() ) {            const CBioseq_set& set = (*se_list_it)->GetSet();            if ( set.IsSetClass()  &&                   set.GetClass() != CBioseq_set::eClass_parts ) {                const CEnumeratedTypeValues* tv =                     CBioseq_set::GetTypeInfo_enum_EClass();                const string& set_class_str =                     tv->FindName(set.GetClass(), true);                                PostErr(eDiag_Critical, eErr_SEQ_PKG_SegSetNotParts,                    "Segmented set contains wrong Bioseq-set, "                    "its class is \"" + set_class_str + "\"", set);                break;            }

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?