validerror_bioseqset.cpp
来自「ncbi源码」· C++ 代码 · 共 493 行 · 第 1/2 页
CPP
493 行
/* * =========================================================================== * PRODUCTION $Log: validerror_bioseqset.cpp,v $ * PRODUCTION Revision 1000.2 2004/06/01 19:47:54 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.14 * PRODUCTION * =========================================================================== *//* $Id: validerror_bioseqset.cpp,v 1000.2 2004/06/01 19:47:54 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: Jonathan Kans, Clifford Clausen, Aaron Ucko...... * * File Description: * validation of bioseq_set * ....... * */#include <ncbi_pch.hpp>#include <corelib/ncbistd.hpp>#include "validatorp.hpp"#include <objmgr/util/sequence.hpp>#include <serial/enumvalues.hpp>#include <serial/iterator.hpp>#include <objects/seqset/Seq_entry.hpp>#include <objects/seqset/Bioseq_set.hpp>#include <objects/seq/Bioseq.hpp>#include <objects/seq/MolInfo.hpp>#include <objects/seqfeat/Seq_feat.hpp>#include <objects/seqfeat/BioSource.hpp>#include <objects/seqfeat/Org_ref.hpp>#include <objects/seqfeat/RNA_ref.hpp>#include <objmgr/feat_ci.hpp>#include <objmgr/bioseq_handle.hpp>BEGIN_NCBI_SCOPEBEGIN_SCOPE(objects)BEGIN_SCOPE(validator)using namespace sequence;// =============================================================================// Public// =============================================================================CValidError_bioseqset::CValidError_bioseqset(CValidError_imp& imp) : CValidError_base(imp){}CValidError_bioseqset::~CValidError_bioseqset(void){}void CValidError_bioseqset::ValidateBioseqSet(const CBioseq_set& seqset){ int protcnt = 0; int nuccnt = 0; int segcnt = 0; // Validate Set Contents CTypeConstIterator<CBioseq> seqit(ConstBegin(seqset)); for (; seqit; ++seqit) { if ( seqit->IsAa() ) { protcnt++; } else { nuccnt++; } if (seqit->GetInst().GetRepr() == CSeq_inst::eRepr_seg) { segcnt++; } } switch ( seqset.GetClass() ) { case CBioseq_set::eClass_nuc_prot: ValidateNucProtSet(seqset, nuccnt, protcnt); break; case CBioseq_set::eClass_segset: ValidateSegSet(seqset, segcnt); break; case CBioseq_set::eClass_parts: ValidatePartsSet(seqset); break; case CBioseq_set::eClass_pop_set: ValidatePopSet(seqset); break; case CBioseq_set::eClass_gen_prod_set: ValidateGenProdSet(seqset); break; case CBioseq_set::eClass_other: PostErr(eDiag_Critical, eErr_SEQ_PKG_GenomicProductPackagingProblem, "Genomic product set class incorrectly set to other", seqset); break; default: if ( nuccnt == 0 && protcnt == 0 ) { PostErr(eDiag_Error, eErr_SEQ_PKG_EmptySet, "No Bioseqs in this set", seqset); } break; }}// =============================================================================// Private// =============================================================================bool CValidError_bioseqset::IsMrnaProductInGPS(const CBioseq& seq){ if ( m_Imp.IsGPS() ) { CFeat_CI mrna( m_Scope->GetBioseqHandle(seq), 0, 0, CSeqFeatData::e_Rna, SAnnotSelector::eOverlap_Intervals, SAnnotSelector::eResolve_TSE, CFeat_CI::e_Product); return (bool)mrna; } return true;}void CValidError_bioseqset::ValidateNucProtSet(const CBioseq_set& seqset, int nuccnt, int protcnt){ if ( nuccnt == 0 ) { PostErr(eDiag_Error, eErr_SEQ_PKG_NucProtProblem, "No nucleotides in nuc-prot set", seqset); } if ( protcnt == 0 ) { PostErr(eDiag_Error, eErr_SEQ_PKG_NucProtProblem, "No proteins in nuc-prot set", seqset); } ITERATE( list< CRef<CSeq_entry> >, se_list_it, seqset.GetSeq_set() ) { if ( (**se_list_it).IsSeq() ) { const CBioseq& seq = (**se_list_it).GetSeq(); if ( seq.IsNa() && !IsMrnaProductInGPS(seq) ) { PostErr(eDiag_Warning, eErr_SEQ_PKG_GenomicProductPackagingProblem, "Nucleotide bioseq should be product of mRNA " "feature on contig, but is not", seq); } } if ( !(**se_list_it).IsSet() ) continue; const CBioseq_set& set = (**se_list_it).GetSet(); if ( set.GetClass() != CBioseq_set::eClass_segset ) { const CEnumeratedTypeValues* tv = CBioseq_set::GetTypeInfo_enum_EClass(); const string& set_class = tv->FindName(set.GetClass(), true); PostErr(eDiag_Error, eErr_SEQ_PKG_NucProtNotSegSet, "Nuc-prot Bioseq-set contains wrong Bioseq-set, " "its class is \"" + set_class + "\"", set); break; } }}void CValidError_bioseqset::ValidateSegSet(const CBioseq_set& seqset, int segcnt){ if ( segcnt == 0 ) { PostErr(eDiag_Error, eErr_SEQ_PKG_SegSetProblem, "No segmented Bioseq in segset", seqset); } CSeq_inst::EMol mol = CSeq_inst::eMol_not_set; CSeq_inst::EMol seq_inst_mol; ITERATE ( CBioseq_set::TSeq_set, se_list_it, seqset.GetSeq_set() ) { if ( (*se_list_it)->IsSeq() ) { const CSeq_inst& seq_inst = (*se_list_it)->GetSeq().GetInst(); if ( mol == CSeq_inst::eMol_not_set || mol == CSeq_inst::eMol_other ) { mol = seq_inst.GetMol(); } else if ( (seq_inst_mol = seq_inst.GetMol()) != CSeq_inst::eMol_other) { if ( seq_inst.IsNa() != CSeq_inst::IsNa(mol) ) { PostErr(eDiag_Critical, eErr_SEQ_PKG_SegSetMixedBioseqs, "Segmented set contains mixture of nucleotides" "and proteins", seqset); break; } } } else if ( (*se_list_it)->IsSet() ) { const CBioseq_set& set = (*se_list_it)->GetSet(); if ( set.IsSetClass() && set.GetClass() != CBioseq_set::eClass_parts ) { const CEnumeratedTypeValues* tv = CBioseq_set::GetTypeInfo_enum_EClass(); const string& set_class_str = tv->FindName(set.GetClass(), true); PostErr(eDiag_Critical, eErr_SEQ_PKG_SegSetNotParts, "Segmented set contains wrong Bioseq-set, " "its class is \"" + set_class_str + "\"", set); break; }
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?