validatorp.cpp
来自「ncbi源码」· C++ 代码 · 共 2,068 行 · 第 1/5 页
CPP
2,068 行
loc.GetLabel(&loc_lbl); } PostErr(eDiag_Error, eErr_SEQ_FEAT_MixedStrand, prefix + ": Mixed strands in SeqLoc [" + loc_lbl + "]", obj); }}void CValidError_imp::AddBioseqWithNoPub(const CBioseq& seq){ m_BioseqWithNoPubs.push_back(CConstRef<CBioseq>(&seq));}void CValidError_imp::AddBioseqWithNoBiosource(const CBioseq& seq){ m_BioseqWithNoSource.push_back(CConstRef<CBioseq>(&seq));}void CValidError_imp::AddBioseqWithNoMolinfo(const CBioseq& seq){ m_BioseqWithNoMolinfo.push_back(CConstRef<CBioseq>(&seq));}void CValidError_imp::AddProtWithoutFullRef(const CBioseq_Handle& seq){ const CSeq_feat* cds = GetCDSForProduct(seq); if ( cds != 0 ) { m_ProtWithNoFullRef.push_back(CConstRef<CSeq_feat>(cds)); }}void CValidError_imp::ReportMissingPubs(const CSeq_entry& se, const CCit_sub* cs){ if ( m_NoPubs ) { if ( !m_IsGPS && !m_IsRefSeq && !cs) { PostErr(eDiag_Error, eErr_SEQ_DESCR_NoPubFound, "No publications anywhere on this entire record", se); } return; } size_t num_no_pubs = m_BioseqWithNoPubs.size(); EDiagSev sev = IsCuratedRefSeq() ? eDiag_Error : eDiag_Warning; if ( num_no_pubs == 1 ) { PostErr(sev, eErr_SEQ_DESCR_NoPubFound, "No publications refer to this Bioseq.", *(m_BioseqWithNoPubs[0])); } else if ( num_no_pubs > 10 ) { PostErr(sev, eErr_SEQ_DESCR_NoPubFound, NStr::IntToString(num_no_pubs) + " Bioseqs without publication in this record (first reported)", *(m_BioseqWithNoPubs[0])); } else { string msg; for ( size_t i = 0; i < num_no_pubs; ++i ) { msg = NStr::IntToString(i + 1) + " of " + NStr::IntToString(num_no_pubs) + " Bioseqs without publication"; PostErr(sev, eErr_SEQ_DESCR_NoPubFound, msg, *(m_BioseqWithNoPubs[i])); } }}void CValidError_imp::ReportMissingBiosource(const CSeq_entry& se){ if(m_NoBioSource && !m_IsPatent && !m_IsPDB) { PostErr(eDiag_Error, eErr_SEQ_DESCR_NoOrgFound, "No organism name anywhere on this entire record", se); return; } size_t num_no_source = m_BioseqWithNoSource.size(); if ( num_no_source == 1 ) { PostErr(eDiag_Error, eErr_SEQ_DESCR_NoOrgFound, "No organism name has been applied to this Bioseq.", *(m_BioseqWithNoSource[0])); } else if ( num_no_source > 10 ) { PostErr(eDiag_Error, eErr_SEQ_DESCR_NoOrgFound, NStr::IntToString(num_no_source) + " Bioseqs without organism name in this record (first reported)", *(m_BioseqWithNoSource[0])); } else { string msg; for ( size_t i = 0; i < num_no_source; ++i ) { msg = NStr::IntToString(i + 1) + " of " + NStr::IntToString(num_no_source) + " Bioseqs without organism name"; PostErr(eDiag_Error, eErr_SEQ_DESCR_NoOrgFound, msg, *(m_BioseqWithNoSource[i])); } }}void CValidError_imp::ReportProtWithoutFullRef(void){ size_t num = m_ProtWithNoFullRef.size(); if ( num == 1 ) { PostErr(eDiag_Error, eErr_SEQ_FEAT_NoProtRefFound, "No full length Prot-ref feature applied to this Bioseq", *(m_ProtWithNoFullRef[0])); } else if ( num > 10 ) { PostErr(eDiag_Error, eErr_SEQ_FEAT_NoProtRefFound, NStr::IntToString(num) + " Bioseqs with no full length " "Prot-ref feature applied to them (first reported)", *(m_ProtWithNoFullRef[0])); } else { string msg; for ( size_t i = 0; i < num; ++i ) { msg = NStr::IntToString(i + 1) + " of " + NStr::IntToString(num) + " Bioseqs without full length Prot-ref feature applied to"; PostErr(eDiag_Error, eErr_SEQ_FEAT_NoProtRefFound, msg, *(m_ProtWithNoFullRef[i])); } }} void CValidError_imp::ReportBioseqsWithNoMolinfo(void){ if ( m_BioseqWithNoMolinfo.empty() ) { return; } size_t num = m_BioseqWithNoMolinfo.size(); if ( num == 1 ) { PostErr(eDiag_Error, eErr_SEQ_DESCR_NoMolInfoFound, "No Mol-info applies to this Bioseq", *(m_BioseqWithNoMolinfo[0])); } else if ( num > 10 ) { PostErr(eDiag_Error, eErr_SEQ_DESCR_NoMolInfoFound, NStr::IntToString(num) + " Bioseqs with no Mol-info " "applied to them (first reported)", *(m_BioseqWithNoMolinfo[0])); } else { string msg; for ( size_t i = 0; i < num; ++i ) { msg = NStr::IntToString(i + 1) + " of " + NStr::IntToString(num) + " Bioseqs with no Mol-info applied to"; PostErr(eDiag_Error, eErr_SEQ_DESCR_NoMolInfoFound, msg, *(m_BioseqWithNoMolinfo[i])); } }} bool CValidError_imp::IsNucAcc(const string& acc){ if ( isupper(acc[0]) && acc.find('_') != NPOS ) { return true; } return false;}bool CValidError_imp::IsFarLocation(const CSeq_loc& loc){ for ( CSeq_loc_CI citer(loc); citer; ++citer ) { CConstRef<CSeq_id> id(&citer.GetSeq_id()); if ( id ) { CBioseq_Handle near_seq = m_Scope->GetBioseqHandleFromTSE(*id, *m_TSE); if ( !near_seq ) { return true; } } } return false;}CConstRef<CSeq_feat> CValidError_imp::GetCDSGivenProduct(const CBioseq& seq){ CConstRef<CSeq_feat> feat; CBioseq_Handle bsh = m_Scope->GetBioseqHandle(seq); // In case of a NT bioseq limit the search to features packaged on the // NT (we assume features have been pulled from the segments to the NT). const CSeq_entry* limit = 0; if ( IsNT() ) { limit = m_TSE.GetPointerOrNull(); } if ( bsh ) { CFeat_CI fi(bsh, 0, 0, CSeqFeatData::e_Cdregion, SAnnotSelector::eOverlap_Intervals, SAnnotSelector::eResolve_TSE, CFeat_CI::e_Product, limit); if ( fi ) { // return the first one (should be the one packaged on the // nuc-prot set). feat.Reset(&(fi->GetOriginalFeature())); } } return feat;}const CSeq_entry* CValidError_imp::GetAncestor(const CBioseq& seq, CBioseq_set::EClass clss){ const CSeq_entry* parent = 0; for ( parent = seq.GetParentEntry(); parent != 0; parent = parent->GetParentEntry() ) { if ( parent->IsSet() ) { const CBioseq_set& set = parent->GetSet(); if ( set.IsSetClass() && set.GetClass() == clss ) { break; } } } return parent;}bool CValidError_imp::IsSerialNumberInComment(const string& comment){ size_t pos = comment.find('[', 0); while ( pos != string::npos ) { ++pos; if ( isdigit(comment[pos]) ) { while ( isdigit(comment[pos]) ) { ++pos; } if ( comment[pos] == ']' ) { return true; } } pos = comment.find('[', pos); } return false;}bool CValidError_imp::CheckSeqVector(const CSeqVector& vec){ if ( IsSequenceAvaliable(vec) ) { return true; } if ( IsRemoteFetch() ) { // issue some sort of error } return false;}bool CValidError_imp::IsSequenceAvaliable(const CSeqVector& vec){ // IMPORTANT: This is a temporary implementation, due to (yet) restricted // implementation of the Scope / object manager classes. // if the first and last elements are accesible the sequence is available. try { vec[0]; vec[vec.size() - 1]; } catch ( const exception& ) { // do something return false; } return true;}// =============================================================================// Private// =============================================================================bool CValidError_imp::IsMixedStrands(const CSeq_loc& loc){ if ( SeqLocCheck(loc, m_Scope) == eSeqLocCheck_warning ) { return false; } CSeq_loc_CI curr(loc); if ( !curr ) { return false; } CSeq_loc_CI prev = curr; ++curr; while ( curr ) { ENa_strand curr_strand = curr.GetStrand(); ENa_strand prev_strand = prev.GetStrand(); if ( (prev_strand == eNa_strand_minus && curr_strand != eNa_strand_minus) || (prev_strand != eNa_strand_minus && curr_strand == eNa_strand_minus) ) { return true; } prev = curr; ++curr; } return false;}void CValidError_imp::Setup(const CSeq_entry& se, CScope* scope) { // "Save" the Seq-entry m_TSE = &se; if ( scope ) { m_Scope.Reset(scope); } else { SetScope(se); } // If no Pubs/BioSource in CSeq_entry, post only one error CTypeConstIterator<CPub> pub(ConstBegin(se)); m_NoPubs = !pub; CTypeConstIterator<CBioSource> src(ConstBegin(se)); m_NoBioSource = !src; // Look for genomic product set for (CTypeConstIterator <CBioseq_set> si (se); si; ++si) { if (si->IsSetClass ()) { if (si->GetClass () == CBioseq_set::eClass_gen_prod_set) { m_IsGPS = true; } } } // Examine all Seq-ids on Bioseqs for (CTypeConstIterator <CBioseq> bi (se); bi; ++bi) { ITERATE (CBioseq::TId, id, bi->GetId()) { CSeq_id::E_Choice typ = (**id).Which(); switch (typ) { case CSeq_id::e_not_set: break; case CSeq_id::e_Local: break; case CSeq_id::e_Gibbsq: break; case CSeq_id::e_Gibbmt: break; case CSeq_id::e_Giim: break; case CSeq_id::e_Genbank: m_IsGED = true; break; case CSeq_id::e_Embl: m_IsGED = true; break; case CSeq_id::e_Pir: break; case CSeq_id::e_Swissprot: break; case CSeq_id::e_Patent: m_IsPatent = true; break; case CSeq_id::e_Other: m_IsRefSeq = true; // and do RefSeq subclasses up front as well if ((**id).GetOther().IsSetAccession()) { string acc = (**id).GetOther().GetAccession().substr(0, 3); if (acc == "NC_") { m_IsNC = true; } else if (acc == "NG_") { m_IsNG = true; } else if (acc == "NM_") { m_IsNM = true; } else if (acc == "NP_") { m_IsNP = true; } else if (acc == "NR_") { m_IsNR = true; } else if (acc == "NS_") { m_IsNS = true; } else if (acc == "NT_") { m_IsNT = true; } else if (acc == "NW_") { m_IsNW = true; } else if (acc == "XR_") { m_IsXR = true; } } break; case CSeq_id::e_General: if (!NStr::CompareCase((**id).GetGeneral().GetDb(), "BankIt")) { m_IsTPA = true; } break; case CSeq_id
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?