validerror_bioseq.cpp
来自「ncbi源码」· C++ 代码 · 共 1,861 行 · 第 1/5 页
CPP
1,861 行
NStr::IntToString(gi) + ") is same as current Bioseq", seq); break; } } } } if ( hist.IsSetReplaces() ) { const CSeq_hist_rec& rec = hist.GetReplaces(); ITERATE( CSeq_hist_rec::TIds, id, rec.GetIds() ) { if ( (*id)->IsGi() ) { if ( gi == (*id)->GetGi() ) { PostErr(eDiag_Error, eErr_SEQ_INST_HistoryGiCollision, "Replaces gi (" + NStr::IntToString(gi) + ") is same as current Bioseq", seq); break; } } } }}// =============================================================================// Private// =============================================================================bool CValidError_bioseq::IsDifferentDbxrefs(const TDbtags& list1, const TDbtags& list2){ if (list1.empty() || list2.empty()) { return false; } else if (list1.size() != list2.size()) { return true; } TDbtags::const_iterator it1 = list1.begin(); TDbtags::const_iterator it2 = list2.begin(); for (; it1 != list1.end(); ++it1, ++it2) { if ((*it1)->GetDb() != (*it2)->GetDb()) { return true; } string str1 = (*it1)->GetTag().IsStr() ? (*it1)->GetTag().GetStr() : ""; string str2 = (*it2)->GetTag().IsStr() ? (*it2)->GetTag().GetStr() : ""; if ( str1.empty() && str2.empty() ) { if (!(*it1)->GetTag().IsId() && !(*it2)->GetTag().IsId()) { continue; } else if ((*it1)->GetTag().IsId() && (*it2)->GetTag().IsId()) { if ((*it1)->GetTag().GetId() != (*it2)->GetTag().GetId()) { return true; } } else { return true; } } } return false;}// Is the id contained in the bioseq?bool CValidError_bioseq::IsIdIn(const CSeq_id& id, const CBioseq& seq){ ITERATE (CBioseq::TId, it, seq.GetId()) { if (id.Match(**it)) { return true; } } return false;}TSeqPos CValidError_bioseq::GetDataLen(const CSeq_inst& inst){ if (!inst.IsSetSeq_data()) { return 0; } const CSeq_data& seqdata = inst.GetSeq_data(); switch (seqdata.Which()) { case CSeq_data::e_not_set: return 0; case CSeq_data::e_Iupacna: return seqdata.GetIupacna().Get().size(); case CSeq_data::e_Iupacaa: return seqdata.GetIupacaa().Get().size(); case CSeq_data::e_Ncbi2na: return seqdata.GetNcbi2na().Get().size(); case CSeq_data::e_Ncbi4na: return seqdata.GetNcbi4na().Get().size(); case CSeq_data::e_Ncbi8na: return seqdata.GetNcbi8na().Get().size(); case CSeq_data::e_Ncbipna: return seqdata.GetNcbipna().Get().size(); case CSeq_data::e_Ncbi8aa: return seqdata.GetNcbi8aa().Get().size(); case CSeq_data::e_Ncbieaa: return seqdata.GetNcbieaa().Get().size(); case CSeq_data::e_Ncbipaa: return seqdata.GetNcbipaa().Get().size(); case CSeq_data::e_Ncbistdaa: return seqdata.GetNcbistdaa().Get().size(); default: return 0; }}// Returns true if seq derived from translation ending in "*" or// seq is 3' partial (i.e. the right of the sequence is incomplete)bool CValidError_bioseq::SuppressTrailingXMsg(const CBioseq& seq){ // Look for the Cdregion feature used to create this aa product // Use the Cdregion to translate the associated na sequence // and check if translation has a '*' at the end. If it does. // message about 'X' at the end of this aa product sequence is suppressed const CSeq_feat* sfp = m_Imp.GetCDSGivenProduct(seq); if ( sfp ) { // Get CCdregion CTypeConstIterator<CCdregion> cdr(ConstBegin(*sfp)); // Get location on source sequence const CSeq_loc& loc = sfp->GetLocation(); // Get CBioseq_Handle for source sequence CBioseq_Handle hnd = m_Scope->GetBioseqHandle(loc); // Translate na CSeq_data string prot; CCdregion_translate::TranslateCdregion(prot, hnd, loc, *cdr); if ( prot[prot.size() - 1] == '*' ) { return true; } return false; } // Get CMolInfo for seq and determine if completeness is // "eCompleteness_no_right or eCompleteness_no_ends. If so // suppress message about "X" at end of aa sequence is suppressed CTypeConstIterator<CMolInfo> mi = ConstBegin(seq); if (mi && mi->IsSetCompleteness()) { if (mi->GetCompleteness() == CMolInfo::eCompleteness_no_right || mi->GetCompleteness() == CMolInfo::eCompleteness_no_ends) { return true; } } return false;}bool CValidError_bioseq::GetLocFromSeq(const CBioseq& seq, CSeq_loc* loc){ if (!seq.GetInst().IsSetExt() || !seq.GetInst().GetExt().IsSeg()) { return false; } CSeq_loc_mix& mix = loc->SetMix(); ITERATE (list< CRef<CSeq_loc> >, it, seq.GetInst().GetExt().GetSeg().Get()) { mix.Set().push_back(*it); } return true;}// Check if CdRegion required but not foundbool CValidError_bioseq::CdError(const CBioseq_Handle& bsh){ if ( bsh && CSeq_inst::IsAa(bsh.GetInst_Mol()) ) { CSeq_entry_Handle nps = bsh.GetExactComplexityLevel(CBioseq_set::eClass_nuc_prot); if ( nps ) { const CSeq_feat* cds = GetCDSForProduct(bsh); if ( cds == 0 ) { return true; } } } return false;}bool CValidError_bioseq::IsMrna(const CBioseq_Handle& bsh) { CSeqdesc_CI sd(bsh, CSeqdesc::e_Molinfo); if ( sd ) { const CMolInfo &mi = sd->GetMolinfo(); if ( mi.IsSetBiomol() ) { return mi.GetBiomol() == CMolInfo::eBiomol_mRNA; } } return false;}bool CValidError_bioseq::IsPrerna(const CBioseq_Handle& bsh) { CSeqdesc_CI sd(bsh, CSeqdesc::e_Molinfo); if ( sd ) { const CMolInfo &mi = sd->GetMolinfo(); if ( mi.IsSetBiomol() ) { return mi.GetBiomol() == CMolInfo::eBiomol_pre_RNA; } } return false;}size_t CValidError_bioseq::NumOfIntervals(const CSeq_loc& loc) { size_t counter = 0; for ( CSeq_loc_CI slit(loc); slit; ++slit ) { if ( !m_Imp.IsFarLocation(slit.GetSeq_loc()) ) { ++counter; } } return counter;}bool CValidError_bioseq::LocOnSeg(const CBioseq& seq, const CSeq_loc& loc) { for ( CSeq_loc_CI sli( loc ); sli; ++sli ) { const CSeq_id& loc_id = sli.GetSeq_id(); ITERATE( CBioseq::TId, seq_id, seq.GetId() ) { if ( loc_id.Match(**seq_id) ) { return true; } } } return false;}bool CValidError_bioseq::NotPeptideException(const CFeat_CI& curr, const CFeat_CI& prev){ if ( curr->IsSetExcept() && curr->GetExcept() && curr->IsSetExcept_text() ) { if ( NStr::FindNoCase(curr->GetExcept_text(), "alternative processing") != NPOS ) { return false; } } if ( prev->IsSetExcept() && prev->GetExcept() && prev->IsSetExcept_text() ) { if ( NStr::FindNoCase(prev->GetExcept_text(), "alternative processing") != NPOS ) { return false; } } return true;}bool CValidError_bioseq::IsSameSeqAnnot(const CFeat_CI& fi1, const CFeat_CI& fi2){ return fi1->GetAnnot() == fi2->GetAnnot();}bool CValidError_bioseq::IsSameSeqAnnotDesc(const CFeat_CI& fi1, const CFeat_CI& fi2){ const CSeq_annot& annot1 = fi1->GetSeq_annot(); const CSeq_annot& annot2 = fi2->GetSeq_annot(); if ( !(annot1.IsSetDesc()) || !(annot2.IsSetDesc()) ) { return true; } CAnnot_descr::Tdata descr1 = annot1.GetDesc().Get(); CAnnot_descr::Tdata descr2 = annot2.GetDesc().Get(); // Check only on the first? (same as in C toolkit) const CAnnotdesc& desc1 = descr1.front().GetObject(); const CAnnotdesc& desc2 = descr2.front().GetObject(); if ( desc1.Which() == desc2.Which() ) { if ( desc1.IsName() ) { return NStr::EqualNocase(desc1.GetName(), desc2.GetName()); } else if ( desc1.IsTitle() ) { return NStr::EqualNocase(desc1.GetTitle(), desc2.GetTitle()); } } return false;}void CValidError_bioseq::ValidateSeqLen(const CBioseq& seq){ const CSeq_inst& inst = seq.GetInst(); TSeqPos len = inst.IsSetLength() ? inst.GetLength() : 0; if ( seq.IsAa() ) { if ( len <= 3 && !m_Imp.IsPDB() ) { PostErr(eDiag_Warning, eErr_SEQ_INST_ShortSeq, "Sequence only " + NStr::IntToString(len) + " residue(s) long", seq); } } else { if ( len <= 10 && !m_Imp.IsPDB()) { PostErr(eDiag_Warning, eErr_SEQ_INST_ShortSeq, "Sequence only " + NStr::IntToString(len) + " residue(s) long", seq); } } if ( (len <= 350000) || m_Imp.IsNC() || m_Imp.IsNT() ) { return; } CBioseq_Handle bsh = m_Scope->GetBioseqHandle(seq); if ( !bsh ) { return; } CSeqdesc_CI desc( bsh, CSeqdesc::e_Molinfo ); const CMolInfo* mi = desc ? &(desc->GetMolinfo()) : 0; if ( inst.GetRepr() == CSeq_inst::eRepr_delta ) { if ( mi && m_Imp.IsGED() ) { CMolInfo::TTech tech = mi->IsSetTech() ? mi->GetTech() : CMolInfo::eTech_unknown; if (tech == CMolInfo::eTech_htgs_0 || tech == CMolInfo::eTech_htgs_1 || tech == CMolInfo::eTech_htgs_2) { PostErr(eDiag_Warning, eErr_SEQ_INST_LongHtgsSequence, "Phase 0, 1 or 2 HTGS sequence exceeds 350kbp limit", seq); } else if (tech == CMolInfo::eTech_htgs_3) { PostErr(eDiag_Warning, eErr_SEQ_INST_SequenceExceeds350kbp, "Phase 3 HTGS sequence exceeds 350kbp limit", seq); } else if (tech == CMolInfo::eTech_wgs) { PostErr(eDiag_Warning, eErr_SEQ_INST_SequenceExceeds350kbp, "WGS sequence exceeds 350kbp limit", seq); } else { len = 0; bool litHasData = false; CTypeConstIterator<CSeq_literal> lit(ConstBegin(seq)); for (; lit; ++lit) { if (lit->IsSetSeq_data()) { litHasData = true; } len += lit->GetLength(); } if ( len > 500000 && litHasData ) { PostErr(eDiag_Error, eErr_SEQ_INST_LongLiteralSequence, "Length of sequence literals exceeds 500kbp limit", seq); } } } } else if ( inst.GetRepr() == CSeq_inst::eRepr_raw ) { if ( mi ) { CMolInfo::TTech tech = mi->IsSetTech() ? mi->GetTech() : CMolInfo::eTech_unknown;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?