validerror_bioseq.cpp

来自「ncbi源码」· C++ 代码 · 共 1,861 行 · 第 1/5 页

CPP
1,861
字号
                        NStr::IntToString(gi) + ") is same as current Bioseq",                        seq);                    break;                }            }        }    }    if ( hist.IsSetReplaces() ) {        const CSeq_hist_rec& rec = hist.GetReplaces();        ITERATE( CSeq_hist_rec::TIds, id, rec.GetIds() ) {            if ( (*id)->IsGi() ) {                if ( gi == (*id)->GetGi() ) {                    PostErr(eDiag_Error, eErr_SEQ_INST_HistoryGiCollision,                        "Replaces gi (" +                         NStr::IntToString(gi) + ") is same as current Bioseq",                        seq);                    break;                }            }        }    }}// =============================================================================//                                     Private// =============================================================================bool CValidError_bioseq::IsDifferentDbxrefs(const TDbtags& list1,                                            const TDbtags& list2){    if (list1.empty()  ||  list2.empty()) {        return false;    } else if (list1.size() != list2.size()) {        return true;    }    TDbtags::const_iterator it1 = list1.begin();    TDbtags::const_iterator it2 = list2.begin();    for (; it1 != list1.end(); ++it1, ++it2) {        if ((*it1)->GetDb() != (*it2)->GetDb()) {            return true;        }        string str1 =            (*it1)->GetTag().IsStr() ? (*it1)->GetTag().GetStr() : "";        string str2 =            (*it2)->GetTag().IsStr() ? (*it2)->GetTag().GetStr() : "";        if ( str1.empty()  &&  str2.empty() ) {            if (!(*it1)->GetTag().IsId()  &&  !(*it2)->GetTag().IsId()) {                continue;            } else if ((*it1)->GetTag().IsId()  &&  (*it2)->GetTag().IsId()) {                if ((*it1)->GetTag().GetId() != (*it2)->GetTag().GetId()) {                    return true;                }            } else {                return true;            }        }    }    return false;}// Is the id contained in the bioseq?bool CValidError_bioseq::IsIdIn(const CSeq_id& id, const CBioseq& seq){    ITERATE (CBioseq::TId, it, seq.GetId()) {        if (id.Match(**it)) {            return true;        }    }    return false;}TSeqPos CValidError_bioseq::GetDataLen(const CSeq_inst& inst){    if (!inst.IsSetSeq_data()) {        return 0;    }    const CSeq_data& seqdata = inst.GetSeq_data();    switch (seqdata.Which()) {    case CSeq_data::e_not_set:        return 0;    case CSeq_data::e_Iupacna:        return seqdata.GetIupacna().Get().size();    case CSeq_data::e_Iupacaa:        return seqdata.GetIupacaa().Get().size();    case CSeq_data::e_Ncbi2na:        return seqdata.GetNcbi2na().Get().size();    case CSeq_data::e_Ncbi4na:        return seqdata.GetNcbi4na().Get().size();    case CSeq_data::e_Ncbi8na:        return seqdata.GetNcbi8na().Get().size();    case CSeq_data::e_Ncbipna:        return seqdata.GetNcbipna().Get().size();    case CSeq_data::e_Ncbi8aa:        return seqdata.GetNcbi8aa().Get().size();    case CSeq_data::e_Ncbieaa:        return seqdata.GetNcbieaa().Get().size();    case CSeq_data::e_Ncbipaa:        return seqdata.GetNcbipaa().Get().size();    case CSeq_data::e_Ncbistdaa:        return seqdata.GetNcbistdaa().Get().size();    default:        return 0;    }}// Returns true if seq derived from translation ending in "*" or// seq is 3' partial (i.e. the right of the sequence is incomplete)bool CValidError_bioseq::SuppressTrailingXMsg(const CBioseq& seq){    // Look for the Cdregion feature used to create this aa product    // Use the Cdregion to translate the associated na sequence    // and check if translation has a '*' at the end. If it does.    // message about 'X' at the end of this aa product sequence is suppressed    const CSeq_feat* sfp = m_Imp.GetCDSGivenProduct(seq);    if ( sfp ) {            // Get CCdregion         CTypeConstIterator<CCdregion> cdr(ConstBegin(*sfp));                // Get location on source sequence        const CSeq_loc& loc = sfp->GetLocation();        // Get CBioseq_Handle for source sequence        CBioseq_Handle hnd = m_Scope->GetBioseqHandle(loc);        // Translate na CSeq_data        string prot;                CCdregion_translate::TranslateCdregion(prot, hnd, loc, *cdr);                if ( prot[prot.size() - 1] == '*' ) {            return true;        }        return false;    }    // Get CMolInfo for seq and determine if completeness is    // "eCompleteness_no_right or eCompleteness_no_ends. If so    // suppress message about "X" at end of aa sequence is suppressed    CTypeConstIterator<CMolInfo> mi = ConstBegin(seq);    if (mi  &&  mi->IsSetCompleteness()) {        if (mi->GetCompleteness() == CMolInfo::eCompleteness_no_right  ||          mi->GetCompleteness() == CMolInfo::eCompleteness_no_ends) {            return true;        }    }    return false;}bool CValidError_bioseq::GetLocFromSeq(const CBioseq& seq, CSeq_loc* loc){    if (!seq.GetInst().IsSetExt()  ||  !seq.GetInst().GetExt().IsSeg()) {        return false;    }    CSeq_loc_mix& mix = loc->SetMix();    ITERATE (list< CRef<CSeq_loc> >, it,        seq.GetInst().GetExt().GetSeg().Get()) {        mix.Set().push_back(*it);    }    return true;}// Check if CdRegion required but not foundbool CValidError_bioseq::CdError(const CBioseq_Handle& bsh){    if ( bsh  &&  CSeq_inst::IsAa(bsh.GetInst_Mol()) ) {        CSeq_entry_Handle nps =             bsh.GetExactComplexityLevel(CBioseq_set::eClass_nuc_prot);        if ( nps ) {            const CSeq_feat* cds = GetCDSForProduct(bsh);            if ( cds == 0 ) {                return true;            }        }    }    return false;}bool CValidError_bioseq::IsMrna(const CBioseq_Handle& bsh) {    CSeqdesc_CI sd(bsh, CSeqdesc::e_Molinfo);    if ( sd ) {        const CMolInfo &mi = sd->GetMolinfo();        if ( mi.IsSetBiomol() ) {            return mi.GetBiomol() == CMolInfo::eBiomol_mRNA;        }    }    return false;}bool CValidError_bioseq::IsPrerna(const CBioseq_Handle& bsh) {    CSeqdesc_CI sd(bsh, CSeqdesc::e_Molinfo);    if ( sd ) {        const CMolInfo &mi = sd->GetMolinfo();        if ( mi.IsSetBiomol() ) {            return mi.GetBiomol() == CMolInfo::eBiomol_pre_RNA;        }    }    return false;}size_t CValidError_bioseq::NumOfIntervals(const CSeq_loc& loc) {    size_t counter = 0;    for ( CSeq_loc_CI slit(loc); slit; ++slit ) {        if ( !m_Imp.IsFarLocation(slit.GetSeq_loc()) ) {            ++counter;        }    }    return counter;}bool CValidError_bioseq::LocOnSeg(const CBioseq& seq, const CSeq_loc& loc) {    for ( CSeq_loc_CI sli( loc ); sli;  ++sli ) {        const CSeq_id& loc_id = sli.GetSeq_id();        ITERATE(  CBioseq::TId, seq_id, seq.GetId() ) {            if ( loc_id.Match(**seq_id) ) {                return true;            }        }    }    return false;}bool CValidError_bioseq::NotPeptideException(const CFeat_CI& curr, const CFeat_CI& prev){    if ( curr->IsSetExcept()  &&  curr->GetExcept()  &&         curr->IsSetExcept_text() ) {        if ( NStr::FindNoCase(curr->GetExcept_text(), "alternative processing") != NPOS ) {            return false;        }    }    if ( prev->IsSetExcept()  &&  prev->GetExcept()  &&         prev->IsSetExcept_text() ) {        if ( NStr::FindNoCase(prev->GetExcept_text(), "alternative processing") != NPOS ) {            return false;        }    }    return true;}bool CValidError_bioseq::IsSameSeqAnnot(const CFeat_CI& fi1, const CFeat_CI& fi2){    return fi1->GetAnnot() == fi2->GetAnnot();}bool CValidError_bioseq::IsSameSeqAnnotDesc(const CFeat_CI& fi1, const CFeat_CI& fi2){    const CSeq_annot& annot1 = fi1->GetSeq_annot();    const CSeq_annot& annot2 = fi2->GetSeq_annot();    if ( !(annot1.IsSetDesc())  ||  !(annot2.IsSetDesc()) ) {        return true;    }    CAnnot_descr::Tdata descr1 = annot1.GetDesc().Get();    CAnnot_descr::Tdata descr2 = annot2.GetDesc().Get();    // Check only on the first? (same as in C toolkit)    const CAnnotdesc& desc1 = descr1.front().GetObject();    const CAnnotdesc& desc2 = descr2.front().GetObject();    if ( desc1.Which() == desc2.Which() ) {        if ( desc1.IsName() ) {            return NStr::EqualNocase(desc1.GetName(), desc2.GetName());        } else if ( desc1.IsTitle() ) {            return NStr::EqualNocase(desc1.GetTitle(), desc2.GetTitle());        }    }    return false;}void CValidError_bioseq::ValidateSeqLen(const CBioseq& seq){    const CSeq_inst& inst = seq.GetInst();    TSeqPos len = inst.IsSetLength() ? inst.GetLength() : 0;    if ( seq.IsAa() ) {        if ( len <= 3  &&  !m_Imp.IsPDB() ) {            PostErr(eDiag_Warning, eErr_SEQ_INST_ShortSeq, "Sequence only " +                NStr::IntToString(len) + " residue(s) long", seq);        }    } else {        if ( len <= 10  &&  !m_Imp.IsPDB()) {            PostErr(eDiag_Warning, eErr_SEQ_INST_ShortSeq, "Sequence only " +                NStr::IntToString(len) + " residue(s) long", seq);        }    }    if ( (len <= 350000)  ||  m_Imp.IsNC()  ||  m_Imp.IsNT() ) {        return;    }    CBioseq_Handle bsh = m_Scope->GetBioseqHandle(seq);    if ( !bsh ) {        return;    }    CSeqdesc_CI desc( bsh, CSeqdesc::e_Molinfo );    const CMolInfo* mi = desc ? &(desc->GetMolinfo()) : 0;    if ( inst.GetRepr() == CSeq_inst::eRepr_delta ) {        if ( mi  &&  m_Imp.IsGED() ) {            CMolInfo::TTech tech = mi->IsSetTech() ?                 mi->GetTech() : CMolInfo::eTech_unknown;            if (tech == CMolInfo::eTech_htgs_0  ||                tech == CMolInfo::eTech_htgs_1  ||                tech == CMolInfo::eTech_htgs_2)            {                PostErr(eDiag_Warning, eErr_SEQ_INST_LongHtgsSequence,                    "Phase 0, 1 or 2 HTGS sequence exceeds 350kbp limit",                    seq);            } else if (tech == CMolInfo::eTech_htgs_3) {                PostErr(eDiag_Warning, eErr_SEQ_INST_SequenceExceeds350kbp,                    "Phase 3 HTGS sequence exceeds 350kbp limit", seq);            } else if (tech == CMolInfo::eTech_wgs) {                PostErr(eDiag_Warning, eErr_SEQ_INST_SequenceExceeds350kbp,                    "WGS sequence exceeds 350kbp limit", seq);            } else {                len = 0;                bool litHasData = false;                CTypeConstIterator<CSeq_literal> lit(ConstBegin(seq));                for (; lit; ++lit) {                    if (lit->IsSetSeq_data()) {                        litHasData = true;                    }                    len += lit->GetLength();                }                if ( len > 500000  && litHasData ) {                    PostErr(eDiag_Error, eErr_SEQ_INST_LongLiteralSequence,                        "Length of sequence literals exceeds 500kbp limit",                        seq);                }            }        }    } else if ( inst.GetRepr() == CSeq_inst::eRepr_raw ) {        if ( mi ) {            CMolInfo::TTech tech = mi->IsSetTech() ?                 mi->GetTech() : CMolInfo::eTech_unknown;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?