validatorp.cpp

来自「ncbi源码」· C++ 代码 · 共 2,068 行 · 第 1/5 页

CPP
2,068
字号
            loc.GetLabel(&loc_lbl);        }        PostErr(eDiag_Error, eErr_SEQ_FEAT_MixedStrand,            prefix + ": Mixed strands in SeqLoc [" +            loc_lbl + "]", obj);    }}void CValidError_imp::AddBioseqWithNoPub(const CBioseq& seq){    m_BioseqWithNoPubs.push_back(CConstRef<CBioseq>(&seq));}void CValidError_imp::AddBioseqWithNoBiosource(const CBioseq& seq){    m_BioseqWithNoSource.push_back(CConstRef<CBioseq>(&seq));}void CValidError_imp::AddBioseqWithNoMolinfo(const CBioseq& seq){    m_BioseqWithNoMolinfo.push_back(CConstRef<CBioseq>(&seq));}void CValidError_imp::AddProtWithoutFullRef(const CBioseq_Handle& seq){    const CSeq_feat* cds = GetCDSForProduct(seq);    if ( cds != 0 ) {        m_ProtWithNoFullRef.push_back(CConstRef<CSeq_feat>(cds));    }}void CValidError_imp::ReportMissingPubs(const CSeq_entry& se, const CCit_sub* cs){    if ( m_NoPubs ) {        if ( !m_IsGPS  &&  !m_IsRefSeq  &&  !cs) {            PostErr(eDiag_Error, eErr_SEQ_DESCR_NoPubFound,                 "No publications anywhere on this entire record", se);        }         return;    }    size_t num_no_pubs = m_BioseqWithNoPubs.size();    EDiagSev sev = IsCuratedRefSeq() ? eDiag_Error : eDiag_Warning;    if ( num_no_pubs == 1 ) {        PostErr(sev, eErr_SEQ_DESCR_NoPubFound,             "No publications refer to this Bioseq.",            *(m_BioseqWithNoPubs[0]));    } else if ( num_no_pubs > 10 ) {        PostErr(sev, eErr_SEQ_DESCR_NoPubFound,             NStr::IntToString(num_no_pubs) +             " Bioseqs without publication in this record  (first reported)",            *(m_BioseqWithNoPubs[0]));    } else {        string msg;        for ( size_t i = 0; i < num_no_pubs; ++i ) {            msg = NStr::IntToString(i + 1) + " of " +                 NStr::IntToString(num_no_pubs) +                 " Bioseqs without publication";            PostErr(sev, eErr_SEQ_DESCR_NoPubFound, msg,                 *(m_BioseqWithNoPubs[i]));        }    }}void CValidError_imp::ReportMissingBiosource(const CSeq_entry& se){    if(m_NoBioSource  &&  !m_IsPatent  &&  !m_IsPDB) {        PostErr(eDiag_Error, eErr_SEQ_DESCR_NoOrgFound,            "No organism name anywhere on this entire record", se);        return;    }        size_t num_no_source = m_BioseqWithNoSource.size();        if ( num_no_source == 1 ) {        PostErr(eDiag_Error, eErr_SEQ_DESCR_NoOrgFound,             "No organism name has been applied to this Bioseq.",            *(m_BioseqWithNoSource[0]));    } else if ( num_no_source > 10 ) {        PostErr(eDiag_Error, eErr_SEQ_DESCR_NoOrgFound,             NStr::IntToString(num_no_source) +             " Bioseqs without organism name in this record (first reported)",            *(m_BioseqWithNoSource[0]));    } else {        string msg;        for ( size_t i = 0; i < num_no_source; ++i ) {            msg = NStr::IntToString(i + 1) + " of " +                 NStr::IntToString(num_no_source) +                 " Bioseqs without organism name";            PostErr(eDiag_Error, eErr_SEQ_DESCR_NoOrgFound, msg,                 *(m_BioseqWithNoSource[i]));        }    }}void CValidError_imp::ReportProtWithoutFullRef(void){    size_t num = m_ProtWithNoFullRef.size();        if ( num == 1 ) {        PostErr(eDiag_Error, eErr_SEQ_FEAT_NoProtRefFound,             "No full length Prot-ref feature applied to this Bioseq",            *(m_ProtWithNoFullRef[0]));    } else if ( num > 10 ) {        PostErr(eDiag_Error, eErr_SEQ_FEAT_NoProtRefFound,             NStr::IntToString(num) + " Bioseqs with no full length "             "Prot-ref feature applied to them (first reported)",            *(m_ProtWithNoFullRef[0]));    } else {        string msg;        for ( size_t i = 0; i < num; ++i ) {            msg = NStr::IntToString(i + 1) + " of " +                 NStr::IntToString(num) +                 " Bioseqs without full length Prot-ref feature applied to";            PostErr(eDiag_Error, eErr_SEQ_FEAT_NoProtRefFound, msg,                 *(m_ProtWithNoFullRef[i]));        }    }}   void CValidError_imp::ReportBioseqsWithNoMolinfo(void){    if ( m_BioseqWithNoMolinfo.empty() ) {        return;    }    size_t num = m_BioseqWithNoMolinfo.size();        if ( num == 1 ) {        PostErr(eDiag_Error, eErr_SEQ_DESCR_NoMolInfoFound,             "No Mol-info applies to this Bioseq",            *(m_BioseqWithNoMolinfo[0]));    } else if ( num > 10 ) {        PostErr(eDiag_Error, eErr_SEQ_DESCR_NoMolInfoFound,             NStr::IntToString(num) + " Bioseqs with no Mol-info "             "applied to them (first reported)",            *(m_BioseqWithNoMolinfo[0]));    } else {        string msg;        for ( size_t i = 0; i < num; ++i ) {            msg = NStr::IntToString(i + 1) + " of " +                 NStr::IntToString(num) +                 " Bioseqs with no Mol-info applied to";            PostErr(eDiag_Error, eErr_SEQ_DESCR_NoMolInfoFound, msg,                 *(m_BioseqWithNoMolinfo[i]));        }    }}   bool CValidError_imp::IsNucAcc(const string& acc){    if ( isupper(acc[0])  &&  acc.find('_') != NPOS ) {        return true;    }    return false;}bool CValidError_imp::IsFarLocation(const CSeq_loc& loc){    for ( CSeq_loc_CI citer(loc); citer; ++citer ) {        CConstRef<CSeq_id> id(&citer.GetSeq_id());        if ( id ) {            CBioseq_Handle near_seq =                 m_Scope->GetBioseqHandleFromTSE(*id, *m_TSE);            if ( !near_seq ) {                return true;            }        }    }    return false;}CConstRef<CSeq_feat> CValidError_imp::GetCDSGivenProduct(const CBioseq& seq){    CConstRef<CSeq_feat> feat;    CBioseq_Handle bsh = m_Scope->GetBioseqHandle(seq);    // In case of a NT bioseq limit the search to features packaged on the     // NT (we assume features have been pulled from the segments to the NT).    const CSeq_entry* limit = 0;    if ( IsNT() ) {        limit = m_TSE.GetPointerOrNull();    }    if ( bsh ) {        CFeat_CI fi(bsh,                     0, 0,                    CSeqFeatData::e_Cdregion,                    SAnnotSelector::eOverlap_Intervals,                    SAnnotSelector::eResolve_TSE,                    CFeat_CI::e_Product,                    limit);        if ( fi ) {            // return the first one (should be the one packaged on the            // nuc-prot set).            feat.Reset(&(fi->GetOriginalFeature()));        }    }    return feat;}const CSeq_entry* CValidError_imp::GetAncestor(const CBioseq& seq, CBioseq_set::EClass clss){    const CSeq_entry* parent = 0;    for ( parent = seq.GetParentEntry();           parent != 0;          parent = parent->GetParentEntry() ) {        if ( parent->IsSet() ) {            const CBioseq_set& set = parent->GetSet();            if ( set.IsSetClass()  &&  set.GetClass() == clss ) {                break;            }        }    }    return parent;}bool CValidError_imp::IsSerialNumberInComment(const string& comment){    size_t pos = comment.find('[', 0);    while ( pos != string::npos ) {        ++pos;        if ( isdigit(comment[pos]) ) {            while ( isdigit(comment[pos]) ) {                ++pos;            }            if ( comment[pos] == ']' ) {                return true;            }        }        pos = comment.find('[', pos);    }    return false;}bool CValidError_imp::CheckSeqVector(const CSeqVector& vec){    if ( IsSequenceAvaliable(vec) ) {        return true;    }    if ( IsRemoteFetch() ) {        // issue some sort of error    }    return false;}bool CValidError_imp::IsSequenceAvaliable(const CSeqVector& vec){    // IMPORTANT: This is a temporary implementation, due to (yet) restricted    // implementation of the Scope / object manager classes.    // if the first and last elements are accesible the sequence is available.    try {        vec[0];         vec[vec.size() - 1];    } catch ( const exception& ) {        // do something        return false;    }    return true;}// =============================================================================//                                  Private// =============================================================================bool CValidError_imp::IsMixedStrands(const CSeq_loc& loc){    if ( SeqLocCheck(loc, m_Scope) == eSeqLocCheck_warning ) {        return false;    }    CSeq_loc_CI curr(loc);    if ( !curr ) {        return false;    }    CSeq_loc_CI prev = curr;    ++curr;        while ( curr ) {        ENa_strand curr_strand = curr.GetStrand();        ENa_strand prev_strand = prev.GetStrand();        if ( (prev_strand == eNa_strand_minus  &&                curr_strand != eNa_strand_minus)   ||             (prev_strand != eNa_strand_minus  &&                curr_strand == eNa_strand_minus) ) {            return true;        }        prev = curr;        ++curr;    }    return false;}void CValidError_imp::Setup(const CSeq_entry& se, CScope* scope) {    // "Save" the Seq-entry    m_TSE = &se;    if ( scope ) {        m_Scope.Reset(scope);    } else {        SetScope(se);    }    // If no Pubs/BioSource in CSeq_entry, post only one error    CTypeConstIterator<CPub> pub(ConstBegin(se));    m_NoPubs = !pub;    CTypeConstIterator<CBioSource> src(ConstBegin(se));    m_NoBioSource = !src;        // Look for genomic product set    for (CTypeConstIterator <CBioseq_set> si (se); si; ++si) {        if (si->IsSetClass ()) {            if (si->GetClass () == CBioseq_set::eClass_gen_prod_set) {                m_IsGPS = true;            }        }    }    // Examine all Seq-ids on Bioseqs    for (CTypeConstIterator <CBioseq> bi (se); bi; ++bi) {        ITERATE (CBioseq::TId, id, bi->GetId()) {            CSeq_id::E_Choice typ = (**id).Which();            switch (typ) {                case CSeq_id::e_not_set:                    break;                case CSeq_id::e_Local:                    break;                case CSeq_id::e_Gibbsq:                    break;                case CSeq_id::e_Gibbmt:                    break;                case CSeq_id::e_Giim:                    break;                case CSeq_id::e_Genbank:                    m_IsGED = true;                    break;                case CSeq_id::e_Embl:                    m_IsGED = true;                    break;                case CSeq_id::e_Pir:                    break;                case CSeq_id::e_Swissprot:                    break;                case CSeq_id::e_Patent:                    m_IsPatent = true;                    break;                case CSeq_id::e_Other:                    m_IsRefSeq = true;                    // and do RefSeq subclasses up front as well                    if ((**id).GetOther().IsSetAccession()) {                        string acc = (**id).GetOther().GetAccession().substr(0, 3);                        if (acc == "NC_") {                            m_IsNC = true;                        } else if (acc == "NG_") {                            m_IsNG = true;                        } else if (acc == "NM_") {                            m_IsNM = true;                        } else if (acc == "NP_") {                            m_IsNP = true;                        } else if (acc == "NR_") {                            m_IsNR = true;                        } else if (acc == "NS_") {                            m_IsNS = true;                        } else if (acc == "NT_") {                            m_IsNT = true;                        } else if (acc == "NW_") {                            m_IsNW = true;                        } else if (acc == "XR_") {                            m_IsXR = true;                        }                    }                    break;                case CSeq_id::e_General:                    if (!NStr::CompareCase((**id).GetGeneral().GetDb(), "BankIt")) {                        m_IsTPA = true;                    }                    break;                case CSeq_id

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?