validatorp.cpp

来自「ncbi源码」· C++ 代码 · 共 2,068 行 · 第 1/5 页

CPP
2,068
字号
                    chromosome = ssit->GetPointer();                }                break;                            case CSubSource::eSubtype_transposon_name:            case CSubSource::eSubtype_insertion_seq_name:                PostErr(eDiag_Warning, eErr_SEQ_DESCR_ObsoleteSourceQual,                    "Transposon name and insertion sequence name are no "                    "longer legal qualifiers.", obj);                break;                            case 0:                PostErr(eDiag_Warning, eErr_SEQ_DESCR_BadSubSource,                    "Unknown subsource subtype 0.", obj);                break;                            case CSubSource::eSubtype_other:                ValidateSourceQualTags((**ssit).GetName(), obj);                break;            case CSubSource::eSubtype_germline:                germline = true;                break;            case CSubSource::eSubtype_rearranged:                rearranged = true;                break;            default:                break;            }        }    }    if ( germline  &&  rearranged ) {        PostErr(eDiag_Warning, eErr_SEQ_DESCR_BadSubSource,            "Germline and rearranged should not both be present", obj);    }	if ( chrom_count > 1 ) {		string msg =             chrom_conflict ? "Multiple conflicting chromosome qualifiers" :                             "Multiple identical chromosome qualifiers";		PostErr(eDiag_Warning, eErr_SEQ_DESCR_MultipleChromosomes, msg, obj);	}    if ( !orgref.IsSetOrgname()  ||         !orgref.GetOrgname().IsSetLineage()  ||         orgref.GetOrgname().GetLineage().empty() ) {		PostErr(eDiag_Error, eErr_SEQ_DESCR_MissingLineage, 			     "No lineage for this BioSource.", obj);	} else {        const string& lineage = orgref.GetOrgname().GetLineage();		if ( bsrc.GetGenome() == CBioSource::eGenome_kinetoplast ) {			if ( lineage.find("Kinetoplastida") == string::npos ) {				PostErr(eDiag_Warning, eErr_SEQ_DESCR_BadOrganelle, 						 "Only Kinetoplastida have kinetoplasts", obj);			}		} 		if ( bsrc.GetGenome() == CBioSource::eGenome_nucleomorph ) {			if ( lineage.find("Chlorarchniophyta") == string::npos  &&				lineage.find("Cryptophyta") == string::npos ) {				PostErr(eDiag_Warning, eErr_SEQ_DESCR_BadOrganelle,                     "Only Chlorarchniophyta and Cryptophyta have nucleomorphs", obj);			}		}	}    if ( !orgref.IsSetOrgname() ) {        return;    }    const COrgName& orgname = orgref.GetOrgname();    if ( orgname.IsSetMod() ) {        ITERATE ( COrgName::TMod, omit, orgname.GetMod() ) {            int subtype = (**omit).GetSubtype();                        if ( (subtype == 0) || (subtype == 1) ) {                PostErr(eDiag_Warning, eErr_SEQ_DESCR_BadOrgMod,                     "Unknown orgmod subtype " + subtype, obj);            }            if ( subtype == COrgMod::eSubtype_variety ) {                if ( NStr::CompareNocase( orgname.GetDiv(), "PLN" ) != 0 ) {                    PostErr(eDiag_Warning, eErr_SEQ_DESCR_BadOrgMod,                         "Orgmod variety should only be in plants or fungi",                         obj);                }            }            if ( subtype == COrgMod::eSubtype_other ) {                ValidateSourceQualTags( (**omit).GetSubname(), obj);            }        }    }    if ( orgref.IsSetDb() ) {        ValidateDbxref(orgref.GetDb(), obj, true);    }    if ( IsRequireTaxonID() ) {        bool found = false;        if ( orgref.IsSetDb() ) {            ITERATE( COrg_ref::TDb, dbt, orgref.GetDb() ) {                if ( NStr::CompareNocase((*dbt)->GetDb(), "taxon") == 0 ) {                    found = true;                    break;                }            }        }        if ( !found ) {            PostErr(eDiag_Warning, eErr_SEQ_DESCR_NoTaxonID,                "BioSource is missing taxon ID", obj);        }    }}void CValidError_imp::ValidateCitSub(const CCit_sub& cs, const CSerialObject& obj){    bool has_name  = false,         has_affil = false;    if ( cs.CanGetAuthors() ) {        const CAuth_list& authors = cs.GetAuthors();        if ( authors.CanGetNames() ) {            const CAuth_list::TNames& names = cs.GetAuthors().GetNames();            switch ( names.Which() ) {            case CAuth_list::TNames::e_Std:                has_name = HasName(names.GetStd());                break;                            case CAuth_list::TNames::e_Ml:                ITERATE( CAuth_list::TNames::TMl, it, names.GetMl() ) {                    if ( !IsBlankString(*it) ) {                        has_name = true;                        break;                    }                }                break;                            case CAuth_list::TNames::e_Str:                ITERATE( CAuth_list::TNames::TStr, it, names.GetStr() ) {                    if ( !IsBlankString(*it) ) {                        has_name = true;                        break;                    }                }                break;                            default:                break;            }        }        if ( authors.CanGetAffil() ) {            const CAffil& affil = authors.GetAffil();            switch ( affil.Which() ) {            case CAffil::e_Str:                {{                    if ( !IsBlankString(affil.GetStr()) ) {                        has_affil = true;                    }                }}                break;            case CAffil::e_Std:#define HAS_VALUE(o, x) (o.CanGet##x()  &&  !IsBlankString(o.Get##x()))                {{                    const CAffil::TStd& std = affil.GetStd();                    if ( HAS_VALUE(std, Affil)    ||                         HAS_VALUE(std, Div)      ||                         HAS_VALUE(std, City)     ||                         HAS_VALUE(std, Sub)      ||                         HAS_VALUE(std, Country)  ||                         HAS_VALUE(std, Street)   ||                         HAS_VALUE(std, Email)    ||                         HAS_VALUE(std, Fax)      ||                         HAS_VALUE(std, Phone)    ||                         HAS_VALUE(std, Postal_code) ) {                        has_affil = true;                    }                }}#undef HAS_VALUE                break;            }        }    }    if ( !has_name ) {        PostErr(eDiag_Error, eErr_GENERIC_MissingPubInfo,            "Submission citation has no author names", obj);    }    if ( !has_affil ) {        PostErr(eDiag_Error, eErr_GENERIC_MissingPubInfo,            "Submission citation has no affiliation", obj);    }}void CValidError_imp::ValidateSeqLoc(const CSeq_loc& loc, const CBioseq_Handle&  seq, const string&   prefix, const CSerialObject& obj){    bool circular = false;    circular = seq  &&  seq.GetInst_Topology() == CSeq_inst::eTopology_circular;        bool ordered = true, adjacent = false, chk = true,        unmarked_strand = false, mixed_strand = false;    const CSeq_id* id_cur = 0, *id_prv = 0;    const CSeq_interval *int_cur = 0, *int_prv = 0;    ENa_strand strand_cur = eNa_strand_unknown,        strand_prv = eNa_strand_unknown;    CTypeConstIterator<CSeq_loc> lit = ConstBegin(loc);    for (; lit; ++lit) {        try {            switch (lit->Which()) {            case CSeq_loc::e_Int:                int_cur = &lit->GetInt();                strand_cur = int_cur->IsSetStrand() ?                    int_cur->GetStrand() : eNa_strand_unknown;                id_cur = &int_cur->GetId();                chk = IsValid(*int_cur, m_Scope);                if (chk  &&  int_prv  && ordered  &&                    !circular  && id_prv) {                    if (IsSameBioseq(*id_prv, *id_cur, m_Scope)) {                        if (strand_cur == eNa_strand_minus) {                            if (int_prv->GetTo() < int_cur->GetTo()) {                                ordered = false;                            }                            if (int_cur->GetTo() + 1 == int_prv->GetFrom()) {                                adjacent = true;                            }                        } else {                            if (int_prv->GetTo() > int_cur->GetTo()) {                                ordered = false;                            }                            if (int_prv->GetTo() + 1 == int_cur->GetFrom()) {                                adjacent = true;                            }                        }                    }                }                if (int_prv) {                    if (IsSameBioseq(int_prv->GetId(), int_cur->GetId(), m_Scope)){                        if (strand_prv == strand_cur  &&                            int_prv->GetFrom() == int_cur->GetFrom()  &&                            int_prv->GetTo() == int_cur->GetTo()) {                            PostErr(eDiag_Error,                                eErr_SEQ_FEAT_DuplicateInterval,                                "Duplicate exons in location", obj);                        }                    }                }                int_prv = int_cur;                break;            case CSeq_loc::e_Pnt:                strand_cur = lit->GetPnt().IsSetStrand() ?                    lit->GetPnt().GetStrand() : eNa_strand_unknown;                id_cur = &lit->GetPnt().GetId();                chk = IsValid(lit->GetPnt(), m_Scope);                int_prv = 0;                break;            case CSeq_loc::e_Packed_pnt:                strand_cur = lit->GetPacked_pnt().IsSetStrand() ?                    lit->GetPacked_pnt().GetStrand() : eNa_strand_unknown;                id_cur = &lit->GetPacked_pnt().GetId();                chk = IsValid(lit->GetPacked_pnt(), m_Scope);                int_prv = 0;                break;            case CSeq_loc::e_Null:                break;            default:                strand_cur = eNa_strand_other;                id_cur = 0;                int_prv = 0;                break;            }            if (!chk) {                string lbl;                lit->GetLabel(&lbl);                PostErr(eDiag_Critical, eErr_SEQ_FEAT_Range,                    prefix + ": Seq-loc " + lbl + " out of range", obj);            }                        if (lit->Which() != CSeq_loc::e_Null) {                if (strand_prv != eNa_strand_other  &&                    strand_cur != eNa_strand_other) {                    if (id_cur  &&  id_prv  &&                        IsSameBioseq(*id_cur, *id_prv, m_Scope)) {                        if (strand_prv != strand_cur) {                            if ((strand_prv == eNa_strand_plus  &&                                strand_cur == eNa_strand_unknown)  ||                                (strand_prv == eNa_strand_unknown  &&                                strand_cur == eNa_strand_plus)) {                                unmarked_strand = true;                            } else {                                mixed_strand = true;                            }                        }                    }                }                                strand_prv = strand_cur;                id_prv = id_cur;            }        } catch( const exception& e ) {            string label;            lit->GetLabel(&label);            PostErr(eDiag_Error, eErr_Internal_Exception,                  "Exception caught while validating location " +                label + ". Exception: " + e.what(), obj);                            strand_cur = eNa_strand_other;            id_cur = 0;            int_prv = 0;        }            }    // Warn if different parts of a seq-loc refer to the same bioseq using     // differnt id types (i.e. gi and accession)    ValidateSeqLocIds(loc, obj);        bool exception = false;    const CSeq_feat* sfp = dynamic_cast<const CSeq_feat*>(&obj);    if (sfp != 0) {                // Publication intervals ordering does not matter                if ( sfp->GetData().GetSubtype() == CSeqFeatData::eSubtype_pub ) {            ordered = true;            adjacent = false;        }                // ignore ordering of heterogen bonds                if ( sfp->GetData().GetSubtype() == CSeqFeatData::eSubtype_het ) {            ordered = true;            adjacent = false;        }                // misc_recomb intervals SHOULD be in reverse order        if ( sfp->GetData().GetSubtype() == CSeqFeatData::eSubtype_misc_recomb ) {            ordered = true;        }                // primer_bind intervals MAY be in on opposite strands                if ( sfp->GetData().GetSubtype() == CSeqFeatData::eSubtype_primer_bind ) {            mixed_strand = false;            unmarked_strand = false;            ordered = true;        }                exception = sfp->IsSetExcept() ?  sfp->GetExcept() : false;    }    string loc_lbl;    if (adjacent) {        loc.GetLabel(&loc_lbl);        EDiagSev sev = exception ? eDiag_Warning : eDiag_Error;        PostErr(sev, eErr_SEQ_FEAT_AbuttingIntervals,            prefix + ": Adjacent intervals in SeqLoc [" +            loc_lbl + "]", obj);    }    if (exception  &&  sfp->CanGetExcept_text()) {        // trans splicing exception turns off both mixed_strand and        // out_of_order messages        if (NStr::FindNoCase(sfp->GetExcept_text(), "trans-splicing") != NPOS) {            return;        }    }    if (mixed_strand  ||  unmarked_strand  ||  !ordered) {        if (loc_lbl.empty()) {            loc.GetLabel(&loc_lbl);        }        if (mixed_strand) {            PostErr(eDiag_Error, eErr_SEQ_FEAT_MixedStrand,                prefix + ": Mixed strands in SeqLoc [" +                loc_lbl + "]", obj);        } else if (unmarked_strand) {            PostErr(eDiag_Warning, eErr_SEQ_FEAT_MixedStrand,                prefix + ": Mixed plus and unknown strands in SeqLoc "                " [" + loc_lbl + "]", obj);        }        if (!ordered) {            PostErr(eDiag_Error, eErr_SEQ_FEAT_SeqLocOrder,                prefix + ": Intervals out of order in SeqLoc [" +                loc_lbl + "]", obj);        }        return;    }    if ( seq  &&         seq.IsSetInst_Repr()  &&         seq.GetInst_Repr() != CSeq_inst::eRepr_seg ) {        return;    }    // Check for intervals out of order on segmented Bioseq    if ( seq  &&  BadSeqLocSortOrder(*seq.GetCompleteBioseq(), loc, m_Scope) ) {        if (loc_lbl.empty()) {            loc.GetLabel(&loc_lbl);        }        PostErr(eDiag_Error, eErr_SEQ_FEAT_SeqLocOrder,            prefix + "Intervals out of order in SeqLoc [" +            loc_lbl + "]", obj);    }    // Check for mixed strand on segmented Bioseq    if ( IsMixedStrands(loc) ) {        if (loc_lbl.empty()) {

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?