validatorp.cpp
来自「ncbi源码」· C++ 代码 · 共 2,068 行 · 第 1/5 页
CPP
2,068 行
chromosome = ssit->GetPointer(); } break; case CSubSource::eSubtype_transposon_name: case CSubSource::eSubtype_insertion_seq_name: PostErr(eDiag_Warning, eErr_SEQ_DESCR_ObsoleteSourceQual, "Transposon name and insertion sequence name are no " "longer legal qualifiers.", obj); break; case 0: PostErr(eDiag_Warning, eErr_SEQ_DESCR_BadSubSource, "Unknown subsource subtype 0.", obj); break; case CSubSource::eSubtype_other: ValidateSourceQualTags((**ssit).GetName(), obj); break; case CSubSource::eSubtype_germline: germline = true; break; case CSubSource::eSubtype_rearranged: rearranged = true; break; default: break; } } } if ( germline && rearranged ) { PostErr(eDiag_Warning, eErr_SEQ_DESCR_BadSubSource, "Germline and rearranged should not both be present", obj); } if ( chrom_count > 1 ) { string msg = chrom_conflict ? "Multiple conflicting chromosome qualifiers" : "Multiple identical chromosome qualifiers"; PostErr(eDiag_Warning, eErr_SEQ_DESCR_MultipleChromosomes, msg, obj); } if ( !orgref.IsSetOrgname() || !orgref.GetOrgname().IsSetLineage() || orgref.GetOrgname().GetLineage().empty() ) { PostErr(eDiag_Error, eErr_SEQ_DESCR_MissingLineage, "No lineage for this BioSource.", obj); } else { const string& lineage = orgref.GetOrgname().GetLineage(); if ( bsrc.GetGenome() == CBioSource::eGenome_kinetoplast ) { if ( lineage.find("Kinetoplastida") == string::npos ) { PostErr(eDiag_Warning, eErr_SEQ_DESCR_BadOrganelle, "Only Kinetoplastida have kinetoplasts", obj); } } if ( bsrc.GetGenome() == CBioSource::eGenome_nucleomorph ) { if ( lineage.find("Chlorarchniophyta") == string::npos && lineage.find("Cryptophyta") == string::npos ) { PostErr(eDiag_Warning, eErr_SEQ_DESCR_BadOrganelle, "Only Chlorarchniophyta and Cryptophyta have nucleomorphs", obj); } } } if ( !orgref.IsSetOrgname() ) { return; } const COrgName& orgname = orgref.GetOrgname(); if ( orgname.IsSetMod() ) { ITERATE ( COrgName::TMod, omit, orgname.GetMod() ) { int subtype = (**omit).GetSubtype(); if ( (subtype == 0) || (subtype == 1) ) { PostErr(eDiag_Warning, eErr_SEQ_DESCR_BadOrgMod, "Unknown orgmod subtype " + subtype, obj); } if ( subtype == COrgMod::eSubtype_variety ) { if ( NStr::CompareNocase( orgname.GetDiv(), "PLN" ) != 0 ) { PostErr(eDiag_Warning, eErr_SEQ_DESCR_BadOrgMod, "Orgmod variety should only be in plants or fungi", obj); } } if ( subtype == COrgMod::eSubtype_other ) { ValidateSourceQualTags( (**omit).GetSubname(), obj); } } } if ( orgref.IsSetDb() ) { ValidateDbxref(orgref.GetDb(), obj, true); } if ( IsRequireTaxonID() ) { bool found = false; if ( orgref.IsSetDb() ) { ITERATE( COrg_ref::TDb, dbt, orgref.GetDb() ) { if ( NStr::CompareNocase((*dbt)->GetDb(), "taxon") == 0 ) { found = true; break; } } } if ( !found ) { PostErr(eDiag_Warning, eErr_SEQ_DESCR_NoTaxonID, "BioSource is missing taxon ID", obj); } }}void CValidError_imp::ValidateCitSub(const CCit_sub& cs, const CSerialObject& obj){ bool has_name = false, has_affil = false; if ( cs.CanGetAuthors() ) { const CAuth_list& authors = cs.GetAuthors(); if ( authors.CanGetNames() ) { const CAuth_list::TNames& names = cs.GetAuthors().GetNames(); switch ( names.Which() ) { case CAuth_list::TNames::e_Std: has_name = HasName(names.GetStd()); break; case CAuth_list::TNames::e_Ml: ITERATE( CAuth_list::TNames::TMl, it, names.GetMl() ) { if ( !IsBlankString(*it) ) { has_name = true; break; } } break; case CAuth_list::TNames::e_Str: ITERATE( CAuth_list::TNames::TStr, it, names.GetStr() ) { if ( !IsBlankString(*it) ) { has_name = true; break; } } break; default: break; } } if ( authors.CanGetAffil() ) { const CAffil& affil = authors.GetAffil(); switch ( affil.Which() ) { case CAffil::e_Str: {{ if ( !IsBlankString(affil.GetStr()) ) { has_affil = true; } }} break; case CAffil::e_Std:#define HAS_VALUE(o, x) (o.CanGet##x() && !IsBlankString(o.Get##x())) {{ const CAffil::TStd& std = affil.GetStd(); if ( HAS_VALUE(std, Affil) || HAS_VALUE(std, Div) || HAS_VALUE(std, City) || HAS_VALUE(std, Sub) || HAS_VALUE(std, Country) || HAS_VALUE(std, Street) || HAS_VALUE(std, Email) || HAS_VALUE(std, Fax) || HAS_VALUE(std, Phone) || HAS_VALUE(std, Postal_code) ) { has_affil = true; } }}#undef HAS_VALUE break; } } } if ( !has_name ) { PostErr(eDiag_Error, eErr_GENERIC_MissingPubInfo, "Submission citation has no author names", obj); } if ( !has_affil ) { PostErr(eDiag_Error, eErr_GENERIC_MissingPubInfo, "Submission citation has no affiliation", obj); }}void CValidError_imp::ValidateSeqLoc(const CSeq_loc& loc, const CBioseq_Handle& seq, const string& prefix, const CSerialObject& obj){ bool circular = false; circular = seq && seq.GetInst_Topology() == CSeq_inst::eTopology_circular; bool ordered = true, adjacent = false, chk = true, unmarked_strand = false, mixed_strand = false; const CSeq_id* id_cur = 0, *id_prv = 0; const CSeq_interval *int_cur = 0, *int_prv = 0; ENa_strand strand_cur = eNa_strand_unknown, strand_prv = eNa_strand_unknown; CTypeConstIterator<CSeq_loc> lit = ConstBegin(loc); for (; lit; ++lit) { try { switch (lit->Which()) { case CSeq_loc::e_Int: int_cur = &lit->GetInt(); strand_cur = int_cur->IsSetStrand() ? int_cur->GetStrand() : eNa_strand_unknown; id_cur = &int_cur->GetId(); chk = IsValid(*int_cur, m_Scope); if (chk && int_prv && ordered && !circular && id_prv) { if (IsSameBioseq(*id_prv, *id_cur, m_Scope)) { if (strand_cur == eNa_strand_minus) { if (int_prv->GetTo() < int_cur->GetTo()) { ordered = false; } if (int_cur->GetTo() + 1 == int_prv->GetFrom()) { adjacent = true; } } else { if (int_prv->GetTo() > int_cur->GetTo()) { ordered = false; } if (int_prv->GetTo() + 1 == int_cur->GetFrom()) { adjacent = true; } } } } if (int_prv) { if (IsSameBioseq(int_prv->GetId(), int_cur->GetId(), m_Scope)){ if (strand_prv == strand_cur && int_prv->GetFrom() == int_cur->GetFrom() && int_prv->GetTo() == int_cur->GetTo()) { PostErr(eDiag_Error, eErr_SEQ_FEAT_DuplicateInterval, "Duplicate exons in location", obj); } } } int_prv = int_cur; break; case CSeq_loc::e_Pnt: strand_cur = lit->GetPnt().IsSetStrand() ? lit->GetPnt().GetStrand() : eNa_strand_unknown; id_cur = &lit->GetPnt().GetId(); chk = IsValid(lit->GetPnt(), m_Scope); int_prv = 0; break; case CSeq_loc::e_Packed_pnt: strand_cur = lit->GetPacked_pnt().IsSetStrand() ? lit->GetPacked_pnt().GetStrand() : eNa_strand_unknown; id_cur = &lit->GetPacked_pnt().GetId(); chk = IsValid(lit->GetPacked_pnt(), m_Scope); int_prv = 0; break; case CSeq_loc::e_Null: break; default: strand_cur = eNa_strand_other; id_cur = 0; int_prv = 0; break; } if (!chk) { string lbl; lit->GetLabel(&lbl); PostErr(eDiag_Critical, eErr_SEQ_FEAT_Range, prefix + ": Seq-loc " + lbl + " out of range", obj); } if (lit->Which() != CSeq_loc::e_Null) { if (strand_prv != eNa_strand_other && strand_cur != eNa_strand_other) { if (id_cur && id_prv && IsSameBioseq(*id_cur, *id_prv, m_Scope)) { if (strand_prv != strand_cur) { if ((strand_prv == eNa_strand_plus && strand_cur == eNa_strand_unknown) || (strand_prv == eNa_strand_unknown && strand_cur == eNa_strand_plus)) { unmarked_strand = true; } else { mixed_strand = true; } } } } strand_prv = strand_cur; id_prv = id_cur; } } catch( const exception& e ) { string label; lit->GetLabel(&label); PostErr(eDiag_Error, eErr_Internal_Exception, "Exception caught while validating location " + label + ". Exception: " + e.what(), obj); strand_cur = eNa_strand_other; id_cur = 0; int_prv = 0; } } // Warn if different parts of a seq-loc refer to the same bioseq using // differnt id types (i.e. gi and accession) ValidateSeqLocIds(loc, obj); bool exception = false; const CSeq_feat* sfp = dynamic_cast<const CSeq_feat*>(&obj); if (sfp != 0) { // Publication intervals ordering does not matter if ( sfp->GetData().GetSubtype() == CSeqFeatData::eSubtype_pub ) { ordered = true; adjacent = false; } // ignore ordering of heterogen bonds if ( sfp->GetData().GetSubtype() == CSeqFeatData::eSubtype_het ) { ordered = true; adjacent = false; } // misc_recomb intervals SHOULD be in reverse order if ( sfp->GetData().GetSubtype() == CSeqFeatData::eSubtype_misc_recomb ) { ordered = true; } // primer_bind intervals MAY be in on opposite strands if ( sfp->GetData().GetSubtype() == CSeqFeatData::eSubtype_primer_bind ) { mixed_strand = false; unmarked_strand = false; ordered = true; } exception = sfp->IsSetExcept() ? sfp->GetExcept() : false; } string loc_lbl; if (adjacent) { loc.GetLabel(&loc_lbl); EDiagSev sev = exception ? eDiag_Warning : eDiag_Error; PostErr(sev, eErr_SEQ_FEAT_AbuttingIntervals, prefix + ": Adjacent intervals in SeqLoc [" + loc_lbl + "]", obj); } if (exception && sfp->CanGetExcept_text()) { // trans splicing exception turns off both mixed_strand and // out_of_order messages if (NStr::FindNoCase(sfp->GetExcept_text(), "trans-splicing") != NPOS) { return; } } if (mixed_strand || unmarked_strand || !ordered) { if (loc_lbl.empty()) { loc.GetLabel(&loc_lbl); } if (mixed_strand) { PostErr(eDiag_Error, eErr_SEQ_FEAT_MixedStrand, prefix + ": Mixed strands in SeqLoc [" + loc_lbl + "]", obj); } else if (unmarked_strand) { PostErr(eDiag_Warning, eErr_SEQ_FEAT_MixedStrand, prefix + ": Mixed plus and unknown strands in SeqLoc " " [" + loc_lbl + "]", obj); } if (!ordered) { PostErr(eDiag_Error, eErr_SEQ_FEAT_SeqLocOrder, prefix + ": Intervals out of order in SeqLoc [" + loc_lbl + "]", obj); } return; } if ( seq && seq.IsSetInst_Repr() && seq.GetInst_Repr() != CSeq_inst::eRepr_seg ) { return; } // Check for intervals out of order on segmented Bioseq if ( seq && BadSeqLocSortOrder(*seq.GetCompleteBioseq(), loc, m_Scope) ) { if (loc_lbl.empty()) { loc.GetLabel(&loc_lbl); } PostErr(eDiag_Error, eErr_SEQ_FEAT_SeqLocOrder, prefix + "Intervals out of order in SeqLoc [" + loc_lbl + "]", obj); } // Check for mixed strand on segmented Bioseq if ( IsMixedStrands(loc) ) { if (loc_lbl.empty()) {
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?