validerror_feat.cpp
来自「ncbi源码」· C++ 代码 · 共 1,990 行 · 第 1/5 页
CPP
1,990 行
}unsigned char CValidError_feat::Residue(unsigned char res){ return res == 255 ? '?' : res;}int CValidError_feat::CheckForRaggedEnd(const CSeq_loc& loc, const CCdregion& cdregion){ size_t len = GetLength(loc, m_Scope); if ( cdregion.GetFrame() > CCdregion::eFrame_one ) { len -= cdregion.GetFrame() - 1; } int ragged = len % 3; if ( ragged > 0 ) { len = GetLength(loc, m_Scope); CSeq_loc::TRange range = CSeq_loc::TRange::GetEmpty(); ITERATE( CCdregion::TCode_break, cbr, cdregion.GetCode_break() ) { SRelLoc rl(loc, (*cbr)->GetLoc(), m_Scope); CRef<CSeq_loc> rel_loc = rl.Resolve(m_Scope); range += rel_loc->GetTotalRange(); } // allowing a partial codon at the end TSeqPos codon_length = range.GetLength(); if ( (codon_length == 0 || codon_length == 1) && range.GetTo() == len - 1 ) { ragged = 0; } } return ragged;}string CValidError_feat::MapToNTCoords(const CSeq_feat& feat, const CSeq_loc& product, TSeqPos pos){ string result; CSeq_point pnt; pnt.SetPoint(pos); pnt.SetStrand( GetStrand(product, m_Scope) ); try { pnt.SetId().Assign(GetId(product, m_Scope)); } catch (CNotUnique) {} CSeq_loc tmp; tmp.SetPnt(pnt); CRef<CSeq_loc> loc = ProductToSource(feat, tmp, 0, m_Scope); loc->GetLabel(&result); return result;}void CValidError_feat::ValidateFeatPartialness(const CSeq_feat& feat){ unsigned int partial_prod = eSeqlocPartial_Complete, partial_loc = eSeqlocPartial_Complete; static string parterr[2] = { "PartialProduct", "PartialLocation" }; static string parterrs[4] = { "Start does not include first/last residue of sequence", "Stop does not include first/last residue of sequence", "Internal partial intervals do not include first/last residue of sequence", "Improper use of partial (greater than or less than)" }; partial_loc = SeqLocPartialCheck(feat.GetLocation(), m_Scope ); if (feat.CanGetProduct ()) { partial_prod = SeqLocPartialCheck(feat.GetProduct (), m_Scope ); } if ( (partial_loc != eSeqlocPartial_Complete) || (partial_prod != eSeqlocPartial_Complete) || (feat.CanGetPartial() && feat.GetPartial() == true) ) { // a feature on a partial sequence should be partial -- it often isn't if ( (!feat.CanGetPartial() || !feat.GetPartial()) && partial_loc != eSeqlocPartial_Complete && feat.GetLocation ().Which () == CSeq_loc::e_Whole ) { PostErr(eDiag_Info, eErr_SEQ_FEAT_PartialProblem, "On partial Bioseq, SeqFeat.partial should be TRUE", feat); } // a partial feature, with complete location, but partial product else if ( (feat.CanGetPartial() && feat.GetPartial()) && partial_loc == eSeqlocPartial_Complete && feat.CanGetProduct () && feat.GetProduct ().Which () == CSeq_loc::e_Whole && partial_prod != eSeqlocPartial_Complete ) { PostErr(eDiag_Warning, eErr_SEQ_FEAT_PartialProblem, "When SeqFeat.product is a partial Bioseq, SeqFeat.location " "should also be partial", feat); } // gene on segmented set is now 'order', should also be partial else if ( feat.GetData ().IsGene () && !feat.CanGetProduct () && partial_loc == eSeqlocPartial_Internal ) { PostErr(eDiag_Warning, eErr_SEQ_FEAT_PartialProblem, "Gene of 'order' with otherwise complete location should " "have partial flag set", feat); } // inconsistent combination of partial/complete product,location,partial flag - part 1 else if ( partial_prod == eSeqlocPartial_Complete && feat.CanGetProduct() ) { // if not local bioseq product, lower severity EDiagSev sev = eDiag_Warning; if ( IsOneBioseq(feat.GetProduct(), m_Scope) ) { const CSeq_id& prod_id = GetId(feat.GetProduct()); CBioseq_Handle prod = m_Scope->GetBioseqHandleFromTSE(prod_id, m_Imp.GetTSE()); if ( !prod ) { sev = eDiag_Info; } } string str("Inconsistent: Product= complete, Location= "); if ( partial_loc != eSeqlocPartial_Complete ) { str += "partial, "; } else { str += "complete, "; } str += "Feature.partial= "; if ( feat.CanGetPartial() && feat.GetPartial() ) { str += "TRUE"; } else { str += "FALSE"; } PostErr(sev, eErr_SEQ_FEAT_PartialsInconsistent, str, feat); } // inconsistent combination of partial/complete product,location,partial flag - part 2 else if ( partial_loc == eSeqlocPartial_Complete || (feat.CanGetPartial() && !feat.GetPartial()) ) { string str("Inconsistent: "); if ( feat.CanGetProduct() ) { str += "Product= "; if ( partial_prod != eSeqlocPartial_Complete ) { str += "partial, "; } else { str += "complete, "; } str += "Location= "; if ( partial_loc != eSeqlocPartial_Complete ) { str += "partial, "; } else { str += "complete, "; } str += "Feature.partial= "; if ( feat.CanGetPartial() && feat.GetPartial() ) { str += "TRUE"; } else { str += "FALSE"; } } PostErr(eDiag_Warning, eErr_SEQ_FEAT_PartialsInconsistent, str, feat); } // 5' or 3' partial location giving unclassified partial product else if ( (partial_loc & eSeqlocPartial_Start || partial_loc & eSeqlocPartial_Stop) && partial_prod & eSeqlocPartial_Other && feat.CanGetPartial() && feat.GetPartial() ) { PostErr(eDiag_Warning, eErr_SEQ_FEAT_PartialProblem, "5' or 3' partial location should not have unclassified " "partial location", feat); } // may have other error bits set as well unsigned int partials[2] = { partial_prod, partial_loc }; for ( int i = 0; i < 2; ++i ) { unsigned int errtype = eSeqlocPartial_Nostart; for ( int j = 0; j < 4; ++j ) { if (partials[i] & errtype) { if ( i == 1 && j < 2 && IsCDDFeat(feat) ) { // supress warning } else if ( i == 1 && j < 2 && IsPartialAtSpliceSite(feat.GetLocation(), errtype) ) { PostErr(eDiag_Info, eErr_SEQ_FEAT_PartialProblem, parterr[i] + ": " + parterrs[j] + " (but is at consensus splice site)", feat); } else if ( i == 1 && j < 2 && (feat.GetData().Which() == CSeqFeatData::e_Gene || feat.GetData().GetSubtype() == CSeqFeatData::eSubtype_mRNA) && IsSameAsCDS(feat) ) { PostErr(eDiag_Info, eErr_SEQ_FEAT_PartialProblem, parterr[i] + ": " + parterrs[j], feat); } else if (feat.GetData().Which() == CSeqFeatData::e_Cdregion && j == 0) { PostErr (eDiag_Warning, eErr_SEQ_FEAT_PartialProblem, parterr[i] + ": 5' partial is not at start AND is not at consensus splice site", feat); } else if (feat.GetData().Which() == CSeqFeatData::e_Cdregion && j == 1) { PostErr (eDiag_Warning, eErr_SEQ_FEAT_PartialProblem, parterr[i] + ": 3' partial is not at stop AND is not at consensus splice site", feat); } else { PostErr (eDiag_Warning, eErr_SEQ_FEAT_PartialProblem, parterr[i] + ": " + parterrs[j], feat); } } errtype <<= 1; } } }}void CValidError_feat::ValidateGene(const CGene_ref& gene, const CSeq_feat& feat){ ++m_NumGenes; if ( (gene.CanGetLocus() && gene.GetLocus().empty()) && (gene.CanGetAllele() && gene.GetAllele().empty()) && (gene.CanGetDesc() && gene.GetDesc().empty()) && (gene.CanGetMaploc() && gene.GetMaploc().empty()) && (gene.CanGetDb() && gene.GetDb().empty()) && (gene.CanGetSyn() && gene.GetSyn().empty()) && (gene.CanGetLocus_tag() && gene.GetLocus_tag().empty()) ) { PostErr(eDiag_Warning, eErr_SEQ_FEAT_GeneRefHasNoData, "There is a gene feature where all fields are empty", feat); } if ( gene.CanGetLocus() && !gene.GetLocus().empty() ) { ITERATE (string, it, gene.GetLocus() ) { if ( isspace(*it) != 0 ) { PostErr(eDiag_Warning, eErr_SEQ_FEAT_LocusTagProblem, "Gene locus_tag '" + gene.GetLocus() + "' should be a single word without any spaces", feat); break; } } } if ( gene.CanGetDb () ) { m_Imp.ValidateDbxref(gene.GetDb(), feat); }}void CValidError_feat::ValidateCdregion ( const CCdregion& cdregion, const CSeq_feat& feat) { ITERATE( CSeq_feat::TQual, qual, feat.GetQual () ) { if ( (**qual).GetQual() == "codon" ) { PostErr(eDiag_Warning, eErr_SEQ_FEAT_WrongQualOnImpFeat, "Use the proper genetic code, if available, " "or set transl_excepts on specific codons", feat); break; } } bool pseudo = (feat.CanGetPseudo() && feat.GetPseudo()) || IsOverlappingGenePseudo(feat); bool conflict = cdregion.CanGetConflict() && cdregion.GetConflict(); if ( !pseudo && !conflict ) { ValidateCdTrans(feat); ValidateSplice(feat, false); ValidateCdsProductId(feat); } else if ( conflict ) { ValidateCdConflict(cdregion, feat); } ITERATE( CCdregion::TCode_break, codebreak, cdregion.GetCode_break() ) { ECompare comp = sequence::Compare((**codebreak).GetLoc (), feat.GetLocation (), m_Scope ); if ( (comp != eContained) && (comp != eSame)) PostErr (eDiag_Error, eErr_SEQ_FEAT_Range, "Code-break location not in coding region", feat); } if ( cdregion.CanGetOrf() && cdregion.GetOrf () && feat.CanGetProduct () ) { PostErr (eDiag_Warning, eErr_SEQ_FEAT_OrfCdsHasProduct, "An ORF coding region should not have a product", feat); } if ( pseudo && feat.CanGetProduct () ) { PostErr (eDiag_Warning, eErr_SEQ_FEAT_PsuedoCdsHasProduct, "A pseudo coding region should not have a product", feat); } CBioseq_Handle bsh = m_Scope->GetBioseqHandle(feat.GetLocation ()); if ( bsh ) { CSeqdesc_CI diter (bsh, CSeqdesc::e_Source); if ( diter ) { const CBioSource& src = diter->GetSource(); int biopgencode = src.GetGenCode(); if (cdregion.CanGetCode ()) { int cdsgencode = cdregion.GetCode().GetId(); if ( biopgencode != cdsgencode ) { int genome = 0; if ( src.CanGetGenome() ) { genome = src.GetGenome(); } if ( IsPlastid(genome) ) { PostErr (eDiag_Warning, eErr_SEQ_FEAT_GenCodeMismatch, "Genetic code conflict between CDS (code " + NStr::IntToString (cdsgencode) + ") and BioSource.genome biological context (" + s_PlastidTxt [genome] + ") (uses code 11)", feat); } else { PostErr (eDiag_Warning, eErr_SEQ_FEAT_GenCodeMismatch, "Genetic code conflict between CDS (code " + NStr::IntToString (cdsgencode) + ") and BioSource (code " + NStr::IntToString (biopgencode) + ")", feat); } } } } } ValidateBothStrands(feat); ValidateBadGeneOverlap(feat); ValidateBadMRNAOverlap(feat); ValidateCommonCDSProduct(feat); ValidateCDSPartial(feat);}// non-pseudo CDS must have productvoid CValidError_feat::ValidateCdsProductId(const CSeq_feat& feat){ // bail if product exists if ( feat.CanGetProduct() ) { return; } // bail if location has just stop if ( feat.CanGetLocation() ) { const CSeq_loc& loc = feat.GetLocation(); if ( loc.IsPartialLeft() && !loc.IsPartialRight() ) { if ( GetLength(loc, m_Scope) <= 5 ) { return; } } } // supress in case of the appropriate exception if ( feat.CanGetExcept() && feat.CanGetExcept_text() && !IsBlankString(feat.GetExcept_text()) ) { if ( NStr::Find(feat.GetExcept_text(), "rearrangement required for product") != NPOS ) { return; } } PostErr(eDiag_Warning, eErr_SEQ_FEAT_MissingCDSproduct, "Expected CDS product absent", feat);}void CValidError_feat::ValidateCdConflict(const CCdregion& cdregion, const CSeq_feat& feat){ CBioseq_Handle nuc = m_Scope->GetBioseqHandle(feat.GetLocation()); CBioseq_Handle prot = m_Scope->GetBioseqHandle(feat.GetProduct()); // translate the coding region string transl_prot; try { CCdregion_translate::TranslateCdregion( transl_prot, nuc, feat.GetLocation(), cdregion, false, // do not include stop codons false); // do not remove trailing X/B/Z } catch ( const runtime_error& ) { } CSeqVector prot_vec = prot.GetSeqVector(CBioseq_Handle::eCoding_Iupac); string prot_seq; prot_vec.GetSeqData(0, prot_vec.size(), prot_seq); if ( transl_prot.empty() || prot_seq.empty() || transl_prot == prot_seq ) { PostErr(eDiag_Error, eErr_SEQ_FEAT_BadConflictFlag, "Coding region conflict flag should not be set", feat);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?