validerror_feat.cpp

来自「ncbi源码」· C++ 代码 · 共 1,990 行 · 第 1/5 页

CPP
1,990
字号
}unsigned char CValidError_feat::Residue(unsigned char res){    return res == 255 ? '?' : res;}int CValidError_feat::CheckForRaggedEnd(const CSeq_loc& loc,  const CCdregion& cdregion){    size_t len = GetLength(loc, m_Scope);    if ( cdregion.GetFrame() > CCdregion::eFrame_one ) {        len -= cdregion.GetFrame() - 1;    }    int ragged = len % 3;    if ( ragged > 0 ) {        len = GetLength(loc, m_Scope);        CSeq_loc::TRange range = CSeq_loc::TRange::GetEmpty();        ITERATE( CCdregion::TCode_break, cbr, cdregion.GetCode_break() ) {            SRelLoc rl(loc, (*cbr)->GetLoc(), m_Scope);            CRef<CSeq_loc> rel_loc = rl.Resolve(m_Scope);            range += rel_loc->GetTotalRange();        }        // allowing a partial codon at the end        TSeqPos codon_length = range.GetLength();        if ( (codon_length == 0 || codon_length == 1)  &&             range.GetTo() == len - 1 ) {            ragged = 0;        }    }    return ragged;}string CValidError_feat::MapToNTCoords(const CSeq_feat& feat, const CSeq_loc& product, TSeqPos pos){    string result;    CSeq_point pnt;    pnt.SetPoint(pos);    pnt.SetStrand( GetStrand(product, m_Scope) );    try {        pnt.SetId().Assign(GetId(product, m_Scope));    } catch (CNotUnique) {}    CSeq_loc tmp;    tmp.SetPnt(pnt);    CRef<CSeq_loc> loc = ProductToSource(feat, tmp, 0, m_Scope);        loc->GetLabel(&result);    return result;}void CValidError_feat::ValidateFeatPartialness(const CSeq_feat& feat){    unsigned int  partial_prod = eSeqlocPartial_Complete,         partial_loc = eSeqlocPartial_Complete;    static string parterr[2] = { "PartialProduct", "PartialLocation" };    static string parterrs[4] = {        "Start does not include first/last residue of sequence",        "Stop does not include first/last residue of sequence",        "Internal partial intervals do not include first/last residue of sequence",        "Improper use of partial (greater than or less than)"    };    partial_loc  = SeqLocPartialCheck(feat.GetLocation(), m_Scope );    if (feat.CanGetProduct ()) {        partial_prod = SeqLocPartialCheck(feat.GetProduct (), m_Scope );    }        if ( (partial_loc  != eSeqlocPartial_Complete)  ||         (partial_prod != eSeqlocPartial_Complete)  ||            (feat.CanGetPartial()  &&  feat.GetPartial() == true) ) {        // a feature on a partial sequence should be partial -- it often isn't        if ( (!feat.CanGetPartial()  ||  !feat.GetPartial()) &&            partial_loc != eSeqlocPartial_Complete  &&            feat.GetLocation ().Which () == CSeq_loc::e_Whole ) {            PostErr(eDiag_Info, eErr_SEQ_FEAT_PartialProblem,                "On partial Bioseq, SeqFeat.partial should be TRUE", feat);        }        // a partial feature, with complete location, but partial product        else if ( (feat.CanGetPartial()  &&  feat.GetPartial())  &&            partial_loc == eSeqlocPartial_Complete  &&            feat.CanGetProduct () &&            feat.GetProduct ().Which () == CSeq_loc::e_Whole  &&            partial_prod != eSeqlocPartial_Complete ) {            PostErr(eDiag_Warning, eErr_SEQ_FEAT_PartialProblem,                "When SeqFeat.product is a partial Bioseq, SeqFeat.location "                "should also be partial", feat);        }        // gene on segmented set is now 'order', should also be partial        else if ( feat.GetData ().IsGene ()  &&            !feat.CanGetProduct ()  &&            partial_loc == eSeqlocPartial_Internal ) {            PostErr(eDiag_Warning, eErr_SEQ_FEAT_PartialProblem,                "Gene of 'order' with otherwise complete location should "                "have partial flag set", feat);        }        // inconsistent combination of partial/complete product,location,partial flag - part 1        else if ( partial_prod == eSeqlocPartial_Complete  &&                  feat.CanGetProduct() ) {            // if not local bioseq product, lower severity            EDiagSev sev = eDiag_Warning;            if ( IsOneBioseq(feat.GetProduct(), m_Scope) ) {                const CSeq_id& prod_id = GetId(feat.GetProduct());                CBioseq_Handle prod =                    m_Scope->GetBioseqHandleFromTSE(prod_id, m_Imp.GetTSE());                if ( !prod ) {                    sev = eDiag_Info;                }            }                                    string str("Inconsistent: Product= complete, Location= ");            if ( partial_loc != eSeqlocPartial_Complete ) {                str += "partial, ";            } else {                str += "complete, ";            }            str += "Feature.partial= ";            if ( feat.CanGetPartial()  &&  feat.GetPartial() ) {                str += "TRUE";            } else {                str += "FALSE";            }            PostErr(sev, eErr_SEQ_FEAT_PartialsInconsistent, str, feat);        }        // inconsistent combination of partial/complete product,location,partial flag - part 2        else if ( partial_loc == eSeqlocPartial_Complete  ||                  (feat.CanGetPartial()  &&  !feat.GetPartial()) ) {            string str("Inconsistent: ");            if ( feat.CanGetProduct() ) {                str += "Product= ";                if ( partial_prod != eSeqlocPartial_Complete ) {                    str += "partial, ";                } else {                    str += "complete, ";                }                str += "Location= ";                if ( partial_loc != eSeqlocPartial_Complete ) {                    str += "partial, ";                } else {                    str += "complete, ";                }                str += "Feature.partial= ";                if ( feat.CanGetPartial()  &&  feat.GetPartial() ) {                    str += "TRUE";                } else {                    str += "FALSE";                }            }            PostErr(eDiag_Warning, eErr_SEQ_FEAT_PartialsInconsistent, str, feat);        }        // 5' or 3' partial location giving unclassified partial product        else if ( (partial_loc & eSeqlocPartial_Start  ||                   partial_loc & eSeqlocPartial_Stop)  &&                   partial_prod & eSeqlocPartial_Other &&                   feat.CanGetPartial()  &&  feat.GetPartial() ) {            PostErr(eDiag_Warning, eErr_SEQ_FEAT_PartialProblem,                "5' or 3' partial location should not have unclassified "                "partial location", feat);        }                // may have other error bits set as well         unsigned int partials[2] = { partial_prod, partial_loc };        for ( int i = 0; i < 2; ++i ) {            unsigned int errtype = eSeqlocPartial_Nostart;            for ( int j = 0; j < 4; ++j ) {                if (partials[i] & errtype) {                    if ( i == 1  &&  j < 2  &&  IsCDDFeat(feat) ) {                        // supress warning                    } else if ( i == 1  &&  j < 2  &&                        IsPartialAtSpliceSite(feat.GetLocation(), errtype) ) {                        PostErr(eDiag_Info, eErr_SEQ_FEAT_PartialProblem,                            parterr[i] + ": " + parterrs[j] +                             " (but is at consensus splice site)", feat);                    } else if ( i == 1  &&  j < 2  &&                        (feat.GetData().Which() == CSeqFeatData::e_Gene  ||                        feat.GetData().GetSubtype() == CSeqFeatData::eSubtype_mRNA) &&                         IsSameAsCDS(feat) ) {                        PostErr(eDiag_Info, eErr_SEQ_FEAT_PartialProblem,                            parterr[i] + ": " + parterrs[j], feat);                    } else if (feat.GetData().Which() == CSeqFeatData::e_Cdregion && j == 0) {                        PostErr (eDiag_Warning, eErr_SEQ_FEAT_PartialProblem,                            parterr[i] +                             ": 5' partial is not at start AND is not at consensus splice site",                            feat);                     } else if (feat.GetData().Which() == CSeqFeatData::e_Cdregion && j == 1) {                        PostErr (eDiag_Warning, eErr_SEQ_FEAT_PartialProblem,                            parterr[i] +                             ": 3' partial is not at stop AND is not at consensus splice site",                            feat);                    } else {                        PostErr (eDiag_Warning, eErr_SEQ_FEAT_PartialProblem,                            parterr[i] + ": " + parterrs[j], feat);                    }                }                errtype <<= 1;            }        }    }}void CValidError_feat::ValidateGene(const CGene_ref& gene, const CSeq_feat& feat){    ++m_NumGenes;    if ( (gene.CanGetLocus()      &&  gene.GetLocus().empty())   &&         (gene.CanGetAllele()     &&  gene.GetAllele().empty())  &&         (gene.CanGetDesc()       &&  gene.GetDesc().empty())    &&         (gene.CanGetMaploc()     &&  gene.GetMaploc().empty())  &&         (gene.CanGetDb()         &&  gene.GetDb().empty())      &&         (gene.CanGetSyn()        &&  gene.GetSyn().empty())     &&         (gene.CanGetLocus_tag()  &&  gene.GetLocus_tag().empty()) ) {        PostErr(eDiag_Warning, eErr_SEQ_FEAT_GeneRefHasNoData,            "There is a gene feature where all fields are empty", feat);    }    if ( gene.CanGetLocus()  &&  !gene.GetLocus().empty() ) {        ITERATE (string, it, gene.GetLocus() ) {            if ( isspace(*it) != 0 ) {                PostErr(eDiag_Warning, eErr_SEQ_FEAT_LocusTagProblem,                    "Gene locus_tag '" + gene.GetLocus() +                     "' should be a single word without any spaces", feat);                break;            }        }             }    if ( gene.CanGetDb () ) {        m_Imp.ValidateDbxref(gene.GetDb(), feat);    }}void CValidError_feat::ValidateCdregion (    const CCdregion& cdregion,     const CSeq_feat& feat) {    ITERATE( CSeq_feat::TQual, qual, feat.GetQual () ) {        if ( (**qual).GetQual() == "codon" ) {            PostErr(eDiag_Warning, eErr_SEQ_FEAT_WrongQualOnImpFeat,                "Use the proper genetic code, if available, "                "or set transl_excepts on specific codons", feat);            break;        }    }    bool pseudo = (feat.CanGetPseudo()  &&  feat.GetPseudo())  ||        IsOverlappingGenePseudo(feat);    bool conflict = cdregion.CanGetConflict()  &&  cdregion.GetConflict();    if ( !pseudo  &&  !conflict ) {        ValidateCdTrans(feat);        ValidateSplice(feat, false);        ValidateCdsProductId(feat);    } else if ( conflict ) {        ValidateCdConflict(cdregion, feat);    }    ITERATE( CCdregion::TCode_break, codebreak, cdregion.GetCode_break() ) {        ECompare comp = sequence::Compare((**codebreak).GetLoc (),            feat.GetLocation (), m_Scope );        if ( (comp != eContained) && (comp != eSame))            PostErr (eDiag_Error, eErr_SEQ_FEAT_Range,                 "Code-break location not in coding region", feat);    }    if ( cdregion.CanGetOrf()  &&  cdregion.GetOrf ()  &&         feat.CanGetProduct () ) {        PostErr (eDiag_Warning, eErr_SEQ_FEAT_OrfCdsHasProduct,            "An ORF coding region should not have a product", feat);    }    if ( pseudo && feat.CanGetProduct () ) {        PostErr (eDiag_Warning, eErr_SEQ_FEAT_PsuedoCdsHasProduct,            "A pseudo coding region should not have a product", feat);    }        CBioseq_Handle bsh = m_Scope->GetBioseqHandle(feat.GetLocation ());    if ( bsh ) {        CSeqdesc_CI diter (bsh, CSeqdesc::e_Source);        if ( diter ) {            const CBioSource& src = diter->GetSource();                        int biopgencode = src.GetGenCode();                        if (cdregion.CanGetCode ()) {                int cdsgencode = cdregion.GetCode().GetId();                                if ( biopgencode != cdsgencode ) {                    int genome = 0;                                        if ( src.CanGetGenome() ) {                        genome = src.GetGenome();                    }                                        if ( IsPlastid(genome) ) {                        PostErr (eDiag_Warning, eErr_SEQ_FEAT_GenCodeMismatch,                            "Genetic code conflict between CDS (code " +                            NStr::IntToString (cdsgencode) +                            ") and BioSource.genome biological context (" +                            s_PlastidTxt [genome] + ") (uses code 11)", feat);                    } else {                        PostErr (eDiag_Warning, eErr_SEQ_FEAT_GenCodeMismatch,                            "Genetic code conflict between CDS (code " +                            NStr::IntToString (cdsgencode) +                            ") and BioSource (code " +                            NStr::IntToString (biopgencode) + ")", feat);                    }                }            }        }    }    ValidateBothStrands(feat);    ValidateBadGeneOverlap(feat);    ValidateBadMRNAOverlap(feat);    ValidateCommonCDSProduct(feat);    ValidateCDSPartial(feat);}// non-pseudo CDS must have productvoid CValidError_feat::ValidateCdsProductId(const CSeq_feat& feat){    // bail if product exists    if ( feat.CanGetProduct() ) {        return;    }        // bail if location has just stop    if ( feat.CanGetLocation() ) {        const CSeq_loc& loc = feat.GetLocation();        if ( loc.IsPartialLeft()  &&  !loc.IsPartialRight() ) {            if ( GetLength(loc, m_Scope) <= 5 ) {                return;            }        }    }        // supress in case of the appropriate exception    if ( feat.CanGetExcept()  &&  feat.CanGetExcept_text()  &&        !IsBlankString(feat.GetExcept_text()) ) {        if ( NStr::Find(feat.GetExcept_text(),                        "rearrangement required for product") != NPOS ) {            return;        }    }        PostErr(eDiag_Warning, eErr_SEQ_FEAT_MissingCDSproduct,        "Expected CDS product absent", feat);}void CValidError_feat::ValidateCdConflict(const CCdregion& cdregion, const CSeq_feat& feat){    CBioseq_Handle nuc  = m_Scope->GetBioseqHandle(feat.GetLocation());    CBioseq_Handle prot = m_Scope->GetBioseqHandle(feat.GetProduct());        // translate the coding region    string transl_prot;    try {        CCdregion_translate::TranslateCdregion(            transl_prot,             nuc,             feat.GetLocation(),             cdregion,            false,   // do not include stop codons            false);  // do not remove trailing X/B/Z    } catch ( const runtime_error& ) {    }    CSeqVector prot_vec = prot.GetSeqVector(CBioseq_Handle::eCoding_Iupac);    string prot_seq;    prot_vec.GetSeqData(0, prot_vec.size(), prot_seq);    if ( transl_prot.empty()  ||  prot_seq.empty()  ||  transl_prot == prot_seq ) {        PostErr(eDiag_Error, eErr_SEQ_FEAT_BadConflictFlag,            "Coding region conflict flag should not be set", feat);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?