validatorp.hpp

来自「ncbi源码」· HPP 代码 · 共 1,149 行 · 第 1/3 页

HPP
1,149
字号
    // General use validation methods    void ValidatePubdesc(const CPubdesc& pub, const CSerialObject& obj);    void ValidateBioSource(const CBioSource& bsrc, const CSerialObject& obj);    void ValidateSeqLoc(const CSeq_loc& loc, const CBioseq_Handle& seq,        const string& prefix, const CSerialObject& obj);    void ValidateSeqLocIds(const CSeq_loc& loc, const CSerialObject& obj);    void ValidateDbxref(const CDbtag& xref, const CSerialObject& obj,        bool biosource = false);    void ValidateDbxref(TDbtags& xref_list, const CSerialObject& obj,        bool biosource = false);    void ValidateCitSub(const CCit_sub& cs, const CSerialObject& obj);            // getters    inline CScope* GetScope(void) { return m_Scope; }    // flags derived from options parameter    inline bool IsNonASCII(void) const { return m_NonASCII; }    inline bool IsSuppressContext(void) const { return m_SuppressContext; }    inline bool IsValidateAlignments(void) const { return m_ValidateAlignments; }    inline bool IsValidateExons(void) const { return m_ValidateExons; }    inline bool IsSpliceErr(void) const { return m_SpliceErr; }    inline bool IsOvlPepErr(void) const { return m_OvlPepErr; }    inline bool IsRequireTaxonID(void) const { return m_RequireTaxonID; }    inline bool IsRequireISOJTA(void) const { return m_RequireISOJTA; }    inline bool IsValidateIdSet(void) const { return m_ValidateIdSet; }    inline bool IsRemoteFetch(void) const { return m_RemoteFetch; }    // !!! DEBUG {    inline bool AvoidPerfBottlenecks() const { return m_PerfBottlenecks; }    // }    // flags calculated by examining data in record    inline bool IsStandaloneAnnot(void) const { return m_IsStandaloneAnnot; }    inline bool IsNoPubs(void) const { return m_NoPubs; }    inline bool IsNoBioSource(void) const { return m_NoBioSource; }    inline bool IsGPS(void) const { return m_IsGPS; }    inline bool IsGED(void) const { return m_IsGED; }    inline bool IsPDB(void) const { return m_IsPDB; }    inline bool IsTPA(void) const { return m_IsTPA; }    inline bool IsPatent(void) const { return m_IsPatent; }    inline bool IsRefSeq(void) const { return m_IsRefSeq; }    inline bool IsNC(void) const { return m_IsNC; }    inline bool IsNG(void) const { return m_IsNG; }    inline bool IsNM(void) const { return m_IsNM; }    inline bool IsNP(void) const { return m_IsNP; }    inline bool IsNR(void) const { return m_IsNR; }    inline bool IsNS(void) const { return m_IsNS; }    inline bool IsNT(void) const { return m_IsNT; }    inline bool IsNW(void) const { return m_IsNW; }    inline bool IsXR(void) const { return m_IsXR; }    inline bool IsGI(void) const { return m_IsGI; }    inline bool IsCuratedRefSeq(void) const;        const CSeq_entry& GetTSE(void) { return *m_TSE; }    TFeatAnnotMap GetFeatAnnotMap(void);    void AddBioseqWithNoPub(const CBioseq& seq);    void AddBioseqWithNoBiosource(const CBioseq& seq);    void AddBioseqWithNoMolinfo(const CBioseq& seq);    void AddProtWithoutFullRef(const CBioseq_Handle& seq);    void ReportMissingPubs(const CSeq_entry& se, const CCit_sub* cs);    void ReportMissingBiosource(const CSeq_entry& se);    void ReportProtWithoutFullRef(void);    void ReportBioseqsWithNoMolinfo(void);    bool IsNucAcc(const string& acc);    bool IsFarLocation(const CSeq_loc& loc);    CConstRef<CSeq_feat> GetCDSGivenProduct(const CBioseq& seq);    const CSeq_entry* GetAncestor(const CBioseq& seq, CBioseq_set::EClass clss);    bool IsSerialNumberInComment(const string& comment);    bool CheckSeqVector(const CSeqVector& vec);    bool IsSequenceAvaliable(const CSeqVector& vec);private:    // Prohibit copy constructor & assignment operator    CValidError_imp(const CValidError_imp&);    CValidError_imp& operator= (const CValidError_imp&);    void Setup(const CSeq_entry& se, CScope* scope);    void Setup(const CSeq_annot& sa, CScope* scope);    void SetScope(const CSeq_entry& se);    void SetScope(const CSeq_annot& sa);    void InitializeSourceQualTags();    void ValidateSourceQualTags(const string& str, const CSerialObject& obj);    bool IsMixedStrands(const CSeq_loc& loc);    void ValidatePubGen(const CCit_gen& gen, const CSerialObject& obj);    void ValidatePubArticle(const CCit_art& art, int uid, const CSerialObject& obj);    void ValidateEtAl(const CPubdesc& pubdesc, const CSerialObject& obj);        bool HasName(const list< CRef< CAuthor > >& authors);    bool HasTitle(const CTitle& title);    bool HasIsoJTA(const CTitle& title);    CRef<CObjectManager>    m_ObjMgr;    CRef<CScope>            m_Scope;    CConstRef<CSeq_entry>   m_TSE;    // error repoitory    CValidError*       m_ErrRepository;    // flags derived from options parameter    bool m_NonASCII;            // User sets if Non ASCII char found    bool m_SuppressContext;     // Include context in errors if true    bool m_ValidateAlignments;  // Validate Alignments if true    bool m_ValidateExons;       // Check exon feature splice sites    bool m_SpliceErr;           // Bad splice site error if true, else warn    bool m_OvlPepErr;           // Peptide overlap error if true, else warn    bool m_RequireTaxonID;      // BioSource requires taxonID dbxref    bool m_RequireISOJTA;       // Journal requires ISO JTA    bool m_ValidateIdSet;       // validate update against ID set in database    bool m_RemoteFetch;         // Remote fetch enabled?    // !!! DEBUG {    bool m_PerfBottlenecks;         // Skip suspected performance bottlenecks    // }    // flags calculated by examining data in record    bool m_IsStandaloneAnnot;    bool m_NoPubs;                  // Suppress no pub error if true    bool m_NoBioSource;             // Suppress no organism error if true    bool m_IsGPS;    bool m_IsGED;    bool m_IsPDB;    bool m_IsTPA;    bool m_IsPatent;    bool m_IsRefSeq;    bool m_IsNC;    bool m_IsNG;    bool m_IsNM;    bool m_IsNP;    bool m_IsNR;    bool m_IsNS;    bool m_IsNT;    bool m_IsNW;    bool m_IsXR;    bool m_IsGI;        // seq ids contained within the orignal seq entry.     // (used to check for far location)    vector< CConstRef<CSeq_id> >    m_InitialSeqIds;    // prot bioseqs without a full reference (reporting cds feature)    vector< CConstRef<CSeq_feat> >  m_ProtWithNoFullRef;    // Bioseqs without pubs (should be considered only if m_NoPubs is false)    vector< CConstRef<CBioseq> >    m_BioseqWithNoPubs;    // Bioseqs without source (should be considered only if m_NoSource is false)    vector< CConstRef<CBioseq> >    m_BioseqWithNoSource;    // Bioseqs without MolInfo    vector< CConstRef<CBioseq> >    m_BioseqWithNoMolinfo;    // legal dbxref database strings    static const string legalDbXrefs[];    static const string legalRefSeqDbXrefs[];    // source qulalifiers prefixes    static const string sm_SourceQualPrefixes[];    static auto_ptr<CTextFsa> m_SourceQualTags;    CValidator::TProgressCallback m_PrgCallback;    CValidator::CProgressInfo     m_PrgInfo;    SIZE_TYPE   m_NumAlign;    SIZE_TYPE   m_NumAnnot;    SIZE_TYPE   m_NumBioseq;    SIZE_TYPE   m_NumBioseq_set;    SIZE_TYPE   m_NumDesc;    SIZE_TYPE   m_NumDescr;    SIZE_TYPE   m_NumFeat;    SIZE_TYPE   m_NumGraph;};// =============================================================================//                         Specific validation classes// =============================================================================class CValidError_base{protected:    // typedefs:    typedef CValidError_imp::TFeat TFeat;    typedef CValidError_imp::TBioseq TBioseq;    typedef CValidError_imp::TSet TSet;    typedef CValidError_imp::TDesc TDesc;    typedef CValidError_imp::TAnnot TAnnot;    typedef CValidError_imp::TGraph TGraph;    typedef CValidError_imp::TAlign TAlign;    typedef CValidError_imp::TEntry TEntry;    typedef CValidError_imp::TDbtags TDbtags;    CValidError_base(CValidError_imp& imp);    virtual ~CValidError_base();    void PostErr(EDiagSev sv, EErrType et, const string& msg,        const CSerialObject& obj);    void PostErr(EDiagSev sv, EErrType et, const string& msg, TDesc ds);    void PostErr(EDiagSev sv, EErrType et, const string& msg, TFeat ft);    void PostErr(EDiagSev sv, EErrType et, const string& msg, TBioseq sq);    void PostErr(EDiagSev sv, EErrType et, const string& msg, TBioseq sq,        TDesc ds);    void PostErr(EDiagSev sv, EErrType et, const string& msg, TSet set);    void PostErr(EDiagSev sv, EErrType et, const string& msg, TSet set,         TDesc ds);    void PostErr(EDiagSev sv, EErrType et, const string& msg, TAnnot annot);    void PostErr(EDiagSev sv, EErrType et, const string& msg, TGraph graph);    void PostErr(EDiagSev sv, EErrType et, const string& msg, TBioseq sq,        TGraph graph);    void PostErr(EDiagSev sv, EErrType et, const string& msg, TAlign align);    void PostErr(EDiagSev sv, EErrType et, const string& msg, TEntry entry);    CValidError_imp& m_Imp;    CScope* m_Scope;};// ===========================  Validate Bioseq_set  ===========================class CValidError_bioseqset : private CValidError_base{public:    CValidError_bioseqset(CValidError_imp& imp);    virtual ~CValidError_bioseqset(void);    void ValidateBioseqSet(const CBioseq_set& seqset);private:    void ValidateNucProtSet(const CBioseq_set& seqset, int nuccnt, int protcnt);    void ValidateSegSet(const CBioseq_set& seqset, int segcnt);    void ValidatePartsSet(const CBioseq_set& seqset);    void ValidatePopSet(const CBioseq_set& seqset);    void ValidateGenProdSet(const CBioseq_set& seqset);    bool IsMrnaProductInGPS(const CBioseq& seq); };// =============================  Validate Bioseq  =============================class CValidError_bioseq : private CValidError_base{public:    CValidError_bioseq(CValidError_imp& imp);    virtual ~CValidError_bioseq(void);    void ValidateSeqIds(const CBioseq& seq);    void ValidateInst(const CBioseq& seq);    void ValidateBioseqContext(const CBioseq& seq);    void ValidateHistory(const CBioseq& seq);    size_t GetTpaWithHistory(void)    const { return m_TpaWithHistory;    }    size_t GetTpaWithoutHistory(void) const { return m_TpaWithoutHistory; }private:    typedef multimap<string, const CSeq_feat*, PNocase> TStrFeatMap;    typedef vector<CMappedFeat>                         TMappedFeatVec;    static const size_t scm_AdjacentNsThreshold; // = 80        void ValidateSeqLen(const CBioseq& seq);    void ValidateSegRef(const CBioseq& seq);    void ValidateDelta(const CBioseq& seq);    bool ValidateRepr(const CSeq_inst& inst, const CBioseq& seq);    void ValidateSeqParts(const CBioseq& seq);    void ValidateProteinTitle(const CBioseq& seq);    void ValidateRawConst(const CBioseq& seq);    void ValidateNs(const CBioseq& seq);        void ValidateMultiIntervalGene (const CBioseq& seq);    void ValidateSeqFeatContext(const CBioseq& seq);    void ValidateDupOrOverlapFeats(const CBioseq& seq);    void ValidateCollidingGenes(const CBioseq& seq);    void x_CompareStrings(const TStrFeatMap& str_feat_map, const string& type,        EErrType err, EDiagSev sev);    void x_ValidateCompletness(const CBioseq& seq, const CMolInfo& mi);    void x_ValidateAbuttingUTR(const CBioseq_Handle& seq);    void x_ValidateAbuttingCDSGroup(const TMappedFeatVec& cds_group, bool minus);    void ValidateSeqDescContext(const CBioseq& seq);    void ValidateMolInfoContext(const CMolInfo& minfo, int& seq_biomol,        const CBioseq& seq, const CSeqdesc& desc);    void ValidateMolTypeContext(const EGIBB_mol& gibb, EGIBB_mol& seq_biomol,        const CBioseq& seq, const CSeqdesc& desc);    void ValidateUpdateDateContext(const CDate& update,const CDate& create,        const CBioseq& seq, const CSeqdesc& desc);    void ValidateOrgContext(const CSeqdesc_CI& iter, const COrg_ref& this_org,        const COrg_ref& org, const CBioseq& seq, const CSeqdesc& desc);    void ValidateGraphsOnBioseq(const CBioseq& seq);    void ValidateByteGraphOnBioseq(const CSeq_graph& graph, const CBioseq& seq);    void ValidateGraphOnDeltaBioseq(const CBioseq& seq, bool& validate_values);    void ValidateGraphValues(const CSeq_graph& graph, const CBioseq& seq);    void ValidateMinValues(const CByte_graph& bg);    void ValidateMaxValues(const CByte_graph& bg);    void ValidatemRNABioseqContext(const CBioseq_Handle& seq);    bool GetLitLength(const CDelta_seq& delta, TSeqPos& len);    bool IsSuportedGraphType(const CSeq_graph& graph) const;    SIZE_TYPE GetSeqLen(const CBioseq& seq);    void ValidateSecondaryAccConflict(const string& primary_acc,        const CBioseq& seq, int choice);    void ValidateIDSetAgainstDb(const CBioseq& seq);    void CheckForPubOnBioseq(const CBioseq& seq);    void CheckForBiosourceOnBioseq(const CBioseq& seq);    void CheckForMolinfoOnBioseq(const CBioseq& seq);    void CheckTpaHistory(const CBioseq& seq);    TSeqPos GetDataLen(const CSeq_inst& inst);    bool CdError(const CBioseq_Handle& bsh);    bool IsMrna(const CBioseq_Handle& bsh);    bool IsPrerna(const CBioseq_Handle& bsh);    size_t NumOfIntervals(const CSeq_loc& loc);    bool LocOnSeg(const CBioseq& seq, const CSeq_loc& loc);    bool NotPeptideException(const CFeat_CI& curr, const CFeat_CI& prev);    bool IsSameSeqAnnot(const CFeat_CI& fi1, const CFeat_CI& fi2);    bool IsSameSeqAnnotDesc(const CFeat_CI& fi1, const CFeat_CI& fi2);    bool IsIdIn(const CSeq_id& id, const CBioseq& seq);    bool SuppressTrailingXMsg(const CBioseq& seq);    bool GetLocFromSeq(const CBioseq& seq, CSeq_loc* loc);    bool IsDifferentDbxrefs(const TDbtags& dbxref1,                            const TDbtags& dbxref2);    bool IsHistAssemblyMissing(const CBioseq& seq);    bool IsFlybaseDbxrefs(const TDbtags& dbxrefs);    bool GraphsOnBioseq(const CBioseq& seq) const;    bool IsOtherDNA(const CBioseq& seq) const;    bool IsSynthetic(const CBioseq& seq) const;    bool x_IsArtificial(const CBioseq& seq) const;    bool x_IsActiveFin(const CBioseq& seq) const;    bool x_IsMicroRNA(const CBioseq& seq) const;    bool x_IsDeltaLitOnly(const CSeq_inst& inst) const;    size_t x_CountAdjacentNs(const CSeq_literal& lit);    // data    size_t m_TpaWithHistory;    size_t m_TpaWithoutHistory;};// =============================  Validate SeqFeat  ============================class CValidError_feat : private CValidError_base{public:    CValidError_feat(CValidError_imp& imp);    virtual ~CValidError_feat(void);    void ValidateSeqFeat(const CSeq_feat& feat);    size_t GetNumGenes    (void) const { return m_NumGenes; }    size_t GetNumGeneXrefs(void) const { return m_NumGeneXrefs; }private:    void ValidateSeqFeatData(const CSeqFeatData& data, const CSeq_feat& feat);    void ValidateSeqFeatProduct(const CSeq_loc& prod, const CSeq_feat& feat);    void ValidateGene(const CGene_ref& gene, const CSeq_feat& feat);    void ValidateGeneXRef(const CSeq_feat& feat);    void ValidateOperon(const CSeq_feat& feat);    void ValidateCdregion(const CCdregion& cdregion, const CSeq_feat& obj);    void ValidateCdTrans(const CSeq_feat& feat);    void ValidateCdsProductId(const CSeq_feat& feat);    void ValidateCdConflict(const CCdregion& cdregion, const CSeq_feat& feat);    void ReportCdTransErrors(const CSeq_feat& feat,        bool show_stop, bool got_stop, bool no_end, int ragged);    void ValidateSplice(const CSeq_feat& feat, bool check_all);    void ValidateBothStrands(const CSeq_feat& feat);    void ValidateCommonCDSProduct(const CSeq_feat& feat);    void ValidateBadMRNAOverlap(const CSeq_feat& feat);    void ValidateBadGeneOverlap(const CSeq_feat& feat);    void ValidateCDSPartial(const CSeq_feat& feat);    void ValidateCodeBreakNotOnCodon(const CSeq_feat& feat,const CSeq_loc& loc,                                     const CCdregion& cdregion);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?