validatorp.hpp
来自「ncbi源码」· HPP 代码 · 共 1,149 行 · 第 1/3 页
HPP
1,149 行
// General use validation methods void ValidatePubdesc(const CPubdesc& pub, const CSerialObject& obj); void ValidateBioSource(const CBioSource& bsrc, const CSerialObject& obj); void ValidateSeqLoc(const CSeq_loc& loc, const CBioseq_Handle& seq, const string& prefix, const CSerialObject& obj); void ValidateSeqLocIds(const CSeq_loc& loc, const CSerialObject& obj); void ValidateDbxref(const CDbtag& xref, const CSerialObject& obj, bool biosource = false); void ValidateDbxref(TDbtags& xref_list, const CSerialObject& obj, bool biosource = false); void ValidateCitSub(const CCit_sub& cs, const CSerialObject& obj); // getters inline CScope* GetScope(void) { return m_Scope; } // flags derived from options parameter inline bool IsNonASCII(void) const { return m_NonASCII; } inline bool IsSuppressContext(void) const { return m_SuppressContext; } inline bool IsValidateAlignments(void) const { return m_ValidateAlignments; } inline bool IsValidateExons(void) const { return m_ValidateExons; } inline bool IsSpliceErr(void) const { return m_SpliceErr; } inline bool IsOvlPepErr(void) const { return m_OvlPepErr; } inline bool IsRequireTaxonID(void) const { return m_RequireTaxonID; } inline bool IsRequireISOJTA(void) const { return m_RequireISOJTA; } inline bool IsValidateIdSet(void) const { return m_ValidateIdSet; } inline bool IsRemoteFetch(void) const { return m_RemoteFetch; } // !!! DEBUG { inline bool AvoidPerfBottlenecks() const { return m_PerfBottlenecks; } // } // flags calculated by examining data in record inline bool IsStandaloneAnnot(void) const { return m_IsStandaloneAnnot; } inline bool IsNoPubs(void) const { return m_NoPubs; } inline bool IsNoBioSource(void) const { return m_NoBioSource; } inline bool IsGPS(void) const { return m_IsGPS; } inline bool IsGED(void) const { return m_IsGED; } inline bool IsPDB(void) const { return m_IsPDB; } inline bool IsTPA(void) const { return m_IsTPA; } inline bool IsPatent(void) const { return m_IsPatent; } inline bool IsRefSeq(void) const { return m_IsRefSeq; } inline bool IsNC(void) const { return m_IsNC; } inline bool IsNG(void) const { return m_IsNG; } inline bool IsNM(void) const { return m_IsNM; } inline bool IsNP(void) const { return m_IsNP; } inline bool IsNR(void) const { return m_IsNR; } inline bool IsNS(void) const { return m_IsNS; } inline bool IsNT(void) const { return m_IsNT; } inline bool IsNW(void) const { return m_IsNW; } inline bool IsXR(void) const { return m_IsXR; } inline bool IsGI(void) const { return m_IsGI; } inline bool IsCuratedRefSeq(void) const; const CSeq_entry& GetTSE(void) { return *m_TSE; } TFeatAnnotMap GetFeatAnnotMap(void); void AddBioseqWithNoPub(const CBioseq& seq); void AddBioseqWithNoBiosource(const CBioseq& seq); void AddBioseqWithNoMolinfo(const CBioseq& seq); void AddProtWithoutFullRef(const CBioseq_Handle& seq); void ReportMissingPubs(const CSeq_entry& se, const CCit_sub* cs); void ReportMissingBiosource(const CSeq_entry& se); void ReportProtWithoutFullRef(void); void ReportBioseqsWithNoMolinfo(void); bool IsNucAcc(const string& acc); bool IsFarLocation(const CSeq_loc& loc); CConstRef<CSeq_feat> GetCDSGivenProduct(const CBioseq& seq); const CSeq_entry* GetAncestor(const CBioseq& seq, CBioseq_set::EClass clss); bool IsSerialNumberInComment(const string& comment); bool CheckSeqVector(const CSeqVector& vec); bool IsSequenceAvaliable(const CSeqVector& vec);private: // Prohibit copy constructor & assignment operator CValidError_imp(const CValidError_imp&); CValidError_imp& operator= (const CValidError_imp&); void Setup(const CSeq_entry& se, CScope* scope); void Setup(const CSeq_annot& sa, CScope* scope); void SetScope(const CSeq_entry& se); void SetScope(const CSeq_annot& sa); void InitializeSourceQualTags(); void ValidateSourceQualTags(const string& str, const CSerialObject& obj); bool IsMixedStrands(const CSeq_loc& loc); void ValidatePubGen(const CCit_gen& gen, const CSerialObject& obj); void ValidatePubArticle(const CCit_art& art, int uid, const CSerialObject& obj); void ValidateEtAl(const CPubdesc& pubdesc, const CSerialObject& obj); bool HasName(const list< CRef< CAuthor > >& authors); bool HasTitle(const CTitle& title); bool HasIsoJTA(const CTitle& title); CRef<CObjectManager> m_ObjMgr; CRef<CScope> m_Scope; CConstRef<CSeq_entry> m_TSE; // error repoitory CValidError* m_ErrRepository; // flags derived from options parameter bool m_NonASCII; // User sets if Non ASCII char found bool m_SuppressContext; // Include context in errors if true bool m_ValidateAlignments; // Validate Alignments if true bool m_ValidateExons; // Check exon feature splice sites bool m_SpliceErr; // Bad splice site error if true, else warn bool m_OvlPepErr; // Peptide overlap error if true, else warn bool m_RequireTaxonID; // BioSource requires taxonID dbxref bool m_RequireISOJTA; // Journal requires ISO JTA bool m_ValidateIdSet; // validate update against ID set in database bool m_RemoteFetch; // Remote fetch enabled? // !!! DEBUG { bool m_PerfBottlenecks; // Skip suspected performance bottlenecks // } // flags calculated by examining data in record bool m_IsStandaloneAnnot; bool m_NoPubs; // Suppress no pub error if true bool m_NoBioSource; // Suppress no organism error if true bool m_IsGPS; bool m_IsGED; bool m_IsPDB; bool m_IsTPA; bool m_IsPatent; bool m_IsRefSeq; bool m_IsNC; bool m_IsNG; bool m_IsNM; bool m_IsNP; bool m_IsNR; bool m_IsNS; bool m_IsNT; bool m_IsNW; bool m_IsXR; bool m_IsGI; // seq ids contained within the orignal seq entry. // (used to check for far location) vector< CConstRef<CSeq_id> > m_InitialSeqIds; // prot bioseqs without a full reference (reporting cds feature) vector< CConstRef<CSeq_feat> > m_ProtWithNoFullRef; // Bioseqs without pubs (should be considered only if m_NoPubs is false) vector< CConstRef<CBioseq> > m_BioseqWithNoPubs; // Bioseqs without source (should be considered only if m_NoSource is false) vector< CConstRef<CBioseq> > m_BioseqWithNoSource; // Bioseqs without MolInfo vector< CConstRef<CBioseq> > m_BioseqWithNoMolinfo; // legal dbxref database strings static const string legalDbXrefs[]; static const string legalRefSeqDbXrefs[]; // source qulalifiers prefixes static const string sm_SourceQualPrefixes[]; static auto_ptr<CTextFsa> m_SourceQualTags; CValidator::TProgressCallback m_PrgCallback; CValidator::CProgressInfo m_PrgInfo; SIZE_TYPE m_NumAlign; SIZE_TYPE m_NumAnnot; SIZE_TYPE m_NumBioseq; SIZE_TYPE m_NumBioseq_set; SIZE_TYPE m_NumDesc; SIZE_TYPE m_NumDescr; SIZE_TYPE m_NumFeat; SIZE_TYPE m_NumGraph;};// =============================================================================// Specific validation classes// =============================================================================class CValidError_base{protected: // typedefs: typedef CValidError_imp::TFeat TFeat; typedef CValidError_imp::TBioseq TBioseq; typedef CValidError_imp::TSet TSet; typedef CValidError_imp::TDesc TDesc; typedef CValidError_imp::TAnnot TAnnot; typedef CValidError_imp::TGraph TGraph; typedef CValidError_imp::TAlign TAlign; typedef CValidError_imp::TEntry TEntry; typedef CValidError_imp::TDbtags TDbtags; CValidError_base(CValidError_imp& imp); virtual ~CValidError_base(); void PostErr(EDiagSev sv, EErrType et, const string& msg, const CSerialObject& obj); void PostErr(EDiagSev sv, EErrType et, const string& msg, TDesc ds); void PostErr(EDiagSev sv, EErrType et, const string& msg, TFeat ft); void PostErr(EDiagSev sv, EErrType et, const string& msg, TBioseq sq); void PostErr(EDiagSev sv, EErrType et, const string& msg, TBioseq sq, TDesc ds); void PostErr(EDiagSev sv, EErrType et, const string& msg, TSet set); void PostErr(EDiagSev sv, EErrType et, const string& msg, TSet set, TDesc ds); void PostErr(EDiagSev sv, EErrType et, const string& msg, TAnnot annot); void PostErr(EDiagSev sv, EErrType et, const string& msg, TGraph graph); void PostErr(EDiagSev sv, EErrType et, const string& msg, TBioseq sq, TGraph graph); void PostErr(EDiagSev sv, EErrType et, const string& msg, TAlign align); void PostErr(EDiagSev sv, EErrType et, const string& msg, TEntry entry); CValidError_imp& m_Imp; CScope* m_Scope;};// =========================== Validate Bioseq_set ===========================class CValidError_bioseqset : private CValidError_base{public: CValidError_bioseqset(CValidError_imp& imp); virtual ~CValidError_bioseqset(void); void ValidateBioseqSet(const CBioseq_set& seqset);private: void ValidateNucProtSet(const CBioseq_set& seqset, int nuccnt, int protcnt); void ValidateSegSet(const CBioseq_set& seqset, int segcnt); void ValidatePartsSet(const CBioseq_set& seqset); void ValidatePopSet(const CBioseq_set& seqset); void ValidateGenProdSet(const CBioseq_set& seqset); bool IsMrnaProductInGPS(const CBioseq& seq); };// ============================= Validate Bioseq =============================class CValidError_bioseq : private CValidError_base{public: CValidError_bioseq(CValidError_imp& imp); virtual ~CValidError_bioseq(void); void ValidateSeqIds(const CBioseq& seq); void ValidateInst(const CBioseq& seq); void ValidateBioseqContext(const CBioseq& seq); void ValidateHistory(const CBioseq& seq); size_t GetTpaWithHistory(void) const { return m_TpaWithHistory; } size_t GetTpaWithoutHistory(void) const { return m_TpaWithoutHistory; }private: typedef multimap<string, const CSeq_feat*, PNocase> TStrFeatMap; typedef vector<CMappedFeat> TMappedFeatVec; static const size_t scm_AdjacentNsThreshold; // = 80 void ValidateSeqLen(const CBioseq& seq); void ValidateSegRef(const CBioseq& seq); void ValidateDelta(const CBioseq& seq); bool ValidateRepr(const CSeq_inst& inst, const CBioseq& seq); void ValidateSeqParts(const CBioseq& seq); void ValidateProteinTitle(const CBioseq& seq); void ValidateRawConst(const CBioseq& seq); void ValidateNs(const CBioseq& seq); void ValidateMultiIntervalGene (const CBioseq& seq); void ValidateSeqFeatContext(const CBioseq& seq); void ValidateDupOrOverlapFeats(const CBioseq& seq); void ValidateCollidingGenes(const CBioseq& seq); void x_CompareStrings(const TStrFeatMap& str_feat_map, const string& type, EErrType err, EDiagSev sev); void x_ValidateCompletness(const CBioseq& seq, const CMolInfo& mi); void x_ValidateAbuttingUTR(const CBioseq_Handle& seq); void x_ValidateAbuttingCDSGroup(const TMappedFeatVec& cds_group, bool minus); void ValidateSeqDescContext(const CBioseq& seq); void ValidateMolInfoContext(const CMolInfo& minfo, int& seq_biomol, const CBioseq& seq, const CSeqdesc& desc); void ValidateMolTypeContext(const EGIBB_mol& gibb, EGIBB_mol& seq_biomol, const CBioseq& seq, const CSeqdesc& desc); void ValidateUpdateDateContext(const CDate& update,const CDate& create, const CBioseq& seq, const CSeqdesc& desc); void ValidateOrgContext(const CSeqdesc_CI& iter, const COrg_ref& this_org, const COrg_ref& org, const CBioseq& seq, const CSeqdesc& desc); void ValidateGraphsOnBioseq(const CBioseq& seq); void ValidateByteGraphOnBioseq(const CSeq_graph& graph, const CBioseq& seq); void ValidateGraphOnDeltaBioseq(const CBioseq& seq, bool& validate_values); void ValidateGraphValues(const CSeq_graph& graph, const CBioseq& seq); void ValidateMinValues(const CByte_graph& bg); void ValidateMaxValues(const CByte_graph& bg); void ValidatemRNABioseqContext(const CBioseq_Handle& seq); bool GetLitLength(const CDelta_seq& delta, TSeqPos& len); bool IsSuportedGraphType(const CSeq_graph& graph) const; SIZE_TYPE GetSeqLen(const CBioseq& seq); void ValidateSecondaryAccConflict(const string& primary_acc, const CBioseq& seq, int choice); void ValidateIDSetAgainstDb(const CBioseq& seq); void CheckForPubOnBioseq(const CBioseq& seq); void CheckForBiosourceOnBioseq(const CBioseq& seq); void CheckForMolinfoOnBioseq(const CBioseq& seq); void CheckTpaHistory(const CBioseq& seq); TSeqPos GetDataLen(const CSeq_inst& inst); bool CdError(const CBioseq_Handle& bsh); bool IsMrna(const CBioseq_Handle& bsh); bool IsPrerna(const CBioseq_Handle& bsh); size_t NumOfIntervals(const CSeq_loc& loc); bool LocOnSeg(const CBioseq& seq, const CSeq_loc& loc); bool NotPeptideException(const CFeat_CI& curr, const CFeat_CI& prev); bool IsSameSeqAnnot(const CFeat_CI& fi1, const CFeat_CI& fi2); bool IsSameSeqAnnotDesc(const CFeat_CI& fi1, const CFeat_CI& fi2); bool IsIdIn(const CSeq_id& id, const CBioseq& seq); bool SuppressTrailingXMsg(const CBioseq& seq); bool GetLocFromSeq(const CBioseq& seq, CSeq_loc* loc); bool IsDifferentDbxrefs(const TDbtags& dbxref1, const TDbtags& dbxref2); bool IsHistAssemblyMissing(const CBioseq& seq); bool IsFlybaseDbxrefs(const TDbtags& dbxrefs); bool GraphsOnBioseq(const CBioseq& seq) const; bool IsOtherDNA(const CBioseq& seq) const; bool IsSynthetic(const CBioseq& seq) const; bool x_IsArtificial(const CBioseq& seq) const; bool x_IsActiveFin(const CBioseq& seq) const; bool x_IsMicroRNA(const CBioseq& seq) const; bool x_IsDeltaLitOnly(const CSeq_inst& inst) const; size_t x_CountAdjacentNs(const CSeq_literal& lit); // data size_t m_TpaWithHistory; size_t m_TpaWithoutHistory;};// ============================= Validate SeqFeat ============================class CValidError_feat : private CValidError_base{public: CValidError_feat(CValidError_imp& imp); virtual ~CValidError_feat(void); void ValidateSeqFeat(const CSeq_feat& feat); size_t GetNumGenes (void) const { return m_NumGenes; } size_t GetNumGeneXrefs(void) const { return m_NumGeneXrefs; }private: void ValidateSeqFeatData(const CSeqFeatData& data, const CSeq_feat& feat); void ValidateSeqFeatProduct(const CSeq_loc& prod, const CSeq_feat& feat); void ValidateGene(const CGene_ref& gene, const CSeq_feat& feat); void ValidateGeneXRef(const CSeq_feat& feat); void ValidateOperon(const CSeq_feat& feat); void ValidateCdregion(const CCdregion& cdregion, const CSeq_feat& obj); void ValidateCdTrans(const CSeq_feat& feat); void ValidateCdsProductId(const CSeq_feat& feat); void ValidateCdConflict(const CCdregion& cdregion, const CSeq_feat& feat); void ReportCdTransErrors(const CSeq_feat& feat, bool show_stop, bool got_stop, bool no_end, int ragged); void ValidateSplice(const CSeq_feat& feat, bool check_all); void ValidateBothStrands(const CSeq_feat& feat); void ValidateCommonCDSProduct(const CSeq_feat& feat); void ValidateBadMRNAOverlap(const CSeq_feat& feat); void ValidateBadGeneOverlap(const CSeq_feat& feat); void ValidateCDSPartial(const CSeq_feat& feat); void ValidateCodeBreakNotOnCodon(const CSeq_feat& feat,const CSeq_loc& loc, const CCdregion& cdregion);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?