readfeat.cpp
来自「ncbi源码」· C++ 代码 · 共 1,648 行 · 第 1/5 页
CPP
1,648 行
/* * =========================================================================== * PRODUCTION $Log: readfeat.cpp,v $ * PRODUCTION Revision 1000.5 2004/06/01 19:46:24 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.35 * PRODUCTION * =========================================================================== *//* * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: Jonathan Kans * * File Description: * Feature table reader * */#include <ncbi_pch.hpp>#include <corelib/ncbistd.hpp>#include <corelib/ncbithr.hpp>#include <serial/iterator.hpp>#include <serial/objistrasn.hpp>// Objects includes#include <objects/general/Int_fuzz.hpp>#include <objects/general/Object_id.hpp>#include <objects/general/Dbtag.hpp>#include <objects/seqloc/Seq_id.hpp>#include <objects/seqloc/Seq_loc.hpp>#include <objects/seqloc/Seq_interval.hpp>#include <objects/seqloc/Seq_point.hpp>#include <objects/seq/Seq_annot.hpp>#include <objects/seq/Annotdesc.hpp>#include <objects/seq/Annot_descr.hpp>#include <objects/seqfeat/SeqFeatData.hpp>#include <objects/seqfeat/Seq_feat.hpp>#include <objects/seqfeat/BioSource.hpp>#include <objects/seqfeat/Org_ref.hpp>#include <objects/seqfeat/OrgName.hpp>#include <objects/seqfeat/SubSource.hpp>#include <objects/seqfeat/OrgMod.hpp>#include <objects/seqfeat/Gene_ref.hpp>#include <objects/seqfeat/Cdregion.hpp>#include <objects/seqfeat/Code_break.hpp>#include <objects/seqfeat/Genetic_code.hpp>#include <objects/seqfeat/Genetic_code_table.hpp>#include <objects/seqfeat/RNA_ref.hpp>#include <objects/seqfeat/Trna_ext.hpp>#include <objects/seqfeat/Imp_feat.hpp>#include <objects/seqfeat/Gb_qual.hpp>#include <objtools/readers/readfeat.hpp>#include <algorithm>BEGIN_NCBI_SCOPEBEGIN_objects_SCOPE // namespace ncbi::objects::class /* NCBI_XOBJREAD_EXPORT */ CFeature_table_reader_imp{public: enum EQual { eQual_allele, eQual_anticodon, eQual_bond_type, eQual_bound_moiety, eQual_citation, eQual_clone, eQual_codon_start, eQual_cons_splice, eQual_db_xref, eQual_direction, eQual_EC_number, eQual_evidence, eQual_exception, eQual_frequency, eQual_function, eQual_gene, eQual_gene_desc, eQual_gene_syn, eQual_go_component, eQual_go_function, eQual_go_process, eQual_insertion_seq, eQual_label, eQual_locus_tag, eQual_macronuclear, eQual_map, eQual_MEDLINE, eQual_mod_base, eQual_muid, eQual_note, eQual_number, eQual_operon, eQual_organism, eQual_partial, eQual_PCR_conditions, eQual_phenotype, eQual_pmid, eQual_product, eQual_prot_desc, eQual_prot_note, eQual_protein_id, eQual_pseudo, eQual_PubMed, eQual_region_name, eQual_replace, eQual_rpt_family, eQual_rpt_type, eQual_rpt_unit, eQual_site_type, eQual_standard_name, eQual_transcript_id, eQual_transl_except, eQual_transl_table, eQual_translation, eQual_transposon, eQual_usedin }; enum EOrgRef { eOrgRef_organism, eOrgRef_organelle, eOrgRef_div, eOrgRef_lineage, eOrgRef_gcode, eOrgRef_mgcode }; typedef map< string, CSeqFeatData::ESubtype > TFeatReaderMap; typedef map< string, EQual > TQualReaderMap; typedef map< string, EOrgRef > TOrgRefReaderMap; typedef map< string, CBioSource::EGenome > TGenomeReaderMap; typedef map< string, CSubSource::ESubtype > TSubSrcReaderMap; typedef map< string, COrgMod::ESubtype > TOrgModReaderMap; typedef map< string, CSeqFeatData::EBond > TBondReaderMap; typedef map< string, CSeqFeatData::ESite > TSiteReaderMap; typedef map< string, int > TTrnaReaderMap; typedef vector< string > TSingleQualList; // constructor CFeature_table_reader_imp(void); // destructor ~CFeature_table_reader_imp(void); // read 5-column feature table and return Seq-annot CRef<CSeq_annot> ReadSequinFeatureTable (CNcbiIstream& ifs, const string& seqid, const string& annotname, const CFeature_table_reader::TFlags flags); // create single feature from key CRef<CSeq_feat> CreateSeqFeat (const string& feat, CSeq_loc& location, const CFeature_table_reader::TFlags flags); // add single qualifier to feature void AddFeatQual (CRef<CSeq_feat> sfp, const string& qual, const string& val, const CFeature_table_reader::TFlags flags);private: // Prohibit copy constructor and assignment operator CFeature_table_reader_imp(const CFeature_table_reader_imp& value); CFeature_table_reader_imp& operator=(const CFeature_table_reader_imp& value); bool x_ParseFeatureTableLine (const string& line, Int4* startP, Int4* stopP, bool* partial5P, bool* partial3P, bool* ispointP, string& featP, string& qualP, string& valP, Int4 offset); bool x_AddIntervalToFeature (CRef<CSeq_feat> sfp, CSeq_loc_mix *mix, const string& seqid, Int4 start, Int4 stop, bool partial5, bool partial3); bool x_AddQualifierToFeature (CRef<CSeq_feat> sfp, const string& qual, const string& val); bool x_AddQualifierToGene (CSeqFeatData& sfdata, EQual qtype, const string& val); bool x_AddQualifierToCdregion (CRef<CSeq_feat> sfp, CSeqFeatData& sfdata, EQual qtype, const string& val); bool x_AddQualifierToRna (CSeqFeatData& sfdata, EQual qtype, const string& val); bool x_AddQualifierToImp (CRef<CSeq_feat> sfp, CSeqFeatData& sfdata, EQual qtype, const string& qual, const string& val); bool x_AddQualifierToBioSrc (CSeqFeatData& sfdata, EOrgRef rtype, const string& val); bool x_AddQualifierToBioSrc (CSeqFeatData& sfdata, CSubSource::ESubtype stype, const string& val); bool x_AddQualifierToBioSrc (CSeqFeatData& sfdata, COrgMod::ESubtype mtype, const string& val); int x_ParseTrnaString (const string& val); TFeatReaderMap m_FeatKeys; TQualReaderMap m_QualKeys; TOrgRefReaderMap m_OrgRefKeys; TGenomeReaderMap m_GenomeKeys; TSubSrcReaderMap m_SubSrcKeys; TOrgModReaderMap m_OrgModKeys; TBondReaderMap m_BondKeys; TSiteReaderMap m_SiteKeys; TTrnaReaderMap m_TrnaKeys; TSingleQualList m_SingleKeys;};auto_ptr<CFeature_table_reader_imp> CFeature_table_reader::sm_Implementation;void CFeature_table_reader::x_InitImplementation(){ DEFINE_STATIC_FAST_MUTEX(s_Implementation_mutex); CFastMutexGuard LOCK(s_Implementation_mutex); if ( !sm_Implementation.get() ) { sm_Implementation.reset(new CFeature_table_reader_imp()); }}typedef struct featinit { const char * key; CSeqFeatData::ESubtype subtype;} FeatInit;static FeatInit feat_key_to_subtype [] = { { "-10_signal", CSeqFeatData::eSubtype_10_signal }, { "-35_signal", CSeqFeatData::eSubtype_35_signal }, { "3'clip", CSeqFeatData::eSubtype_3clip }, { "3'UTR", CSeqFeatData::eSubtype_3UTR }, { "5'clip", CSeqFeatData::eSubtype_5clip }, { "5'UTR", CSeqFeatData::eSubtype_5UTR }, { "attenuator", CSeqFeatData::eSubtype_attenuator }, { "Bond", CSeqFeatData::eSubtype_bond }, { "CAAT_signal", CSeqFeatData::eSubtype_CAAT_signal }, { "CDS", CSeqFeatData::eSubtype_cdregion }, { "Cit", CSeqFeatData::eSubtype_pub }, { "Comment", CSeqFeatData::eSubtype_comment }, { "conflict", CSeqFeatData::eSubtype_conflict }, { "C_region", CSeqFeatData::eSubtype_C_region }, { "D-loop", CSeqFeatData::eSubtype_D_loop }, { "D_segment", CSeqFeatData::eSubtype_D_segment }, { "enhancer", CSeqFeatData::eSubtype_enhancer }, { "exon", CSeqFeatData::eSubtype_exon }, { "GC_signal", CSeqFeatData::eSubtype_GC_signal }, { "gene", CSeqFeatData::eSubtype_gene }, { "Het", CSeqFeatData::eSubtype_het }, { "iDNA", CSeqFeatData::eSubtype_iDNA }, { "intron", CSeqFeatData::eSubtype_intron }, { "J_segment", CSeqFeatData::eSubtype_J_segment }, { "LTR", CSeqFeatData::eSubtype_LTR }, { "mat_peptide", CSeqFeatData::eSubtype_mat_peptide_aa }, { "mat_peptide_nt", CSeqFeatData::eSubtype_mat_peptide }, { "misc_binding", CSeqFeatData::eSubtype_misc_binding }, { "misc_difference", CSeqFeatData::eSubtype_misc_difference }, { "misc_feature", CSeqFeatData::eSubtype_misc_feature }, { "misc_recomb", CSeqFeatData::eSubtype_misc_recomb }, { "misc_RNA", CSeqFeatData::eSubtype_otherRNA }, { "misc_signal", CSeqFeatData::eSubtype_misc_signal }, { "misc_structure", CSeqFeatData::eSubtype_misc_structure }, { "modified_base", CSeqFeatData::eSubtype_modified_base }, { "mRNA", CSeqFeatData::eSubtype_mRNA }, { "NonStdRes", CSeqFeatData::eSubtype_non_std_residue }, { "Num", CSeqFeatData::eSubtype_num }, { "N_region", CSeqFeatData::eSubtype_N_region }, { "old_sequence", CSeqFeatData::eSubtype_old_sequence }, { "operon", CSeqFeatData::eSubtype_operon }, { "oriT", CSeqFeatData::eSubtype_oriT }, { "polyA_signal", CSeqFeatData::eSubtype_polyA_signal }, { "polyA_site", CSeqFeatData::eSubtype_polyA_site }, { "precursor_RNA", CSeqFeatData::eSubtype_preRNA }, { "pre_RNA", CSeqFeatData::eSubtype_preRNA }, { "preprotein", CSeqFeatData::eSubtype_preprotein }, { "primer_bind", CSeqFeatData::eSubtype_primer_bind }, { "prim_transcript", CSeqFeatData::eSubtype_prim_transcript }, { "promoter", CSeqFeatData::eSubtype_promoter }, { "Protein", CSeqFeatData::eSubtype_prot }, { "protein_bind", CSeqFeatData::eSubtype_protein_bind }, { "RBS", CSeqFeatData::eSubtype_RBS }, { "REFERENCE", CSeqFeatData::eSubtype_pub }, { "Region", CSeqFeatData::eSubtype_region }, { "repeat_region", CSeqFeatData::eSubtype_repeat_region }, { "repeat_unit", CSeqFeatData::eSubtype_repeat_unit }, { "rep_origin", CSeqFeatData::eSubtype_rep_origin }, { "rRNA", CSeqFeatData::eSubtype_rRNA }, { "Rsite", CSeqFeatData::eSubtype_rsite }, { "satellite", CSeqFeatData::eSubtype_satellite }, { "scRNA", CSeqFeatData::eSubtype_scRNA }, { "SecStr", CSeqFeatData::eSubtype_psec_str }, { "sig_peptide", CSeqFeatData::eSubtype_sig_peptide_aa }, { "sig_peptide_nt", CSeqFeatData::eSubtype_sig_peptide }, { "Site", CSeqFeatData::eSubtype_site }, { "Site-ref", CSeqFeatData::eSubtype_site_ref }, { "snoRNA", CSeqFeatData::eSubtype_snoRNA }, { "snRNA", CSeqFeatData::eSubtype_snRNA }, { "source", CSeqFeatData::eSubtype_biosrc }, { "Src", CSeqFeatData::eSubtype_biosrc }, { "stem_loop", CSeqFeatData::eSubtype_stem_loop }, { "STS", CSeqFeatData::eSubtype_STS }, { "S_region", CSeqFeatData::eSubtype_S_region }, { "TATA_signal", CSeqFeatData::eSubtype_TATA_signal },
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?