readfeat.cpp

来自「ncbi源码」· C++ 代码 · 共 1,648 行 · 第 1/5 页

CPP
1,648
字号
/*
 * ===========================================================================
 * PRODUCTION $Log: readfeat.cpp,v $
 * PRODUCTION Revision 1000.5  2004/06/01 19:46:24  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.35
 * PRODUCTION
 * ===========================================================================
 */

/*
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author:  Jonathan Kans
 *
 * File Description:
 *   Feature table reader
 *
 */

#include <ncbi_pch.hpp>
#include <corelib/ncbistd.hpp>
#include <corelib/ncbithr.hpp>
#include <serial/iterator.hpp>
#include <serial/objistrasn.hpp>

// Objects includes
#include <objects/general/Int_fuzz.hpp>
#include <objects/general/Object_id.hpp>
#include <objects/general/Dbtag.hpp>
#include <objects/seqloc/Seq_id.hpp>
#include <objects/seqloc/Seq_loc.hpp>
#include <objects/seqloc/Seq_interval.hpp>
#include <objects/seqloc/Seq_point.hpp>
#include <objects/seq/Seq_annot.hpp>
#include <objects/seq/Annotdesc.hpp>
#include <objects/seq/Annot_descr.hpp>
#include <objects/seqfeat/SeqFeatData.hpp>
#include <objects/seqfeat/Seq_feat.hpp>
#include <objects/seqfeat/BioSource.hpp>
#include <objects/seqfeat/Org_ref.hpp>
#include <objects/seqfeat/OrgName.hpp>
#include <objects/seqfeat/SubSource.hpp>
#include <objects/seqfeat/OrgMod.hpp>
#include <objects/seqfeat/Gene_ref.hpp>
#include <objects/seqfeat/Cdregion.hpp>
#include <objects/seqfeat/Code_break.hpp>
#include <objects/seqfeat/Genetic_code.hpp>
#include <objects/seqfeat/Genetic_code_table.hpp>
#include <objects/seqfeat/RNA_ref.hpp>
#include <objects/seqfeat/Trna_ext.hpp>
#include <objects/seqfeat/Imp_feat.hpp>
#include <objects/seqfeat/Gb_qual.hpp>

#include <objtools/readers/readfeat.hpp>

#include <algorithm>


BEGIN_NCBI_SCOPE
BEGIN_objects_SCOPE // namespace ncbi::objects::


// Implementation class behind CFeature_table_reader.  It declares the
// reader entry points, the qualifier / org-ref enumerations used to classify
// qualifier names, and one string-keyed lookup table per category.
// Copying is disabled (copy constructor and assignment are private and
// only declared here).
class /* NCBI_XOBJREAD_EXPORT */ CFeature_table_reader_imp
{
public:
    // Identifiers for the qualifier names the reader distinguishes.
    enum EQual {
        eQual_allele,
        eQual_anticodon,
        eQual_bond_type,
        eQual_bound_moiety,
        eQual_citation,
        eQual_clone,
        eQual_codon_start,
        eQual_cons_splice,
        eQual_db_xref,
        eQual_direction,
        eQual_EC_number,
        eQual_evidence,
        eQual_exception,
        eQual_frequency,
        eQual_function,
        eQual_gene,
        eQual_gene_desc,
        eQual_gene_syn,
        eQual_go_component,
        eQual_go_function,
        eQual_go_process,
        eQual_insertion_seq,
        eQual_label,
        eQual_locus_tag,
        eQual_macronuclear,
        eQual_map,
        eQual_MEDLINE,
        eQual_mod_base,
        eQual_muid,
        eQual_note,
        eQual_number,
        eQual_operon,
        eQual_organism,
        eQual_partial,
        eQual_PCR_conditions,
        eQual_phenotype,
        eQual_pmid,
        eQual_product,
        eQual_prot_desc,
        eQual_prot_note,
        eQual_protein_id,
        eQual_pseudo,
        eQual_PubMed,
        eQual_region_name,
        eQual_replace,
        eQual_rpt_family,
        eQual_rpt_type,
        eQual_rpt_unit,
        eQual_site_type,
        eQual_standard_name,
        eQual_transcript_id,
        eQual_transl_except,
        eQual_transl_table,
        eQual_translation,
        eQual_transposon,
        eQual_usedin
    };

    // Identifiers for the org-ref level qualifiers of a source feature.
    enum EOrgRef {
        eOrgRef_organism,
        eOrgRef_organelle,
        eOrgRef_div,
        eOrgRef_lineage,
        eOrgRef_gcode,
        eOrgRef_mgcode
    };

    // Lookup tables keyed by the textual name found in the feature table.
    typedef map< string, CSeqFeatData::ESubtype > TFeatReaderMap;
    typedef map< string, EQual > TQualReaderMap;
    typedef map< string, EOrgRef > TOrgRefReaderMap;
    typedef map< string, CBioSource::EGenome > TGenomeReaderMap;
    typedef map< string, CSubSource::ESubtype > TSubSrcReaderMap;
    typedef map< string, COrgMod::ESubtype > TOrgModReaderMap;
    typedef map< string, CSeqFeatData::EBond > TBondReaderMap;
    typedef map< string, CSeqFeatData::ESite > TSiteReaderMap;
    typedef map< string, int > TTrnaReaderMap;

    typedef vector< string > TSingleQualList;

    // constructor
    CFeature_table_reader_imp(void);
    // destructor
    ~CFeature_table_reader_imp(void);

    // read 5-column feature table and return Seq-annot
    CRef<CSeq_annot> ReadSequinFeatureTable (CNcbiIstream& ifs,
                                             const string& seqid,
                                             const string& annotname,
                                             const CFeature_table_reader::TFlags flags);

    // create single feature from key
    CRef<CSeq_feat> CreateSeqFeat (const string& feat,
                                   CSeq_loc& location,
                                   const CFeature_table_reader::TFlags flags);

    // add single qualifier to feature
    void AddFeatQual (CRef<CSeq_feat> sfp,
                      const string& qual,
                      const string& val,
                      const CFeature_table_reader::TFlags flags);

private:
    // Prohibit copy constructor and assignment operator
    CFeature_table_reader_imp(const CFeature_table_reader_imp& value);
    CFeature_table_reader_imp& operator=(const CFeature_table_reader_imp& value);

    // Helpers used while reading a table.  Their definitions are outside
    // this excerpt; NOTE(review): judging by the names, the pointer and
    // non-const reference parameters of x_ParseFeatureTableLine are outputs
    // -- confirm against the definition.
    bool x_ParseFeatureTableLine (const string& line, Int4* startP, Int4* stopP,
                                  bool* partial5P, bool* partial3P, bool* ispointP,
                                  string& featP, string& qualP, string& valP, Int4 offset);

    bool x_AddIntervalToFeature (CRef<CSeq_feat> sfp, CSeq_loc_mix *mix,
                                 const string& seqid, Int4 start,
                                 Int4 stop, bool partial5, bool partial3);

    bool x_AddQualifierToFeature (CRef<CSeq_feat> sfp,
                                  const string& qual, const string& val);

    bool x_AddQualifierToGene     (CSeqFeatData& sfdata,
                                   EQual qtype, const string& val);
    bool x_AddQualifierToCdregion (CRef<CSeq_feat> sfp, CSeqFeatData& sfdata,
                                   EQual qtype, const string& val);
    bool x_AddQualifierToRna      (CSeqFeatData& sfdata,
                                   EQual qtype, const string& val);
    bool x_AddQualifierToImp      (CRef<CSeq_feat> sfp, CSeqFeatData& sfdata,
                                   EQual qtype, const string& qual, const string& val);
    // Three overloads, selected by the kind of source qualifier
    // (org-ref field, SubSource subtype, OrgMod subtype).
    bool x_AddQualifierToBioSrc   (CSeqFeatData& sfdata,
                                   EOrgRef rtype, const string& val);
    bool x_AddQualifierToBioSrc   (CSeqFeatData& sfdata,
                                   CSubSource::ESubtype stype, const string& val);
    bool x_AddQualifierToBioSrc   (CSeqFeatData& sfdata,
                                   COrgMod::ESubtype mtype, const string& val);

    int x_ParseTrnaString (const string& val);

    // One lookup table per name category (see the typedefs above).
    TFeatReaderMap    m_FeatKeys;
    TQualReaderMap    m_QualKeys;
    TOrgRefReaderMap  m_OrgRefKeys;
    TGenomeReaderMap  m_GenomeKeys;
    TSubSrcReaderMap  m_SubSrcKeys;
    TOrgModReaderMap  m_OrgModKeys;
    TBondReaderMap    m_BondKeys;
    TSiteReaderMap    m_SiteKeys;
    TTrnaReaderMap    m_TrnaKeys;
    TSingleQualList   m_SingleKeys;
};


// Storage for the implementation object shared by CFeature_table_reader;
// it starts out empty and is filled in by x_InitImplementation().
auto_ptr<CFeature_table_reader_imp> CFeature_table_reader::sm_Implementation;


// Creates the shared implementation object if it does not exist yet.
// The check and the creation both happen while a function-local static
// fast mutex is held, so at most one CFeature_table_reader_imp is
// constructed; later calls find sm_Implementation already set and do nothing.
void CFeature_table_reader::x_InitImplementation()
{
    DEFINE_STATIC_FAST_MUTEX(s_Implementation_mutex);

    CFastMutexGuard   LOCK(s_Implementation_mutex);
    if ( !sm_Implementation.get() ) {
        sm_Implementation.reset(new CFeature_table_reader_imp());
    }
}


// One row of the feature-key table: the key text and the
// CSeqFeatData subtype it stands for.
typedef struct featinit {
    const char *           key;
    CSeqFeatData::ESubtype subtype;
} FeatInit;

// Feature-key text -> CSeqFeatData subtype.  Several keys may map to the
// same subtype (e.g. "Cit" and "REFERENCE", "source" and "Src").
static FeatInit feat_key_to_subtype [] = {
    { "-10_signal",         CSeqFeatData::eSubtype_10_signal          },
    { "-35_signal",         CSeqFeatData::eSubtype_35_signal          },
    { "3'clip",             CSeqFeatData::eSubtype_3clip              },
    { "3'UTR",              CSeqFeatData::eSubtype_3UTR               },
    { "5'clip",             CSeqFeatData::eSubtype_5clip              },
    { "5'UTR",              CSeqFeatData::eSubtype_5UTR               },
    { "attenuator",         CSeqFeatData::eSubtype_attenuator         },
    { "Bond",               CSeqFeatData::eSubtype_bond               },
    { "CAAT_signal",        CSeqFeatData::eSubtype_CAAT_signal        },
    { "CDS",                CSeqFeatData::eSubtype_cdregion           },
    { "Cit",                CSeqFeatData::eSubtype_pub                },
    { "Comment",            CSeqFeatData::eSubtype_comment            },
    { "conflict",           CSeqFeatData::eSubtype_conflict           },
    { "C_region",           CSeqFeatData::eSubtype_C_region           },
    { "D-loop",             CSeqFeatData::eSubtype_D_loop             },
    { "D_segment",          CSeqFeatData::eSubtype_D_segment          },
    { "enhancer",           CSeqFeatData::eSubtype_enhancer           },
    { "exon",               CSeqFeatData::eSubtype_exon               },
    { "GC_signal",          CSeqFeatData::eSubtype_GC_signal          },
    { "gene",               CSeqFeatData::eSubtype_gene               },
    { "Het",                CSeqFeatData::eSubtype_het                },
    { "iDNA",               CSeqFeatData::eSubtype_iDNA               },
    { "intron",             CSeqFeatData::eSubtype_intron             },
    { "J_segment",          CSeqFeatData::eSubtype_J_segment          },
    { "LTR",                CSeqFeatData::eSubtype_LTR                },
    { "mat_peptide",        CSeqFeatData::eSubtype_mat_peptide_aa     },
    { "mat_peptide_nt",     CSeqFeatData::eSubtype_mat_peptide        },
    { "misc_binding",       CSeqFeatData::eSubtype_misc_binding       },
    { "misc_difference",    CSeqFeatData::eSubtype_misc_difference    },
    { "misc_feature",       CSeqFeatData::eSubtype_misc_feature       },
    { "misc_recomb",        CSeqFeatData::eSubtype_misc_recomb        },
    { "misc_RNA",           CSeqFeatData::eSubtype_otherRNA           },
    { "misc_signal",        CSeqFeatData::eSubtype_misc_signal        },
    { "misc_structure",     CSeqFeatData::eSubtype_misc_structure     },
    { "modified_base",      CSeqFeatData::eSubtype_modified_base      },
    { "mRNA",               CSeqFeatData::eSubtype_mRNA               },
    { "NonStdRes",          CSeqFeatData::eSubtype_non_std_residue    },
    { "Num",                CSeqFeatData::eSubtype_num                },
    { "N_region",           CSeqFeatData::eSubtype_N_region           },
    { "old_sequence",       CSeqFeatData::eSubtype_old_sequence       },
    { "operon",             CSeqFeatData::eSubtype_operon             },
    { "oriT",               CSeqFeatData::eSubtype_oriT               },
    { "polyA_signal",       CSeqFeatData::eSubtype_polyA_signal       },
    { "polyA_site",         CSeqFeatData::eSubtype_polyA_site         },
    { "precursor_RNA",      CSeqFeatData::eSubtype_preRNA             },
    { "pre_RNA",            CSeqFeatData::eSubtype_preRNA             },
    { "preprotein",         CSeqFeatData::eSubtype_preprotein         },
    { "primer_bind",        CSeqFeatData::eSubtype_primer_bind        },
    { "prim_transcript",    CSeqFeatData::eSubtype_prim_transcript    },
    { "promoter",           CSeqFeatData::eSubtype_promoter           },
    { "Protein",            CSeqFeatData::eSubtype_prot               },
    { "protein_bind",       CSeqFeatData::eSubtype_protein_bind       },
    { "RBS",                CSeqFeatData::eSubtype_RBS                },
    { "REFERENCE",          CSeqFeatData::eSubtype_pub                },
    { "Region",             CSeqFeatData::eSubtype_region             },
    { "repeat_region",      CSeqFeatData::eSubtype_repeat_region      },
    { "repeat_unit",        CSeqFeatData::eSubtype_repeat_unit        },
    { "rep_origin",         CSeqFeatData::eSubtype_rep_origin         },
    { "rRNA",               CSeqFeatData::eSubtype_rRNA               },
    { "Rsite",              CSeqFeatData::eSubtype_rsite              },
    { "satellite",          CSeqFeatData::eSubtype_satellite          },
    { "scRNA",              CSeqFeatData::eSubtype_scRNA              },
    { "SecStr",             CSeqFeatData::eSubtype_psec_str           },
    { "sig_peptide",        CSeqFeatData::eSubtype_sig_peptide_aa     },
    { "sig_peptide_nt",     CSeqFeatData::eSubtype_sig_peptide        },
    { "Site",               CSeqFeatData::eSubtype_site               },
    { "Site-ref",           CSeqFeatData::eSubtype_site_ref           },
    { "snoRNA",             CSeqFeatData::eSubtype_snoRNA             },
    { "snRNA",              CSeqFeatData::eSubtype_snRNA              },
    { "source",             CSeqFeatData::eSubtype_biosrc             },
    { "Src",                CSeqFeatData::eSubtype_biosrc             },
    { "stem_loop",          CSeqFeatData::eSubtype_stem_loop          },
    { "STS",                CSeqFeatData::eSubtype_STS                },
    { "S_region",           CSeqFeatData::eSubtype_S_region           },
    { "TATA_signal",        CSeqFeatData::eSubtype_TATA_signal        },

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?