📄 seq_id.hpp
字号:
/* * =========================================================================== * PRODUCTION $Log: Seq_id.hpp,v $ * PRODUCTION Revision 1000.2 2004/06/01 19:30:49 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.47 * PRODUCTION * =========================================================================== *//* $Id: Seq_id.hpp,v 1000.2 2004/06/01 19:30:49 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: ....... * * File Description: * ....... * * Remark: * This code was originally generated by application DATATOOL * using specifications from the ASN data definition file * 'seqloc.asn'. */#ifndef OBJECTS_SEQLOC_SEQ_ID_HPP#define OBJECTS_SEQLOC_SEQ_ID_HPP// generated includes#include <objects/seqloc/Seq_id_.hpp>#include <corelib/ncbi_limits.hpp>#include <serial/serializable.hpp>#include <objects/seqloc/Textseq_id.hpp>// generated classesBEGIN_NCBI_SCOPEBEGIN_objects_SCOPE // namespace ncbi::objects::class CBioseq;class NCBI_SEQLOC_EXPORT CSeq_id : public CSeq_id_Base, public CSerializable{ typedef CSeq_id_Base Tparent;public: // // See also CSeq_id related functions in "util/sequence.hpp": // // TSeqPos GetLength(const CSeq_id&, CScope*); // bool IsSameBioseq(const CSeq_id&, const CSeq_id&, CScope*); // // Default constructor CSeq_id( void ); // Takes either a FastA-style string delimited by vertical bars or // a raw accession (with optional version). CSeq_id( const string& the_id ); // Construct a seq-id from a dbtag CSeq_id(const CDbtag& tag, bool set_as_general = true); // With proper choice CSeq_id(CSeq_id_Base::E_Choice the_type, int int_seq_id); // see explanation in x_Init below // With proper choice CSeq_id(CSeq_id_Base::E_Choice the_type, const string& acc_in, // see explanation in x_Init below const string& name_in, // force not optional; if not given, use the constructor below const string& version_in, const string& release_in = kEmptyStr); CSeq_id(CSeq_id_Base::E_Choice the_type, const string& acc_in, // see explanation in x_Init below const string& name_in, int version = 0, const string& release_in = kEmptyStr); // Need to lookup choice CSeq_id(const string& the_type, const string& acc_in, // see explanation in x_Init below const string& name_in, // force not optional; if not given, use the constructor below const string& version_in, const string& release_in = kEmptyStr); CSeq_id(const string& the_type, const string& acc_in, // see explanation in x_Init below const string& name_in, int version = 0 , const string& release_in = kEmptyStr); // Destructor virtual ~CSeq_id(void); // Converts a string to a choice, no need to require a member. static CSeq_id::E_Choice WhichInverseSeqId(const char* SeqIdCode); // For s_IdentifyAccession (below) enum EAccessionInfo { // Mask for Seq_id type; allow 8 bits to be safe eAcc_type_mask = 0xff, // Useful flags fAcc_nuc = 0x80000000, fAcc_prot = 0x40000000, fAcc_predicted = 0x20000000, // only for refseq eAcc_flag_mask = 0xe0000000, // Common divisions and categories (0 << 8 .. 127 << 8) eAcc_other = 0 << 8, // no further classification eAcc_est = 1 << 8, eAcc_dirsub = 2 << 8, // direct submission; trumps other values eAcc_genome = 3 << 8, eAcc_div_patent = 4 << 8, eAcc_htgs = 5 << 8, eAcc_con = 6 << 8, // just a contig/segset eAcc_segset = eAcc_con, // was once wrongly split out eAcc_wgs = 7 << 8, eAcc_division_mask = 0xff00, // Actual return values with EXAMPLE prefixes (to be followed // by digits), grouped by Seq-id type. In most cases, there // are other prefixes with the same classification, and if not // there could be. eAcc_unknown = e_not_set | eAcc_other, eAcc_unreserved_nuc = e_not_set | 128 << 8 | fAcc_nuc, // XY eAcc_unreserved_prot = e_not_set | 128 << 8 | fAcc_prot, // XYZ eAcc_ambiguous_nuc = e_not_set | 192 << 8 | fAcc_nuc, // N0-N1 // Most N accessions are GenBank ESTs, but some low-numbered ones // (now only used as primary accessions) were assigned haphazardly, // and some are therefore ambiguous. eAcc_maybe_gb = eAcc_ambiguous_nuc | 1, eAcc_maybe_embl = eAcc_ambiguous_nuc | 2, eAcc_maybe_ddbj = eAcc_ambiguous_nuc | 4, eAcc_gb_embl = eAcc_maybe_gb | eAcc_maybe_embl, eAcc_gb_ddbj = eAcc_maybe_gb | eAcc_maybe_ddbj, eAcc_embl_ddbj = eAcc_maybe_embl | eAcc_maybe_ddbj, eAcc_gb_embl_ddbj = (eAcc_maybe_gb | eAcc_maybe_embl | eAcc_maybe_ddbj), eAcc_local = e_Local | eAcc_other, eAcc_gibbsq = e_Gibbsq | eAcc_other, eAcc_gibbmt = e_Gibbmt | eAcc_other, eAcc_giim = e_Giim | eAcc_other, eAcc_gb_prot = e_Genbank | eAcc_other | fAcc_prot, // AAA eAcc_gb_other_nuc = e_Genbank | eAcc_other | fAcc_nuc, // AS eAcc_gb_est = e_Genbank | eAcc_est | fAcc_nuc, // H eAcc_gb_dirsub = e_Genbank | eAcc_dirsub | fAcc_nuc, // U eAcc_gb_genome = e_Genbank | eAcc_genome | fAcc_nuc, // AE eAcc_gb_patent = e_Genbank | eAcc_div_patent /* | fAcc_nuc */, //I eAcc_gb_patent_prot = e_Genbank | eAcc_div_patent | fAcc_prot, // AAE eAcc_gb_htgs = e_Genbank | eAcc_htgs | fAcc_nuc, // AC eAcc_gb_con = e_Genbank | eAcc_con, // CH eAcc_gb_segset = eAcc_gb_con, // for compatibility eAcc_gb_wgs_nuc = e_Genbank | eAcc_wgs | fAcc_nuc, // AAAA eAcc_gb_wgs_prot = e_Genbank | eAcc_wgs | fAcc_prot, // EAA eAcc_gsdb_dirsub = e_Genbank | 128 << 8 | fAcc_nuc, // J eAcc_gb_gsdb = e_Genbank | 129 << 8 | fAcc_nuc, // AD eAcc_gb_gss = e_Genbank | 130 << 8 | fAcc_nuc, // B eAcc_gb_sts = e_Genbank | 131 << 8 | fAcc_nuc, // G eAcc_gb_backbone = e_Genbank | 132 << 8 | fAcc_nuc, // S eAcc_gb_cdna = e_Genbank | 133 << 8 | fAcc_nuc, // BC eAcc_embl_prot = e_Embl | eAcc_other | fAcc_prot, // CAA eAcc_embl_other_nuc = e_Embl | eAcc_other | fAcc_nuc, // N00060 eAcc_embl_est = e_Embl | eAcc_est | fAcc_nuc, // F eAcc_embl_dirsub = e_Embl | eAcc_dirsub | fAcc_nuc, // V eAcc_embl_genome = e_Embl | eAcc_genome | fAcc_nuc, // AL eAcc_embl_patent = e_Embl | eAcc_div_patent | fAcc_nuc, // A eAcc_embl_htgs = e_Embl | eAcc_htgs | fAcc_nuc, // unused eAcc_embl_con = e_Embl | eAcc_con | fAcc_nuc, // AN eAcc_embl_wgs_nuc = e_Embl | eAcc_wgs | fAcc_nuc, // CAAA eAcc_embl_wgs_prot = e_Embl | eAcc_wgs | fAcc_prot, // unused eAcc_pir = e_Pir | eAcc_other | fAcc_prot, eAcc_swissprot = e_Swissprot | eAcc_other | fAcc_prot, eAcc_patent = e_Patent | eAcc_other, eAcc_refseq_prot = e_Other | eAcc_other | fAcc_prot, //NP_ eAcc_refseq_genome = e_Other | eAcc_genome | fAcc_nuc, //NS_ eAcc_refseq_wgs_nuc = e_Other | eAcc_wgs | fAcc_nuc, //NZ_ eAcc_refseq_wgs_prot = e_Other | eAcc_wgs | fAcc_prot, //ZP_ eAcc_refseq_contig = e_Other | eAcc_segset, //NT_ eAcc_refseq_unreserved = e_Other | 128 << 8, //AA_ eAcc_refseq_mrna = e_Other | 129 << 8 | fAcc_nuc, //NM_ eAcc_refseq_chromosome = e_Other | 130 << 8 | fAcc_nuc, //NC_ eAcc_refseq_genomic = e_Other | 131 << 8 | fAcc_nuc, //NG_ // non-coding RNA eAcc_refseq_ncrna = e_Other | 132 << 8 | fAcc_nuc, //NR_ eAcc_refseq_wgs_intermed = e_Other | 133 << 8 | fAcc_nuc, //NW_ eAcc_refseq_prot_predicted = eAcc_refseq_prot | fAcc_predicted, //XP_ eAcc_refseq_mrna_predicted = eAcc_refseq_mrna | fAcc_predicted, //XM_ eAcc_refseq_ncrna_predicted = eAcc_refseq_ncrna | fAcc_predicted, //XR_ eAcc_general = e_General | eAcc_other, eAcc_gi = e_Gi | eAcc_other, eAcc_ddbj_prot = e_Ddbj | eAcc_other | fAcc_prot, // BAA eAcc_ddbj_other_nuc = e_Ddbj | eAcc_other | fAcc_nuc, // N00028 eAcc_ddbj_est = e_Ddbj | eAcc_est | fAcc_nuc, // C eAcc_ddbj_dirsub = e_Ddbj | eAcc_dirsub | fAcc_nuc, // D eAcc_ddbj_genome = e_Ddbj | eAcc_genome | fAcc_nuc, // AP eAcc_ddbj_patent = e_Ddbj | eAcc_div_patent | fAcc_nuc, // E eAcc_ddbj_htgs = e_Ddbj | eAcc_htgs | fAcc_nuc, // AK eAcc_ddbj_con = e_Ddbj | eAcc_con | fAcc_nuc, // BA eAcc_ddbj_wgs_nuc = e_Ddbj | eAcc_wgs | fAcc_nuc, // BAAA eAcc_ddbj_wgs_prot = e_Ddbj | eAcc_wgs | fAcc_prot, // GAA eAcc_prf = e_Prf | eAcc_other | fAcc_prot, eAcc_pdb = e_Pdb | eAcc_other | fAcc_prot, eAcc_gb_tpa_nuc = e_Tpg | eAcc_other | fAcc_nuc, // BK eAcc_gb_tpa_prot = e_Tpg | eAcc_other | fAcc_prot, // DAA eAcc_embl_tpa_nuc = e_Tpe | eAcc_other | fAcc_nuc, // BN eAcc_embl_tpa_prot = e_Tpe | eAcc_other | fAcc_prot, // unused eAcc_ddbj_tpa_nuc = e_Tpd | eAcc_other | fAcc_nuc, // BR eAcc_ddbj_tpa_prot = e_Tpd | eAcc_other | fAcc_prot // FAA }; static E_Choice GetAccType(EAccessionInfo info) { return static_cast<E_Choice>(info & eAcc_type_mask); } // Deduces information from a bare accession a la WHICH_db_accession; // may report false negatives on properties. static EAccessionInfo IdentifyAccession(const string& accession); EAccessionInfo IdentifyAccession(void) const; // Match() - TRUE if SeqIds are equivalent bool Match(const CSeq_id& sid2) const; // Compare return values enum E_SIC { e_error = 0, // some problem e_DIFF, // different SeqId types-can't compare e_NO, // SeqIds compared, but are different e_YES // SeqIds compared, are equivalent }; // Compare() - more general E_SIC Compare(const CSeq_id& sid2) const; int CompareOrdered(const CSeq_id& sid2) const; bool operator<(const CSeq_id& sid2) const { return CompareOrdered(sid2) < 0; } // Return compatible CTextseq_id const CTextseq_id* GetTextseq_Id(void) const; // Implement serializable interface virtual void WriteAsFasta(ostream& out) const; CProxy DumpAsFasta(void) const { return Dump(eAsFasta); } const string AsFastaString(void) const; // return the label for a given string enum ELabelType { eType, eContent, eBoth, eFasta, // default is to show type + content eDefault = eBoth }; enum ELabelFlags { fLabel_Version = 0x10, // default options - always show the version fLabel_Default = fLabel_Version }; typedef int TLabelFlags; void GetLabel(string* label, ELabelType type = eDefault, TLabelFlags flags = fLabel_Default) const; //Return seqid string with optional version for text seqid type string GetSeqIdString(bool with_version = false) const; // Get a string representation of the sequence IDs of a given bioseq. This // function produces strings in a number of possible formats. enum EStringFormat { eFormat_FastA, // FastA format eFormat_ForceGI, // GI only, in FastA format eFormat_BestWithoutVersion, // 'Best' accession, without the version eFormat_BestWithVersion // 'Best' accession, with version }; static string GetStringDescr(const CBioseq& bioseq, EStringFormat fmt); // Numerical quality ranking; lower is better. // (Text)Score and WorstRank both basically correspond to the C // Toolkit's SeqIdFindWorst, which favors textual accessions, // whereas BestRank corresponds to the C Toolkit's SeqIdFindBest // and favors GIs. All three give a slight bonus to accessions // that carry versions. int AdjustScore (int base_score) const; int BaseTextScore (void) const; int BaseBestRankScore (void) const; int BaseWorstRankScore(void) const; int TextScore (void) const { return AdjustScore(BaseTextScore()); } int BestRankScore (void) const { return AdjustScore(BaseBestRankScore()); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -