📄 seq_id.cpp
字号:
case 30: case 31: case 32: case 33: case 34: case 36: case 38: case 39: case 40: case 42: case 43: case 44: case 45: case 47: case 49: case 50: case 51: case 55: case 56: case 59: return CSeq_id::eAcc_gb_ddbj; case 5: case 9: case 12: case 20: case 22: case 25: case 58: return CSeq_id::eAcc_gb_embl_ddbj; case 8: case 13: case 18: case 19: case 27: case 41: case 46: case 48: case 52: case 54: case 18624: return CSeq_id::eAcc_gb_other_nuc; case 28: case 35: case 37: case 53: case 61: case 62: case 63: case 65: case 66: case 67: case 68: case 69: case 78: case 79: case 83: case 88: case 90: case 91: case 92: case 93: case 94: return CSeq_id::eAcc_ddbj_other_nuc; case 60: case 64: return CSeq_id::eAcc_embl_other_nuc; case 70: return CSeq_id::eAcc_embl_ddbj; default: // unassigned or ambiguous return CSeq_id::eAcc_unknown; } }}CSeq_id::EAccessionInfo CSeq_id::IdentifyAccession(const string& acc){ SIZE_TYPE digit_pos = acc.find_first_of("0123456789"); if (digit_pos == NPOS) { return eAcc_unknown; } SIZE_TYPE main_size = acc.find('.'); if (main_size == NPOS) { main_size = acc.size(); } string pfx = acc.substr(0, digit_pos); NStr::ToUpper(pfx); switch (pfx.size()) { case 0: if (acc.find_first_not_of("0123456789") == NPOS) { // just digits return eAcc_gi; } else if (main_size == 4 || (main_size > 4 && acc[4] == '|')) { return eAcc_pdb; } else { return eAcc_unknown; } case 1: switch (pfx[0]) { case 'A': return eAcc_embl_patent; case 'B': return eAcc_gb_gss; case 'C': return eAcc_ddbj_est; case 'D': return eAcc_ddbj_dirsub; case 'E': return eAcc_ddbj_patent; case 'F': return eAcc_embl_est; case 'G': return eAcc_gb_sts; case 'H': case 'R': case 'T': case 'W': return eAcc_gb_est; case 'I': return eAcc_gb_patent; case 'J': case 'K': case 'L': case 'M': return eAcc_gsdb_dirsub; case 'N': return s_IdentifyNAcc(acc); case 'O': case 'P': case 'Q': return eAcc_swissprot; case 'S': return eAcc_gb_backbone; case 'U': return eAcc_gb_dirsub; case 'V': case 'X': case 'Y': case 'Z': return eAcc_embl_dirsub; default: return eAcc_unreserved_nuc; } case 2: switch (pfx[0]) { case 'A': switch (pfx[1]) { case 'A': case 'I': case 'W': return eAcc_gb_est; case 'B': return eAcc_ddbj_dirsub; case 'C': return eAcc_gb_htgs; case 'D': return eAcc_gb_gsdb; case 'E': return eAcc_gb_genome; case 'F': case 'Y': return eAcc_gb_dirsub; case 'G': case 'P': return eAcc_ddbj_genome; case 'H': return eAcc_gb_con; case 'J': case 'M': return eAcc_embl_dirsub; case 'K': return eAcc_ddbj_htgs; case 'L': return eAcc_embl_genome; case 'N': return eAcc_embl_con; case 'Q': case 'Z': return eAcc_gb_gss; case 'R': return eAcc_gb_patent; case 'S': return eAcc_gb_other_nuc; case 'T': case 'U': case 'V': return eAcc_ddbj_est; case 'X': return eAcc_embl_patent; default: return eAcc_unreserved_nuc; } case 'B': switch (pfx[1]) { case 'A': return eAcc_ddbj_con; case 'B': case 'J': case 'P': case 'W': case 'Y': return eAcc_ddbj_est; case 'C': case 'T': return eAcc_gb_cdna; case 'D': return eAcc_ddbj_patent; case 'E': case 'F': case 'G': case 'I': case 'M': case 'Q': case 'U': return eAcc_gb_est; case 'H': case 'Z': return eAcc_gb_gss; case 'K': case 'L': return eAcc_gb_tpa_nuc; case 'N': return eAcc_embl_tpa_nuc; case 'R': return eAcc_ddbj_tpa_nuc; case 'S': return eAcc_ddbj_genome; // BS is actually chimp genomes. case 'V': return eAcc_gb_sts; case 'X': return eAcc_embl_genome; default: return eAcc_unreserved_nuc; } case 'C': switch (pfx[1]) { case 'A': case 'B': case 'D': case 'F': case 'K': case 'N': return eAcc_gb_est; case 'C': case 'E': case 'G': case 'L': return eAcc_gb_gss; case 'H': case 'M': return eAcc_gb_con; case 'I': case 'J': return eAcc_ddbj_est; // no specific assignment for CO-CP yet case 'O': case 'P': return eAcc_gb_other_nuc; case 'Q': return eAcc_embl_patent; case 'R': return eAcc_embl_genome; case 'S': case 'T': case 'U': return eAcc_embl_other_nuc; default: return eAcc_unreserved_nuc; } default: return eAcc_unreserved_nuc; } case 3: if (pfx[2] == '_') { // refseq-style if (pfx == "NC_") { return eAcc_refseq_chromosome; } else if (pfx == "NG_") { return eAcc_refseq_genomic; } else if (pfx == "NM_") { return eAcc_refseq_mrna; } else if (pfx == "NP_") { return eAcc_refseq_prot; } else if (pfx == "NR_") { return eAcc_refseq_ncrna; } else if (pfx == "NS_") { return eAcc_refseq_genome; /* ? */ } else if (pfx == "NT_") { return eAcc_refseq_contig; } else if (pfx == "NW_") { return eAcc_refseq_wgs_intermed; } // else if (pfx == "NZ_") { return eAcc_refseq_wgs_nuc; } else if (pfx == "XM_") { return eAcc_refseq_mrna_predicted; } else if (pfx == "XP_") { return eAcc_refseq_prot_predicted; } else if (pfx == "XR_") { return eAcc_refseq_ncrna_predicted; } else if (pfx == "ZP_") { return eAcc_refseq_wgs_prot; } else { return eAcc_refseq_unreserved; } } else { // protein switch (pfx[0]) { case 'A': return (pfx == "AAE") ? eAcc_gb_patent_prot : eAcc_gb_prot; case 'B': return eAcc_ddbj_prot; case 'C': return eAcc_embl_prot; case 'D': return eAcc_gb_tpa_prot; case 'E': return eAcc_gb_wgs_prot; case 'F': return eAcc_ddbj_tpa_prot; case 'G': return eAcc_ddbj_wgs_prot; default: return eAcc_unreserved_prot; } } case 4: switch (pfx[0]) { case 'A': return eAcc_gb_wgs_nuc; case 'B': return eAcc_ddbj_wgs_nuc; case 'C': return eAcc_embl_wgs_nuc; default: return eAcc_unknown; } case 7: if (NStr::StartsWith(acc, "NZ_")) { return eAcc_refseq_wgs_nuc; } else { return eAcc_unknown; } default: return eAcc_unknown; }}CSeq_id::EAccessionInfo CSeq_id::IdentifyAccession(void) const{ EAccessionInfo type = (EAccessionInfo)Which(); switch (type) { case CSeq_id::e_Pir: case CSeq_id::e_Swissprot: case CSeq_id::e_Prf: case CSeq_id::e_Pdb: return (EAccessionInfo)(type | fAcc_prot); // always just protein case CSeq_id::e_Genbank: case CSeq_id::e_Embl: case CSeq_id::e_Ddbj: case CSeq_id::e_Tpg: case CSeq_id::e_Tpe: case CSeq_id::e_Tpd: case CSeq_id::e_Other: { const CTextseq_id* tsid = GetTextseq_Id(); if (tsid->IsSetAccession()) { EAccessionInfo ai = IdentifyAccession(tsid->GetAccession()); if ((ai & eAcc_type_mask) == e_not_set) { // We *know* what the type should be.... return (EAccessionInfo)((ai & eAcc_flag_mask) | type); } else if ((ai & eAcc_type_mask) == type) { return ai; } else { // misidentified or mislabeled; assume the former return type; } } else { return type; } } default: return type; }}static inlinevoid x_GetLabel_Type(const CSeq_id& id, string* label, CSeq_id::TLabelFlags flags){ CSeq_id::E_Choice choice = id.Which(); _ASSERT(choice < CSeq_id::e_MaxChoice); if (choice >= CSeq_id::e_MaxChoice) { return; } switch (choice) { default: *label += s_TextId[choice]; break; case CSeq_id::e_General: // for general IDs, use the db-name only *label += "gnl"; break; } // no extra flag interpretation currently}static inlinevoid x_GetLabel_Content(const CSeq_id& id, string* label, CSeq_id::TLabelFlags flags){ const CTextseq_id* tsid = id.GetTextseq_Id(); //text id if (tsid) { string str; if (tsid->IsSetAccession()) { str = tsid->GetAccession(); } else if (tsid->IsSetName()) { str = tsid->GetName(); } if ( !str.empty() ) { if ( (flags & CSeq_id::fLabel_Version) && tsid->IsSetVersion()) { str += "." + NStr::IntToString(tsid->GetVersion()); } } *label += str; } else { //non-text id switch (id.Which()) { case CSeq_id::e_not_set: break; case CSeq_id::e_Local: {{ const CObject_id& oid = id.GetLocal(); if (oid.Which() == CObject_id::e_Id) { *label += NStr::IntToString(oid.GetId()); } else if (oid.Which() == CObject_id::e_Str) { *label += oid.GetStr(); } }} break; case CSeq_id::e_Gibbsq: *label += NStr::IntToString(id.GetGibbsq()); break; case CSeq_id::e_Gibbmt: *label += NStr::IntToString(id.GetGibbmt()); break; case CSeq_id::e_Giim: *label += NStr::IntToString(id.GetGiim().GetId()); break; case CSeq_id::e_General: {{ const CDbtag& dbt = id.GetGeneral(); *label += dbt.GetDb() + '|'; if (dbt.GetTag().Which() == CObject_id::e_Id) { *label += NStr::IntToString(dbt.GetTag().GetId()); } else if (dbt.GetTag().Which()==CObject_id::e_Str) { *label += dbt.GetTag().GetStr(); } }} break; case CSeq_id::e_Patent: {{ const CId_pat& idp = id.GetPatent().GetCit(); *label += idp.GetCountry() + (idp.GetId().IsNumber() ? idp.GetId().GetNumber() : idp.GetId().GetApp_number()) + NStr::IntToString(id.GetPatent().GetSeqid()); }} break; case CSeq_id::e_Gi: *label += NStr::IntToString(id.GetGi()); break; case CSeq_id::e_Pdb: {{ const CPDB_seq_id& pid = id.GetPdb(); char chain = (char)pid.GetChain(); if (chain == '|') { *label += pid.GetMol().Get() + "|VB"; } else if (islower(chain) != 0) { *label += pid.GetMol().Get() + "-" + (char) toupper(chain); } else if ( chain == '\0' ) { *label += pid.GetMol().Get() + "-"; } else { *label += pid.GetMol().Get() + "-" + chain; } }} break; default: break; } }}void CSeq_id::GetLabel(string* label, ELabelType type, TLabelFlags flags) const{ if ( !label ) { return; } switch (type) { case eFasta: *label = AsFastaString(); break; case eBoth: x_GetLabel_Type(*this, label, flags); *label += "|"; x_GetLabel_Content(*this, label, flags); break; case eType: x_GetLabel_Type(*this, label, flags); break; case eContent: x_GetLabel_Content(*this, label, flags); break; }}/*Return seqid string with optional version for text seqid type (default no version).*/ string CSeq_id::GetSeqIdString(bool with_version) const{ string label; TLabelFlags flags = 0; if (with_version) { flags |= fLabel_Version; } GetLabel(&label, eContent, flags); return label;}void CSeq_id::WriteAsFasta(ostream& out) const{ E_Choice the_type = Which(); if (the_type > e_Tpd) // New SeqId type the_type = e_not_set; out << s_TextId[the_type] << '|'; switch (the_type) { case e_not_set: break; case e_Local: GetLocal().AsString(out); break; case e_Gibbsq: out << GetGibbsq(); break; case e_Gibbmt: out << GetGibbmt(); break; case e_Giim: out << (GetGiim().GetId()); break; case e_Genbank: GetGenbank().AsFastaString(out); break; case e_Embl: GetEmbl().AsFastaString(out); break; case e_Pir: GetPir().AsFastaString(out); break; case e_Swissprot: GetSwissprot().AsFastaString(out); break; case e_Patent: GetPatent().AsFastaString(out); break; case e_Other: GetOther().AsFastaString(out); break; case e_General: { const CDbtag& dbt = GetGeneral(); out << (dbt.GetDb()) << '|'; // no Upcase per Ostell - Karl 7/2001 dbt.GetTag().AsString(out); } break; case e_Gi: out << GetGi(); break; case e_Ddbj: GetDdbj().AsFastaString(out); break; case e_Prf: GetPrf().AsFastaString(out); break; case e_Pdb: GetPdb().AsFastaString(out);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -