📄 seq_id.cpp
字号:
break; case e_Tpg: GetTpg().AsFastaString(out); break; case e_Tpe: GetTpe().AsFastaString(out); break; case e_Tpd: GetTpd().AsFastaString(out); break; default: out << "[UnknownSeqIdType]"; break; }}const string CSeq_id::AsFastaString(void) const{ CNcbiOstrstream str; WriteAsFasta(str); return CNcbiOstrstreamToString(str);}//// Local functions for producing a sequence ID 'score'// These functions produce scores in FastA order//static int s_ScoreNAForFasta(const CSeq_id* id){ switch (id->Which()) { case CSeq_id::e_not_set: case CSeq_id::e_Giim: case CSeq_id::e_Pir: case CSeq_id::e_Swissprot: case CSeq_id::e_Prf: return kMax_Int; case CSeq_id::e_Local: return 230; case CSeq_id::e_Gi: return 120; case CSeq_id::e_General: return 50; case CSeq_id::e_Patent: return 40; case CSeq_id::e_Gibbsq: case CSeq_id::e_Gibbmt: case CSeq_id::e_Pdb: return 30; case CSeq_id::e_Other: return 15; default: return 20; // [third party] GenBank/EMBL/DDBJ }}static int s_ScoreAAForFasta(const CSeq_id* id){ switch (id->Which()) { case CSeq_id::e_not_set: case CSeq_id::e_Giim: return kMax_Int; case CSeq_id::e_Local: return 230; case CSeq_id::e_Gi: return 120; case CSeq_id::e_General: return 90; case CSeq_id::e_Patent: return 80; case CSeq_id::e_Prf: return 70; case CSeq_id::e_Pdb: return 50; case CSeq_id::e_Gibbsq: case CSeq_id::e_Gibbmt: return 40; case CSeq_id::e_Pir: return 30; case CSeq_id::e_Swissprot: return 20; case CSeq_id::e_Other: return 15; default: return 60; // [third party] GenBank/EMBL/DDBJ }}//// GetStringDescr()// Given a bioseq, return the best possible ID description, in a number of// appealing formats. This function can produce FastA-formatted titles or a// number of sub-titles (GI only, Best Accession with or without version).//string CSeq_id::GetStringDescr(const CBioseq& bioseq, EStringFormat fmt){ bool is_na = bioseq.GetInst().GetMol() != CSeq_inst::eMol_aa; CRef<CSeq_id> best_id = FindBestChoice(bioseq.GetId(), is_na ? s_ScoreNAForFasta : s_ScoreAAForFasta); switch (fmt) { case eFormat_FastA: {{ // FastA format // Here we have something like: // gi|###|SOME_ACCESSION|title bool found_gi = false; CNcbiOstrstream out_str; ITERATE (CBioseq::TId, id, bioseq.GetId()) { if ((*id)->IsGi()) { (*id)->WriteAsFasta(out_str); found_gi = true; break; } } if (best_id.NotEmpty() && best_id->Which() != CSeq_id::e_Gi) { if (found_gi) { out_str << '|'; } best_id->WriteAsFasta(out_str); } return CNcbiOstrstreamToString(out_str); }} break; case eFormat_ForceGI: // eForceGI produces a string containing only the GI in FastA format // so we have: // gi|#### ITERATE (CBioseq::TId, iter, bioseq.GetId()) { if ( (*iter)->IsGi() ) { CNcbiOstrstream out_str; (*iter)->WriteAsFasta(out_str); return CNcbiOstrstreamToString(out_str); } } break; case eFormat_BestWithVersion: // eBestWithVersion produces only the 'best' accession name, with // its version indicator if (best_id.NotEmpty()) { string label; best_id->GetLabel(&label, eDefault, fLabel_Version); return label; } break; case eFormat_BestWithoutVersion: // eBestWithoutVersion produces only the 'best' accession name, // without its version indicator if (best_id.NotEmpty()) { string label; best_id->GetLabel(&label, eDefault, 0); return label; } break; } // catch-all for unusual events return "";}CSeq_id::CSeq_id(const CDbtag& dbtag, bool set_as_general){ int version = -1; string acc; if (dbtag.GetTag().IsStr()) { acc = dbtag.GetTag().GetStr(); string::size_type pos = acc.find_last_of("."); if (pos != string::npos) { version = NStr::StringToInt(acc.substr(pos + 1, acc.length() - pos)); acc.erase(pos); } } switch (dbtag.GetType()) { case CDbtag::eDbtagType_GenBank: try { int gi = NStr::StringToInt(acc); SetGi(gi); } catch (...) { SetGenbank().SetAccession(acc); if (version != -1) { SetGenbank().SetVersion(version); } } break; case CDbtag::eDbtagType_EMBL: SetEmbl().SetAccession(acc); if (version != -1) { SetEmbl().SetVersion(version); } break; case CDbtag::eDbtagType_DDBJ: SetDdbj().SetAccession(acc); if (version != -1) { SetDdbj().SetVersion(version); } break; case CDbtag::eDbtagType_GI: if (dbtag.GetTag().IsStr()) { SetGi(NStr::StringToInt(dbtag.GetTag().GetStr())); } else { SetGi(dbtag.GetTag().GetId()); } break; case CDbtag::eDbtagType_bad: default: // not understood as a sequence id - leave as e_not_set if (set_as_general) { SetGeneral().Assign(dbtag); } break; }}//SeqIdFastAConstructorsCSeq_id::CSeq_id( const string& the_id ){ // If no vertical bar, tries to interpret the string as a pure // accession, inferring the type from the initial letter(s). if (the_id.find('|') == NPOS) { SIZE_TYPE dot = the_id.find('.'); string acc_in = the_id.substr(0, dot); EAccessionInfo info = IdentifyAccession(acc_in); int ver = 0; if (dot != NPOS) { ver = NStr::StringToNumeric(the_id.substr(dot + 1)); } if (GetAccType(info) != e_not_set) { x_Init(GetAccType(info), acc_in, kEmptyStr, ver); } return; } // Create an istrstream on string the_id std::istrstream myin(the_id.c_str() ); string the_type_in, acc_in, name_in, version_in, release_in; // Read the part of the_id up to the vertical bar ( "|" ) NcbiGetline(myin, the_type_in, '|'); // Remove spaces from front and back of the_type_in string the_type_use = NStr::TruncateSpaces(the_type_in, NStr::eTrunc_Both); // Determine the type from the string CSeq_id_Base::E_Choice the_type = WhichInverseSeqId(the_type_use.c_str()); // Construct according to type if ( the_type == CSeq_id::e_Local ) { NcbiGetline( myin, acc_in, 0 ); // take rest x_Init( the_type, acc_in ); return; } if ( !NcbiGetline( myin, acc_in, '|' ) ) return; if ( the_type == CSeq_id::e_General || the_type == CSeq_id::e_Pdb ) { //Take the rest of the line NcbiGetline( myin, name_in, 0 ); x_Init( the_type, acc_in, name_in ); return; } else if ( the_type == CSeq_id::e_Gi ) { x_Init( the_type, acc_in ); return; } if ( NcbiGetline(myin, name_in, '|') ) { if ( NcbiGetline(myin, version_in, '|') ) { NcbiGetline(myin, release_in, '|'); } } string version = NStr::TruncateSpaces( version_in, NStr::eTrunc_Both ); int ver = 0; if ( ! version.empty() ) { if ( (ver = NStr::StringToNumeric(version) ) < 0) { THROW1_TRACE(invalid_argument, "Unexpected non-numeric version: " + version + "\nthe_id: " + the_id); } } x_Init(the_type, acc_in, name_in, ver, release_in);}// acc_in is just first string, as in text seqid, for// wierd cases (patents, pdb) not really an accCSeq_id::CSeq_id(CSeq_id_Base::E_Choice the_type, const string& acc_in, const string& name_in, const string& version_in, const string& release_in ){ string version = NStr::TruncateSpaces(version_in, NStr::eTrunc_Both); int ver = 0; if ( !version.empty() ) { if ( (ver = NStr::StringToNumeric(version)) < 0 ) { THROW1_TRACE(invalid_argument, "Unexpected non-numeric version. " "\nthe_type = " + string(s_TextId[the_type]) + "\nacc_in = " + acc_in + "\nname_in = " + name_in + "\version_in = " + version_in + "\nrelease_in = " + release_in); } } x_Init(the_type, acc_in, name_in, ver, release_in);}static void s_InitThrow(const string& message, const string& type, const string& acc, const string& name, const string& version, const string& release){ THROW1_TRACE(invalid_argument, "CSeq_id:: " + message + "\ntype = " + type + "\naccession = " + acc + "\nname = " + name + "\nversion = " + version + "\nrelease = " + release);}CSeq_id::CSeq_id(const string& the_type_in, const string& acc_in, const string& name_in, const string& version_in, const string& release_in){ string the_type_use = NStr::TruncateSpaces(the_type_in, NStr::eTrunc_Both); string version = NStr::TruncateSpaces(version_in, NStr::eTrunc_Both); int ver = 0; CSeq_id_Base::E_Choice the_type = WhichInverseSeqId(the_type_use.c_str()); if ( !version.empty() ) { if ( (ver = NStr::StringToNumeric(version)) < 0) { s_InitThrow("Unexpected non-numeric version.", the_type_in, acc_in, name_in, version_in, release_in); } } x_Init(the_type, acc_in, name_in, ver, release_in);}CSeq_id::CSeq_id(const string& the_type_in, const string& acc_in, const string& name_in, int version, const string& release_in ){ string the_type_use = NStr::TruncateSpaces(the_type_in, NStr::eTrunc_Both); CSeq_id_Base::E_Choice the_type = WhichInverseSeqId (the_type_use.c_str()); x_Init(the_type, acc_in, name_in, version, release_in);}CSeq_id::CSeq_id( CSeq_id_Base::E_Choice the_type, const string& acc_in, const string& name_in, int version, const string& release_in){ x_Init(the_type, acc_in, name_in, version, release_in);}CSeq_id::CSeq_id( CSeq_id_Base::E_Choice the_type, int the_id){ if(the_id<=0) THROW1_TRACE(invalid_argument, "Specified Seq-id value is negative"); switch (the_type) { case CSeq_id::e_Local: SetLocal().SetId(the_id); break; case CSeq_id::e_Gibbsq: SetGibbsq(the_id); break; case CSeq_id::e_Gibbmt: SetGibbmt(the_id); break; case CSeq_id::e_Giim: SetGiim().SetId(the_id); break; case CSeq_id::e_Gi: SetGi(the_id); break; default: THROW1_TRACE(invalid_argument, "Specified Seq-id type is not numeric seq-id"); } }// Karl Sirotkin 7/2001voidCSeq_id::x_Init( CSeq_id_Base::E_Choice the_type, const string& acc_in, const string& name_in, int version , const string& release_in){ int the_id; string acc = NStr::TruncateSpaces(acc_in, NStr::eTrunc_Both); string name = NStr::TruncateSpaces(name_in, NStr::eTrunc_Both); string release = NStr::TruncateSpaces(release_in, NStr::eTrunc_Both); switch (the_type) { case CSeq_id::e_not_set: // Will cause unspecified SeqId to be returned. break; case CSeq_id::e_Local: { CSeq_id::TLocal & loc = SetLocal(); string::const_iterator it = acc.begin(); if ( (the_id = NStr::StringToNumeric(acc)) >= 0 && *it != '0' ) { loc.SetId(the_id); } else { // to cover case where embedded vertical bar in // string, could add code here, to concat a // '|' and name string, if not null/empty loc.SetStr(acc); } break; } case CSeq_id::e_Gibbsq: if ( (the_id = NStr::StringToNumeric (acc)) >= 0 ) { SetGibbsq(the_id); } else { s_InitThrow("Unexpected non-numeric accession.", string(s_TextId[the_type]), acc_in, name_in, NStr::IntToString(version), release_in); } break;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -