📄 sequence_set.cpp
字号:
// protein formats if (bioseq.GetInst().GetSeq_data().IsNcbieaa()) { sequenceString = bioseq.GetInst().GetSeq_data().GetNcbieaa().Get(); isProtein = true; } else if (bioseq.GetInst().GetSeq_data().IsIupacaa()) { sequenceString = bioseq.GetInst().GetSeq_data().GetIupacaa().Get(); isProtein = true; } else if (bioseq.GetInst().GetSeq_data().IsNcbistdaa()) { StringFromStdaa(bioseq.GetInst().GetSeq_data().GetNcbistdaa().Get(), &sequenceString); isProtein = true; } // nucleotide formats else if (bioseq.GetInst().GetSeq_data().IsIupacna()) { sequenceString = bioseq.GetInst().GetSeq_data().GetIupacna().Get(); // convert 'T' to 'U' for RNA if (bioseq.GetInst().GetMol() == CSeq_inst::eMol_rna) { for (int i=0; i<sequenceString.size(); ++i) { if (sequenceString[i] == 'T') sequenceString[i] = 'U'; } } } else if (bioseq.GetInst().GetSeq_data().IsNcbi4na()) { StringFrom4na(bioseq.GetInst().GetSeq_data().GetNcbi4na().Get(), &sequenceString, (bioseq.GetInst().GetMol() == CSeq_inst::eMol_dna)); } else if (bioseq.GetInst().GetSeq_data().IsNcbi8na()) { // same repr. for non-X as 4na StringFrom4na(bioseq.GetInst().GetSeq_data().GetNcbi8na().Get(), &sequenceString, (bioseq.GetInst().GetMol() == CSeq_inst::eMol_dna)); } else if (bioseq.GetInst().GetSeq_data().IsNcbi2na()) { StringFrom2na(bioseq.GetInst().GetSeq_data().GetNcbi2na().Get(), &sequenceString, (bioseq.GetInst().GetMol() == CSeq_inst::eMol_dna)); if (bioseq.GetInst().IsSetLength() && bioseq.GetInst().GetLength() < sequenceString.length()) sequenceString.resize(bioseq.GetInst().GetLength()); } else { ERRORMSG("Sequence::Sequence() - sequence " << gi << ": confused by sequence string format"); return; } // check length if (bioseq.GetInst().IsSetLength() && bioseq.GetInst().GetLength() != sequenceString.length()) { ERRORMSG("Sequence::Sequence() - sequence string length mismatch"); return; } // force uppercase for (int i=0; i<sequenceString.length(); ++i) sequenceString[i] = toupper(sequenceString[i]); } else { ERRORMSG("Sequence::Sequence() - sequence " << gi << ": confused by sequence representation"); return; } // get identifier (may be NULL if there's a problem!) identifier = MoleculeIdentifier::GetIdentifier(this, pdbID, pdbChain, mmdbID, gi, accession);}void Sequence::AddMMDBAnnotTag(int mmdbID) const{ CBioseq::TAnnot::const_iterator a, ae = bioseqASN->GetAnnot().end(); CSeq_annot::C_Data::TIds::const_iterator i, ie; bool found = false; for (a=bioseqASN->GetAnnot().begin(); a!=ae; ++a) { if ((*a)->GetData().IsIds()) { for (i=(*a)->GetData().GetIds().begin(), ie=(*a)->GetData().GetIds().end(); i!=ie; ++i) { if ((*i)->IsGeneral() && (*i)->GetGeneral().GetDb() == "mmdb" && (*i)->GetGeneral().GetTag().IsId()) { found = true; TRACEMSG("mmdb link already present in sequence " << identifier->ToString()); if ((*i)->GetGeneral().GetTag().GetId() != mmdbID || (identifier->mmdbID != MoleculeIdentifier::VALUE_NOT_SET && identifier->mmdbID != mmdbID)) ERRORMSG("Sequence::AddMMDBAnnotTag() - mmdbID mismatch"); break; } } } if (found) break; } if (!found) { CRef < CSeq_id > seqid(new CSeq_id()); seqid->SetGeneral().SetDb("mmdb"); seqid->SetGeneral().SetTag().SetId(mmdbID); CRef < CSeq_annot > annot(new CSeq_annot()); annot->SetData().SetIds().push_back(seqid); (const_cast<Sequence*>(this))->bioseqASN->SetAnnot().push_back(annot); }}CSeq_id * Sequence::CreateSeqId(void) const{ CSeq_id *sid = new CSeq_id(); FillOutSeqId(sid); return sid;}void Sequence::FillOutSeqId(ncbi::objects::CSeq_id *sid) const{ sid->Reset(); CBioseq::TId::const_iterator i, ie = bioseqASN->GetId().end(); // use pdb id if present for (i=bioseqASN->GetId().begin(); i!=ie; ++i) { if ((*i)->IsPdb()) { sid->Assign(**i); return; } } // use gi if present for (i=bioseqASN->GetId().begin(); i!=ie; ++i) { if ((*i)->IsGi()) { sid->Assign(**i); return; } } // otherwise, just use the first one if (bioseqASN->GetId().size() > 0) sid->Assign(bioseqASN->GetId().front().GetObject()); else ERRORMSG("Sequence::FillOutSeqId() - can't do Seq-id on sequence " << identifier->ToString()); // dangerous to create new Seq-id's...// if (identifier->pdbID.size() > 0 && identifier->pdbChain != MoleculeIdentifier::VALUE_NOT_SET) {// sid->SetPdb().SetMol().Set(identifier->pdbID);// if (identifier->pdbChain != ' ') sid->SetPdb().SetChain(identifier->pdbChain);// } else if (identifier->gi != MoleculeIdentifier::VALUE_NOT_SET) { // use gi// sid->SetGi(identifier->gi);// } else if (identifier->accession.size() > 0) {// CObject_id *oid = new CObject_id();// oid->SetStr(identifier->accession);// sid->SetLocal(*oid);}void Sequence::AddCSeqId(SeqIdPtr *id, bool addAllTypes) const{ if (identifier->pdbID.size() > 0) { PDBSeqIdPtr pdbid = PDBSeqIdNew(); pdbid->mol = StrSave(identifier->pdbID.c_str()); pdbid->chain = (Uint1) identifier->pdbChain; ValNodeAddPointer(id, SEQID_PDB, pdbid); if (!addAllTypes) return; } if (identifier->gi != MoleculeIdentifier::VALUE_NOT_SET) { ValNodeAddInt(id, SEQID_GI, identifier->gi); if (!addAllTypes) return; } if (identifier->accession.size() > 0) { ObjectIdPtr local = ObjectIdNew(); local->str = StrSave(identifier->accession.c_str()); ValNodeAddPointer(id, SEQID_LOCAL, local); if (!addAllTypes) return; }}int Sequence::GetOrSetMMDBLink(void) const{ if (molecule) { const StructureObject *object; if (!molecule->GetParentOfType(&object)) return identifier->mmdbID; if (identifier->mmdbID != MoleculeIdentifier::VALUE_NOT_SET && identifier->mmdbID != object->mmdbID) ERRORMSG("Sequence::GetOrSetMMDBLink() - mismatched MMDB ID: identifier says " << identifier->mmdbID << ", StructureObject says " << object->mmdbID); else const_cast<MoleculeIdentifier*>(identifier)->mmdbID = object->mmdbID; } return identifier->mmdbID;}void Sequence::LaunchWebBrowserWithInfo(void) const{ string db = isProtein ? "Protein" : "Nucleotide"; string opt = isProtein ? "GenPept" : "GenBank"; CNcbiOstrstream oss; oss << "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Search&doptcmdl=" << opt << "&db=" << db << "&term="; // prefer gi's, since accessions can be outdated if (identifier->gi != MoleculeIdentifier::VALUE_NOT_SET) { oss << identifier->gi; } else if (identifier->pdbID.size() > 0) { oss << identifier->pdbID.c_str(); if (identifier->pdbChain != ' ') oss << (char) identifier->pdbChain; } else if (identifier->accession.size() > 0) { if (identifier->accession == "query" || identifier->accession == "consensus") return; oss << identifier->accession.c_str(); } oss << '\0'; LaunchWebPage(oss.str()); delete oss.str();}static bool Prosite2Regex(const string& prosite, string *regex, int *nGroups){ try { // check allowed characters static const string allowed = "-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789[],(){}<>."; int i; for (i=0; i<prosite.size(); ++i) if (allowed.find(toupper(prosite[i])) == string::npos) break; if (i != prosite.size()) throw "invalid ProSite character"; if (prosite[prosite.size() - 1] != '.') throw "ProSite pattern must end with '.'"; // translate into real regex syntax; regex->erase(); *nGroups = 0; bool inGroup = false; for (int i=0; i<prosite.size(); ++i) { // handle grouping and termini bool characterHandled = true; switch (prosite[i]) { case '-': case '.': case '>': if (inGroup) { *regex += ')'; inGroup = false; } if (prosite[i] == '>') *regex += '$'; break; case '<': *regex += '^'; break; default: characterHandled = false; break; } if (characterHandled) continue; if (!inGroup && ( (isalpha(prosite[i]) && toupper(prosite[i]) != 'X') || prosite[i] == '[' || prosite[i] == '{')) { *regex += '('; (*nGroups)++; inGroup = true; } // translate syntax switch (prosite[i]) { case '(': *regex += '{'; break; case ')': *regex += '}'; break; case '{': *regex += "[^"; break; case '}': *regex += ']'; break; case 'X': case 'x': *regex += '.'; break; default: *regex += toupper(prosite[i]); break; } } } catch (const char *err) { ERRORMSG("Prosite2Regex() - " << err); return false; } return true;}bool Sequence::HighlightPattern(const string& prositePattern) const{ // setup regex syntax reg_syntax_t newSyntax = RE_CONTEXT_INDEP_ANCHORS | RE_CONTEXT_INVALID_OPS | RE_INTERVALS | RE_LIMITED_OPS | RE_NO_BK_BRACES | RE_NO_BK_PARENS | RE_NO_EMPTY_RANGES; reg_syntax_t oldSyntax = re_set_syntax(newSyntax); bool retval = true; try { // allocate structures static re_pattern_buffer *patternBuffer = NULL; static re_registers *registers = NULL; if (!patternBuffer) { // new pattern initialized to zero patternBuffer = (re_pattern_buffer *) calloc(1, sizeof(re_pattern_buffer)); if (!patternBuffer) throw "can't allocate pattern buffer"; patternBuffer->fastmap = (char *) calloc(256, sizeof(char)); if (!patternBuffer->fastmap) throw "can't allocate fastmap"; registers = (re_registers *) calloc(1, sizeof(re_registers));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -