📄 smiles.java
字号:
/*----------------------------------------------------------------------
  File    : SMILES.java
  Contents: Simplified Molecular Input Line Entry System
  Authors : Christian Borgelt
  History : 16.08.2006 file created from file SmilesTokenizer.java
----------------------------------------------------------------------*/
package moss;

import java.io.IOException;

/*--------------------------------------------------------------------*/
/**
 * Parser for molecule descriptions written in SMILES notation.
 *
 * <p>The parser reads characters from {@code this.desc}, starting at
 * the cursor {@code this.pos}, and keeps the atoms associated with
 * open labels in {@code this.labels}. None of these three fields is
 * declared in this class; presumably they are inherited from
 * {@link Notation} (to be confirmed against that class).</p>
 */
public class SMILES extends Notation {
/*--------------------------------------------------------------------*/

  /* --- constants: atoms that need no brackets --- */
  private static final int[] noBrackets = {
    Atom.BORON      /* B  */, Atom.CARBON   /* C  */,
    Atom.NITROGEN   /* N  */, Atom.OXYGEN   /* O  */,
    Atom.FLOURINE   /* F  */, Atom.PHOSPHORUS /* P  */,
    Atom.SULFUR     /* S  */, Atom.CHLORINE /* Cl */,
    Atom.BROMINE    /* Br */, Atom.IODINE   /* I  */,
    Atom.ANY, Atom.CHAIN & Atom.TYPEMASK };

  /*------------------------------------------------------------------*/
  /**
   * Creates a SMILES object and allocates the label table.
   *
   * <p>The table has 100 entries; {@link #parse(int)} indexes it with
   * one digit labels (1 to 9) and two digit labels (10 to 99).</p>
   */
  public SMILES ()
  {                             /* --- create a SMILES object */
    this.labels = new int[100];
  }  /* SMILES() */

  /*------------------------------------------------------------------*/
  /**
   * Reads one bracketed atom from the description and returns its code.
   *
   * <p>On entry {@code this.pos} must point at the first character
   * after the opening bracket. On a normal return it points just
   * behind the closing bracket. The text is processed as follows:</p>
   * <ol>
   *   <li>Leading digits (an isotope index) are skipped.</li>
   *   <li>The element symbol is read. A lowercase first letter sets
   *       the {@code Atom.AROMATIC} flag and is then treated like its
   *       uppercase form. An uppercase second letter is treated like
   *       its lowercase form, except for 'H', which (optionally
   *       followed by one digit 1-9) is consumed as attached hydrogen
   *       and otherwise ignored.</li>
   *   <li>The rest of the bracket is scanned up to the closing
   *       bracket. The first run of '+' or '-' characters found
   *       defines the charge: a single sign directly followed by a
   *       digit 1-9 yields that magnitude, otherwise the magnitude is
   *       the number of consecutive identical signs (so "+++" is +3).
   *       All other characters are skipped, so a charge that follows
   *       other markers inside the bracket is still recognised.</li>
   * </ol>
   *
   * @param  len exclusive upper bound for {@code this.pos} when reading
   *             from {@code this.desc} (presumably the length of the
   *             description; confirm with the caller)
   * @return the atom code: the index of the element in
   *         {@code Atom.names}, combined by bitwise or with
   *         {@code Atom.AROMATIC} if the symbol started with a
   *         lowercase letter and, unless the closing bracket directly
   *         follows the symbol, with {@code Atom.codeCharge(charge)}
   * @throws IOException if the text ends before the closing bracket or
   *         the element symbol is not found in {@code Atom.names}
   */
  private int getAtom (int len) throws IOException
  {                             /* --- get atom in brackets */
    int    i, k;                /* loop variable, buffer/charge */
    char   c, n;                /* current and next character */
    char   sign;                /* sign character of the charge */
    int    cnt;                 /* sign counter / charge magnitude */
    String s;                   /* to traverse the atom names */
    int    atom = 0;            /* number/code of an atom */

    do {                        /* skip a possible leading number */
      if (this.pos >= len)      /* (isotope index before element) */
        throw new IOException("missing atom after \"[\"");
      c = this.desc.charAt(this.pos++);
    } while ((c >= '0') && (c <= '9'));

    /* --- find the atom number --- */
    if ((c >= 'a') && (c <= 'z')) {
      atom = Atom.AROMATIC;     /* if the name of an atom starts */
      c += 'A' -'a';            /* with a lowercase letter, it is */
    }                           /* involved in an aromatic bond */
    if ((c < 'A') || (c > 'Z')) /* check for a letter */
      throw new IOException("illegal atom " +c);
    /* peek at the next character; pretend ']' at the end of the text */
    n = (this.pos < len) ? this.desc.charAt(this.pos) : ']';
    if (n == 'H') {             /* if a hydrogen follows */
      n = (++this.pos < len) ? this.desc.charAt(this.pos) : ']';
      if ((n >= '1') && (n <= '9')) ++this.pos;
      n = ']';                  /* skip attached hydrogen atoms and */
    }                           /* force the single letter branch */
    else if ((n >= 'A') && (n <= 'Z'))
      n += 'a' -'A';            /* make letter lowercase */
    if ((n < 'a') || (n > 'z')){/* if single letter atom */
      for (i = Atom.oneLetter.length; --i >= 0; ) {
        k = Atom.oneLetter[i];  /* traverse the atom names */
        s = Atom.names[k];      /* that have only one letter */
        if (c == s.charAt(0)) { atom |= k; break; }
      }                         /* try to find a matching name */
      if (i < 0) throw new IOException("illegal atom " +c);
    }
    else {                      /* if double letter atom */
      this.pos++;               /* skip the second letter */
      for (i = 0; ++i < Atom.names.length; ) {
        s = Atom.names[i];      /* traverse the atom names */
        if ((s.length() > 1)    /* that have two letters */
        &&  (c == s.charAt(0)) && (n == s.charAt(1))) {
          atom |= i; break; }   /* try to find a matching */
      }                         /* two letter atom name */
      if (i >= Atom.names.length)
        throw new IOException("illegal atom " +c+n);
    }                           /* check whether a name was found */

    if (this.pos >= len) throw new IOException("missing \"]\"");
    c = this.desc.charAt(this.pos++);
    if (c == ']') return atom;  /* check for the atom delimiter */

    /* --- get a possible charge, skip additional information --- */
    k = 0;                      /* no charge found yet */
    do {                        /* scan up to the closing bracket */
      if ((k == 0) && ((c == '+') || (c == '-'))) {
        sign = c; cnt = 0;      /* note the sign and count how */
        do {                    /* often it is repeated */
          cnt++;
          if (this.pos >= len) throw new IOException("missing \"]\"");
          c = this.desc.charAt(this.pos++);
        } while (c == sign);
        if ((cnt == 1) && (c >= '1') && (c <= '9')) {
          cnt = c -'0';         /* a digit after a single sign */
          if (this.pos >= len)  /* states the charge explicitly */
            throw new IOException("missing \"]\"");
          c = this.desc.charAt(this.pos++);
        }                       /* consume the digit */
        k = (sign == '+') ? cnt : -cnt;
      }                         /* get the value of the charge */
      else {                    /* skip any other character */
        if (this.pos >= len) throw new IOException("missing \"]\"");
        c = this.desc.charAt(this.pos++);
      }
    } while (c != ']');         /* while not at end of atom */
    atom |= Atom.codeCharge(k); /* add the charge to the code */
    return atom;                /* return the atom code */
  }  /* getAtom() */

  /*------------------------------------------------------------------*/
  /**
   * Parses the description from the current position until the end of
   * the text or the end of the current branch.
   *
   * <p>A '(' is handled by a recursive call with the same {@code src}.
   * That call must end on a ')'; otherwise an exception is thrown.</p>
   *
   * @param  src value stored in the label table when a label is seen
   *             whose current entry is negative; '(' is rejected while
   *             it is negative (presumably the index of the atom to
   *             which the next atom is bonded; confirm with the caller)
   * @return {@code true} if parsing stopped at a ')',
   *         {@code false} if the end of the description was reached
   * @throws IOException if the description is malformed
   */
  public boolean parse (int src) throws IOException
  {                             /* --- parse a molecule description */
    int  len;                   /* length of string to parse */
    char c, n;                  /* current and next character */
    int  a, b, i;               /* atom and bond types, buffer */
    int  dst;                   /* index of destination atom */

    len = this.desc.length();   /* get the length of the description */
    dst = a = -1;               /* clear the atom type and index */
    b   = Bond.UNKNOWN;         /* and the bond type */
    while (true) {              /* parse loop for a branch */
      if (this.pos >= len)      /* if at end of description, */
        return false;           /* abort indicating no ')' */
      c = this.desc.charAt(this.pos++);
      switch (c) {              /* get and evaluate next character */

        /* -- branches -- */
        case ')':               /* if at the end of a branch */
          if (b != Bond.UNKNOWN)/* check for a preceding bond */
            throw new IOException("unexpected \")\"");
          return true;          /* abort indicating a ')' */
        case '(':               /* if at the start of a branch */
          if ((src < 0) || (b != Bond.UNKNOWN))
            throw new IOException("unexpected \"(\"");
          if (!this.parse(src)) /* recursively parse the branch */
            throw new IOException("\")\" expected");
          break;                /* check for a closing ')' */

        /* -- labels -- */
        case '%':
        case '1': case '2': case '3': case '4': case '5':
        case '6': case '7': case '8': case '9':
          if (c != '%')         /* if one digit label */
            i = c -'0';         /* compute the value of the digit */
          else {                /* if two digit label */
            if (this.pos > len -2)
              throw new IOException("incomplete label %"
                                    +this.desc.substring(this.pos));
            c = this.desc.charAt(this.pos++);  /* get current and */
            n = this.desc.charAt(this.pos++);  /* next character */
            if ((c < '1') || (c > '9')   /* check for a number */
            ||  (n < '0') || (n > '9'))  /* between 10 and 99 */
              throw new IOException("illegal label %" +c +n);
            i = (c -'0') *10 +(n -'0');
          }                     /* compute the label value */
          dst = this.labels[i]; /* get index of destination atom */
          /* a negative entry takes src, any other entry is reset */
          this.labels[i] = (dst < 0) ? src : -1;
          break;                /* update the label vector */

        /* -- bonds -- */
        case '.': b = Bond.NULL;     break;
        case '-': b = Bond.SINGLE;   break;
        case '/': b = Bond.SINGLE;   break;
        case ':': b = Bond.AROMATIC; break;
        case '=': b = Bond.DOUBLE;   break;
        case '#': b = Bond.TRIPLE;   break;

        /* -- atoms -- */
        case 'b': a = Atom.AROMATIC|Atom.BORON; break;
        case 'B':               /* 'B' is boron unless an 'r' follows */
          if ((this.pos < len) && (this.desc.charAt(this.pos) == 'r')) {
            this.pos++; a = Atom.BROMINE; }
          else { a = Atom.BORON; }
          break;
        case 'c': a = Atom.AROMATIC|Atom.CARBON; break;
        case 'C':               /* 'C' followed by 'l' is chlorine */
          if ((this.pos < len) && (this.desc.charAt(this.pos) == 'l')) {
            this.pos++; a = Atom.CHLORINE; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -