📄 molecule.java
字号:
/*----------------------------------------------------------------------
  File    : Molecule.java
  Contents: Molecule management for substructure mining
  Author  : Christian Borgelt
  History : 11.03.2002 file created
            13.03.2002 output method added
            14.03.2002 memory optimization added
            21.03.2002 second embed function added (from Miner.java)
            22.03.2002 another constructor added (from embedding)
            02.04.2002 successor pointer added (molecule list)
            14.04.2002 molecule marker added for faster counting
            21.02.2003 fragment to molecule cloning extended
            31.03.2003 output of Prolog description added
            31.07.2003 functions prune, rings, and markRings added
            04.08.2003 molecule duplication function added (for debug)
            07.08.2003 adapted to new classes, some rewriting
            10.08.2003 Prolog output extended (bond types)
            07.06.2005 bridge finding functions added
            20.07.2005 pre-masking of atoms and bonds added
            21.07.2005 encoding and decoding of atoms added
            22.07.2005 trimming of excluded atoms added
            23.07.2005 constructor from fragment optimized
            03.08.2005 embedding replaced by special recursive function
            04.08.2005 matching of ring bonds in embedding added
            06.08.2005 bug in function embed fixed (single atom seed)
            16.08.2005 seed embedding optimized (now based on bonds)
            03.05.2006 function makeCanonic added
            08.05.2006 function makeCanonic generalized and debugged
            11.05.2006 function isCanonic added
            12.05.2006 bond type flag RINGBOND set in function rings
            14.05.2006 bug in function Molecule(Fragment) fixed
            16.05.2006 function hasOpenRings added (fragment filtering)
            01.06.2006 function isCanonic extended, result changed
            04.06.2006 function markPseudo added
            06.06.2006 adapted to changed type of bond flags
            18.06.2006 changed bond copying in Molecule(Fragment)
            26.06.2006 return value added to function trim
            28.06.2006 seed embedding functions redesigned
            29.06.2006 only group information retained in molecule
            09.08.2006 ignoring the atom type only in rings added
            10.08.2006 chain treatment in molecule creation adapted
            12.08.2006 parsing and output moved to Notation classes
----------------------------------------------------------------------*/
package moss;

import java.io.PrintStream;

/*----------------------------------------------------------------------
A molecule is represented as a graph with atoms as nodes and bonds as
edges. A molecule consists of a vector of atoms and a vector of bonds.
A molecule also has an identifier and a group by which it can be
classified as being in the focus or in the complement for the search.
----------------------------------------------------------------------*/
/*--------------------------------------------------------------------*/
public class Molecule {
/*--------------------------------------------------------------------*/

  /* --- constants: sizes --- */
  private static final int BLKSIZE = 16;
  /* block size for enlarging the atom and bond vectors */

  /* --- shared helper objects --- */
  private static final Embedding check = new Embedding();
  private static final Embedding found = new Embedding();
  /* dummies for containment check (saves a recursion parameter) */

  /* --- instance variables --- */
  protected String   id;        /* molecule identifier */
  protected int      group;     /* marker for grouping */
  protected int[]    supp;      /* support of a substructure */
  protected Molecule succ;      /* next molecule in list */
  protected TypeMap  map;       /* optional atom type map */
  protected int      atomcnt;   /* current number of atoms */
  protected Atom[]   atoms;     /* vector of atoms */
  protected int      bondcnt;   /* current number of bonds */
  protected Bond[]   bonds;     /* vector of bonds */

  /*--------------------------------------------------------------------
  The field "supp" is used to store the support of a found substructure
  that has been converted into a molecule to check for duplicates.
--------------------------------------------------------------------*/ public Molecule () { this(null, 0, BLKSIZE, BLKSIZE); } public Molecule (String id, int group) { this(id, group, BLKSIZE, BLKSIZE); } public Molecule (String id, int group, int atomcnt, int bondcnt) { /* --- create an empty molecule */ this.id = id; /* note the molecule identifier, */ this.group = group; /* the group flag, */ this.map = null; /* and the type map */ this.atoms = new Atom[atomcnt]; /* allocate vectors for atoms */ this.bonds = new Bond[bondcnt]; /* and for bonds and */ this.atomcnt = this.bondcnt = 0; /* clear the counters */ } /* Molecule() */ /*------------------------------------------------------------------*/ public void setId (String id) { this.id = id; } public String getId () { return this.id; } public void setGroup (int group) { this.group = group; } public int getGroup () { return this.group; } public int nAtoms () { return this.atomcnt; } public Atom getAtom (int index) { return this.atoms[index]; } public int nBonds () { return this.bondcnt; } public Bond getBond (int index) { return this.bonds[index]; } public TypeMap getMap () { return this.map; } /*------------------------------------------------------------------*/ public int addAtom (int type) { /* --- add an atom to a molecule */ int vsz; /* (new) vector size */ Atom[] vec; /* buffer for reallocation */ vsz = this.atoms.length; /* get the current vector size */ if (this.atomcnt >= vsz) { /* if the atom vector is full */ vsz += (vsz > BLKSIZE) ? 
vsz >> 1 : BLKSIZE; vec = new Atom[vsz]; /* enlarge the atom vector */ System.arraycopy(this.atoms, 0, vec, 0, this.atomcnt); this.atoms = vec; /* copy the atoms and */ } /* set the new atoms vector */ this.atoms[this.atomcnt] = new Atom(type); return this.atomcnt++; /* add a new atom to the vector and */ } /* addAtom() */ /* return the index of the new atom */ /*------------------------------------------------------------------*/ public int addBond (int src, int dst, int type) { /* --- add a bond to a molecule */ int vsz; /* (new) vector size */ Bond[] vec; /* buffer for reallocation */ if (type == Bond.NULL) /* if the bond is NULL (i.e., "."), */ return -1; /* do not process it */ vsz = this.bonds.length; /* get the current vector size */ if (this.bondcnt >= vsz) { /* if the bond vector is full */ vsz += (vsz > BLKSIZE) ? vsz >> 1 : BLKSIZE; vec = new Bond[vsz]; /* enlarge the bond vector */ System.arraycopy(this.bonds, 0, vec, 0, this.bondcnt); this.bonds = vec; /* copy the bonds and */ } /* set the new bonds vector */ this.bonds[this.bondcnt] = /* add a new bond to the vector */ new Bond(this.atoms[src], this.atoms[dst], type); return this.bondcnt++; /* return the index of the new bond */ } /* addBond() */ /*------------------------------------------------------------------*/ protected void opt () { /* --- optimize memory usage */ int i; /* loop variable */ Atom[] a; /* buffer for reallocation */ Bond[] b; /* ditto */ for (i = this.atomcnt; --i >= 0; ) this.atoms[i].opt(); /* optimize the atoms */ if (this.atoms.length > this.atomcnt) { a = new Atom[this.atomcnt]; System.arraycopy(this.atoms, 0, a, 0, this.atomcnt); this.atoms = a; /* get a vector of the right size */ } /* and copy the atoms into it */ if (this.bonds.length > this.bondcnt) { b = new Bond[this.bondcnt]; System.arraycopy(this.bonds, 0, b, 0, this.bondcnt); this.bonds = b; /* get a vector of the right size */ } /* and copy the bonds into it */ } /* opt() */ 
/*------------------------------------------------------------------*/

  /**
   * Turns a fragment into a molecule.
   *
   * The molecule is built from the embedding referenced by frag.list:
   * its atoms and bonds are copied, the fragment's support vector is
   * shared (not copied), and the type map is taken from the molecule
   * the embedding refers to. The identifier is null and the group is
   * set to -1.
   *
   * While copying, the "mark" fields of the embedding's atoms are
   * used to hold the index of the corresponding new atom; before the
   * constructor returns they are reset to -1.
   *
   * @param frag the fragment to convert; frag.chns extra atoms are
   *             reserved for carbon chains
   */
  protected Molecule (Fragment frag)
  {                             /* --- turn fragment into molecule */
    int       i, k;             /* loop variables, atom counter */
    Embedding emb;              /* to access an embedding */
    Bond      b;                /* to traverse/access the bonds */
    Atom      a, d, x;          /* to traverse/access the atoms */

    this.id    = null;          /* clear the molecule identifier */
    this.group = -1;            /* and invalidate the group marker */
    this.supp  = frag.supp;     /* copy the embedding information */
    emb        = frag.list;     /* (fragment and its support) */
    this.map   = emb.mol.map;
    /* one atom per embedding atom plus one pseudo-atom per chain */
    this.atomcnt = emb.atoms.length +frag.chns;
    this.atoms   = new Atom[this.atomcnt];
    this.bondcnt = emb.bonds.length;
    this.bonds   = new Bond[this.bondcnt];
    if (frag.chns == 0) {       /* if there are no carbon chains */
      for (i = emb.atoms.length; --i >= 0; ) {
        a = emb.atoms[i];       /* traverse the embedding's atoms */
        this.atoms[a.mark = i] = new Atom(a.type);
      }                         /* mark and copy each atom */
    }                           /* (the mark is the new atom index) */
    else {                      /* if there are carbon chains */
      for (i = emb.atoms.length; --i >= 0; )
        emb.atoms[i].mark = -2; /* unmark all atoms of the embedding */
      /* NOTE(review): atoms reached below with mark == -1 are */
      /* presumably chain carbons that are not listed in emb.atoms */
      /* -- confirm against Fragment/Embedding */
      a = emb.atoms[0];
      a.mark = 0;               /* mark and copy */
      this.atoms[0] = new Atom(a.type);    /* the root atom */
      for (k = i = 0; i < emb.bonds.length; i++) {
        b = emb.bonds[i];       /* traverse the embedding bonds */
        a = null;               /* and check for "unsaturated" ones */
        if      (b.src.mark < -1)  /* copy a not yet numbered atom */
          this.atoms[b.src.mark = ++k] = new Atom(b.src.type);
        else if (b.src.mark <  0)
          a = b.src;            /* note a possible chain atom */
        if      (b.dst.mark < -1)  /* copy a not yet numbered atom */
          this.atoms[b.dst.mark = ++k] = new Atom(b.dst.type);
        else if (b.dst.mark <  0)
          a = b.dst;            /* note a possible chain atom */
        if (a == null) continue;/* skip "saturated" bonds */
        a.mark = ++k;           /* mark the carbon at the one end */
        /* NOTE(review): relies on the other chain bond being stored */
        /* directly after this one; i+1 is not range-checked here */
        b = emb.bonds[i+1];     /* get the other chain bond */
        if      (b.src.mark == -1) a = b.src;
        else if (b.dst.mark == -1) a = b.dst;
        a.mark = k;             /* mark the carbon at the other end */
        /* both chain ends now share index k, so they map */
        /* to the same atom of the new molecule */
        this.atoms[k] = new Atom(Atom.CHAIN);
      }                         /* create a special pseudo-atom */
    }                           /* that represents the chain */
    for (i = 0; i < this.bondcnt; i++) {
      b = emb.bonds[i];         /* traverse and copy the bonds */
      a = this.atoms[b.src.mark];
      d = this.atoms[b.dst.mark];
      /* pass the atom with the smaller index as the first argument */
      if (b.dst.mark < b.src.mark) { x = a; a = d; d = x; }
      this.bonds[i] = new Bond(a, d, b.type);
      this.bonds[i].flags = b.flags;
    }                           /* add the bonds to the molecule */
    this.opt();                 /* optimize memory usage and */
    this.unmark();              /* unmark all atoms and bonds */
    if (frag.chns == 0) {       /* if there are no carbon chains */
      for (i = emb.atoms.length; --i >= 0; )
        emb.atoms[i].mark = -1; /* reset the embedding's atoms */
    }
    else {                      /* if there are carbon chains */
      for (i = emb.bonds.length; --i >= 0; ) {
        b = emb.bonds[i];       /* reset both ends of every bond */
        b.src.mark = b.dst.mark = -1;
      }
    }                           /* remove the embedding markers */
  }  /* Molecule() */

  /*--------------------------------------------------------------------
  The above function is used in class Miner, where fragments are tested
  for inclusion. This is done by trying to embed one of them into the
  other. For this purpose they have to be turned into molecules. In
  addition, it is needed for functions isCanonic and makeCanonic.
  Note that when there are carbon chains it does not suffice to unmark
  the atoms of the embedding, because there are unsaturated bonds, the
  ends of which have been numbered. Therefore in this case the marks
  have to be removed by relying on the bonds of the embedding.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -