📄 moleculargraph.java
字号:
/** * ISOAK - Iterative similarity optimal assignment kernel. * * Written by Matthias Rupp 2006-2007. * Copyright (c) 2006-2007, Matthias Rupp, Ewgenij Proschak, Gisbert Schneider. * * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, * are permitted provided that the following conditions are met: * * * The above copyright notice, this permission notice and the following disclaimers * shall be included in all copies or substantial portions of this software. * * The names of the authors may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * Please cite * * Matthias Rupp, Ewgenij Proschak, Gisbert Schneider: * A Kernel Approach to Molecular Similarity Based on Iterative Graph Similarity, * Journal of Chemical Information and Molecular Modeling, 47(6): 2280-2286, 2007, * DOI http://dx.doi.org/10.1021/ci700274r. * * if you use this software. */package info.mrupp.isoak1;import java.io.LineNumberReader;import java.text.ParseException;import info.mrupp.isoak1.Array2Float;import info.mrupp.isoak1.Element;/** * Query-only data structure for molecular graphs. * * Contains all information about one molecule. * Designed as a constant data structure, i.e. once created * no modifications are possible. Optimized for fast access. * * <ul> * <li>The first atom descriptor (index zero) is the atom type (element).</li> * <li>The first bond descriptor (index zero) is the bond type (single, double, triple, ...).</li> * </ul> * * The maximum allowed vertex degree tau is used to compute tau! in other * parts of the program. Since optimal neighbour assignments are computed * where tau! possible assignments are tested, increasing tau beyond 6 is * not recommended. */public class MolecularGraph { // Public interface. /** Maximum degree of any vertex. Factorial runtime dependency on this constant. */ public static final int MAX_DEGREE = 6; /** The supported chemical file formats (currently only SDF). * * Extend here if you want to read in other file formats. */ public enum FileFormat { /** Elsevier ⁄ MDL SDF file format. */ MDL_MOL }; /** Creates a new molecular graph from a given source. * * @param reader the source from which a textual representation of the molecule is read. * @param format the file format of the textual source. * @throws java.io.IOException * @throws ParseException */ public MolecularGraph(java.io.LineNumberReader reader, FileFormat format) throws java.io.IOException, ParseException { switch (format) { case MDL_MOL: createFromMdlMol(reader); break; default: throw new RuntimeException ("Unknown file format constant."); } } /** Returns an identifier for the molecular graph. */ public String name() { return name; } /** Returns the total number of atoms in the molecular graph. */ public int numAtoms() { return numAtoms; } /** Returns the total number of bonds in the molecular graph. */ public int numBonds() { return numBonds; } /** Returns the value of the given descriptor for the given atom. First descriptor is atomic number. */ public float atomDescriptor(int atom, int descriptor) { return atomDescriptors.get(atom, descriptor); } /** Returns the value of the given descriptor for the given bond. First descriptor is bond order. */ public float bondDescriptor(int bond, int descriptor) { return bondDescriptors.get(bond, descriptor); } /** Returns the degree of given atom. */ public int numNeighbours(int atom) { return neighbours[atom][MAX_DEGREE]; } /** Returns the n-th neighbour (index) of given atom. The order of neighbours is arbitrary but does not change once the graph has been created. */ public int neighbour(int atom, int n) { return neighbours[atom][n]; } /** Returns the index of the bond between two atoms. */ public int bondIndex(int a, int b) { for (int index = neighbours[a][MAX_DEGREE]; --index >= 0; ) if (neighbours[a][index] == b) return bondIndices[a][index]; throw new IllegalArgumentException(String.format("No bond exists between the specified vertices %d and %d.", a, b)); } // End of public interface. // Internal data structures. private String name; // Name of molecule. private int numAtoms; // Number of atoms in molecule. private int numBonds; // Number of bonds in molecule. private Array2Float atomDescriptors; // Table of atom descriptors (atoms x descriptors). private Array2Float bondDescriptors; // Table of bond descriptors (bonds x descriptors). private int[][] neighbours; // Neighbour list (atoms x tau). private int[][] bondIndices; // The corresponding bond indices for the neighbours array. private void createFromMdlMol (LineNumberReader reader) throws java.io.IOException, ParseException { String line; // Molecule name. name = reader.readLine().trim(); // Skip two lines. reader.readLine(); reader.readLine(); // Number of atoms, number of bonds. line = reader.readLine(); numAtoms = Short.parseShort(line.substring(0,3).trim()); if (numAtoms < 1) throw new ParseException("Molecule must have at least one atom.", 0); numBonds = Short.parseShort(line.substring(3,6).trim()); addAtomDescriptorDim(1); // First descriptor gives element type. addBondDescriptorDim(1); // First descriptor gives bond type. // Atoms. for (int i = 0; i < numAtoms; i++) { line = reader.readLine().substring(31,34).trim(); if (line.length() == 3) throw new ParseException ("Three character element abbreviations are not supported.", 0); if (line.length() == 1) line = line + ' '; atomDescriptors.set(i, 0, Element.fromString(line).atomicNumber()); } // Bonds. neighbours = new int[numAtoms][MAX_DEGREE+1]; bondIndices = new int[numAtoms][MAX_DEGREE]; for (int i = numAtoms; --i >= 0; ) { java.util.Arrays.fill(neighbours[i], -1); neighbours[i][MAX_DEGREE] = 0; // Setting number of neighbours to zero must happen after array is filled. java.util.Arrays.fill(bondIndices[i], -1); } for (int bondIndex = 0; bondIndex < numBonds; ++bondIndex) { line = reader.readLine(); final int v = Integer.parseInt(line.substring(0,3).trim()) - 1; final int w = Integer.parseInt(line.substring(3,6).trim()) - 1; bondDescriptors.set(bondIndex, 0, Float.parseFloat(line.substring(6,9).trim())); final int vIndex = neighbours[v][MAX_DEGREE]; neighbours[v][vIndex] = w; bondIndices[v][vIndex] = bondIndex; ++neighbours[v][MAX_DEGREE]; final int wIndex = neighbours[w][MAX_DEGREE]; neighbours[w][wIndex] = v; bondIndices[w][wIndex] = bondIndex; ++neighbours[w][MAX_DEGREE]; } // Ignore fields until 'M END'. while (!reader.readLine().equals("M END")) {} // Descriptors. final java.util.regex.Pattern delim = java.util.regex.Pattern.compile("\\s|,"); for (line = reader.readLine(); line.length() > 3 && line.subSequence(0,1).equals(">") && line.indexOf("<") > -1; line = reader.readLine()) { if (line.indexOf("<AD:") > -1) { // Atom descriptor. final int index = atomDescriptors.cols(); addAtomDescriptorDim(1); java.util.Scanner scanner = new java.util.Scanner(reader.readLine()); scanner.useLocale(java.util.Locale.ENGLISH); scanner.useDelimiter(delim); for (int i = 0; i < numAtoms; ++i) { atomDescriptors.set(i, index, scanner.nextFloat()); } reader.readLine(); } else if (line.indexOf("<BD:") > -1) { // Bond descriptor. final int index = bondDescriptors.cols(); addBondDescriptorDim(1); java.util.Scanner scanner = new java.util.Scanner(reader.readLine()); scanner.useLocale(java.util.Locale.ENGLISH); scanner.useDelimiter(delim); for (int i = 0; i < numBonds; ++i) { bondDescriptors.set(i, index, scanner.nextFloat()); } reader.readLine(); } else { // Other data. while (!reader.readLine().trim().equals("")) ; } } if (line.length() == 0) line = reader.readLine(); // Skip possible empty line. // Terminator. if (!line.equals("$$$$")) throw new ParseException (String.format("Expected molecule terminator ''$$$$'' in line %d, but did not encounter it", reader.getLineNumber()), 0); } // Helper methods. /** Adds atom descriptors. Old descriptor values are preserved. */ private void addAtomDescriptorDim (int num) { if (atomDescriptors == null) { atomDescriptors = new Array2Float(numAtoms, num); } else { final int newLength = atomDescriptors.cols() + num; Array2Float newArray = new Array2Float(numAtoms, newLength); for (int row = numAtoms; --row >= 0; ) for (int col = atomDescriptors.cols(); --col >= 0; ) newArray.set(row, col, atomDescriptors.get(row, col)); atomDescriptors = newArray; } } /** Adds bond descriptors. Old descriptor values are preserved. */ private void addBondDescriptorDim (int num) { if (bondDescriptors == null) { bondDescriptors = new Array2Float(numBonds, num); } else { final int newLength = bondDescriptors.cols() + num; Array2Float newArray = new Array2Float(numBonds, newLength); for (int row = numBonds; --row >= 0; ) for (int col = bondDescriptors.cols(); --col >= 0; ) newArray.set(row, col, bondDescriptors.get(row, col)); bondDescriptors = newArray; } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -