📄 isoakernel1.java
字号:
/** * ISOAK - Iterative similarity optimal assignment kernel. * * Written by Matthias Rupp 2006-2007. * Copyright (c) 2006-2007, Matthias Rupp, Ewgenij Proschak, Gisbert Schneider. * * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, * are permitted provided that the following conditions are met: * * * The above copyright notice, this permission notice and the following disclaimers * shall be included in all copies or substantial portions of this software. * * The names of the authors may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * Please cite * * Matthias Rupp, Ewgenij Proschak, Gisbert Schneider: * A Kernel Approach to Molecular Similarity Based on Iterative Graph Similarity, * Journal of Chemical Information and Molecular Modeling, 47(6): 2280-2286, 2007, * DOI http://dx.doi.org/10.1021/ci700274r. * * if you use this software. */package info.mrupp.isoak1;import static info.mrupp.isoak1.Factorial.factorial;import info.mrupp.isoak1.IVertexEdgeKernel;import info.mrupp.isoak1.VertexEdgeKernel;import info.mrupp.isoak1.MolecularGraph;import info.mrupp.isoak1.Array2Float;import info.mrupp.isoak1.HungarianAlgorithm;import info.mrupp.isoak1.HungarianAlgorithm.OptimizationType;import info.mrupp.isoak1.Permutations;/** * The iterative similarity optimal assignment kernel. * * Computes the iterative similarity optimal assignment kernel * between two molecular graphs up to a given precision. * * <ul> * <li>The kernel can be parameterized by the vertex and edge subkernel to be used.</li> * <li>If more than one kernel value is to be computed, create one instance of * the IsoaKernel class and reuse it. This will prevent repeated * reallocation of memory.</li> * </ul> * * Implementation based on the article<br /><br /> * * Matthias Rupp, Ewgenij Proschak, Gisbert Schneider: * <em>A Kernel Approach to Molecular Similarity Based on Iterative Graph Similarity</em>, * Journal of Chemical Information and Molecular Modeling, 2007, submitted.<br /><br /> * * Please cite this article if you use this software in scientific research. * * @author Matthias Rupp * @version 1.0 2007-09-17 * */public class IsoaKernel1 { // About the internal data used for computing the kernel (heap // storage is retained over instantiation lifetime): // // On the memory layout of the arrays: // // Switching from three-dimensional arrays (int [][][]) to two-dimensional // arrays (int [][]) improved runtime by 27% on a 175 molecule test set. // Switching from two-dimensional arrays (int [][]) to one-dimensional arrays // (int []) did not affect runtime. // Using a class (Array2Int) decreased performance by 7%. // // Based on these measurements, I decided to use two-dimensional built-in arrays. // The memory layout is as follows: // // The first index equals the index in the similarity vector (the coordinate). // For each coordinate, there are up to tau! terms with up to tau summands each. // For each summand, there's a factor by which the summand is multiplied and an // index (into the similarity vector) which gives the summand itself. // Factors and indices are stored separately. // The memory layout for the factors (i.j.k denotes the k-th summand factor in // the j-th term in the i-th coordinate): // // 0.0.0, 0.0.1, 0.0.2, ..., 0.0.tau-1, 0.1.0, ..., 0.1.tau-1, ..., 0.tau!-1.0, ..., 0.tau!-1.tau-1, // 1.0.0, 1.0.1, 1.0.2, ..., 1.0.tau-1, 1.1.0, ..., 1.1.tau-1, ..., 1.tau!-1.0, ..., 1.tau!-1.tau-1, // ... // // For the indices, there's an additional entry for each term giving the number // of summands and an additional entry for each coordinate giving the number of terms. // The memory layout for the indices (i.j.k as above, s = number of summands for previous // term, t = number of terms): // // 0.0.0, 0.0.1, 0.0.2, ..., 0.0.tau-1, s, 0.1.0, ..., 0.1.tau-1, s, ..., 0.tau!-1.0, ..., 0.tau!-1.tau-1, s, t, // 1.0.0, 1.0.1, 1.0.2, ..., 1.0.tau-1, s, 1.1.0, ..., 1.1.tau-1, s, ..., 1.tau!-1.0, ..., 1.tau!-1.tau-1, s, t, // ... // // Examples (all indices are zero-based): // // Address of k-th summand factor in j-th term in i-th coordinate: pFactors[i][j*tau+k]. // Address of k-th summand index in j-th term in i-th coordinate: pIndices[i][j*(tau+1)+k]. // Number of summands for j-th term in i-th coordinate: pIndices[i][(j+1)*(tau+1)-1]. // Number of terms in i-th coordinate: pIndices[i][tau!*(tau+1)]. private int kk, nn, kknn; // Number of atoms in molecules and their product. Class members so there's no need for method parameters. private float[] kvec; // Evaluated vertex kernel, linearized indices. private float[][] pFactors; // Factors for the neighbour assignments. Memory layout as described above. private int[][] pIndices; // Indices for the neighbour assignments. Memory layout as described above. private float[] x; // Similarity matrix, linearized indices. private float[] xzero; // Initial similarity matrix, used for convergence criterion. private Array2Float assignTemp; // Temporary storage for calculation of optimal assignment. private IVertexEdgeKernel vertexKernel, edgeKernel; // Comparison functions for vertices and edges. private HungarianAlgorithm hungAlg; // For computation of optimal assignment. final static int tau = MolecularGraph.MAX_DEGREE; // Maximum vertex degree. final static int tauOne = tau + 1; // Used for indexing pIndices. final static int tauFac = factorial(tau); // tau!. final static int pFactorsRow = tauFac*tau; // Number of elements per coordinate in array pFactors. final static int pIndicesRow = tauFac*tauOne + 1; // Number of elements per coordinate in array pIndices. /** Initializes internal data structures to an empty state. */ public IsoaKernel1() { kk = 0; nn = 0; kknn = 0; kvec = new float[0]; pFactors = new float[0][0]; pIndices = new int[0][0]; x = new float[0]; xzero = new float[0]; assignTemp = new Array2Float(0,0); vertexKernel = VertexEdgeKernel.NONE.create(new float[]{}); edgeKernel = VertexEdgeKernel.NONE.create(new float[]{}); hungAlg = new HungarianAlgorithm(); } /** Sets the vertex kernel to be used. Default is NONE. */ public void setVertexKernel(IVertexEdgeKernel k) { if (k != null) vertexKernel = k; } /** Sets the edge kernel to be used. Default is NONE. */ public void setEdgeKernel(IVertexEdgeKernel e) { if (e != null) edgeKernel = e; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -