📄 universalisomorphismtester.java
字号:
/* $Revision: 8299 $ $Author: kaihartmann $ $Date: 2007-05-02 20:10:37 +0200 (Wed, 02 May 2007) $ * * Copyright (C) 2002-2007 Stephane Werner <mail@ixelis.net> * * This code has been kindly provided by Stephane Werner * and Thierry Hanser from IXELIS mail@ixelis.net * * IXELIS sarl - Semantic Information Systems * 17 rue des C???res 67200 Strasbourg, France * Tel/Fax : +33(0)3 88 27 81 39 Email: mail@ixelis.net * * CDK Contact: cdk-devel@lists.sf.net * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * */package org.openscience.cdk.isomorphism;import java.util.ArrayList;import java.util.BitSet;import java.util.HashMap;import java.util.Iterator;import java.util.List;import java.util.Map;import org.openscience.cdk.CDKConstants;import org.openscience.cdk.exception.CDKException;import org.openscience.cdk.interfaces.IAtom;import org.openscience.cdk.interfaces.IAtomContainer;import org.openscience.cdk.interfaces.IBond;import org.openscience.cdk.isomorphism.matchers.IQueryAtom;import org.openscience.cdk.isomorphism.matchers.IQueryBond;import org.openscience.cdk.isomorphism.matchers.IQueryAtomContainer;import org.openscience.cdk.isomorphism.mcss.RGraph;import org.openscience.cdk.isomorphism.mcss.RMap;import org.openscience.cdk.isomorphism.mcss.RNode;import org.openscience.cdk.tools.manipulator.BondManipulator;/** * This class implements a multipurpose structure comparison tool. * It allows to find maximal common substructure, find the * mapping of a substructure in another structure, and the mapping of * two isomorphic structures. * * <p>Structure comparison may be associated to bond constraints * (mandatory bonds, e.g. scaffolds, reaction cores,...) on each source graph. * The constraint flexibility allows a number of interesting queries. * The substructure analysis relies on the RGraph generic class (see: RGraph) * This class implements the link between the RGraph model and the * the CDK model in this way the RGraph remains independant and may be used * in other contexts. * * <p>This algorithm derives from the algorithm described in * {@cdk.cite HAN90} and modified in the thesis of T. Hanser {@cdk.cite HAN93}. * * <p>With the <code>isSubgraph()</code> method, the second, and only the second * argument <b>may</b> be a IQueryAtomContainer, which allows one to do MQL like queries. * The first IAtomContainer must never be an IQueryAtomContainer. An example:<pre> * SmilesParser sp = new SmilesParser(DefaultChemObjectBuilder.getInstance()); * IAtomContainer atomContainer = sp.parseSmiles("CC(=O)OC(=O)C"); // acetic acid anhydride * IAtomContainer SMILESquery = sp.parseSmiles("CC"); // acetic acid anhydride * IQueryAtomContainer query = IQueryAtomContainerCreator.createBasicQueryContainer(SMILESquery); * boolean isSubstructure = UniversalIsomorphismTester.isSubgraph(atomContainer, query); * </pre> * * <p><font color="#FF0000">WARNING</font>: * As a result of the adjacency perception used in this algorithm * there is a single limitation : cyclopropane and isobutane are seen as isomorph * This is due to the fact that these two compounds are the only ones where * each bond is connected two each other bond (bonds are fully conected) * with the same number of bonds and still they have different structures * The algotihm could be easily enhanced with a simple atom mapping manager * to provide an atom level overlap definition that would reveal this case. * We decided not to penalize the whole procedure because of one single * exception query. Furthermore isomorphism may be discarded since the number of atoms are * not the same (3 != 4) and in most case this will be already * screened out by a fingerprint based filtering. * It is possible to add a special treatment for this special query. * Be reminded that this algorithm matches bonds only. * </p> * * @author Stephane Werner from IXELIS mail@ixelis.net * @cdk.created 2002-07-17 * @cdk.require java1.4+ * @cdk.module standard */public class UniversalIsomorphismTester { final static int ID1 = 0; final static int ID2 = 1; private static long start; public static long timeout=-1; /////////////////////////////////////////////////////////////////////////// // Query Methods // // This methods are simple applications of the RGraph model on atom containers // using different constrains and search options. They give an exemple of the // most common queries but of course it is possible to define other type of // queries exploiting the constrain and option combinations // //// // Isomorphism search /** * Tests if g1 and g2 are isomorph. * * @param g1 first molecule. Must not be an IQueryAtomContainer. * @param g2 second molecule. May be an IQueryAtomContainer. * @return true if the 2 molecule are isomorph */ public static boolean isIsomorph(IAtomContainer g1, IAtomContainer g2) throws CDKException{ if (g1 instanceof IQueryAtomContainer) throw new CDKException( "The first IAtomContainer must not be an IQueryAtomContainer" ); if (g2.getAtomCount() != g1.getAtomCount()) return false; // check single atom case if (g2.getAtomCount() == 1) { IAtom atom = g1.getAtom(0); IAtom atom2 = g2.getAtom(0); if (atom instanceof IQueryAtom) { IQueryAtom qAtom = (IQueryAtom)atom; return qAtom.matches(g2.getAtom(0)); } else if (atom2 instanceof IQueryAtom) { IQueryAtom qAtom = (IQueryAtom)atom2; return qAtom.matches(g1.getAtom(0)); } else { String atomSymbol = atom.getSymbol(); return g1.getAtom(0).getSymbol().equals(atomSymbol); } } return (getIsomorphMap(g1, g2) != null); } /** * Returns the first isomorph mapping found or null. * * @param g1 first molecule. Must not be an IQueryAtomContainer. * @param g2 second molecule. May be an IQueryAtomContainer. * @return the first isomorph mapping found projected of g1. This is a List of RMap objects containing Ids of matching bonds. */ public static List getIsomorphMap(IAtomContainer g1, IAtomContainer g2) throws CDKException{ if (g1 instanceof IQueryAtomContainer) throw new CDKException( "The first IAtomContainer must not be an IQueryAtomContainer" ); List result = null; List rMapsList = search(g1, g2, getBitSet(g1), getBitSet(g2), false, false); if (!rMapsList.isEmpty()) { result = (List) rMapsList.get(0); } return result; } /** * Returns the first isomorph 'atom mapping' found for g2 in g1. * * @param g1 first molecule. Must not be an IQueryAtomContainer. * @param g2 second molecule. May be an IQueryAtomContainer. * @return the first isomorph atom mapping found projected on g1. This is a List of RMap objects containing Ids of matching atoms. */ public static List getIsomorphAtomsMap(IAtomContainer g1, IAtomContainer g2) throws CDKException { if (g1 instanceof IQueryAtomContainer) throw new CDKException( "The first IAtomContainer must not be an IQueryAtomContainer" ); ArrayList list = checkSingleAtomCases(g1, g2); if (list == null) { return (makeAtomsMapOfBondsMap(UniversalIsomorphismTester.getIsomorphMap(g1, g2), g1, g2)); } else if (list.isEmpty()) { return null; } else { return (List)list.get(0); } } /** * Returns all the isomorph 'mappings' found between two * atom containers. * * @param g1 first molecule. Must not be an IQueryAtomContainer. * @param g2 second molecule. May be an IQueryAtomContainer. * @return the list of all the 'mappings' */ public static List getIsomorphMaps(IAtomContainer g1, IAtomContainer g2) throws CDKException{ return search(g1, g2, getBitSet(g1), getBitSet(g2), true, true); } ///// // Subgraph search /** * Returns all the subgraph 'bond mappings' found for g2 in g1. * This is an ArrayList of ArrayLists of RMap objects. * * @param g1 first molecule. Must not be an IQueryAtomContainer. * @param g2 second molecule. May be an IQueryAtomContainer. * @return the list of all the 'mappings' found projected of g1 */ public static List getSubgraphMaps(IAtomContainer g1, IAtomContainer g2) throws CDKException{ return search(g1, g2, new BitSet(), getBitSet(g2), true, true); } /** * Returns the first subgraph 'bond mapping' found for g2 in g1. * * @param g1 first molecule. Must not be an IQueryAtomContainer. * @param g2 second molecule. May be an IQueryAtomContainer. * @return the first subgraph bond mapping found projected on g1. This is a List of RMap objects containing Ids of matching bonds. */ public static List getSubgraphMap(IAtomContainer g1, IAtomContainer g2) throws CDKException{ List result = null; List rMapsList = search(g1, g2, new BitSet(), getBitSet(g2), false, false); if (!rMapsList.isEmpty()) { result = (List) rMapsList.get(0); } return result; } /** * Returns all subgraph 'atom mappings' found for g2 in g1. * This is an ArrayList of ArrayLists of RMap objects. * * @param g1 first molecule. Must not be an IQueryAtomContainer. * @param g2 second molecule. May be an IQueryAtomContainer. * @return all subgraph atom mappings found projected on g1. This is a List of RMap objects containing Ids of matching atoms. */ public static List getSubgraphAtomsMaps(IAtomContainer g1, IAtomContainer g2) throws CDKException { ArrayList list = checkSingleAtomCases(g1, g2); if (list == null) { return (makeAtomsMapsOfBondsMaps(UniversalIsomorphismTester.getSubgraphMaps(g1, g2), g1, g2)); } else { return list; } } /** * Returns the first subgraph 'atom mapping' found for g2 in g1. * * @param g1 first molecule. Must not be an IQueryAtomContainer. * @param g2 second molecule. May be an IQueryAtomContainer. * @return the first subgraph atom mapping found projected on g1. This is a List of RMap objects containing Ids of matching atoms. */ public static List getSubgraphAtomsMap(IAtomContainer g1, IAtomContainer g2) throws CDKException { ArrayList list = checkSingleAtomCases(g1, g2); if (list == null) { return (makeAtomsMapOfBondsMap(UniversalIsomorphismTester.getSubgraphMap(g1, g2), g1, g2)); } else if (list.isEmpty()) { return null; } else { return (List)list.get(0); } } /** * Tests if g2 a subgraph of g1. * * @param g1 first molecule. Must not be an IQueryAtomContainer. * @param g2 second molecule. May be an IQueryAtomContainer. * @return true if g2 a subgraph on g1 */ public static boolean isSubgraph(IAtomContainer g1, IAtomContainer g2) throws CDKException{ if (g1 instanceof IQueryAtomContainer) throw new CDKException( "The first IAtomContainer must not be an IQueryAtomContainer" ); if (g2.getAtomCount() > g1.getAtomCount()) return false; // test for single atom case if (g2.getAtomCount() == 1) { IAtom atom = g2.getAtom(0); for (int i=0; i<g1.getAtomCount(); i++) { IAtom atom2 = g1.getAtom(i); if (atom instanceof IQueryAtom) { IQueryAtom qAtom = (IQueryAtom)atom; if (qAtom.matches(atom2)) return true; } else if (atom2 instanceof IQueryAtom) { IQueryAtom qAtom = (IQueryAtom)atom2; if (qAtom.matches(atom)) return true; } else { if (atom2.getSymbol().equals(atom.getSymbol())) return true; } } return false; } if (!testSubgraphHeuristics(g1, g2)) return false; return (getSubgraphMap(g1, g2) != null); } //// // Maximum common substructure search /** * Returns all the maximal common substructure between 2 atom containers. * * @param g1 first molecule. Must not be an IQueryAtomContainer. * @param g2 second molecule. May be an IQueryAtomContainer. * @return the list of all the maximal common substructure * found projected of g1 (list of AtomContainer ) */ public static List getOverlaps(IAtomContainer g1, IAtomContainer g2) throws CDKException{ start=System.currentTimeMillis(); List rMapsList = search(g1, g2, new BitSet(), new BitSet(), true, false); // projection on G1 ArrayList graphList = projectList(rMapsList, g1, ID1);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -