📄 smilesgenerator.java
字号:
/* $Revision: 9775 $ $Author: shk3 $ $Date: 2008-01-01 21:23:12 +0100 (Tue, 01 Jan 2008) $ * * Copyright (C) 2002-2007 Oliver Horlacher * * Contact: cdk-devel@lists.sourceforge.net * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * All we ask is that proper credit is given for our work, which includes * - but is not limited to - adding the above copyright notice to the beginning * of your source code files, and to any copyright notice that you may distribute * with programs based on this work. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. */package org.openscience.cdk.smiles;import org.openscience.cdk.CDKConstants;import org.openscience.cdk.aromaticity.HueckelAromaticityDetector;import org.openscience.cdk.config.IsotopeFactory;import org.openscience.cdk.exception.CDKException;import org.openscience.cdk.geometry.BondTools;import org.openscience.cdk.graph.ConnectivityChecker;import org.openscience.cdk.graph.invariant.CanonicalLabeler;import org.openscience.cdk.graph.invariant.MorganNumbersTools;import org.openscience.cdk.interfaces.*;import org.openscience.cdk.ringsearch.AllRingsFinder;import org.openscience.cdk.ringsearch.RingPartitioner;import org.openscience.cdk.tools.manipulator.RingSetManipulator;import java.io.IOException;import java.util.*;/** * Generates SMILES strings {@cdk.cite WEI88, WEI89}. It takes into account the * isotope and formal charge information of the atoms. In addition to this it * takes stereochemistry in account for both Bond's and Atom's. Via the flag * useAromaticity it can be set if only SP2-hybridized atoms shall be set to * lower case (default) or atoms, which are SP2 or aromatic. IMPORTANT: The * aromaticity detection for this SmilesGenerator relies on AllRingsFinder, * which is known to take very long for some molecules with many cycles or * special cyclic topologies. Thus, the AllRingsFinder has a built-in timeout * of 5 seconds after which it aborts and throws an Exception. If you want your * SMILES generated at any expense, you need to create your own AllRingsFinder, * set the timeout to a higher value, and assign it to this SmilesGenerator. In * the vast majority of cases, however, the defaults will be fine. * If you have a set off ALL rings before, supply this via setRings to speed up generation. * * @author Oliver Horlacher * @author Stefan Kuhn (chiral smiles) * @cdk.created 2002-02-26 * @cdk.keyword SMILES, generator * @cdk.module smiles * @cdk.bug 1257438 * @cdk.bug 1535055 */public class SmilesGenerator{ //private final static boolean debug = false; /** * The number of rings that have been opened */ private int ringMarker = 0; /** * Collection of all the bonds that were broken */ private Vector brokenBonds = new Vector(); /** * The isotope factory which is used to write the mass is needed */ private IsotopeFactory isotopeFactory; AllRingsFinder ringFinder; /** * RingSet that holds all rings of the molecule */ private IRingSet rings = null; /** * The canonical labler */ private CanonicalLabeler canLabler = new CanonicalLabeler(); private final String RING_CONFIG = "stereoconfig"; private final String UP = "up"; private final String DOWN = "down"; private boolean useAromaticityFlag=false; /** * Create the SMILES generator. */ public SmilesGenerator() {} /** * Tells if a certain bond is center of a valid double bond configuration. * *@param container The atomcontainer. *@param bond The bond. *@return true=is a potential configuration, false=is not. */ public boolean isValidDoubleBondConfiguration(IAtomContainer container, IBond bond) { IAtom atom0 = bond.getAtom(0); IAtom atom1 = bond.getAtom(1); java.util.List connectedAtoms = container.getConnectedAtomsList(atom0); org.openscience.cdk.interfaces.IAtom from = null; for (int i = 0; i < connectedAtoms.size(); i++) { if ((IAtom)connectedAtoms.get(i) != atom1) { from = (IAtom)connectedAtoms.get(i); } } boolean[] array = new boolean[container.getBondCount()]; for (int i = 0; i < array.length; i++) { array[i] = true; } if (isStartOfDoubleBond(container, atom0, from, array) && isEndOfDoubleBond(container, atom1, atom0, array) && !bond.getFlag(CDKConstants.ISAROMATIC)) { return (true); } else { return (false); } } /** * Provide a reference to a RingSet that holds ALL rings of the molecule.<BR> * During creation of a SMILES the aromaticity of the molecule has to be detected. * This, in turn, requires the dermination of all rings of the molecule. If this * computationally expensive calculation has been done beforehand, a RingSet can * be handed over to the SmilesGenerator to save the effort of another all-rings- * calculation. * * @param rings RingSet that holds ALL rings of the molecule * @return reference to the SmilesGenerator object this method was called for */ public SmilesGenerator setRings(IRingSet rings) { this.rings = rings; return this; } /** * Generate canonical SMILES from the <code>molecule</code>. This method * canonicaly lables the molecule but does not perform any checks on the * chemical validity of the molecule. * IMPORTANT: A precomputed Set of All Rings (SAR) can be passed to this * SmilesGenerator in order to avoid recomputing it. Use setRings() to * assign the SAR. * *@param molecule The molecule to evaluate *@see org.openscience.cdk.graph.invariant.CanonicalLabeler#canonLabel(IAtomContainer) */ public synchronized String createSMILES(IMolecule molecule) { try { return (createSMILES(molecule, false, new boolean[molecule.getBondCount()])); } catch (CDKException exception) { // This exception can only happen if a chiral smiles is requested return (""); } } /** * Generate a SMILES for the given <code>Reaction</code>. */ public synchronized String createSMILES(IReaction reaction) throws CDKException { StringBuffer reactionSMILES = new StringBuffer(); IMoleculeSet reactants = reaction.getReactants(); for (int i = 0; i < reactants.getAtomContainerCount(); i++) { reactionSMILES.append(createSMILES(reactants.getMolecule(i))); if (i + 1 < reactants.getAtomContainerCount()) { reactionSMILES.append('.'); } } reactionSMILES.append('>'); IMoleculeSet agents = reaction.getAgents(); for (int i = 0; i < agents.getAtomContainerCount(); i++) { reactionSMILES.append(createSMILES(agents.getMolecule(i))); if (i + 1 < agents.getAtomContainerCount()) { reactionSMILES.append('.'); } } reactionSMILES.append('>'); IMoleculeSet products = reaction.getProducts(); for (int i = 0; i < products.getAtomContainerCount(); i++) { reactionSMILES.append(createSMILES(products.getMolecule(i))); if (i + 1 < products.getAtomContainerCount()) { reactionSMILES.append('.'); } } return reactionSMILES.toString(); } /** * Generate canonical and chiral SMILES from the <code>molecule</code>. This * method canonicaly lables the molecule but dose not perform any checks on * the chemical validity of the molecule. The chiral smiles is done like in * the <a href="http://www.daylight.com/dayhtml/doc/theory/theory.smiles.html"> * daylight theory manual</a> . I did not find rules for canonical and chiral * smiles, therefore there is no guarantee that the smiles complies to any * externeal rules, but it is canonical compared to other smiles produced by * this method. The method checks if there are 2D coordinates but does not * check if coordinates make sense. Invalid stereo configurations are ignored; * if there are no valid stereo configuration the smiles will be the same as * the non-chiral one. Note that often stereo configurations are only complete * and can be converted to a smiles if explicit Hs are given. * IMPORTANT: A precomputed Set of All Rings (SAR) can be passed to this * SmilesGenerator in order to avoid recomputing it. Use setRings() to * assign the SAR. * *@param molecule The molecule to evaluate *@exception CDKException At least one atom has no Point2D; * coordinates are needed for creating the chiral smiles. *@see org.openscience.cdk.graph.invariant.CanonicalLabeler#canonLabel(IAtomContainer) */ public synchronized String createChiralSMILES(IMolecule molecule, boolean[] doubleBondConfiguration) throws CDKException { return (createSMILES(molecule, true, doubleBondConfiguration)); } /** * Generate canonical SMILES from the <code>molecule</code>. This method * canonicaly lables the molecule but dose not perform any checks on the * chemical validity of the molecule. This method also takes care of multiple * molecules. * IMPORTANT: A precomputed Set of All Rings (SAR) can be passed to this * SmilesGenerator in order to avoid recomputing it. Use setRings() to * assign the SAR. * *@param molecule The molecule to evaluate *@param chiral true=SMILES will be chiral, false=SMILES * will not be chiral. *@exception CDKException At least one atom has no Point2D; * coordinates are needed for crating the chiral smiles. This excpetion * can only be thrown if chiral smiles is created, ignore it if you want a * non-chiral smiles (createSMILES(AtomContainer) does not throw an * exception). *@see org.openscience.cdk.graph.invariant.CanonicalLabeler#canonLabel(IAtomContainer) */ public synchronized String createSMILES(IMolecule molecule, boolean chiral, boolean doubleBondConfiguration[]) throws CDKException { IMoleculeSet moleculeSet = ConnectivityChecker.partitionIntoMolecules(molecule); if (moleculeSet.getMoleculeCount() > 1) { StringBuffer fullSMILES = new StringBuffer(); for (int i = 0; i < moleculeSet.getAtomContainerCount(); i++) { IMolecule molPart = moleculeSet.getMolecule(i); fullSMILES.append(createSMILESWithoutCheckForMultipleMolecules(molPart, chiral, doubleBondConfiguration)); if (i < (moleculeSet.getAtomContainerCount() - 1)) { // are there more molecules? fullSMILES.append('.'); } } return fullSMILES.toString(); } else { return (createSMILESWithoutCheckForMultipleMolecules(molecule, chiral, doubleBondConfiguration)); } } /** * Generate canonical SMILES from the <code>molecule</code>. This method * canonicaly lables the molecule but dose not perform any checks on the * chemical validity of the molecule. Does not care about multiple molecules. * IMPORTANT: A precomputed Set of All Rings (SAR) can be passed to this * SmilesGenerator in order to avoid recomputing it. Use setRings() to * assign the SAR. * *@param molecule The molecule to evaluate *@param chiral true=SMILES will be chiral, false=SMILES * will not be chiral. *@exception CDKException At least one atom has no Point2D; * coordinates are needed for creating the chiral smiles. This excpetion * can only be thrown if chiral smiles is created, ignore it if you want a * non-chiral smiles (createSMILES(AtomContainer) does not throw an * exception). *@see org.openscience.cdk.graph.invariant.CanonicalLabeler#canonLabel(IAtomContainer) */ public synchronized String createSMILESWithoutCheckForMultipleMolecules(IMolecule molecule, boolean chiral, boolean doubleBondConfiguration[]) throws CDKException { if (molecule.getAtomCount() == 0) { return ""; } canLabler.canonLabel(molecule); brokenBonds.clear(); ringMarker = 0; org.openscience.cdk.interfaces.IAtom start = null; for (int i = 0; i < molecule.getAtomCount(); i++) { org.openscience.cdk.interfaces.IAtom atom = molecule.getAtom(i); if (chiral && atom.getPoint2d() == null) { throw new CDKException("Atom number " + i + " has no 2D coordinates, but 2D coordinates are needed for creating chiral smiles"); } //logger.debug("Setting all VISITED flags to false"); atom.setFlag(CDKConstants.VISITED, false); if (((Long) atom.getProperty("CanonicalLable")).longValue() == 1) { start = atom; } } //detect aromaticity if(rings == null) { if (ringFinder == null) { ringFinder = new AllRingsFinder(); } rings = ringFinder.findAllRings(molecule); } HueckelAromaticityDetector.detectAromaticity(molecule, rings, false); if (chiral && rings.getAtomContainerCount() > 0) { List v = RingPartitioner.partitionRings(rings); //logger.debug("RingSystems: " + v.size()); for (int i = 0; i < v.size(); i++) { int counter = 0; Iterator containers = RingSetManipulator.getAllAtomContainers((IRingSet) v.get(i)).iterator(); while (containers.hasNext()) { IAtomContainer allrings = (IAtomContainer) containers.next(); for (int k = 0; k < allrings.getAtomCount(); k++) { if (!BondTools.isStereo(molecule, allrings.getAtom(k)) && hasWedges(molecule, allrings.getAtom(k)) != null) { IBond bond = molecule.getBond(allrings.getAtom(k), hasWedges(molecule, allrings.getAtom(k))); if (bond.getStereo() == CDKConstants.STEREO_BOND_UP) { allrings.getAtom(k).setProperty(RING_CONFIG, UP); } else { allrings.getAtom(k).setProperty(RING_CONFIG, DOWN); } counter++; } } if (counter == 1) { for (int k = 0; k < allrings.getAtomCount(); k++) { IBond bond = molecule.getBond(allrings.getAtom(k), hasWedges(molecule, allrings.getAtom(k))); if(bond!=null){ if (bond.getStereo() == CDKConstants.STEREO_BOND_UP)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -