📄 descriptorengine.java
字号:
/* $Revision: 8075 $ $Author: egonw $ $Date: 2007-03-10 14:50:10 +0100 (Sat, 10 Mar 2007) $ * * Copyright (C) 2004-2007 Rajarshi Guha <rajarshi@users.sourceforge.net> * * Contact: cdk-devel@lists.sourceforge.net * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. */package org.openscience.cdk.qsar;import nu.xom.Attribute;import nu.xom.Element;import nu.xom.Elements;import org.openscience.cdk.dict.Dictionary;import org.openscience.cdk.dict.DictionaryDatabase;import org.openscience.cdk.dict.Entry;import org.openscience.cdk.exception.CDKException;import org.openscience.cdk.interfaces.IAtom;import org.openscience.cdk.interfaces.IAtomContainer;import org.openscience.cdk.interfaces.IBond;import org.openscience.cdk.tools.LoggingTool;import java.io.File;import java.io.IOException;import java.lang.reflect.Modifier;import java.util.*;import java.util.jar.JarEntry;import java.util.jar.JarFile;/** * A class that provides access to automatic descriptor calculation and more. * <p/> * <p>The aim of this class is to provide an easy to use interface to automatically evaluate * all the CDK descriptors for a given molecule. Note that at a given time this class * will evaluate all <i>atomic</i> or <i>molecular</i> descriptors but not both. * <p/> * <p>The available descriptors are determined by scanning all the jar files in the users CLASSPATH * and selecting classes that belong to the CDK QSAR atomic or molecular descriptors package. * <p/> * <p>An example of its usage would be * <pre> * Molecule someMolecule; * ... * DescriptorEngine descriptoEngine = new DescriptorEngine(DescriptorEngine.MOLECULAR, null); * descriptorEngine.process(someMolecule); * </pre> * <p/> * <p>The class allows the user to obtain a List of all the available descriptors in terms of their * Java class names as well as instances of each descriptor class. For each descriptor, it is possible to * obtain its classification as described in the CDK descriptor-algorithms OWL dictionary. * * @cdk.created 2004-12-02 * @cdk.module qsar * @cdk.depends xom-1.0.jar * @see DescriptorSpecification * @see Dictionary * @see org.openscience.cdk.dict.OWLFile */public class DescriptorEngine { private static String rdfNS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; public static final int ATOMIC = 1; public static final int BOND = 2; public static final int MOLECULAR = 3; private Dictionary dict = null; private List classNames = null; private List descriptors = null; private List speclist = null; private static LoggingTool logger = new LoggingTool(DescriptorEngine.class); /** * Instantiates the DescriptorEngine. * <p/> * This constructir instantiates the engine but does not perform any initialization. As a result calling * the <code>process()</code> method will fail. To use the engine via this constructor you should use * the following code * <p/> * <pre> * List classNames = DescriptorEngine.getDescriptorClassNameByPackage("org.openscience.cdk.qsar.descriptors.molecular", * null); * DescriptorEngine engine = DescriptorEngine(classNames); * <p/> * List instances = engine.instantiateDescriptors(classNames); * List specs = engine.initializeSpecifications(instances) * engine.setDescriptorInstances(instances); * engine.setDescriptorSpecifications(specs); * <p/> * engine.process(someAtomContainer); * </pre> * <p/> * This approach allows one to use find classes using the interface based approach ({@link #getDescriptorClassNameByInterface(String, String[])}. * If you use this method it is preferable to specify the jar files to examine */ public DescriptorEngine(List classNames) { this.classNames = classNames; descriptors = instantiateDescriptors(classNames); speclist = initializeSpecifications(descriptors); // get the dictionary for the descriptors DictionaryDatabase dictDB = new DictionaryDatabase(); dict = dictDB.getDictionary("descriptor-algorithms"); } /** * Constructor that generates a list of descriptors to calculate. * <p/> * All available descriptors are included in the list of descriptors to * calculate This constructor assumes that system classpath is the one to look at * to find valid jar files. * * @param type Indicates whether molecular or atomic descriptors should be calculated. Possible values * are DescriptorEngine.ATOMIC or DescriptorEngine.MOLECULAR */ public DescriptorEngine(int type) { this(type, null); } /** * Constructor that generates a list of descriptors to calculate. * <p/> * All available descriptors are included in the list of descriptors to * calculate * * @param type Indicates whether molecular or atomic descriptors should be calculated. Possible values * are DescriptorEngine.ATOMIC or DescriptorEngine.MOLECULAR * @param jarFileNames A String[] containing the fully qualified names of the jar files * to examine for descriptor classes. In general, this can be set to NULL, in which case * the system classpath is examined for available jar files. This parameter can be set for * situations where the system classpath is not available or is modified such as in an application * container. */ public DescriptorEngine(int type, String[] jarFileNames) { switch (type) { case ATOMIC: classNames = getDescriptorClassNameByPackage("org.openscience.cdk.qsar.descriptors.atomic", jarFileNames); break; case BOND: classNames = getDescriptorClassNameByPackage("org.openscience.cdk.qsar.descriptors.bond", jarFileNames); break; case MOLECULAR: classNames = getDescriptorClassNameByPackage("org.openscience.cdk.qsar.descriptors.molecular", jarFileNames); break; } descriptors = instantiateDescriptors(classNames); speclist = initializeSpecifications(descriptors); logger.debug("Found #descriptors: ", classNames.size()); // get the dictionary for the descriptors DictionaryDatabase dictDB = new DictionaryDatabase(); dict = dictDB.getDictionary("descriptor-algorithms"); } /** * Calculates all available (or only those specified) descriptors for a molecule. * <p/> * The results for a given descriptor as well as associated parameters and * specifications are used to create a <code>DescriptorValue</code> * object which is then added to the molecule as a property keyed * on the <code>DescriptorSpecification</code> object for that descriptor * * @param molecule The molecule for which we want to calculate descriptors * @throws CDKException if an error occured during descriptor calculation or the descriptors and/or * specifications have not been initialized */ public void process(IAtomContainer molecule) throws CDKException { if (descriptors == null || speclist == null) throw new CDKException("Descriptors have not been instantiated"); if (speclist.size() != descriptors.size()) throw new CDKException("Number of specs and descriptors do not match"); for (int i = 0; i < descriptors.size(); i++) { IDescriptor descriptor = (IDescriptor) descriptors.get(i); try { if (descriptor instanceof IMolecularDescriptor) { DescriptorValue value = ((IMolecularDescriptor) descriptor).calculate(molecule); molecule.setProperty(speclist.get(i), value); logger.debug("Calculated molecular descriptors..."); } else if (descriptor instanceof IAtomicDescriptor) { java.util.Iterator atoms = molecule.atoms(); while (atoms.hasNext()) { IAtom atom = (IAtom) atoms.next(); DescriptorValue value = ((IAtomicDescriptor) descriptor).calculate(atom, molecule); atom.setProperty(speclist.get(i), value); } logger.debug("Calculated atomic descriptors..."); } else if (descriptor instanceof IBondDescriptor) { Iterator bonds = molecule.bonds(); while (bonds.hasNext()) { IBond bond = (IBond) bonds.next(); DescriptorValue value = ((IBondDescriptor) descriptor).calculate(bond, molecule); bond.setProperty(speclist.get(i), value); } logger.debug("Calculated bond descriptors..."); } else { logger.debug("Unknown descriptor type for: ", descriptor.getClass().getName()); } } catch (CDKException exception) { logger.error("Could not calculate descriptor value for: ", descriptor.getClass().getName()); logger.debug(exception); throw new CDKException("Could not calculate descriptor value for: " + descriptor.getClass().getName(), exception); } } } /** * Returns the type of the descriptor as defined in the descriptor dictionary. * <p/> * The method will look for the identifier specified by the user in the QSAR descriptor * dictionary. If a corresponding entry is found, first child element that is called * "isClassifiedAs" is returned. Note that the OWL descriptor spec allows both the class of * descriptor (electronic, topological etc) as well as the type of descriptor (molecular, atomic) * to be specified in an "isClassifiedAs" element. Thus we ignore any such element that * indicates the descriptors class. * <p/> * The method assumes that any descriptor entry will have only one "isClassifiedAs" entry describing * the descriptors type. * <p/> * The descriptor can be identified either by the name of the class implementing the descriptor * or else the specification reference value of the descriptor which can be obtained from an instance * of the descriptor class. * * @param identifier A String containing either the descriptors fully qualified class name or else the descriptors
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -