📄 mdlv2000reader.java
字号:
/* $Revision: 9052 $ $Author: egonw $ $Date: 2007-10-14 20:26:33 +0200 (Sun, 14 Oct 2007) $ * * Copyright (C) 1997-2007 Christoph Steinbeck <steinbeck@users.sourceforge.net> * * Contact: cdk-devel@lists.sourceforge.net * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * All we ask is that proper credit is given for our work, which includes * - but is not limited to - adding the above copyright notice to the beginning * of your source code files, and to any copyright notice that you may distribute * with programs based on this work. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. */package org.openscience.cdk.io;import java.io.BufferedReader;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.io.Reader;import java.io.StringReader;import java.util.StringTokenizer;import javax.vecmath.Point2d;import javax.vecmath.Point3d;import org.openscience.cdk.CDKConstants;import org.openscience.cdk.config.IsotopeFactory;import org.openscience.cdk.exception.CDKException;import org.openscience.cdk.interfaces.IAtom;import org.openscience.cdk.interfaces.IBond;import org.openscience.cdk.interfaces.IChemFile;import org.openscience.cdk.interfaces.IChemModel;import org.openscience.cdk.interfaces.IChemObject;import org.openscience.cdk.interfaces.IChemSequence;import org.openscience.cdk.interfaces.IIsotope;import org.openscience.cdk.interfaces.IMolecule;import org.openscience.cdk.interfaces.IMoleculeSet;import org.openscience.cdk.interfaces.IPseudoAtom;import org.openscience.cdk.io.formats.IResourceFormat;import org.openscience.cdk.io.formats.MDLV2000Format;import org.openscience.cdk.io.setting.BooleanIOSetting;import org.openscience.cdk.io.setting.IOSetting;import org.openscience.cdk.tools.LoggingTool;import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;/** * Reads a molecule from an MDL MOL or SDF file {@cdk.cite DAL92}. An SD files * is read into a ChemSequence of ChemModel's. Each ChemModel will contain one * Molecule. * * <p>From the Atom block it reads atomic coordinates, element types and * formal charges. From the Bond block it reads the bonds and the orders. * Additionally, it reads 'M CHG', 'G ', 'M RAD' and 'M ISO' lines from the * property block. * * <p>If all z coordinates are 0.0, then the xy coordinates are taken as * 2D, otherwise the coordinates are read as 3D. * * <p>The title of the MOL file is read and can be retrieved with: * <pre> * molecule.getProperty(CDKConstants.TITLE); * </pre> * * RGroups which are saved in the mdl file as R#, are renamed according to their appearance, * e.g. the first R# is named R1. With PseudAtom.getLabel() "R1" is returned (instead of R#). * This is introduced due to the SAR table generation procedure of Scitegics PipelinePilot. * * @cdk.module io * * @author steinbeck * @author Egon Willighagen * @cdk.created 2000-10-02 * @cdk.keyword file format, MDL molfile * @cdk.keyword file format, SDF * @cdk.bug 1587283 */public class MDLV2000Reader extends DefaultChemObjectReader { BufferedReader input = null; private LoggingTool logger = null; private BooleanIOSetting forceReadAs3DCoords; public MDLV2000Reader() { this(new StringReader("")); } /** * Contructs a new MDLReader that can read Molecule from a given InputStream. * *@param in The InputStream to read from */ public MDLV2000Reader(InputStream in) { this(new InputStreamReader(in)); } public IResourceFormat getFormat() { return MDLV2000Format.getInstance(); } public void setReader(Reader input) throws CDKException { if (input instanceof BufferedReader) { this.input = (BufferedReader)input; } else { this.input = new BufferedReader(input); } } public void setReader(InputStream input) throws CDKException { setReader(new InputStreamReader(input)); } /** * Contructs a new MDLReader that can read Molecule from a given Reader. * *@param in The Reader to read from */ public MDLV2000Reader(Reader in) { logger = new LoggingTool(this); input = new BufferedReader(in); initIOSettings(); } public boolean accepts(Class classObject) { Class[] interfaces = classObject.getInterfaces(); for (int i=0; i<interfaces.length; i++) { if (IChemFile.class.equals(interfaces[i])) return true; if (IChemModel.class.equals(interfaces[i])) return true; if (IMolecule.class.equals(interfaces[i])) return true; } return false; } /** * Takes an object which subclasses IChemObject, e.g. Molecule, and will read * this (from file, database, internet etc). If the specific implementation * does not support a specific IChemObject it will throw an Exception. * *@param object The object that subclasses * IChemObject *@return The IChemObject read *@exception CDKException */ public IChemObject read(IChemObject object) throws CDKException { if (object instanceof IChemFile) { return readChemFile((IChemFile)object); } else if (object instanceof IChemModel) { return readChemModel((IChemModel)object); } else if (object instanceof IMolecule) { return readMolecule((IMolecule)object); } else { throw new CDKException("Only supported are ChemFile and Molecule."); } } private IChemModel readChemModel(IChemModel chemModel) throws CDKException { IMoleculeSet setOfMolecules = chemModel.getMoleculeSet(); if (setOfMolecules == null) { setOfMolecules = chemModel.getBuilder().newMoleculeSet(); } IMolecule m = readMolecule(chemModel.getBuilder().newMolecule()); if (m != null) { setOfMolecules.addMolecule(m); } chemModel.setMoleculeSet(setOfMolecules); return chemModel; } /** * Read a ChemFile from a file in MDL SDF format. * * @return The ChemFile that was read from the MDL file. */ private IChemFile readChemFile(IChemFile chemFile) throws CDKException { IChemSequence chemSequence = chemFile.getBuilder().newChemSequence(); IChemModel chemModel = chemFile.getBuilder().newChemModel(); IMoleculeSet setOfMolecules = chemFile.getBuilder().newMoleculeSet(); IMolecule m = readMolecule(chemFile.getBuilder().newMolecule()); if (m != null) { setOfMolecules.addMolecule(m); } chemModel.setMoleculeSet(setOfMolecules); chemSequence.addChemModel(chemModel); setOfMolecules = chemFile.getBuilder().newMoleculeSet(); chemModel = chemFile.getBuilder().newChemModel(); String str; try { String line; while ((line = input.readLine()) != null) { logger.debug("line: ", line); // apparently, this is a SDF file, continue with // reading mol files str = new String(line); if (str.equals("$$$$")) { m = readMolecule(chemFile.getBuilder().newMolecule()); if (m != null) { setOfMolecules.addMolecule(m); chemModel.setMoleculeSet(setOfMolecules); chemSequence.addChemModel(chemModel); setOfMolecules = chemFile.getBuilder().newMoleculeSet(); chemModel = chemFile.getBuilder().newChemModel(); } } else { // here the stuff between 'M END' and '$$$$' if (m != null) { // ok, the first lines should start with '>' String fieldName = null; if (str.startsWith("> ")) { // ok, should extract the field name str.substring(2); // String content = int index = str.indexOf("<"); if (index != -1) { int index2 = str.substring(index).indexOf(">"); if (index2 != -1) { fieldName = str.substring( index+1, index+index2 ); } } // end skip all other lines while ((line = input.readLine()) != null && line.startsWith(">")) { logger.debug("data header line: ", line); } } if (line == null) { throw new CDKException("Expecting data line here, but found null!"); } String data = line; while ((line = input.readLine()) != null && line.trim().length() > 0) { if (line.equals("$$$$")) { logger.error("Expecting data line here, but found end of molecule: ", line); break; } logger.debug("data line: ", line); data += line; // preserve newlines, unless the line is exactly 80 chars; in that case it // is assumed to continue on the next line. See MDL documentation. if (line.length() < 80) data += "\n"; } if (fieldName != null) { logger.info("fieldName, data: ", fieldName, ", ", data); m.setProperty(fieldName, data); } } } } } catch (CDKException cdkexc) { throw cdkexc; } catch (Exception exception) { String error = "Error while parsing SDF"; logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } try { input.close(); } catch (Exception exc) { String error = "Error while closing file: " + exc.getMessage(); logger.error(error); throw new CDKException(error, exc); } chemFile.addChemSequence(chemSequence); return chemFile; } /** * Read a Molecule from a file in MDL sd format * *@return The Molecule that was read from the MDL file. */ private IMolecule readMolecule(IMolecule molecule) throws CDKException { logger.debug("Reading new molecule"); int linecount = 0; int atoms = 0; int bonds = 0; int atom1 = 0; int atom2 = 0; int order = 0; int stereo = 0; int RGroupCounter=1; int Rnumber=0; String [] rGroup=null; double x = 0.0; double y = 0.0; double z = 0.0; double totalZ = 0.0; //int[][] conMat = new int[0][0]; //String help; IBond bond; IAtom atom; String line = ""; try { logger.info("Reading header"); line = input.readLine(); linecount++; if (line == null) { return null; } logger.debug("Line " + linecount + ": " + line); if (line.startsWith("$$$$")) { logger.debug("File is empty, returning empty molecule"); return molecule; } if (line.length() > 0) { molecule.setProperty(CDKConstants.TITLE, line); } line = input.readLine(); linecount++; logger.debug("Line " + linecount + ": " + line); line = input.readLine(); linecount++; logger.debug("Line " + linecount + ": " + line); if (line.length() > 0) { molecule.setProperty(CDKConstants.REMARK, line);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -