📄 maciereader.java
字号:
/* $RCSfile$ * $Author: egonw $ * $Date: 2007-01-04 18:46:10 +0100 (Thu, 04 Jan 2007) $ * $Revision: 7636 $ * * Copyright (C) 2003-2007 The Chemistry Development Kit (CDK) project * * Contact: cdk-devel@lists.sourceforge.net * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * All we ask is that proper credit is given for our work, which includes * - but is not limited to - adding the above copyright notice to the beginning * of your source code files, and to any copyright notice that you may distribute * with programs based on this work. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. */package org.openscience.cdk.io;import java.io.File;import java.io.FileReader;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.io.LineNumberReader;import java.io.Reader;import java.util.Iterator;import java.util.List;import java.util.StringTokenizer;import java.util.regex.Matcher;import java.util.regex.Pattern;import org.openscience.cdk.CDKConstants;import org.openscience.cdk.ChemFile;import org.openscience.cdk.ChemModel;import org.openscience.cdk.ChemSequence;import org.openscience.cdk.EnzymeResidueLocator;import org.openscience.cdk.PseudoAtom;import org.openscience.cdk.Reaction;import org.openscience.cdk.ReactionSet;import org.openscience.cdk.dict.DictRef;import org.openscience.cdk.dict.DictionaryDatabase;import org.openscience.cdk.exception.CDKException;import org.openscience.cdk.interfaces.IAtom;import org.openscience.cdk.interfaces.IAtomContainer;import org.openscience.cdk.interfaces.IChemFile;import org.openscience.cdk.interfaces.IChemModel;import org.openscience.cdk.interfaces.IChemObject;import org.openscience.cdk.interfaces.IChemSequence;import org.openscience.cdk.io.formats.IResourceFormat;import org.openscience.cdk.io.formats.MACiEFormat;import org.openscience.cdk.io.setting.BooleanIOSetting;import org.openscience.cdk.io.setting.IOSetting;import org.openscience.cdk.io.setting.IntegerIOSetting;import org.openscience.cdk.io.setting.StringIOSetting;import org.openscience.cdk.tools.LoggingTool;import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;import org.openscience.cdk.tools.manipulator.ReactionManipulator;/** * Reads an export from the MACiE enzyme reaction database. * Information about this database can be obtained from * Gemma Holiday, Cambridge University, UK, and Gail Bartlett, * European Bioinformatics Institute, Hinxton, UK. * * <p>This implementation is based on a dump from their database * on 2003-07-14. * * @cdk.module experimental * * @author Egon Willighagen * @cdk.created 2003-07-24 * * @cdk.keyword file format, MACiE RDF * @cdk.require java1.4+ */public class MACiEReader extends DefaultChemObjectReader { /** Property it will put on ChemModel */ public final static String CreationDate = "org.openscience.cdk.io.MACiE.CreationDate"; /** Property it will put on ChemModel */ public final static String MedlineID = "org.openscience.cdk.io.MACiE.MedlineID"; /** Property it will put on ChemModel */ public final static String PDBCode = "org.openscience.cdk.io.MACiE.PDBCode"; /** Property it will put on ChemModel */ public final static String ECNumber = "org.openscience.cdk.io.MACiE.ECNumber"; /** Property it will put on ChemModel */ public final static String EnzymeName = "org.openscience.cdk.io.MACiE.EnzymeName"; private LineNumberReader input = null; private LoggingTool logger = null; private IntegerIOSetting selectedEntry; private BooleanIOSetting readSecondaryFiles; private StringIOSetting readSecondaryDir; private Pattern topLevelDatum; private Pattern subLevelDatum; private Pattern annotationTuple; private Pattern residueLocator; private ChemModel currentEntry; private Reaction currentReaction; private ReactionSet currentReactionStepSet; private String reactionStepAnnotation; private String reactionStepComments; private boolean readThisEntry = true; /** * Contructs a new MACiEReader that can read Molecule from a given Reader. * * @param in The Reader to read from */ public MACiEReader(Reader in) { this(); this.input = new LineNumberReader(in); } public MACiEReader(InputStream input) { this(new InputStreamReader(input)); } public MACiEReader() { logger = new LoggingTool(this); /* compile patterns */ topLevelDatum = Pattern.compile("(.+):(.+)"); subLevelDatum = Pattern.compile("(.+):(.+)\\((.+)\\):(.+)"); annotationTuple = Pattern.compile("(\\w+)=\\((.+?)\\);(.*)"); residueLocator = Pattern.compile("[A-Z][a-z][a-z]\\d{1,5}"); // e.g. Lys150 initIOSettings(); } public IResourceFormat getFormat() { return MACiEFormat.getInstance(); } public void setReader(Reader input) throws CDKException { if (input instanceof LineNumberReader) { this.input = (LineNumberReader)input; } else { this.input = new LineNumberReader(input); } } public void setReader(InputStream input) throws CDKException { setReader(new InputStreamReader(input)); } public boolean accepts(Class classObject) { Class[] interfaces = classObject.getInterfaces(); for (int i=0; i<interfaces.length; i++) { if (IChemModel.class.equals(interfaces[i])) return true; if (IChemFile.class.equals(interfaces[i])) return true; if (IChemSequence.class.equals(interfaces[i])) return true; } return false; } /** * Takes an object which subclasses IChemObject, e.g. Molecule, and will read * this (from file, database, internet etc). If the specific implementation * does not support a specific IChemObject it will throw an Exception. * * @param object The object that subclasses IChemObject * @return The IChemObject read * @exception CDKException */ public IChemObject read(IChemObject object) throws CDKException { customizeJob(); try { if (object instanceof IChemSequence) { return readReactions(false); } else if (object instanceof IChemModel) { return readReactions(true); } else if (object instanceof IChemFile) { IChemFile chemFile = object.getBuilder().newChemFile(); chemFile.addChemSequence((ChemSequence)readReactions(false)); return chemFile; } } catch (IOException exception) { String message = "Error while reading file, line number: " + input.getLineNumber(); logger.error(message); logger.debug(exception); throw new CDKException(message, exception); } throw new CDKException("Only supported are ChemSequence and ChemModel."); } public boolean accepts(IChemObject object) { if (object instanceof ChemSequence) { return true; } else if (object instanceof ChemModel) { return true; } else if (object instanceof ChemFile) { return true; } else if (object == null) { logger.warn("MACiEReader can not read null objects."); } else { logger.warn("MACiEReader can not read IChemObject of type: ", object.getClass().getName()); } return false; } /** * Read a Reaction from a file in MACiE RDF format. * * @return The Reaction that was read from the MDL file. */ private IChemObject readReactions(boolean selectEntry) throws CDKException, IOException { ChemSequence entries = new ChemSequence(); currentEntry = null; int entryCounter = 0; currentReactionStepSet = null; while (input.ready()) { String line = input.readLine(); if (line.startsWith("$RDFILE")) { entries = new ChemSequence(); } else if (line.startsWith("$DATM")) { entries.setProperty(CreationDate, line.substring(7)); } else if (line.startsWith("$RIREG")) { // new entry, store previous entry if any if (currentEntry != null) { // store previous entry currentEntry.setReactionSet(currentReactionStepSet); createNiceMACiETitle(currentEntry); entries.addChemModel(currentEntry); fireFrameRead(); if (selectEntry && (entryCounter == selectedEntry.getSettingValue())) { logger.info("Starting reading wanted frame: ", selectedEntry); return currentEntry; } else { logger.debug("Not reading unwanted frame: " + entryCounter); } } currentEntry = new ChemModel(); entryCounter++; if (!selectEntry || entryCounter == selectedEntry.getSettingValue()) { readThisEntry = true; } else { readThisEntry = false; } currentReactionStepSet = new ReactionSet(); } else if (line.startsWith("$DTYPE")) { String[] tuple = readDtypeDatumTuple(line); String dataType = tuple[0]; String datum = tuple[1]; // now some regular expression wizardry Matcher subLevelMatcher = subLevelDatum.matcher(dataType); if (subLevelMatcher.matches()) { // sub level field found String field = subLevelMatcher.group(2); String fieldNumber = subLevelMatcher.group(3); String subfield = subLevelMatcher.group(4); processSubLevelField(field, fieldNumber, subfield, datum); } else { Matcher topLevelMatcher = topLevelDatum.matcher(dataType); if (topLevelMatcher.matches()) { // top level field found String field = topLevelMatcher.group(2); processTopLevelField(field, datum); } else { logger.error("Could not parse datum tuple of type ", dataType, " around line " + input.getLineNumber()); } } } else { logger.warn("Unrecognized command on line " + input.getLineNumber(), ": ", line); } } if (currentEntry != null) { createNiceMACiETitle(currentEntry); // store last entry currentEntry.setReactionSet(currentReactionStepSet); entries.addChemModel(currentEntry); fireFrameRead(); } if (selectEntry) { // apparently selected last one, other already returned return currentEntry; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -