📄 nomparser.jj
字号:
options {
    STATIC = false;
    OPTIMIZE_TOKEN_MANAGER = true;
    FORCE_LA_CHECK = true;
}

PARSER_BEGIN(NomParser)
/*
 * Copyright (C) 2003-2007 University of Manchester
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 * (or see http://www.gnu.org/copyleft/lesser.html)
 */
package org.openscience.cdk.iupac.parser;

import java.io.StringReader;
import org.openscience.cdk.Molecule;
import org.openscience.cdk.exception.*;
import java.util.*;

/**
 * A class partly generated by <a href="http://javacc.dev.java.net" target="_top">JavaCC</a> which breaks down the chemical name
 * into computable subparts and passes these parts to the MoleculeBuilder.
 *
 * <p>All parse state is held in static fields that {@link #generate(String)}
 * re-initialises on every call, so concurrent calls to {@code generate} share
 * (and overwrite) that state.
 *
 * @author David Robinson (University of Manchester)
 * @author Bhupinder Sandhu
 * @author Stephen Tomkinson
 *
 * @cdk.keyword IUPAC name
 */
public class NomParser {
    // Private variables needed throughout the program.

    /** Length of the main chain, as accumulated from the number tokens. */
    private static int mainChainPrefix;

    /** Whether the main chain has been flagged as cyclic. */
    private static boolean isMainCyclic;

    /** The temporary vector of locations the current group/substituent is attached to. */
    private static Vector tempLocation;

    /**
     * The vector of attached functional groups, with an instance of AttachedGroup for each
     * functional group.
     */
    private static Vector attachedGroups;

    /**
     * The vector of attached substituents, with an instance of AttachedGroup for each
     * substituent.
     */
    private static Vector attachedSubstituents;

    /**
     * Used in the build up of ancient greek style prefixes.
     */
    private static int currentNumber;

    /**
     * Parses the chemical name and returns the built molecule.
     *
     * @param stringToParse A case-insensitive name of the chemical to build.
     * @return A molecule which represents the interpretation of the name.
     * @throws ParseException Any error which occurs in the parsing gets wrapped
     *         up in a ParseException and thrown.
     * @throws CDKException Declared on the signature; presumably raised while the
     *         molecule is being built (the builder is not visible here).
     */
    public static Molecule generate (String stringToParse) throws ParseException, CDKException {
        // Reset every piece of shared state, including the main chain length,
        // so nothing leaks over from a previous call.
        isMainCyclic = false;
        mainChainPrefix = 0;
        currentNumber = 0;
        tempLocation = new Vector();
        attachedSubstituents = new Vector();
        attachedGroups = new Vector();

        // The token images are plain ASCII lower case, so lower-casing must not
        // depend on the default locale (e.g. Turkish maps "I" to a dotless i).
        String normalisedName = stringToParse.toLowerCase(Locale.ENGLISH) + "\n";

        NomParser parser = new NomParser (new StringReader (normalisedName));
        parser.completeChemicalName();

        //Scan substituents for a too high connection point
        checkConnections (attachedSubstituents.iterator());

        //Scan functional groups for a too high connection point
        checkConnections (attachedGroups.iterator());

        MoleculeBuilder moleculeBuilder = new MoleculeBuilder();
        return moleculeBuilder.buildMolecule(mainChainPrefix, attachedSubstituents,
                attachedGroups, isMainCyclic, stringToParse);
    }

    /**
     * Checks to ensure that all groups/substituents attached to the main chain
     * are connected to a valid atom which occurs on the main chain.
     *
     * @param vectorIterator An iterator which provides instances of AttachedGroup to check
     * @throws ParseException A tailored instance of ParseException so nomen can display
     *         the error to the user.
     */
    private static void checkConnections (Iterator vectorIterator) throws ParseException {
        while (vectorIterator.hasNext()) {
            AttachedGroup ag = (AttachedGroup) vectorIterator.next();
            Iterator locationsIterator = ag.getLocations().iterator();

            while (locationsIterator.hasNext()) {
                Token tok = (Token) locationsIterator.next();
                if (isBeyondMainChain (tok.image)) {
                    throw locationTooHigh (tok);
                }
            }
        }
    }

    /**
     * Tells whether a location image names a position past the end of the main chain.
     *
     * @param image The text of a location token.
     * @return true if the location is greater than the main chain length. A run of
     *         digits too long to fit in an int also counts as too high; any other
     *         unparsable image is ignored, as before.
     */
    private static boolean isBeyondMainChain (String image) {
        try {
            return Integer.parseInt(image) > mainChainPrefix;
        } catch (NumberFormatException overflowOrNonNumeric) {
            // An all-digit image only fails to parse when it overflows an int,
            // which necessarily puts it beyond any main chain.
            return isAllDigits (image);
        }
    }

    /**
     * Tells whether the given text is a non-empty run of the characters '0'-'9'.
     */
    private static boolean isAllDigits (String text) {
        if (text == null || text.length() == 0) {
            return false;
        }
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c < '0' || c > '9') {
                return false;
            }
        }
        return true;
    }

    /**
     * Builds the ParseException reported for a location beyond the main chain.
     *
     * @param tok The offending location token; its next pointer is redirected to itself.
     * @return The exception for the caller to throw.
     */
    private static ParseException locationTooHigh (Token tok) {
        //A tiny 2D array with a single slot holding 0, so as to reference
        //the first slot in the string array
        int [][] expectedSequences = new int [][] { { 0 } };

        //Put useful message in the string array
        String [] messages = new String [] { " MUST BE BELOW " + (mainChainPrefix + 1) + " " };

        // NOTE(review): the generated ParseException appears to report
        // currentToken.next as the offending token, hence the self reference.
        tok.next = tok;
        return new ParseException (tok, expectedSequences, messages);
    }
}
PARSER_END(NomParser)

JAVACODE
/**
 * Stores "head tokens", the substituent prefix, in a vector of AttachedGroup objects.
 */
void AddHeadToken() {
    attachedSubstituents.add (new AttachedGroup (tempLocation, currentNumber) );
    tempLocation = new Vector();
}

JAVACODE
/**
 * Records what has been accumulated as the main chain as a substituent instead,
 * then clears the number counters and the pending locations.
 */
void MakeMainChainIntoSubstituent() {
    attachedSubstituents.add (new AttachedGroup (tempLocation, mainChainPrefix) );
    currentNumber = 0;
    mainChainPrefix = 0;
    tempLocation = new Vector();
}

JAVACODE
/**
 * Stores the functional groups in a vector of AttachedGroup objects.
*/
void AddFunGroup() {
    // The most recently consumed token names the functional group.
    String groupName = getToken(-1).image;
    attachedGroups.add (new AttachedGroup (tempLocation, groupName) );
    // Start a fresh location list for whatever is parsed next.
    tempLocation = new Vector();
}

JAVACODE
/**
 * Stores the functional group positions, the number of the atom they
 * connect to, by appending the most recently consumed token to the
 * pending location list.
 */
void AddFunGroupPos() {
    tempLocation.add(getToken(-1));
}

JAVACODE
/**
 * Adds to the pending location list a synthetic token whose image is -1,
 * to indicate no location was specified.
 */
void AddUnknownFunGroupPos() {
    Token placeholder = new Token();
    placeholder.image = "-1";
    tempLocation.add(placeholder);
}

JAVACODE
/**
 * Store the mainChainPrefix, the chain prefix of the longest carbon chain,
 * from the number built up so far, and reset that running number.
 */
void AddMainChain() {
    mainChainPrefix = currentNumber;
    currentNumber = 0;
}

JAVACODE
/**
 * Sets the main chain to be cyclic.
 */
void SetMainCyclic() {
    isMainCyclic = true;
}

TOKEN :
{
    < EOL: "\n" | "\r" >
}

TOKEN : /*NUMBERS*/
{
    < CONSTANT: ( <DIGIT> )+ >
  | < #DIGIT: ["0" - "9"] >
}

TOKEN : /*NUMBER CONNECTORS*/
{
    < DASH: "-" >
  | < COMMA: "," >
}

/**
 * Initial small numbers.
 */
TOKEN :
{
    < METH: "meth" >
  | < ETH: "eth" >
  | < PROP: "prop" >
  | < BUT: "but" >
}

/**
 * Other special cases.
 */
TOKEN :
{
    < UNDEC: "undec" >
  | < EICOS: "eicos" | "icos" >
  | < HENICOS: "henicos" >
}

/**
 * Usual numbers for base 10 numbering.
 */
TOKEN :
{
    < HEN: "hen" >
  | < DO: "do" >
  | < TRI: "tri" >
  | < TETR: "tetra" >
  | < PENT: "pent" >
  | < HEX: "hex" >
  | < HEPT: "hept" >
  | < OCT: "oct" >
  | < NON: "non" >
}

/**
 * Positional aides which give the magnitude of the base numbers.
* Equivalent to "...ty" and "...hundred" in English
*/
TOKEN :
{
    < DEC: "dec" >
  | < COS: "cos" >
  | < CONT: "cont" >
}

/* Skip the "a" letter for greek numbers */
SKIP :
{
    < A : "a" >
  | < SPACE : " " >
}

TOKEN : /*BOND MODIFIERS*/
{
    < AN: "an" >
  | < EN: "en" >
  | < YN: "yn" >
}

TOKEN : /*CONNECTOR*/
{
    < YL: "yl" >
  | < DI: "di" >
  | < CYCLO: "cyclo" >
}

TOKEN : /*PREFIXES*/
{
    < CHLORO: "chloro" >
  | < FLUORO: "fluoro" >
  | < BROMO: "bromo" >
  | < IODO: "iodo" >
  | < NITRO: "nitro" >
  | < OXO: "oxo" >
  | < PHENYL: "phenyl" >
  | < AMINO: "amino" >
  | < ALUMINO: "alumino" >
  | < LITHO: "litho" >
  | < HYDROXY: "hydroxy" >
}

TOKEN : /*FUNCTIONAL GROUP SUFFIXES*/
{
    < E: "e">
  | < OL: "ol" >
  | < OICACID: "oic acid" >
  | < OYLCHLORIDE: "oyl chloride" >
  | < NITRILE: "nitrile" >
  | < AL: "al" >
  | < AMIDE: "amide" >
  | < AMINE: "amine" >
  | < ONE: "one" >
  | < OATE: "oate" >
}

TOKEN : /* METALS */
{
    < LITHIUM: "lithium" >
  | < SODIUM: "sodium" >
  | < POTASSIUM: "potassium" >
  | < RUBIDIUM: "rubidium" >
  | < CESIUM: "cesium" >
  | < FRANCIUM: "francium" >
  | < BERYLLIUM: "beryllium" >
  | < MAGNESIUM: "magnesium" >
  | < CALCIUM: "calcium" >
  | < STRONTIUM: "strontium" >
  | < BARIUM: "barium" >
  | < RADIUM: "radium">
  | < SCANDIUM: "scandium" >
  | < YTTRIUM: "yttrium" >
  | < LANTHANUM: "lanthanum" >
  | < ACTINIUM: "actinium" >
  | < TITANIUM: "titanium" >
  | < ZIRCONIUM: "zirconium" >
  | < HAFNIUM: "hafnium" >
  | < RUTHERFORDIUM: "rutherfordium" >
  | < VANADIUM: "vanadium" >
  | < NIOBIUM: "niobium" >
  | < TANTALUM: "tantalum" >
  | < DUBNIUM: "dubnium" >
  | < CHROMIUM: "chromium" >
  | < MOLYBDENUM: "molybdenum" >
  | < TUNGSTEN: "tungsten" >
  | < SEABORGIUM: "seaborgium" >
  | < MANGANESE: "manganese" >
  | < TECHNETIUM: "technetium" >
  | < RHENIUM: "rhenium" >
  | < BOHRIUM: "bohrium" >
  | < IRON: "iron" >
  | < RUTHENIUM: "ruthenium" >
  | < OSMIUM: "osmium" >
  | < HASSIUM: "hassium" >
  | < COBALT: "cobalt">
  | < RHODIUM: "rhodium">
  | < IRIDIUM: "iridium" >
  /* NOTE(review): image looks like a misspelling of "meitnerium" - confirm with the code consuming this token before changing */
  | < MEITMERIUM: "meitmerium" >
  | < NICKEL: "nickel" >
  | < PALLADIUM: "palladium" >
  | < PLATINUM: "platinum" >
  | < COPPER: "copper" >
  | < SILVER: "silver" >
  | < GOLD: "gold" >
  | < ZINC: "zinc" >
  | < CADMIUM: "cadmium" >
  | < MECURY: "mercury" >
  | < ALUMINIUM: "aluminium" >
  | < GALLIUM: "gallium" >
  | < INDIUM: "indium" >
  | < THALLIUM: "thallium" >
  /* NOTE(review): image looks like a misspelling of "germanium" - confirm with the code consuming this token before changing */
  | < GERMAINIUM: "germainium" >
  | < TIN: "tin" >
  | < LEAD: "lead" >
  | < ARSENIC: "arsenic" >
  | < ANTIMONY: "antimony" >
  | < BISMUTH: "bismuth" >
  | < SELENIUM: "selenium" >
  | < TELLURIUM: "tellurium" >
  | < POLONIUM: "polonium" >
  | < CERIUM: "cerium" >
  | < PRASEODYMIUM: "praseodymium" >
  | < NEODYMIUM: "neodymium" >
  | < PROMETHIUM: "promethium" >
  /* NOTE(review): image looks like a misspelling of "samarium" - confirm with the code consuming this token before changing */
  | < SANARIUM: "sanarium" >
  | < EUROPIUM: "europium" >
  | < GADOLINIUM: "gadolinium" >
  | < TERBIUM: "terbium" >
  | < DYSPROSIUM: "dysprosium" >
  | < HOLMIUM: "holmium" >
  | < ERBIUM: "erbium" >
  | < THULIUM: "thulium" >
  | < YTTERBIUM: "ytterbium" >
  | < LUTETIUM: "lutetium" >
  | < THORIUM: "thorium" >
  | < PROTACTINIUM: "protactinium" >
  | < URANIUM: "uranium" >
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -