📄 genmdeterministicgenerator.java
字号:
/* $Revision: 8201 $ $Author: egonw $ $Date: 2007-04-16 10:40:19 +0200 (Mon, 16 Apr 2007) $ * * Copyright (C) 2004-2007 Junfeng Hao * * Contact: cdk-devel@lists.sourceforge.net * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. */package org.openscience.cdk.structgen.deterministic;import java.io.FileWriter;import java.io.PrintWriter;import java.util.ArrayList;import java.util.List;import org.openscience.cdk.interfaces.IAtomContainer;import org.openscience.cdk.interfaces.IChemObjectBuilder;import org.openscience.cdk.interfaces.IMolecule;import org.openscience.cdk.nonotify.NoNotificationChemObjectBuilder;import org.openscience.cdk.structgen.IStructureGenerationListener;import org.openscience.cdk.tools.LoggingTool;import org.openscience.cdk.tools.MFAnalyser;/** * An adapted implementation of the Molodtsov structure generator. * Changes to the original idea include issues of * normalization and strong canonicity criteria * * <pre> * gdg = new GENMDeterministicGenerator(formula,""); * gdg.addListener(this); * gdg.setStructuresAtATime(500); * gdg.generate(); * </pre> * * <p>Please note that the number of isomers generated by this generator can quickly * become quite large and their storage in memory will take some space. 
* In order to handle the large amount of potential data, the generator hands you the * structures in small packets. You register with the generator as a * StructureGeneratorListener. In the respective stateChanged() method which you * must implement, you can do with the latest list of generated structures * whatever you want. Please note that the generator deletes things internally * after handing over the list. Make sure to remove references to structures in * which you are no longer interested, so the garbage collector can clean them up. * * <p>If no StructureGeneratorListener is registered, the generator will not store * the molecular structures, and only count them. The total number of constitutional * isomers can be calculated with: * <pre> * gdg = new GENMDeterministicGenerator(formula,""); * gdg.generate(); * int isomerCount = gdg.getNumberOfStructure(); * </pre> * * <p>Details are found in the following papers * {@cdk.cite Molodtsov94, Molchanova96, Hu94, Hu94b, Hu99}. * * @author Junfeng Hao * @author Christoph Steinbeck * @cdk.created 2004-02-16 * @cdk.module structgen * @cdk.bug 1678346 */public class GENMDeterministicGenerator { private LoggingTool logger; private int numberOfSetFragment; private int numberOfStructures; private IAtomContainer atomContainer; private int[] molecularFormula; private int[] numberOfBasicUnit; private int[] numberOfBasicFragment; private List basicFragment; private List structures; private PrintWriter structureout; private long returnedStructureCount = 500; private static double LOST=0.000000000001; private List listeners = null; boolean hasMoreStructures = false; int structuresAtATime = 500; private IChemObjectBuilder builder = null; /** * Constructor for the GENMDeterministicGenerator. * Allows for setting the molecular formula for which the * isomers are to be generated as well as for setting an output path * for a file with generated structures. 
* * @param mf molecular formula string * @param path Path to the file used for writing structures. Leave blank if current directory should be used. * If set to null then no structure file is written. */ public GENMDeterministicGenerator(String mf, String path) throws Exception { logger = new LoggingTool(GENMDeterministicGenerator.class); builder = NoNotificationChemObjectBuilder.getInstance(); numberOfSetFragment=0; numberOfStructures=0; logger.debug(mf); MFAnalyser mfa = new MFAnalyser(mf, builder.newAtomContainer()); molecularFormula=new int[12]; numberOfBasicUnit=new int[23]; numberOfBasicFragment=new int[34]; basicFragment=new ArrayList(); structures=new ArrayList(); listeners = new ArrayList(); if (path != null) structureout=new PrintWriter(new FileWriter(path+"structuredata.txt"),true); else structureout = null; initializeParameters(); analyseMolecularFormula(mfa); } /** * Central method of this Generator. Call * * @throws Exception */ public void generate() throws Exception { numberOfStructures = 0; generateBasicUnits(); logger.debug("numberofstructure is=", numberOfStructures); fireChange(); } public void setBasicUnits(List basicUnits) { if(basicUnits!=null)getBasicUnit(basicUnits); else logger.error("input false"); } /** * Get basic units from input information. 
* * @param basicUnits vector contains basic units which stored as string */ public void getBasicUnit(List basicUnits) { int i; for(i=0;i<basicUnits.size();i++) { String s=(String)basicUnits.get(i); if(s.equals("Si"))numberOfBasicUnit[1]+=1; else if(s.equals("P"))numberOfBasicUnit[2]+=1; else if(s.equals("S"))numberOfBasicUnit[3]+=1; else if(s.equals("N"))numberOfBasicUnit[4]+=1; else if(s.equals("O"))numberOfBasicUnit[5]+=1; else if(s.equals("C"))numberOfBasicUnit[6]+=1; else if(s.equals("SiH3"))numberOfBasicUnit[7]+=1; else if(s.equals("SiH2"))numberOfBasicUnit[8]+=1; else if(s.equals("SiH"))numberOfBasicUnit[9]+=1; else if(s.equals("PH2"))numberOfBasicUnit[10]+=1; else if(s.equals("PH"))numberOfBasicUnit[11]+=1; else if(s.equals("SH"))numberOfBasicUnit[12]+=1; else if(s.equals("NH2"))numberOfBasicUnit[13]+=1; else if(s.equals("NH"))numberOfBasicUnit[14]+=1; else if(s.equals("OH"))numberOfBasicUnit[15]+=1; else if(s.equals("CH3"))numberOfBasicUnit[16]+=1; else if(s.equals("CH2"))numberOfBasicUnit[17]+=1; else if(s.equals("CH"))numberOfBasicUnit[18]+=1; else logger.error("input error"); } } /** * Initialize the basic fragment. For the definition, please see the BasicFragment class. 
*/ public void initializeParameters() throws java.lang.Exception { basicFragment.add(new BasicFragment(1,4,1,0,1,">C<","C")); basicFragment.add(new BasicFragment(2,3,102,0,2,">C=","C")); basicFragment.add(new BasicFragment(3,2,2,0,3,"=C=","C")); basicFragment.add(new BasicFragment(4,2,103,0,4,"-C#","C")); basicFragment.add(new BasicFragment(5,3,1,1,5,">CH-","C")); basicFragment.add(new BasicFragment(6,2,102,1,6,"=CH-","C")); basicFragment.add(new BasicFragment(7,1,3,1,6,"CH#","C")); basicFragment.add(new BasicFragment(8,3,1,0,8,">N-","N")); basicFragment.add(new BasicFragment(9,2,102,0,9,"=N-","N")); basicFragment.add(new BasicFragment(10,1,3,0,10,"N#","N")); basicFragment.add(new BasicFragment(11,2,1,2,11,"-CH2-","C")); basicFragment.add(new BasicFragment(12,1,2,2,12,"CH2=","C")); basicFragment.add(new BasicFragment(13,2,1,1,13,"-NH-","N")); basicFragment.add(new BasicFragment(14,1,2,1,14,"NH=","N")); basicFragment.add(new BasicFragment(15,2,1,0,15,"-O-","O")); basicFragment.add(new BasicFragment(16,1,2,0,16,"O=","O")); basicFragment.add(new BasicFragment(17,2,1,0,17,"-S-","S")); basicFragment.add(new BasicFragment(18,1,2,0,18,"S=","S")); basicFragment.add(new BasicFragment(19,1,1,3,19,"CH3-","C")); basicFragment.add(new BasicFragment(20,1,1,2,20,"NH2-","N")); basicFragment.add(new BasicFragment(21,1,1,1,21,"OH-","O")); basicFragment.add(new BasicFragment(22,1,1,1,22,"-SH","S")); basicFragment.add(new BasicFragment(23,3,1,0,23,">P-","P")); basicFragment.add(new BasicFragment(24,2,1,1,24,"-PH-","P")); basicFragment.add(new BasicFragment(25,1,1,2,25,"PH2-","P")); basicFragment.add(new BasicFragment(26,4,1,0,26,">Si<","Si")); basicFragment.add(new BasicFragment(27,3,1,1,26,">SiH-","Si")); basicFragment.add(new BasicFragment(28,2,1,2,28,"-SiH2-","Si")); basicFragment.add(new BasicFragment(29,1,1,3,29,"SiH3-","Si")); basicFragment.add(new BasicFragment(30,1,1,0,30,"F-","F")); basicFragment.add(new BasicFragment(31,1,1,0,31,"Cl-","Cl")); basicFragment.add(new 
BasicFragment(32,1,1,0,32,"Br-","Br")); basicFragment.add(new BasicFragment(33,1,1,0,33,"I-","I")); /*for decompose the complex fragment*/ basicFragment.add(new BasicFragment(42,2,1,0,2,">C","C")); basicFragment.add(new BasicFragment(43,1,2,0,2,"C=","C")); basicFragment.add(new BasicFragment(44,1,1,0,4,"C-","C")); basicFragment.add(new BasicFragment(45,1,3,0,4,"C#","C")); basicFragment.add(new BasicFragment(46,1,1,1,6,"CH-","C")); basicFragment.add(new BasicFragment(47,1,2,1,6,"CH=","C")); basicFragment.add(new BasicFragment(50,1,1,0,9,"N-","N")); basicFragment.add(new BasicFragment(51,1,2,0,9,"N=","N")); //Maybe add later return; } /** * Analyse molecular formula to verify it is valid. * * @param mfa MFAnalyser object to operate the molecular formula */ public void analyseMolecularFormula(MFAnalyser mfa) throws java.lang.Exception { molecularFormula[1]=mfa.getAtomCount("C"); molecularFormula[2]=mfa.getAtomCount("H"); molecularFormula[3]=mfa.getAtomCount("O"); molecularFormula[4]=mfa.getAtomCount("N"); molecularFormula[5]=mfa.getAtomCount("S"); molecularFormula[6]=mfa.getAtomCount("P"); molecularFormula[7]=mfa.getAtomCount("Si"); molecularFormula[8]=mfa.getAtomCount("F"); molecularFormula[9]=mfa.getAtomCount("Cl"); molecularFormula[10]=mfa.getAtomCount("Br"); molecularFormula[11]=mfa.getAtomCount("I"); molecularFormula[0]=2*molecularFormula[1]+molecularFormula[4]+molecularFormula[6]+ 2*molecularFormula[7]+2-molecularFormula[2]-molecularFormula[8]-molecularFormula[9]- molecularFormula[10]-molecularFormula[11]; if(molecularFormula[0]<0) { logger.debug("Input molecular formula error!"); } // for(i=1;i<=11;i++) // logger.debug("molecularFormula["+i+"]="+molecularFormula[i]); return; } /** * The first step: generate sets of basic units by backtracking algorithm. 
*/ public void generateBasicUnits() throws java.lang.Exception { int[] maxNumberOfBasicUnit=new int[23]; int i,j,k; int iter1,iter2,iter3,iter4; int numberSi,numberP,numberS,numberN,numberNH; // numberSiH,numberSH,numberPH int numberO,numberOH,numberCH,numberH; int basicUnit; /* Generate the maximum number of basic units based on molecular formula. The corresponding basic units is Si,P,S,N,O,C,SiH3,SiH2,SiH,PH2,PH,SH,NH2,NH,OH,CH3,CH2,CH,F,Cl,Br,I. We could get the basic units directly for F,Cl,Br,I as they do not contain hydrogen atoms.*/ maxNumberOfBasicUnit[1]=molecularFormula[7];//Si maxNumberOfBasicUnit[2]=molecularFormula[6];//P maxNumberOfBasicUnit[3]=molecularFormula[5];//S maxNumberOfBasicUnit[4]=molecularFormula[4];//N maxNumberOfBasicUnit[5]=molecularFormula[3];//O maxNumberOfBasicUnit[6]=molecularFormula[1];//C maxNumberOfBasicUnit[7]=molecularFormula[1];//SiH3 maxNumberOfBasicUnit[8]=molecularFormula[1];//SiH2 maxNumberOfBasicUnit[9]=molecularFormula[1];//SiH maxNumberOfBasicUnit[10]=molecularFormula[6];//PH2 maxNumberOfBasicUnit[11]=molecularFormula[6];//PH maxNumberOfBasicUnit[12]=molecularFormula[5];//SH maxNumberOfBasicUnit[13]=molecularFormula[4];//NH2 maxNumberOfBasicUnit[14]=molecularFormula[4];//NH maxNumberOfBasicUnit[15]=molecularFormula[3];//OH maxNumberOfBasicUnit[16]=molecularFormula[1];//CH3 maxNumberOfBasicUnit[17]=molecularFormula[1];//CH2 maxNumberOfBasicUnit[18]=molecularFormula[1];//CH /* for CH3, CH2 the number of H should be consider at the same time*/ i=molecularFormula[2]; j=i/3; k=i/2; if(maxNumberOfBasicUnit[16]>j)maxNumberOfBasicUnit[16]=j; if(maxNumberOfBasicUnit[17]>k)maxNumberOfBasicUnit[17]=k; /* initialization */ for(i=1;i<=22;i++)numberOfBasicUnit[i]=0; numberSi=0; //numberSiH=0; numberP=0; //numberPH=0; numberS=0; //numberSH=0; numberN=0; numberNH=0; numberO=0; numberOH=0; numberCH=0; numberH=0; basicUnit=0; /* to distribute the hydrogen into heavy atoms. 
It is easily to see that only the basic units which contain hydrogens need to be considered. Therefore */ iter1=6; iter2=18; do { iter1++; while(iter1<iter2) { numberOfBasicUnit[iter1]=0; switch(iter1-1) { case 9: numberSi=numberOfBasicUnit[7]+numberOfBasicUnit[8]+numberOfBasicUnit[9]; //numberSiH=numberSi+2*numberOfBasicUnit[7]+numberOfBasicUnit[8]; break; case 11: numberP=numberOfBasicUnit[10]+numberOfBasicUnit[11]; //numberPH=numberSiH+numberP+numberOfBasicUnit[10]; break; case 12: numberS=numberOfBasicUnit[12]; //numberSH=numberPH+numberS; break; case 14: numberN=numberOfBasicUnit[13]+numberOfBasicUnit[14]; numberNH=numberS+numberN+numberOfBasicUnit[13]; break; case 15: numberO=numberOfBasicUnit[15]; numberOH=numberNH+numberO; break; } iter1++; } do { /* begin from CH*/ numberCH=numberOH+3*numberOfBasicUnit[16]+2*numberOfBasicUnit[17]; numberH=molecularFormula[2]-numberCH;//left number of hydrogen atoms if(numberH>maxNumberOfBasicUnit[18])break; if(numberH<0)break; numberOfBasicUnit[18]=numberH; /* for Si */ numberOfBasicUnit[1]=molecularFormula[7]-numberSi; if(numberOfBasicUnit[1]>maxNumberOfBasicUnit[1])break; if(numberOfBasicUnit[1]<0)break; /* for P */ numberOfBasicUnit[2]=molecularFormula[6]-numberP; if(numberOfBasicUnit[2]>maxNumberOfBasicUnit[2])break; if(numberOfBasicUnit[2]<0)break; /* for S */ numberOfBasicUnit[3]=molecularFormula[5]-numberS; if(numberOfBasicUnit[3]>maxNumberOfBasicUnit[3])break; if(numberOfBasicUnit[3]<0)break; /* for N */ numberOfBasicUnit[4]=molecularFormula[4]-numberN; if(numberOfBasicUnit[4]>maxNumberOfBasicUnit[4])break; if(numberOfBasicUnit[4]<0)break; /* for O */ numberOfBasicUnit[5]=molecularFormula[3]-numberO; if(numberOfBasicUnit[5]>maxNumberOfBasicUnit[5])break; if(numberOfBasicUnit[5]<0)break; /* for C */ numberOfBasicUnit[6]=molecularFormula[1]-numberOfBasicUnit[16]-numberOfBasicUnit[17]-numberOfBasicUnit[18]; if(numberOfBasicUnit[6]>maxNumberOfBasicUnit[6])break; if(numberOfBasicUnit[6]<0)break; basicUnit+=1; 
numberOfBasicUnit[0]=basicUnit; /* for F,Cl,Br,I */ for(i=19;i<=22;i++) if(molecularFormula[i-11]!=0)numberOfBasicUnit[i]=molecularFormula[i-11]; if (logger.isDebugEnabled()) { for(i=0;i<=22;i++) { if(numberOfBasicUnit[i]!=0) { logger.debug("numberOfBasicUnit["+i+"]="+numberOfBasicUnit[i]); logger.debug("numberOfBasicUnit["+i+"]="+numberOfBasicUnit[i]); } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -