📄 miner.java
字号:
/*---------------------------------------------------------------------- File : Miner.java Contents: Find common substructures of molecules Author : Christian Borgelt History : 11.03.2002 file created 15.03.2002 main function added 18.03.2002 search improved, output to files added 21.03.2002 function output added, main program restructured 28.03.2002 optional use of Sybyl line notation added masks for charge/aromaticity comparison added 29.03.2002 empty seed structures made possible 31.03.2002 inversion of split at threshold made possible 03.04.2002 immediate output of substructures added 04.04.2002 option to ignore the bond type added 14.04.2002 function to set molecule markers added 22.04.2002 interpretation of options +/-: modified 01.05.2002 search horizon problem fixed (maximum size) 15.07.2002 pruning of equivalent branches added 21.02.2003 option -C (find carbon chains) added 31.03.2003 output of Prolog description added 01.08.2003 perfect extension pruning (option -P) added 03.08.2003 ring extensions (options -r and -K) added 04.08.2003 ignore bond type only in rings (option -B) added 07.08.2003 adapted to new classes, considerable rewrite 10.08.2003 empty seeds, atom type exclusion (-x) added 28.03.2004 bug concerning output tokenizer fixed 05.04.2004 bug in atom sorting w.r.t. frequency fixed 07.06.2005 bridge finding functions added 21.07.2005 molecule encoding added, seedless search modified 22.07.2005 molecule trimming (removal and marking) added 23.07.2005 recording optimized with test for closed fragment 24.07.2005 setup restructured, canonical form pruning added 01.08.2005 trimming of input lines and fields added 02.08.2005 masking atom and bond types made an extra phase 03.08.2005 special search code for seed embedding removed 04.08.2005 recursive search simplified, defaults changed 05.08.2005 application of pruning criteria restructured 06.08.2005 carbon chain pruning simplified (min. 
length) 08.08.2005 combined perfect extension/canonical form pruning 10.08.2005 initial fragments created through wrappers 11.08.2005 adapted to second extension (rightmost path) 15.08.2005 seed handling included in preparation functions 16.08.2005 'deletion' of processed fragments added 17.08.2005 fragment creation modified, option -M added 23.08.2005 bug in molecule grouping fixed (mol.group) 02.09.2005 adapted to modified packed embedding lists 01.03.2006 run method and possibility to abort search added 04.04.2006 main function restructured, new constructor 10.04.2006 bug in function init fixed (seed parsing) 08.05.2006 fragment adaptation, function record simplified 09.05.2006 option -N added (normalized fragment output) 12.05.2006 option -R changed to -g, new option -R added 16.05.2006 bug in molecule identifier output fixed 17.05.2006 option -O added (do not record open rings) 31.05.2006 definition of constant MERGERINGS added 02.06.2006 ring exts. combined with canonical form pruning 04.06.2006 ring exts. combined with rightmost extensions 06.06.2006 bug in combined rightmost/perfect exts. fixed 07.06.2006 equivalent sibling pruning for ring extensions 08.06.2006 bug in combined rightmost/perfect exts. fixed 19.06.2006 closed fragment pruning for CLOSERINGS corrected 21.06.2006 optional early removal of duplicates added 22.06.2006 function isDuplicate added, record redesigned 26.06.2006 miner initialization considerably restructured 28.06.2006 some code cleanup, some functions reordered 29.06.2006 molecule grouping moved to loadMolecules 03.07.2006 comments allowed in input file (# at line start) 04.07.2006 error reporting in loadMolecules improved 06.07.2006 adapted to changes of class Extension 10.07.2006 perfect extension pruning allowed for ring exts. 
12.07.2006 counter for repository comparisons added 13.07.2006 bug in bond and atom masking fixed 18.07.2006 parameter setting restructured for class MoSS 19.07.2006 handling of negative support values improved 04.08.2006 revert extension information only if necessary 09.08.2006 ignoring the atom type only in rings added 10.08.2006 adapted to new class TypeMap 12.08.2006 adapted to new Notation classes 13.08.2006 atom types to exclude as seeds added (option -y)----------------------------------------------------------------------*/
package moss;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;

/*--------------------------------------------------------------------*/
/* Miner: finds common substructures of molecules (cf. the "Contents"
   entry of the file header). The class implements Runnable; the run
   method itself is not part of this section of the file. */
public class Miner implements Runnable {
/*--------------------------------------------------------------------*/

  /* --- program identification strings --- */
  public static final String description =
    "molecular substructure miner (MoSS)";
  public static final String version =
    "version 4.9 (2006.08.13)";
  public static final String copyright =
    "(c) 2002-2006 Christian Borgelt / Tripos, Inc.";

  /* --- constants: sizes and flags --- */
  /* BLKSIZE is a private size constant; its use is not visible in
     this section of the file. */
  private static final int BLKSIZE    = 16;

  /* The following four constants are aliases for the constants of
     the same purpose in class Extension. All flags below are meant
     to be combined with bitwise or into the operation mode (the
     constructor, for example, uses BONDEXT|PR_DEFAULT). */
  public  static final int BONDEXT    = Extension.BOND;
  public  static final int RINGEXT    = Extension.RING;
  public  static final int CHAINEXT   = Extension.CHAIN;
  public  static final int EQVARS     = Extension.EQVARS;

  /* Single-bit flags 0x0010 .. 0x0080. Judging by the names they
     control rightmost extensions and the treatment of rings --
     exact semantics to be confirmed where the flags are tested. */
  public  static final int RIGHTEXT   = 0x0010;
  public  static final int CLOSERINGS = 0x0020;
  public  static final int MERGERINGS = 0x0040;
  private static final int FULLRINGS  = 0x0080;

  /* Flags with the prefix PR_ (0x0100 .. 0x1000); presumably they
     select pruning methods -- confirm at the places of use.
     PR_DEFAULT (0x1400) has exactly the bits of PR_PERFECT (0x0400)
     and PR_CANONIC (0x1000) set. */
  public  static final int PR_UNCLOSE = 0x0100;
  public  static final int PR_PARTIAL = 0x0200;
  public  static final int PR_PERFECT = 0x0400;
  public  static final int PR_EQUIV   = 0x0800;
  public  static final int PR_CANONIC = 0x1000;
  public  static final int PR_DEFAULT = 0x1400;

  /* Further single-bit mode flags (presumably normalized output
     form and verbose message output -- TODO confirm). */
  public  static final int NORMFORM   = 0x2000;
  public  static final int VERBOSE    = 0x4000;

  /* --- instance 
variables --- */
  /* Support thresholds exist twice: the float fields (fsupp, fcomp)
     receive the values passed to setParams(), which leaves the int
     fields (supp, comp) untouched. Presumably the int fields hold
     absolute values derived from the float ones elsewhere in the
     class -- TODO confirm. */
  protected int         mode;     /* operation mode flags */
  protected float       fsupp;    /* minimum support in focus (float) */
  protected int         supp;     /* minimum support in focus (int) */
  protected float       fcomp;    /* maximum support in complement (float) */
  protected int         comp;     /* maximum support in complement (int) */
  protected int         min;      /* minimum size of substructures */
  protected int         max;      /* maximum size of substructures */
  protected int         rgmin;    /* minimum size of rings */
  protected int         rgmax;    /* maximum size of rings */
  protected int[]       masks;    /* masks for atoms and bonds: */
                                  /* [0] atom types outside rings, */
                                  /* [1] bond types outside rings, */
                                  /* [2] atom types inside rings, */
                                  /* [3] bond types inside rings */
                                  /* (index meaning as in setMasks) */
  protected Molecule    seed;     /* seed structure to start from */
  protected Molecule    excl;     /* excluded atom types */
  protected Molecule    exseed;   /* excluded seed types */
  protected Molecule    mols;     /* list of molecules */
  protected Molecule    curr;     /* current insertion point for focus */
  protected Molecule    tail;     /* tail of the list of molecules */
  protected int         molcnt;   /* number of molecules */
  protected Fragment    frag;     /* initial fragment (seed structure) */
  protected int         mepm;     /* max. embeddings per molecule */
  protected Molecule[]  subs;     /* list of substructures found */
  protected int         subcnt;   /* number of substructures */
  protected int[]       cnts;     /* support counters for split */
                                  /* (the constructor allocates 4) */
  protected TypeMap     map;      /* map for atom types */
  protected Extension   ext;      /* extension structure */
  protected Notation    ntn;      /* notation for formatting */
  protected PrintStream out;      /* substructure desc. stream */
  protected PrintStream ids;      /* molecule identifier stream */
  /* NOTE(review): stop is a plain (non-volatile) field. If it is set
     from a thread other than the one executing the search, the write
     is not guaranteed to become visible -- confirm against the code
     that sets and tests this flag (not part of this section). */
  private   boolean     stop;     /* whether thread has been stopped */

  /* --- benchmark variables --- */
  protected long        nodecnt;  /* number of search tree nodes */
  protected long        fragcnt;  /* number of created fragments */
  protected long        embcnt;   /* number of created embeddings */
  protected long        lowsupp;  /* insuff. support pruning counter */
  protected long        perfect;  /* perfect extension pruning counter */
  protected long        equiv;    /* equivalent frag. 
 pruning counter */
  protected long        ringord;  /* ring order pruning counter */
  protected long        canonic;  /* canonical form pruning counter */
  protected long        duplic;   /* duplicate fragments counter */
  protected long        nonclsd;  /* non-closed fragments counter */
  protected long        openrgs;  /* open ring fragments counter */
  protected long        invalid;  /* invalid fragments counter */
  protected long        compcnt;  /* number of repository comparisons */

  /*------------------------------------------------------------------*/
  /* Create a substructure finder with default settings: mode
     BONDEXT|PR_DEFAULT, fragment sizes 1 .. Integer.MAX_VALUE, ring
     size range 0 .. 0, no seed, no excluded types, no molecules and
     no output streams. The benchmark counters are not assigned here
     and thus keep the Java default value 0. */

  public Miner ()
  {                             /* --- create a substructure finder */
    this.mode   = BONDEXT|PR_DEFAULT; /* set the default extension mode */
    this.fsupp  = 0.1F;         /* and default values for */
    this.fcomp  = 0.02F;        /* minimal support in focus and */
    this.supp   = 1;            /* maximal support in complement */
    this.comp   = 0;            /* (float and int variants) */
    this.min    = 1;            /* minimum and maximum fragment size */
    this.max    = Integer.MAX_VALUE;  /* (maximum: largest int value) */
    this.rgmin  = 0;            /* clear the range of ring sizes */
    this.rgmax  = 0;            /* (i.e., do not consider rings) */
    this.masks  = new int[4];   /* init. the bond and atom masks: */
    this.masks[0] = this.masks[2] = Atom.TYPEMASK;  /* atoms (out/in rings) */
    this.masks[1] = this.masks[3] = Bond.TYPEMASK|Bond.RINGBOND; /* bonds */
    this.seed   = null;         /* clear the seed structure, */
    this.excl   = null;         /* the excluded atom types */
    this.exseed = null;         /* and the excluded seed types */
    this.mols   = this.curr = this.tail = null;
    this.molcnt = 0;            /* clear the list of molecules */
    this.frag   = null;         /* and the initial fragment */
    this.mepm   = 0;            /* clear max. number of embeddings */
    this.subs   = null;         /* clear list of found substructures */
    this.subcnt = 0;            /* and the number of substructures */
    this.cnts   = new int[4];   /* create the support counters */
    this.map    = null;         /* clear the label map, */
    this.ext    = null;         /* the extension object, */
    this.ntn    = null;         /* the notation for formatting, */
    this.out    = null;         /* and the output streams */
    this.ids    = null;         /* (for substructures and ids) */
    this.stop   = false;        /* init. the stopped flag */
  }  /* Miner() */

  /*------------------------------------------------------------------*/
  /* Set the basic search parameters.
     mode   operation/search mode flags, stored unchanged
     supp   minimum support in focus, stored in fsupp
     comp   maximum support in complement, stored in fcomp
     min    minimum fragment size, stored unchanged
     max    maximum fragment size; a value <= 0 is replaced by
            Integer.MAX_VALUE
     rgmin  minimum ring size, stored unchanged
     rgmax  maximum ring size; a value below rgmin is raised to rgmin
     The int fields supp and comp are not modified by this method.
     The unit of supp and comp (fraction or percentage) cannot be
     told from this section -- TODO confirm. */

  public void setParams (int mode, float supp, float comp,
                         int min, int max, int rgmin, int rgmax)
  {                             /* --- set basic parameters */
    this.mode  = mode;          /* operation/search mode */
    this.fsupp = supp;          /* minimum support in focus */
    this.fcomp = comp;          /* maximal support in complement */
    this.min   = min;           /* minimum and maximum fragment size */
    this.max   = (max <= 0) ? Integer.MAX_VALUE : max;
    this.rgmin = rgmin;         /* minimum and maximum ring size */
    this.rgmax = (rgmax >= rgmin) ? rgmax : rgmin;
  }  /* setParams() */

  /*------------------------------------------------------------------*/
  /* Set the four masks for atom and bond types. The values are
     written into the existing masks array (allocated with four
     elements by the constructor) without any check. */

  public void setMasks (int atom, int bond, int ringatom, int ringbond)
  {                             /* --- set atom and bond masks */
    this.masks[0] = atom;       /* mask for atom types outside rings */
    this.masks[1] = bond;       /* mask for bond types outside rings */
    this.masks[2] = ringatom;   /* mask for atom types inside rings */
    this.masks[3] = ringbond;   /* mask for bond types inside rings */
  }  /* setMasks() */

  /*------------------------------------------------------------------*/
  /* Set the maximum number of embeddings per molecule. The value is
     stored unchecked; the constructor default is 0 (how 0 is
     interpreted is not visible in this section). */

  public void setMaxEmbs (int mepm)
  { this.mepm = mepm; }         /* --- set the maximum embs./molecule */

  /*------------------------------------------------------------------*/
  /* Set the excluded atom types (excl) and the excluded seed types
     (exsd). Only the references are stored; no copy is made and null
     is accepted as is. */

  public void setExcluded (Molecule excl, Molecule exsd)
  {                             /* --- set the excluded atom types */
    this.excl   = excl;         /* store excluded atom types */
    this.exseed = exsd;         /* and excluded seed types */
  }

  /*------------------------------------------------------------------*/

  public void setExcluded (String format, String exat, String exsd)
    throws IOException
  {                             /* --- set the excluded atom types */
    Notation n = (format.equalsIgnoreCase("sln"))
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -