📄 miner.java
字号:
} /* and count it (for benchmark) */ } /* (after this loop cnt is the */ } /* number of created fragments) */ this.fragcnt += cnt; /* count all fragments (benchmark) */ /* --- support-based pruning --- */ for (i = n = 0; i < cnt; i++) { /* traverse the fragments */ if (frls[i].supp[0] < this.supp) { this.lowsupp++; continue; } frls[n++] = frls[i]; /* collect the frequent fragments */ if ((frls[i].supp[0] == frag.supp[0]) && (frls[i].supp[1] == frag.supp[1]) && (((this.mode & RINGEXT) != 0) || ((this.mode & CLOSERINGS) == 0))) frag.setClosed(false); /* if an extension has same support, */ } /* mark the fragment as non-closed */ while (cnt > n) /* delete all non-frequent fragments */ frls[--cnt] = null; /* from the fragment list */ /* If fragments with open rings are to be suppressed, but ring */ /* extensions are not used, explicit tests for closed fragments */ /* are necessary. Otherwise certain fragments may get lost. */ /* Thus not all qualifying fragments must be marked as closed. */ /* --- carbon chain pruning --- */ r = 0; /* clear the carbon chain flag */ if ((this.mode & CHAINEXT) != 0) { for (i = n = 0; i < cnt; i++) { /* traverse the fragments */ if (frls[i].size < -1) continue; if (frls[i].size < 0) r = -1; frls[n++] = frls[i]; /* remove chains with a min. length */ } /* greater than one carbon atom */ while (cnt > n) /* collect the other fragments */ frls[--cnt] = null; /* at the front of the vector and */ } /* decrement the fragment counter */ /* --- ring extension merging --- */ if ((this.mode & MERGERINGS) != 0) cnt = frag.mergeExts(frls, cnt); /* --- unclosable ring pruning --- */ if ((this.mode & PR_UNCLOSE) != 0) { for (i = n = 0; i < cnt; i++) { /* traverse the fragments */ if (frls[i].isUnclosable(this.ext)) { this.openrgs++; continue; } frls[n++] = frls[i]; /* identify, count, and remove */ } /* fragments with unclosable rings */ while (cnt > n) /* collect the remaining fragments */ frls[--cnt] = null; /* at the front of the vector and */ } /* decrement the fragment counter */ /* --- perfect extension pruning --- */ if ((cnt > 1) /* if to prune w.r.t. perfect exts. */ && ((this.mode & (PR_PERFECT|PR_PARTIAL)) != 0)) { for (i = 0; i < cnt; i++) /* search for a perfect extension */ if (frls[i].isPerfect(r != 0)) break; if (i < cnt) { /* if there is a perfect extension, */ this.perfect++; /* count it (benchmarking) */ if ((this.mode & PR_PERFECT) == 0) { /* if partial pruning */ while (--cnt > i) /* delete all extensions to the right */ frls[cnt] = null; /* of the perfect extension, because */ cnt++; } /* they yield non-closed fragments */ else { /* if full perfect extension pruning, */ revert = (i > 0); /* check whether to revert ext. info. */ frls[0] = frls[i]; /* note this specific extension */ while (cnt > 1) /* and delete all other extensions */ frls[--cnt] = null; /* (process only perfect extension) */ } /* later the extension information */ } /* is reset to the base fragment */ } /* (otherwise fragments are lost) */ /* --- equivalent sibling pruning --- */ if ((cnt > 1) /* if to prune equivalent siblings */ && ((this.mode & PR_EQUIV) != 0)) { adapt = ((this.mode & PR_CANONIC) != 0) && ((this.mode & RIGHTEXT) != 0); for (i = n = 1; i < cnt; i++) { for (k = n; --k >= 0;) /* traverse the fragment pairs */ if (frls[i].isEquivTo(frls[k])) break; if (k < 0) { /* collect the unique fragments */ frls[n++] = frls[i]; continue; } this.equiv++; /* count the equivalent fragment */ if (!adapt || !frls[i].isRingBondExt()) continue; /* only ring bond exts. need work */ frls[i].adapt(this.ext);/* adapt the two fragments before */ frls[k].adapt(this.ext);/* comparing their code words */ /* Note that multiple adaptation calls for the same fragment */ /* are harmless, because the fragment records whether it has */ /* already been adapted and thus the work is done only once. */ /* Note also that these adaptation calls cannot fail, since */ /* ring extensions can not be combined with canonical form */ /* pruning and equivalent sibling pruning at the same time. */ /* However, only with both pruning methods "adapt" may fail. */ this.ext.makeWord(frls[i].getAsMolecule()); r = this.ext.compareWord(frls[k].getAsMolecule()); if ((r > 0) || ((r == 0) && (frls[i].idx < frls[k].idx))) frls[k] = frls[i]; /* compare code words of fragments */ } /* and keep only smallest code word */ while (cnt > n) /* "delete" all other fragments */ frls[--cnt] = null; /* from the fragment vector and */ } /* decrement the fragment counter */ /* --- fragment adaptation and ring order pruning --- */ if ((((this.mode & RIGHTEXT) != 0) && ((this.mode & PR_PERFECT) != 0)) || (((this.mode & PR_CANONIC) != 0) && ((this.mode & (PR_PERFECT|FULLRINGS)) != 0))) { for (i = n = 0; i < cnt; i++) { if (!frls[i].adapt(this.ext)) { this.ringord++; continue; } frls[n++] = frls[i]; /* adapt the fragments and keep */ } /* only successfully adapted ones */ while (cnt > n) /* "delete" all other fragments */ frls[--cnt] = null; /* from the fragment vector and */ } /* decrement the fragment counter */ /* If full perfect extension pruning is used, the added bond */ /* may have to be shifted past existing perfect extensions. */ /* This is necessary with rightmost extensions even without */ /* canonical form pruning to ensure proper later extensions. */ /* Similarly, if ring extensions are used, new bonds must be */ /* shifted past already added, but not yet fixed ring bonds. */ /* --- canonical form pruning --- */ if ((this.mode & PR_CANONIC) != 0) { part = ((this.mode & FULLRINGS) != 0); for (i = n = 0; i < cnt; i++) { /* traverse the fragments */ r = frls[i].isCanonic(this.ext, part); if (r < 0) { this.canonic++; continue; } if (r <= 0) frls[i].setValid(false); frls[n++] = frls[i]; /* identify, count, and remove */ } /* the non-canonical fragments */ while (cnt > n) /* collect the remaining fragments */ frls[--cnt] = null; /* at the front of the vector and */ } /* decrement the fragment counter */ /* --- remove duplicates with repository --- */ else { /* if ((this.mode & PR_CANONIC) == 0) */ for (i = n = 0; i < cnt; i++) { /* traverse the fragments */ if (this.duplicate(frls[i])) { this.duplic++; continue; } frls[n++] = frls[i]; /* identify, count, and remove */ } /* already processed fragments */ while (cnt > n) /* collect the remaining fragments */ frls[--cnt] = null; /* at the front of the vector and */ } /* decrement the fragment counter */ /* If canonical form pruning is not used, duplicate fragments */ /* have to be identified and removed by checking each generated */ /* fragment against a repository of already processed fragments. */ /* --- revert extension information --- */ if ((cnt > 0) && revert) /* revert the extension information */ frls[0].revert(); /* to that of the base fragment */ /* If (full) perfect extension pruning is used, the extension */ /* information has to be reverted to that of the base fragment, */ /* because otherwise fragments are lost from the search tree */ /* branches to the left of the perfect extension branch. */ /* --- recursively process branches --- */ depth++; /* increment the recursion depth */ for (i = 0; i < cnt; i++) { /* search fragments recursively */ if (!this.recurse(frls[i], depth)) return false; /* after return from recursion */ frls[i] = null; /* "delete" the processed fragment */ } /* (allow for garbage collection) */ this.output(frag); /* output the current fragment */ return !this.stop; /* return whether search was stopped */ } /* recurse() */ /*-------------------------------------------------------------------- The above function forms all possible extensions of a list of a fragment (a list of embeddings) and sorts them into equivalence classes. Each of these classes is then processed recursively. The function should only be called for frequent fragments. --------------------------------------------------------------------*/ private int search () { /* --- search for substructures */ int i, k, atom; /* loop variables, atom buffer */ Molecule mol; /* to traverse the molecules */ Embedding emb; /* created list of embeddings */ String s; /* buffer for output formatting */ if ((this.mode & RIGHTEXT) != 0) this.ext = new RightExt (this.mode, this.max, this.map); else this.ext = new MaxSrcExt(this.mode, this.max, this.map); if ((this.mode & RINGEXT) != 0) this.ext.setRingSizes(this.rgmin, this.rgmax); this.subs = null; /* create an extension object and */ this.subcnt = 0; /* clear the list of substructures */ this.nodecnt = 0; /* init. the benchmark variables */ this.lowsupp = this.perfect = this.equiv = this.ringord = 0; this.canonic = this.duplic = this.nonclsd = this.openrgs = 0; this.invalid = this.compcnt = 0; this.out.println("id,desc,atoms,bonds,s_abs,s_rel,c_abs,c_rel"); /* print header for substructures */ if ((this.mode & VERBOSE) != 0) System.out.println(); /* if verbose output, start new line */ if (this.seed != null) { /* if there is a seed structure */ this.embcnt = this.frag.supp[2]; this.fragcnt = 1; /* search recursively from the seed */ if (this.frag.supp[0] >= this.supp) this.recurse(this.frag, 0); } else { /* if there are no initial embeddings */ this.fragcnt = this.embcnt = 0; for (i = 0; i < this.map.size; i++) { if (this.map.isExcluded(i) || this.map.isMaximal(i)) continue; /* traverse the different atoms */ atom = this.map.decode(i); s = this.ntn.format(atom) +" "; System.err.print("\nprocessing " +s.substring(0, 7)); if ((this.mode & VERBOSE) != 0) System.out.println(); /* if verbose output, start new line */ if (this.subs != null) /* clear the fragment repository */ for (k = this.subs.length; --k >= 0; ) this.subs[k] = null; this.frag = new Fragment(this.mepm); for (mol = this.mols; mol != null; mol = mol.succ) { emb = mol.embed(i); /* try to embed the atom */ if (emb != null) this.frag.add(emb); } /* collect embeddings in fragment */ this.fragcnt++; /* count the created embedding */ this.embcnt += this.frag.supp[2]; if ((this.frag.supp[0] >= this.supp) && !this.recurse(this.frag, 0)) return this.subcnt; /* search recursively */ this.map.exclude(i); /* exclude the processed atom */ for (mol = this.mols; mol != null; mol = mol.succ) mol.trim(false); /* trim the excluded atom */ } /* from the molecules */ System.err.println(); /* (fragments with this atom type */ } /* need not be considered again) */ return this.subcnt; /* return number of substructures */ } /* search() */ /*------------------------------------------------------------------*/ public void print (Notation not, PrintStream out, boolean norm) { /* --- output all molecules */ int n = 0; /* molecule counter */ Molecule mol; /* to traverse the molecules */ String s; /* buffer for log output */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -