⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 miner.java

📁 A program to find frequent molecular substructures and discriminative fragments in a database of mol
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
      if (mol.markBridges() != 0) n++;    return n;                   /* traverse the molecules */  }  /* markBridges() */        /* and mark the bridges in them */  /*--------------------------------------------------------------------  Bridges have to be marked for correct perfect extension pruning.  --------------------------------------------------------------------*/  private void setup ()  {                             /* --- set up the miner */    int      i;                 /* loop variable */    Molecule mol;               /* to traverse the molecules */    if (this.fsupp < 0) this.supp = (int)Math.ceil (-this.fsupp);    else this.supp = (int)Math.ceil (this.fsupp *this.cnts[0]);    if (this.fcomp < 0) this.comp = (int)Math.floor(-this.fcomp);    else this.comp = (int)Math.floor(this.fcomp *this.cnts[1]);    if (this.supp <= 0)         /* compute and adapt the */      this.supp  = 1;           /* absolute support values */    this.map = new TypeMap();   /* create a type map */    for (mol = this.mols; mol != null; mol = mol.succ) {      if (mol.group != 0) continue;      for (i = mol.atomcnt; --i >= 0; )        this.map.count(this.map.add(mol.atoms[i].type));      this.map.commit();        /* count the elements in the focus */    }                           /* and commit after each molecule */    if (this.excl != null) {    /* if specific atoms are excluded */      for (i = this.excl.atomcnt; --i >= 0; )        this.map.exclude(this.map.encode(this.excl.atoms[i].type));    }                           /* mark the excluded atoms */    if (this.exseed != null) {  /* if specific seeds are excluded */      for (i = this.exseed.atomcnt; --i >= 0; )        this.map.maximize(this.map.encode(this.exseed.atoms[i].type));    }                           /* mark the excluded seeds */    this.map.trim(this.supp);   /* trim the set of elements */    if ((this.seed != null)     /* if one atom seed structure */    &&  (this.seed.atomcnt == 1))      this.map.clear(this.map.encode(this.seed.atoms[0].type));    this.map.sort();            /* sort the elements by frequency */    for (mol = this.mols; mol != null; mol = mol.succ) {      mol.encode(this.map);     /* encode the molecules, */      mol.trim(true);           /* trim excluded atom types, */      mol.sort();               /* (re)sort the bonds, */      mol.unmark();             /* and clear all markers */    }    if (this.seed == null)      /* if there is no seed structure, */      return;                   /* no further prep. is necessary */    this.seed.encode(this.map); /* encode the seed's atoms and */    this.seed.sort(true);       /* prepare it for embedding */  }  /* setup() */  /*------------------------------------------------------------------*/  public int embed ()  {                             /* --- embed seed into all molecules */    int       n = 0;            /* molecule counter */    Molecule  mol;              /* to traverse the molecules */    Embedding emb;              /* created list of embeddings */    String    s;                /* buffer for counter output */    if (this.seed == null)      /* check for an empty seed */      return 0;                 /* and abort if there is none */    if (!this.seed.sort(true))  /* prepare the seed for embedding */      return -1;                /* and check whether it is connected */    this.frag = new Fragment(this.seed, this.mepm);    this.seed.supp = frag.supp; /* create an initial fragment */    for (mol = this.mols; mol != null; mol = mol.succ) {      emb = mol.embed(this.seed);    /* try to embed the seed */      if (emb == null) continue;/* into each of the molecules */      this.frag.add(emb);       /* collect embeddings in fragment */      if ((++n & 0xff) != 0)    /* count all molecules and */        continue;               /* print the molecule counter */      s = "        " +n;        /* if it is divisible by 256 */      System.err.print(s.substring(s.length() -8));      System.err.print("\b\b\b\b\b\b\b\b");    }                           /* print the molecule counter */    this.cnts[2] = this.frag.supp[0];    this.cnts[3] = this.frag.supp[1];    return n;                   /* return the number of molecules */  }  /* embed() */  /*------------------------------------------------------------------*/  private boolean duplicate (Fragment frag)  {                             /* --- check for a duplicate fragment */    int      n, k;              /* buffers */    Molecule sub, cand, vec[];  /* to traverse the substructures */    if (frag.isUnique())        /* if fragment was already checked, */      return false;             /* it cannot be a duplicate */    cand = frag.getAsMolecule();/* get the fragment as a molecule */    cand.sort(false);           /* if no canonical form pruning used, */    cand.unmark();              /* filter possible duplicates */    n = cand.bondcnt;           /* get the size of the substructure */    k = (this.subs == null) ? 0 : this.subs.length;    if (n >= k) {               /* if the substructure vector is full */      vec = new Molecule[k +((k > 256) ? (k >> 1) : 256)];      if (this.subs != null)    /* enlarge the substructure vector */        System.arraycopy(this.subs, 0, vec, 0, k);      this.subs = vec;          /* copy existing substructure lists */    }                           /* traverse recorded substrcutures */    for (sub = this.subs[n]; sub != null; sub = sub.succ) {      if ((sub.atomcnt != cand.atomcnt)      ||  (sub.supp[0] != cand.supp[0])      ||  (sub.supp[1] != cand.supp[1]))        continue;               /* do simple comparisons first */      this.compcnt++;           /* count the isomorphism test */      if (sub.contains(cand))   /* if the candidate substructure is */        return true;            /* equal to a recorded substructure, */    }                           /* the fragment is a duplicate */    frag.setUnique(true);       /* note that the fragment was checked */    cand.succ = this.subs[n];   /* add the new substructure at the */    this.subs[n] = cand;        /* head of the substructure list */    return false;               /* with the same number of bonds */  }  /* duplicate() */  /*------------------------------------------------------------------*/  private void output (Fragment frag)  {                             /* --- output a substructure */    int      id;                /* substructure identifier */    Molecule mol;               /* fragment as a molecule */    double   s;                 /* support of substructure */    String   t;                 /* buffer for log message */    /* --- check the fragment --- */    if (!frag.isValid()) {      /* skip invalid fragments */      this.invalid++; return; } /* (non-canonic, but needed) */    if ((frag.size()  < this.min)    ||  (frag.supp[1] > this.comp))   /* check fragment size */      return;                   /* and support in complement */    if (!frag.isClosed(this.ext)) {      this.nonclsd++; return; } /* skip non-closed fragments */    if (((this.mode & CLOSERINGS) != 0)    &&  frag.hasOpenRings(this.rgmin, this.rgmax)) {      this.openrgs++; return; } /* skip fragments with open rings */    id = ++this.subcnt;         /* count the new substructure and */    t = "        "+this.subcnt; /* print the number of substructures */    System.err.print(t.substring(t.length() -8));    System.err.print("\b\b\b\b\b\b\b\b");    /* --- write substructure description file --- */    mol = frag.getAsMolecule(); /* get fragment as a molecule */    this.out.print(id +",");    /* identifier and description */    if ((this.mode & NORMFORM) != 0)      mol.makeCanonic(this.ext);    this.out.print(this.ntn.format(mol));    this.out.print("," +mol.atomcnt);  /* number of atoms */    this.out.print("," +mol.bondcnt);  /* number of bonds */    this.out.print("," +mol.supp[0]);    s = this.cnts[0];           /* support in the focus */    s = (s != 0) ? mol.supp[0] /s : 1.0;    this.out.print("," +s *100.0);    this.out.print("," +mol.supp[1]);    s = this.cnts[1];           /* support in the complement */    s = (s != 0) ? mol.supp[1] /s : 0.0;    this.out.println("," +s *100.0);    this.out.flush();           /* flush the output stream */    /* --- write molecule identifier file --- */    if (this.ids == null) return;    this.ids.print(id);         /* substructure identifier */    for (mol = frag.firstMol(); mol != null; mol = frag.nextMol())      this.ids.print("," +mol.id);    this.ids.println();         /* list the molecule identifiers */    this.ids.flush();           /* and flush the output stream */  }  /* output() */  /*--------------------------------------------------------------------  The above function records substructures found. By comparing a new  substructure to all already recorded substructures, all parts of a  substructure that have the same support as the substructure itself  are suppressed (i.e., not recorded). In other words, the output is  restricted to so-called "closed" substructures. However, comparing  the substructures in this way is only necessary for those of equal  size if it has been checked whether the candidate fragment is closed.  It need not be done at all if canonical form pruning is used.  --------------------------------------------------------------------*/  private boolean recurse (Fragment frag, int depth)  {                             /* --- part 1 of recursive search */    int        i, k, n, r;      /* loop variables, buffers */    Embedding  emb;             /* to traverse the embeddings */    Fragment[] frls, vec;       /* sorted list of created fragments */    int        vsz, cnt;        /* vector size and number of lists */    boolean    revert = false;  /* whether to revert extension info. */    boolean    adapt, part;     /* flags for adaptation/partial test */    if (this.stop)              /* check for an external abort */      return false;             /* (if running as a separate thread) */    this.nodecnt++;             /* count the search tree node */    /* --- verbose information output --- */    if ((this.mode & VERBOSE) != 0) {      for (i = depth; --i >= 0;)/* if verbose output about */        System.out.print("   ");/* the search is requested */      System.out.print(frag);   /* print the fragment */      System.out.print("  ");   /* and a separator */      Molecule mol = null;      /* buffer for the current molecule */      k = 1;                    /* init. the embedding counter */      for (emb = frag.first(); emb != null; emb = frag.next()) {        if (emb.mol == mol) {   /* if embedding in same molecule */          k++; continue; }      /* count embeddings per molecule */        if (k > 1)              /* if more than one embedding */          System.out.print(k);  /* print the number of embeddings */        mol = emb.mol; k = 1;   /* note the new molecule */        System.out.print(mol.id);      }                         /* print the molecule identifier */      if (k > 1)                /* if more than one embedding */        System.out.print(k);    /* print the number of embeddings */      System.out.println(" (" +frag.supp[0] +")");    }                           /* print the support of the fragment */    /* --- create extensions --- */    frls = new Fragment[vsz = BLKSIZE];    cnt  = 0;                   /* initialize the fragment vector */    for (emb = frag.first(); emb != null; emb = frag.next()) {      this.ext.init(frag, emb); /* traverse the embeddings */      while (this.ext.next()) { /* while there is another extension */        i = n = 0; k = cnt;     /* do a binary search in fragments */        while (i < k) {         /* if the range is not empty */          n = (i +k) >> 1;      /* get index of middle element */          r = this.ext.compareTo(frls[n]);          if      (r < 0) k = n;          else if (r > 0) i = n+1;          else break;           /* adapt the range boundaries */        }                       /* or terminate the search */        if (i < k) {            /* if the fragment was found, */          if (frls[n].add(ext)) /* add the embedding to it and */            this.embcnt++; }    /* count it (for benchmarking) */        else if (emb.mol.group == 0) {          vec = frls;           /* if the fragment was not found */          if (cnt >= vsz) {     /* if the fragment vector is full */            vsz += (vsz > BLKSIZE) ? vsz >> 1 : BLKSIZE;            frls = new Fragment[vsz];            System.arraycopy(vec, 0, frls, 0, i);          }                     /* enlarge the fragment vector */          System.arraycopy(vec, i, frls, i+1, cnt-i);          frls[i] = ext.makeFragment();          cnt++; this.embcnt++; /* create and store a new fragment */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -