⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 miner.java

📁 A program to find frequent molecular substructures and discriminative fragments in a database of mol
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
               ? (Notation)new SLN()               : (Notation)new SMILES();    if ((exat == null) || exat.equals(""))      this.excl = null;         /* if no excluded atom types given, */    else {                      /* clear the molecule, otherwise */      System.err.print("parsing excluded atom types ... ");      this.excl = n.parse(exat);      System.err.println("[" +this.excl.nAtoms() +" atom(s)] done.");    }                           /* parse the excluded atoms */    if ((exsd == null) || exsd.equals(""))      this.exseed = null;       /* if no excluded seed types given, */    else {                      /* clear the molecule, otherwise */      System.err.print("parsing excluded seed types ... ");      this.exseed = n.parse(exsd);      System.err.println("[" +this.exseed.nAtoms() +" atom(s)] done.");    }                           /* parse the excluded seeds */  }  /* setExcluded() */  /*------------------------------------------------------------------*/  public void setSeed (Molecule seed)  { this.seed = seed; }         /* --- set the seed structure */  /*------------------------------------------------------------------*/  public void setSeed (String format, String desc)    throws IOException  {                             /* --- set the seed structure */    if (desc == null)    { this.seed = null; return; }    if (desc.equals("*") || desc.equals("~")) desc = "";    if (desc.equals("")) { this.seed = null; return; }    System.err.print("parsing seed description ... ");    Notation n = (format.equalsIgnoreCase("sln"))               ? (Notation)new SLN()               : (Notation)new SMILES();    this.seed = n.parse(desc);  /* parse the seed structure and */    if (!this.seed.sort(true))  /* check whether it is connected */      throw new IOException("seed structure is not connected");    System.err.print  ("["  +this.seed.nAtoms() +" atom(s)");    System.err.println(", " +this.seed.nBonds() +" bond(s)] done.");  }  /* setSeed() */  /*------------------------------------------------------------------*/  public void setInput (String format, String fn_mol,                        float thresh, boolean invert)    throws IOException  {                             /* --- initialize the input */    long t = System.currentTimeMillis();    System.err.print("reading " +fn_mol +" ... ");    this.loadMolecules(new File(fn_mol), format, thresh, invert, true);    System.err.print( "[" +this.molcnt                    +" (" +this.cnts[0] +"+" +this.cnts[1] +")"                    +" molecule(s)] done");    t = System.currentTimeMillis() -t;    System.err.println(" [" +t/1000.0 +"s].");  }  /* setInput() */  /*------------------------------------------------------------------*/  public void setOutput (Notation ntn, PrintStream out, PrintStream ids)  {                             /* --- initialize the output */    this.ntn = ntn;             /* note the notation for formatting */    this.out = out;             /* and the streams for the */    this.ids = ids;             /* descriptions and identifiers */  }  /* setOutput() */  /*------------------------------------------------------------------*/  public void setOutput (String format, String fn_sub, String fn_ids)    throws IOException  {                             /* --- initialize the output */    this.out = new PrintStream(new FileOutputStream(fn_sub));    this.ids = ((fn_ids != null) && !fn_ids.equals(""))             ? new PrintStream(new FileOutputStream(fn_ids)) : null;    this.ntn = (format.equalsIgnoreCase("sln"))             ? (Notation)new SLN()             : (Notation)new SMILES();  }  /* setOutput() */  /*------------------------------------------------------------------*/  public void addMolecule (Molecule mol)  {                             /* --- add a molecule */    this.molcnt++;              /* count the new molecule */    this.cnts[mol.group]++;     /* (generally and for its group) */    if (this.mols == null) {    /* if this is the first molecule, */      mol.succ  = null;         /* initialize the molecule list */      this.mols = this.curr = this.tail = mol; }    else if (mol.group <= 0) {  /* if the molecule is in the focus, */      if (this.tail == this.curr)        this.tail = mol;        /* replace tail of list if necessary */      mol.succ = this.curr.succ;      this.curr.succ = mol;     /* append the molecule at the end */      this.curr      = mol; }   /* of the focus molecule list */    else {                      /* if molecule is in complement, */      mol.succ       = null;    /* append the molecule at the end */      this.tail.succ = mol;     /* of the molecule list */      this.tail      = mol;     /* (make sure all focus molecules */    }                           /* precede all complement molecules) */  }  /* addMolecule() */  /*------------------------------------------------------------------*/  public int loadMolecules (File file, String format, float thresh,                            boolean invert)    throws IOException  { return this.loadMolecules(file, format, thresh, invert, false); }  /*------------------------------------------------------------------*/  public int loadMolecules (File file, String format, float thresh,                            boolean invert, boolean verbose)    throws IOException  {                             /* --- load molecules from a file */    int             i, k, g;    /* indices in input line, group flag */    FileInputStream in;         /* the input file */    BufferedReader  rdr;        /* reader for the input file */    String          line;       /* a line of the input file */    Notation        mnt;        /* notation for molecule description */    float           value;      /* value associated with the molecule */    String          desc;       /* molecule description */    Molecule        mol;        /* created molecule */    g   = (invert) ? 1 : 0;     /* get group flag value */    mnt = (format.equalsIgnoreCase("sln"))        ? (Notation)new SLN()   /* create a notation object */        : (Notation)new SMILES();    in  = new FileInputStream(file);    rdr = new BufferedReader(new InputStreamReader(in));    try {                       /* open the input file and */      this.cnts[0] =            /* check for successful opening */      this.cnts[1] = 0;         /* clear the molecule counters */      while (true) {            /* open the input file */        line = rdr.readLine();  /* read the next line */        if (line == null) break;/* and check for EOF */        line = line.trim();     /* remove whitespace */        if (line.length()  <= 0)          continue;             /* skip empty input lines */        if (line.charAt(0) == '#')          continue;             /* interpret '#' as a comment */        i = line.indexOf(' ');  /* find start of value */        if (i < 0) i = line.indexOf('\t');        if (i < 0) i = line.indexOf(',');        if (i < 0) throw new IOException("missing first separator");        k = line.indexOf(' ', i+1); /* find start of description */        if (k < 0) k = line.indexOf('\t', i+1);        if (k < 0) k = line.indexOf(',',  i+1);        if (k < 0) throw new IOException("missing second separator");        value = Float.parseFloat(line.substring(i+1, k).trim());        desc  = line.substring(k+1).trim();        line  = line.substring(0,i).trim();        mol   = mnt.parse(desc);/* parse the molecule description */        mol.setId(line);        /* and set identifier and group */        mol.setGroup((value > thresh) ? 1-g : g);        this.addMolecule(mol);  /* add the molecule to the miner */        if (!verbose || ((this.molcnt & 0xff) != 0))          continue;             /* check whether to print messages */        desc = "        " +this.molcnt;        System.err.print(desc.substring(desc.length()-8));        System.err.print("\b\b\b\b\b\b\b\b");      } }                       /* print the number of molecules */    catch (IOException ioe) {   /* report line number with error */      throw new IOException(file +", " +(this.molcnt+1) +": "                            +ioe.getMessage());    }                           /* report line number with error */    return this.molcnt;         /* return the number of molecules */  }  /* loadMolecules() */  /*--------------------------------------------------------------------  For faster counting, the molecule group (i.e. support vector index)  is precomputed by comparing the molecule value with the threshold.  The function also collects all focus molecules at the front of the  list, which is exploited in the search (thus extensions that are  possible only in the complement will not lead to fragment generation).  --------------------------------------------------------------------*/  protected void aromatize ()  {                             /* --- convert Kekul\'e to aromatic */    int  n = 0;                 /* number of molecules */    long t;                     /* for time measurements */    t = System.currentTimeMillis();    System.err.print("converting Kekule representations ... ");    if (this.seed != null) this.seed.aromatize();    for (Molecule mol = this.mols; mol != null; mol = mol.succ)      if (mol.aromatize() > 0) n++;    System.err.print("[" +n +" molecule(s)] done");    t = System.currentTimeMillis() -t;    System.err.println(" [" +t/1000.0 +"s].");  }  /* aromatize() */  /*------------------------------------------------------------------*/  protected int maskTypes ()  {                             /* --- mask atom and bond types */    if (this.seed != null)      /* if there is a seed, mask types */      this.seed.maskTypes(this.masks);    for (Molecule mol = this.mols; mol != null; mol = mol.succ)      mol.maskTypes(this.masks);/* mask types in all molecules */    return this.molcnt;         /* return the number of molecules */  }  /* maskTypes() */  /*------------------------------------------------------------------*/  private int markRings (int min, int max)  {                             /* --- mark rings in all molecules */    int n = 0;                  /* number of molecules */    if (max > 256) max = 256;   /* check and adapt */    if (min > max) min = max;   /* the ring size range */    if (this.seed != null)      /* if there is a seed, mark rings */      this.seed.markRings(min, max);    for (Molecule mol = this.mols; mol != null; mol = mol.succ)      if (mol.markRings(min, max) != 0) n++;    return n;                   /* traverse the molecules */  }  /* markRings() */          /* and mark the rings */  /*------------------------------------------------------------------*/  private int markPseudo (int max)  {                             /* --- mark pseudo-rings in all mols. */    int n = 0;                  /* number of molecules */    if (max > 256) max = 256;   /* check and adapt the ring size */    for (Molecule mol = this.mols; mol != null; mol = mol.succ)      if (mol.markPseudo(max) != 0) n++;    return n;                   /* traverse the molecules */  }  /* markPseudo() */         /* and mark the rings */  /*--------------------------------------------------------------------  For ring extensions the rings have to be marked in the molecules,  so that they can easily be found during the search. The parameters  specify a minimum and a maximum size of a ring.  --------------------------------------------------------------------*/  private int markBridges ()  {                             /* --- mark bridges in all molecules */    int n = 0;                  /* number of molecules */    if (this.seed != null)      /* if there is a seed, */      this.seed.markBridges();  /* mark the bridges in it */    for (Molecule mol = this.mols; mol != null; mol = mol.succ)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -