comparer.java

来自「dump3 morpheus 0.2.9 src」· Java 代码 · 共 471 行

JAVA
471
字号
/**
 * DuMP3 version morpheus_0.2.9 - a duplicate/similar file finder in Java<BR>
 * Copyright 2005 Alexander Gr&auml;sser<BR>
 * All Rights Reserved, http://dump3.sourceforge.net/<BR>
 * <BR>
 * This file is part of DuMP3.<BR>
 * <BR>
 * DuMP3 is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2 of the License, or (at your option) any later version.<BR>
 * <BR>
 * DuMP3 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
 * PARTICULAR PURPOSE. See the GNU General Public License for more details.<BR>
 * <BR>
 * You should have received a copy of the GNU General Public License along with DuMP3; if not, write to the Free Software Foundation, Inc., 51 Franklin St,
 * Fifth Floor, Boston, MA 02110-1301 USA
 */
package net.za.grasser.duplicate.compare;

import java.util.ArrayList;
import java.util.List;
import java.util.Observable;
import java.util.Observer;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import net.za.grasser.duplicate.Configure;
import net.za.grasser.duplicate.file.DirectoryReader;
import net.za.grasser.duplicate.file.FingerprintFile;
import net.za.grasser.duplicate.file.Status;
import net.za.grasser.duplicate.fingerprint.AbstractFingerprint;
import net.za.grasser.duplicate.fingerprint.FingerprintFactory;
import org.apache.log4j.Logger;

/**
 * @modelguid {6B9EB4EA-87E3-4482-8780-C4A45652D66A}
 * @author <a href="http://sourceforge.net/sendmessage.php?touser=733840">pyropunk at sourceforge dot net</a>
 * @version $Revision: 1.17 $
 */
public class Comparer extends Observable implements Observer, Runnable {
  static {
    Configure.load();
  }
  /** @modelguid {2295ADEE-A695-4A57-ADD2-DD469FEA833F} */
  static Logger log = Logger.getLogger(Comparer.class);
  /**
   * <code>comparer</code> Comparer - singleton instance
   */
  private static Comparer comparer = null;

  /**
   * This method returns a <code>Comparer</code>.
   * 
   * @return Comparer
   */
  public synchronized static Comparer getInstance() {
    if (comparer == null) {
      comparer = new Comparer();
    }
    return comparer;
  }

  /**
   * <code>files</code> Comparer - processed files list
   * 
   * @modelguid {C59EA0C1-D713-4077-BD36-40FC20CC008A}
   */
  private final List<FingerprintFile> files = new ArrayList<FingerprintFile>();
  /**
   * <code>process</code> Comparer - files to process list
   */
  private final LinkedBlockingQueue<FingerprintFile> process = new LinkedBlockingQueue<FingerprintFile>();
  /**
   * <code>compareList</code> Comparer -
   */
  private final LinkedBlockingQueue<FingerprintFile> compareList = new LinkedBlockingQueue<FingerprintFile>();
  /**
   * <code>readerList</code> Comparer - keep list of <code>DirectoryReader</code>s
   */
  private final List<DirectoryReader> readerList = new ArrayList<DirectoryReader>();
  /**
   * <code>expecting</code> Comparer - are we expecting more files from the <code>DirectoryReader</code>s?
   */
  private boolean expecting = true;
  /**
   * <code>expectingCompare</code> Comparer -
   */
  private boolean expectingCompare = true;
  /**
   * <code>comparers</code> Comparer - worker pool (1 per processor)
   */
  private CompareWorker[] comparers = null;
  /**
   * <code>rwl</code> Comparer - needed for multi processor support
   */
  private final ReentrantReadWriteLock rwl = new ReentrantReadWriteLock();
  /**
   * <code>readLock</code> Comparer - read lock on processed files list
   */
  private final Lock readLock = rwl.readLock();
  /**
   * <code>writeLock</code> Comparer - write lock on processed files list
   */
  private final Lock writeLock = rwl.writeLock();
  /**
   * <code>running</code> Comparer -
   */
  private boolean running = false;

  /**
   * Constructor
   */
  private Comparer() {
    super();
    final int cpus = Runtime.getRuntime().availableProcessors();
    log.debug("Creating " + cpus + " compare worker" + (cpus != 1 ? "s" : "") + ".");
    comparers = new CompareWorker[cpus];
    for (int i = 0; i < comparers.length; i++) {
      comparers[i] = new CompareWorker(this, i + 1);
      comparers[i].addObserver(this);
    }
  }

  /**
   * This method starts the comparing
   */
  public void start() {
    if (!running) {
      // start the workers first
      for (final CompareWorker cmp : comparers) {
        cmp.start();
      }
      running = true;
      expectingCompare = true;
      final Thread t = new Thread(this, "Comparer");
      t.setDaemon(true);
      t.setPriority(Thread.MIN_PRIORITY);
      t.start();
    }
  }

  /**
   * This method adds a file to the processed files list.
   * 
   * @param pFile
   */
  public void addFile(final FingerprintFile pFile) {
    readLock.unlock();
    writeLock.lock();
    try {
      files.add(pFile);
    } finally {
      writeLock.unlock();
    }
  }

  /**
   * The <code>Comparer</code> needs to keep track of the <code>DirecoryReader</code>s otherwise it does not know when to stop.
   * 
   * @param pDr
   */
  public void addReader(final DirectoryReader pDr) {
    pDr.addObserver(this);
    synchronized (readerList) {
      readerList.add(pDr);
      expecting = true;
    }
  }

  /**
   * @return a <code>List</code> of all files with duplicates
   * @modelguid {0AC7DB78-56FF-4959-9007-10631321E1D5}
   */
  public List<FingerprintFile> getDuplicates() {
    final ArrayList<FingerprintFile> al = new ArrayList<FingerprintFile>();
    readLock.lock();
    try {
      for (final FingerprintFile ff : files) {
        if (ff.getDuplicates() != null && !ff.getDuplicates().isEmpty()) {
          al.add(ff);
        }
      }
    } finally {
      readLock.unlock();
    }
    return al;
  }

  /**
   * @return List - Returns the files.
   * @modelguid {73444178-24B6-4F1D-A7E5-8B94EDEB71A6}
   */
  public final List<FingerprintFile> getFiles() {
    readLock.lock();
    return files;
  }

  /**
   * @return a <code>List</code> of all problem files
   */
  public List<FingerprintFile> getProblems() {
    final ArrayList<FingerprintFile> al = new ArrayList<FingerprintFile>();
    readLock.lock();
    try {
      for (final FingerprintFile ff : files) {
        if (ff.getStatus() != Status.FILE_OK) {
          al.add(ff);
        }
      }
    } finally {
      readLock.unlock();
    }
    return al;
  }

  /**
   * This method removes the first element from the proecess queue
   * 
   * @return FingerprintFile
   */
  public FingerprintFile removeFirst() {
    try {
      if (expecting || !process.isEmpty()) {
        final FingerprintFile ff = process.take();
        log.debug("taken: " + ff.getPath());
        return ff;
      }
      return null;
    } catch (final InterruptedException ie) {
      log.warn("interrupted", ie);
      return null;
    }
  }

  /**
   * Stops all threads.
   * 
   * @param pRunning boolean - The new value.
   */
  public void setRunning(final boolean pRunning) {
    for (final CompareWorker lComp : comparers) {
      lComp.setRunning(pRunning);
    }
    for (final DirectoryReader lReader : readerList) {
      lReader.setRunning(pRunning);
    }
    running = pRunning;
  }

  /**
   * This method removes a file from the list or the duplicate list
   * 
   * @param pFile
   */
  public void removeFile(final FingerprintFile pFile) {
    writeLock.lock();
    try {
      final List<FingerprintFile> dups = pFile.getDuplicates();
      List<FingerprintFile> parent = null;
      FingerprintFile top = null;
      if (files.contains(pFile)) {
        // remove root
        parent = files;
      } else {
        // remove child and add its duplicates to the duplicate list of the parent
        top = pFile.getCompared();
        parent = top.getDuplicates();
      }
      if (dups != null) {
        // repoint all compared pointers
        for (final FingerprintFile lFile : dups) {
          lFile.setCompared(top);
        }
        parent.addAll(dups);
        dups.clear();
      }
      parent.remove(pFile);
    } finally {
      writeLock.unlock();
    }
  }

  /**
   * This method unlocks a read lock on the processed files list.
   */
  public void stoppedReading() {
    readLock.unlock();
  }

  /**
   * @see java.util.Observer#update(java.util.Observable, java.lang.Object)
   * @modelguid {D956A835-0B16-455E-B639-71D144AA7FB8}
   */
  public void update(final Observable o, final Object ob) {
    if (ob instanceof FingerprintFile) {
      try {
        FingerprintFactory.loadFingerprints((FingerprintFile)ob);
        process.add((FingerprintFile)ob);
      } catch (final Exception e) {
        log.error("Could not create any fingerprints.", e);
      }
    } else if (ob instanceof String) {
      if (o instanceof CompareWorker) {
        final String str = (String)ob;
        if (str.equals("stop")) {
          log.debug("One worker finished.");
          if (getRunningWorkerCount() <= 0) {
            expectingCompare = false;
          }
        }
        return;
      }
      final String str = (String)ob;
      if (str.equals("stop")) {
        log.debug("One reader finished.");
        synchronized (readerList) {
          // o must be a DirectoryReader
          readerList.remove(o);
          if (readerList.isEmpty()) {
            expecting = false;
          }
        }
      }
    }
  }

  /**
   * This method returns the number of <code>CompareWorker</code>s.
   * 
   * @return int
   */
  public int getWorkerCount() {
    return comparers.length;
  }

  /**
   * This method returns the number of <code>CompareWorker</code>s that are still running.
   * 
   * @return int
   */
  public int getRunningWorkerCount() {
    int cnt = 0;
    for (CompareWorker element : comparers) {
      if (element.isRunning()) {
        cnt++;
      }
    }
    return cnt;
  }

  /**
   * This method checks in the file
   * 
   * @param ff1
   */
  public void checkIn(final FingerprintFile ff1) {
    try {
      compareList.put(ff1);
      log.debug("returned: " + ff1.getPath());
    } catch (final InterruptedException ie) {
      log.warn("interrupted", ie);
    }
  }

  /**
   * This method does the actual comparrison
   * 
   * @param ffToCompare
   * @param pFiles
   * @return boolean - true if a duplicate
   */
  private boolean compare(final FingerprintFile ffToCompare, final List<FingerprintFile> pFiles) {
    boolean found = false;
    if (pFiles == null || pFiles.isEmpty()) {
      log.trace("empty duplicate list");
      return false;
    }
    if (!log.isInfoEnabled()) {
      System.out.print(".");
    }
    if (ffToCompare.getStatus() == Status.FILE_OK || ffToCompare.getStatus() == Status.FILE_SIGNATURE_MISMATCH) {
      // for all files
      for (final FingerprintFile ffCompareTo : pFiles) {
        if (ffToCompare.getKey().equals(ffCompareTo.getKey())) {
          // BUG 1549667 - Self shown as duplicate
          // ignore the same file
          log.warn("Tried to compare a file to itself. [" + ffToCompare.getPath() + "]");
          // because we already have it in the list we will not add it again
          return true;
        }
        final StringBuffer sb = new StringBuffer(100);
        for (final String key : ffToCompare.getFingerprints().keySet()) {
          final AbstractFingerprint afToCompare = ffToCompare.getFingerprints().get(key);
          final AbstractFingerprint afCompareTo = ffCompareTo.getFingerprints().get(key);
          if (afToCompare != null && afCompareTo != null) {
            final float sim = afToCompare.matches(afCompareTo);
            if (log.isTraceEnabled()) {
              sb.setLength(0);
              sb.append(ffToCompare.getName());
              sb.append(" ~(");
              sb.append(afToCompare.getClassName());
              sb.append(")~ ");
              sb.append(ffCompareTo.getName());
              sb.append(" -> ");
              sb.append(sim);
              sb.append("%");
              log.trace(sb);
            }
            if ((ffToCompare.getStatus() == Status.FILE_OK || ffToCompare.getStatus() == Status.FILE_SIGNATURE_MISMATCH)
                && sim >= afToCompare.getSimilarityThreshhold()) {
              // nested duplicates
              if (!compare(ffToCompare, ffCompareTo.getDuplicates())) {
                ffToCompare.setSimilarity(sim);
                ffCompareTo.addDuplicate(ffToCompare);
              }
              setChanged();
              super.notifyObservers(ffCompareTo);
              found = true;
              break;
            }
          }
        }
        if (found) {
          break;
        }
      }
    }
    ffToCompare.logFileStatus();
    return found;
  }

  /**
   * @see java.lang.Runnable#run()
   */
  public void run() {
    try {
      log.debug("Comparer running.");
      while (running) {
        FingerprintFile ff1 = null;
        try {
          if (expectingCompare || !compareList.isEmpty()) {
            ff1 = compareList.poll(2, TimeUnit.SECONDS);
          }
        } catch (final InterruptedException ie) {
          // ignore, ff1 still null
        }
        if (ff1 != null) {
          log.debug("got file:" + ff1.getPath());
          if (compare(ff1, getFiles())) {
            // already added as a duplicate to a file in the processed file list
            stoppedReading();
          } else {
            // add to the processed file list and the db
            addFile(ff1);
          }
          setChanged();
          super.notifyObservers(new Integer(1));
        } else if (!expectingCompare) {
          running = false;
          setChanged();
          super.notifyObservers("stop");
          return;
        }
      }
    } finally {
      running = false;
      log.debug("Comparer stopped.");
    }
  }
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?