comparer.java
来自「dump3 morpheus 0.2.9 src」· Java 代码 · 共 471 行
JAVA
471 行
/**
* DuMP3 version morpheus_0.2.9 - a duplicate/similar file finder in Java<BR>
* Copyright 2005 Alexander Grässer<BR>
* All Rights Reserved, http://dump3.sourceforge.net/<BR>
* <BR>
* This file is part of DuMP3.<BR>
* <BR>
* DuMP3 is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option) any later version.<BR>
* <BR>
* DuMP3 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU General Public License for more details.<BR>
* <BR>
* You should have received a copy of the GNU General Public License along with DuMP3; if not, write to the Free Software Foundation, Inc., 51 Franklin St,
* Fifth Floor, Boston, MA 02110-1301 USA
*/
package net.za.grasser.duplicate.compare;
import java.util.ArrayList;
import java.util.List;
import java.util.Observable;
import java.util.Observer;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import net.za.grasser.duplicate.Configure;
import net.za.grasser.duplicate.file.DirectoryReader;
import net.za.grasser.duplicate.file.FingerprintFile;
import net.za.grasser.duplicate.file.Status;
import net.za.grasser.duplicate.fingerprint.AbstractFingerprint;
import net.za.grasser.duplicate.fingerprint.FingerprintFactory;
import org.apache.log4j.Logger;
/**
* @modelguid {6B9EB4EA-87E3-4482-8780-C4A45652D66A}
* @author <a href="http://sourceforge.net/sendmessage.php?touser=733840">pyropunk at sourceforge dot net</a>
* @version $Revision: 1.17 $
*/
public class Comparer extends Observable implements Observer, Runnable {
  static {
    Configure.load();
  }
  /** @modelguid {2295ADEE-A695-4A57-ADD2-DD469FEA833F} */
  static Logger log = Logger.getLogger(Comparer.class);
  /**
   * <code>STOP</code> Comparer - message that signals "finished", both when received in {@link #update(Observable, Object)} and when sent to our own
   * observers
   */
  private static final String STOP = "stop";
  /**
   * <code>POLL_TIMEOUT_SECONDS</code> Comparer - how long a queue poll waits before the stop conditions are checked again
   */
  private static final long POLL_TIMEOUT_SECONDS = 2L;
  /**
   * <code>comparer</code> Comparer - singleton instance
   */
  private static Comparer comparer = null;

  /**
   * This method returns the single <code>Comparer</code> instance, creating it on first use.
   *
   * @return Comparer
   */
  public static synchronized Comparer getInstance() {
    if (comparer == null) {
      comparer = new Comparer();
    }
    return comparer;
  }

  /**
   * <code>files</code> Comparer - processed files list, guarded by <code>rwl</code>
   *
   * @modelguid {C59EA0C1-D713-4077-BD36-40FC20CC008A}
   */
  private final List<FingerprintFile> files = new ArrayList<FingerprintFile>();
  /**
   * <code>process</code> Comparer - files to process list, filled by {@link #update(Observable, Object)} and drained by {@link #removeFirst()}
   */
  private final LinkedBlockingQueue<FingerprintFile> process = new LinkedBlockingQueue<FingerprintFile>();
  /**
   * <code>compareList</code> Comparer - files handed back through {@link #checkIn(FingerprintFile)} that wait to be compared in {@link #run()}
   */
  private final LinkedBlockingQueue<FingerprintFile> compareList = new LinkedBlockingQueue<FingerprintFile>();
  /**
   * <code>readerList</code> Comparer - keep list of <code>DirectoryReader</code>s; every access synchronizes on the list itself
   */
  private final List<DirectoryReader> readerList = new ArrayList<DirectoryReader>();
  /**
   * <code>expecting</code> Comparer - are we expecting more files from the <code>DirectoryReader</code>s? Volatile because it is written by reader
   * callbacks and read by the threads that call {@link #removeFirst()}.
   */
  private volatile boolean expecting = true;
  /**
   * <code>expectingCompare</code> Comparer - are we expecting more files from the workers? Volatile because it is written in worker callbacks and read by
   * the comparer thread.
   */
  private volatile boolean expectingCompare = true;
  /**
   * <code>comparers</code> Comparer - worker pool (1 per processor)
   */
  private final CompareWorker[] comparers;
  /**
   * <code>rwl</code> Comparer - needed for multi processor support
   */
  private final ReentrantReadWriteLock rwl = new ReentrantReadWriteLock();
  /**
   * <code>readLock</code> Comparer - read lock on processed files list
   */
  private final Lock readLock = rwl.readLock();
  /**
   * <code>writeLock</code> Comparer - write lock on processed files list
   */
  private final Lock writeLock = rwl.writeLock();
  /**
   * <code>running</code> Comparer - true while the comparer thread should keep going. Volatile because {@link #setRunning(boolean)} is called from other
   * threads.
   */
  private volatile boolean running = false;

  /**
   * Constructor - creates one <code>CompareWorker</code> per available processor and observes each of them.
   */
  private Comparer() {
    super();
    final int cpus = Runtime.getRuntime().availableProcessors();
    log.debug("Creating " + cpus + " compare worker" + (cpus != 1 ? "s" : "") + ".");
    comparers = new CompareWorker[cpus];
    for (int i = 0; i < comparers.length; i++) {
      comparers[i] = new CompareWorker(this, i + 1);
      comparers[i].addObserver(this);
    }
  }

  /**
   * This method starts the comparing: first every worker, then the comparer itself on a minimum priority daemon thread named "Comparer". Does nothing if
   * the comparer is already running.
   */
  public void start() {
    if (running) {
      return;
    }
    // start the workers first
    for (final CompareWorker cmp : comparers) {
      cmp.start();
    }
    running = true;
    expectingCompare = true;
    final Thread t = new Thread(this, "Comparer");
    t.setDaemon(true);
    t.setPriority(Thread.MIN_PRIORITY);
    t.start();
  }

  /**
   * This method adds a file to the processed files list.
   * <p>
   * Lock hand-off: the calling thread must hold the read lock obtained through {@link #getFiles()}. It is released here before the write lock is taken,
   * because a <code>ReentrantReadWriteLock</code> cannot upgrade a read lock. Calling this method without holding the read lock makes the unlock fail with
   * an <code>IllegalMonitorStateException</code>.
   *
   * @param pFile the file to add
   */
  public void addFile(final FingerprintFile pFile) {
    readLock.unlock();
    writeLock.lock();
    try {
      files.add(pFile);
    } finally {
      writeLock.unlock();
    }
  }

  /**
   * The <code>Comparer</code> needs to keep track of the <code>DirectoryReader</code>s otherwise it does not know when to stop.
   *
   * @param pDr the reader to observe; files are expected again until every registered reader has reported "stop"
   */
  public void addReader(final DirectoryReader pDr) {
    pDr.addObserver(this);
    synchronized (readerList) {
      readerList.add(pDr);
      expecting = true;
    }
  }

  /**
   * @return a new <code>List</code> of all processed files that have a non-empty duplicate list
   * @modelguid {0AC7DB78-56FF-4959-9007-10631321E1D5}
   */
  public List<FingerprintFile> getDuplicates() {
    final List<FingerprintFile> result = new ArrayList<FingerprintFile>();
    readLock.lock();
    try {
      for (final FingerprintFile ff : files) {
        final List<FingerprintFile> dups = ff.getDuplicates();
        if (dups != null && !dups.isEmpty()) {
          result.add(ff);
        }
      }
    } finally {
      readLock.unlock();
    }
    return result;
  }

  /**
   * Returns the live processed files list and leaves the read lock <b>held</b> by the calling thread. The caller must release it afterwards, either with
   * {@link #stoppedReading()} or by passing a file to {@link #addFile(FingerprintFile)}.
   *
   * @return List - Returns the files.
   * @modelguid {73444178-24B6-4F1D-A7E5-8B94EDEB71A6}
   */
  public final List<FingerprintFile> getFiles() {
    readLock.lock();
    return files;
  }

  /**
   * @return a new <code>List</code> of all processed files whose status is not <code>Status.FILE_OK</code>
   */
  public List<FingerprintFile> getProblems() {
    final List<FingerprintFile> result = new ArrayList<FingerprintFile>();
    readLock.lock();
    try {
      for (final FingerprintFile ff : files) {
        if (ff.getStatus() != Status.FILE_OK) {
          result.add(ff);
        }
      }
    } finally {
      readLock.unlock();
    }
    return result;
  }

  /**
   * This method removes the first element from the process queue, waiting for one to arrive while readers are still expected to deliver files.
   * <p>
   * The queue is polled with a timeout instead of blocking indefinitely, so that a caller waiting on an empty queue notices when the last reader has
   * finished and is not left blocked forever.
   *
   * @return FingerprintFile - the next file, or <code>null</code> if no more files are expected or the thread was interrupted
   */
  public FingerprintFile removeFirst() {
    try {
      while (expecting || !process.isEmpty()) {
        final FingerprintFile ff = process.poll(POLL_TIMEOUT_SECONDS, TimeUnit.SECONDS);
        if (ff != null) {
          log.debug("taken: " + ff.getPath());
          return ff;
        }
        // timed out (or another thread took the element): check the conditions again
      }
      return null;
    } catch (final InterruptedException ie) {
      log.warn("interrupted", ie);
      return null;
    }
  }

  /**
   * Passes the running flag on to all workers and all registered readers and then applies it to the comparer itself. Passing <code>false</code> stops all
   * threads.
   *
   * @param pRunning boolean - The new value.
   */
  public void setRunning(final boolean pRunning) {
    for (final CompareWorker lComp : comparers) {
      lComp.setRunning(pRunning);
    }
    // Iterate over a snapshot: update() removes readers from the list on other threads, and the readers
    // should not be called while the list monitor is held.
    final List<DirectoryReader> readers;
    synchronized (readerList) {
      readers = new ArrayList<DirectoryReader>(readerList);
    }
    for (final DirectoryReader lReader : readers) {
      lReader.setRunning(pRunning);
    }
    running = pRunning;
  }

  /**
   * This method removes a file from the processed files list or from the duplicate list of its parent. The duplicates of the removed file are moved one
   * level up, into the list the file was removed from.
   * <p>
   * If the file is not in the processed files list and has no parent with a duplicate list, a warning is logged and nothing is changed.
   * <p>
   * NOTE(review): this takes the write lock, so it must not be called by a thread that currently holds the read lock (see {@link #getFiles()}).
   *
   * @param pFile the file to remove
   */
  public void removeFile(final FingerprintFile pFile) {
    writeLock.lock();
    try {
      final List<FingerprintFile> dups = pFile.getDuplicates();
      final List<FingerprintFile> parent;
      FingerprintFile top = null;
      if (files.contains(pFile)) {
        // remove root
        parent = files;
      } else {
        // remove child and add its duplicates to the duplicate list of the parent
        top = pFile.getCompared();
        parent = top == null ? null : top.getDuplicates();
        if (parent == null) {
          log.warn("Cannot remove a file that is neither processed nor a known duplicate. [" + pFile.getPath() + "]");
          return;
        }
      }
      if (dups != null) {
        // repoint all compared pointers
        for (final FingerprintFile lFile : dups) {
          lFile.setCompared(top);
        }
        parent.addAll(dups);
        dups.clear();
      }
      parent.remove(pFile);
    } finally {
      writeLock.unlock();
    }
  }

  /**
   * This method unlocks a read lock on the processed files list. It is the counterpart of {@link #getFiles()}.
   */
  public void stoppedReading() {
    readLock.unlock();
  }

  /**
   * Handles notifications from the observed readers and workers:
   * <ul>
   * <li>a <code>FingerprintFile</code> gets its fingerprints loaded and is queued for processing,</li>
   * <li>"stop" from a <code>CompareWorker</code> ends the expectation of further compare input once no worker is running any more,</li>
   * <li>"stop" from any other source removes that source from the reader list; when the list is empty no more files are expected.</li>
   * </ul>
   *
   * @see java.util.Observer#update(java.util.Observable, java.lang.Object)
   * @modelguid {D956A835-0B16-455E-B639-71D144AA7FB8}
   */
  public void update(final Observable o, final Object ob) {
    if (ob instanceof FingerprintFile) {
      final FingerprintFile file = (FingerprintFile)ob;
      try {
        FingerprintFactory.loadFingerprints(file);
        process.add(file);
      } catch (final Exception e) {
        log.error("Could not create any fingerprints.", e);
      }
      return;
    }
    if (!(ob instanceof String) || !STOP.equals(ob)) {
      return;
    }
    if (o instanceof CompareWorker) {
      log.debug("One worker finished.");
      if (getRunningWorkerCount() <= 0) {
        expectingCompare = false;
      }
      return;
    }
    log.debug("One reader finished.");
    synchronized (readerList) {
      // o must be a DirectoryReader
      readerList.remove(o);
      if (readerList.isEmpty()) {
        expecting = false;
      }
    }
  }

  /**
   * This method returns the number of <code>CompareWorker</code>s.
   *
   * @return int
   */
  public int getWorkerCount() {
    return comparers.length;
  }

  /**
   * This method returns the number of <code>CompareWorker</code>s that are still running.
   *
   * @return int
   */
  public int getRunningWorkerCount() {
    int cnt = 0;
    for (final CompareWorker element : comparers) {
      if (element.isRunning()) {
        cnt++;
      }
    }
    return cnt;
  }

  /**
   * This method checks in the file: it is queued for the comparer thread, which compares it against the processed files list.
   *
   * @param ff1 the file to queue
   */
  public void checkIn(final FingerprintFile ff1) {
    try {
      compareList.put(ff1);
      log.debug("returned: " + ff1.getPath());
    } catch (final InterruptedException ie) {
      log.warn("interrupted", ie);
    }
  }

  /**
   * @param pFile the file to check
   * @return boolean - true if the status of the file allows it to be compared
   */
  private static boolean isComparable(final FingerprintFile pFile) {
    final Status status = pFile.getStatus();
    return status == Status.FILE_OK || status == Status.FILE_SIGNATURE_MISMATCH;
  }

  /**
   * This method does the actual comparison of one file against a list of files and, recursively, against their duplicates.
   *
   * @param ffToCompare the file that is looked for
   * @param pFiles the files to compare against, may be <code>null</code> or empty
   * @return boolean - true if a duplicate (or the file itself) was found in the list
   */
  private boolean compare(final FingerprintFile ffToCompare, final List<FingerprintFile> pFiles) {
    if (pFiles == null || pFiles.isEmpty()) {
      log.trace("empty duplicate list");
      return false;
    }
    if (!log.isInfoEnabled()) {
      System.out.print(".");
    }
    boolean found = false;
    if (isComparable(ffToCompare)) {
      // for all files
      for (final FingerprintFile ffCompareTo : pFiles) {
        if (ffToCompare.getKey().equals(ffCompareTo.getKey())) {
          // BUG 1549667 - Self shown as duplicate
          // ignore the same file
          log.warn("Tried to compare a file to itself. [" + ffToCompare.getPath() + "]");
          // because we already have it in the list we will not add it again
          return true;
        }
        if (compareFingerprints(ffToCompare, ffCompareTo)) {
          found = true;
          break;
        }
      }
    }
    ffToCompare.logFileStatus();
    return found;
  }

  /**
   * Compares two files fingerprint by fingerprint. The first fingerprint whose similarity reaches its threshold makes <code>ffToCompare</code> a duplicate
   * of <code>ffCompareTo</code> (unless it is already found among the nested duplicates) and notifies the observers with <code>ffCompareTo</code>.
   *
   * @param ffToCompare the file that is looked for
   * @param ffCompareTo the candidate it is compared to
   * @return boolean - true if the files were found to be similar
   */
  private boolean compareFingerprints(final FingerprintFile ffToCompare, final FingerprintFile ffCompareTo) {
    for (final String key : ffToCompare.getFingerprints().keySet()) {
      final AbstractFingerprint afToCompare = ffToCompare.getFingerprints().get(key);
      final AbstractFingerprint afCompareTo = ffCompareTo.getFingerprints().get(key);
      if (afToCompare == null || afCompareTo == null) {
        // only fingerprints that both files have can be compared
        continue;
      }
      final float sim = afToCompare.matches(afCompareTo);
      if (log.isTraceEnabled()) {
        final StringBuilder sb = new StringBuilder(100);
        sb.append(ffToCompare.getName());
        sb.append(" ~(");
        sb.append(afToCompare.getClassName());
        sb.append(")~ ");
        sb.append(ffCompareTo.getName());
        sb.append(" -> ");
        sb.append(sim);
        sb.append("%");
        log.trace(sb.toString());
      }
      // the status is checked again after matching, as in the original flow
      if (isComparable(ffToCompare) && sim >= afToCompare.getSimilarityThreshhold()) {
        // nested duplicates
        if (!compare(ffToCompare, ffCompareTo.getDuplicates())) {
          ffToCompare.setSimilarity(sim);
          ffCompareTo.addDuplicate(ffToCompare);
        }
        setChanged();
        super.notifyObservers(ffCompareTo);
        return true;
      }
    }
    return false;
  }

  /**
   * Comparer thread: takes checked-in files from the compare queue and either attaches them as duplicates to an already processed file or adds them to the
   * processed files list. After each file the observers are notified with <code>Integer</code> 1. When the workers have finished and the queue is
   * drained, the observers are notified with "stop" and the thread ends.
   *
   * @see java.lang.Runnable#run()
   */
  public void run() {
    try {
      log.debug("Comparer running.");
      while (running) {
        FingerprintFile ff1 = null;
        try {
          if (expectingCompare || !compareList.isEmpty()) {
            ff1 = compareList.poll(POLL_TIMEOUT_SECONDS, TimeUnit.SECONDS);
          }
        } catch (final InterruptedException ie) {
          // ignore, ff1 still null
        }
        if (ff1 != null) {
          log.debug("got file:" + ff1.getPath());
          // getFiles() leaves the read lock held; it is released below on every path
          final List<FingerprintFile> processed = getFiles();
          final boolean duplicate;
          try {
            duplicate = compare(ff1, processed);
          } catch (final RuntimeException re) {
            // do not leave the read lock behind when the comparison fails
            stoppedReading();
            throw re;
          }
          if (duplicate) {
            // already added as a duplicate to a file in the processed file list
            stoppedReading();
          } else {
            // add to the processed file list and the db
            addFile(ff1);
          }
          setChanged();
          super.notifyObservers(Integer.valueOf(1));
        } else if (!expectingCompare && compareList.isEmpty()) {
          // The queue is checked again after the flag: a file may have been checked in between the
          // poll timing out and the last worker reporting that it stopped.
          running = false;
          setChanged();
          super.notifyObservers(STOP);
          return;
        }
      }
    } finally {
      running = false;
      log.debug("Comparer stopped.");
    }
  }
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?