fingerprintfactory.java
来自「dump3 morpheus 0.2.9 src」· Java 代码 · 共 363 行
JAVA
363 行
/**
* DuMP3 version morpheus_0.2.9 - a duplicate/similar file finder in Java<BR>
* Copyright 2005 Alexander Grässer<BR>
* All Rights Reserved, http://dump3.sourceforge.net/<BR>
* <BR>
* This file is part of DuMP3.<BR>
* <BR>
* DuMP3 is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option) any later version.<BR>
* <BR>
* DuMP3 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU General Public License for more details.<BR>
* <BR>
* You should have received a copy of the GNU General Public License along with DuMP3; if not, write to the Free Software Foundation, Inc., 51 Franklin St,
* Fifth Floor, Boston, MA 02110-1301 USA
*/
package net.za.grasser.duplicate.fingerprint;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.List;
import java.util.regex.Pattern;
import net.za.grasser.duplicate.Configure;
import net.za.grasser.duplicate.file.FingerprintFile;
import net.za.grasser.duplicate.file.Status;
import net.za.grasser.duplicate.util.Constants;
import net.za.grasser.duplicate.util.HexArray;
import org.apache.log4j.Logger;
import com.lutris.util.BMByteSearch;
/**
* This class determines the fingerprint algorithms for a given <code>FingerprintFile</code> by extension. It validates the contents of the file by checking
* the "magic number" using a regular expression. It also generates the fingerprints for the file using the configured algorithms.
*
* @author <a href="http://sourceforge.net/sendmessage.php?touser=733840">pyropunk at sourceforge dot net</a>
* @version $Revision: 1.12 $
* @modelguid {3132B033-3571-4B10-B82D-AC0C1BCEED92}
*/
public class FingerprintFactory {
/**
* <code>log</code> FingerprintFactory - log4j logger used by the static methods of this class for configuration errors, fingerprint creation failures and
* signature validation results
*/
private static final Logger log = Logger.getLogger(FingerprintFactory.class);
/**
* @author <a href="http://sourceforge.net/sendmessage.php?touser=733840">pyropunk at sourceforge dot net</a>
* @version $Revision: 1.12 $
* @modelguid {AD489571-E9AC-48E4-B3F2-1D19C3565DB4}
*/
private static class ConfigItem {
/**
* <code>type</code> ConfigItem -
*/
String type = BINARY;
/** @modelguid {1F15908F-943F-4D1C-89C3-C2140EE33A09} */
int offs = 0;
/**
* <code>length</code> ConfigItem -
*
* @modelguid {9A98F6D1-9EC3-40C5-AAF8-15A2F561DF1C}
*/
int length = 0;
/** @modelguid {CDEBBFF6-AE8B-4CD1-BB5B-106632BFC9F8} */
Pattern sig = null;
/** @modelguid {7CB4A319-64E5-45D0-BBDA-E177154F2F69} */
List<Class< ? extends AbstractFingerprint>> cls = null;
/**
* <code>start</code> ConfigItem - hex string of header starting point
*/
String start = null;
/**
* @param pType String - the file type
* @param pOffset int - file signature starts here
* @param pLength int - length of signature
* @param pStart String - a starting string
* @param pSig - file start signature
* @param pCls - which class to create
* @throws ClassNotFoundException
* @modelguid {F867632A-2F1C-4EF1-9E42-41A0D264961C}
*/
@SuppressWarnings("unchecked")
ConfigItem(final String pType, final int pOffset, final int pLength, final String pStart, final String pSig, final String[] pCls) throws ClassNotFoundException {
type = pType;
offs = pOffset;
if (pSig != null && !(pSig.length() == 0)) {
sig = Pattern.compile(pSig, Pattern.CASE_INSENSITIVE);
}
length = pLength;
cls = new ArrayList<Class< ? extends AbstractFingerprint>>(pCls.length);
for (final String element : pCls) {
try {
cls.add((Class< ? extends AbstractFingerprint>)Class.forName(element));
} catch (final ClassNotFoundException cnf) {
try {
// try it as an alias
cls.add((Class< ? extends AbstractFingerprint>)Class.forName(Configure.getProperty(element, element, FingerprintFactory.class.getName())));
} catch (final ClassCastException t) {
t.printStackTrace();
}
} catch (final ClassCastException t) {
t.printStackTrace();
}
}
if (pStart == null || pStart.length() == 0) {
start = null;
} else {
start = pStart;
}
}
}
/**
* <code>BINARY</code> FingerprintFactory - type name given to files whose extension has no configured format
*/
public static final String BINARY = "binary";
/**
* <code>config</code> FingerprintFactory - configured formats keyed by file extension; filled by the static initializer of this class
*
* @modelguid {8E47B2A1-180D-4F0C-A08A-DE04C1C252AA}
*/
private static Hashtable<String, ConfigItem> config = null;
/**
* <code>deflt</code> FingerprintFactory - class name of the default fingerprint algorithm; starts as <code>MDFingerprint</code> and is replaced by the
* value of the "default fingerprint" property when the static initializer runs
*/
private static String deflt = MDFingerprint.class.getName();
/**
 * Loads the configuration when the class is initialised and registers one <code>ConfigItem</code> per configured file extension. A format whose
 * fingerprint classes cannot be resolved is logged and left out of the table.
 */
static {
    Configure.load();
    final String factoryName = FingerprintFactory.class.getName();
    deflt = Configure.getProperty("default fingerprint", deflt, factoryName);
    final String[] formatNames = Configure.getArray("formats", new String[]{"text"}, factoryName);
    config = new Hashtable<String, ConfigItem>();
    for (int f = 0; f < formatNames.length; f++) {
        final String format = formatNames[f];
        // every setting below is read from the section named after the format
        final int sigOffset = Configure.getIntProperty("signature-offset", 0, format);
        final int sigLength = Configure.getIntProperty("signature-length", 0, format);
        final String sigRegex = Configure.getProperty("signature-regex", "", format);
        final String sigStart = Configure.getProperty("signature-start", null, format);
        final String[] fingerprintNames = Configure.getArray("fingerprint", new String[]{deflt}, format);
        final String[] extensions = Configure.getArray("extensions", new String[]{"txt"}, format);
        for (int e = 0; e < extensions.length; e++) {
            try {
                final ConfigItem item = new ConfigItem(format, sigOffset, sigLength, sigStart, sigRegex, fingerprintNames);
                config.put(extensions[e], item);
            } catch (final ClassNotFoundException cnf) {
                log.error("FingerprintFactory configuration error.", cnf);
            }
        }
    }
}
/**
 * Creates one fingerprint object per class configured in <code>ci</code> and attaches it to the file. Each class is instantiated through its constructor
 * taking a single <code>FingerprintFile</code>. Individual failures are logged; the call only fails if no fingerprint at all could be created.
 *
 * @param fi FingerprintFile - the file the fingerprints are attached to
 * @param ci ConfigItem - supplies the fingerprint classes
 * @throws IllegalStateException if <code>ci</code> lists no fingerprint classes
 * @throws Exception the last creation failure, if every configured class failed
 * @modelguid {3D934C58-FA8E-4D31-B769-05427EADFAC6}
 */
private static void createFingerprints(final FingerprintFile fi, final ConfigItem ci) throws Exception {
    if (ci.cls.isEmpty()) {
        // without this guard the "all failed" check below would end in a bare "throw null"
        throw new IllegalStateException("No fingerprint classes configured for file [" + fi.getPath() + "]");
    }
    int failures = 0;
    Exception lastEx = null;
    for (final Class< ? extends AbstractFingerprint> fpClass : ci.cls) {
        try {
            final Constructor< ? extends AbstractFingerprint> con = fpClass.getDeclaredConstructor(FingerprintFile.class);
            fi.addFingerprint(con.newInstance(fi));
        } catch (final Exception e) {
            failures++;
            lastEx = e;
            log.error("Could not create fingerprint [" + fpClass + "] for file [" + fi.getPath() + "]", e);
        }
    }
    if (failures == ci.cls.size()) {
        throw lastEx;
    }
}
/**
 * Looks up the fingerprint classes configured for a file extension.
 *
 * @param pExt String - extension, may be prefixed with *.; <code>null</code> is treated like an unknown extension
 * @return a new, modifiable list of the fingerprint classes for the extension; contains only <code>MDFingerprint</code> when nothing is configured for
 *         the extension
 */
public static List<Class< ? extends AbstractFingerprint>> getFingerprintTypes(final String pExt) {
    final List<Class< ? extends AbstractFingerprint>> types = new ArrayList<Class< ? extends AbstractFingerprint>>();
    if (pExt != null) {
        final String ext = pExt.startsWith("*.") ? pExt.substring(2) : pExt;
        final ConfigItem item = config.get(ext);
        if (item != null) {
            types.addAll(item.cls);
        }
    }
    if (types.isEmpty()) {
        types.add(MDFingerprint.class);
    }
    return types;
}
/**
 * Determines the file type from the extension, validates the signature and creates the Fingerprint objects. Files without an extension, or with an
 * extension that has no configured format, are treated as <code>BINARY</code>: they get the default fingerprint and their signature is not validated.
 *
 * @param fi FingerprintFile - the file to fingerprint
 * @throws Exception if no fingerprint could be created for the file
 * @modelguid {BACB38DD-DB0B-4915-8750-BEB0959A6019}
 */
public static void loadFingerprints(final FingerprintFile fi) throws Exception {
    // only files whose name contains a dot are looked up by extension
    final boolean hasExtension = fi.getName().lastIndexOf('.') >= 0;
    final ConfigItem known = hasExtension ? config.get(fi.getExtension()) : null;
    if (known == null) {
        final ConfigItem fallback = new ConfigItem(BINARY, 0, 0, null, "", new String[]{deflt});
        createFingerprints(fi, fallback);
        return;
    }
    validateSignature(fi, known);
    createFingerprints(fi, known);
}
/**
 * Scans the file, beginning at file offset <code>ci.offs</code>, for the start marker <code>ci.start</code>. At every occurrence the following
 * <code>ci.length</code> bytes are taken as a header candidate; the first candidate whose hex representation matches <code>ci.sig</code> is returned.
 * <p>
 * The file is read through a sliding window so that a marker or header lying across a block boundary is still found.
 *
 * @param f File - the file to scan
 * @param ci ConfigItem - supplies offset, header length, start marker and signature pattern; <code>ci.start</code> and <code>ci.sig</code> must not be
 *          <code>null</code>
 * @return the start of the header/magic number, or <code>null</code> if no matching header exists
 * @throws IOException if the file cannot be opened or read
 */
private static byte[] moveToStart(final File f, final ConfigItem ci) throws IOException {
    final byte[] marker = HexArray.makeBytes(ci.start);
    final BMByteSearch bm = new BMByteSearch(marker);
    final int headerLen = ci.length;
    // bytes at the end of a window that may hold the beginning of a marker
    final int markerTail = Math.max(0, marker.length - 1);
    // the extra room guarantees that a header cut off by the window end never starts at index 0, so the window always advances
    // (assumes Constants.BUFFER_SIZE is positive - TODO confirm)
    final byte[] window = new byte[Constants.BUFFER_SIZE + Math.max(headerLen, marker.length)];
    BufferedInputStream bis = null;
    try {
        bis = new BufferedInputStream(new FileInputStream(f));
        if (!skipToOffset(bis, ci.offs)) {
            return null;
        }
        int filled = 0;
        boolean eof = false;
        while (true) {
            // top up the window; a single read may deliver fewer bytes than requested
            while (!eof && filled < window.length) {
                final int n = bis.read(window, filled, window.length - filled);
                if (n < 0) {
                    eof = true;
                } else {
                    filled += n;
                }
            }
            int from = 0;
            int cut = -1; // index of a marker whose header runs past the end of the window
            while (cut < 0 && filled - from >= marker.length) {
                // NOTE(review): like the original code this treats the result as an absolute index into the array - confirm against BMByteSearch
                final int hit = bm.search(window, from, filled - from);
                if (hit < 0) {
                    break;
                }
                if (hit + headerLen > filled) {
                    cut = hit;
                } else {
                    final byte[] header = new byte[headerLen];
                    System.arraycopy(window, hit, header, 0, headerLen);
                    if (ci.sig.matcher(HexArray.makeString(header)).matches()) {
                        return header;
                    }
                    from = hit + 1;
                }
            }
            if (eof) {
                return null;
            }
            // keep the unfinished candidate, or else the tail that may contain the first bytes of a marker
            final int keepFrom = Math.min(filled, cut >= 0 ? cut : Math.max(from, filled - markerTail));
            filled -= keepFrom;
            System.arraycopy(window, keepFrom, window, 0, filled);
        }
    } finally {
        if (bis != null) {
            try {
                bis.close();
            } catch (final IOException ignored) {
                // the stream was only read from; a failing close cannot lose data
            }
        }
    }
}

/**
 * Advances the stream by <code>count</code> bytes.
 *
 * @param in BufferedInputStream - the stream to advance
 * @param count long - number of bytes to skip; values below 1 skip nothing
 * @return <code>false</code> if the stream ended before <code>count</code> bytes were skipped
 * @throws IOException if the stream cannot be read
 */
private static boolean skipToOffset(final BufferedInputStream in, final long count) throws IOException {
    long remaining = count;
    while (remaining > 0) {
        final long skipped = in.skip(remaining);
        if (skipped > 0) {
            remaining -= skipped;
        } else if (in.read() < 0) {
            // skip() may legitimately return 0; only read() can tell end of stream apart
            return false;
        } else {
            remaining--;
        }
    }
    return true;
}
/**
 * Checks the file's signature against the configured pattern and records the outcome as the file's status. Formats without a signature pattern are not
 * validated and keep their current status.
 *
 * @param fi FingerprintFile - to be validated
 * @param ci ConfigItem - supplies offset, length, optional start marker and signature pattern
 * @modelguid {0DA46A7F-1A5D-41F5-8132-DA98FC739BF1}
 */
private static void validateSignature(final FingerprintFile fi, final ConfigItem ci) {
    final File f = new File(fi.getPath());
    try {
        if (f.length() == 0L) {
            log.debug(fi.getPath() + " is empty!");
            fi.setStatus(Status.FILE_EMPTY);
        } else if (ci.sig == null) {
            // don't validate these files!
        } else if (f.length() >= (long)ci.offs + ci.length) {
            // without a start marker the signature sits at a fixed offset; otherwise it has to be searched for
            final byte[] b = ci.start == null ? readSignatureBytes(f, ci.offs, ci.length) : moveToStart(f, ci);
            if (b == null) {
                log.debug(fi.getPath() + " has invalid signature!");
                fi.setStatus(Status.FILE_SIGNATURE_MISMATCH);
                return;
            }
            final String s = HexArray.makeString(b);
            if (!ci.sig.matcher(s).matches()) {
                log.debug(fi.getPath() + " has invalid signature! (" + s + ")");
                fi.setStatus(Status.FILE_SIGNATURE_MISMATCH);
            } else {
                log.debug(fi.getPath() + " is ok.");
                fi.setStatus(Status.FILE_OK);
            }
        } else {
            log.debug(fi.getPath() + " is too short!");
            fi.setStatus(Status.FILE_TOO_SHORT);
        }
    } catch (final FileNotFoundException ioe) {
        log.debug(fi.getPath() + " can not be found!");
        fi.setStatus(Status.FILE_MISSING);
    } catch (final IOException ioe) {
        log.debug(fi.getPath() + " can not be read!");
        fi.setStatus(Status.FILE_NOT_READABLE);
    }
}

/**
 * Reads <code>length</code> bytes starting at file offset <code>offset</code>. The stream is always closed, also when reading fails.
 *
 * @param f File - the file to read from
 * @param offset int - position in the file of the first byte to read
 * @param length int - number of bytes to read
 * @return the bytes read; always exactly <code>length</code> long
 * @throws IOException if the file cannot be opened or read, or ends before all requested bytes were read
 */
private static byte[] readSignatureBytes(final File f, final int offset, final int length) throws IOException {
    final BufferedInputStream in = new BufferedInputStream(new FileInputStream(f));
    try {
        long toSkip = offset;
        while (toSkip > 0) {
            final long skipped = in.skip(toSkip);
            if (skipped > 0) {
                toSkip -= skipped;
            } else if (in.read() < 0) {
                throw new IOException("Unexpected end of file before signature offset in " + f.getPath());
            } else {
                toSkip--;
            }
        }
        final byte[] b = new byte[length];
        int filled = 0;
        // a single read may return fewer bytes than requested
        while (filled < length) {
            final int n = in.read(b, filled, length - filled);
            if (n < 0) {
                throw new IOException("Unexpected end of file while reading signature of " + f.getPath());
            }
            filled += n;
        }
        return b;
    } finally {
        try {
            in.close();
        } catch (final IOException ignored) {
            // the stream was only read from; a failing close cannot lose data
        }
    }
}
/**
* private constructor for utility class
*/
private FingerprintFactory() {
super();
}
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?