fingerprintfactory.java

来自「dump3 morpheus 0.2.9 src」· Java 代码 · 共 363 行

JAVA
363
字号
/**
 * DuMP3 version morpheus_0.2.9 - a duplicate/similar file finder in Java<BR>
 * Copyright 2005 Alexander Gr&auml;sser<BR>
 * All Rights Reserved, http://dump3.sourceforge.net/<BR>
 * <BR>
 * This file is part of DuMP3.<BR>
 * <BR>
 * DuMP3 is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2 of the License, or (at your option) any later version.<BR>
 * <BR>
 * DuMP3 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
 * PARTICULAR PURPOSE. See the GNU General Public License for more details.<BR>
 * <BR>
 * You should have received a copy of the GNU General Public License along with DuMP3; if not, write to the Free Software Foundation, Inc., 51 Franklin St,
 * Fifth Floor, Boston, MA 02110-1301 USA
 */
package net.za.grasser.duplicate.fingerprint;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.List;
import java.util.regex.Pattern;
import net.za.grasser.duplicate.Configure;
import net.za.grasser.duplicate.file.FingerprintFile;
import net.za.grasser.duplicate.file.Status;
import net.za.grasser.duplicate.util.Constants;
import net.za.grasser.duplicate.util.HexArray;
import org.apache.log4j.Logger;
import com.lutris.util.BMByteSearch;

/**
 * This class determines the fingerprint algorithms for a given <code>FingerprintFile</code> by extension. It validates the contents of the file by checking
 * the "magic number" using a regular expression. It also generates the fingerprints for the file using the configured algorithms.
 * 
 * @author <a href="http://sourceforge.net/sendmessage.php?touser=733840">pyropunk at sourceforge dot net</a>
 * @version $Revision: 1.12 $
 * @modelguid {3132B033-3571-4B10-B82D-AC0C1BCEED92}
 */
public class FingerprintFactory {
  /**
   * <code>log</code> FingerprintFactory - class-wide log4j logger; used in this class for configuration errors, fingerprint creation failures and per-file
   * signature validation messages.
   */
  private static final Logger log = Logger.getLogger(FingerprintFactory.class);

  /**
   * @author <a href="http://sourceforge.net/sendmessage.php?touser=733840">pyropunk at sourceforge dot net</a>
   * @version $Revision: 1.12 $
   * @modelguid {AD489571-E9AC-48E4-B3F2-1D19C3565DB4}
   */
  private static class ConfigItem {
    /**
     * <code>type</code> ConfigItem -
     */
    String type = BINARY;
    /** @modelguid {1F15908F-943F-4D1C-89C3-C2140EE33A09} */
    int offs = 0;
    /**
     * <code>length</code> ConfigItem -
     * 
     * @modelguid {9A98F6D1-9EC3-40C5-AAF8-15A2F561DF1C}
     */
    int length = 0;
    /** @modelguid {CDEBBFF6-AE8B-4CD1-BB5B-106632BFC9F8} */
    Pattern sig = null;
    /** @modelguid {7CB4A319-64E5-45D0-BBDA-E177154F2F69} */
    List<Class< ? extends AbstractFingerprint>> cls = null;
    /**
     * <code>start</code> ConfigItem - hex string of header starting point
     */
    String start = null;

    /**
     * @param pType String - the file type
     * @param pOffset int - file signature starts here
     * @param pLength int - length of signature
     * @param pStart String - a starting string
     * @param pSig - file start signature
     * @param pCls - which class to create
     * @throws ClassNotFoundException
     * @modelguid {F867632A-2F1C-4EF1-9E42-41A0D264961C}
     */
    @SuppressWarnings("unchecked")
    ConfigItem(final String pType, final int pOffset, final int pLength, final String pStart, final String pSig, final String[] pCls) throws ClassNotFoundException {
      type = pType;
      offs = pOffset;
      if (pSig != null && !(pSig.length() == 0)) {
        sig = Pattern.compile(pSig, Pattern.CASE_INSENSITIVE);
      }
      length = pLength;
      cls = new ArrayList<Class< ? extends AbstractFingerprint>>(pCls.length);
      for (final String element : pCls) {
        try {
          cls.add((Class< ? extends AbstractFingerprint>)Class.forName(element));
        } catch (final ClassNotFoundException cnf) {
          try {
            // try it as an alias
            cls.add((Class< ? extends AbstractFingerprint>)Class.forName(Configure.getProperty(element, element, FingerprintFactory.class.getName())));
          } catch (final ClassCastException t) {
            t.printStackTrace();
          }
        } catch (final ClassCastException t) {
          t.printStackTrace();
        }
      }
      if (pStart == null || pStart.length() == 0) {
        start = null;
      } else {
        start = pStart;
      }
    }
  }

  /**
   * <code>BINARY</code> FingerprintFactory - type name used for files whose extension has no configured format
   */
  public static final String BINARY = "binary";
  /**
   * <code>config</code> FingerprintFactory - maps a file extension to its format settings; filled by the static initializer of this class
   * 
   * @modelguid {8E47B2A1-180D-4F0C-A08A-DE04C1C252AA}
   */
  private static Hashtable<String, ConfigItem> config = null;
  /**
   * <code>deflt</code> FingerprintFactory - class name of the default fingerprint algorithm; starts out as <code>MDFingerprint</code> and is replaced by the
   * "default fingerprint" property in the static initializer
   */
  private static String deflt = MDFingerprint.class.getName();
  /**
   * Loads the configuration and builds the extension-to-format table: for every configured format its signature settings and fingerprint classes are read,
   * and one entry is registered per extension of that format. Formats whose fingerprint classes cannot be found are logged and skipped.
   */
  static {
    Configure.load();
    final String owner = FingerprintFactory.class.getName();
    deflt = Configure.getProperty("default fingerprint", deflt, owner);
    final String[] formats = Configure.getArray("formats", new String[]{
      "text"
    }, owner);
    config = new Hashtable<String, ConfigItem>();
    for (int f = 0; f < formats.length; f++) {
      final String format = formats[f];
      // per-format settings are looked up under the format's own name
      final int sigOffset = Configure.getIntProperty("signature-offset", 0, format);
      final int sigLength = Configure.getIntProperty("signature-length", 0, format);
      final String sigRegex = Configure.getProperty("signature-regex", "", format);
      final String sigStart = Configure.getProperty("signature-start", null, format);
      final String[] fingerprints = Configure.getArray("fingerprint", new String[]{
        deflt
      }, format);
      final String[] extensions = Configure.getArray("extensions", new String[]{
        "txt"
      }, format);
      for (int e = 0; e < extensions.length; e++) {
        try {
          final ConfigItem item = new ConfigItem(format, sigOffset, sigLength, sigStart, sigRegex, fingerprints);
          config.put(extensions[e], item);
        } catch (final ClassNotFoundException cnf) {
          log.error("FingerprintFactory configuration error.", cnf);
        }
      }
    }
  }

  /**
   * Creates one fingerprint object per configured class and attaches it to the file. Each class must declare a constructor taking a single
   * <code>FingerprintFile</code>. Individual failures are logged; the call only fails when not a single fingerprint could be created.
   * 
   * @param fi FingerprintFile - the file that receives the fingerprints
   * @param ci ConfigItem - supplies the fingerprint classes to instantiate
   * @throws IllegalStateException if <code>ci</code> lists no fingerprint classes at all
   * @throws Exception the last creation failure, if every configured class failed
   * @modelguid {3D934C58-FA8E-4D31-B769-05427EADFAC6}
   */
  private static void createFingerprints(final FingerprintFile fi, final ConfigItem ci) throws Exception {
    if (ci.cls.isEmpty()) {
      // previously this case ended in "throw null", i.e. an unexplained NullPointerException
      throw new IllegalStateException("No fingerprint classes configured for file [" + fi.getPath() + "]");
    }
    int failures = 0;
    Exception lastEx = null;
    for (final Class< ? extends AbstractFingerprint> fpClass : ci.cls) {
      try {
        final Constructor< ? extends AbstractFingerprint> con = fpClass.getDeclaredConstructor(FingerprintFile.class);
        final AbstractFingerprint fp = con.newInstance(fi);
        fi.addFingerprint(fp);
      } catch (final Exception e) {
        failures++;
        lastEx = e;
        log.error("Could not create fingerprint [" + fpClass + "] for file [" + fi.getPath() + "]", e);
      }
    }
    if (lastEx != null && failures == ci.cls.size()) {
      throw lastEx;
    }
  }

  /**
   * Looks up the fingerprint classes configured for a file extension.
   * 
   * @param pExt String - extension, may be prefixed with *.
   * @return a new list holding the fingerprint classes for the extension; contains only <code>MDFingerprint</code> when nothing is configured for it
   */
  public static List<Class< ? extends AbstractFingerprint>> getFingerprintTypes(final String pExt) {
    // strip an optional "*." wildcard prefix
    final String key = pExt.startsWith("*.") ? pExt.substring(2) : pExt;
    final List<Class< ? extends AbstractFingerprint>> types = new ArrayList<Class< ? extends AbstractFingerprint>>();
    final ConfigItem item = config.get(key);
    if (item != null) {
      types.addAll(item.cls);
    }
    if (types.isEmpty()) {
      types.add(MDFingerprint.class);
    }
    return types;
  }

  /**
   * Determines the file type from the extension, validates the signature for known types and creates the Fingerprint objects. Files without an extension,
   * or with an extension that has no configured format, are treated as {@link #BINARY} and get the default fingerprint without signature validation.
   * 
   * @param fi FingerprintFile - the file to fingerprint
   * @throws Exception if no fingerprint could be created for the file
   * @modelguid {BACB38DD-DB0B-4915-8750-BEB0959A6019}
   */
  public static void loadFingerprints(final FingerprintFile fi) throws Exception {
    // only names containing a dot can carry an extension worth looking up
    final boolean hasExtension = fi.getName().lastIndexOf('.') >= 0;
    final ConfigItem known = hasExtension ? config.get(fi.getExtension()) : null;
    if (known == null) {
      final ConfigItem fallback = new ConfigItem(BINARY, 0, 0, null, "", new String[]{
        deflt
      });
      createFingerprints(fi, fallback);
      return;
    }
    validateSignature(fi, known);
    createFingerprints(fi, known);
  }

  /**
   * Searches the file, beginning at the configured signature offset, for the configured start marker and returns the <code>ci.length</code> bytes found at
   * the first marker position whose hex representation matches the signature pattern.
   * <p>
   * Files larger than <code>Constants.BUFFER_SIZE</code> are scanned block by block. NOTE(review): a start marker that itself straddles two blocks is not
   * detected, because each block is searched on its own; only a header that begins in one block and ends in the next is handled.
   * 
   * @param f File - the file to scan
   * @param ci ConfigItem - supplies offset, length, start marker and signature pattern
   * @return the start of the header/magic number, or <code>null</code> if no matching header was found
   * @throws IOException if the file cannot be opened or read
   */
  private static byte[] moveToStart(final File f, final ConfigItem ci) throws IOException {
    final BMByteSearch bm = new BMByteSearch(HexArray.makeBytes(ci.start));
    final byte[] b = new byte[ci.length];
    final int offset = ci.offs;
    BufferedInputStream bis = null;
    try {
      bis = new BufferedInputStream(new FileInputStream(f));
      final long remaining = f.length() - offset;
      // the offset is a position in the FILE, so it is skipped in the stream rather than used as an array index
      if (remaining < ci.length || !skipFully(bis, offset)) {
        return null;
      }
      int st = 0;
      String s = null;
      // check whether file is small enough to read in one operation
      if (remaining <= Constants.BUFFER_SIZE) {
        final byte[] buffer = new byte[(int)remaining];
        final int len = readFully(bis, buffer);
        do {
          st = st < len ? bm.search(buffer, st, len - st) : -1;
          if (st < 0 || st + ci.length > len) {
            // marker not found, or not enough bytes left for a complete header
            return null;
          }
          System.arraycopy(buffer, st, b, 0, ci.length);
          s = HexArray.makeString(b);
          st++;
        } while (!ci.sig.matcher(s).matches());
      } else {
        // read in blocks
        byte[] buffer = new byte[Constants.BUFFER_SIZE];
        int len = readFully(bis, buffer);
        do {
          st = st < len ? bm.search(buffer, st, len - st) : -1;
          while (st < 0 && (len = readFully(bis, buffer)) > 0) {
            st = bm.search(buffer, 0, len);
          }
          if (st < 0) {
            return null;
          }
          // header is over the buffer boundary?
          if (st + ci.length > len) {
            final byte[] next = new byte[Constants.BUFFER_SIZE];
            final int nextLen = readFully(bis, next);
            final int head = len - st;
            final int tail = ci.length - head;
            if (tail > nextLen) {
              // the file ends before the header is complete
              return null;
            }
            System.arraycopy(buffer, st, b, 0, head);
            System.arraycopy(next, 0, b, head, tail);
            buffer = next;
            len = nextLen;
            // continue at the beginning of the new block (st becomes 0 after the increment below)
            st = -1;
          } else {
            System.arraycopy(buffer, st, b, 0, ci.length);
          }
          s = HexArray.makeString(b);
          st++;
        } while (!ci.sig.matcher(s).matches());
      }
      return b;
    } catch (final IndexOutOfBoundsException oobe) {
      // defensive: any remaining out-of-range access means "no usable header"
      return null;
    } finally {
      if (bis != null) {
        try {
          bis.close();
        } catch (final Exception ignored) {
          // a failure to close an input stream does not affect the result
        }
      }
    }
  }

  /**
   * Skips exactly <code>pCount</code> bytes of the stream.
   * 
   * @param pIn BufferedInputStream - stream to advance
   * @param pCount long - number of bytes to skip; values below 1 skip nothing
   * @return <code>false</code> if the stream ended before all bytes were skipped
   * @throws IOException if the stream cannot be read
   */
  private static boolean skipFully(final BufferedInputStream pIn, final long pCount) throws IOException {
    long left = pCount;
    while (left > 0) {
      final long skipped = pIn.skip(left);
      if (skipped > 0) {
        left -= skipped;
      } else if (pIn.read() < 0) {
        // skip() may legitimately return 0; a single read() tells end-of-stream apart from that
        return false;
      } else {
        left--;
      }
    }
    return true;
  }

  /**
   * Reads until the buffer is full or the stream ends, since a single <code>read</code> may deliver fewer bytes than requested.
   * 
   * @param pIn BufferedInputStream - stream to read from
   * @param pBuffer byte[] - destination, filled from index 0
   * @return the number of bytes stored; 0 at end of stream
   * @throws IOException if the stream cannot be read
   */
  private static int readFully(final BufferedInputStream pIn, final byte[] pBuffer) throws IOException {
    int filled = 0;
    while (filled < pBuffer.length) {
      final int n = pIn.read(pBuffer, filled, pBuffer.length - filled);
      if (n < 0) {
        break;
      }
      filled += n;
    }
    return filled;
  }

  /**
   * Checks the file's signature against the configured pattern and records the outcome as the file's status. Empty files, files too short to hold the
   * signature, unreadable files and signature mismatches each get their own status; formats without a signature pattern are not validated at all.
   * 
   * @param fi FingerprintFile - to be validated
   * @param ci ConfigItem - supplies offset, length, optional start marker and signature pattern
   * @modelguid {0DA46A7F-1A5D-41F5-8132-DA98FC739BF1}
   */
  private static void validateSignature(final FingerprintFile fi, final ConfigItem ci) {
    final File f = new File(fi.getPath());
    try {
      if (f.length() == 0L) {
        log.debug(fi.getPath() + " is empty!");
        fi.setStatus(Status.FILE_EMPTY);
      } else if (ci.sig == null) {
        // don't validate these files!
      } else if (f.length() >= (long)ci.offs + ci.length) {
        // without a start marker the signature sits at a fixed offset; otherwise it has to be searched for
        final byte[] b = ci.start == null ? readSignature(f, ci.offs, ci.length) : moveToStart(f, ci);
        if (b == null) {
          log.debug(fi.getPath() + " has invalid signature!");
          fi.setStatus(Status.FILE_SIGNATURE_MISMATCH);
          return;
        }
        final String s = HexArray.makeString(b);
        if (!ci.sig.matcher(s).matches()) {
          log.debug(fi.getPath() + " has invalid signature! (" + s + ")");
          fi.setStatus(Status.FILE_SIGNATURE_MISMATCH);
        } else {
          log.debug(fi.getPath() + " is ok.");
          fi.setStatus(Status.FILE_OK);
        }
      } else {
        log.debug(fi.getPath() + " is too short!");
        fi.setStatus(Status.FILE_TOO_SHORT);
      }
    } catch (final FileNotFoundException ioe) {
      log.debug(fi.getPath() + " can not be found!");
      fi.setStatus(Status.FILE_MISSING);
    } catch (final IOException ioe) {
      log.debug(fi.getPath() + " can not be read!");
      fi.setStatus(Status.FILE_NOT_READABLE);
    }
  }

  /**
   * Reads <code>pLength</code> bytes that start <code>pOffset</code> bytes into the file. The offset is skipped in the stream (it is a file position, not an
   * index into the result array), reading continues until the array is full, and the stream is always closed.
   * 
   * @param f File - the file to read
   * @param pOffset int - file position of the first signature byte
   * @param pLength int - number of bytes to read
   * @return the signature bytes
   * @throws IOException if the file cannot be opened or read, or ends before the signature is complete
   */
  private static byte[] readSignature(final File f, final int pOffset, final int pLength) throws IOException {
    final byte[] signature = new byte[pLength];
    final BufferedInputStream in = new BufferedInputStream(new FileInputStream(f));
    try {
      long left = pOffset;
      while (left > 0) {
        final long skipped = in.skip(left);
        if (skipped > 0) {
          left -= skipped;
        } else if (in.read() < 0) {
          throw new IOException("Unexpected end of file before signature offset in [" + f.getPath() + "]");
        } else {
          left--;
        }
      }
      int filled = 0;
      while (filled < signature.length) {
        final int n = in.read(signature, filled, signature.length - filled);
        if (n < 0) {
          throw new IOException("Unexpected end of file inside signature of [" + f.getPath() + "]");
        }
        filled += n;
      }
      return signature;
    } finally {
      try {
        in.close();
      } catch (final IOException ignored) {
        // the bytes have already been read (or a more relevant exception is in flight)
      }
    }
  }

  /**
   * private constructor for utility class
   */
  private FingerprintFactory() {
    super();
  }
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?