archiveutils.java

来自「一个搜索引擎,希望对大家有用」· Java 代码 · 共 668 行 · 第 1/2 页

JAVA
668
字号
/*
 * ArchiveUtils
 *
 * $Header: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/java/org/archive/util/ArchiveUtils.java,v 1.36 2006/08/31 20:54:01 stack-sf Exp $
 *
 * Created on Jul 7, 2003
 *
 * Copyright (C) 2003 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */
package org.archive.util;

import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.TimeZone;

/**
 * Miscellaneous useful methods.
 *
 * <p>The members visible at the top of this class are shared date
 * formatters plus static helpers that format and parse arc-style
 * (all-digit) and log-style (ISO 8601) date stamps.
 *
 * <p>NOTE(review): the formatters below are shared static
 * {@link SimpleDateFormat} instances, which the JDK documents as not
 * synchronized. Confirm that concurrent callers synchronize externally
 * or otherwise tolerate this.
 *
 * @author gojomo
 */
public class ArchiveUtils {

    /**
     * Arc-style date stamp in the format yyyyMMddHHmm and UTC time zone.
     */
    public static final SimpleDateFormat TIMESTAMP12;

    /**
     * Arc-style date stamp in the format yyyyMMddHHmmss and UTC time zone.
     */
    public static final SimpleDateFormat TIMESTAMP14;

    /**
     * Arc-style date stamp in the format yyyyMMddHHmmssSSS and UTC time zone.
     */
    public static final SimpleDateFormat TIMESTAMP17;

    /**
     * Log-style date stamp in the format yyyy-MM-dd'T'HH:mm:ss.SSS'Z'
     * UTC time zone is assumed.
*/    public static final SimpleDateFormat TIMESTAMP17ISO8601Z;    /**     * Log-style date stamp in the format yyyy-MM-dd'T'HH:mm:ss'Z'     * UTC time zone is assumed.     */    public static final SimpleDateFormat TIMESTAMP14ISO8601Z;    /**     * Default character to use padding strings.     */    private static final char DEFAULT_PAD_CHAR = ' ';    /** milliseconds in an hour */     private static final int HOUR_IN_MS = 60 * 60 * 1000;    /** milliseconds in a day */    private static final int DAY_IN_MS = 24 * HOUR_IN_MS;    // Initialize fomatters with pattern and time zone    static {        TimeZone TZ = TimeZone.getTimeZone("GMT");        TIMESTAMP12 = new SimpleDateFormat("yyyyMMddHHmm");        TIMESTAMP12.setTimeZone(TZ);        TIMESTAMP14 = new SimpleDateFormat("yyyyMMddHHmmss");        TIMESTAMP14.setTimeZone(TZ);        TIMESTAMP17 = new SimpleDateFormat("yyyyMMddHHmmssSSS");        TIMESTAMP17.setTimeZone(TZ);        TIMESTAMP17ISO8601Z = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");        TIMESTAMP14ISO8601Z = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");    }        public static int MAX_INT_CHAR_WIDTH =        Integer.toString(Integer.MAX_VALUE).length();        /**     * Utility function for creating arc-style date stamps     * in the format yyyMMddHHmmssSSS.     * Date stamps are in the UTC time zone     * @return the date stamp     */    public static String get17DigitDate(){        return TIMESTAMP17.format(new Date());    }    /**     * Utility function for creating arc-style date stamps     * in the format yyyMMddHHmmss.     * Date stamps are in the UTC time zone     * @return the date stamp     */    public static String get14DigitDate(){        return TIMESTAMP14.format(new Date());    }    /**     * Utility function for creating arc-style date stamps     * in the format yyyMMddHHmm.     
* Date stamps are in the UTC time zone     * @return the date stamp     */    public static String get12DigitDate(){        return TIMESTAMP12.format(new Date());    }    /**     * Utility function for creating log timestamps, in     * W3C/ISO8601 format, assuming UTC. Use current time.      *      * Format is yyyy-MM-dd'T'HH:mm:ss.SSS'Z'     *      * @return the date stamp     */    public static String getLog17Date(){        return TIMESTAMP17ISO8601Z.format(new Date());    }        /**     * Utility function for creating log timestamps, in     * W3C/ISO8601 format, assuming UTC.      *      * Format is yyyy-MM-dd'T'HH:mm:ss.SSS'Z'     * @param date Date to format.     *      * @return the date stamp     */    public static String getLog17Date(long date){        return TIMESTAMP17ISO8601Z.format(new Date(date));    }        /**     * Utility function for creating log timestamps, in     * W3C/ISO8601 format, assuming UTC. Use current time.      *      * Format is yyyy-MM-dd'T'HH:mm:ss'Z'     *      * @return the date stamp     */    public static String getLog14Date(){        return TIMESTAMP14ISO8601Z.format(new Date());    }        /**     * Utility function for creating log timestamps, in     * W3C/ISO8601 format, assuming UTC.      *      * Format is yyyy-MM-dd'T'HH:mm:ss'Z'     * @param date Date to format.     *      * @return the date stamp     */    public static String getLog14Date(long date){        return TIMESTAMP14ISO8601Z.format(new Date(date));    }        /**     * Utility function for creating arc-style date stamps     * in the format yyyMMddHHmmssSSS.     
* Date stamps are in the UTC time zone     *     * @param date milliseconds since epoc     * @return the date stamp     */    public static String get17DigitDate(long date){        return TIMESTAMP17.format(new Date(date));    }        public static String get17DigitDate(Date date){        return TIMESTAMP17.format(date);    }    /**     * Utility function for creating arc-style date stamps     * in the format yyyMMddHHmmss.     * Date stamps are in the UTC time zone     *     * @param date milliseconds since epoc     * @return the date stamp     */    public static String get14DigitDate(long date){        return TIMESTAMP14.format(new Date(date));    }    public static String get14DigitDate(Date d) {        return TIMESTAMP14.format(d);    }    /**     * Utility function for creating arc-style date stamps     * in the format yyyMMddHHmm.     * Date stamps are in the UTC time zone     *     * @param date milliseconds since epoc     * @return the date stamp     */    public static String get12DigitDate(long date){        return TIMESTAMP12.format(new Date(date));    }        public static String get12DigitDate(Date d) {        return TIMESTAMP12.format(d);    }    /**     * Utility function for parsing arc-style date stamps     * in the format yyyMMddHHmmssSSS.     * Date stamps are in the UTC time zone.  The whole string will not be     * parsed, only the first 17 digits.     *     * @param date an arc-style formatted date stamp     * @return the Date corresponding to the date stamp string     * @throws ParseException if the inputstring was malformed     */    public static Date parse17DigitDate(String date) throws ParseException{        return TIMESTAMP17.parse(date);    }    /**     * Utility function for parsing arc-style date stamps     * in the format yyyMMddHHmmss.     * Date stamps are in the UTC time zone.  The whole string will not be     * parsed, only the first 14 digits.     
*     * @param date an arc-style formatted date stamp     * @return the Date corresponding to the date stamp string     * @throws ParseException if the inputstring was malformed     */    public static Date parse14DigitDate(String date) throws ParseException{        return TIMESTAMP14.parse(date);    }    /**     * Utility function for parsing arc-style date stamps     * in the format yyyMMddHHmm.     * Date stamps are in the UTC time zone.  The whole string will not be     * parsed, only the first 12 digits.     *     * @param date an arc-style formatted date stamp     * @return the Date corresponding to the date stamp string     * @throws ParseException if the inputstring was malformed     */    public static Date parse12DigitDate(String date) throws ParseException{        return TIMESTAMP12.parse(date);    }        /**     * Convert 17-digit date format timestamps (as found in crawl.log, for     * example) into a GregorianCalendar object. + * Useful so you can convert     * into milliseconds-since-epoch. Note: it is possible to compute     * milliseconds-since-epoch + * using {@link #parse17DigitDate}.UTC(), but     * that method is deprecated in favor of using Calendar.getTimeInMillis(). + *     * <p/>I probably should have dug into all the utility methods in     * DateFormat.java to parse the timestamp, but this was + * easier. If     * someone wants to fix this to use those methods, please have at it! <p/>     * Mike Schwartz, schwartz at CodeOnTheRoad dot com.     *      * @param timestamp17String     * @return Calendar set to <code>timestamp17String</code>.     
*/    public static Calendar timestamp17ToCalendar(String timestamp17String) {        GregorianCalendar calendar = new GregorianCalendar();        int year = Integer.parseInt(timestamp17String.substring(0, 4));        int dayOfMonth = Integer.parseInt(timestamp17String.substring(6, 8));        // Month is 0-based        int month = Integer.parseInt(timestamp17String.substring(4, 6)) - 1;        int hourOfDay = Integer.parseInt(timestamp17String.substring(8, 10));        int minute = Integer.parseInt(timestamp17String.substring(10, 12));        int second = Integer.parseInt(timestamp17String.substring(12, 14));        int milliseconds = Integer                .parseInt(timestamp17String.substring(14, 17));        calendar.set(Calendar.YEAR, year);        calendar.set(Calendar.MONTH, month);        calendar.set(Calendar.DAY_OF_MONTH, dayOfMonth);        calendar.set(Calendar.HOUR_OF_DAY, hourOfDay);        calendar.set(Calendar.MINUTE, minute);        calendar.set(Calendar.SECOND, second);        calendar.set(Calendar.MILLISECOND, milliseconds);        return calendar;    }        /**     * @param timestamp A 14-digit timestamp or the suffix for a 14-digit     * timestamp: E.g. '20010909014640' or '20010101' or '1970'.     * @return Seconds since the epoch as a string zero-pre-padded so always     * Integer.MAX_VALUE wide (Makes it so sorting of resultant string works     * properly).     * @throws ParseException      */    public static String secondsSinceEpoch(String timestamp)    throws ParseException {        return zeroPadInteger((int)            (getSecondsSinceEpoch(timestamp).getTime()/1000));    }        /**     * @param timestamp A 14-digit timestamp or the suffix for a 14-digit     * timestamp: E.g. '20010909014640' or '20010101' or '1970'.     * @return A date.     
* @see #secondsSinceEpoch(String)     * @throws ParseException      */    public static Date getSecondsSinceEpoch(String timestamp)    throws ParseException {        if (timestamp.length() < 14) {            if (timestamp.length() < 10 && (timestamp.length() % 2) == 1) {                throw new IllegalArgumentException("Must have year, " +                    "month, date, hour or second granularity: " + timestamp);            }            if (timestamp.length() == 4) {                // Add first month and first date.                timestamp = timestamp + "01010000";            }            if (timestamp.length() == 6) {                // Add a date of the first.                timestamp = timestamp + "010000";

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?