archiveutils.java
来自「一个搜索引擎,希望对大家有用」· Java 代码 · 共 668 行 · 第 1/2 页
JAVA
668 行
/* * ArchiveUtils * * $Header: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/java/org/archive/util/ArchiveUtils.java,v 1.36 2006/08/31 20:54:01 stack-sf Exp $ * * Created on Jul 7, 2003 * * Copyright (C) 2003 Internet Archive. * * This file is part of the Heritrix web crawler (crawler.archive.org). * * Heritrix is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. * * Heritrix is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with Heritrix; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */package org.archive.util;import java.io.IOException;import java.io.PrintWriter;import java.io.StringWriter;import java.text.ParseException;import java.text.SimpleDateFormat;import java.util.Calendar;import java.util.Date;import java.util.GregorianCalendar;import java.util.TimeZone;/** * Miscellaneous useful methods. * * * @author gojomo */public class ArchiveUtils { /** * Arc-style date stamp in the format yyyyMMddHHmm and UTC time zone. */ public static final SimpleDateFormat TIMESTAMP12; /** * Arc-style date stamp in the format yyyyMMddHHmmss and UTC time zone. */ public static final SimpleDateFormat TIMESTAMP14; /** * Arc-style date stamp in the format yyyyMMddHHmmssSSS and UTC time zone. */ public static final SimpleDateFormat TIMESTAMP17; /** * Log-style date stamp in the format yyyy-MM-dd'T'HH:mm:ss.SSS'Z' * UTC time zone is assumed. */ public static final SimpleDateFormat TIMESTAMP17ISO8601Z; /** * Log-style date stamp in the format yyyy-MM-dd'T'HH:mm:ss'Z' * UTC time zone is assumed. */ public static final SimpleDateFormat TIMESTAMP14ISO8601Z; /** * Default character to use padding strings. */ private static final char DEFAULT_PAD_CHAR = ' '; /** milliseconds in an hour */ private static final int HOUR_IN_MS = 60 * 60 * 1000; /** milliseconds in a day */ private static final int DAY_IN_MS = 24 * HOUR_IN_MS; // Initialize fomatters with pattern and time zone static { TimeZone TZ = TimeZone.getTimeZone("GMT"); TIMESTAMP12 = new SimpleDateFormat("yyyyMMddHHmm"); TIMESTAMP12.setTimeZone(TZ); TIMESTAMP14 = new SimpleDateFormat("yyyyMMddHHmmss"); TIMESTAMP14.setTimeZone(TZ); TIMESTAMP17 = new SimpleDateFormat("yyyyMMddHHmmssSSS"); TIMESTAMP17.setTimeZone(TZ); TIMESTAMP17ISO8601Z = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"); TIMESTAMP14ISO8601Z = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); } public static int MAX_INT_CHAR_WIDTH = Integer.toString(Integer.MAX_VALUE).length(); /** * Utility function for creating arc-style date stamps * in the format yyyMMddHHmmssSSS. * Date stamps are in the UTC time zone * @return the date stamp */ public static String get17DigitDate(){ return TIMESTAMP17.format(new Date()); } /** * Utility function for creating arc-style date stamps * in the format yyyMMddHHmmss. * Date stamps are in the UTC time zone * @return the date stamp */ public static String get14DigitDate(){ return TIMESTAMP14.format(new Date()); } /** * Utility function for creating arc-style date stamps * in the format yyyMMddHHmm. * Date stamps are in the UTC time zone * @return the date stamp */ public static String get12DigitDate(){ return TIMESTAMP12.format(new Date()); } /** * Utility function for creating log timestamps, in * W3C/ISO8601 format, assuming UTC. Use current time. * * Format is yyyy-MM-dd'T'HH:mm:ss.SSS'Z' * * @return the date stamp */ public static String getLog17Date(){ return TIMESTAMP17ISO8601Z.format(new Date()); } /** * Utility function for creating log timestamps, in * W3C/ISO8601 format, assuming UTC. * * Format is yyyy-MM-dd'T'HH:mm:ss.SSS'Z' * @param date Date to format. * * @return the date stamp */ public static String getLog17Date(long date){ return TIMESTAMP17ISO8601Z.format(new Date(date)); } /** * Utility function for creating log timestamps, in * W3C/ISO8601 format, assuming UTC. Use current time. * * Format is yyyy-MM-dd'T'HH:mm:ss'Z' * * @return the date stamp */ public static String getLog14Date(){ return TIMESTAMP14ISO8601Z.format(new Date()); } /** * Utility function for creating log timestamps, in * W3C/ISO8601 format, assuming UTC. * * Format is yyyy-MM-dd'T'HH:mm:ss'Z' * @param date Date to format. * * @return the date stamp */ public static String getLog14Date(long date){ return TIMESTAMP14ISO8601Z.format(new Date(date)); } /** * Utility function for creating arc-style date stamps * in the format yyyMMddHHmmssSSS. * Date stamps are in the UTC time zone * * @param date milliseconds since epoc * @return the date stamp */ public static String get17DigitDate(long date){ return TIMESTAMP17.format(new Date(date)); } public static String get17DigitDate(Date date){ return TIMESTAMP17.format(date); } /** * Utility function for creating arc-style date stamps * in the format yyyMMddHHmmss. * Date stamps are in the UTC time zone * * @param date milliseconds since epoc * @return the date stamp */ public static String get14DigitDate(long date){ return TIMESTAMP14.format(new Date(date)); } public static String get14DigitDate(Date d) { return TIMESTAMP14.format(d); } /** * Utility function for creating arc-style date stamps * in the format yyyMMddHHmm. * Date stamps are in the UTC time zone * * @param date milliseconds since epoc * @return the date stamp */ public static String get12DigitDate(long date){ return TIMESTAMP12.format(new Date(date)); } public static String get12DigitDate(Date d) { return TIMESTAMP12.format(d); } /** * Utility function for parsing arc-style date stamps * in the format yyyMMddHHmmssSSS. * Date stamps are in the UTC time zone. The whole string will not be * parsed, only the first 17 digits. * * @param date an arc-style formatted date stamp * @return the Date corresponding to the date stamp string * @throws ParseException if the inputstring was malformed */ public static Date parse17DigitDate(String date) throws ParseException{ return TIMESTAMP17.parse(date); } /** * Utility function for parsing arc-style date stamps * in the format yyyMMddHHmmss. * Date stamps are in the UTC time zone. The whole string will not be * parsed, only the first 14 digits. * * @param date an arc-style formatted date stamp * @return the Date corresponding to the date stamp string * @throws ParseException if the inputstring was malformed */ public static Date parse14DigitDate(String date) throws ParseException{ return TIMESTAMP14.parse(date); } /** * Utility function for parsing arc-style date stamps * in the format yyyMMddHHmm. * Date stamps are in the UTC time zone. The whole string will not be * parsed, only the first 12 digits. * * @param date an arc-style formatted date stamp * @return the Date corresponding to the date stamp string * @throws ParseException if the inputstring was malformed */ public static Date parse12DigitDate(String date) throws ParseException{ return TIMESTAMP12.parse(date); } /** * Convert 17-digit date format timestamps (as found in crawl.log, for * example) into a GregorianCalendar object. + * Useful so you can convert * into milliseconds-since-epoch. Note: it is possible to compute * milliseconds-since-epoch + * using {@link #parse17DigitDate}.UTC(), but * that method is deprecated in favor of using Calendar.getTimeInMillis(). + * * <p/>I probably should have dug into all the utility methods in * DateFormat.java to parse the timestamp, but this was + * easier. If * someone wants to fix this to use those methods, please have at it! <p/> * Mike Schwartz, schwartz at CodeOnTheRoad dot com. * * @param timestamp17String * @return Calendar set to <code>timestamp17String</code>. */ public static Calendar timestamp17ToCalendar(String timestamp17String) { GregorianCalendar calendar = new GregorianCalendar(); int year = Integer.parseInt(timestamp17String.substring(0, 4)); int dayOfMonth = Integer.parseInt(timestamp17String.substring(6, 8)); // Month is 0-based int month = Integer.parseInt(timestamp17String.substring(4, 6)) - 1; int hourOfDay = Integer.parseInt(timestamp17String.substring(8, 10)); int minute = Integer.parseInt(timestamp17String.substring(10, 12)); int second = Integer.parseInt(timestamp17String.substring(12, 14)); int milliseconds = Integer .parseInt(timestamp17String.substring(14, 17)); calendar.set(Calendar.YEAR, year); calendar.set(Calendar.MONTH, month); calendar.set(Calendar.DAY_OF_MONTH, dayOfMonth); calendar.set(Calendar.HOUR_OF_DAY, hourOfDay); calendar.set(Calendar.MINUTE, minute); calendar.set(Calendar.SECOND, second); calendar.set(Calendar.MILLISECOND, milliseconds); return calendar; } /** * @param timestamp A 14-digit timestamp or the suffix for a 14-digit * timestamp: E.g. '20010909014640' or '20010101' or '1970'. * @return Seconds since the epoch as a string zero-pre-padded so always * Integer.MAX_VALUE wide (Makes it so sorting of resultant string works * properly). * @throws ParseException */ public static String secondsSinceEpoch(String timestamp) throws ParseException { return zeroPadInteger((int) (getSecondsSinceEpoch(timestamp).getTime()/1000)); } /** * @param timestamp A 14-digit timestamp or the suffix for a 14-digit * timestamp: E.g. '20010909014640' or '20010101' or '1970'. * @return A date. * @see #secondsSinceEpoch(String) * @throws ParseException */ public static Date getSecondsSinceEpoch(String timestamp) throws ParseException { if (timestamp.length() < 14) { if (timestamp.length() < 10 && (timestamp.length() % 2) == 1) { throw new IllegalArgumentException("Must have year, " + "month, date, hour or second granularity: " + timestamp); } if (timestamp.length() == 4) { // Add first month and first date. timestamp = timestamp + "01010000"; } if (timestamp.length() == 6) { // Add a date of the first. timestamp = timestamp + "010000";
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?