📄 archiveutils.java
字号:
/* * ArchiveUtils * * $Header: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/java/org/archive/util/ArchiveUtils.java,v 1.38 2007/01/23 00:29:48 gojomo Exp $ * * Created on Jul 7, 2003 * * Copyright (C) 2003 Internet Archive. * * This file is part of the Heritrix web crawler (crawler.archive.org). * * Heritrix is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. * * Heritrix is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with Heritrix; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */package org.archive.util;import java.io.IOException;import java.io.PrintWriter;import java.io.StringWriter;import java.text.NumberFormat;import java.text.ParseException;import java.text.SimpleDateFormat;import java.util.Calendar;import java.util.Date;import java.util.GregorianCalendar;import java.util.Locale;import java.util.TimeZone;/** * Miscellaneous useful methods. * * @author gojomo & others */public class ArchiveUtils { /** * Arc-style date stamp in the format yyyyMMddHHmm and UTC time zone. */ private static final ThreadLocal<SimpleDateFormat> TIMESTAMP12 = threadLocalDateFormat("yyyyMMddHHmm");; /** * Arc-style date stamp in the format yyyyMMddHHmmss and UTC time zone. */ private static final ThreadLocal<SimpleDateFormat> TIMESTAMP14 = threadLocalDateFormat("yyyyMMddHHmmss"); /** * Arc-style date stamp in the format yyyyMMddHHmmssSSS and UTC time zone. 
*/
    private static final ThreadLocal<SimpleDateFormat> TIMESTAMP17 =
        threadLocalDateFormat("yyyyMMddHHmmssSSS");

    /**
     * Log-style date stamp in the format yyyy-MM-dd'T'HH:mm:ss.SSS'Z'.
     * The formatter runs in GMT; the trailing 'Z' is a quoted literal.
     */
    private static final ThreadLocal<SimpleDateFormat> TIMESTAMP17ISO8601Z =
        threadLocalDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");

    /**
     * Log-style date stamp in the format yyyy-MM-dd'T'HH:mm:ss'Z'.
     * The formatter runs in GMT; the trailing 'Z' is a quoted literal.
     */
    private static final ThreadLocal<SimpleDateFormat> TIMESTAMP14ISO8601Z =
        threadLocalDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");

    /**
     * Default character used when padding strings.
     */
    private static final char DEFAULT_PAD_CHAR = ' ';

    /** Milliseconds in an hour. */
    private static final int HOUR_IN_MS = 60 * 60 * 1000;

    /** Milliseconds in a day. */
    private static final int DAY_IN_MS = 24 * HOUR_IN_MS;

    /**
     * Creates a holder that lazily builds one formatter per thread.
     * SimpleDateFormat is not safe for concurrent use, so every thread
     * gets its own instance instead of sharing one.
     *
     * @param pattern SimpleDateFormat pattern used for each formatter
     * @return thread-local holder of formatters set to the GMT time zone
     */
    private static ThreadLocal<SimpleDateFormat> threadLocalDateFormat(final String pattern) {
        return new ThreadLocal<SimpleDateFormat>() {
            @Override
            protected SimpleDateFormat initialValue() {
                final SimpleDateFormat format = new SimpleDateFormat(pattern);
                final TimeZone gmt = TimeZone.getTimeZone("GMT");
                format.setTimeZone(gmt);
                return format;
            }
        };
    }

    /** Number of characters in the decimal form of Integer.MAX_VALUE. */
    public static int MAX_INT_CHAR_WIDTH =
        String.valueOf(Integer.MAX_VALUE).length();

    /**
     * Formats the current time as an arc-style date stamp
     * in the format yyyyMMddHHmmssSSS.
     * Date stamps are in the UTC time zone.
     *
     * @return the date stamp
     */
    public static String get17DigitDate(){
        final Date now = new Date();
        return TIMESTAMP17.get().format(now);
    }

    /**
     * Formats the current time as an arc-style date stamp
     * in the format yyyyMMddHHmmss.
     * Date stamps are in the UTC time zone.
     *
     * @return the date stamp
     */
    public static String get14DigitDate(){
        final Date now = new Date();
        return TIMESTAMP14.get().format(now);
    }

    /**
     * Utility function for creating arc-style date stamps
     * in the format yyyyMMddHHmm.
* Date stamps are in the UTC time zone * @return the date stamp */ public static String get12DigitDate(){ return TIMESTAMP12.get().format(new Date()); } /** * Utility function for creating log timestamps, in * W3C/ISO8601 format, assuming UTC. Use current time. * * Format is yyyy-MM-dd'T'HH:mm:ss.SSS'Z' * * @return the date stamp */ public static String getLog17Date(){ return TIMESTAMP17ISO8601Z.get().format(new Date()); } /** * Utility function for creating log timestamps, in * W3C/ISO8601 format, assuming UTC. * * Format is yyyy-MM-dd'T'HH:mm:ss.SSS'Z' * @param date Date to format. * * @return the date stamp */ public static String getLog17Date(long date){ return TIMESTAMP17ISO8601Z.get().format(new Date(date)); } /** * Utility function for creating log timestamps, in * W3C/ISO8601 format, assuming UTC. Use current time. * * Format is yyyy-MM-dd'T'HH:mm:ss'Z' * * @return the date stamp */ public static String getLog14Date(){ return TIMESTAMP14ISO8601Z.get().format(new Date()); } /** * Utility function for creating log timestamps, in * W3C/ISO8601 format, assuming UTC. * * Format is yyyy-MM-dd'T'HH:mm:ss'Z' * @param date long timestamp to format. * * @return the date stamp */ public static String getLog14Date(long date){ return TIMESTAMP14ISO8601Z.get().format(new Date(date)); } /** * Utility function for creating log timestamps, in * W3C/ISO8601 format, assuming UTC. * * Format is yyyy-MM-dd'T'HH:mm:ss'Z' * @param date Date to format. * * @return the date stamp */ public static String getLog14Date(Date date){ return TIMESTAMP14ISO8601Z.get().format(date); } /** * Utility function for creating arc-style date stamps * in the format yyyyMMddHHmmssSSS. 
* Date stamps are in the UTC time zone * * @param date milliseconds since epoc * @return the date stamp */ public static String get17DigitDate(long date){ return TIMESTAMP17.get().format(new Date(date)); } public static String get17DigitDate(Date date){ return TIMESTAMP17.get().format(date); } /** * Utility function for creating arc-style date stamps * in the format yyyyMMddHHmmss. * Date stamps are in the UTC time zone * * @param date milliseconds since epoc * @return the date stamp */ public static String get14DigitDate(long date){ return TIMESTAMP14.get().format(new Date(date)); } public static String get14DigitDate(Date d) { return TIMESTAMP14.get().format(d); } /** * Utility function for creating arc-style date stamps * in the format yyyyMMddHHmm. * Date stamps are in the UTC time zone * * @param date milliseconds since epoc * @return the date stamp */ public static String get12DigitDate(long date){ return TIMESTAMP12.get().format(new Date(date)); } public static String get12DigitDate(Date d) { return TIMESTAMP12.get().format(d); } /** * Parses an ARC-style date. If passed String is < 12 characters in length, * we pad. At a minimum, String should contain a year (>=4 characters). * Parse will also fail if day or month are incompletely specified. Depends * on the above getXXDigitDate methods. * @param A 4-17 digit date in ARC style (<code>yyyy</code> to * <code>yyyyMMddHHmmssSSS</code>) formatting. * @return A Date object representing the passed String. 
* @throws ParseException */ public static Date getDate(String d) throws ParseException { Date date = null; if (d == null) { throw new IllegalArgumentException("Passed date is null"); } switch (d.length()) { case 14: date = ArchiveUtils.parse14DigitDate(d); break; case 17: date = ArchiveUtils.parse17DigitDate(d); break; case 12: date = ArchiveUtils.parse12DigitDate(d); break; case 0: case 1: case 2: case 3: throw new ParseException("Date string must at least contain a" + "year: " + d, d.length()); default: if (!(d.startsWith("19") || d.startsWith("20"))) { throw new ParseException("Unrecognized century: " + d, 0); } if (d.length() < 8 && (d.length() % 2) != 0) { throw new ParseException("Incomplete month/date: " + d, d.length()); } StringBuilder sb = new StringBuilder(d); if (sb.length() < 8) { for (int i = sb.length(); sb.length() < 8; i += 2) { sb.append("01"); } } if (sb.length() < 12) { for (int i = sb.length(); sb.length() < 12; i++) { sb.append("0"); } } date = ArchiveUtils.parse12DigitDate(sb.toString()); } return date; } /** * Utility function for parsing arc-style date stamps * in the format yyyMMddHHmmssSSS. * Date stamps are in the UTC time zone. The whole string will not be * parsed, only the first 17 digits. * * @param date an arc-style formatted date stamp * @return the Date corresponding to the date stamp string * @throws ParseException if the inputstring was malformed */ public static Date parse17DigitDate(String date) throws ParseException { return TIMESTAMP17.get().parse(date); } /** * Utility function for parsing arc-style date stamps * in the format yyyMMddHHmmss. * Date stamps are in the UTC time zone. The whole string will not be * parsed, only the first 14 digits. 
* * @param date an arc-style formatted date stamp * @return the Date corresponding to the date stamp string * @throws ParseException if the inputstring was malformed */ public static Date parse14DigitDate(String date) throws ParseException{ return TIMESTAMP14.get().parse(date); } /** * Utility function for parsing arc-style date stamps * in the format yyyMMddHHmm. * Date stamps are in the UTC time zone. The whole string will not be * parsed, only the first 12 digits. * * @param date an arc-style formatted date stamp * @return the Date corresponding to the date stamp string * @throws ParseException if the inputstring was malformed */ public static Date parse12DigitDate(String date) throws ParseException{ return TIMESTAMP12.get().parse(date); } /** * Convert 17-digit date format timestamps (as found in crawl.log, for * example) into a GregorianCalendar object. + * Useful so you can convert * into milliseconds-since-epoch. Note: it is possible to compute * milliseconds-since-epoch + * using {@link #parse17DigitDate}.UTC(), but * that method is deprecated in favor of using Calendar.getTimeInMillis(). + * * <p/>I probably should have dug into all the utility methods in * DateFormat.java to parse the timestamp, but this was + * easier. If * someone wants to fix this to use those methods, please have at it! <p/> * Mike Schwartz, schwartz at CodeOnTheRoad dot com. * * @param timestamp17String * @return Calendar set to <code>timestamp17String</code>. 
*/ public static Calendar timestamp17ToCalendar(String timestamp17String) { GregorianCalendar calendar = new GregorianCalendar(); int year = Integer.parseInt(timestamp17String.substring(0, 4)); int dayOfMonth = Integer.parseInt(timestamp17String.substring(6, 8)); // Month is 0-based int month = Integer.parseInt(timestamp17String.substring(4, 6)) - 1; int hourOfDay = Integer.parseInt(timestamp17String.substring(8, 10)); int minute = Integer.parseInt(timestamp17String.substring(10, 12)); int second = Integer.parseInt(timestamp17String.substring(12, 14)); int milliseconds = Integer .parseInt(timestamp17String.substring(14, 17)); calendar.set(Calendar.YEAR, year); calendar.set(Calendar.MONTH, month);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -