📄 uriprocessingformatter.java
字号:
/* UriProcessingFormatter.java
 *
 * $Id: UriProcessingFormatter.java,v 1.20 2006/01/21 03:49:44 gojomo Exp $
 *
 * Created on Jun 10, 2003
 *
 * Copyright (C) 2003 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
package org.archive.crawler.io;

import it.unimi.dsi.mg4j.util.MutableString;

import java.util.logging.Formatter;
import java.util.logging.LogRecord;

import org.archive.crawler.datamodel.CoreAttributeConstants;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.util.ArchiveUtils;
import org.archive.util.Base32;
import org.archive.util.MimetypeUtils;

/**
 * Formatter for 'crawl.log'. Expects completed CrawlURI as parameter.
 *
 * <p>Each record becomes one newline-terminated line of space-separated
 * fields, in this order: log timestamp, fetch status, length, URI,
 * path from seed, via, mimetype, <code>#</code> plus thread number,
 * fetch-begin time and duration, content digest, source tag, annotations.
 *
 * <p>Note: {@link #format(LogRecord)} clears and refills a single
 * per-instance buffer and this class does no locking of its own, so calls
 * on one instance must not overlap unless the caller serializes them.
 *
 * @author gojomo
 */
public class UriProcessingFormatter
extends Formatter implements CoreAttributeConstants {
    /** Placeholder written for a field that has no value. */
    private final static String NA = "-";

    /**
     * Guess at line length (URIs are assumed avg. of 128 bytes).
     * Used to preallocate the buffer we accumulate the log line
     * in.  Hopefully we get it right most of the time and no need
     * to enlarge except in the rare case.
     */
    private final static int GUESS_AT_LOG_LENGTH =
        17 + 1 + 3 + 1 + 10 + 128 + 1 + 10 + 1 + 128 + 1 + 10 + 1 + 3 +
        14 + 1 + 32 + 4 + 128 + 1;

    /**
     * Reuseable assembly buffer.
     */
    private final MutableString buffer =
        new MutableString(GUESS_AT_LOG_LENGTH);

    /**
     * Builds the crawl.log line for one processed URI.
     *
     * @param lr Log record whose first parameter is the CrawlURI to
     * describe; the parameter array is read without any null, length or
     * type check.
     * @return The assembled log line, terminated by a newline.
     */
    public String format(LogRecord lr) {
        CrawlURI curi = (CrawlURI)lr.getParameters()[0];

        String length = lengthField(curi);
        String mime = MimetypeUtils.truncate(curi.getContentType());
        // The line is stamped with the time of formatting, which is
        // separate from the fetch times reported further along the line.
        long time = System.currentTimeMillis();
        String arcTimeAndDuration = fetchTimingField(curi);
        String via = curi.flattenVia();
        String digest = digestField(curi);
        String sourceTag = null;
        if (curi.containsKey(A_SOURCE_TAG)) {
            sourceTag = curi.getString(A_SOURCE_TAG);
        }

        // Empty the shared buffer before assembling this line.
        final MutableString line = this.buffer;
        line.length(0);
        line.append(ArchiveUtils.getLog17Date(time));
        line.append(" ");
        line.append(ArchiveUtils.padTo(curi.getFetchStatus(), 5));
        line.append(" ");
        line.append(ArchiveUtils.padTo(length, 10));
        line.append(" ");
        line.append(curi.getUURI().toString());
        line.append(" ");
        line.append(checkForNull(curi.getPathFromSeed()));
        line.append(" ");
        line.append(checkForNull(via));
        line.append(" ");
        line.append(mime);
        line.append(" ");
        line.append("#");
        // Pad threads to be 3 digits.  For Igor.
        line.append(ArchiveUtils.padTo(
            Integer.toString(curi.getThreadNumber()), 3, '0'));
        line.append(" ");
        line.append(arcTimeAndDuration);
        line.append(" ");
        line.append(checkForNull(digest));
        line.append(" ");
        line.append(checkForNull(sourceTag));
        line.append(" ");
        line.append(checkForNull(curi.getAnnotations()));
        line.append("\n");
        return line.toString();
    }

    /**
     * Picks the value for the length column: for an HTTP transaction
     * with a non-negative content length, that length; otherwise the
     * content size when it is greater than zero; otherwise
     * <code>NA</code>.
     *
     * @param curi URI being logged.
     * @return Length as a decimal string, or <code>NA</code>.
     */
    private String lengthField(CrawlURI curi) {
        if (curi.isHttpTransaction() && curi.getContentLength() >= 0) {
            return Long.toString(curi.getContentLength());
        }
        if (curi.getContentSize() > 0) {
            return Long.toString(curi.getContentSize());
        }
        return NA;
    }

    /**
     * Renders the fetch-begin time, a <code>+</code>, and the difference
     * between the completed and began times. Only the presence of the
     * completed-time key is checked before both values are read.
     *
     * @param curi URI being logged.
     * @return Begin time plus duration, or <code>NA</code> if the URI
     * has no fetch-completed time recorded.
     */
    private String fetchTimingField(CrawlURI curi) {
        if (!curi.containsKey(A_FETCH_COMPLETED_TIME)) {
            return NA;
        }
        long completedTime = curi.getLong(A_FETCH_COMPLETED_TIME);
        long beganTime = curi.getLong(A_FETCH_BEGAN_TIME);
        return ArchiveUtils.get17DigitDate(beganTime) + "+"
            + Long.toString(completedTime - beganTime);
    }

    /**
     * Base32-encodes the URI's content digest.
     *
     * @param curi URI being logged.
     * @return Encoded digest, or null if the URI carries no digest.
     */
    private String digestField(CrawlURI curi) {
        Object rawDigest = curi.getContentDigest();
        if (rawDigest == null) {
            return null;
        }
        return Base32.encode((byte [])rawDigest);
    }

    /**
     * @param str String to check.
     * @return Return passed string or <code>NA</code> if null or empty.
     */
    protected String checkForNull(String str) {
        if (str == null || str.length() == 0) {
            return NA;
        }
        return str;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -