📄 writerpoolmember.java
字号:
/* $Id: WriterPoolMember.java 5032 2007-04-02 22:02:14Z gojomo $ * * Created on July 21st, 2006 * * Copyright (C) 2006 Internet Archive. * * This file is part of the Heritrix web crawler (crawler.archive.org). * * Heritrix is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. * * Heritrix is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with Heritrix; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */package org.archive.io;import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;import java.io.File;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStream;import java.io.OutputStream;import java.text.DecimalFormat;import java.text.NumberFormat;import java.util.Iterator;import java.util.List;import java.util.concurrent.atomic.AtomicInteger;import java.util.logging.Logger;import java.util.zip.GZIPOutputStream;import org.archive.util.ArchiveUtils;import org.archive.util.IoUtils;import org.archive.util.TimestampSerialno;/** * Member of {@link WriterPool}. * Implements rotating off files, file naming with some guarantee of * uniqueness, and position in file. Subclass to pick up functionality for a * particular Writer type. * @author stack * @version $Date: 2007-04-02 22:02:14 +0000 (Mon, 02 Apr 2007) $ $Revision: 5032 $ */public abstract class WriterPoolMember implements ArchiveFileConstants { private final Logger logger = Logger.getLogger(this.getClass().getName()); public static final String UTF8 = "UTF-8"; /** * Default file prefix. 
*
     * Stands for Internet Archive Heritrix.
     */
    public static final String DEFAULT_PREFIX = "IAH";

    /**
     * Value to interpolate with actual hostname.
     * NOTE(review): the interpolation itself is not performed in the code
     * visible here -- confirm where the placeholder is expanded.
     */
    public static final String HOSTNAME_VARIABLE = "${HOSTNAME}";

    /**
     * Default for file suffix.
     */
    public static final String DEFAULT_SUFFIX = HOSTNAME_VARIABLE;

    /**
     * Reference to file we're currently writing.
     * Assigned by {@link #createFile(File)} and by the stream-taking
     * constructor; {@link #checkSize()} reads its length to decide on rotation.
     */
    private File f = null;

    /**
     * Output stream for file.
     * While this is null, {@link #checkSize()} treats the writer as having no
     * file open and creates one.
     */
    private OutputStream out = null;

    /**
     * File output stream.
     * This is needed so can get at channel to find current position in file.
     */
    private FileOutputStream fos;

    /**
     * When true, {@link #createFile()} appends a dot plus
     * COMPRESSED_FILE_EXTENSION to the generated file name.
     */
    private final boolean compressed;

    /**
     * Candidate output directories; {@link #createFile()} hands this list to
     * getNextDirectory to pick the directory for each new file.
     */
    private List<File> writeDirs = null;

    /**
     * Leading part of generated file names.
     * The initializer is overwritten by every constructor visible in this
     * class, including with null when the stream-taking constructor is used.
     */
    private String prefix = DEFAULT_PREFIX;

    /**
     * Trailing part of generated file names; skipped by {@link #createFile()}
     * when null or empty. The initializer is overwritten by every constructor
     * visible in this class.
     */
    private String suffix = DEFAULT_SUFFIX;

    /**
     * Size threshold compared against the current file's length in
     * {@link #checkSize()}. The value -1 disables that comparison.
     */
    private final long maxSize;

    /**
     * File extension placed after a dot in names built by
     * {@link #createFile()}.
     */
    private final String extension;

    /**
     * Creation date for the current file.
     * Set by {@link #createFile()}.
     */
    private String createTimestamp = "UNSET!!!";

    /**
     * A running sequence used making unique file names.
     * Supplied by the caller through the constructors.
     */
    final private AtomicInteger serialNo;

    /**
     * Directories round-robin index.
     * Static, so one value is shared by every instance of this class.
     * NOTE(review): the visible part of getNextDirectory reads and resets it
     * without synchronization -- confirm callers serialize access.
     */
    private static int roundRobinIndex = 0;

    /**
     * NumberFormat instance for formatting serial number.
     *
     * Pads serial number with zeros.
     * NOTE(review): this is a single shared instance and java.text.DecimalFormat
     * is documented as not synchronized -- confirm its users guard concurrent
     * access.
     */
    private static NumberFormat serialNoFormatter = new DecimalFormat("00000");

    /**
     * Constructor.
     * Takes a stream. Use with caution. There is no upperbound check on size.
     * Will just keep writing.
     *
     * <p>Delegates to the six-argument constructor with no directories, no
     * prefix, no extension and a maximum size of -1 (the value
     * {@link #checkSize()} treats as "no limit"), then records the supplied
     * stream and file.
     *
     * @param serialNo used to create unique filename sequences
     * @param out Where to write.
     * @param file File the <code>out</code> is connected to.
     * @param cmprs Compress the content written.
     * @param a14DigitDate If null, we'll write current time.
     *     NOTE(review): this constructor's body never reads the argument --
     *     confirm whether subclasses are expected to consume it.
     * @throws IOException declared for callers; no statement in this
     *     constructor's own body throws it.
     */
    protected WriterPoolMember(AtomicInteger serialNo,
            final OutputStream out, final File file,
            final boolean cmprs, String a14DigitDate)
    throws IOException {
        this(serialNo, null, null, cmprs, -1, null);
        this.out = out;
        this.f = file;
    }

    /**
     * Constructor.
* * @param serialNo used to create unique filename sequences * @param dirs Where to drop files. * @param prefix File prefix to use. * @param cmprs Compress the records written. * @param maxSize Maximum size for ARC files written. * @param extension Extension to give file. */ public WriterPoolMember(AtomicInteger serialNo, final List<File> dirs, final String prefix, final boolean cmprs, final long maxSize, final String extension) { this(serialNo, dirs, prefix, "", cmprs, maxSize, extension); } /** * Constructor. * * @param serialNo used to create unique filename sequences * @param dirs Where to drop files. * @param prefix File prefix to use. * @param cmprs Compress the records written. * @param maxSize Maximum size for ARC files written. * @param suffix File tail to use. If null, unused. * @param extension Extension to give file. */ public WriterPoolMember(AtomicInteger serialNo, final List<File> dirs, final String prefix, final String suffix, final boolean cmprs, final long maxSize, final String extension) { this.suffix = suffix; this.prefix = prefix; this.maxSize = maxSize; this.writeDirs = dirs; this.compressed = cmprs; this.extension = extension; this.serialNo = serialNo; } /** * Call this method just before/after any significant write. * * Call at the end of the writing of a record or just before we start * writing a new record. Will close current file and open a new file * if file size has passed out maxSize. * * <p>Creates and opens a file if none already open. One use of this method * then is after construction, call this method to add the metadata, then * call {@link #getPosition()} to find offset of first record. * * @exception IOException */ public void checkSize() throws IOException { if (this.out == null || (this.maxSize != -1 && (this.f.length() > this.maxSize))) { createFile(); } } /** * Create a new file. * Rotates off the current Writer and creates a new in its place * to take subsequent writes. Usually called from {@link #checkSize()}. 
* @return Name of file created.
     * @throws IOException
     */
    protected String createFile() throws IOException {
        final TimestampSerialno stamp = getTimestampSerialNo();

        // Build the name left to right:
        // prefix-basename[-suffix].extension[.COMPRESSED_FILE_EXTENSION]OCCUPIED_SUFFIX
        String fileName = this.prefix + '-' + getUniqueBasename(stamp);
        if (this.suffix != null && this.suffix.length() > 0) {
            fileName = fileName + ("-" + this.suffix);
        }
        fileName = fileName + '.' + this.extension;
        if (this.compressed) {
            fileName = fileName + ('.' + COMPRESSED_FILE_EXTENSION);
        }
        fileName = fileName + OCCUPIED_SUFFIX;

        // Remember when this file was started before picking its directory.
        this.createTimestamp = stamp.getTimestamp();
        final File targetDir = getNextDirectory(this.writeDirs);
        return createFile(new File(targetDir, fileName));
    }

    /**
     * Switch this writer over to the given file.
     * Calls {@link #close()} first, then opens a buffered output stream on
     * <code>file</code> and logs the absolute path that was opened.
     *
     * @param file File to open for writing.
     * @return Name (last path element) of <code>file</code>.
     * @throws IOException if closing or opening fails.
     */
    protected String createFile(final File file) throws IOException {
        // close() runs before any field is reassigned.
        close();
        this.f = file;
        final FileOutputStream rawStream = new FileOutputStream(file);
        this.fos = rawStream;
        this.out = new FastBufferedOutputStream(rawStream);
        logger.info("Opened " + file.getAbsolutePath());
        return file.getName();
    }

    /** * @param dirs List of File objects that point at directories. * @return Find next directory to write an arc too. If more * than one, it tries to round-robin through each in turn. * @throws IOException */ protected File getNextDirectory(List<File> dirs) throws IOException { if (WriterPoolMember.roundRobinIndex >= dirs.size()) { WriterPoolMember.roundRobinIndex = 0; } File d = null; try { d = checkWriteable((File)dirs. get(WriterPoolMember.roundRobinIndex)); } catch (IndexOutOfBoundsException e) { // Dirs list might be altered underneath us. // If so, we get this exception -- just keep on going. } if (d == null && dirs.size() > 1) { for (Iterator i = dirs.iterator(); d == null && i.hasNext();) { d = checkWriteable((File)i.next()); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -