📄 mirrorwriterprocessor.java
字号:
segs[segs.length - 1] = new EndSegment(uriPath, slashIndex + 1, uriPath.length(), maxSegLen, caseSensitive, curi, characterMap, dotBegin, query, suffix, maxPathLen, suffixAtEnd); } else { // The URI ends with a /. segs[segs.length - 1] = new EndSegment(dirFile, 0, dirFile.length(), maxSegLen, caseSensitive, curi, characterMap, null, query, suffix, maxPathLen, suffixAtEnd); } URIToFileReturn r = dirPath(baseDir, host, port, segs, maxPathLen - maxSegLen); if (null == r) { // The path is too long. // Replace all the segment directories by tooLongDir. PathSegment endSegment = segs[segs.length - 1]; segs = new PathSegment[2]; segs[0] = new DirSegment(tooLongDir, 0, tooLongDir.length(), maxSegLen, caseSensitive, curi, EMPTY_MAP, null, null, null); segs[1] = endSegment; r = dirPath(baseDir, host, port, segs, maxPathLen - maxSegLen); } segs[segs.length - 1].addToPath(r); return r; } /** Copies a resource into a file. A temporary file is created and then atomically renamed to the destination file. This prevents leaving a partial file in case of a crash. @param recis the RecordingInputStream that recorded the contents of the resource @param dest the destination file @throws IOException on I/O error @throws IOException if the file rename fails */ private void writeToPath(RecordingInputStream recis, File dest) throws IOException { ReplayInputStream replayis = recis.getContentReplayInputStream(); File tf = new File (dest.getPath() + "N"); FileOutputStream fos = new FileOutputStream(tf); try { replayis.readFullyTo(fos); } finally { fos.close(); replayis.close(); } if (!tf.renameTo(dest)) { throw new IOException("Can not rename " + tf.getAbsolutePath() + " to " + dest.getAbsolutePath()); } } /** This class represents one segment (component) of a URI path. A segment between '/' characters is a directory segment. The segment after the last '/' is the end segment. */ abstract class PathSegment { /** existsMaybeCaseSensitive return code for a file that does not exist. 
*/
        protected static final int EXISTS_NOT = 1;

        /** existsMaybeCaseSensitive return code for a file that exists.
            Furthermore, the comparison is case-sensitive.
        */
        protected static final int EXISTS_EXACT_MATCH = 2;

        /** existsMaybeCaseSensitive return code for a file that exists,
            using a case-insensitive comparison.
            Furthermore, the file would not exist if the comparison
            were case-sensitive.
        */
        protected static final int EXISTS_CASE_INSENSITIVE_MATCH = 3;

        /** The URI, for logging and error reporting.*/
        protected CrawlURI curi;

        /**
           The main part of this segment.
           For a directory segment, that's all there is.
           For an end segment, it's the part of the URI after the last '/'
           up to but not including the '.' before the suffix (if any).
           Not set by the PathSegment constructor; it stays null until
           assigned elsewhere.
        */
        protected LumpyString mainPart = null;

        /**
           The maximum number of characters allowed
           in one file system path segment.
           A URI segment can potentially be much longer,
           but we'll trim it to this.
           The constructor rejects values less than 2.
        */
        protected int maxSegLen;

        /** If true, the file system is assumed to be
            case-sensitive; otherwise the file system is assumed to be
            case-insensitive.
        */
        private boolean caseSensitive;

        /**
           Creates a new PathSegment.
           Only stores its arguments; mainPart is left null.
           @param maxSegLen the maximum number of characters
           allowed in one path segment
           @param caseSensitive if true, the file system is assumed to be
           case-sensitive; otherwise the file system is assumed to be
           case-insensitive
           @param curi the URI
           @throws IllegalArgumentException if maxSegLen is too small
           (less than 2)
        */
        PathSegment(int maxSegLen, boolean caseSensitive, CrawlURI curi) {
            if (maxSegLen < 2) {
                throw new IllegalArgumentException("maxSegLen: " + maxSegLen);
            }
            this.maxSegLen = maxSegLen;
            this.caseSensitive = caseSensitive;
            this.curi = curi;
        }

        /**
           Adds this segment to a file path.
           This is the key method of this class.
           It extends the given path by one segment,
           named to obey all constraints.
           A new directory is created if necessary.
@param currentPath the current path, to which this segment is added @throws IOException if a needed directory could not be created @throws IOException if a needed directory is not writeable */ abstract void addToPath(URIToFileReturn currentPath) throws IOException; /** Checks if a file (including directories) exists. @param fsf the directory containing the file to be checked @param segStr the simple file or directory name @param check the file or directory for which to check @return EXISTS_NOT if check does not exist, EXISTS_EXACT_MATCH if check exists with a name that matches (case-sensitive) segStr, and EXISTS_CASE_INSENSITIVE_MATCH if check exists with a name that matches segStr using a case-insensitive match but not using a case-sensitive match */ protected int existsMaybeCaseSensitive(File fsf, String segStr, File check) { if (caseSensitive) { return check.exists() ? EXISTS_EXACT_MATCH : EXISTS_NOT; } if (!check.exists()) { return EXISTS_NOT; } /* The JVM says the file exists, but the file system is assumed to be case-insensitive, so do we have an exact match or just a case-insensitive match? We get an array of all the file names that match (case-insensitive) the one we're checking, then we can look for a case-sensitive match. */ String[] fna = fsf.list(new CaseInsensitiveFilenameFilter(segStr)); for (int i = 0; fna.length != i; ++i) { if (segStr.equals(fna[i])) { return EXISTS_EXACT_MATCH; } } return EXISTS_CASE_INSENSITIVE_MATCH; } /** This class implements a FilenameFilter that matches by name, ignoring case. */ class CaseInsensitiveFilenameFilter implements FilenameFilter { /** The file name we're looking for. */ private String target; /** Creates a CaseInsensitiveFilenameFilter. @param target the target file name @throws IllegalArgumentException if target is null or empty. 
*/
            CaseInsensitiveFilenameFilter(String target) {
                if (null == target) {
                    throw new IllegalArgumentException("target null");
                }
                if (0 == target.length()) {
                    throw new IllegalArgumentException("target empty");
                }
                this.target = target;
            }

            /**
               Accepts a file whose name equals the target, ignoring case.
               @param dir the directory in which the file was found
               (not used by this filter)
               @param name the name of the file
               @return true if name matches the target ignoring case
            */
            public boolean accept(File dir, String name) {
                return target.equalsIgnoreCase(name);
            }
        }
    }

    /**
       This class represents one directory segment (component) of a URI path.
    */
    class DirSegment extends PathSegment {
        /** If a segment name is in this set, prepend an underscore.*/
        private Set underscoreSet;

        /**
           Creates a DirSegment.
           @param uriPath the path part of the URI
           @param beginIndex the beginning index, inclusive, of the
           substring of uriPath to be used
           @param endIndex the ending index, exclusive, of the
           substring of uriPath to be used
           @param maxSegLen the maximum number of characters allowed in one
           file system path segment (component)
           @param caseSensitive if true, the file system is assumed to be
           case-sensitive; otherwise the file system is assumed to be
           case-insensitive but case-preserving
           @param curi the URI
           @param characterMap a map from characters (as length-1 String
           values) in the URI path and query to replacement String values
           @param dotBegin if non-null, this replaces a '.' at
           the beginning of the directory name
           @param dotEnd if non-null, this replaces a '.'
           that appears at the end of a directory name
           @param underscoreSet if non-null and a segment, after conversion
           to lower case, is in this set, then prepend an underscore
           to the segment
           @throws IllegalArgumentException if beginIndex is negative.
           @throws IllegalArgumentException if endIndex is less than
           beginIndex.
           @throws IllegalArgumentException if maxSegLen is too small.
        */
        DirSegment(String uriPath, int beginIndex, int endIndex, int maxSegLen,
                   boolean caseSensitive, CrawlURI curi, Map characterMap,
                   String dotBegin, String dotEnd, Set underscoreSet) {
            super(maxSegLen, caseSensitive, curi);
            // The fourth argument is dotEnd's length, or 0 when dotEnd is
            // null; NOTE(review): presumably space LumpyString reserves
            // for the dotEnd replacement -- confirm against LumpyString.
            mainPart = new LumpyString(uriPath, beginIndex, endIndex,
                                       (null == dotEnd) ?
0 : dotEnd.length(), this.maxSegLen, characterMap, dotBegin); if (null != dotEnd) { // We might get a segment like /VeryLong............../ // so we have to loop to guarantee the segment doesn't // end with a dot. int dl = dotEnd.length(); while (mainPart.endsWith('.')) { // Chop off the dot at the end. mainPart.trimToMax(mainPart.length() - 1); if ((mainPart.length() + dl) <= this.maxSegLen) { mainPart.append(dotEnd); } } } this.underscoreSet = underscoreSet; } void addToPath(URIToFileReturn currentPath) throws IOException { NumberFormat nf = null; int startLen = mainPart.length(); // Starting length. for (int i = 0; ; ++i) { if (0 != i) { // Try to create a unique file name by appending a // number. if (null == nf) { nf = NumberFormat.getIntegerInstance(); } String ending = nf.format(i); mainPart.trimToMax(Math.min(startLen, maxSegLen - ending.length())); mainPart.append(ending); } String segStr = mainPart.toString(); if ((null != underscoreSet) && underscoreSet.contains(segStr.toLowerCase())) { mainPart.prepend('_'); ++startLen; mainPart.trimToMax(maxSegLen); segStr = mainPart.toString(); } File fsf = currentPath.getFile(); File f = new File(fsf, segStr); int er = existsMaybeCaseSensitive(fsf, segStr, f); switch (er) { case EXISTS_NOT: if (!f.mkdir()) { throw new IOException("Can not mkdir " + f.getAbsolutePath()); } currentPath.append(f, segStr); return; // Created new directory. case EXISTS_EXACT_MATCH: if (f.isDirectory()) { if (!f.canWrite()) { throw new IOException("Directory " + f.getAbsolutePath() + " not writeable."); } /* A writeable directory already exists. Assume it's the one we want. This assumption fails for cases like http://foo.com/a*256/b.html followed by
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -