📄 httpdoccache.java
字号:
zos.close();
} else {
fs.close();
}
}
} else {
fzip.setLastModified(System.currentTimeMillis());
}
// append user
if (!found) {
FileOutputStream os = new FileOutputStream(f.getPath(), true);
try {
os.write((urlString + LF).getBytes());
} finally {
os.close();
}
}
}
/**
 * Writes the given links to the zip stream as one entry named "links",
 * one URL per line (each line terminated by LF). A link that occurs more
 * than once in the list is written only the first time it is seen.
 *
 * @param links list of java.net.URL objects to store
 * @param zs zip stream that receives the entry; it is not closed here
 * @throws IOException if writing to the zip stream fails
 */
protected void writeLinksToZipFile(List links, ZipOutputStream zs)
    throws IOException {
    // Remember the textual form rather than the URL objects themselves:
    // URL.equals()/hashCode() may resolve host names, which makes a hash set
    // of URLs slow and dependent on the current name service.
    HashSet storedLinks = new HashSet();
    zs.putNextEntry(new ZipEntry("links"));
    for (Iterator iter = links.iterator(); iter.hasNext();) {
        URL url = (URL) iter.next();
        String link = url.toString();
        // add() returns false when the link has already been written
        if (storedLinks.add(link)) {
            zs.write((link + LF).getBytes());
        }
    }
    zs.closeEntry();
}
/**
 * Collects the URL of the given document unless the document carries the
 * CONTENT_DUPLICATE header (duplicates are skipped).
 *
 * @param doc a HttpDoc object to process. This may also be null, in which
 * case nothing is logged or collected.
 * @exception DocManagerException will be thrown if an error occurs
 * while processing the document.
 * @see net.matuschek.http.HttpDocManager#processDocument(net.matuschek.http.HttpDoc)
 */
public void processDocument(HttpDoc doc) throws DocManagerException {
    // The contract allows a null document; there is nothing to collect then.
    if (doc == null) {
        return;
    }
    log.info(
        "Processing "
            + doc.getURL().toExternalForm()
            + doc.getHttpHeader());
    // collect URL (only if content is no duplicate)
    HttpHeader duplicate = doc.getHeader(CONTENT_DUPLICATE);
    if (duplicate == null) {
        urls.add(doc.getURL());
    }
}
/**
 * Retrieves a document from the cache.
 * <p>
 * Headers and links are read from the document's zip file; the content is
 * read from the content zip addressed by the document's content MD5. If no
 * content zip exists, the document gets an empty content array.
 * <p>
 * If anything goes wrong while reading, the document's zip file is treated
 * as invalid and deleted.
 *
 * @param url URL of the document to look up
 * @return the cached document, or null if there is no cache file for the
 * URL or the cache file could not be read
 * @see net.matuschek.http.HttpDocManager#retrieveFromCache(java.net.URL)
 */
public HttpDoc retrieveFromCache(java.net.URL url) {
    HttpDoc doc = null;
    File f = null;
    ZipFile zf = null;
    try {
        String filename = generateFilename(url.toExternalForm()) + ".zip";
        f = new File(storagedir + DOCUMENTS + filename);
        if (f.exists()) {
            log.info("retrieve " + f);
            // create document and read it from file
            doc = new HttpDoc();
            doc.setURL(url);
            zf = new ZipFile(f);
            // read headers
            readHeadersFromZipFile(doc, zf);
            // read links
            readLinksFromZipFile(doc, zf);
            doc.setCached(true);
            // read content
            String md5 = doc.getContentMD5();
            File contentZipFile = contentFile(md5, ".zip");
            if (contentZipFile.exists()) {
                ZipFile contentZip = new ZipFile(contentZipFile);
                try {
                    readContentFromZipFile(doc, contentZip);
                } finally {
                    // release the handle even if reading the content failed
                    contentZip.close();
                }
            } else {
                doc.setContent(new byte[0]);
            }
            zf.close();
            zf = null;
        }
    } catch (Exception e) {
        doc = null;
        // Close the zip before deleting it: the handle would otherwise leak,
        // and an open handle can prevent the delete on some platforms.
        if (zf != null) {
            try {
                zf.close();
            } catch (IOException ignored) {
                // the file is discarded anyway
            }
        }
        if (f != null) {
            log.warn("removing invalid file " + f, e);
            f.delete();
        } else {
            // the failure happened before the cache file name was known
            log.warn("could not look up " + url + " in cache", e);
        }
    }
    return doc;
}
/**
 * Reads the document content from the first zip entry whose name starts
 * with "content" and stores it in the document. If the zip has no such
 * entry, the document's content is set to null.
 *
 * @param doc document that receives the content
 * @param contentZip zip file to read from; it is not closed here
 * @throws IOException if the entry cannot be read, or if the number of
 * bytes read differs from the size recorded for the entry
 */
protected void readContentFromZipFile(HttpDoc doc, ZipFile contentZip)
    throws IOException {
    byte[] content = null;
    for (Enumeration enumeration = contentZip.entries(); enumeration.hasMoreElements();) {
        ZipEntry zipEntry = (ZipEntry) enumeration.nextElement();
        if (!zipEntry.getName().startsWith("content")) {
            continue;
        }
        // getSize() returns -1 when the size is unknown, so it is only used
        // as a buffer size hint and as a consistency check afterwards.
        long declaredSize = zipEntry.getSize();
        int sizeHint = (declaredSize > 0 && declaredSize <= Integer.MAX_VALUE)
            ? (int) declaredSize
            : 8192;
        InputStream is = contentZip.getInputStream(zipEntry);
        try {
            java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream(sizeHint);
            byte[] chunk = new byte[8192];
            int read;
            // read() returns -1 at end of stream; stop there instead of
            // adding that value to an offset.
            while ((read = is.read(chunk)) != -1) {
                buffer.write(chunk, 0, read);
            }
            if (declaredSize >= 0 && buffer.size() != declaredSize) {
                throw new IOException(
                    "zip entry " + zipEntry.getName() + " has " + buffer.size()
                        + " bytes but declares " + declaredSize);
            }
            content = buffer.toByteArray();
        } finally {
            is.close();
        }
        break;
    }
    doc.setContent(content);
}
/**
 * Removes a document from the cache: its links file, its stored content
 * (via deleteContent) and its document file. Errors are logged, not thrown.
 *
 * @param url URL of the document to remove
 * @see net.matuschek.http.HttpDocManager#removeDocument(URL)
 */
public void removeDocument(URL url) {
    // The cached document is needed to locate its content entry;
    // retrieveFromCache returns null when nothing usable is cached.
    HttpDoc doc = retrieveFromCache(url);
    try {
        String filename = generateFilename(url.toExternalForm()) + ".zip";
        File linksFile = new File(storagedir + LINKS + filename);
        if (linksFile.exists()) {
            linksFile.delete();
        }
        // Without a cached document there is no content to release; skipping
        // the call avoids a NullPointerException that would abort the cleanup.
        if (doc != null) {
            deleteContent(doc);
        }
        File documentFile = new File(storagedir + DOCUMENTS + filename);
        if (documentFile.exists()) {
            documentFile.delete();
        }
    } catch (Exception ex) {
        log.error(ex);
    }
}
/**
 * Deletes stored content for the given document.
 * <p>
 * The ".txt" file for the document's content MD5 is read line by line and
 * every line equal to the document's URL is dropped. If other lines remain,
 * the file is rewritten with them; otherwise the ".txt" file and the
 * matching ".zip" file are deleted. Documents without content (null or
 * empty) are left alone.
 *
 * @param doc document whose content reference is to be removed
 * @throws IOException if the ".txt" file cannot be read or rewritten
 */
private void deleteContent(HttpDoc doc) throws IOException {
    byte[] content = doc.getContent();
    // content may be null as well as empty; neither has anything stored
    if (content == null || content.length == 0) {
        return;
    }
    String urlString = doc.getURL().toString();
    String md5 = doc.getContentMD5();
    File f = contentFile(md5, ".txt");
    ArrayList entries = new ArrayList();
    if (f.exists()) {
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f)));
        try {
            // readLine() returns null at end of file; ready() is not an
            // end-of-file test.
            String line;
            while ((line = reader.readLine()) != null) {
                if (!line.equals(urlString)) {
                    entries.add(line);
                }
            }
        } finally {
            reader.close();
        }
    }
    if (entries.size() > 0) {
        // other lines remain: rewrite the file without this document's URL
        FileOutputStream os = new FileOutputStream(f.getPath(), false);
        try {
            for (Iterator iter = entries.iterator(); iter.hasNext();) {
                String line = (String) iter.next();
                os.write((line + LF).getBytes());
            }
        } finally {
            os.close();
        }
    } else {
        // nothing left: remove the list and the matching zip
        f.delete();
        File fzip = contentFile(md5, ".zip");
        if (fzip.exists()) {
            fzip.delete();
        }
    }
}
/**
 * Returns the collected URLs, one per line; every URL is followed by a
 * newline character.
 *
 * @return textual listing of the collected URLs
 * @see java.lang.Object#toString()
 */
public String toString() {
    StringBuffer listing = new StringBuffer(1000);
    Iterator remaining = urls.iterator();
    while (remaining.hasNext()) {
        Object url = remaining.next();
        listing.append(url);
        listing.append("\n");
    }
    return listing.toString();
}
/**
 * Builds a relative directory path from the leading characters of a file
 * name: each of the first storageDirDepth characters (or all characters,
 * if the name is shorter) becomes one directory level, followed by the
 * platform's file separator.
 *
 * @param filename name whose leading characters are used
 * @return the directory path; empty when no characters are used
 */
private final String useFirstCharactersAsDirectories(String filename) {
    final int levels = Math.min(storageDirDepth, filename.length());
    final char[] path = new char[levels * 2];
    int out = 0;
    for (int level = 0; level < levels; level++) {
        path[out++] = filename.charAt(level);
        path[out++] = File.separatorChar;
    }
    return String.valueOf(path);
}
/**
 * Makes sure the directory for the given file exists below the storage
 * directory, creating it (including missing parents) if necessary.
 * <p>
 * The directory part is taken to be the first storageDirDepth*2 characters
 * of the file name, so the name must be at least that long; otherwise
 * String.substring throws StringIndexOutOfBoundsException.
 *
 * @param subdirectory subdirectory of the storage directory; a trailing
 * file separator is added if missing
 * @param filename file name starting with its directory part
 */
private final void checkStoragePathFor(String subdirectory, String filename) {
    String base = subdirectory.endsWith(File.separator)
        ? subdirectory
        : subdirectory + File.separator;
    String directoryPart = filename.substring(0, storageDirDepth * 2);
    File target = new File(storagedir + base + directoryPart);
    if (target.exists()) {
        return;
    }
    target.mkdirs();
}
/**
* Generate a valid filename for the given docURI.
* @param docURI
* @return String
*/
protected String generateFilename(String docURI) {
if (useMD5) {
MD5 md5 = new MD5(docURI);
String hex = md5.asHex();
if (storageDirDepth > 0) {
return useFirstCharactersAsDirectories(hex) + hex.substring(storageDirDepth);
}
return hex;
} else {
StringBuffer buf = new StringBuffer(docURI.length());
for (int i = 0; i < docURI.length(); i++) {
char c = docURI.charAt(i);
switch (c) {
case '/' : buf.append("&slash;"); break;
case '\\' : buf.append("&backslash"); break;
case ':' : buf.append(":"); break;
case '*' : buf.append("&asterisk;"); break;
case '?' : buf.append("&question;"); break;
case '\"' : buf.append("""); break;
case '<' : buf.append("<"); break;
case '>' : buf.append(">"); break;
case '|' : buf.append("∨"); break;
default : buf.append(c); break;
}
}
docURI = buf.toString();
return docURI;
}
}
/**
 * Returns the File below the content directory that belongs to the given
 * hex string: the first storageDirDepth characters form the directory
 * levels, the remaining characters plus the extension form the file name.
 *
 * @param hex hex string identifying the content (callers pass the content MD5)
 * @param extension file extension including the dot, e.g. ".zip" or ".txt"
 * @return File for that content and extension
 */
protected File contentFile(String hex, String extension) {
    String directories = useFirstCharactersAsDirectories(hex);
    String remainder = hex.substring(storageDirDepth);
    String path = storagedir + CONTENT + directories + remainder + extension;
    return new File(path);
}
/**
 * Closes the storage directory stream, if one is open. A failure to close
 * is logged; the stream reference is cleared either way, so calling this
 * method again is a no-op.
 *
 * @see net.matuschek.http.HttpDocManager#finish()
 */
public void finish() {
    if (storageDirectoryStream == null) {
        return;
    }
    try {
        storageDirectoryStream.close();
    } catch (IOException e) {
        // report through the class logger like the other methods do
        log.error("could not close storage directory stream", e);
    } finally {
        // drop the reference even after a failed close so that a later
        // call (e.g. from finalize) does not try the same stream again
        storageDirectoryStream = null;
    }
}
/**
 * Calls finish and super.finalize(). The superclass finalizer runs even
 * if finish() throws.
 *
 * @throws Throwable whatever finish() or super.finalize() throws
 * @see java.lang.Object#finalize()
 */
protected void finalize() throws Throwable {
    try {
        finish();
    } finally {
        super.finalize();
    }
}
/**
 * Directory depth of the storage directory, i.e. the number of
 * subdirectory levels used below it.
 * The first storageDirDepth characters of a file name are used as
 * directories, one level per character. With the default of 0 no
 * subdirectory levels are added.
 */
protected int storageDirDepth = 0;
/**
 * Sets the directory depth of the storage directory
 * (depth = number of used subdirectory levels). The value is stored as
 * given, without validation.
 *
 * @param depth desired number of subdirectory levels
 */
public void setStorageDirDepth(int depth) {
    this.storageDirDepth = depth;
}
/**
 * Returns the directory depth of the storage directory
 * (depth = number of used subdirectory levels).
 *
 * @return the currently configured directory depth
 */
public int getStorageDirDepth() {
    return this.storageDirDepth;
}
/**
 * Gets the file extension for a content type value. Only the part before
 * the first ';' (the media type without parameters) is passed to
 * getDefaultExtension.
 *
 * @param contenttype content type value, may be null
 * @return the extension with exactly one leading dot (e.g. ".html"), or
 * an empty string if no default extension is known
 */
private String getExtensionFromContenttype(String contenttype) {
    String extension = null;
    if (contenttype != null) {
        int pos = contenttype.indexOf(';');
        String mediaType = (pos > 0) ? contenttype.substring(0, pos) : contenttype;
        extension = getDefaultExtension(mediaType.trim());
    }
    if (extension == null) {
        return "";
    }
    // getDefaultExtension already returns values such as ".html"; adding
    // another dot unconditionally produced "..html".
    if (extension.startsWith(".")) {
        return extension;
    }
    return "." + extension;
}
/**
 * Gets the default extension for the given contentType. The match is a
 * case-sensitive substring search: anything containing "text/html" maps
 * to ".html", anything else containing "text/" maps to ".txt".
 *
 * @param contentType content type to look up, may be null
 * @return default extension (including the leading dot) or null if the
 * content type is null or not a text type
 */
protected String getDefaultExtension(String contentType) {
    String extension = null;
    if (contentType != null) {
        if (contentType.indexOf("text/html") != -1) {
            extension = ".html";
        } else if (contentType.indexOf("text/") != -1) {
            extension = ".txt";
        }
    }
    return extension;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -