📄 experimentalwarcwritertest.java
字号:
* @param recordCount Expected count of records. * @throws FileNotFoundException * @throws IOException */ private void validate(File f, int recordCount) throws FileNotFoundException, IOException { WARCReader reader = WARCReaderFactory.get(f); assertNotNull(reader); List headers = null; if (recordCount == -1) { headers = reader.validate(); } else { headers = reader.validate(recordCount); } reader.close(); // Now, run through each of the records doing absolute get going from // the end to start. Reopen the arc so no context between this test // and the previous. reader = WARCReaderFactory.get(f); for (int i = headers.size() - 1; i >= 0; i--) { ArchiveRecordHeader h = (ArchiveRecordHeader)headers.get(i); ArchiveRecord r = reader.get(h.getOffset()); String mimeType = r.getHeader().getMimetype(); assertTrue("Record is bogus", mimeType != null && mimeType.length() > 0); } reader.close(); assertTrue("Metadatas not equal", headers.size() == recordCount); for (Iterator i = headers.iterator(); i.hasNext();) { ArchiveRecordHeader r = (ArchiveRecordHeader)i.next(); assertTrue("Record is empty", r.getLength() > 0); } } public void testWriteRecords() throws IOException { final int recordCount = 2; File f = writeRecords("writeRecord", false, DEFAULT_MAX_WARC_FILE_SIZE, recordCount); validate(f, recordCount + 1); // Header record. } public void testRandomAccess() throws IOException { final int recordCount = 3; File f = writeRecords("writeRecord", true, DEFAULT_MAX_WARC_FILE_SIZE, recordCount); WARCReader reader = WARCReaderFactory.get(f); // Get to second record. Get its offset for later use. boolean readFirst = false; String url = null; long offset = -1; long totalRecords = 0; boolean readSecond = false; for (final Iterator i = reader.iterator(); i.hasNext(); totalRecords++) { WARCRecord ar = (WARCRecord)i.next(); if (!readFirst) { readFirst = true; continue; } if (!readSecond) { url = ar.getHeader().getUrl(); offset = ar.getHeader().getOffset(); readSecond = true; } } reader = WARCReaderFactory.get(f, offset); ArchiveRecord ar = reader.get(); assertEquals(ar.getHeader().getUrl(), url); ar.close(); // Get reader again. See how iterator works with offset reader = WARCReaderFactory.get(f, offset); int count = 0; for (final Iterator i = reader.iterator(); i.hasNext(); i.next()) { count++; } reader.close(); assertEquals(totalRecords - 1, count); } public void testWriteRecordCompressed() throws IOException { final int recordCount = 2; File arcFile = writeRecords("writeRecordCompressed", true, DEFAULT_MAX_WARC_FILE_SIZE, recordCount); validate(arcFile, recordCount + 1 /*Header record*/); } protected ExperimentalWARCWriter createWARCWriter(String NAME, boolean compress) { File [] files = {getTmpDir()}; return new ExperimentalWARCWriter(SERIAL_NO, Arrays.asList(files), NAME, "", compress, DEFAULT_MAX_WARC_FILE_SIZE, null); } protected static ByteArrayOutputStream getBaos(String str) throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); baos.write(str.getBytes()); return baos; } protected static void writeRecord(ExperimentalWARCWriter w, String url, String mimetype, int len, ByteArrayOutputStream baos) throws IOException { w.writeResourceRecord(url, ArchiveUtils.get14DigitDate(), mimetype, null, new ByteArrayInputStream(baos.toByteArray()), len); } protected int iterateRecords(WARCReader r) throws IOException { int count = 0; for (Iterator<ArchiveRecord> i = r.iterator(); i.hasNext();) { ArchiveRecord ar = i.next(); ar.close(); if (count != 0) { assertTrue("Unexpected URL " + ar.getHeader().getUrl(), ar.getHeader().getUrl().equals(SOME_URL)); } count++; } return count; } protected ExperimentalWARCWriter createWithOneRecord(String name, boolean compressed) throws IOException { ExperimentalWARCWriter writer = createWARCWriter(name, compressed); String content = getContent(); writeRecord(writer, SOME_URL, "text/html", content.length(), getBaos(content)); return writer; } public void testSpaceInURL() { String eMessage = null; try { holeyUrl("testSpaceInURL-" + PREFIX, false, " "); } catch (IOException e) { eMessage = e.getMessage(); } assertTrue("Didn't get expected exception: " + eMessage, eMessage.startsWith("Contains disallowed")); } public void testTabInURL() { String eMessage = null; try { holeyUrl("testTabInURL-" + PREFIX, false, "\t"); } catch (IOException e) { eMessage = e.getMessage(); } assertTrue("Didn't get expected exception: " + eMessage, eMessage.startsWith("Contains illegal")); } protected void holeyUrl(String name, boolean compress, String urlInsert) throws IOException { ExperimentalWARCWriter writer = createWithOneRecord(name, compress); // Add some bytes on the end to mess up the record. String content = getContent(); ByteArrayOutputStream baos = getBaos(content); writeRecord(writer, SOME_URL + urlInsert + "/index.html", "text/html", content.length(), baos); writer.close(); } /** * Write an arc file for other tests to use. * @param arcdir Directory to write to. * @param compress True if file should be compressed. * @return ARC written. * @throws IOException */ public static File createWARCFile(File arcdir, boolean compress) throws IOException { File [] files = {arcdir}; ExperimentalWARCWriter writer = new ExperimentalWARCWriter(SERIAL_NO, Arrays.asList(files), "test", "", compress, DEFAULT_MAX_WARC_FILE_SIZE, null); String content = getContent(); writeRecord(writer, SOME_URL, "text/html", content.length(), getBaos(content)); writer.close(); return writer.getFile(); } // public void testSpeed() throws IOException {// ARCWriter writer = createArcWithOneRecord("speed", true);// // Add a record with a length that is too long.// String content = getContent();// final int count = 100000;// logger.info("Starting speed write of " + count + " records.");// for (int i = 0; i < count; i++) {// writeRecord(writer, SOME_URL, "text/html", content.length(),// getBaos(content));// }// writer.close();// logger.info("Finished speed write test.");// } public void testArcRecordOffsetReads() throws Exception { // Get an ARC with one record. WriterPoolMember w = createWithOneRecord("testArcRecordInBufferStream", true); w.close(); // Get reader on said ARC. WARCReader r = WARCReaderFactory.get(w.getFile()); final Iterator<ArchiveRecord> i = r.iterator(); // Skip first ARC meta record. ArchiveRecord ar = i.next(); i.hasNext(); // Now we're at first and only record in ARC. ar = (WARCRecord) i.next(); // Now try getting some random set of bytes out of it // at an odd offset (used to fail because we were // doing bad math to find where in buffer to read). final byte[] buffer = new byte[17]; final int maxRead = 4; int totalRead = 0; while (totalRead < maxRead) { totalRead = totalRead + ar.read(buffer, 13 + totalRead, maxRead - totalRead); assertTrue(totalRead > 0); } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -