⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 experimentalwarcwritertest.java

📁 这是个爬虫，和 Lucene 相结合最好了
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
     * @param recordCount Expected count of records.     * @throws FileNotFoundException     * @throws IOException     */    private void validate(File f, int recordCount)    throws FileNotFoundException, IOException {        WARCReader reader = WARCReaderFactory.get(f);        assertNotNull(reader);        List headers = null;        if (recordCount == -1) {            headers = reader.validate();        } else {            headers = reader.validate(recordCount);        }        reader.close();                // Now, run through each of the records doing absolute get going from        // the end to start.  Reopen the arc so no context between this test        // and the previous.        reader = WARCReaderFactory.get(f);        for (int i = headers.size() - 1; i >= 0; i--) {            ArchiveRecordHeader h = (ArchiveRecordHeader)headers.get(i);            ArchiveRecord r = reader.get(h.getOffset());            String mimeType = r.getHeader().getMimetype();            assertTrue("Record is bogus",                mimeType != null && mimeType.length() > 0);        }        reader.close();                assertTrue("Metadatas not equal", headers.size() == recordCount);        for (Iterator i = headers.iterator(); i.hasNext();) {            ArchiveRecordHeader r = (ArchiveRecordHeader)i.next();            assertTrue("Record is empty", r.getLength() > 0);        }    }    public void testWriteRecords() throws IOException {        final int recordCount = 2;        File f = writeRecords("writeRecord", false, DEFAULT_MAX_WARC_FILE_SIZE,            recordCount);     	validate(f, recordCount  + 1); // Header record.    }    public void testRandomAccess() throws IOException {        final int recordCount = 3;        File f = writeRecords("writeRecord", true, DEFAULT_MAX_WARC_FILE_SIZE,            recordCount);        WARCReader reader = WARCReaderFactory.get(f);        // Get to second record.  Get its offset for later use.        
boolean readFirst = false;        String url = null;        long offset = -1;        long totalRecords = 0;        boolean readSecond = false;        for (final Iterator i = reader.iterator(); i.hasNext();                totalRecords++) {            WARCRecord ar = (WARCRecord)i.next();            if (!readFirst) {                readFirst = true;                continue;            }            if (!readSecond) {                url = ar.getHeader().getUrl();                offset = ar.getHeader().getOffset();                readSecond = true;            }        }                reader = WARCReaderFactory.get(f, offset);        ArchiveRecord ar = reader.get();        assertEquals(ar.getHeader().getUrl(), url);        ar.close();                // Get reader again.  See how iterator works with offset        reader = WARCReaderFactory.get(f, offset);        int count = 0;        for (final Iterator i = reader.iterator(); i.hasNext(); i.next()) {            count++;        }        reader.close();        assertEquals(totalRecords - 1, count);    }        public void testWriteRecordCompressed() throws IOException {        final int recordCount = 2;        File arcFile = writeRecords("writeRecordCompressed", true,            DEFAULT_MAX_WARC_FILE_SIZE, recordCount);        validate(arcFile, recordCount + 1 /*Header record*/);    }        protected ExperimentalWARCWriter createWARCWriter(String NAME,            boolean compress) {        File [] files = {getTmpDir()};        return new ExperimentalWARCWriter(SERIAL_NO,        	Arrays.asList(files), NAME, "",            compress, DEFAULT_MAX_WARC_FILE_SIZE, null);    }        protected static ByteArrayOutputStream getBaos(String str)    throws IOException {        ByteArrayOutputStream baos = new ByteArrayOutputStream();        baos.write(str.getBytes());        return baos;    }        protected static void writeRecord(ExperimentalWARCWriter w, String url,        String mimetype, int len, ByteArrayOutputStream baos)    
throws IOException {        w.writeResourceRecord(url,            ArchiveUtils.get14DigitDate(),            mimetype,            null,            new ByteArrayInputStream(baos.toByteArray()),            len);    }        protected int iterateRecords(WARCReader r)    throws IOException {        int count = 0;        for (Iterator<ArchiveRecord> i = r.iterator(); i.hasNext();) {            ArchiveRecord ar = i.next();            ar.close();            if (count != 0) {                assertTrue("Unexpected URL " + ar.getHeader().getUrl(),                    ar.getHeader().getUrl().equals(SOME_URL));            }            count++;        }        return count;    }        protected ExperimentalWARCWriter createWithOneRecord(String name,        boolean compressed)    throws IOException {        ExperimentalWARCWriter writer = createWARCWriter(name, compressed);        String content = getContent();        writeRecord(writer, SOME_URL, "text/html",            content.length(), getBaos(content));        return writer;    }        public void testSpaceInURL() {        String eMessage = null;        try {            holeyUrl("testSpaceInURL-" + PREFIX, false, " ");        } catch (IOException e) {            eMessage = e.getMessage();        }        assertTrue("Didn't get expected exception: " + eMessage,            eMessage.startsWith("Contains disallowed"));    }    public void testTabInURL() {        String eMessage = null;        try {            holeyUrl("testTabInURL-" + PREFIX, false, "\t");        } catch (IOException e) {            eMessage = e.getMessage();        }        assertTrue("Didn't get expected exception: " + eMessage,            eMessage.startsWith("Contains illegal"));    }        protected void holeyUrl(String name, boolean compress, String urlInsert)    throws IOException {        ExperimentalWARCWriter writer = createWithOneRecord(name, compress);        // Add some bytes on the end to mess up the record.        
String content = getContent();        ByteArrayOutputStream baos = getBaos(content);        writeRecord(writer, SOME_URL + urlInsert + "/index.html", "text/html",            content.length(), baos);        writer.close();    }        /**     * Write an arc file for other tests to use.     * @param arcdir Directory to write to.     * @param compress True if file should be compressed.     * @return ARC written.     * @throws IOException      */    public static File createWARCFile(File arcdir, boolean compress)    throws IOException {        File [] files = {arcdir};        ExperimentalWARCWriter writer =            new ExperimentalWARCWriter(SERIAL_NO, Arrays.asList(files),            "test", "", compress, DEFAULT_MAX_WARC_FILE_SIZE, null);        String content = getContent();        writeRecord(writer, SOME_URL, "text/html", content.length(),            getBaos(content));        writer.close();        return writer.getFile();    }    //    public void testSpeed() throws IOException {//        ARCWriter writer = createArcWithOneRecord("speed", true);//        // Add a record with a length that is too long.//        String content = getContent();//        final int count = 100000;//        logger.info("Starting speed write of " + count + " records.");//        for (int i = 0; i < count; i++) {//            writeRecord(writer, SOME_URL, "text/html", content.length(),//                    getBaos(content));//        }//        writer.close();//        logger.info("Finished speed write test.");//    }        public void testArcRecordOffsetReads() throws Exception {    	// Get an ARC with one record.		WriterPoolMember w =			createWithOneRecord("testArcRecordInBufferStream", true);		w.close();		// Get reader on said ARC.		WARCReader r = WARCReaderFactory.get(w.getFile());		final Iterator<ArchiveRecord> i = r.iterator();		// Skip first ARC meta record.		ArchiveRecord ar = i.next();		i.hasNext();		// Now we're at first and only record in ARC.		
ar = (WARCRecord) i.next();		// Now try getting some random set of bytes out of it 		// at an odd offset (used to fail because we were		// doing bad math to find where in buffer to read).		final byte[] buffer = new byte[17];		final int maxRead = 4;		int totalRead = 0;		while (totalRead < maxRead) {			totalRead = totalRead			    + ar.read(buffer, 13 + totalRead, maxRead - totalRead);			assertTrue(totalRead > 0);		}	}}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -