⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 arcwritertest.java

📁 这是个爬虫，和 Lucene 相结合最好了
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
                    rec.getMetaData().getUrl().equals(SOME_URL));
            }
            count++;
        }
        return count;
    }

    /**
     * Creates an ARC writer and writes a single "text/html" record for
     * {@code SOME_URL} into it. The writer is returned still open so callers
     * can append further records; callers are responsible for closing it.
     *
     * @param name Name passed through to {@code createARCWriter}.
     * @param compressed True if the ARC should be compressed.
     * @return The open writer, holding one record.
     * @throws IOException If creating the writer or writing the record fails.
     */
    protected ARCWriter createArcWithOneRecord(String name,
        boolean compressed)
    throws IOException {
        ARCWriter writer = createARCWriter(name, compressed);
        String content = getContent();
        writeRecord(writer, SOME_URL, "text/html",
            content.length(), getBaos(content));
        return writer;
    }

    /**
     * Writing a record whose URL contains a space must fail with an
     * IOException whose message starts with "Metadata line doesn't match".
     */
    public void testSpaceInURL() {
        String eMessage = null;
        try {
            holeyUrl("testSpaceInURL-" + PREFIX, false, " ");
        } catch (IOException e) {
            eMessage = e.getMessage();
        }
        // Guard against null so a missing exception fails the assertion
        // (with the message below) instead of raising a NullPointerException.
        assertTrue("Didn't get expected exception: " + eMessage,
            eMessage != null &&
            eMessage.startsWith("Metadata line doesn't match"));
    }

    /**
     * Writing a record whose URL contains a tab must fail with an
     * IOException whose message starts with "Metadata line doesn't match".
     */
    public void testTabInURL() {
        String eMessage = null;
        try {
            holeyUrl("testTabInURL-" + PREFIX, false, "\t");
        } catch (IOException e) {
            eMessage = e.getMessage();
        }
        // Guard against null so a missing exception fails the assertion
        // (with the message below) instead of raising a NullPointerException.
        assertTrue("Didn't get expected exception: " + eMessage,
            eMessage != null &&
            eMessage.startsWith("Metadata line doesn't match"));
    }

    /**
     * Writes one good record, then a second record whose URL is
     * {@code SOME_URL + urlInsert + "/index.html"}, and closes the writer.
     *
     * @param name Name passed through to {@code createArcWithOneRecord}.
     * @param compress True if the ARC should be compressed.
     * @param urlInsert Text spliced into the URL of the second record.
     * @throws IOException If any write fails; the callers in this class
     * expect that for whitespace inserts.
     */
    protected void holeyUrl(String name, boolean compress, String urlInsert)
    throws IOException {
        ARCWriter writer = createArcWithOneRecord(name, compress);
        // Add a second record whose URL has urlInsert spliced into it.
        String content = getContent();
        ByteArrayOutputStream baos = getBaos(content);
        // NOTE(review): if this write throws, the writer below is never
        // closed. Left as-is because it is unclear from here whether close()
        // after a failed write could mask the exception the tests inspect.
        writeRecord(writer, SOME_URL + urlInsert + "/index.html", "text/html",
            content.length(), baos);
        writer.close();
    }

    // If uncompressed, length has to be right or parse will fail.
//
//    public void testLengthTooShort() throws IOException {
//        lengthTooShort("testLengthTooShort-" + PREFIX, false);
//    }

    /**
     * Non-strict read of a compressed ARC holding a record that is longer
     * than its declared length; see {@link #lengthTooShort}.
     *
     * @throws IOException If writing or reading the ARC fails.
     */
    public void testLengthTooShortCompressed() throws IOException {
        lengthTooShort("testLengthTooShortCompressed-" + PREFIX, true, false);
    }

    /**
     * Strict read of a compressed ARC holding a record that is longer than
     * its declared length must raise a RuntimeException whose message starts
     * with "java.io.IOException: Record ENDING at".
     *
     * @throws IOException If writing or reading the ARC fails.
     */
    public void testLengthTooShortCompressedStrict()
    throws IOException {
        String eMessage = null;
        try {
            lengthTooShort("testLengthTooShortCompressedStrict-" + PREFIX,
                true, true);
        } catch (RuntimeException e) {
            eMessage = e.getMessage();
        }
        // Guard against null so a missing exception fails the assertion
        // (with the message below) instead of raising a NullPointerException.
        assertTrue("Didn't get expected exception: " + eMessage,
            eMessage != null &&
            eMessage.startsWith("java.io.IOException: Record ENDING at"));
    }

    /**
     * Writes an ARC in which one record carries extra bytes beyond its
     * declared length, reads it back, and asserts that four records are
     * iterated and that a warning mentioning "Record ENDING at" was printed
     * to {@code System.err}.
     *
     * {@code System.err} is redirected only while the ARC is being read and
     * is restored afterwards, including when reading throws.
     *
     * @param name Name passed through to {@code createArcWithOneRecord}.
     * @param compress True if the ARC should be compressed.
     * @param strict Value handed to {@code ARCReader.setStrict}.
     * @throws IOException If writing or reading the ARC fails.
     */
    protected void lengthTooShort(String name, boolean compress, boolean strict)
    throws IOException {
        ARCWriter writer = createArcWithOneRecord(name, compress);
        // Add some bytes on the end to mess up the record.
        String content = getContent();
        ByteArrayOutputStream baos = getBaos(content);
        baos.write("SOME TRAILING BYTES".getBytes());
        writeRecord(writer, SOME_URL, "text/html",
            content.length(), baos);
        writeRecord(writer, SOME_URL, "text/html",
            content.length(), getBaos(content));
        writer.close();

        // Catch System.err into a byte stream, remembering the real stream
        // so it can be put back once reading is done.
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        PrintStream originalErr = System.err;
        System.setErr(new PrintStream(os));
        try {
            ARCReader r = ARCReaderFactory.get(writer.getFile());
            r.setStrict(strict);
            int count = iterateRecords(r);
            assertTrue("Count wrong " + count, count == 4);
        } finally {
            // Never leave stderr redirected for whatever runs next.
            System.setErr(originalErr);
        }

        // Make sure we get the warning string which complains about the
        // trailing bytes.
        String err = os.toString();
        assertTrue("No message " + err, err.startsWith("WARNING") &&
            (err.indexOf("Record ENDING at") > 0));
    }

    //  If uncompressed, length has to be right or parse will fail.
//
//    public void testLengthTooLong()
//    throws IOException {
//        lengthTooLong("testLengthTooLongCompressed-" + PREFIX,
//            false, false);
//    }

    /**
     * Non-strict read of a compressed ARC holding a record whose declared
     * length is longer than its content; see {@link #lengthTooLong}.
     *
     * @throws IOException If writing or reading the ARC fails.
     */
    public void testLengthTooLongCompressed()
    throws IOException {
        lengthTooLong("testLengthTooLongCompressed-" + PREFIX,
            true, false);
    }

    /**
     * Strict read of a compressed ARC holding a record whose declared length
     * is longer than its content must raise an IOException whose message
     * starts with "Premature EOF before end-of-record".
     */
    public void testLengthTooLongCompressedStrict() {
        String eMessage = null;
        try {
            // NOTE(review): same name prefix as testLengthTooLongCompressed;
            // possibly a copy/paste leftover -- confirm it is intended.
            lengthTooLong("testLengthTooLongCompressed-" + PREFIX,
                true, true);
        } catch (IOException e) {
            eMessage = e.getMessage();
        }
        // Guard against null so a missing exception fails the assertion
        // (with the message below) instead of raising a NullPointerException.
        assertTrue("Didn't get expected exception: " + eMessage,
            eMessage != null &&
            eMessage.startsWith("Premature EOF before end-of-record"));
    }

    /**
     * Writes an ARC in which one record declares a length ten greater than
     * its content length, reads it back, and asserts that four records are
     * iterated and that {@code System.err} output starts with
     * "WARNING Premature EOF before end-of-record".
     *
     * {@code System.err} is redirected only while the ARC is being read and
     * is restored afterwards, including when reading throws.
     *
     * @param name Name passed through to {@code createArcWithOneRecord}.
     * @param compress True if the ARC should be compressed.
     * @param strict Value handed to {@code ARCReader.setStrict}.
     * @throws IOException If writing or reading the ARC fails.
     */
    protected void lengthTooLong(String name, boolean compress,
            boolean strict)
    throws IOException {
        ARCWriter writer = createArcWithOneRecord(name, compress);
        // Add a record with a length that is too long.
        String content = getContent();
        writeRecord(writer, SOME_URL, "text/html",
            content.length() + 10, getBaos(content));
        writeRecord(writer, SOME_URL, "text/html",
            content.length(), getBaos(content));
        writer.close();

        // Catch System.err, remembering the real stream so it can be put
        // back once reading is done.
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        PrintStream originalErr = System.err;
        System.setErr(new PrintStream(os));
        try {
            ARCReader r = ARCReaderFactory.get(writer.getFile());
            r.setStrict(strict);
            int count = iterateRecords(r);
            assertTrue("Count wrong " + count, count == 4);
        } finally {
            // Never leave stderr redirected for whatever runs next.
            System.setErr(originalErr);
        }

        // Make sure we get the warning string which complains about the
        // premature end of the record.
        String err = os.toString();
        assertTrue("No message " + err,
            err.startsWith("WARNING Premature EOF before end-of-record"));
    }

    /**
     * Writing from a ReplayInputStream that reports a bad 'remaining' value
     * after being read fully must fail with an IOException whose message
     * contains "Gap between expected and actual".
     *
     * @throws IOException If creating or closing the writer fails.
     */
    public void testGapError() throws IOException {
        ARCWriter writer = createArcWithOneRecord("testGapError", true);
        String content = getContent();
        // Make a 'weird' RIS that returns bad 'remaining' length
        // after the call to readFullyTo.
        ReplayInputStream ris = new ReplayInputStream(content.getBytes(),
                content.length(), null) {
            private boolean readFullyToCalled = false;

            public void readFullyTo(OutputStream os)
            throws IOException {
                super.readFullyTo(os);
                this.readFullyToCalled = true;
            }

            public long remaining() {
                return (this.readFullyToCalled)? -1: super.remaining();
            }
        };
        String message = null;
        try {
            writer.write(SOME_URL, "text/html", "192.168.1.1",
                (new Date()).getTime(), content.length(), ris);
        } catch (IOException e) {
            message = e.getMessage();
        }
        writer.close();
        assertTrue("No gap when should be",
            message != null &&
            message.indexOf("Gap between expected and actual") >= 0);
    }

    /**
     * Write an arc file for other tests to use.
     * @param arcdir Directory to write to.
     * @param compress True if file should be compressed.
     * @return ARC written.
     * @throws IOException If writing the ARC fails.
     */
    public static File createARCFile(File arcdir, boolean compress)
    throws IOException {
        File [] files = {arcdir};
        ARCWriter writer = new ARCWriter(SERIAL_NO, Arrays.asList(files),
            "test", compress, DEFAULT_MAX_ARC_FILE_SIZE);
        String content = getContent();
        writeRecord(writer, SOME_URL, "text/html", content.length(),
            getBaos(content));
        writer.close();
        return writer.getFile();
    }

//    public void testSpeed() throws IOException {
//        ARCWriter writer = createArcWithOneRecord("speed", true);
//        // Add a record with a length that is too long.
//        String content = getContent();
//        final int count = 100000;
//        logger.info("Starting speed write of " + count + " records.");
//        for (int i = 0; i < count; i++) {
//            writeRecord(writer, SOME_URL, "text/html", content.length(),
//                    getBaos(content));
//        }
//        writer.close();
//        logger.info("Finished speed write test.");
//    }

    /**
     * A sample metadata line must pass {@code validateMetaLine} on its own,
     * with {@code LINE_SEPARATOR} appended, and with the literal suffix used
     * below appended.
     *
     * @throws Exception If validation fails or the writer cannot be
     * created or closed.
     */
    public void testValidateMetaLine() throws Exception {
        final String line = "http://www.aandw.net/images/walden2.png " +
            "128.197.34.86 20060111174224 image/png 2160";
        ARCWriter w = createARCWriter("testValidateMetaLine", true);
        try {
            w.validateMetaLine(line);
            w.validateMetaLine(line + LINE_SEPARATOR);
            w.validateMetaLine(line + "\\r\\n");
        } finally {
            w.close();
        }
    }

    /**
     * Reads a few bytes of an ARC record into a buffer at a non-zero offset.
     *
     * @throws Exception If writing or reading the ARC fails.
     */
    public void testArcRecordOffsetReads() throws Exception {
        // Get an ARC with one record.
        WriterPoolMember w =
            createArcWithOneRecord("testArcRecordInBufferStream", true);
        w.close();
        // Get reader on said ARC.
        ARCReader r = ARCReaderFactory.get(w.getFile());
        final Iterator i = r.iterator();
        // Skip first ARC meta record.
        ARCRecord ar = (ARCRecord) i.next();
        i.hasNext();
        // Now we're at first and only record in ARC.
        ar = (ARCRecord) i.next();
        // Now try getting some random set of bytes out of it
        // at an odd offset (used to fail because we were
        // doing bad math to find where in buffer to read).
        final byte[] buffer = new byte[17];
        final int maxRead = 4;
        int totalRead = 0;
        while (totalRead < maxRead) {
            // Check each read on its own: adding a -1 (end of stream) to
            // the running total would otherwise go unnoticed.
            int read = ar.read(buffer, 13 + totalRead, maxRead - totalRead);
            assertTrue("Read returned " + read, read > 0);
            totalRead += read;
        }
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -