⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 uurifactorytest.java

📁 这是个爬虫，和 Lucene 相结合最好了
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
	 *       ../..         =  http://a/	 *       ../../        =  http://a/	 *       ../../g       =  http://a/g	 * </pre>	 *	 * @throws URIException	 */	public final void testRFC2396Relative() throws URIException {		UURI base = UURIFactory.		getInstance("http://a/b/c/d;p?q");		TreeMap<String,String> m = new TreeMap<String,String>();		m.put("..", "http://a/b/");		m.put("../", "http://a/b/");		m.put("../g", "http://a/b/g");		m.put("../..", "http://a/");		m.put("../../", "http://a/");		m.put("../../g", "http://a/g");		m.put("g#s", "http://a/b/c/g#s");		m.put("g?y#s ", "http://a/b/c/g?y#s");		m.put(";x", "http://a/b/c/;x");		m.put("g;x", "http://a/b/c/g;x");		m.put("g;x?y#s", "http://a/b/c/g;x?y#s");		m.put(".", "http://a/b/c/");		m.put("./", "http://a/b/c/");		m.put("g", "http://a/b/c/g");		m.put("./g", "http://a/b/c/g");		m.put("g/", "http://a/b/c/g/");		m.put("/g", "http://a/g");		m.put("//g", "http://g");		m.put("?y", "http://a/b/c/?y");		m.put("g?y", "http://a/b/c/g?y");		// EXTRAS beyond the RFC set.		// TODO: That these resolve to a path of /a/g might be wrong.  Perhaps		// it should be '/g'?.		m.put("/../../../../../../../../g", "http://a/g");		m.put("../../../../../../../../g", "http://a/g");		m.put("../G", "http://a/b/G");		for (Iterator i = m.keySet().iterator(); i.hasNext();) {			String key = (String)i.next();			String value = (String)m.get(key);			UURI uuri = UURIFactory.getInstance(base, key);			assertTrue("Unexpected " + key + " " + value + " " + uuri,					uuri.equals(UURIFactory.getInstance(value)));		}	}		/**	 * A UURI should always be without a 'fragment' segment, which is	 * unused and irrelevant for network fetches. 	 *  	 * See [ 970666 ] #anchor links not trimmed, and thus recrawled 	 * 	 * @throws URIException	 */	public final void testAnchors() throws URIException {		UURI uuri = UURIFactory.		
getInstance("http://www.example.com/path?query#anchor");		assertEquals("Not equal", "http://www.example.com/path?query",				uuri.toString());	}        /**     * Ensure that URI strings beginning with a colon are treated     * the same as browsers do (as relative, rather than as absolute     * with zero-length scheme).      *      * @throws URIException     */    public void testStartsWithColon() throws URIException {        UURI base = UURIFactory.getInstance("http://www.example.com/path/page");        UURI uuri = UURIFactory.getInstance(base,":foo");        assertEquals("derelativize starsWithColon",                uuri.getURI(),                "http://www.example.com/path/:foo");    }        /**     * Ensure that stray trailing '%' characters do not prevent     * UURI instances from being created, and are reasonably      * escaped when encountered.      *     * @throws URIException     */    public void testTrailingPercents() throws URIException {        String plainPath = "http://www.example.com/path%";        UURI plainPathUuri = UURIFactory.getInstance(plainPath);        assertEquals("plainPath getURI", plainPath, plainPathUuri.getURI());        assertEquals("plainPath getEscapedURI",                 "http://www.example.com/path%", // browsers don't escape '%'                plainPathUuri.getEscapedURI());                String partiallyEscapedPath = "http://www.example.com/pa%20th%";        UURI partiallyEscapedPathUuri = UURIFactory.getInstance(                partiallyEscapedPath);//        assertEquals("partiallyEscapedPath getURI", //                "http://www.example.com/pa th%", // TODO: is this desirable?////              partiallyEscapedPath,//                partiallyEscapedPathUuri.getURI());        assertEquals("partiallyEscapedPath getEscapedURI",                 "http://www.example.com/pa%20th%",                partiallyEscapedPathUuri.getEscapedURI());                String plainQueryString = "http://www.example.com/path?q=foo%";        UURI 
plainQueryStringUuri = UURIFactory.getInstance(                plainQueryString);//        assertEquals("plainQueryString getURI", //                plainQueryString,//                plainQueryStringUuri.getURI());        assertEquals("plainQueryString getEscapedURI",                 "http://www.example.com/path?q=foo%",                plainQueryStringUuri.getEscapedURI());                        String partiallyEscapedQueryString =             "http://www.example.com/pa%20th?q=foo%";        UURI partiallyEscapedQueryStringUuri = UURIFactory.getInstance(                partiallyEscapedQueryString);        assertEquals("partiallyEscapedQueryString getURI",                 "http://www.example.com/pa th?q=foo%",                partiallyEscapedQueryStringUuri.getURI());        assertEquals("partiallyEscapedQueryString getEscapedURI",                 "http://www.example.com/pa%20th?q=foo%",                partiallyEscapedQueryStringUuri.getEscapedURI());      }        /**     * Ensure that stray '%' characters do not prevent     * UURI instances from being created, and are reasonably      * escaped when encountered.      
*     * @throws URIException     */    public void testStrayPercents() throws URIException {        String oneStray = "http://www.example.com/pa%th";        UURI oneStrayUuri = UURIFactory.getInstance(oneStray);        assertEquals("oneStray getURI", oneStray, oneStrayUuri.getURI());        assertEquals("oneStray getEscapedURI",                 "http://www.example.com/pa%th", // browsers don't escape '%'                oneStrayUuri.getEscapedURI());                String precededByValidEscape = "http://www.example.com/pa%20th%way";        UURI precededByValidEscapeUuri = UURIFactory.getInstance(                precededByValidEscape);        assertEquals("precededByValidEscape getURI",                 "http://www.example.com/pa th%way", // getURI interprets escapes                precededByValidEscapeUuri.getURI());        assertEquals("precededByValidEscape getEscapedURI",                 "http://www.example.com/pa%20th%way",                precededByValidEscapeUuri.getEscapedURI());                String followedByValidEscape = "http://www.example.com/pa%th%20way";        UURI followedByValidEscapeUuri = UURIFactory.getInstance(                followedByValidEscape);        assertEquals("followedByValidEscape getURI",                 "http://www.example.com/pa%th way", // getURI interprets escapes                followedByValidEscapeUuri.getURI());        assertEquals("followedByValidEscape getEscapedURI",                 "http://www.example.com/pa%th%20way",                followedByValidEscapeUuri.getEscapedURI());            }        public void testEscapingNotNecessary() throws URIException {        String escapesUnnecessary =             "http://www.example.com/misc;reserved:chars@that&don't=need"            +"+escaping$even,though!you(might)initially?think#so";        // expect everything but the #fragment        String expected = escapesUnnecessary.substring(0, escapesUnnecessary                .length() - 3);        assertEquals("escapes unnecessary",      
           expected,                 UURIFactory.getInstance(escapesUnnecessary).toString());    }        public void testIdn() throws URIException {        // See http://www.josefsson.org/idn.php.        String idn1 = new String("http://räksmörgås.josefßon.org/");        String puny1 = "http://xn--rksmrgs-5wao1o.josefsson.org/";        assertEquals("encoding of " + idn1, puny1, UURIFactory                .getInstance(idn1).toString());        String idn2 = "http://www.pølse.dk/";        String puny2 = "http://www.xn--plse-gra.dk/";        assertEquals("encoding of " + idn2, puny2, UURIFactory                .getInstance(idn2).toString());    }        public void testNewLineInURL() throws URIException {    	UURI uuri = UURIFactory.getInstance("http://www.ar\rchive\n." +    	    "org/i\n\n\r\rndex.html");    	assertEquals("http://www.archive.org/index.html", uuri.toString());    }        public void testTabsInURL() throws URIException {        UURI uuri = UURIFactory.getInstance("http://www.ar\tchive\t." +            "org/i\t\r\n\tndex.html");        assertEquals("http://www.archive.org/index.html", uuri.toString());    }        public void testQueryEscaping() throws URIException {        UURI uuri = UURIFactory.getInstance(            "http://www.yahoo.com/foo?somechars!@$%^&*()_-+={[}]|\'\";:/?.>,<");        assertEquals(            // tests in FF1.5 indicate it only escapes " < >             "http://www.yahoo.com/foo?somechars!@$%^&*()_-+={[}]|\'%22;:/?.%3E,%3C",            uuri.toString());    }        /**     * Check that our 'normalization' does same as Nutch's     * Below before-and-afters were taken from the nutch urlnormalizer-basic     * TestBasicURLNormalizer class  (December 2006, Nutch 0.9-dev).     
* @throws URIException     */    public void testSameAsNutchURLFilterBasic() throws URIException {        assertEquals(UURIFactory.getInstance(" http://foo.com/ ").toString(),            "http://foo.com/");        // check that protocol is lower cased        assertEquals(UURIFactory.getInstance("HTTP://foo.com/").toString(),            "http://foo.com/");                // check that host is lower cased        assertEquals(UURIFactory.                getInstance("http://Foo.Com/index.html").toString(),            "http://foo.com/index.html");        assertEquals(UURIFactory.                getInstance("http://Foo.Com/index.html").toString(),            "http://foo.com/index.html");        // check that port number is normalized        assertEquals(UURIFactory.                getInstance("http://foo.com:80/index.html").toString(),            "http://foo.com/index.html");        assertEquals(UURIFactory.getInstance("http://foo.com:81/").toString(),            "http://foo.com:81/");        // check that null path is normalized        assertEquals(UURIFactory.getInstance("http://foo.com").toString(),            "http://foo.com/");        // check that references are removed        assertEquals(UURIFactory.                getInstance("http://foo.com/foo.html#ref").toString(),            "http://foo.com/foo.html");        //     // check that encoding is normalized        //     normalizeTest("http://foo.com/%66oo.html", "http://foo.com/foo.html");        // check that unnecessary "../" are removed        assertEquals(UURIFactory.                getInstance("http://foo.com/aa/../").toString(),            "http://foo.com/" );        assertEquals(UURIFactory.                getInstance("http://foo.com/aa/bb/../").toString(),            "http://foo.com/aa/");        /* We fail this one.  Here we produce: 'http://foo.com/'.        assertEquals(UURIFactory.                
getInstance("http://foo.com/aa/..").toString(),            "http://foo.com/aa/..");         */                assertEquals(UURIFactory.            getInstance("http://foo.com/aa/bb/cc/../../foo.html").toString(),                "http://foo.com/aa/foo.html");        assertEquals(UURIFactory.            getInstance("http://foo.com/aa/bb/../cc/dd/../ee/foo.html").                toString(),                    "http://foo.com/aa/cc/ee/foo.html");        assertEquals(UURIFactory.            getInstance("http://foo.com/../foo.html").toString(),                "http://foo.com/foo.html" );        assertEquals(UURIFactory.            getInstance("http://foo.com/../../foo.html").toString(),                "http://foo.com/foo.html" );        assertEquals(UURIFactory.            getInstance("http://foo.com/../aa/../foo.html").toString(),                "http://foo.com/foo.html" );        assertEquals(UURIFactory.            getInstance("http://foo.com/aa/../../foo.html").toString(),                "http://foo.com/foo.html" );        assertEquals(UURIFactory.                getInstance("http://foo.com/aa/../bb/../foo.html/../../").                    toString(),            "http://foo.com/" );        assertEquals(UURIFactory.getInstance("http://foo.com/../aa/foo.html").            toString(), "http://foo.com/aa/foo.html" );        assertEquals(UURIFactory.                getInstance("http://foo.com/../aa/../foo.html").toString(),            "http://foo.com/foo.html" );        assertEquals(UURIFactory.                getInstance("http://foo.com/a..a/foo.html").toString(),            "http://foo.com/a..a/foo.html" );        assertEquals(UURIFactory.                getInstance("http://foo.com/a..a/../foo.html").toString(),            "http://foo.com/foo.html" );        assertEquals(UURIFactory.            
getInstance("http://foo.com/foo.foo/../foo.html").toString(),                 "http://foo.com/foo.html" );    }        public void testHttpSchemeColonSlash() {    	boolean exception = false;    	try {    		UURIFactory.getInstance("https:/");    	} catch (URIException e) {    		exception = true;    	}    	assertTrue("Didn't throw exception when one expected", exception);    	exception = false;    	try {    		UURIFactory.getInstance("http://");    	} catch (URIException e) {    		exception = true;    	}    	assertTrue("Didn't throw exception when one expected", exception);    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -