⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 uurifactorytest.java

📁 这是个爬虫和 Lucene 相结合最好了
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
		uuri = UURIFactory.getInstance(uuri.toString());		uuri = UURIFactory.getInstance(uuri.toString());		assertEquals("Not equal (dbl-encoding) utf8", encodedUtf8Uri, uuri.toString());	}		/**	 * Test for syntax errors stop page parsing.	 * @see <a href="https://sourceforge.net/tracker/?func=detail&aid=788219&group_id=73833&atid=539099">[ 788219 ] URI Syntax Errors stop page parsing</a>	 * @throws URIException	 */	public final void testThreeSlashes() throws URIException {		UURI goodURI = UURIFactory.		getInstance("http://lcweb.loc.gov/rr/goodtwo.html");		String uuri = "http:///lcweb.loc.gov/rr/goodtwo.html";		UURI rewrittenURI = UURIFactory.getInstance(uuri);		assertTrue("Not equal " + goodURI + ", " + uuri,				goodURI.toString().equals(rewrittenURI.toString()));		uuri = "http:////lcweb.loc.gov/rr/goodtwo.html";		rewrittenURI = UURIFactory.getInstance(uuri);		assertTrue("Not equal " + goodURI + ", " + uuri,				goodURI.toString().equals(rewrittenURI.toString()));		// Check https.		goodURI = UURIFactory.		getInstance("https://lcweb.loc.gov/rr/goodtwo.html");		uuri = "https:////lcweb.loc.gov/rr/goodtwo.html";		rewrittenURI = UURIFactory.getInstance(uuri);		assertTrue("Not equal " + goodURI + ", " + uuri,				goodURI.toString().equals(rewrittenURI.toString()));	}		public final void testNoScheme() {		boolean expectedException = false;		String uuri = "www.loc.gov/rr/european/egw/polishex.html";		try {			UURIFactory.getInstance(uuri);		} catch (URIException e) {			// Expected exception.			expectedException = true;		}		assertTrue("Didn't get expected exception: " + uuri, 				expectedException); 	}		public final void testRelative() throws URIException {		UURI uuriTgt = UURIFactory.		getInstance("http://archive.org:83/home.html");		UURI uri = UURIFactory.		getInstance("http://archive.org:83/one/two/three.html");		UURI uuri = UURIFactory.		
getInstance(uri, "/home.html");		assertTrue("Not equal",				uuriTgt.toString().equals(uuri.toString()));	}		/**	 * Test that an empty uuri does the right thing -- that we get back the	 * base.	 *	 * @throws URIException	 */	public final void testRelativeEmpty() throws URIException {		UURI uuriTgt = UURIFactory.		getInstance("http://archive.org:83/one/two/three.html");		UURI uri = UURIFactory.		getInstance("http://archive.org:83/one/two/three.html");		UURI uuri = UURIFactory.		getInstance(uri, "");		assertTrue("Empty length don't work",				uuriTgt.toString().equals(uuri.toString()));	}		public final void testAbsolute() throws URIException {		UURI uuriTgt = UURIFactory.		getInstance("http://archive.org:83/home.html");		UURI uri = UURIFactory.		getInstance("http://archive.org:83/one/two/three.html");		UURI uuri = UURIFactory.		getInstance(uri, "http://archive.org:83/home.html");		assertTrue("Not equal",				uuriTgt.toString().equals(uuri.toString()));	}		/**	 * Test for [ 962892 ] UURI accepting/creating unUsable URIs (bad hosts).	 * @see <a href="https://sourceforge.net/tracker/?func=detail&atid=539099&aid=962892&group_id=73833">[ 962892 ] UURI accepting/creating unUsable URIs (bad hosts)</a>	 */	public final void testHostWithLessThan() {		checkExceptionOnIllegalDomainlabel("http://www.betamobile.com</A");		checkExceptionOnIllegalDomainlabel(		"http://C|/unzipped/426/spacer.gif");		checkExceptionOnIllegalDomainlabel("http://www.lycos.co.uk\"/l/b/\"");	}    		/**	 * Test for [ 1012520 ] UURI.length() &gt; 2k.	 * @throws URIException	 * @see <a href="http://sourceforge.net/tracker/index.php?func=detail&aid=1012520&group_id=73833&atid=539099">[ 1012520 ] UURI.length() &gt; 2k</a>	 */	public final void test2kURI() throws URIException {		final StringBuffer buffer = new StringBuffer("http://a.b");		final String subPath = "/123456789";		for (int i = 0; i < 207; i++) {			buffer.append(subPath);		}		// String should be 2080 characters long.  Legal.		
UURIFactory.getInstance(buffer.toString());		boolean gotException = false;		// Add ten more characters and make size illegal.		buffer.append(subPath);		try {			UURIFactory.getInstance(buffer.toString()); 		} catch (URIException e) {			gotException = true;		}		assertTrue("No expected exception complaining about long URI",				gotException);	} 		private void checkExceptionOnIllegalDomainlabel(String uuri) {		boolean expectedException = false;        try {			UURIFactory.getInstance(uuri);		} catch (URIException e) {			// Expected exception.			expectedException = true;		}		assertTrue("Didn't get expected exception: " + uuri, 				expectedException); 	}		/**	 * Test for doing separate DNS lookup for same host	 *	 * @see <a href="https://sourceforge.net/tracker/?func=detail&aid=788277&group_id=73833&atid=539099">[ 788277 ] Doing separate DNS lookup for same host</a>	 * @throws URIException	 */	public final void testHostWithPeriod() throws URIException {		UURI uuri1 = UURIFactory.		getInstance("http://www.loc.gov./index.html");		UURI uuri2 = UURIFactory.		getInstance("http://www.loc.gov/index.html");		assertEquals("Failed equating hosts with dot",				uuri1.getHost(), uuri2.getHost());	}		/**	 * Test for NPE in java.net.URI.encode	 *	 * @see <a href="https://sourceforge.net/tracker/?func=detail&aid=874220&group_id=73833&atid=539099">[ 874220 ] NPE in java.net.URI.encode</a>	 * @throws URIException	 */	public final void testHostEncodedChars() throws URIException {		String s = "http://g.msn.co.kr/0nwkokr0/00/19??" 
+		"PS=10274&NC=10009&CE=42&CP=949&HL=" +		"&#65533;&#65533;&#65533;?&#65533;&#65533;";		assertNotNull("Encoded chars " + s, 				UURIFactory.getInstance(s));	}		/**	 * Test for java.net.URI parses %20 but getHost null	 *	 * See <a href="https://sourceforge.net/tracker/?func=detail&aid=927940&group_id=73833&atid=539099">[ 927940 ] java.net.URI parses %20 but getHost null</a>	 */	public final void testSpaceInHost() {		boolean expectedException = false;		try {			UURIFactory.getInstance(					"http://www.local-regions.odpm%20.gov.uk" +			"/lpsa/challenge/pdf/propect.pdf");		} catch (URIException e) {			expectedException = true;		}		assertTrue("Did not fail with escaped space.", expectedException);				expectedException = false;		try {			UURIFactory.getInstance(					"http://www.local-regions.odpm .gov.uk" +			"/lpsa/challenge/pdf/propect.pdf");		} catch (URIException e) {			expectedException = true;		}		assertTrue("Did not fail with real space.", expectedException);	}		/**	 * Test for java.net.URI chokes on hosts_with_underscores.	 *	 * @see  <a href="https://sourceforge.net/tracker/?func=detail&aid=808270&group_id=73833&atid=539099">[ 808270 ] java.net.URI chokes on hosts_with_underscores</a>	 * @throws URIException	 */	public final void testHostWithUnderscores() throws URIException {		UURI uuri = UURIFactory.getInstance(		"http://x_underscore_underscore.2u.com.tw/nonexistent_page.html");		assertEquals("Failed get of host with underscore",				"x_underscore_underscore.2u.com.tw", uuri.getHost());	}			/**	 * Two dots for igor.	 */	public final void testTwoDots() {		boolean expectedException = false;		try {			UURIFactory.getInstance(			"http://x_underscore_underscore..2u.com/nonexistent_page.html");		} catch (URIException e) {			expectedException = true;		}		assertTrue("Two dots did not throw exception", expectedException);	}		/**	 * Test for java.net.URI#getHost fails when leading digit.	 
*	 * @see <a href="https://sourceforge.net/tracker/?func=detail&aid=910120&group_id=73833&atid=539099">[ 910120 ] java.net.URI#getHost fails when leading digit.</a>	 * @throws URIException	 */	public final void testHostWithDigit() throws URIException {		UURI uuri = UURIFactory.		getInstance("http://0204chat.2u.com.tw/nonexistent_page.html");		assertEquals("Failed get of host with digit",				"0204chat.2u.com.tw", uuri.getHost());	}		/**	 * Test for Constraining java URI class.	 *	 * @see <a href="https://sourceforge.net/tracker/?func=detail&aid=949548&group_id=73833&atid=539099">[ 949548 ] Constraining java URI class</a>	 */	public final void testPort() {		checkBadPort("http://www.tyopaikat.com:a/robots.txt");		checkBadPort("http://158.144.21.3:80808/robots.txt");		checkBadPort("http://pdb.rutgers.edu:81.rutgers.edu/robots.txt");		checkBadPort(		    "https://webmail.gse.harvard.edu:9100robots.txt/robots.txt");		checkBadPort(		    "https://webmail.gse.harvard.edu:0/robots.txt/robots.txt");	}		/**	 * Test bad port throws exception.	 * @param uri URI with bad port to check.	 */	private void checkBadPort(String uri) {		boolean exception = false;		try {			UURIFactory.getInstance(uri);		}		catch (URIException e) {			exception = true;		}		assertTrue("Didn't throw exception: " + uri, exception);	}		/**	 * Preserve userinfo capitalization.	 
* @throws URIException	 */	public final void testUserinfo() throws URIException {        final String authority = "stack:StAcK@www.tyopaikat.com";        final String uri = "http://" + authority + "/robots.txt";		UURI uuri = UURIFactory.getInstance(uri);		assertEquals("Authority not equal", uuri.getAuthority(),            authority);        /*        String tmp = uuri.toString();        assertTrue("URI not equal", tmp.equals(uri));        */	}	/**	 * Test user info + port	 * @throws URIException	 */	public final void testUserinfoPlusPort() throws URIException {		final String userInfo = "stack:StAcK";        final String authority = "www.tyopaikat.com";        final int port = 8080;        final String uri = "http://" + userInfo + "@" + authority + ":" + port         	+ "/robots.txt";		UURI uuri = UURIFactory.getInstance(uri);		assertEquals("Host not equal", authority,uuri.getHost());		assertEquals("Userinfo Not equal",userInfo,uuri.getUserinfo());		assertEquals("Port not equal",port,uuri.getPort());		assertEquals("Authority wrong","stack:StAcK@www.tyopaikat.com:8080",				uuri.getAuthority());		assertEquals("AuthorityMinusUserinfo wrong","www.tyopaikat.com:8080",				uuri.getAuthorityMinusUserinfo());			}		/**	 * Tests from rfc2396 with amendments to accomodate differences	 * intentionally added to make our URI handling like IEs.	 *	 * <pre>	 *       g:h           =  g:h	 *       g             =  http://a/b/c/g	 *       ./g           =  http://a/b/c/g	 *       g/            =  http://a/b/c/g/	 *       /g            =  http://a/g	 *       //g           =  http://g	 *       ?y            =  http://a/b/c/?y	 *       g?y           =  http://a/b/c/g?y	 *       #s            =  (current document)#s	 *       g#s           =  http://a/b/c/g#s	 *       g?y#s         =  http://a/b/c/g?y#s	 *       ;x            =  http://a/b/c/;x	 *       g;x           =  http://a/b/c/g;x	 *       g;x?y#s       =  http://a/b/c/g;x?y#s	 *       .             
=  http://a/b/c/	 *       ./            =  http://a/b/c/	 *       ..            =  http://a/b/	 *       ../           =  http://a/b/	 *       ../g          =  http://a/b/g

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -