📄 deciderulesequencetest.java
字号:
// NOTE(review): the header of this first test method precedes the visible
// excerpt; its statements are reproduced unchanged, one per line.
        addDecideRule(new TooManyPathSegmentsDecideRule("SEGMENTS"));
        final int max =
            TooManyPathSegmentsDecideRule.DEFAULT_MAX_PATH_DEPTH.intValue();
        StringBuffer baseUri = new StringBuffer("http://archive.org");
        // Build a URI with exactly 'max' path segments: /1/2/.../max
        for (int i = 0; i < max; i++) {
            baseUri.append('/');
            baseUri.append(Integer.toString(i + 1));
        }
        UURI uuri = UURIFactory.getInstance(baseUri.toString());
        CandidateURI candidate = new CandidateURI(uuri);
        Object decision = this.rule.decisionFor(candidate);
        assertTrue("Expect " + DecideRule.PASS + " but got " + decision,
            decision == DecideRule.PASS);
        // One more segment than the default maximum.
        baseUri.append("/x");
        uuri = UURIFactory.getInstance(baseUri.toString());
        candidate = new CandidateURI(uuri);
        decision = this.rule.decisionFor(candidate);
        assertTrue("Expect " + DecideRule.REJECT + " but got " + decision,
            decision == DecideRule.REJECT);
    }

    /**
     * With a {@link MatchesFilePatternDecideRule} added to the sequence,
     * expects ACCEPT for <code>http://archive.org/ms.doc</code> and PASS for
     * <code>http://archive.org/index.html</code>.
     *
     * @throws InvalidAttributeValueException declared by
     *         {@link #addDecideRule(DecideRule)}
     * @throws URIException if a test URI cannot be built
     */
    public void testMatchesFilePattern()
    throws InvalidAttributeValueException, URIException {
        addDecideRule(new MatchesFilePatternDecideRule("FILE_PATTERN"));
        final String baseUri = "http://archive.org/";

        UURI uuri = UURIFactory.getInstance(baseUri + "ms.doc");
        CandidateURI candidate = new CandidateURI(uuri);
        Object decision = this.rule.decisionFor(candidate);
        assertTrue("Expect " + DecideRule.ACCEPT + " but got " + decision,
            decision == DecideRule.ACCEPT);

        uuri = UURIFactory.getInstance(baseUri + "index.html");
        candidate = new CandidateURI(uuri);
        decision = this.rule.decisionFor(candidate);
        assertTrue("Expect " + DecideRule.PASS + " but got " + decision,
            decision == DecideRule.PASS);
    }

    /**
     * With a {@link NotMatchesFilePatternDecideRule} added to the sequence,
     * expects PASS for <code>http://archive.org/ms.doc</code> and ACCEPT for
     * <code>http://archive.org/index.html</code> (the mirror image of
     * {@link #testMatchesFilePattern()}).
     *
     * @throws InvalidAttributeValueException declared by
     *         {@link #addDecideRule(DecideRule)}
     * @throws URIException if a test URI cannot be built
     */
    public void testNotMatchesFilePattern()
    throws InvalidAttributeValueException, URIException {
        addDecideRule(new NotMatchesFilePatternDecideRule("NOT_FILE_PATTERN"));
        final String baseUri = "http://archive.org/";

        UURI uuri = UURIFactory.getInstance(baseUri + "ms.doc");
        CandidateURI candidate = new CandidateURI(uuri);
        Object decision = this.rule.decisionFor(candidate);
        assertTrue("Expect " + DecideRule.PASS + " but got " + decision,
            decision == DecideRule.PASS);

        uuri = UURIFactory.getInstance(baseUri + "index.html");
        candidate = new CandidateURI(uuri);
        decision = this.rule.decisionFor(candidate);
        assertTrue("Expect " + DecideRule.ACCEPT + " but got " + decision,
            decision == DecideRule.ACCEPT);
    }

    /**
     * Shared driver for hop-limit style checks. Evaluates the current rule
     * sequence against <code>http://archive.org</code> with a path-from-seed
     * string made of repeated <code>pathExpansion</code> characters and
     * asserts, by identity, which decision comes back:
     * <ul>
     * <li>no path given: <code>defaultDecision</code></li>
     * <li><code>max - 1</code> characters: <code>defaultDecision</code></li>
     * <li><code>max</code> characters: <code>defaultDecision</code></li>
     * <li><code>max + 1</code> characters: <code>overLimitDecision</code></li>
     * </ul>
     * The caller is expected to have added the rule under test beforehand.
     *
     * @param max number of hops still expected to yield the default decision
     * @param pathExpansion hop character repeated to build the path string
     * @param defaultDecision decision expected at or below the limit
     * @param overLimitDecision decision expected once the limit is exceeded
     * @throws URIException if the test URI cannot be built
     */
    protected void testHopLimit(final int max, final char pathExpansion,
            final String defaultDecision, final String overLimitDecision)
    throws URIException {
        UURI uuri = UURIFactory.getInstance("http://archive.org");
        CandidateURI candidate = new CandidateURI(uuri);
        Object decision = this.rule.decisionFor(candidate);
        assertTrue("Expect " + defaultDecision + " but got " + decision,
            decision == defaultDecision);

        // One hop short of the limit.
        StringBuffer path = new StringBuffer(max);
        for (int i = 0; i < (max - 1); i++) {
            path.append(pathExpansion);
        }
        candidate = new CandidateURI(uuri, path.toString(), null, null);
        decision = this.rule.decisionFor(candidate);
        assertTrue("Expect " + defaultDecision + " but got " + decision,
            decision == defaultDecision);

        // Exactly at the limit.
        path.append(pathExpansion);
        candidate = new CandidateURI(uuri, path.toString(), null, null);
        decision = this.rule.decisionFor(candidate);
        assertTrue("Expect " + defaultDecision + " but got " + decision,
            decision == defaultDecision);

        // One hop over the limit.
        path.append(pathExpansion);
        candidate = new CandidateURI(uuri, path.toString(), null, null);
        decision = this.rule.decisionFor(candidate);
        assertTrue("Expect " + overLimitDecision + " but got " + decision,
            decision == overLimitDecision);
    }

    /**
     * Exercises {@link ScopePlusOneDecideRule} first with host scope and then
     * with domain scope, using a chain of candidates in which each URI's
     * "via" is the previous URI in the chain.
     *
     * @throws URIException if a test URI cannot be built
     * @throws InvalidAttributeValueException from rule configuration
     * @throws AttributeNotFoundException from rule configuration
     * @throws MBeanException from rule configuration
     * @throws ReflectionException from rule configuration
     */
    public void testScopePlusOne()
    throws URIException, InvalidAttributeValueException,
            AttributeNotFoundException, MBeanException, ReflectionException {
        // first test host scope
        ScopePlusOneDecideRule t = new ScopePlusOneDecideRule("host");
        SurtPrefixSet mSet = new SurtPrefixSet();
        mSet.add(SurtPrefixSet.prefixFromPlain("http://audio.archive.org"));
        mSet.convertAllPrefixesToHosts();
        t.surtPrefixes = mSet;
        DecideRule s = addDecideRule(t);
        s.setAttribute(new Attribute(ScopePlusOneDecideRule.ATTR_SCOPE,
            ScopePlusOneDecideRule.HOST));

        UURI uuri =
            UURIFactory.getInstance("http://audio.archive.org/examples");
        CandidateURI candidate = new CandidateURI(uuri);
        Object decision = this.rule.decisionFor(candidate);
        assertTrue("URI Expect " + DecideRule.ACCEPT + " for " + candidate
            + " but got " + decision, decision == DecideRule.ACCEPT);

        UURI uuriOne = UURIFactory.getInstance("http://movies.archive.org");
        CandidateURI plusOne = new CandidateURI(uuriOne);
        plusOne.setVia(uuri);
        decision = this.rule.decisionFor(plusOne);
        assertTrue("PlusOne Expect " + DecideRule.ACCEPT + " for " + plusOne
            + " with via " + plusOne.flattenVia() + " but got " + decision,
            decision == DecideRule.ACCEPT);

        UURI uuriTwo = UURIFactory.getInstance("http://sloan.archive.org");
        CandidateURI plusTwo = new CandidateURI(uuriTwo);
        plusTwo.setVia(uuriOne);
        decision = this.rule.decisionFor(plusTwo);
        assertTrue("PlusTwo Expect " + DecideRule.PASS + " for " + plusTwo
            + " with via " + plusTwo.flattenVia() + " but got " + decision,
            decision == DecideRule.PASS);

        //now test domain scope
        // (addDecideRule only appends, so the host-scope rule configured
        // above is still part of the sequence from here on)
        ScopePlusOneDecideRule u = new ScopePlusOneDecideRule("domain");
        SurtPrefixSet mSet1 = new SurtPrefixSet();
        mSet1.add(SurtPrefixSet.prefixFromPlain("archive.org"));
        mSet1.convertAllPrefixesToDomains();
        u.surtPrefixes = mSet1;
        DecideRule v = addDecideRule(u);
        v.setAttribute(new Attribute(ScopePlusOneDecideRule.ATTR_SCOPE,
            ScopePlusOneDecideRule.DOMAIN));

        decision = this.rule.decisionFor(candidate);
        assertTrue("Domain: URI Expect " + DecideRule.ACCEPT + " for "
            + candidate + " but got " + decision,
            decision == DecideRule.ACCEPT);

        decision = this.rule.decisionFor(plusOne);
        assertTrue("Domain: PlusOne Expect " + DecideRule.ACCEPT + " for "
            + plusOne + " with via " + plusOne.flattenVia() + " but got "
            + decision, decision == DecideRule.ACCEPT);

        decision = this.rule.decisionFor(plusTwo);
        assertTrue("Domain: PlusTwo Expect " + DecideRule.ACCEPT + " for "
            + plusTwo + " with via " + plusTwo.flattenVia() + " but got "
            + decision, decision == DecideRule.ACCEPT);

        UURI uuriThree = UURIFactory.getInstance("http://sloan.org");
        CandidateURI plusThree = new CandidateURI(uuriThree);
        plusThree.setVia(uuriTwo);
        decision = this.rule.decisionFor(plusThree);
        assertTrue("Domain: PlusThree Expect " + DecideRule.ACCEPT + " for "
            + plusThree + " with via " + plusThree.flattenVia() + " but got "
            + decision, decision == DecideRule.ACCEPT);

        UURI uuriFour = UURIFactory.getInstance("http://example.com");
        CandidateURI plusFour = new CandidateURI(uuriFour);
        plusFour.setVia(uuriThree);
        decision = this.rule.decisionFor(plusFour);
        assertTrue("Domain: PlusFour Expect " + DecideRule.PASS + " for "
            + plusFour + " with via " + plusFour.flattenVia() + " but got "
            + decision, decision == DecideRule.PASS);
    }

    /**
     * Exercises {@link FilterDecideRule}: first unconfigured, then with a
     * {@link ContentTypeRegExpFilter} for <code>app.*</code> against two
     * content types, and finally with the "true" decision switched to PASS.
     *
     * @throws InvalidAttributeValueException from rule configuration
     * @throws URIException if the test URI cannot be built
     * @throws AttributeNotFoundException from rule configuration
     * @throws MBeanException from rule configuration
     * @throws ReflectionException from rule configuration
     */
    public void testFilter()
    throws InvalidAttributeValueException, URIException,
            AttributeNotFoundException, MBeanException, ReflectionException {
        FilterDecideRule dr = new FilterDecideRule(
            "FilterDecideRule(ContentTypeRegExpFilter)");
        addDecideRule(dr);
        UURI uuri = UURIFactory.getInstance("http://example.com/foo");
        CrawlURI curi = new CrawlURI(uuri);
        curi.setContentType("text/html");
        Object decision = this.rule.decisionFor(curi);
        // default for unconfigured FilterDecideRule is true from (empty)
        // filters, then ACCEPT because of true
        assertTrue("Expect " + DecideRule.ACCEPT + " but got " + decision,
            decision == DecideRule.ACCEPT);

        ContentTypeRegExpFilter filt =
            new ContentTypeRegExpFilter("ContentTypeRegExpFilter", "app.*");
        dr.filters.addElement(null, filt);
        decision = this.rule.decisionFor(curi);
        // filter should now return false, making decision REJECT
        assertTrue("Expect " + DecideRule.REJECT + " but got " + decision,
            decision == DecideRule.REJECT);

        curi.setContentType("application/octet-stream");
        decision = this.rule.decisionFor(curi);
        // filter should now return true, making decision ACCEPT
        assertTrue("Expect " + DecideRule.ACCEPT + " but got " + decision,
            decision == DecideRule.ACCEPT);

        // change true answer to "PASS"; use String to simulate settings
        // non-identity
        dr.setAttribute(
            new Attribute(FilterDecideRule.ATTR_TRUE_DECISION, "PASS"));
        decision = this.rule.decisionFor(curi);
        assertTrue("Expect " + DecideRule.PASS + " but got " + decision,
            decision == DecideRule.PASS);
    }

    /**
     * Appends the given rule to the rule map of the sequence under test.
     *
     * @param dr rule to add
     * @return the same <code>dr</code> instance, for further configuration
     * @throws InvalidAttributeValueException if the rule map rejects the
     *         element
     */
    protected DecideRule addDecideRule(DecideRule dr)
    throws InvalidAttributeValueException {
        MapType rules = this.rule.getRules(null);
        rules.addElement(null, dr);
        return dr;
    }

    /**
     * With a {@link ContentTypeMatchesRegExpDecideRule} configured for
     * <code>text/html</code>, expects PASS when the CrawlURI has no content
     * type or a non-matching one, and ACCEPT when the content type matches.
     *
     * @throws Exception on any setup or evaluation failure
     */
    public void testContentTypeMatchesRegexpDecideRule() throws Exception {
        ContentTypeMatchesRegExpDecideRule dr =
            new ContentTypeMatchesRegExpDecideRule("CTMREDRtest");
        DecideRule v = addDecideRule(dr);
        v.setAttribute(
            new Attribute(MatchesRegExpDecideRule.ATTR_REGEXP, "text/html"));
        UURI uuri = UURIFactory.getInstance("http://www.archive.org");
        CrawlURI crawlUri = new CrawlURI(uuri);

        // no content type - let curi pass
        Object decision = this.rule.decisionFor(crawlUri);
        assertTrue("URI Expect " + DecideRule.PASS + " for " + crawlUri
            + " but got " + decision, decision == DecideRule.PASS);

        // non-matching content type - let curi pass
        crawlUri.setContentType("application/pdf");
        decision = this.rule.decisionFor(crawlUri);
        assertTrue("URI Expect " + DecideRule.PASS + " for " + crawlUri
            + " but got " + decision, decision == DecideRule.PASS);

        // matching content type - accept curi
        crawlUri.setContentType("text/html");
        decision = this.rule.decisionFor(crawlUri);
        assertTrue("URI Expect " + DecideRule.ACCEPT + " for " + crawlUri
            + " but got " + decision, decision == DecideRule.ACCEPT);
    }

    /**
     * With a {@link ContentTypeNotMatchesRegExpDecideRule} configured for
     * <code>text/html</code>, expects PASS when the CrawlURI has no content
     * type or a matching one, and ACCEPT when the content type does not
     * match.
     *
     * @throws Exception on any setup or evaluation failure
     */
    public void testContentTypeNotMatchesRegexpDecideRule() throws Exception {
        ContentTypeNotMatchesRegExpDecideRule dr =
            new ContentTypeNotMatchesRegExpDecideRule("CTNMREDRtest");
        DecideRule v = addDecideRule(dr);
        v.setAttribute(
            new Attribute(MatchesRegExpDecideRule.ATTR_REGEXP, "text/html"));
        UURI uuri = UURIFactory.getInstance("http://www.archive.org");
        CrawlURI crawlUri = new CrawlURI(uuri);

        // no content type - let curi pass
        Object decision = this.rule.decisionFor(crawlUri);
        assertTrue("URI Expect " + DecideRule.PASS + " for " + crawlUri
            + " but got " + decision, decision == DecideRule.PASS);

        // matching content type - let curi pass
        crawlUri.setContentType("text/html");
        decision = this.rule.decisionFor(crawlUri);
        assertTrue("URI Expect " + DecideRule.PASS + " for " + crawlUri
            + " but got " + decision, decision == DecideRule.PASS);

        // non-matching content type - accept curi
        crawlUri.setContentType("application/pdf");
        decision = this.rule.decisionFor(crawlUri);
        assertTrue("URI Expect " + DecideRule.ACCEPT + " for " + crawlUri
            + " but got " + decision, decision == DecideRule.ACCEPT);
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -