📄 pagedescription.jj
字号:
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
/* JavaCC generation options for the PageDescription parser. */
options {
/* Token literals and patterns are matched without regard to letter case. */
IGNORE_CASE = true;
/* Generate a non-static parser, so each PageDescription instance has its own parsing state. */
STATIC = false;
}
PARSER_BEGIN(PageDescription)
package net.nutch.quality.dynamic;
import java.io.*;
import java.util.*;
/************************************************
 * PageDescription gives the URL and the textual
 * description for a target page.  It loads in
 * a Sherlock plugin file.
 *
 * After parse() has run, the attributes of the
 * opening "search" tag are available through
 * getValues(), and one attribute map per
 * "input" / "interpret" element is available
 * through getInputs() / getInterprets().
 *
 * @author Mike Cafarella
 ************************************************/
public class PageDescription {
    /** Attributes of the opening "search" tag: lower-cased keyword -> value. */
    HashMap values = new HashMap();
    /** One HashMap of attributes per "input" element, in the order encountered. */
    ArrayList inputs = new ArrayList();
    /** One HashMap of attributes per "interpret" element, in the order encountered. */
    ArrayList interprets = new ArrayList();

    /**
     * Returns the attributes of the opening "search" tag.
     *
     * @return the live (not copied) map of lower-cased keyword to value
     */
    public HashMap getValues() {
        return values;
    }

    /**
     * Returns the parsed "input" elements.
     *
     * @return the live (not copied) list holding one HashMap per "input"
     *         element; an attribute given without a value maps to null
     */
    public ArrayList getInputs() {
        return inputs;
    }

    /**
     * Returns the parsed "interpret" elements.
     *
     * @return the live (not copied) list holding one HashMap per
     *         "interpret" element
     */
    public ArrayList getInterprets() {
        return interprets;
    }

    /**
     * Test out sherlock parsing: parses the file named by the first
     * argument, or prints a usage message when no argument is given.
     *
     * @param argv command line; argv[0] is the path of the file to parse
     * @throws IOException if the file cannot be opened or closed
     * @throws ParseException if the file does not match the grammar
     */
    public static void main(String argv[]) throws IOException, ParseException {
        if (argv.length < 1) {
            System.out.println("Usage: java net.nutch.quality.dynamic.PageDescription <srcFile>");
            return;
        }

        InputStream in = new FileInputStream(new File(argv[0]));
        try {
            PageDescription pd = new PageDescription(in);
            pd.parse();
        } finally {
            // Release the file handle even when parsing fails.
            in.close();
        }
    }
}
PARSER_END(PageDescription)
/*
 * Whitespace and '#'-to-end-of-line comments are skipped in both lexical states.
 * NOTE(review): the comment pattern requires a line terminator, so a '#' comment
 * on the last line of a file that has no trailing newline is not matched by this
 * rule -- confirm whether such input needs to be supported.
 */
<DEFAULT,AFTER_EQUALS> SKIP : {
" " | "\t" | "\n" | "\r"
| <"#" (~["\n","\r"])* ("\n"|"\r"|"\r\n")>
}
/* A tag or attribute name: one or more ASCII letters. */
TOKEN : {
<KEYWORD: (["a"-"z","A"-"Z"])+>
}
/* '=' switches the lexer to AFTER_EQUALS so that the next token is read as a value. */
TOKEN : {
<EQUALS: "="> : AFTER_EQUALS
}
/*
 * Values are recognized only in AFTER_EQUALS; matching either form returns the
 * lexer to DEFAULT.  A quoted value runs to the next matching quote character
 * (the quotes are part of the token image).  An unquoted value runs until
 * whitespace, a quote character, or '>'.
 */
<AFTER_EQUALS> TOKEN : {
<QUOTED_VALUE:
"\"" (~["\""])* "\"" |
"\'" (~["\'"])* "\'" > : DEFAULT
| <UNQUOTED_VALUE:
(~["\n","\r","\t"," ","\"","\'",">"])+ > : DEFAULT
}
/**
 * Top-level production: the opening "search" tag with its attributes,
 * followed by any number of elements and an optional closing tag.
 * Attributes are recorded by param(); elements are handled by element().
 * Input after the optional closing tag is not consumed by this production.
 */
void parse() : {} {
    "<search" ( param() )* ">"
    ( element() )*
    [ "</search>" ]
}
/**
 * Parses one keyword=value attribute of the opening "search" tag and
 * stores it in the values map under the lower-cased keyword.
 */
void param() : {
    String keyword;
    String value;
}
{
    <KEYWORD>
    {
        // Use a fixed locale: with the default locale the stored key would
        // depend on the machine's settings (e.g. 'I' lower-cases to a
        // dotless i under a Turkish locale).
        keyword = token.image.toLowerCase(Locale.ENGLISH);
    }
    <EQUALS>
    value=value()
    { values.put(keyword, value); }
}
/**
 * Parses one element following the opening "search" tag.
 * An "input" element appends a new attribute map to inputs and fills it
 * through inputItem(); an "interpret" element appends a new attribute map
 * to interprets and fills it through matchItem().  Any other tag is
 * consumed and its attributes are discarded.  The element may end with
 * either "/>" or ">".
 */
void element() : {
    HashMap attrs;
}
{
    (
        "<input"
        { attrs = new HashMap(); inputs.add(attrs); }
        ( inputItem(attrs) )*
    |
        "<interpret"
        { attrs = new HashMap(); interprets.add(attrs); }
        ( matchItem(attrs) )*
    |
        "<" <KEYWORD> ( <KEYWORD> [ <EQUALS> value() ] )*
    )
    ( "/>" | ">" )
}
/**
 * Parses one attribute of an "input" element and stores it in the given
 * map under the lower-cased keyword.  The "=value" part is optional; an
 * attribute given without a value is stored with a null value.
 *
 * @param input the attribute map of the element being parsed
 */
void inputItem(HashMap input) : {
    String keyword;
    String value = null;
}
{
    <KEYWORD>
    {
        // Use a fixed locale: with the default locale the stored key would
        // depend on the machine's settings (e.g. 'I' lower-cases to a
        // dotless i under a Turkish locale).
        keyword = token.image.toLowerCase(Locale.ENGLISH);
    }
    [ <EQUALS> value=value() ]
    { input.put(keyword, value); }
}
/**
 * Parses one keyword=value attribute of an "interpret" element and stores
 * it in the given map under the lower-cased keyword.  Unlike inputItem(),
 * the value is required.
 *
 * @param interpret the attribute map of the element being parsed
 */
void matchItem(HashMap interpret) : {
    String keyword;
    String value;
}
{
    <KEYWORD>
    {
        // Use a fixed locale: with the default locale the stored key would
        // depend on the machine's settings (e.g. 'I' lower-cases to a
        // dotless i under a Turkish locale).
        keyword = token.image.toLowerCase(Locale.ENGLISH);
    }
    <EQUALS>
    value=value()
    { interpret.put(keyword, value); }
}
/**
 * Parses an attribute value and returns its text.
 * For a quoted value the surrounding quote characters are removed;
 * an unquoted value is returned exactly as matched.
 */
String value() : {
    Token matched;
    String text;
}
{
    (
        matched=<QUOTED_VALUE>
        {
            // Drop the first and last character, i.e. the quotes.
            text = matched.image.substring(1, matched.image.length() - 1);
        }
    |
        matched=<UNQUOTED_VALUE>
        { text = matched.image; }
    )
    { return text; }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -