⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pagedescription.jj

📁 一些简要的公爵类一些简要的公爵类一些简要的公爵类
💻 JJ
字号:
/* Copyright (c) 2003 The Nutch Organization.  All rights reserved.   */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */

options {
  // Keywords and literal tags such as "<search" are matched without regard to case.
  IGNORE_CASE = true;
  // Generate instance (non-static) parser members; main() creates a parser
  // object with "new PageDescription(...)" and calls parse() on it.
  STATIC = false;
}

PARSER_BEGIN(PageDescription)

package net.nutch.quality.dynamic;

import java.io.*;
import java.util.*;

/************************************************
 * PageDescription gives the URL and the textual
 * description for a target page.  It loads in
 * a Sherlock plugin file.
 * 
 * @author Mike Cafarella
 ************************************************/

public class PageDescription {
    /**
     * Attributes of the opening {@code <search ...>} tag, keyed by the
     * lower-cased attribute name.  Filled in by the param() production.
     */
    HashMap values = new HashMap();

    /**
     * One HashMap per {@code <input ...>} tag, in the order encountered.
     * Each map goes from lower-cased attribute name to its value; an
     * attribute written without "=value" is stored with a null value.
     */
    ArrayList inputs = new ArrayList();

    /**
     * One HashMap per {@code <interpret ...>} tag, in the order encountered.
     * Each map goes from lower-cased attribute name to its value.
     */
    ArrayList interprets = new ArrayList();

    /**
     * Returns the attribute map of the {@code <search>} tag.  This is the
     * live internal map, not a copy.
     */
    public HashMap getValues() {
        return values;
    }

    /**
     * Returns the list of attribute maps collected from {@code <input>}
     * tags.  This is the live internal list, not a copy.
     */
    public ArrayList getInputs() {
        return inputs;
    }

    /**
     * Returns the list of attribute maps collected from
     * {@code <interpret>} tags.  This is the live internal list, not a copy.
     */
    public ArrayList getInterprets() {
        return interprets;
    }

    /**
     * Test out sherlock parsing: parses the file named by the first
     * command-line argument and discards the result.
     *
     * @param argv command line; argv[0] is the path of the plugin file
     * @throws IOException if the file cannot be opened or closed
     * @throws ParseException if the file does not match the grammar
     */
    public static void main(String argv[]) throws IOException, ParseException {
        if (argv.length < 1) {
            System.out.println("Usage: java net.nutch.quality.dynamic.PageDescription <srcFile>");
            return;
        }
        InputStream in = new FileInputStream(new File(argv[0]));
        try {
            PageDescription pd = new PageDescription(in);
            pd.parse();
        } finally {
            // Release the file handle whether or not parsing succeeded.
            in.close();
        }
    }
}


PARSER_END(PageDescription)

/*
 * Whitespace and '#' comments are discarded in both lexical states.
 *
 * A comment runs from '#' up to, but not including, the line terminator.
 * The terminator itself is consumed by the whitespace alternatives, so it
 * is not required here; this lets a comment on the very last line of a
 * file that lacks a trailing newline be skipped instead of causing a
 * lexical error.
 *
 * NOTE(review): because this rule is also active in AFTER_EQUALS, an
 * unquoted attribute value that begins with '#' is consumed as a comment
 * rather than as a value -- confirm that no plugin file relies on that.
 */
<DEFAULT,AFTER_EQUALS> SKIP : {
  " " | "\t" | "\n" | "\r"
| <"#" (~["\n","\r"])*>
}

/*
 * A run of one or more ASCII letters.  The grammar uses it for attribute
 * names and for the names of tags it does not otherwise recognise.
 * Declared without a state list, so it is only recognised in DEFAULT.
 */
TOKEN : {
  <KEYWORD: (["a"-"z","A"-"Z"])+>
}

/*
 * The '=' between an attribute name and its value.  Matching it switches
 * the lexer to AFTER_EQUALS so that the text that follows is tokenized as
 * a value rather than as a keyword.
 */
TOKEN : {
  <EQUALS: "="> : AFTER_EQUALS
}

/*
 * Attribute values, recognised only directly after an '='.  Either token
 * returns the lexer to DEFAULT.
 *
 * QUOTED_VALUE:   text enclosed in a pair of double quotes or a pair of
 *                 single quotes.  There is no escape mechanism: the value
 *                 ends at the next matching quote character.  The token
 *                 image includes both quotes.
 * UNQUOTED_VALUE: a run of characters containing no whitespace, no quote
 *                 character and no '>'.
 */
<AFTER_EQUALS> TOKEN : {
  <QUOTED_VALUE:
    "\"" (~["\""])* "\""  |
      "\'" (~["\'"])* "\'" > 				         : DEFAULT
| <UNQUOTED_VALUE:
      (~["\n","\r","\t"," ","\"","\'",">"])+ > : DEFAULT
}

/**
 * Entry point of the grammar.  Reads the opening "<search" tag together
 * with its attributes, then any number of nested tags, then an optional
 * closing "</search>".  End of input is not required afterwards.
 */
void parse() : {}
{
  "<search" ( param() )* ">"
  ( element() )*
  [ "</search>" ]
}

/**
 * Parses one name=value attribute of the "<search" tag and records it in
 * the values map under the lower-cased attribute name.
 */
void param() : {
  Token key;
  String attrValue;
}
{
  key=<KEYWORD>
  <EQUALS>
  attrValue=value()

  { values.put(key.image.toLowerCase(), attrValue); }
}

/**
 * Parses one tag inside the search block, up to and including its closing
 * ">" or "/>".
 *
 * An "<input" tag appends a fresh attribute map to inputs and an
 * "<interpret" tag appends one to interprets; the map is added to its list
 * before its attributes are read.  Any other tag is parsed and its
 * attributes are thrown away.
 */
void element() : {
  HashMap inputAttrs;
  HashMap interpretAttrs;
}
{
  (
    "<input"
      { inputAttrs = new HashMap(); inputs.add(inputAttrs); }
    ( inputItem(inputAttrs) )*
  |
    "<interpret"
      { interpretAttrs = new HashMap(); interprets.add(interpretAttrs); }
    ( matchItem(interpretAttrs) )*
  |
    // Unrecognised tag: accept its name and attributes but keep nothing.
    "<" <KEYWORD> ( <KEYWORD> [ <EQUALS> value() ] )*
  )
  ( "/>" | ">" )
}

/**
 * Parses one attribute of an "<input" tag and stores it in the given map
 * under the lower-cased attribute name.  The "=value" part is optional; an
 * attribute written as a bare name is stored with a null value.
 *
 * @param input the attribute map of the tag being parsed
 */
void inputItem(HashMap input) : {
  Token key;
  String attrValue = null;
}
{
  key=<KEYWORD>
  [ <EQUALS> attrValue=value() ]

  { input.put(key.image.toLowerCase(), attrValue); }
}


/**
 * Parses one name=value attribute of an "<interpret" tag and stores it in
 * the given map under the lower-cased attribute name.  Unlike inputItem,
 * the value is mandatory.
 *
 * @param interpret the attribute map of the tag being parsed
 */
void matchItem(HashMap interpret) : {
  Token key;
  String attrValue;
}
{
  key=<KEYWORD>
  <EQUALS>
  attrValue=value()

  { interpret.put(key.image.toLowerCase(), attrValue); }
}

/**
 * Parses an attribute value and returns its text.  For a quoted value the
 * first and last characters (the quotes) are stripped; an unquoted value
 * is returned exactly as matched.
 */
String value() : {
  Token t;
  String text;
}
{
  (
    t=<QUOTED_VALUE>   { text = t.image.substring(1, t.image.length() - 1); }
  |
    t=<UNQUOTED_VALUE> { text = t.image; }
  )
  { return text; }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -