⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 wildcard.java

📁 一个用java语言编写的网络爬虫程序
💻 JAVA
字号:
/*
 * WebSPHINX web crawling toolkit
 * Copyright (C) 1998,1999 Carnegie Mellon University
 *
 * This library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library
 * General Public License as published by the Free Software
 * Foundation, version 2.
 *
 * WebSPHINX homepage: http://www.cs.cmu.edu/~rcm/websphinx/
 */

package websphinx;

/**
 * Wildcard pattern.  Wildcards are similar to sh-style file globbing.
 * A wildcard pattern is implicitly anchored, meaning that it must match the entire string.
 * The wildcard operators are:
 * <PRE>
 *    ? matches one arbitrary character
 *    * matches zero or more arbitrary characters
 *    [xyz] matches characters x or y or z
 *    {foo,bar,baz}   matches expressions foo or bar or baz
 *    ()  grouping to extract fields
 *    \ escape one of these special characters
 * </PRE>
 * Escape codes (like \n and \t) and Perl5 character classes (like \w and \s) may also be used.
 * <P>
 * Inside a [...] set the operators ?, *, {, } and , stand for themselves.
 */
public class Wildcard extends Regexp {
    /** The original wildcard text, used for equality, hashing and toString. */
    String stringRep;

    /**
     * Makes a wildcard pattern.
     * @param pattern wildcard expression; it is translated by
     *        {@link #toRegexp} and anchored with ^ and $ before being
     *        handed to the Regexp superclass
     */
    public Wildcard (String pattern) {
        super ("^" + toRegexp (pattern) + "$");
        stringRep = pattern;
    }

    /**
     * Compares two wildcards by their original pattern text.
     * @param object object to compare with
     * @return true if and only if object is a Wildcard whose pattern
     *         text equals this one's
     */
    public boolean equals (Object object) {
        if (! (object instanceof Wildcard))
            return false;
        Wildcard p = (Wildcard)object;
        return p.stringRep.equals (stringRep);
    }

    /**
     * Hash code consistent with {@link #equals}: wildcards with equal
     * pattern text have equal hash codes.
     * @return hash code of the pattern text
     */
    public int hashCode () {
        return stringRep.hashCode ();
    }

    /**
     * Translates a wildcard expression into regular-expression syntax.
     * The result is not anchored; the constructor adds ^ and $.
     * @param wildcard wildcard expression
     * @return equivalent regular expression text
     */
    public static String toRegexp (String wildcard) {
        int inAlternative = 0;      // nesting depth of {...} groups
        boolean inSet = false;      // true between '[' and the next unescaped ']'
        boolean inEscape = false;   // true right after an unescaped backslash
        StringBuffer output = new StringBuffer ();

        int len = wildcard.length ();
        for (int i=0; i<len; ++i) {
            char c = wildcard.charAt (i);

            if (inEscape) {
                // the backslash was already copied; pass the escaped character through
                output.append (c);
                inEscape = false;
                continue;
            }

            switch (c) {
              case '\\':
                output.append (c);
                inEscape = true;
                break;

              case '?':
                // inside a set, ? is an ordinary set member
                if (inSet)
                    output.append (c);
                else
                    output.append ('.');
                break;

              case '*':
                // inside a set, * is an ordinary set member
                if (inSet)
                    output.append (c);
                else
                    output.append (".*");
                break;

              case '[':
                output.append (c);
                inSet = true;
                break;

              case ']':
                // NOTE(review): a ']' placed first in a set (as in "[]abc]")
                // is still treated as closing the set here.
                output.append (c);
                inSet = false;
                break;

              case '{':
                if (inSet)
                    output.append (c);
                else {
                    output.append ("(?:");
                    ++inAlternative;
                }
                break;

              case ',':
                // a comma separates alternatives only directly inside {...}
                if (!inSet && inAlternative > 0)
                    output.append ("|");
                else
                    output.append (c);
                break;

              case '}':
                if (inSet)
                    output.append (c);
                else {
                    output.append (")");
                    --inAlternative;
                }
                break;

              case '^':
                // ^ keeps its meaning inside a set but is a literal elsewhere
                if (!inSet)
                    output.append ('\\');
                output.append (c);
                break;

              case '$':
              case '.':
              case '|':
              case '+':
                // regexp metacharacters that are not wildcard operators
                output.append ('\\');
                output.append (c);
                break;

              default:
                output.append (c);
                break;
            }
        }

        // a trailing lone backslash is doubled so it does not dangle
        if (inEscape)
            output.append ('\\');

        return output.toString ();
    }

    /**
     * Escapes a string for use in a wildcard expression by delegating to
     * websphinx.util.Str.escape with backslash as the escape character and
     * the set of characters \ ? * { } ( ) [ ].
     * @param s string to escape
     * @return result of websphinx.util.Str.escape (presumably s with each of
     *         those characters preceded by a backslash; confirm against Str)
     */
    public static String escape (String s) {
        return websphinx.util.Str.escape (s, '\\', "\\?*{}()[]");
    }

    /**
     * Returns the original wildcard text.
     * @return the pattern string passed to the constructor
     */
    public String toString () {
        return stringRep;
    }

    /**
     * Command-line tester.  The first argument is the wildcard (with every
     * '_' replaced by a space); each remaining argument is matched against
     * it, printing whether it matched, the matched region, and any regions
     * stored under the "websphinx.groups" field.
     * @param args pattern followed by one or more strings to test
     */
    public static void main (String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println ("usage: Wildcard <pattern> <string>*");
            return;
        }

        Pattern p = new Wildcard (args[0].replace ('_', ' ') );
        for (int i=1; i<args.length; ++i) {
            Region r = p.oneMatch (args[i]);
            System.out.println (args[i] + ": " + (r != null));
            if (r != null) {
                System.out.println ("  [" + r.getStart() + "," + r.getEnd() + "]" + r);
                Region[] groups = r.getFields ("websphinx.groups");
                if (groups != null)
                    for (int j=0; j<groups.length; ++j) {
                        Region s = groups[j];
                        System.out.println ("    "+"[" + s.getStart() + "," + s.getEnd() + "]" + s);
                    }
            }
        }
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -