📄 wildcard.java
字号:
/* * WebSPHINX web crawling toolkit * Copyright (C) 1998,1999 Carnegie Mellon University * * This library is free software; you can redistribute it * and/or modify it under the terms of the GNU Library * General Public License as published by the Free Software * Foundation, version 2. * * WebSPHINX homepage: http://www.cs.cmu.edu/~rcm/websphinx/ */package websphinx;/** * Wildcard pattern. Wildcards are similar to sh-style file globbing. * A wildcard pattern is implicitly anchored, meaning that it must match the entire string. * The wildcard operators are: * <PRE> * ? matches one arbitrary character * * matches zero or more arbitrary characters * [xyz] matches characters x or y or z * {foo,bar,baz} matches expressions foo or bar or baz * () grouping to extract fields * \ escape one of these special characters * </PRE> * Escape codes (like \n and \t) and Perl5 character classes (like \w and \s) may also be used. */public class Wildcard extends Regexp { String stringRep; public Wildcard (String pattern) { super ("^" + toRegexp (pattern) + "$"); stringRep = pattern; } public boolean equals (Object object) { if (! (object instanceof Wildcard)) return false; Wildcard p = (Wildcard)object; return p.stringRep.equals (stringRep); } public static String toRegexp (String wildcard) { String s = wildcard; int inAlternative = 0; int inSet = 0; boolean inEscape = false; StringBuffer output = new StringBuffer (); int len = s.length (); for (int i=0; i<len; ++i) { char c = s.charAt (i); if (inEscape) { output.append (c); inEscape = false; } else { switch (c) { case '\\': output.append (c); inEscape = true; break; case '?': output.append ('.'); break; case '*': output.append (".*"); break; case '[': output.append (c); ++inSet; break; case ']': // FIX: handle [] case properly output.append (c); --inSet; break; case '{': output.append ("(?:"); ++inAlternative; break; case ',': if (inAlternative > 0) output.append ("|"); else output.append (c); break; case '}': output.append (")"); --inAlternative; break; case '^': if (inSet > 0) { output.append (c); } else { output.append ('\\'); output.append (c); } break; case '$': case '.': case '|': case '+': output.append ('\\'); output.append (c); break; default: output.append (c); break; } } } if (inEscape) output.append ('\\'); return output.toString (); } public static String escape (String s) { return websphinx.util.Str.escape (s, '\\', "\\?*{}()[]"); } public String toString () { return stringRep; } public static void main (String[] args) throws Exception { if (args.length < 2) { System.err.println ("usage: Wildcard <pattern> <string>*"); return; } Pattern p = new Wildcard (args[0].replace ('_', ' ') ); for (int i=1; i<args.length; ++i) { Region r = p.oneMatch (args[i]); System.out.println (args[i] + ": " + (r != null)); if (r != null) { System.out.println (" [" + r.getStart() + "," + r.getEnd() + "]" + r); Region[] groups = r.getFields ("websphinx.groups"); if (groups != null) for (int j=0; j<groups.length; ++j) { Region s = groups[j]; System.out.println (" "+"[" + s.getStart() + "," + s.getEnd() + "]" + s); } } } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -