📄 javaregexpmodule.java
字号:
/* * Copyright (c) 1998-2007 Caucho Technology -- all rights reserved * * This file is part of Resin(R) Open Source * * Each copy or derived work must preserve the copyright notice and this * notice unmodified. * * Resin Open Source is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * Resin Open Source is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty * of NON-INFRINGEMENT. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License * along with Resin Open Source; if not, write to the * * Free Software Foundation, Inc. * 59 Temple Place, Suite 330 * Boston, MA 02111-1307 USA * * @author Scott Ferguson */package com.caucho.quercus.lib.regexp;import com.caucho.quercus.QuercusException;import com.caucho.quercus.QuercusModuleException;import com.caucho.quercus.QuercusRuntimeException;import com.caucho.quercus.annotation.Optional;import com.caucho.quercus.annotation.Reference;import com.caucho.quercus.annotation.UsesSymbolTable;import com.caucho.quercus.env.*;import com.caucho.quercus.module.AbstractQuercusModule;import com.caucho.util.L10N;import com.caucho.util.LruCache;import java.io.CharConversionException;import java.io.IOException;import java.util.ArrayList;import java.util.HashMap;import java.util.Iterator;import java.util.Map;import java.util.logging.Level;import java.util.logging.Logger;import java.util.regex.Matcher;import java.util.regex.Pattern;public class JavaRegexpModule extends AbstractQuercusModule{ private static final L10N L = new L10N(RegexpModule.class); public static final int PREG_REPLACE_EVAL = 0x01; public static final int PCRE_UTF8 = 0x02; public static final int PREG_PATTERN_ORDER = 0x01; public static final int PREG_SET_ORDER = 0x02; public static final int PREG_OFFSET_CAPTURE = 0x04; public static final int PREG_SPLIT_NO_EMPTY = 0x01; public static final int PREG_SPLIT_DELIM_CAPTURE = 0x02; public static final int PREG_SPLIT_OFFSET_CAPTURE = 0x04; public static final int PREG_GREP_INVERT = 1; public static final boolean [] PREG_QUOTE = new boolean[256]; private static final LruCache<StringValue, PCREPattern> _namePatternCache = new LruCache<StringValue, PCREPattern>(1024); private static final LruCache<StringValue, Pattern> _patternCache = new LruCache<StringValue, Pattern>(1024); private static final LruCache<StringValue, ArrayList<Replacement>> _replacementCache = new LruCache<StringValue, ArrayList<Replacement>>(1024); private static final HashMap<String, Value> _constMap = new HashMap<String, Value>(); public String []getLoadedExtensions() { return new String[] { "pcre" }; } /** * Returns the index of the first match. * * @param env the calling environment */ public static Value ereg(Env env, Value pattern, StringValue string, @Optional @Reference Value regsV) { return eregImpl(env, pattern, string, regsV, 0); } /** * Returns the index of the first match. * * @param env the calling environment */ public static Value eregi(Env env, Value pattern, StringValue string, @Optional @Reference Value regsV) { return eregImpl(env, pattern, string, regsV, Pattern.CASE_INSENSITIVE); } /** * Returns the index of the first match. * * @param env the calling environment */ protected static Value eregImpl(Env env, Value rawPattern, StringValue string, Value regsV, int flags) { // php/1511 : error when pattern argument is null or an empty string if (rawPattern.length() == 0) { env.warning(L.l("empty pattern argument")); return BooleanValue.FALSE; } StringValue rawPatternStr; // php/1512.qa : non-string pattern argument is converted to // an integer value and formatted as a string. if (!(rawPattern instanceof StringValue)) { rawPatternStr = rawPattern.toLongValue().toStringValue(); } else { rawPatternStr = rawPattern.toStringValue(); } String cleanPattern = cleanEregRegexp(rawPatternStr, false); Pattern pattern = Pattern.compile(cleanPattern, flags); Matcher matcher = pattern.matcher(string); if (! (matcher.find())) { return BooleanValue.FALSE; } if (regsV != null && ! (regsV instanceof NullValue)) { ArrayValue regs = new ArrayValueImpl(); regsV.set(regs); regs.put(LongValue.ZERO, env.createString(matcher.group())); int count = matcher.groupCount(); for (int i = 1; i <= count; i++) { String group = matcher.group(i); Value value; if (group == null) value = BooleanValue.FALSE; else value = env.createString(group); regs.put(new LongValue(i), value); } int len = matcher.end() - matcher.start(); if (len == 0) return LongValue.ONE; else return new LongValue(len); } else { return LongValue.ONE; } } /** * Returns the index of the first match. * * php/151u * The array that preg_match (PHP 5) returns does not have trailing unmatched * groups. Therefore, an unmatched group should not be added to the array * unless a matched group appears after it. A couple applications like * Gallery2 expect this behavior in order to function correctly. * * Only preg_match and preg_match_all(PREG_SET_ORDER) exhibits this odd * behavior. * * @param env the calling environment */ public static Value preg_match(Env env, StringValue regexp, StringValue subject, @Optional @Reference Value matchRef, @Optional int flags, @Optional int offset) { if (regexp.length() < 2) { env.warning(L.l("Regexp pattern must have opening and closing delimiters")); return BooleanValue.FALSE; } PCREPattern pcrePattern = _namePatternCache.get(regexp); if (pcrePattern == null) { pcrePattern = new PCREPattern(env, regexp); _namePatternCache.put(regexp, pcrePattern); } Matcher matcher = pcrePattern.matcher(env, subject); ArrayValue regs; if (matchRef instanceof DefaultValue) regs = null; else regs = new ArrayValueImpl(); if ((matcher == null) || (! (matcher.find(offset)))) { matchRef.set(regs); return LongValue.ZERO; } boolean isOffsetCapture = (flags & PREG_OFFSET_CAPTURE) != 0; if (regs != null) { if (isOffsetCapture) { ArrayValueImpl part = new ArrayValueImpl(); part.append(env.createString(matcher.group())); part.append(new LongValue(matcher.start())); regs.put(LongValue.ZERO, part); } else regs.put(LongValue.ZERO, env.createString(matcher.group())); int count = matcher.groupCount(); for (int i = 1; i <= count; i++) { String group = matcher.group(i); if (group == null) continue; if (isOffsetCapture) { // php/151u // add unmatched groups first for (int j = regs.getSize(); j < i; j++) { ArrayValue part = new ArrayValueImpl(); part.append(env.getEmptyString()); part.append(LongValue.MINUS_ONE); regs.put(new LongValue(j), part); } ArrayValueImpl part = new ArrayValueImpl(); part.append(env.createString(group)); part.append(new LongValue(matcher.start(i))); Value name = pcrePattern.get(i); if (name != null) regs.put(name, part); regs.put(new LongValue(i), part); } else { // php/151u // add unmatched groups first for (int j = regs.getSize(); j < i; j++) { regs.put(new LongValue(j), env.getEmptyString()); } StringValue match = env.createString(group); Value name = pcrePattern.get(i); if (name != null) regs.put(name, match); regs.put(new LongValue(i), match); } } matchRef.set(regs); } return LongValue.ONE; } /** * Returns the index of the first match. * * @param env the calling environment */ public static Value preg_match_all(Env env, StringValue regexp, StringValue subject, @Reference Value matchRef, @Optional("PREG_PATTERN_ORDER") int flags, @Optional int offset) { if (regexp.length() < 2) { env.warning(L.l("Pattern must have at least opening and closing delimiters")); return LongValue.ZERO; } if ((flags & PREG_PATTERN_ORDER) == 0) { // php/152m if ((flags & PREG_SET_ORDER) == 0) { flags = flags | PREG_PATTERN_ORDER; } } else { if ((flags & PREG_SET_ORDER) != 0) { env.warning((L.l("Cannot combine PREG_PATTER_ORDER and PREG_SET_ORDER"))); return LongValue.ZERO; } } PCREPattern pcrePattern = _namePatternCache.get(regexp); if (pcrePattern == null) { pcrePattern = new PCREPattern(env, regexp); _namePatternCache.put(regexp, pcrePattern); } ArrayValue matches; if (matchRef instanceof ArrayValue) matches = (ArrayValue) matchRef; else matches = new ArrayValueImpl(); matches.clear(); matchRef.set(matches); if ((flags & PREG_PATTERN_ORDER) != 0) { return pregMatchAllPatternOrder(env, pcrePattern, subject, matches, flags, offset); } else if ((flags & PREG_SET_ORDER) != 0) { return pregMatchAllSetOrder(env, pcrePattern, subject, matches, flags, offset); } else throw new UnsupportedOperationException(); } /** * Returns the index of the first match. * * @param env the calling environment */ public static Value pregMatchAllPatternOrder(Env env, PCREPattern pcrePattern, StringValue subject, ArrayValue matches, int flags, int offset) { Matcher matcher = pcrePattern.matcher(env, subject); int groupCount = matcher == null ? 0 : matcher.groupCount(); ArrayValue []matchList = new ArrayValue[groupCount + 1]; for (int j = 0; j <= groupCount; j++) { ArrayValue values = new ArrayValueImpl(); Value patternName = pcrePattern.get(j); // XXX: named subpatterns causing conflicts with array indexes? if (patternName != null) matches.put(patternName, values); matches.put(values); matchList[j] = values; } if (matcher == null || (! (matcher.find()))) { return LongValue.ZERO; } int count = 0; do { count++; for (int j = 0; j <= groupCount; j++) { ArrayValue values = matchList[j]; int start = matcher.start(j); int end = matcher.end(j); StringValue groupValue = subject.substring(start, end); if (groupValue != null) groupValue = groupValue.toUnicodeValue(env); Value result = NullValue.NULL; if (groupValue != null) { if ((flags & PREG_OFFSET_CAPTURE) != 0) { result = new ArrayValueImpl(); result.put(groupValue); result.put(LongValue.create(start)); } else { result = groupValue; } } values.put(result); } } while (matcher.find()); return LongValue.create(count); } /** * Returns the index of the first match. * * @param env the calling environment */ private static Value pregMatchAllSetOrder(Env env, PCREPattern pattern, StringValue subject, ArrayValue matches, int flags, int offset) { Matcher matcher = pattern.matcher(env, subject); if ((matcher == null) || (! (matcher.find()))) { return LongValue.ZERO; } int count = 0; do { count++; ArrayValue matchResult = new ArrayValueImpl(); matches.put(matchResult); for (int i = 0; i <= matcher.groupCount(); i++) { int start = matcher.start(i); int end = matcher.end(i); // group is unmatched, skip if (end - start <= 0) continue; StringValue groupValue = subject.substring(start, end); if (groupValue != null) groupValue = groupValue.toUnicodeValue(env); Value result = NullValue.NULL; if (groupValue != null) { if ((flags & PREG_OFFSET_CAPTURE) != 0) { // php/152n // add unmatched groups first for (int j = matchResult.getSize(); j < i; j++) { ArrayValue part = new ArrayValueImpl(); part.append(env.getEmptyString()); part.append(LongValue.MINUS_ONE); matchResult.put(LongValue.create(j), part); } result = new ArrayValueImpl(); result.put(groupValue); result.put(LongValue.create(start)); } else { // php/ // add unmatched groups that was skipped for (int j = matchResult.getSize(); j < i; j++) { matchResult.put(LongValue.create(j), env.getEmptyString()); } result = groupValue; } } matchResult.put(result); } } while (matcher.find()); return LongValue.create(count); } /** * Quotes regexp values */ public static StringValue preg_quote(StringValue string, @Optional StringValue delim) { StringValue sb = string.createStringBuilder(); boolean []extra = null; if (delim != null && ! delim.equals("")) { extra = new boolean[256]; for (int i = 0; i < delim.length(); i++) extra[delim.charAt(i)] = true; } int length = string.length(); for (int i = 0; i < length; i++) { char ch = string.charAt(i); if (ch >= 256) sb.append(ch); else if (PREG_QUOTE[ch]) { sb.append('\\'); sb.append(ch); } else if (extra != null && extra[ch]) { sb.append('\\'); sb.append(ch); } else sb.append(ch); } return sb; } /** * Loops through subject if subject is array of strings * * @param env * @param pattern string or array * @param replacement string or array * @param subject string or array * @param limit * @param count * @return */ @UsesSymbolTable public static Value preg_replace(Env env, Value pattern, Value replacement, Value subject, @Optional("-1") long limit, @Optional @Reference Value count) { if (subject instanceof ArrayValue) { ArrayValue result = new ArrayValueImpl(); for (Value value : ((ArrayValue) subject).values()) { result.put(pregReplace(env, pattern, replacement, value.toStringValue(), limit, count)); } return result; } else if (subject.isset()) { return pregReplace(env, pattern, replacement, subject.toStringValue(), limit, count); } else return env.getEmptyString(); } /** * Replaces values using regexps */ private static Value pregReplace(Env env, Value patternValue, Value replacement, StringValue subject, @Optional("-1") long limit, Value countV) { StringValue string = subject; if (limit < 0) limit = Long.MAX_VALUE;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -