📄 sre_state.java
字号:
/* * Copyright 2000 Finn Bock * * This program contains material copyrighted by: * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved. * * This version of the SRE library can be redistributed under CNRI's * Python 1.6 license. For any other use, please contact Secret Labs * AB (info@pythonware.com). * * Portions of this engine have been developed in cooperation with * CNRI. Hewlett-Packard provided funding for 1.6 integration and * other compatibility work. */// Last updated to _sre.c: 2.52package org.python.modules.sre;import java.util.*;public class SRE_STATE { /* illegal opcode */ public static final int SRE_ERROR_ILLEGAL = -1; /* illegal state */ public static final int SRE_ERROR_STATE = -2; /* runaway recursion */ public static final int SRE_ERROR_RECURSION_LIMIT = -3; public static final int SRE_OP_FAILURE = 0; public static final int SRE_OP_SUCCESS = 1; public static final int SRE_OP_ANY = 2; public static final int SRE_OP_ANY_ALL = 3; public static final int SRE_OP_ASSERT = 4; public static final int SRE_OP_ASSERT_NOT = 5; public static final int SRE_OP_AT = 6; public static final int SRE_OP_BRANCH = 7; public static final int SRE_OP_CALL = 8; public static final int SRE_OP_CATEGORY = 9; public static final int SRE_OP_CHARSET = 10; public static final int SRE_OP_BIGCHARSET = 11; public static final int SRE_OP_GROUPREF = 12; public static final int SRE_OP_GROUPREF_IGNORE = 13; public static final int SRE_OP_IN = 14; public static final int SRE_OP_IN_IGNORE = 15; public static final int SRE_OP_INFO = 16; public static final int SRE_OP_JUMP = 17; public static final int SRE_OP_LITERAL = 18; public static final int SRE_OP_LITERAL_IGNORE = 19; public static final int SRE_OP_MARK = 20; public static final int SRE_OP_MAX_UNTIL = 21; public static final int SRE_OP_MIN_UNTIL = 22; public static final int SRE_OP_NOT_LITERAL = 23; public static final int SRE_OP_NOT_LITERAL_IGNORE = 24; public static final int SRE_OP_NEGATE = 25; public static final int SRE_OP_RANGE = 26; public static final int SRE_OP_REPEAT = 27; public static final int SRE_OP_REPEAT_ONE = 28; public static final int SRE_OP_SUBPATTERN = 29; public static final int SRE_AT_BEGINNING = 0; public static final int SRE_AT_BEGINNING_LINE = 1; public static final int SRE_AT_BEGINNING_STRING = 2; public static final int SRE_AT_BOUNDARY = 3; public static final int SRE_AT_NON_BOUNDARY = 4; public static final int SRE_AT_END = 5; public static final int SRE_AT_END_LINE = 6; public static final int SRE_AT_END_STRING = 7; public static final int SRE_AT_LOC_BOUNDARY = 8; public static final int SRE_AT_LOC_NON_BOUNDARY = 9; public static final int SRE_AT_UNI_BOUNDARY = 10; public static final int SRE_AT_UNI_NON_BOUNDARY = 11; public static final int SRE_CATEGORY_DIGIT = 0; public static final int SRE_CATEGORY_NOT_DIGIT = 1; public static final int SRE_CATEGORY_SPACE = 2; public static final int SRE_CATEGORY_NOT_SPACE = 3; public static final int SRE_CATEGORY_WORD = 4; public static final int SRE_CATEGORY_NOT_WORD = 5; public static final int SRE_CATEGORY_LINEBREAK = 6; public static final int SRE_CATEGORY_NOT_LINEBREAK = 7; public static final int SRE_CATEGORY_LOC_WORD = 8; public static final int SRE_CATEGORY_LOC_NOT_WORD = 9; public static final int SRE_CATEGORY_UNI_DIGIT = 10; public static final int SRE_CATEGORY_UNI_NOT_DIGIT = 11; public static final int SRE_CATEGORY_UNI_SPACE = 12; public static final int SRE_CATEGORY_UNI_NOT_SPACE = 13; public static final int SRE_CATEGORY_UNI_WORD = 14; public static final int SRE_CATEGORY_UNI_NOT_WORD = 15; public static final int SRE_CATEGORY_UNI_LINEBREAK = 16; public static final int SRE_CATEGORY_UNI_NOT_LINEBREAK = 17; public static final int SRE_FLAG_TEMPLATE = 1; public static final int SRE_FLAG_IGNORECASE = 2; public static final int SRE_FLAG_LOCALE = 4; public static final int SRE_FLAG_MULTILINE = 8; public static final int SRE_FLAG_DOTALL = 16; public static final int SRE_FLAG_UNICODE = 32; public static final int SRE_FLAG_VERBOSE = 64; public static final int SRE_INFO_PREFIX = 1; public static final int SRE_INFO_LITERAL = 2; public static final int SRE_INFO_CHARSET = 4; public static final int USE_RECURSION_LIMIT = 2000; /* string pointers */ int ptr; /* current position (also end of current slice) */ int beginning; /* start of original string */ int start; /* start of current slice */ int end; /* end of original string */ /* attributes for the match object */ char[] str; int pos; int endpos; /* character size */ int charsize; /* registers */ int lastindex; int lastmark; /* FIXME: <fl> should be dynamically allocated! */ int[] mark = new int[200]; /* dynamically allocated stuff */ int[] mark_stack; int mark_stack_size; int mark_stack_base; SRE_REPEAT repeat; /* current repeat context */ /* debugging */ int maxlevel; /* duplicated from the PatternObject */ int flags; public SRE_STATE(String str, int start, int end, int flags) { this.str = str.toCharArray(); int size = str.length(); this.charsize = 1; /* adjust boundaries */ if (start < 0) start = 0; else if (start > size) start = size; if (end < 0) end = 0; else if (end > size) end = size; this.start = start; this.end = end; this.pos = start; this.endpos = end; state_reset(); this.flags = flags; } private void mark_fini() { mark_stack = null; mark_stack_size = mark_stack_base = 0; } private void mark_save(int lo, int hi) { if (hi <= lo) return; int size = (hi - lo) + 1; int newsize = mark_stack_size; int minsize = mark_stack_base + size; int[] stack; if (newsize < minsize) { /* create new stack */ if (newsize == 0) { newsize = 512; if (newsize < minsize) newsize = minsize; //TRACE(0, ptr, "allocate stack " + newsize); stack = new int[newsize]; } else { /* grow the stack */ while (newsize < minsize) newsize += newsize; //TRACE(0, ptr, "grow stack to " + newsize); stack = new int[newsize]; System.arraycopy(mark_stack, 0, stack, 0, mark_stack.length); } mark_stack = stack; mark_stack_size = newsize; } //TRACE(0, ptr, "copy " + lo + ":" + hi + " to " + mark_stack_base + " (" + size + ")"); System.arraycopy(mark, lo, mark_stack, mark_stack_base, size); mark_stack_base += size; } private void mark_restore(int lo, int hi) { if (hi <= lo) return; int size = (hi - lo) + 1; mark_stack_base -= size; //TRACE(0, ptr, "copy " + lo + ":" + hi + " from " + mark_stack_base); System.arraycopy(mark_stack, mark_stack_base, mark, lo, size); } final boolean SRE_AT(int ptr, char at) { /* check if pointer is at given position. return 1 if so, 0 otherwise */ boolean thiS, that; switch (at) { case SRE_AT_BEGINNING: case SRE_AT_BEGINNING_STRING: return ptr == beginning; case SRE_AT_BEGINNING_LINE: return (ptr == beginning || SRE_IS_LINEBREAK(str[ptr-1])); case SRE_AT_END: return (ptr+1 == end && SRE_IS_LINEBREAK(str[ptr])) || ptr == end; case SRE_AT_END_LINE: return ptr == end || SRE_IS_LINEBREAK(str[ptr]); case SRE_AT_END_STRING: return ptr == end; case SRE_AT_BOUNDARY: /* word boundary */ if (beginning == end) return false; that = (ptr > beginning) ? SRE_IS_WORD(str[ptr-1]) : false; thiS = (ptr < end) ? SRE_IS_WORD(str[ptr]) : false; return thiS != that; case SRE_AT_NON_BOUNDARY: /* word non-boundary */ if (beginning == end) return false; that = (ptr > beginning) ? SRE_IS_WORD(str[ptr-1]) : false; thiS = (ptr < end) ? SRE_IS_WORD(str[ptr]) : false; return thiS == that; case SRE_AT_LOC_BOUNDARY: case SRE_AT_UNI_BOUNDARY: if (beginning == end) return false; that = (ptr > beginning) ? SRE_LOC_IS_WORD(str[ptr-1]) : false; thiS = (ptr < end) ? SRE_LOC_IS_WORD(str[ptr]) : false; return thiS != that; case SRE_AT_LOC_NON_BOUNDARY: case SRE_AT_UNI_NON_BOUNDARY: /* word non-boundary */ if (beginning == end) return false; that = (ptr > beginning) ? SRE_LOC_IS_WORD(str[ptr-1]) : false; thiS = (ptr < end) ? SRE_LOC_IS_WORD(str[ptr]) : false; return thiS == that; } return false; } final boolean SRE_CHARSET(char[] set, int setidx, char ch) { /* check if character is a member of the given set. return 1 if so, 0 otherwise */ boolean ok = true; for (;;) { switch (set[setidx++]) { case SRE_OP_LITERAL: //TRACE(setidx, ch, "CHARSET LITERAL " + (int) set[setidx]); /* <LITERAL> <code> */ if (ch == set[setidx]) return ok; setidx++; break; case SRE_OP_RANGE: /* <RANGE> <lower> <upper> */ //TRACE(setidx, ch, "CHARSET RANGE " + (int) set[setidx] + " " + (int) set[setidx+1]); if (set[setidx] <= ch && ch <= set[setidx+1]) return ok; setidx += 2; break; case SRE_OP_CHARSET: //TRACE(setidx, ch, "CHARSET CHARSET "); /* <CHARSET> <bitmap> (16 bits per code word) */ if (ch < 256 && (set[setidx + (ch >> 4)] & (1 << (ch & 15))) != 0) return ok; setidx += 16; break; case SRE_OP_BIGCHARSET: /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */ //TRACE(setidx, ch, "CHARSET BIGCHARSET "); int count = set[setidx++]; int shift = ((ch >> 8) & 1) == 0 ? 8 : 0; int block = (set[setidx + (ch >> 8) / 2] >> shift) & 0xFF; setidx += 128; int idx = block*16 + ((ch & 255)>>4); if ((set[setidx + idx] & (1 << (ch & 15))) != 0) return ok; setidx += count*16; break; case SRE_OP_CATEGORY: /* <CATEGORY> <code> */ //TRACE(setidx, ch, "CHARSET CHARSET " + (int) set[setidx]); if (sre_category(set[setidx], ch)) return ok; setidx++; break; case SRE_OP_NEGATE: //TRACE(setidx, ch, "CHARSET NEGATE"); ok = !ok; break; case SRE_OP_FAILURE: //TRACE(setidx, ch, "CHARSET FAILURE"); return !ok; default: /* internal error -- there's not much we can do about it here, so let's just pretend it didn't match... */ return false; } } } private int SRE_COUNT(char[] pattern, int pidx, int maxcount, int level) { char chr; int ptr = this.ptr; int end = this.end; int i; /* adjust end */ if (maxcount < end - ptr && maxcount != 65535) end = ptr + maxcount; switch (pattern[pidx]) { case SRE_OP_ANY: /* repeated dot wildcard. */ //TRACE(pidx, ptr, "COUNT ANY"); while (ptr < end && !SRE_IS_LINEBREAK(str[ptr])) ptr++; break; case SRE_OP_ANY_ALL: /* repeated dot wildcare. skip to the end of the target string, and backtrack from there */ //TRACE(pidx, ptr, "COUNT ANY_ALL"); ptr = end; break; case SRE_OP_LITERAL: /* repeated literal */ chr = pattern[pidx+1]; //TRACE(pidx, ptr, "COUNT LITERAL " + (int) chr); while (ptr < end && str[ptr] == chr) ptr++; break; case SRE_OP_LITERAL_IGNORE:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -