📄 sre_state.java
字号:
rp.count = count - 1; this.ptr = ptr; return 0; } if (count < pattern[rp.pidx+2] || pattern[rp.pidx+2] == 65535) { /* we may have enough matches, but if we can match another item, do so */ rp.count = count; lastmark = this.lastmark; mark_save(0, lastmark); /* RECURSIVE */ i = SRE_MATCH(pattern, rp.pidx + 3, level + 1); if (i != 0) return i; mark_restore(0, lastmark); this.lastmark = lastmark; rp.count = count - 1; this.ptr = ptr; } /* cannot match more repeated items here. make sure the tail matches */ this.repeat = rp.prev; /* RECURSIVE */ i = SRE_MATCH(pattern, pidx, level + 1); if (i != 0) return i; this.repeat = rp; this.ptr = ptr; return 0; case SRE_OP_MIN_UNTIL: /* minimizing repeat */ /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */ rp = this.repeat; if (rp == null) return SRE_ERROR_STATE; count = rp.count + 1; //TRACE(pidx, ptr, "MIN_UNTIL " + count + " " + rp.pidx); this.ptr = ptr; if (count < pattern[rp.pidx + 1]) { /* not enough matches */ rp.count = count; /* RECURSIVE */ i = SRE_MATCH(pattern, rp.pidx + 3, level + 1); if (i != 0) return i; rp.count = count-1; this.ptr = ptr; return 0; } /* see if the tail matches */ this.repeat = rp.prev; i = SRE_MATCH(pattern, pidx, level + 1); if (i != 0) return i; this.ptr = ptr; this.repeat = rp; if (count >= pattern[rp.pidx+2] && pattern[rp.pidx+2] != 65535) return 0; rp.count = count; /* RECURSIVE */ i = SRE_MATCH(pattern, rp.pidx + 3, level + 1); if (i != 0) return i; rp.count = count - 1; this.ptr = ptr; return 0; default: //TRACE(pidx, ptr, "UNKNOWN " + (int) pattern[pidx-1]); return SRE_ERROR_ILLEGAL; } } //return SRE_ERROR_ILLEGAL; } int SRE_SEARCH(char[] pattern, int pidx) { int ptr = this.start; int end = this.end; int status = 0; int prefix_len = 0; int prefix_skip = 0; int prefix = 0; int charset = 0; int overlap = 0; int flags = 0; if (pattern[pidx] == SRE_OP_INFO) { /* optimization info block */ /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */ flags = pattern[pidx+2]; if (pattern[pidx+3] > 0) { /* adjust end point (but make sure we leave at least one character in there, so literal search will work) */ end -= pattern[pidx+3]-1; if (end <= ptr) end = ptr; // FBO } if ((flags & SRE_INFO_PREFIX) != 0) { /* pattern starts with a known prefix */ /* <length> <skip> <prefix data> <overlap data> */ prefix_len = pattern[pidx+5]; prefix_skip = pattern[pidx+6]; prefix = pidx + 7; overlap = prefix + prefix_len - 1; } else if ((flags & SRE_INFO_CHARSET) != 0) { /* pattern starts with a character from a known set */ /* <charset> */ charset = pidx + 5; } pidx += 1 + pattern[pidx+1]; } if (prefix_len > 1) { /* pattern starts with a known prefix. use the overlap table to skip forward as fast as we possibly can */ int i = 0; end = this.end; while (ptr < end) { for (;;) { if (str[ptr] != pattern[prefix+i]) { if (i == 0) break; else i = pattern[overlap+i]; } else { if (++i == prefix_len) { /* found a potential match */ //TRACE(pidx, ptr, "SEARCH SCAN " + prefix_skip + " " + prefix_len); this.start = ptr + 1 - prefix_len; this.ptr = ptr + 1 - prefix_len + prefix_skip; if ((flags & SRE_INFO_LITERAL) != 0) return 1; /* we got all of it */ status = SRE_MATCH(pattern, pidx + 2*prefix_skip, 1); if (status != 0) return status; /* close but no cigar -- try again */ i = pattern[overlap + i]; } break; } } ptr++; } return 0; } if (pattern[pidx] == SRE_OP_LITERAL) { /* pattern starts with a literal */ char chr = pattern[pidx + 1]; end = this.end; for (;;) { while (ptr < end && str[ptr] != chr) ptr++; if (ptr == end) return 0; //TRACE(pidx, ptr, "SEARCH LITERAL"); this.start = ptr; this.ptr = ++ptr; if ((flags & SRE_INFO_LITERAL) != 0) return 1; status = SRE_MATCH(pattern, pidx + 2, 1); if (status != 0) break; } } else if (charset != 0) { /* pattern starts with a character from a known set */ end = this.end; for (;;) { while (ptr < end && !SRE_CHARSET(pattern, charset, str[ptr])) ptr++; if (ptr == end) return 0; //TRACE(pidx, ptr, "SEARCH CHARSET"); this.start = ptr; this.ptr = ptr; status = SRE_MATCH(pattern, pidx, 1); if (status != 0) break; ptr++; } } else { /* general case */ while (ptr <= end) { //TRACE(pidx, ptr, "SEARCH"); this.start = this.ptr = ptr++; status = SRE_MATCH(pattern, pidx, 1); if (status != 0) break; } } return status; } final boolean sre_category(char category, char ch) { switch (category) { case SRE_CATEGORY_DIGIT: return SRE_IS_DIGIT(ch); case SRE_CATEGORY_NOT_DIGIT: return ! SRE_IS_DIGIT(ch); case SRE_CATEGORY_SPACE: return SRE_IS_SPACE(ch); case SRE_CATEGORY_NOT_SPACE: return ! SRE_IS_SPACE(ch); case SRE_CATEGORY_WORD: return SRE_IS_WORD(ch); case SRE_CATEGORY_NOT_WORD: return ! SRE_IS_WORD(ch); case SRE_CATEGORY_LINEBREAK: return SRE_IS_LINEBREAK(ch); case SRE_CATEGORY_NOT_LINEBREAK: return ! SRE_IS_LINEBREAK(ch); case SRE_CATEGORY_LOC_WORD: return SRE_LOC_IS_WORD(ch); case SRE_CATEGORY_LOC_NOT_WORD: return ! SRE_LOC_IS_WORD(ch); case SRE_CATEGORY_UNI_DIGIT: return Character.isDigit(ch); case SRE_CATEGORY_UNI_NOT_DIGIT: return !Character.isDigit(ch); case SRE_CATEGORY_UNI_SPACE: return Character.isWhitespace(ch); case SRE_CATEGORY_UNI_NOT_SPACE: return !Character.isWhitespace(ch); case SRE_CATEGORY_UNI_WORD: return Character.isLetterOrDigit(ch) || ch == '_'; case SRE_CATEGORY_UNI_NOT_WORD: return ! (Character.isLetterOrDigit(ch) || ch == '_'); case SRE_CATEGORY_UNI_LINEBREAK: return SRE_UNI_IS_LINEBREAK(ch); case SRE_CATEGORY_UNI_NOT_LINEBREAK: return ! SRE_UNI_IS_LINEBREAK(ch); } return false; } /* default character predicates (run sre_chars.py to regenerate tables) */ static final int SRE_DIGIT_MASK = 1; static final int SRE_SPACE_MASK = 2; static final int SRE_LINEBREAK_MASK = 4; static final int SRE_ALNUM_MASK = 8; static final int SRE_WORD_MASK = 16; static byte[] sre_char_info = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 }; static byte[] sre_char_lower = new byte[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127 }; final boolean SRE_IS_DIGIT(char ch) { return ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) != 0 : false); } final boolean SRE_IS_SPACE(char ch) { return ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) != 0 : false); } final boolean SRE_IS_WORD(char ch) { return ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) != 0 : false); } final boolean SRE_IS_LINEBREAK(char ch) { return ch == '\n'; } final boolean SRE_LOC_IS_WORD(char ch) { return Character.isLetterOrDigit(ch) || ch == '_'; } final boolean SRE_UNI_IS_LINEBREAK(char ch) { switch (ch) { case 0x000A: /* LINE FEED */ case 0x000D: /* CARRIAGE RETURN */ case 0x001C: /* FILE SEPARATOR */ case 0x001D: /* GROUP SEPARATOR */ case 0x001E: /* RECORD SEPARATOR */ case 0x0085: /* NEXT LINE */ case 0x2028: /* LINE SEPARATOR */ case 0x2029: /* PARAGRAPH SEPARATOR */ return true; default: return false; } } final char lower(char ch) { if ((flags & SRE_FLAG_LOCALE) != 0) return ((ch) < 256 ? Character.toLowerCase(ch) : ch); if ((flags & SRE_FLAG_UNICODE) != 0) return Character.toLowerCase(ch); return ((ch) < 128 ? (char)sre_char_lower[ch] : ch); } public static int getlower(int ch, int flags) { if ((flags & SRE_FLAG_LOCALE) != 0) return ((ch) < 256 ? Character.toLowerCase((char) ch) : ch); if ((flags & SRE_FLAG_UNICODE) != 0) return Character.toLowerCase((char)ch); return ((ch) < 128 ? (char)sre_char_lower[ch] : ch); } String getslice(int index, String string, boolean empty) { int i, j; index = (index - 1) * 2; if (string == null || mark[index] == -1 || mark[index+1] == -1) { if (empty) { /* want empty string */ i = j = 0; } else { return null; } } else { i = mark[index]; j = mark[index+1]; } return string.substring(i, j); } void state_reset() { lastmark = 0; /* FIXME: dynamic! */ for (int i = 0; i < mark.length; i++) mark[i] = -1; lastindex = -1; repeat = null; mark_fini(); } private void TRACE(int pidx, int ptr, String string) { //System.out.println(" |" + pidx + "|" + ptr + ": " + string); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -