📄 sre_state.java
字号:
/* repeated literal */ chr = pattern[pidx+1]; //TRACE(pidx, ptr, "COUNT LITERAL_IGNORE " + (int) chr); while (ptr < end && lower(str[ptr]) == chr) ptr++; break; case SRE_OP_NOT_LITERAL: /* repeated non-literal */ chr = pattern[pidx+1]; //TRACE(pidx, ptr, "COUNT NOT_LITERAL " + (int) chr); while (ptr < end && str[ptr] != chr) ptr++; break; case SRE_OP_NOT_LITERAL_IGNORE: /* repeated non-literal */ chr = pattern[pidx+1]; //TRACE(pidx, ptr, "COUNT NOT_LITERAL_IGNORE " + (int) chr); while (ptr < end && lower(str[ptr]) != chr) ptr++; break; case SRE_OP_IN: /* repeated set */ //TRACE(pidx, ptr, "COUNT IN"); while (ptr < end && SRE_CHARSET(pattern, pidx + 2, str[ptr])) ptr++; break; default: /* repeated single character pattern */ //TRACE(pidx, ptr, "COUNT SUBPATTERN"); while (this.ptr < end) { i = SRE_MATCH(pattern, pidx, level); if (i < 0) return i; if (i == 0) break; } return this.ptr - ptr; } return ptr - this.ptr; } final int SRE_MATCH(char[] pattern, int pidx, int level) { /* check if string matches the given pattern. returns <0 for error, 0 for failure, and 1 for success */ int end = this.end; int ptr = this.ptr; int i, count; char chr; int lastmark; //TRACE(pidx, ptr, "ENTER " + level); if (level > USE_RECURSION_LIMIT) return SRE_ERROR_RECURSION_LIMIT; if (pattern[pidx] == SRE_OP_INFO) { /* optimization info block */ /* args: <1=skip> <2=flags> <3=min> ... */ if (pattern[pidx+3] != 0 && (end - ptr) < pattern[pidx+3]) { return 0; } pidx += pattern[pidx+1] + 1; } for (;;) { switch (pattern[pidx++]) { case SRE_OP_FAILURE: /* immediate failure */ //TRACE(pidx, ptr, "FAILURE"); return 0; case SRE_OP_SUCCESS: /* end of pattern */ //TRACE(pidx, ptr, "SUCCESS"); this.ptr = ptr; return 1; case SRE_OP_AT: /* match at given position */ /* <AT> <code> */ //TRACE(pidx, ptr, "AT " + (int) pattern[pidx]); if (!SRE_AT(ptr, pattern[pidx])) return 0; pidx++; break; case SRE_OP_CATEGORY: /* match at given category */ /* <CATEGORY> <code> */ //TRACE(pidx, ptr, "CATEGORY " + (int)pattern[pidx]); if (ptr >= end || !sre_category(pattern[pidx], str[ptr])) return 0; pidx++; ptr++; break; case SRE_OP_LITERAL: /* match literal character */ /* <LITERAL> <code> */ //TRACE(pidx, ptr, "LITERAL " + (int) pattern[pidx]); if (ptr >= end || str[ptr] != pattern[pidx]) return 0; pidx++; ptr++; break; case SRE_OP_NOT_LITERAL: /* match anything that is not literal character */ /* args: <code> */ //TRACE(pidx, ptr, "NOT_LITERAL " + (int) pattern[pidx]); if (ptr >= end || str[ptr] == pattern[pidx]) return 0; pidx++; ptr++; break; case SRE_OP_ANY: /* match anything */ //TRACE(pidx, ptr, "ANY"); if (ptr >= end || SRE_IS_LINEBREAK(str[ptr])) return 0; ptr++; break; case SRE_OP_ANY_ALL: /* match anything */ /* <ANY_ALL> */ //TRACE(pidx, ptr, "ANY_ALL"); if (ptr >= end) return 0; ptr++; break; case SRE_OP_IN: /* match set member (or non_member) */ /* <IN> <skip> <set> */ //TRACE(pidx, ptr, "IN"); if (ptr >= end || !SRE_CHARSET(pattern, pidx + 1, str[ptr])) return 0; pidx += (int)pattern[pidx]; ptr++; break; case SRE_OP_GROUPREF: /* match backreference */ i = pattern[pidx]; //TRACE(pidx, ptr, "GROUPREF " + i); int p = mark[i+i]; int e = mark[i+i+1]; if (p == -1 || e == -1 || e < p) return 0; while (p < e) { if (ptr >= end || str[ptr] != str[p]) return 0; p++; ptr++; } pidx++; break; case SRE_OP_GROUPREF_IGNORE: /* match backreference */ i = pattern[pidx]; //TRACE(pidx, ptr, "GROUPREF_IGNORE " + i); p = mark[i+i]; e = mark[i+i+1]; if (p == -1 || e == -1 || e < p) return 0; while (p < e) { if (ptr >= end || lower(str[ptr]) != lower(str[p])) return 0; p++; ptr++; } pidx++; break; case SRE_OP_LITERAL_IGNORE: //TRACE(pidx, ptr, "LITERAL_IGNORE " + (int) pattern[pidx]); if (ptr >= end || lower(str[ptr]) != lower(pattern[pidx])) return 0; pidx++; ptr++; break; case SRE_OP_NOT_LITERAL_IGNORE: //TRACE(pidx, ptr, "NOT_LITERAL_IGNORE " + (int) pattern[pidx]); if (ptr >= end || lower(str[ptr]) == lower(pattern[pidx])) return 0; pidx++; ptr++; break; case SRE_OP_IN_IGNORE: //TRACE(pidx, ptr, "IN_IGNORE"); if (ptr >= end || !SRE_CHARSET(pattern, pidx + 1, lower(str[ptr]))) return 0; pidx += (int)pattern[pidx]; ptr++; break; case SRE_OP_MARK: /* set mark */ /* <MARK> <gid> */ //TRACE(pidx, ptr, "MARK " + (int) pattern[pidx]); i = pattern[pidx]; if ((i & 1) != 0) lastindex = i / 2 + 1; if (i > this.lastmark) this.lastmark = i; mark[i] = ptr; pidx++; break; case SRE_OP_JUMP: case SRE_OP_INFO: /* jump forward */ /* <JUMP> <offset> */ //TRACE(pidx, ptr, "JUMP " + (int) pattern[pidx]); pidx += (int)pattern[pidx]; break; case SRE_OP_ASSERT: /* assert subpattern */ /* args: <skip> <back> <pattern> */ //TRACE(pidx, ptr, "ASSERT " + (int) pattern[pidx+1]); this.ptr = ptr - pattern[pidx + 1]; if (this.ptr < this.beginning) return 0; i = SRE_MATCH(pattern, pidx + 2, level + 1); if (i <= 0) return i; pidx += pattern[pidx]; break; case SRE_OP_ASSERT_NOT: /* assert not subpattern */ /* args: <skip> <pattern> */ //TRACE(pidx, ptr, "ASSERT_NOT " + (int) pattern[pidx]); this.ptr = ptr - pattern[pidx + 1]; if (this.ptr >= this.beginning) { i = SRE_MATCH(pattern, pidx + 2, level + 1); if (i < 0) return i; if (i != 0) return 0; } pidx += pattern[pidx]; break; case SRE_OP_BRANCH: /* try an alternate branch */ /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */ //TRACE(pidx, ptr, "BRANCH"); lastmark = this.lastmark; for (; pattern[pidx] != 0; pidx += pattern[pidx]) { if (pattern[pidx+1] == SRE_OP_LITERAL && (ptr >= end || str[ptr] != pattern[pidx+2])) continue; if (pattern[pidx+1] == SRE_OP_IN && (ptr >= end || !SRE_CHARSET(pattern, pidx + 3, str[ptr]))) continue; this.ptr = ptr; i = SRE_MATCH(pattern, pidx + 1, level + 1); if (i != 0) return i; while (this.lastmark > lastmark) mark[this.lastmark--] = -1; } return 0; case SRE_OP_REPEAT_ONE: /* match repeated sequence (maximizing regexp) */ /* this operator only works if the repeated item is exactly one character wide, and we're not already collecting backtracking points. for other cases, use the MAX_REPEAT operator */ /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ int mincount = pattern[pidx+1]; //TRACE(pidx, ptr, "REPEAT_ONE " + mincount + " " + (int)pattern[pidx+2]); if (ptr + mincount > end) return 0; /* cannot match */ this.ptr = ptr; count = SRE_COUNT(pattern, pidx + 3, pattern[pidx+2], level + 1); if (count < 0) return count; ptr += count; /* when we arrive here, count contains the number of matches, and ptr points to the tail of the target string. check if the rest of the pattern matches, and backtrack if not. */ if (count < mincount) return 0; if (pattern[pidx + pattern[pidx]] == SRE_OP_SUCCESS) { /* tail is empty. we're finished */ this.ptr = ptr; return 1; } else if (pattern[pidx + pattern[pidx]] == SRE_OP_LITERAL) { /* tail starts with a literal. skip positions where the rest of the pattern cannot possibly match */ chr = pattern[pidx + pattern[pidx]+1]; for (;;) { while (count >= mincount && (ptr >= end || str[ptr] != chr)) { ptr--; count--; } if (count < mincount) break; this.ptr = ptr; i = SRE_MATCH(pattern, pidx + pattern[pidx], level + 1); if (i != 0) return 1; ptr--; count--; } } else { /* general case */ lastmark = this.lastmark; while (count >= mincount) { this.ptr = ptr; i = SRE_MATCH(pattern, pidx + pattern[pidx], level + 1); if (i != 0) return i; ptr--; count--; while (this.lastmark > lastmark) mark[this.lastmark--] = -1; } } return 0; case SRE_OP_REPEAT: /* create repeat context. all the hard work is done by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */ //TRACE(pidx, ptr, "REPEAT " + (int)pattern[pidx+1] + " " + (int)pattern[pidx+2]); SRE_REPEAT rep = new SRE_REPEAT(repeat); rep.count = -1; rep.pidx = pidx; repeat = rep; this.ptr = ptr; i = SRE_MATCH(pattern, pidx + pattern[pidx], level + 1); repeat = rep.prev; return i; case SRE_OP_MAX_UNTIL: /* maximizing repeat */ /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */ /* FIXME: we probably need to deal with zero-width matches in here... */ SRE_REPEAT rp = this.repeat; if (rp == null) return SRE_ERROR_STATE; this.ptr = ptr; count = rp.count + 1; //TRACE(pidx, ptr, "MAX_UNTIL " + count); if (count < pattern[rp.pidx + 1]) { /* not enough matches */ rp.count = count; i = SRE_MATCH(pattern, rp.pidx + 3, level + 1); if (i != 0) return i;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -