📄 pcre_compile.cpp.svn-base
字号:
*errorcodeptr = ERR2; return 0; } c = *ptr; /* A letter is upper-cased; then the 0x40 bit is flipped. This coding is ASCII-specific, but then the whole concept of \cx is ASCII-specific. */ c = toASCIIUpper(c) ^ 0x40; break; } } *ptrptr = ptr; return c;}/************************************************** Check for counted repeat **************************************************//* This function is called when a '{' is encountered in a place where it mightstart a quantifier. It looks ahead to see if it really is a quantifier or not.It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}where the ddds are digits.Arguments: p pointer to the first char after '{'Returns: true or false*/static bool isCountedRepeat(const UChar* p, const UChar* patternEnd){ if (p >= patternEnd || !isASCIIDigit(*p)) return false; p++; while (p < patternEnd && isASCIIDigit(*p)) p++; if (p < patternEnd && *p == '}') return true; if (p >= patternEnd || *p++ != ',') return false; if (p < patternEnd && *p == '}') return true; if (p >= patternEnd || !isASCIIDigit(*p)) return false; p++; while (p < patternEnd && isASCIIDigit(*p)) p++; return (p < patternEnd && *p == '}');}/************************************************** Read repeat counts **************************************************//* Read an item of the form {n,m} and return the values. This is called onlyafter isCountedRepeat() has confirmed that a repeat-count quantifier exists,so the syntax is guaranteed to be correct, but we need to check the values.Arguments: p pointer to first char after '{' minp pointer to int for min maxp pointer to int for max returned as -1 if no max errorcodeptr points to error code variableReturns: pointer to '}' on success; current ptr on error, with errorcodeptr set non-zero*/static const UChar* readRepeatCounts(const UChar* p, int* minp, int* maxp, ErrorCode* errorcodeptr){ int min = 0; int max = -1; /* Read the minimum value and do a paranoid check: a negative value indicates an integer overflow. */ while (isASCIIDigit(*p)) min = min * 10 + *p++ - '0'; if (min < 0 || min > 65535) { *errorcodeptr = ERR5; return p; } /* Read the maximum value if there is one, and again do a paranoid on its size. Also, max must not be less than min. */ if (*p == '}') max = min; else { if (*(++p) != '}') { max = 0; while (isASCIIDigit(*p)) max = max * 10 + *p++ - '0'; if (max < 0 || max > 65535) { *errorcodeptr = ERR5; return p; } if (max < min) { *errorcodeptr = ERR4; return p; } } } /* Fill in the required variables, and pass back the pointer to the terminating '}'. */ *minp = min; *maxp = max; return p;}/************************************************** Find first significant op code **************************************************//* This is called by several functions that scan a compiled expression lookingfor a fixed first character, or an anchoring op code etc. It skips over thingsthat do not influence this.Arguments: code pointer to the start of the groupReturns: pointer to the first significant opcode*/static const unsigned char* firstSignificantOpcode(const unsigned char* code){ while (*code == OP_BRANUMBER) code += 3; return code;}static const unsigned char* firstSignificantOpcodeSkippingAssertions(const unsigned char* code){ while (true) { switch (*code) { case OP_ASSERT_NOT: advanceToEndOfBracket(code); code += 1 + LINK_SIZE; break; case OP_WORD_BOUNDARY: case OP_NOT_WORD_BOUNDARY: ++code; break; case OP_BRANUMBER: code += 3; break; default: return code; } }}/************************************************** Get othercase range **************************************************//* This function is passed the start and end of a class range, in UTF-8 modewith UCP support. It searches up the characters, looking for internal ranges ofcharacters in the "other" case. Each call returns the next one, updating thestart address.Arguments: cptr points to starting character value; updated d end value ocptr where to put start of othercase range odptr where to put end of othercase rangeYield: true when range returned; false when no more*/static bool getOthercaseRange(int* cptr, int d, int* ocptr, int* odptr){ int c, othercase = 0; for (c = *cptr; c <= d; c++) { if ((othercase = kjs_pcre_ucp_othercase(c)) >= 0) break; } if (c > d) return false; *ocptr = othercase; int next = othercase + 1; for (++c; c <= d; c++) { if (kjs_pcre_ucp_othercase(c) != next) break; next++; } *odptr = next - 1; *cptr = c; return true;}/************************************************* * Convert character value to UTF-8 * *************************************************//* This function takes an integer value in the range 0 - 0x7fffffff and encodes it as a UTF-8 character in 0 to 6 bytes. Arguments: cvalue the character value buffer pointer to buffer for result - at least 6 bytes long Returns: number of characters placed in the buffer */static int encodeUTF8(int cvalue, unsigned char *buffer){ int i; for (i = 0; i < kjs_pcre_utf8_table1_size; i++) if (cvalue <= kjs_pcre_utf8_table1[i]) break; buffer += i; for (int j = i; j > 0; j--) { *buffer-- = 0x80 | (cvalue & 0x3f); cvalue >>= 6; } *buffer = kjs_pcre_utf8_table2[i] | cvalue; return i + 1;}/************************************************** Compile one branch **************************************************//* Scan the pattern, compiling it into the code vector.Arguments: options the option bits brackets points to number of extracting brackets used codeptr points to the pointer to the current code point ptrptr points to the current pattern pointer errorcodeptr points to error code variable firstbyteptr set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE) reqbyteptr set to the last literal character required, else < 0 cd contains pointers to tables etc.Returns: true on success false, with *errorcodeptr set non-zero on error*/static inline bool safelyCheckNextChar(const UChar* ptr, const UChar* patternEnd, UChar expected){ return ((ptr + 1 < patternEnd) && ptr[1] == expected);}static boolcompileBranch(int options, int* brackets, unsigned char** codeptr, const UChar** ptrptr, const UChar* patternEnd, ErrorCode* errorcodeptr, int *firstbyteptr, int* reqbyteptr, CompileData& cd){ int repeat_type, op_type; int repeat_min = 0, repeat_max = 0; /* To please picky compilers */ int bravalue = 0; int reqvary, tempreqvary; int c; unsigned char* code = *codeptr; unsigned char* tempcode; bool groupsetfirstbyte = false; const UChar* ptr = *ptrptr; const UChar* tempptr; unsigned char* previous = NULL; unsigned char classbits[32]; bool class_utf8; unsigned char* class_utf8data; unsigned char utf8_char[6]; /* Initialize no first byte, no required byte. REQ_UNSET means "no char matching encountered yet". It gets changed to REQ_NONE if we hit something that matches a non-fixed char first char; reqbyte just remains unset if we never find one. When we hit a repeat whose minimum is zero, we may have to adjust these values to take the zero repeat into account. This is implemented by setting them to zerofirstbyte and zeroreqbyte when such a repeat is encountered. The individual item types that can be repeated set these backoff variables appropriately. */ int firstbyte = REQ_UNSET; int reqbyte = REQ_UNSET; int zeroreqbyte = REQ_UNSET; int zerofirstbyte = REQ_UNSET;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -