📄 regexconvert.c
字号:
switch (*Reg_Parse++) { case '^': emit_convert_byte ('^'); break; case '$': emit_convert_byte ('$'); break; case '<': emit_convert_byte ('<'); break; case '>': emit_convert_byte ('>'); break; case '.': emit_convert_byte ('.'); *flag_param |= (HAS_WIDTH | SIMPLE); break; case '(': emit_convert_byte ('('); ret_val = chunk (PAREN, &flags_local); if (ret_val == 0) return 0; /* Something went wrong. */ /* Add HAS_WIDTH flag if it was set by call to chunk. */ *flag_param |= flags_local & HAS_WIDTH; break; case '\0': case '|': case ')': CONVERT_FAIL ("internal error #3, `atom\'"); /* Supposed to be */ /* caught earlier. */ case '?': case '+': case '*': sprintf (Error_Text, "%c follows nothing", *(Reg_Parse - 1)); CONVERT_FAIL (Error_Text); case '{': emit_convert_byte ('\\'); /* Quote braces. */ emit_convert_byte ('{'); break; case '[': { register unsigned int last_value; unsigned char last_emit = 0; unsigned char buffer [500]; int head = 0; int negated = 0; int do_brackets = 1; int a_z_flag = 0; int A_Z_flag = 0; int zero_nine = 0; int u_score_flag = 0; buffer [0] = '\0'; /* Handle characters that can only occur at the start of a class. */ if (*Reg_Parse == '^') { /* Complement of range. */ negated = 1; Reg_Parse++; } if (*Reg_Parse == ']' || *Reg_Parse == '-') { /* If '-' or ']' is the first character in a class, it is a literal character in the class. */ last_emit = *Reg_Parse; if (head >= 498) { CONVERT_FAIL ("too much data in [] to convert."); } buffer [head++] = '\\'; /* Escape `]' and '-' for clarity. */ buffer [head++] = *Reg_Parse; Reg_Parse++; } /* Handle the rest of the class characters. */ while (*Reg_Parse != '\0' && *Reg_Parse != ']') { if (*Reg_Parse == '-') { /* Process a range, e.g [a-z]. */ Reg_Parse++; if (*Reg_Parse == ']' || *Reg_Parse == '\0') { /* If '-' is the last character in a class it is a literal character. If `Reg_Parse' points to the end of the regex string, an error will be generated later. */ last_emit = '-'; if (head >= 498) { CONVERT_FAIL ("too much data in [] to convert."); } buffer [head++] = '\\'; /* Escape '-' for clarity. */ buffer [head++] = '-'; } else { if (*Reg_Parse == '\\') { /* Handle escaped characters within a class range. */ Reg_Parse++; if ((test = literal_escape (*Reg_Parse, 0))) { buffer [head++] = '-'; if (*Reg_Parse != '\"') { emit_convert_byte ('\\'); } buffer [head++] = *Reg_Parse; last_value = (unsigned int) test; } else { sprintf ( Error_Text, "\\%c is an invalid escape sequence(3)", *Reg_Parse); CONVERT_FAIL (Error_Text); } } else { last_value = U_CHAR_AT (Reg_Parse); if (last_emit == '0' && last_value == '9') { zero_nine = 1; head--; } else if (last_emit == 'a' && last_value == 'z') { a_z_flag = 1; head--; } else if (last_emit == 'A' && last_value == 'Z') { A_Z_flag = 1; head--; } else { buffer [head++] = '-'; if ((test = literal_escape (*Reg_Parse, 1))) { /* Ordinary character matches an escape sequence; convert it to the escape sequence. */ if (head >= 495) { CONVERT_FAIL ( "too much data in [] to convert."); } buffer [head++] = '\\'; if (test == '0') { /* Make octal escape. */ test = *Reg_Parse; buffer [head++] = '0'; buffer [head++] = ('0' + (test / 64)); test -= (test / 64) * 64; buffer [head++] = ('0' + (test / 8)); test -= (test / 8) * 8; buffer [head++] = ('0' + test); } else { buffer [head++] = test; } } else { buffer [head++] = last_value; } } } if (last_emit > last_value) { CONVERT_FAIL ("invalid [] range"); } last_emit = (unsigned char) last_value; Reg_Parse++; } /* End class character range code. */ } else if (*Reg_Parse == '\\') { Reg_Parse++; if ((test = literal_escape (*Reg_Parse, 0)) != '\0') { last_emit = test; if (head >= 498) { CONVERT_FAIL ("too much data in [] to convert."); } if (*Reg_Parse != '\"') { buffer [head++] = '\\'; } buffer [head++] = *Reg_Parse; } else { sprintf (Error_Text, "\\%c is an invalid escape sequence(1)", *Reg_Parse); CONVERT_FAIL (Error_Text); } Reg_Parse++; /* End of class escaped sequence code */ } else { last_emit = *Reg_Parse; if (*Reg_Parse == '_') { u_score_flag = 1; /* Emit later if we can't do `\w'. */ } else if ((test = literal_escape (*Reg_Parse, 1))) { /* Ordinary character matches an escape sequence; convert it to the escape sequence. */ if (head >= 495) { CONVERT_FAIL ("too much data in [] to convert."); } buffer [head++] = '\\'; if (test == '0') { /* Make octal escape. */ test = *Reg_Parse; buffer [head++] = '0'; buffer [head++] = ('0' + (test / 64)); test -= (test / 64) * 64; buffer [head++] = ('0' + (test / 8)); test -= (test / 8) * 8; buffer [head++] = ('0' + test); } else { if (head >= 499) { CONVERT_FAIL ("too much data in [] to convert."); } buffer [head++] = test; } } else { if (head >= 499) { CONVERT_FAIL ("too much data in [] to convert."); } buffer [head++] = *Reg_Parse; } Reg_Parse++; } } /* End of while (*Reg_Parse != '\0' && *Reg_Parse != ']') */ if (*Reg_Parse != ']') CONVERT_FAIL ("missing right \']\'"); buffer [head] = '\0'; /* NOTE: it is impossible to specify an empty class. This is because [] would be interpreted as "begin character class" followed by a literal ']' character and no "end character class" delimiter (']'). Because of this, it is always safe to assume that a class HAS_WIDTH. */ Reg_Parse++; *flag_param |= HAS_WIDTH | SIMPLE; if (head == 0) { if (( a_z_flag && A_Z_flag && zero_nine && u_score_flag) || ( a_z_flag && A_Z_flag && !zero_nine && !u_score_flag) || (!a_z_flag && !A_Z_flag && zero_nine && !u_score_flag)) { do_brackets = 0; } } if (do_brackets) { emit_convert_byte ('['); if (negated) emit_convert_byte ('^'); } /* Output any shortcut escapes if we can. */ while (a_z_flag || A_Z_flag || zero_nine || u_score_flag) { if (a_z_flag && A_Z_flag && zero_nine && u_score_flag) { emit_convert_byte ('\\'); if (negated && !do_brackets) { emit_convert_byte ('W'); } else { emit_convert_byte ('w'); } a_z_flag = A_Z_flag = zero_nine = u_score_flag = 0; } else if (a_z_flag && A_Z_flag) { emit_convert_byte ('\\'); if (negated && !do_brackets) { emit_convert_byte ('L'); } else { emit_convert_byte ('l'); } a_z_flag = A_Z_flag = 0; } else if (zero_nine) { emit_convert_byte ('\\'); if (negated && !do_brackets) { emit_convert_byte ('D'); } else { emit_convert_byte ('d'); } zero_nine = 0; } else if (a_z_flag) { emit_convert_byte ('a'); emit_convert_byte ('-'); emit_convert_byte ('z');
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -