📄 php_pcre.c
字号:
zval **regex, /* Regular expression to split by */ **subject, /* Subject string to split */ **limit, /* Number of pieces to return */ **flags; pcre *re = NULL; /* Compiled regular expression */ pcre_extra *extra = NULL; /* Holds results of studying */ int *offsets; /* Array of subpattern offsets */ int size_offsets; /* Size of the offsets array */ int exoptions = 0; /* Execution options */ int preg_options = 0; /* Custom preg options */ int argc; /* Argument count */ int limit_val = -1; /* Integer value of limit */ int no_empty = 0; /* If NO_EMPTY flag is set */ int delim_capture = 0; /* If delimiters should be captured */ int offset_capture = 0;/* If offsets should be captured */ int count = 0; /* Count of matched subpatterns */ int start_offset; /* Where the new search starts */ int next_offset; /* End of the last delimiter match + 1 */ int g_notempty = 0; /* If the match should not be empty */ char *match, /* The current match */ *last_match; /* Location of last match */ int rc; /* Get function parameters and do error checking */ argc = ZEND_NUM_ARGS(); if (argc < 2 || argc > 4 || zend_get_parameters_ex(argc, ®ex, &subject, &limit, &flags) == FAILURE) { WRONG_PARAM_COUNT; } if (argc > 2) { convert_to_long_ex(limit); limit_val = Z_LVAL_PP(limit); if (limit_val == 0) limit_val = -1; if (argc > 3) { convert_to_long_ex(flags); no_empty = Z_LVAL_PP(flags) & PREG_SPLIT_NO_EMPTY; delim_capture = Z_LVAL_PP(flags) & PREG_SPLIT_DELIM_CAPTURE; offset_capture = Z_LVAL_PP(flags) & PREG_SPLIT_OFFSET_CAPTURE; } } /* Make sure we're dealing with strings */ convert_to_string_ex(regex); convert_to_string_ex(subject); /* Compile regex or get it from cache. */ if ((re = pcre_get_compiled_regex(Z_STRVAL_PP(regex), &extra, &preg_options)) == NULL) { RETURN_FALSE; } /* Initialize return value */ array_init(return_value); /* Calculate the size of the offsets array, and allocate memory for it. */ rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets); if (rc < 0) { php_error(E_WARNING, "%s: internal pcre_fullinfo() error %d", get_active_function_name(TSRMLS_C), rc); RETURN_FALSE; } size_offsets = (size_offsets + 1) * 3; offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); /* Start at the beginning of the string */ start_offset = 0; next_offset = 0; last_match = Z_STRVAL_PP(subject); match = NULL; /* Get next piece if no limit or limit not yet reached and something matched*/ while ((limit_val == -1 || limit_val > 1)) { count = pcre_exec(re, extra, Z_STRVAL_PP(subject), Z_STRLEN_PP(subject), start_offset, exoptions|g_notempty, offsets, size_offsets); /* Check for too many substrings condition. */ if (count == 0) { php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings"); count = size_offsets/3; } /* If something matched */ if (count > 0) { match = Z_STRVAL_PP(subject) + offsets[0]; if (!no_empty || &Z_STRVAL_PP(subject)[offsets[0]] != last_match) { if (offset_capture) { /* Add (match, offset) pair to the return value */ add_offset_pair(return_value, last_match, &Z_STRVAL_PP(subject)[offsets[0]]-last_match, next_offset, NULL); } else { /* Add the piece to the return value */ add_next_index_stringl(return_value, last_match, &Z_STRVAL_PP(subject)[offsets[0]]-last_match, 1); } /* One less left to do */ if (limit_val != -1) limit_val--; } last_match = &Z_STRVAL_PP(subject)[offsets[1]]; next_offset = offsets[1]; if (delim_capture) { int i, match_len; for (i = 1; i < count; i++) { match_len = offsets[(i<<1)+1] - offsets[i<<1]; /* If we have matched a delimiter */ if (!no_empty || match_len > 0) { if (offset_capture) { add_offset_pair(return_value, &Z_STRVAL_PP(subject)[offsets[i<<1]], match_len, offsets[i<<1], NULL); } else { add_next_index_stringl(return_value, &Z_STRVAL_PP(subject)[offsets[i<<1]], match_len, 1); } } } } } else { /* Failed to match */ /* If we previously set PCRE_NOTEMPTY after a null match, this is not necessarily the end. We need to advance the start offset, and continue. Fudge the offset values to achieve this, unless we're already at the end of the string. */ if (g_notempty != 0 && start_offset < Z_STRLEN_PP(subject)) { offsets[0] = start_offset; offsets[1] = start_offset + 1; } else break; } /* If we have matched an empty string, mimic what Perl's /g options does. This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try the match again at the same point. If this fails (picked up above) we advance to the next character. */ g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0; /* Advance to the position right after the last full match */ start_offset = offsets[1]; } if (!no_empty || start_offset != Z_STRLEN_PP(subject)) { if (offset_capture) { /* Add the last (match, offset) pair to the return value */ add_offset_pair(return_value, &Z_STRVAL_PP(subject)[start_offset], Z_STRLEN_PP(subject) - start_offset, start_offset, NULL); } else { /* Add the last piece to the return value */ add_next_index_stringl(return_value, last_match, Z_STRVAL_PP(subject) + Z_STRLEN_PP(subject) - last_match, 1); } } /* Clean up */ efree(offsets);}/* }}} *//* {{{ proto string preg_quote(string str, string delim_char) Quote regular expression characters plus an optional character */PHP_FUNCTION(preg_quote){ zval **in_str_arg; /* Input string argument */ zval **delim; /* Additional delimiter argument */ char *in_str, /* Input string */ *in_str_end, /* End of the input string */ *out_str, /* Output string with quoted characters */ *p, /* Iterator for input string */ *q, /* Iterator for output string */ delim_char=0, /* Delimiter character to be quoted */ c; /* Current character */ zend_bool quote_delim = 0; /* Whether to quote additional delim char */ /* Get the arguments and check for errors */ if (ZEND_NUM_ARGS() < 1 || ZEND_NUM_ARGS() > 2 || zend_get_parameters_ex(ZEND_NUM_ARGS(), &in_str_arg, &delim) == FAILURE) { WRONG_PARAM_COUNT; } /* Make sure we're working with strings */ convert_to_string_ex(in_str_arg); in_str = Z_STRVAL_PP(in_str_arg); in_str_end = Z_STRVAL_PP(in_str_arg) + Z_STRLEN_PP(in_str_arg); /* Nothing to do if we got an empty string */ if (in_str == in_str_end) { RETVAL_STRINGL(empty_string, 0, 0); } if (ZEND_NUM_ARGS() == 2) { convert_to_string_ex(delim); if (Z_STRLEN_PP(delim) > 0) { delim_char = Z_STRVAL_PP(delim)[0]; quote_delim = 1; } } /* Allocate enough memory so that even if each character is quoted, we won't run out of room */ out_str = safe_emalloc(4, Z_STRLEN_PP(in_str_arg), 1); /* Go through the string and quote necessary characters */ for(p = in_str, q = out_str; p != in_str_end; p++) { c = *p; switch(c) { case '.': case '\\': case '+': case '*': case '?': case '[': case '^': case ']': case '$': case '(': case ')': case '{': case '}': case '=': case '!': case '>': case '<': case '|': case ':': *q++ = '\\'; *q++ = c; break; case '\0': *q++ = '\\'; *q++ = '0'; *q++ = '0'; *q++ = '0'; break; default: if (quote_delim && c == delim_char) *q++ = '\\'; *q++ = c; break; } } *q = '\0'; /* Reallocate string and return it */ RETVAL_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, 0);}/* }}} *//* {{{ proto array preg_grep(string regex, array input) Searches array and returns entries which match regex */PHP_FUNCTION(preg_grep){ zval **regex, /* Regular expression */ **input, /* Input array */ **flags, **entry; /* An entry in the input array */ pcre *re = NULL; /* Compiled regular expression */ pcre_extra *extra = NULL; /* Holds results of studying */ int preg_options = 0; /* Custom preg options */ int *offsets; /* Array of subpattern offsets */ int size_offsets; /* Size of the offsets array */ int count = 0; /* Count of matched subpatterns */ char *string_key; ulong num_key; zend_bool invert = 0; /* Whether to return non-matching entries */ int rc; /* Get arguments and do error checking */ if (ZEND_NUM_ARGS() < 2 || ZEND_NUM_ARGS() > 3 || zend_get_parameters_ex(ZEND_NUM_ARGS(), ®ex, &input, &flags) == FAILURE) { WRONG_PARAM_COUNT; } if (Z_TYPE_PP(input) != IS_ARRAY) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Second argument to preg_grep() should be an array"); return; } SEPARATE_ZVAL(input); /* Make sure regex is a string */ convert_to_string_ex(regex); if (ZEND_NUM_ARGS() > 2) { convert_to_long_ex(flags); invert = (Z_LVAL_PP(flags) & PREG_GREP_INVERT) ? 1 : 0; } /* Compile regex or get it from cache. */ if ((re = pcre_get_compiled_regex(Z_STRVAL_PP(regex), &extra, &preg_options)) == NULL) { RETURN_FALSE; } /* Calculate the size of the offsets array, and allocate memory for it. */ rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets); if (rc < 0) { php_error(E_WARNING, "%s: internal pcre_fullinfo() error %d", get_active_function_name(TSRMLS_C), rc); RETURN_FALSE; } size_offsets = (size_offsets + 1) * 3; offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); /* Initialize return array */ array_init(return_value); /* Go through the input array */ zend_hash_internal_pointer_reset(Z_ARRVAL_PP(input)); while(zend_hash_get_current_data(Z_ARRVAL_PP(input), (void **)&entry) == SUCCESS) { convert_to_string_ex(entry); /* Perform the match */ count = pcre_exec(re, extra, Z_STRVAL_PP(entry), Z_STRLEN_PP(entry), 0, 0, offsets, size_offsets); /* Check for too many substrings condition. */ if (count == 0) { php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings"); count = size_offsets/3; } /* If the entry fits our requirements */ if ((count > 0 && !invert) || (count < 0 && invert)) { (*entry)->refcount++; /* Add to return array */ switch(zend_hash_get_current_key(Z_ARRVAL_PP(input), &string_key, &num_key, 0)) { case HASH_KEY_IS_STRING: zend_hash_update(Z_ARRVAL_P(return_value), string_key, strlen(string_key)+1, entry, sizeof(zval *), NULL); break; case HASH_KEY_IS_LONG: zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry, sizeof(zval *), NULL); break; } } zend_hash_move_forward(Z_ARRVAL_PP(input)); } /* Clean up */ efree(offsets);}/* }}} *//* {{{ module definition structures */function_entry pcre_functions[] = { PHP_FE(preg_match, third_arg_force_ref) PHP_FE(preg_match_all, third_arg_force_ref) PHP_FE(preg_replace, NULL) PHP_FE(preg_replace_callback, NULL) PHP_FE(preg_split, NULL) PHP_FE(preg_quote, NULL) PHP_FE(preg_grep, NULL) {NULL, NULL, NULL}};zend_module_entry pcre_module_entry = { STANDARD_MODULE_HEADER, "pcre", pcre_functions, PHP_MINIT(pcre), PHP_MSHUTDOWN(pcre), NULL, NULL, PHP_MINFO(pcre), NO_VERSION_YET, STANDARD_MODULE_PROPERTIES};#ifdef COMPILE_DL_PCREZEND_GET_MODULE(pcre)#endif/* }}} */#endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE *//* * Local variables: * tab-width: 4 * c-basic-offset: 4 * End: * vim600: sw=4 ts=4 fdm=marker * vim<600: sw=4 ts=4 */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -