📄 php_pcre.c
字号:
} if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) || (!global && subpats_order != 0)) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Wrong value for parameter 4 in call to %s()", get_active_function_name(TSRMLS_C)); return; } } /* Overwrite the passed-in value for subpatterns with an empty array. */ if (subpats != NULL) { zval_dtor(subpats); array_init(subpats); } /* Negative offset counts from the end of the string. */ if (start_offset < 0) { start_offset = subject_len + start_offset; if (start_offset < 0) { start_offset = 0; } } /* Compile regex or get it from cache. */ if ((re = pcre_get_compiled_regex(regex, &extra, &preg_options)) == NULL) { RETURN_FALSE; } /* Calculate the size of the offsets array, and allocate memory for it. */ rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats); if (rc < 0) { php_error(E_WARNING, "%s: internal pcre_fullinfo() error %d", get_active_function_name(TSRMLS_C), rc); RETURN_FALSE; } num_subpats++; size_offsets = num_subpats * 3; offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); /* * Build a mapping from subpattern numbers to their names. We will always * allocate the table, even though they may be no named subpatterns. This * avoids somewhat more complicated logic in the inner loops. */ subpat_names = (char **)safe_emalloc(num_subpats, sizeof(char *), 0); memset(subpat_names, 0, sizeof(char *) * num_subpats); { int name_cnt = 0, name_size, ni = 0; char *name_table; unsigned short name_idx; rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &name_cnt); if (rc < 0) { php_error(E_WARNING, "%s: internal pcre_fullinfo() error %d", get_active_function_name(TSRMLS_C), rc); RETURN_FALSE; } if (name_cnt > 0) { int rc1, rc2; rc1 = pcre_fullinfo(re, extra, PCRE_INFO_NAMETABLE, &name_table); rc2 = pcre_fullinfo(re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size); rc = rc2 ? rc2 : rc1; if (rc < 0) { php_error(E_WARNING, "%s: internal pcre_fullinfo() error %d", get_active_function_name(TSRMLS_C), rc); RETURN_FALSE; } while (ni++ < name_cnt) { name_idx = 0xff * name_table[0] + name_table[1]; subpat_names[name_idx] = name_table + 2; name_table += name_size; } } } /* Allocate match sets array and initialize the values. */ if (global && subpats_order == PREG_PATTERN_ORDER) { match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0); for (i=0; i<num_subpats; i++) { ALLOC_ZVAL(match_sets[i]); array_init(match_sets[i]); INIT_PZVAL(match_sets[i]); } } match = NULL; matched = 0; do { /* Execute the regular expression. */ count = pcre_exec(re, extra, subject, subject_len, start_offset, exoptions|g_notempty, offsets, size_offsets); /* Check for too many substrings condition. */ if (count == 0) { php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings"); count = size_offsets/3; } /* If something has matched */ if (count >= 0) { matched++; match = subject + offsets[0]; /* If subpatterns array has been passed, fill it in with values. */ if (subpats != NULL) { /* Try to get the list of substrings and display a warning if failed. */ if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) { efree(subpat_names); efree(offsets); efree(re); php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed"); return; } if (global) { /* global pattern matching */ if (subpats_order == PREG_PATTERN_ORDER) { /* For each subpattern, insert it into the appropriate array. */ for (i = 0; i < count; i++) { if (offset_capture) { add_offset_pair(match_sets[i], (char *)stringlist[i], offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL); } else { add_next_index_stringl(match_sets[i], (char *)stringlist[i], offsets[(i<<1)+1] - offsets[i<<1], 1); } } /* * If the number of captured subpatterns on this run is * less than the total possible number, pad the result * arrays with empty strings. */ if (count < num_subpats) { for (; i < num_subpats; i++) { add_next_index_string(match_sets[i], empty_string, 1); } } } else { /* Allocate the result set array */ ALLOC_ZVAL(result_set); array_init(result_set); INIT_PZVAL(result_set); /* Add all the subpatterns to it */ for (i = 0; i < count; i++) { if (offset_capture) { add_offset_pair(result_set, (char *)stringlist[i], offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]); } else { if (subpat_names[i]) { add_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i], offsets[(i<<1)+1] - offsets[i<<1], 1); } add_next_index_stringl(result_set, (char *)stringlist[i], offsets[(i<<1)+1] - offsets[i<<1], 1); } } /* And add it to the output array */ zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL); } } else { /* single pattern matching */ /* For each subpattern, insert it into the subpatterns array. */ for (i = 0; i < count; i++) { if (offset_capture) { add_offset_pair(subpats, (char *)stringlist[i], offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]); } else { if (subpat_names[i]) { add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i], offsets[(i<<1)+1] - offsets[i<<1], 1); } add_next_index_stringl(subpats, (char *)stringlist[i], offsets[(i<<1)+1] - offsets[i<<1], 1); } } } pcre_free((void *) stringlist); } } else { /* Failed to match */ /* If we previously set PCRE_NOTEMPTY after a null match, this is not necessarily the end. We need to advance the start offset, and continue. Fudge the offset values to achieve this, unless we're already at the end of the string. */ if (g_notempty != 0 && start_offset < subject_len) { offsets[0] = start_offset; offsets[1] = start_offset + 1; } else break; } /* If we have matched an empty string, mimic what Perl's /g options does. This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try the match again at the same point. If this fails (picked up above) we advance to the next character. */ g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0; /* Advance to the position right after the last full match */ start_offset = offsets[1]; } while (global); /* Add the match sets to the output array and clean up */ if (global && subpats_order == PREG_PATTERN_ORDER) { for (i = 0; i < num_subpats; i++) { if (subpat_names[i]) { zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], strlen(subpat_names[i])+1, &match_sets[i], sizeof(zval *), NULL); ZVAL_ADDREF(match_sets[i]); } zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL); } efree(match_sets); } efree(offsets); efree(subpat_names); RETVAL_LONG(matched);}/* }}} *//* {{{ proto int preg_match(string pattern, string subject [, array subpatterns [, int flags [, int offset ]]]) Perform a Perl-style regular expression match */PHP_FUNCTION(preg_match){ php_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);}/* }}} *//* {{{ proto int preg_match_all(string pattern, string subject, array subpatterns [, int flags [, int offset]]) Perform a Perl-style global regular expression match */PHP_FUNCTION(preg_match_all){ php_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);}/* }}} *//* {{{ preg_get_backref */static int preg_get_backref(char **str, int *backref){ register char in_brace = 0; register char *walk = *str; if (walk[1] == 0) return 0; if (*walk == '$' && walk[1] == '{') { in_brace = 1; walk++; } walk++; if (*walk >= '0' && *walk <= '9') { *backref = *walk - '0'; walk++; } else return 0; if (*walk && *walk >= '0' && *walk <= '9') { *backref = *backref * 10 + *walk - '0'; walk++; } if (in_brace) { if (*walk == 0 || *walk != '}') return 0; else walk++; } *str = walk; return 1; }/* }}} *//* {{{ preg_do_repl_func */static int preg_do_repl_func(zval *function, char *subject, int *offsets, int count, char **result){ zval *retval_ptr; /* Function return value */ zval **args[1]; /* Argument to pass to function */ zval *subpats; /* Captured subpatterns */ int result_len; /* Return value length */ int i; TSRMLS_FETCH(); MAKE_STD_ZVAL(subpats); array_init(subpats); for (i = 0; i < count; i++) add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1); args[0] = &subpats; if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) { convert_to_string_ex(&retval_ptr); *result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr)); result_len = Z_STRLEN_P(retval_ptr); zval_ptr_dtor(&retval_ptr); } else { php_error(E_WARNING, "Unable to call custom replacement function"); result_len = offsets[1] - offsets[0]; *result = estrndup(&subject[offsets[0]], result_len); } zval_dtor(subpats); FREE_ZVAL(subpats); return result_len;}/* }}} *//* {{{ preg_do_eval */static int preg_do_eval(char *eval_str, int eval_str_len, char *subject, int *offsets, int count, char **result TSRMLS_DC){ zval retval; /* Return value from evaluation */ char *eval_str_end, /* End of eval string */ *match, /* Current match for a backref */ *esc_match, /* Quote-escaped match */ *walk, /* Used to walk the code string */ *segment, /* Start of segment to append while walking */ walk_last; /* Last walked character */ int match_len; /* Length of the match */ int esc_match_len; /* Length of the quote-escaped match */ int result_len; /* Length of the result of the evaluation */ int backref; /* Current backref */ char *compiled_string_description; smart_str code = {0}; eval_str_end = eval_str + eval_str_len; walk = segment = eval_str; walk_last = 0; while (walk < eval_str_end) { /* If found a backreference.. */ if ('\\' == *walk || '$' == *walk) { smart_str_appendl(&code, segment, walk - segment); if (walk_last == '\\') { code.c[code.len-1] = *walk++; segment = walk; walk_last = 0; continue; } segment = walk; if (preg_get_backref(&walk, &backref)) { if (backref < count) { /* Find the corresponding string match and substitute it in instead of the backref */ match = subject + offsets[backref<<1]; match_len = offsets[(backref<<1)+1] - offsets[backref<<1]; if (match_len) { esc_match = php_addslashes_ex(match, match_len, &esc_match_len, 0, 1 TSRMLS_CC); } else { esc_match = match; esc_match_len = 0; } } else { esc_match = empty_string; esc_match_len = 0; match_len = 0; } smart_str_appendl(&code, esc_match, esc_match_len); segment = walk; /* Clean up and reassign */ if (esc_match_len) efree(esc_match); continue; } } walk++; walk_last = walk[-1]; } smart_str_appendl(&code, segment, walk - segment); smart_str_0(&code); compiled_string_description = zend_make_compiled_string_description("regexp code" TSRMLS_CC); /* Run the code */ if (zend_eval_string(code.c, &retval, compiled_string_description TSRMLS_CC) == FAILURE) { efree(compiled_string_description); php_error_docref(NULL TSRMLS_CC, E_ERROR, "Failed evaluating code:\n%s", code.c); /* php_error_docref(NULL TSRMLS_CC, ) does not return in this case */ } efree(compiled_string_description); convert_to_string(&retval); /* Save the return value and its length */ *result = estrndup(Z_STRVAL(retval), Z_STRLEN(retval)); result_len = Z_STRLEN(retval); /* Clean up */ zval_dtor(&retval); smart_str_free(&code); return result_len;}/* }}} *//* {{{ php_pcre_replace
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -