📄 php_pcre.c
字号:
/* +----------------------------------------------------------------------+ | PHP Version 4 | +----------------------------------------------------------------------+ | Copyright (c) 1997-2007 The PHP Group | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | | available through the world-wide-web at the following url: | | http://www.php.net/license/3_01.txt | | If you did not receive a copy of the PHP license and are unable to | | obtain it through the world-wide-web, please send a note to | | license@php.net so we can mail you a copy immediately. | +----------------------------------------------------------------------+ | Author: Andrei Zmievski <andrei@php.net> | +----------------------------------------------------------------------+ *//* $Id: php_pcre.c,v 1.132.2.24.2.5 2007/01/01 09:46:45 sebastian Exp $ */#ifdef HAVE_CONFIG_H#include "config.h"#endif#include "php.h"#include "php_globals.h"#include "php_pcre.h"#include "ext/standard/info.h"#include "ext/standard/php_smart_str.h"#if HAVE_PCRE || HAVE_BUNDLED_PCRE#include "ext/standard/php_string.h"#define PREG_PATTERN_ORDER 1#define PREG_SET_ORDER 2#define PREG_OFFSET_CAPTURE (1<<8)#define PREG_SPLIT_NO_EMPTY (1<<0)#define PREG_SPLIT_DELIM_CAPTURE (1<<1)#define PREG_SPLIT_OFFSET_CAPTURE (1<<2)#define PREG_REPLACE_EVAL (1<<0)#define PREG_GREP_INVERT (1<<0)ZEND_DECLARE_MODULE_GLOBALS(pcre)static void php_free_pcre_cache(void *data){ pcre_cache_entry *pce = (pcre_cache_entry *) data; if (!pce) return; pefree(pce->re, 1); if (pce->extra) pefree(pce->extra, 1);#if HAVE_SETLOCALE if ((void*)pce->tables) pefree((void*)pce->tables, 1); pefree(pce->locale, 1);#endif}static void php_pcre_init_globals(zend_pcre_globals *pcre_globals TSRMLS_DC){ zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);}static void php_pcre_shutdown_globals(zend_pcre_globals *pcre_globals TSRMLS_DC){ zend_hash_destroy(&pcre_globals->pcre_cache);}static /* {{{ PHP_MINFO_FUNCTION(pcre) */PHP_MINFO_FUNCTION(pcre){ php_info_print_table_start(); php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" ); php_info_print_table_row(2, "PCRE Library Version", pcre_version() ); php_info_print_table_end();}/* }}} *//* {{{ PHP_MINIT_FUNCTION(pcre) */static PHP_MINIT_FUNCTION(pcre){ ZEND_INIT_MODULE_GLOBALS(pcre, php_pcre_init_globals, php_pcre_shutdown_globals); REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT); return SUCCESS;}/* }}} *//* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */static PHP_MSHUTDOWN_FUNCTION(pcre){#ifndef ZTS php_pcre_shutdown_globals(&pcre_globals TSRMLS_CC);#endif return SUCCESS;}/* }}} */#define PCRE_CACHE_SIZE 4096/* {{{ static pcre_clean_cache */static int pcre_clean_cache(void *data, void *arg TSRMLS_DC){ int *num_clean = (int *)arg; if (*num_clean > 0) { (*num_clean)--; return 1; } else { return 0; }}/* }}} *//* {{{ pcre_get_compiled_regex */PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options) { pcre *re = NULL; int coptions = 0; int soptions = 0; const char *error; int erroffset; char delimiter; char start_delimiter; char end_delimiter; char *p, *pp; char *pattern; int regex_len; int do_study = 0; int poptions = 0; unsigned const char *tables = NULL;#if HAVE_SETLOCALE char *locale = setlocale(LC_CTYPE, NULL);#endif pcre_cache_entry *pce; pcre_cache_entry new_entry; TSRMLS_FETCH(); /* Try to lookup the cached regex entry, and if successful, just pass back the compiled pattern, otherwise go on and compile it. */ regex_len = strlen(regex); if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) { /* * We use a quick pcre_info() check to see whether cache is corrupted, and if it * is, we flush it and compile the pattern from scratch. */ if (pcre_info(pce->re, NULL, NULL) == PCRE_ERROR_BADMAGIC) { zend_hash_clean(&PCRE_G(pcre_cache)); } else {#if HAVE_SETLOCALE if (!strcmp(pce->locale, locale)) {#endif *extra = pce->extra; *preg_options = pce->preg_options; return pce->re;#if HAVE_SETLOCALE } }#endif } p = regex; /* Parse through the leading whitespace, and display a warning if we get to the end without encountering a delimiter. */ while (isspace((int)*(unsigned char *)p)) p++; if (*p == 0) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression"); return NULL; } /* Get the delimiter and display a warning if it is alphanumeric or a backslash. */ delimiter = *p++; if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Delimiter must not be alphanumeric or backslash"); return NULL; } start_delimiter = delimiter; if ((pp = strchr("([{< )]}> )]}>", delimiter))) delimiter = pp[5]; end_delimiter = delimiter; if (start_delimiter == end_delimiter) { /* We need to iterate through the pattern, searching for the ending delimiter, but skipping the backslashed delimiters. If the ending delimiter is not found, display a warning. */ pp = p; while (*pp != 0) { if (*pp == '\\' && pp[1] != 0) pp++; else if (*pp == delimiter) break; pp++; } if (*pp == 0) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "No ending delimiter '%c' found", delimiter); return NULL; } } else { /* We iterate through the pattern, searching for the matching ending * delimiter. For each matching starting delimiter, we increment nesting * level, and decrement it for each matching ending delimiter. If we * reach the end of the pattern without matching, display a warning. */ int brackets = 1; /* brackets nesting level */ pp = p; while (*pp != 0) { if (*pp == '\\' && pp[1] != 0) pp++; else if (*pp == end_delimiter && --brackets <= 0) break; else if (*pp == start_delimiter) brackets++; pp++; } if (*pp == 0) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "No ending matching delimiter '%c' found", end_delimiter); return NULL; } } /* Make a copy of the actual pattern. */ pattern = estrndup(p, pp-p); /* Move on to the options */ pp++; /* Clear out preg options */ *preg_options = 0; /* Parse through the options, setting appropriate flags. Display a warning if we encounter an unknown modifier. */ while (*pp != 0) { switch (*pp++) { /* Perl compatible options */ case 'i': coptions |= PCRE_CASELESS; break; case 'm': coptions |= PCRE_MULTILINE; break; case 's': coptions |= PCRE_DOTALL; break; case 'x': coptions |= PCRE_EXTENDED; break; /* PCRE specific options */ case 'A': coptions |= PCRE_ANCHORED; break; case 'D': coptions |= PCRE_DOLLAR_ENDONLY;break; case 'S': do_study = 1; break; case 'U': coptions |= PCRE_UNGREEDY; break; case 'X': coptions |= PCRE_EXTRA; break; case 'u': coptions |= PCRE_UTF8; break; /* Custom preg options */ case 'e': poptions |= PREG_REPLACE_EVAL; break; case ' ': case '\n': break; default: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown modifier '%c'", pp[-1]); efree(pattern); return NULL; } }#if HAVE_SETLOCALE if (strcmp(locale, "C")) tables = pcre_maketables();#endif /* Compile pattern and display a warning if compilation failed. */ re = pcre_compile(pattern, coptions, &error, &erroffset, tables); if (re == NULL) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Compilation failed: %s at offset %d", error, erroffset); efree(pattern); return NULL; } /* If study option was specified, study the pattern and store the result in extra for passing to pcre_exec. */ if (do_study) { *extra = pcre_study(re, soptions, &error); if (error != NULL) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error while studying pattern"); } } *preg_options = poptions; efree(pattern); /* * If we reached cache limit, clean out the items from the head of the list; * these are supposedly the oldest ones (but not necessarily the least used * ones). */ if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) { int num_clean = PCRE_CACHE_SIZE / 8; zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean TSRMLS_CC); } /* Store the compiled pattern and extra info in the cache. */ new_entry.re = re; new_entry.extra = *extra; new_entry.preg_options = poptions;#if HAVE_SETLOCALE new_entry.locale = pestrdup(locale, 1); new_entry.tables = tables;#endif zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry, sizeof(pcre_cache_entry), NULL); return re;}/* }}} *//* {{{ add_offset_pair */static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name){ zval *match_pair; ALLOC_ZVAL(match_pair); array_init(match_pair); INIT_PZVAL(match_pair); /* Add (match, offset) to the return value */ add_next_index_stringl(match_pair, str, len, 1); add_next_index_long(match_pair, offset); if (name) { zval_add_ref(&match_pair); zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL); } zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL);}/* }}} *//* {{{ php_pcre_match */static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global){ /* parameters */ char *regex; /* Regular expression */ char *subject; /* String to match against */ int regex_len; int subject_len; zval *subpats = NULL; /* Array for subpatterns */ long flags; /* Match control flags */ zval *result_set, /* Holds a set of subpatterns after a global match */ **match_sets = NULL; /* An array of sets of matches for each subpattern after a global match */ pcre *re = NULL; /* Compiled regular expression */ pcre_extra *extra = NULL; /* Holds results of studying */ int exoptions = 0; /* Execution options */ int preg_options = 0; /* Custom preg options */ int count = 0; /* Count of matched subpatterns */ int *offsets; /* Array of subpattern offsets */ int num_subpats; /* Number of captured subpatterns */ int size_offsets; /* Size of the offsets array */ long start_offset = 0; /* Where the new search starts */ int matched; /* Has anything matched */ int subpats_order = 0; /* Order of subpattern matches */ int offset_capture = 0;/* Capture match offsets: yes/no */ int g_notempty = 0; /* If the match should not be empty */ const char **stringlist; /* Holds list of subpatterns */ char *match; /* The current match */ char **subpat_names = NULL;/* Array for named subpatterns */ int i, rc; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ((global) ? "ssz|ll" : "ss|zll"), ®ex, ®ex_len, &subject, &subject_len, &subpats, &flags, &start_offset) == FAILURE) { RETURN_FALSE; } if (global) subpats_order = PREG_PATTERN_ORDER; if (ZEND_NUM_ARGS() > 3) { offset_capture = flags & PREG_OFFSET_CAPTURE; /* * subpats_order is pre-set to pattern mode so we change it only if * necessary. */ if (flags & 0xff) { subpats_order = flags & 0xff;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -