⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 php_mbregex.c

📁 php-4.4.7学习linux时下载的源代码
💻 C
📖 第 1 页 / 共 2 页
字号:
/*   +----------------------------------------------------------------------+   | PHP Version 4                                                        |   +----------------------------------------------------------------------+   | Copyright (c) 2001 The PHP Group                                     |   +----------------------------------------------------------------------+   | This source file is subject to version 3.01 of the PHP license,      |   | that is bundled with this package in the file LICENSE, and is        |   | available through the world-wide-web at the following url:           |   | http://www.php.net/license/3_01.txt                                  |   | If you did not receive a copy of the PHP license and are unable to   |   | obtain it through the world-wide-web, please send a note to          |   | license@php.net so we can mail you a copy immediately.               |   +----------------------------------------------------------------------+   | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |   +----------------------------------------------------------------------+ *//* $Id: php_mbregex.c,v 1.18.2.11.4.1 2006/01/01 13:46:54 sniper Exp $ */#ifdef HAVE_CONFIG_H#include "config.h"#endif#include "php.h"#include "php_ini.h"#include "php_mbregex.h"#include "mbregex.h"#include "mbstring.h"#if HAVE_MBREGEXZEND_EXTERN_MODULE_GLOBALS(mbstring)/* * string buffer */struct strbuf {	unsigned char* buffer;	int length;	int pos;	int allocsz;};static void_php_mb_regex_strbuf_init(struct strbuf *pd){	if (pd) {		pd->buffer = (unsigned char*)0;		pd->length = 0;		pd->pos = 0;		pd->allocsz = 64;	}}static int_php_mb_regex_strbuf_ncat(struct strbuf *pd, const unsigned char *psrc, int len){	if (pd == NULL || psrc == NULL) {		return -1;	}	if ((pd->pos + len) >= pd->length) {		/* reallocate buffer */		int newlen = pd->length + pd->allocsz + len;		unsigned char *tmp = (unsigned char*)erealloc((void*)pd->buffer, newlen);		if (tmp == NULL) {			return -1;		}		pd->length = newlen;		pd->buffer = tmp;	}	while (len > 0) {		pd->buffer[pd->pos++] = *psrc++;		len--;	}	return len;}/* * encoding name resolver */static int _php_mb_regex_name2mbctype(const char *pname){	int mbctype;	mbctype = -1;	if (pname != NULL) {		if (strcasecmp("EUC-JP", pname) == 0		    || strcasecmp("X-EUC-JP", pname) == 0		    || strcasecmp("UJIS", pname) == 0		    || strcasecmp("EUCJP", pname) == 0		    || strcasecmp("EUC_JP", pname) == 0		    || strcasecmp("EUCJP-WIN", pname) == 0) {			mbctype = MBCTYPE_EUC;		} else if (strcasecmp("UTF-8", pname) == 0		           || strcasecmp("UTF8", pname) == 0) {			mbctype = MBCTYPE_UTF8;		} else if (strcasecmp("SJIS", pname) == 0		           || strcasecmp("CP932", pname) == 0		           || strcasecmp("MS932", pname) == 0		           || strcasecmp("SHIFT_JIS", pname) == 0		           || strcasecmp("SJIS-WIN", pname) == 0) {			mbctype = MBCTYPE_SJIS;		} else if (strcasecmp("ASCII", pname) == 0) {			mbctype = MBCTYPE_ASCII;			mbctype = MBCTYPE_EUC;		}	}	return mbctype;}static const char*php_mbregex_mbctype2name(int mbctype){	const char *p = NULL;	if (mbctype == MBCTYPE_EUC) {		p = "EUC-JP";	} else if(mbctype == MBCTYPE_UTF8) {		p = "UTF-8";	} else if(mbctype == MBCTYPE_SJIS) {		p = "SJIS";	} else if(mbctype == MBCTYPE_ASCII) {		p = "ascii";	}	return p;}/* * regex cache */static intphp_mbregex_compile_pattern(mb_regex_t *pre, const char *pattern, int patlen, int options, int mbctype TSRMLS_DC){	int res = 0;	const char *err_str = NULL;	mb_regex_t *rc = NULL;	if(zend_hash_find(&MBSTRG(ht_rc), (char *)pattern, patlen+1, (void **) &rc) == FAILURE ||			rc->options != options || rc->mbctype != mbctype) {		memset(pre, 0, sizeof(*pre));		pre->fastmap = (char*)safe_emalloc((1 << MBRE_BYTEWIDTH), sizeof(char), 0);		if (pre->fastmap) {			pre->options = options;			pre->mbctype = mbctype;			err_str = mbre_compile_pattern(pattern, patlen, pre);			if (!err_str) {				zend_hash_update(&MBSTRG(ht_rc), (char *) pattern, patlen+1, (void *) pre, sizeof(*pre), NULL);			} else {				efree(pre->fastmap);				pre->fastmap = (char*)0;				php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex compile err: %s", err_str);				res = 1;			}		} else {			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to allocate memory in mbregex_compile_pattern");			res = 1;		}	} else {		memcpy(pre, rc, sizeof(*pre));	}	return res;}static size_t _php_mb_regex_get_option_string(char *str, size_t len, int option){	size_t len_left = len;	size_t len_req = 0;	char *p = str;	if ((option & MBRE_OPTION_IGNORECASE) != 0) {		if (len_left > 0) {			--len_left;			*(p++) = 'i';		}		++len_req;		}	if ((option & MBRE_OPTION_EXTENDED) != 0) {		if (len_left > 0) {			--len_left;			*(p++) = 'x';		}		++len_req;		}	if ((option & MBRE_OPTION_POSIXLINE) == MBRE_OPTION_POSIXLINE) {		if (len_left > 0) {			--len_left;			*(p++) = 'p';		}		++len_req;		} else {		if ((option & MBRE_OPTION_MULTILINE) != 0) {			if (len_left > 0) {				--len_left;				*(p++) = 'm';			}			++len_req;			}		if ((option & MBRE_OPTION_SINGLELINE) != 0) {			if (len_left > 0) {				--len_left;				*(p++) = 's';			}			++len_req;			}	}		if ((option & MBRE_OPTION_LONGEST) != 0) {		if (len_left > 0) {			--len_left;			*(p++) = 'l';		}		++len_req;		}	if (len_left > 0) {		--len_left;		*(p++) = '\0';	}	++len_req;		if (len < len_req) {		return len_req;	}	return 0;}static void_php_mb_regex_init_options(const char *parg, int narg, int *option, int *eval) {	int n;	char c;	int optm = 0; 	if (parg != NULL) {		n = 0;		while(n < narg) {			c = parg[n++];			switch (c) {				case 'i':					optm |= MBRE_OPTION_IGNORECASE;					break;				case 'x':					optm |= MBRE_OPTION_EXTENDED;					break;				case 'm':					optm |= MBRE_OPTION_MULTILINE;					break;				case 's':					optm |= MBRE_OPTION_SINGLELINE;					break;				case 'p':					optm |= MBRE_OPTION_POSIXLINE;					break;				case 'l':					optm |= MBRE_OPTION_LONGEST;					break;				case 'e':					if (eval != NULL) *eval = 1; 					break;				default:					break;			}		}		if (option != NULL) *option|=optm; 	}}/* * php funcions *//* {{{ proto string mb_regex_encoding([string encoding])   Returns the current encoding for regex as a string. */PHP_FUNCTION(mb_regex_encoding){	zval **arg1;	int mbctype;	if (ZEND_NUM_ARGS() == 0) {		const char *retval = php_mbregex_mbctype2name(MBSTRG(current_mbctype));		if ( retval != NULL ) {			RETVAL_STRING((char *)retval, 1);		} else {			RETVAL_FALSE;		}	} else if (ZEND_NUM_ARGS() == 1 &&	           zend_get_parameters_ex(1, &arg1) != FAILURE) {		convert_to_string_ex(arg1);		mbctype = _php_mb_regex_name2mbctype(Z_STRVAL_PP(arg1));		if (mbctype < 0) {			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", Z_STRVAL_PP(arg1));			RETVAL_FALSE;		} else {			MBSTRG(current_mbctype) = mbctype;			RETVAL_TRUE;		}	} else {		WRONG_PARAM_COUNT;	}}/* }}} *//* {{{ _php_mb_regex_ereg_exec */static void_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase){	zval tmp;	zval *arg_pattern, *array;	char *string;	int string_len;	mb_regex_t re;	struct mbre_registers regs = {0, 0, 0, 0};	int i, err, match_len, option, beg, end;	char *str;	array = NULL;	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zs|z", &arg_pattern, &string, &string_len, &array) == FAILURE) {		RETURN_FALSE;	}	option = MBSTRG(regex_default_options);	if (icase) {		option |= MBRE_OPTION_IGNORECASE;	}	/* compile the regular expression from the supplied regex */	if (Z_TYPE_P(arg_pattern) != IS_STRING) {		/* we convert numbers to integers and treat them as a string */		tmp = *arg_pattern;		zval_copy_ctor(&tmp);		if (Z_TYPE_P(&tmp) == IS_DOUBLE) {			convert_to_long(&tmp);	/* get rid of decimal places */		}		convert_to_string(&tmp);		arg_pattern = &tmp;		/* don't bother doing an extended regex with just a number */	}	err = php_mbregex_compile_pattern(	     &re,	     Z_STRVAL_P(arg_pattern),	     Z_STRLEN_P(arg_pattern),	     option, MBSTRG(current_mbctype) TSRMLS_CC);	if (err) {		RETVAL_FALSE;		goto out;	}	/* actually execute the regular expression */	err = mbre_search(	     &re,	     string,	     string_len,	      0, string_len,	     &regs);	if (err < 0) {		mbre_free_registers(&regs);		RETVAL_FALSE;		goto out;	}	match_len = 1;	str = string;	if (array) {		match_len = regs.end[0] - regs.beg[0];		zval_dtor(array);	/* start with clean array */		array_init(array);		for (i = 0; i < regs.num_regs; i++) {			beg = regs.beg[i];			end = regs.end[i];			if (beg >= 0 && beg < end && end <= string_len) {				add_index_stringl(array, i, (char *)&str[beg], end - beg, 1);			} else {				add_index_bool(array, i, 0);			}		}	}	mbre_free_registers(&regs);	if (match_len == 0) {		match_len = 1;	}	RETVAL_LONG(match_len);out:	if (arg_pattern == &tmp) {		zval_dtor(&tmp);	}}/* {{{ proto int mb_ereg(string pattern, string string [, array registers])   Regular expression match for multibyte string */PHP_FUNCTION(mb_ereg){	_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);}/* }}} *//* {{{ proto int mb_eregi(string pattern, string string [, array registers])   Case-insensitive regular expression match for multibyte string */PHP_FUNCTION(mb_eregi){	_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);}/* }}} *//* {{{ _php_mb_regex_ereg_replace_exec */static void_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, int option){	zval *arg_pattern_zval;	char *arg_pattern;	int arg_pattern_len;	char *replace;	int replace_len;	char *string;	int string_len;	char *p;	mb_regex_t re;	struct mbre_registers regs = {0, 0, 0, 0};	struct strbuf outdev, evaldev, *pdevice;	int i, n, err, pos, eval;	char *description = NULL;	char pat_buf[2];	eval = 0;	{		char *option_str = NULL;		int option_str_len = 0;		if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zss|s",									&arg_pattern_zval,									&replace, &replace_len,									&string, &string_len,									&option_str, &option_str_len) == FAILURE) {			RETURN_FALSE;		}		if (option_str != NULL) {			_php_mb_regex_init_options(option_str, option_str_len, &option, &eval);		} else {			option |= MBSTRG(regex_default_options);		}	}	if (Z_TYPE_P(arg_pattern_zval) == IS_STRING) {		arg_pattern = Z_STRVAL_P(arg_pattern_zval);		arg_pattern_len = Z_STRLEN_P(arg_pattern_zval);	} else {		/* FIXME: this code is not multibyte aware! */		convert_to_long_ex(&arg_pattern_zval);		pat_buf[0] = (char)Z_LVAL_P(arg_pattern_zval);			pat_buf[1] = '\0';		arg_pattern = pat_buf;		arg_pattern_len = 1;		}	/* create regex pattern buffer */	err = php_mbregex_compile_pattern(	    &re,	    arg_pattern,	    arg_pattern_len,	    option, MBSTRG(current_mbctype) TSRMLS_CC);	if (err) {		RETURN_FALSE;	}	/* initialize string buffer (auto reallocate buffer) */	_php_mb_regex_strbuf_init(&outdev);	_php_mb_regex_strbuf_init(&evaldev);	outdev.allocsz = (string_len >> 2) + 8;	if (eval) {		pdevice = &evaldev;		description = zend_make_compiled_string_description("mbregex replace" TSRMLS_CC);	} else {		pdevice = &outdev;		description = NULL;	}	/* do the actual work */	err = 0;	pos = 0;	while (err >= 0) {		err = mbre_search(&re, string, string_len, pos, string_len - pos, &regs);		if (err <= -2) {			php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in php_mbereg_replace_exec()");			break;		}		if (err >= 0) {#if moriyoshi_0			if (regs.beg[0] == regs.end[0]) {				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");				break;			}#endif			/* copy the part of the string before the match */			_php_mb_regex_strbuf_ncat(&outdev, (const unsigned char *)&string[pos], regs.beg[0] - pos);			/* copy replacement and backrefs */			/* FIXME: this code (\\digit replacement) is not mbyte aware! */ 			i = 0;			p = replace;			while (i < replace_len) {				n = -1;				if (p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {					n = p[1] - '0';				}				if (n >= 0 && n < regs.num_regs) {					if (regs.beg[n] >= 0 && regs.beg[n] < regs.end[n] && regs.end[n] <= string_len) {						_php_mb_regex_strbuf_ncat(pdevice, (const unsigned char *)&string[regs.beg[n]], regs.end[n] - regs.beg[n]);					}					p += 2;					i += 2;				} else {					_php_mb_regex_strbuf_ncat(pdevice, (const unsigned char *)p, 1);					p++;					i++;				}			}			if (eval) {				zval v;				/* null terminate buffer */				_php_mb_regex_strbuf_ncat(&evaldev, (const unsigned char *)"\0", 1);				/* do eval */				zend_eval_string((char *)evaldev.buffer, &v, description TSRMLS_CC);				/* result of eval */				convert_to_string(&v);				_php_mb_regex_strbuf_ncat(&outdev, Z_STRVAL(v), Z_STRLEN(v));				/* Clean up */				evaldev.pos = 0;				zval_dtor(&v);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -