📄 mbstring.c
字号:
/* +----------------------------------------------------------------------+ | PHP Version 4 | +----------------------------------------------------------------------+ | Copyright (c) 2001 The PHP Group | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | | available through the world-wide-web at the following url: | | http://www.php.net/license/3_01.txt | | If you did not receive a copy of the PHP license and are unable to | | obtain it through the world-wide-web, please send a note to | | license@php.net so we can mail you a copy immediately. | +----------------------------------------------------------------------+ | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> | | Rui Hirokawa <hirokawa@php.net> | +----------------------------------------------------------------------+ *//* $Id: mbstring.c,v 1.142.2.47.2.21 2007/04/04 15:28:18 masugata Exp $ *//* * PHP4 Multibyte String module "mbstring" * * History: * 2000.5.19 Release php-4.0RC2_jstring-1.0 * 2001.4.1 Release php4_jstring-1.0.91 * 2001.4.30 Release php4_jstring-1.1 (contribute to The PHP Group) * 2001.5.1 Renamed from jstring to mbstring (hirokawa@php.net) *//* * PHP3 Internationalization support program. * * Copyright (c) 1999,2000 by the PHP3 internationalization team. * All rights reserved. * * See README_PHP3-i18n-ja for more detail. 
* * Authors: * Hironori Sato <satoh@jpnnet.com> * Shigeru Kanemoto <sgk@happysize.co.jp> * Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> * Rui Hirokawa <rui_hirokawa@ybb.ne.jp> *//* {{{ includes: PHP core, SAPI, standard-extension and libmbfl headers; zend_multibyte.h is pulled in only under ZEND_MULTIBYTE and mbregex.h only under HAVE_MBREGEX. The HAVE_MBSTRING conditional opened here is not closed within this part of the file. */#ifdef HAVE_CONFIG_H#include "config.h"#endif#include "php.h"#include "php_ini.h"#include "php_variables.h"#include "mbstring.h"#include "ext/standard/php_string.h"#include "ext/standard/php_mail.h"#include "ext/standard/exec.h"#include "ext/standard/url.h"#include "main/php_output.h"#include "ext/standard/info.h"#include "libmbfl/mbfl/mbfl_allocators.h"#include "php_variables.h"#include "php_globals.h"#include "rfc1867.h"#include "php_content_types.h"#include "SAPI.h"#include "php_unicode.h"#include "TSRM.h"#ifdef ZEND_MULTIBYTE#include "zend_multibyte.h"#endif /* ZEND_MULTIBYTE */#if HAVE_MBSTRING#if HAVE_MBREGEX#include "mbregex.h"#endif/* }}} */#ifdef ZTSMUTEX_T mbregex_locale_mutex = NULL;#endif/* {{{ php_mb_encoding_handler_info_t: record of conversion settings - a data_type selector, the separator string, a one-bit report_errors flag, the target language and encoding, the source language, and the array of candidate source encodings together with its element count. Its users are outside this part of the file. */typedef struct _php_mb_encoding_handler_info_t { int data_type; const char *separator; unsigned int report_errors: 1; enum mbfl_no_language to_language; enum mbfl_no_encoding to_encoding; enum mbfl_no_language from_language; int num_from_encodings; const enum mbfl_no_encoding *from_encodings;} php_mb_encoding_handler_info_t;/* }}} *//* {{{ php_mb_default_identify_list: built-in encoding lists, one per language (Japanese, Korean, Traditional Chinese, Simplified Chinese, Russian, neutral). Each table row pairs a language id with its encoding array and the element count computed via sizeof(array) / sizeof(array[0]). NOTE(review): the list member is a pointer to non-const, yet every row is initialised from a static const array, which drops the qualifier - confirm whether the member should be const. */typedef struct _php_mb_nls_ident_list { enum mbfl_no_language lang; enum mbfl_no_encoding* list; int list_size;} php_mb_nls_ident_list;static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = { mbfl_no_encoding_ascii, mbfl_no_encoding_jis, mbfl_no_encoding_utf8, mbfl_no_encoding_euc_jp, mbfl_no_encoding_sjis};static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = { mbfl_no_encoding_ascii, mbfl_no_encoding_utf8, mbfl_no_encoding_euc_cn, mbfl_no_encoding_cp936};static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = { mbfl_no_encoding_ascii, mbfl_no_encoding_utf8, mbfl_no_encoding_euc_tw, mbfl_no_encoding_big5};static const 
enum mbfl_no_encoding php_mb_default_identify_list_kr[] = { mbfl_no_encoding_ascii, mbfl_no_encoding_utf8, mbfl_no_encoding_euc_kr, mbfl_no_encoding_uhc};static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = { mbfl_no_encoding_ascii, mbfl_no_encoding_utf8, mbfl_no_encoding_koi8r, mbfl_no_encoding_cp1251, mbfl_no_encoding_cp866};static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = { mbfl_no_encoding_ascii, mbfl_no_encoding_utf8};static const php_mb_nls_ident_list php_mb_default_identify_list[] = { { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) }, { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) }, { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) }, { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) }, { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) }, { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }};/* }}} */static const unsigned char third_and_rest_force_ref[] = { 3, BYREF_NONE, BYREF_NONE, BYREF_FORCE_REST };static const unsigned char second_args_force_ref[] = { 2, BYREF_NONE, BYREF_FORCE };#if HAVE_MBREGEXstatic const unsigned char third_argument_force_ref[] = { 3, BYREF_NONE, BYREF_NONE, BYREF_FORCE };#endif/* {{{ sapi_post_entry mbstr_post_entries[]: POST content-type table that pairs DEFAULT_POST_CONTENT_TYPE with sapi_read_standard_form_data and php_mbstr_post_handler, and MULTIPART_CONTENT_TYPE with rfc1867_post_handler; the all-NULL row ends the table. */static sapi_post_entry mbstr_post_entries[] = { { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, 
php_mbstr_post_handler }, { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler }, { NULL, 0, NULL, NULL }};/* }}} *//* {{{ sapi_post_entry php_post_entries[]: counterpart of mbstr_post_entries that pairs DEFAULT_POST_CONTENT_TYPE with php_std_post_handler instead; the multipart row and the all-NULL terminator are the same. */static sapi_post_entry php_post_entries[] = { { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler }, { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler }, { NULL, 0, NULL, NULL }};/* }}} *//* {{{ mb_overload_def mb_ovld[]: overload table. Each row holds an MB_OVERLOAD_MAIL, MB_OVERLOAD_STRING or MB_OVERLOAD_REGEX group flag followed by three names: a standard function, its mb_ counterpart and an mb_orig_ name. The regex rows are compiled only under HAVE_MBREGEX, and the {0, NULL, NULL, NULL} row ends the table. */static const struct mb_overload_def mb_ovld[] = { {MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"}, {MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"}, {MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"}, {MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"}, {MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"}, {MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"}, {MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"}, {MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},#if HAVE_MBREGEX {MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"}, {MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"}, {MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"}, {MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"}, {MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"},#endif {0, NULL, NULL, NULL}}; /* }}} */#if HAVE_MBREGEXstruct def_mbctype_tbl { enum mbfl_no_encoding mbfl_encoding; int regex_encoding;};const struct def_mbctype_tbl mbctype_tbl[] = { {mbfl_no_encoding_ascii,MBCTYPE_ASCII}, {mbfl_no_encoding_7bit,MBCTYPE_ASCII}, {mbfl_no_encoding_8bit,MBCTYPE_ASCII}, {mbfl_no_encoding_euc_jp,MBCTYPE_EUC}, {mbfl_no_encoding_eucjp_win,MBCTYPE_EUC}, {mbfl_no_encoding_sjis,MBCTYPE_SJIS}, {mbfl_no_encoding_sjis_win,MBCTYPE_SJIS}, {mbfl_no_encoding_utf8,MBCTYPE_UTF8}, 
{mbfl_no_encoding_pass,-1}};#endif/* {{{ function_entry mbstring_functions[]: table of the mb_ functions plus the older alias names (mbstr..., i18n_..., mbereg...) that map onto them; the regex entries are compiled only under HAVE_MBREGEX and the { NULL, NULL, NULL } row ends the table. The last macro argument is NULL or one of the force_ref byte arrays cast to unsigned char pointer (presumably the by-reference argument map - confirm against the Zend API in use). */function_entry mbstring_functions[] = { PHP_FE(mb_convert_case, NULL) PHP_FE(mb_strtoupper, NULL) PHP_FE(mb_strtolower, NULL) PHP_FE(mb_language, NULL) PHP_FE(mb_internal_encoding, NULL) PHP_FE(mb_http_input, NULL) PHP_FE(mb_http_output, NULL) PHP_FE(mb_detect_order, NULL) PHP_FE(mb_substitute_character, NULL) PHP_FE(mb_parse_str, (unsigned char *)second_args_force_ref) PHP_FE(mb_output_handler, NULL) PHP_FE(mb_preferred_mime_name, NULL) PHP_FE(mb_strlen, NULL) PHP_FE(mb_strpos, NULL) PHP_FE(mb_strrpos, NULL) PHP_FE(mb_substr_count, NULL) PHP_FE(mb_substr, NULL) PHP_FE(mb_strcut, NULL) PHP_FE(mb_strwidth, NULL) PHP_FE(mb_strimwidth, NULL) PHP_FE(mb_convert_encoding, NULL) PHP_FE(mb_detect_encoding, NULL) PHP_FE(mb_convert_kana, NULL) PHP_FE(mb_encode_mimeheader, NULL) PHP_FE(mb_decode_mimeheader, NULL) PHP_FE(mb_convert_variables, (unsigned char *)third_and_rest_force_ref) PHP_FE(mb_encode_numericentity, NULL) PHP_FE(mb_decode_numericentity, NULL) PHP_FE(mb_send_mail, NULL) PHP_FE(mb_get_info, NULL) PHP_FE(mb_check_encoding, NULL) PHP_FALIAS(mbstrlen, mb_strlen, NULL) PHP_FALIAS(mbstrpos, mb_strpos, NULL) PHP_FALIAS(mbstrrpos, mb_strrpos, NULL) PHP_FALIAS(mbsubstr, mb_substr, NULL) PHP_FALIAS(mbstrcut, mb_strcut, NULL) PHP_FALIAS(i18n_internal_encoding, mb_internal_encoding, NULL) PHP_FALIAS(i18n_http_input, mb_http_input, NULL) PHP_FALIAS(i18n_http_output, mb_http_output, NULL) PHP_FALIAS(i18n_convert, mb_convert_encoding, NULL) PHP_FALIAS(i18n_discover_encoding, mb_detect_encoding, NULL) PHP_FALIAS(i18n_mime_header_encode, mb_encode_mimeheader, NULL) PHP_FALIAS(i18n_mime_header_decode, mb_decode_mimeheader, NULL) PHP_FALIAS(i18n_ja_jp_hantozen, mb_convert_kana, NULL)#if HAVE_MBREGEX PHP_FE(mb_regex_encoding, NULL) PHP_FE(mb_regex_set_options, NULL) PHP_FE(mb_ereg, (unsigned char *)third_argument_force_ref) PHP_FE(mb_eregi, (unsigned char *)third_argument_force_ref) 
PHP_FE(mb_ereg_replace, NULL) PHP_FE(mb_eregi_replace, NULL) PHP_FE(mb_split, NULL) PHP_FE(mb_ereg_match, NULL) PHP_FE(mb_ereg_search, NULL) PHP_FE(mb_ereg_search_pos, NULL) PHP_FE(mb_ereg_search_regs, NULL) PHP_FE(mb_ereg_search_init, NULL) PHP_FE(mb_ereg_search_getregs, NULL) PHP_FE(mb_ereg_search_getpos, NULL) PHP_FE(mb_ereg_search_setpos, NULL) PHP_FALIAS(mbregex_encoding, mb_regex_encoding, NULL) PHP_FALIAS(mbereg, mb_ereg, NULL) PHP_FALIAS(mberegi, mb_eregi, NULL) PHP_FALIAS(mbereg_replace, mb_ereg_replace, NULL) PHP_FALIAS(mberegi_replace, mb_eregi_replace, NULL) PHP_FALIAS(mbsplit, mb_split, NULL) PHP_FALIAS(mbereg_match, mb_ereg_match, NULL) PHP_FALIAS(mbereg_search, mb_ereg_search, NULL) PHP_FALIAS(mbereg_search_pos, mb_ereg_search_pos, NULL) PHP_FALIAS(mbereg_search_regs, mb_ereg_search_regs, NULL) PHP_FALIAS(mbereg_search_init, mb_ereg_search_init, NULL) PHP_FALIAS(mbereg_search_getregs, mb_ereg_search_getregs, NULL) PHP_FALIAS(mbereg_search_getpos, mb_ereg_search_getpos, NULL) PHP_FALIAS(mbereg_search_setpos, mb_ereg_search_setpos, NULL)#endif { NULL, NULL, NULL }};/* }}} *//* {{{ zend_module_entry mbstring_module_entry: module descriptor that names the extension "mbstring" and wires in mbstring_functions and the MINIT, MSHUTDOWN, RINIT, RSHUTDOWN and MINFO hooks; the version slot is NO_VERSION_YET. */zend_module_entry mbstring_module_entry = { STANDARD_MODULE_HEADER, "mbstring", mbstring_functions, PHP_MINIT(mbstring), PHP_MSHUTDOWN(mbstring), PHP_RINIT(mbstring), PHP_RSHUTDOWN(mbstring), PHP_MINFO(mbstring), NO_VERSION_YET, STANDARD_MODULE_PROPERTIES};/* }}} */ZEND_DECLARE_MODULE_GLOBALS(mbstring)#ifdef COMPILE_DL_MBSTRINGZEND_GET_MODULE(mbstring)#endif/* {{{ allocators: thin wrappers gathered into the _php_mb_allocators table. The malloc, realloc, calloc and free wrappers forward to emalloc, erealloc, ecalloc and efree; the pmalloc, prealloc and pfree wrappers forward to pemalloc, perealloc and pefree with the persistent argument fixed at 1. All sizes are taken as unsigned int. */static void *_php_mb_allocators_malloc(unsigned int sz){ return emalloc(sz);}static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz){ return erealloc(ptr, sz);}static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem){ return ecalloc(nelems, szelem);}static void _php_mb_allocators_free(void *ptr){ efree(ptr);} static void *_php_mb_allocators_pmalloc(unsigned int sz){ return pemalloc(sz, 1);}static void 
*_php_mb_allocators_prealloc(void *ptr, unsigned int sz){ return perealloc(ptr, sz, 1);}static void _php_mb_allocators_pfree(void *ptr){ pefree(ptr, 1);} static mbfl_allocators _php_mb_allocators = { _php_mb_allocators_malloc, _php_mb_allocators_realloc, _php_mb_allocators_calloc, _php_mb_allocators_free, _php_mb_allocators_pmalloc, _php_mb_allocators_prealloc, _php_mb_allocators_pfree};/* }}} *//* {{{ static int php_mb_parse_encoding_list() * Return 0 if input contains any illegal encoding, otherwise 1. * Even if any illegal encoding is detected the result may contain a list * of parsed encodings. * * Parses the comma-separated encoding names held in the first value_length bytes of value. * A NULL value or a value_length of zero or less stores NULL through return_list and 0 through return_size (each only when the pointer is non-NULL) and returns 0. * The name "auto" is matched case-insensitively and is expanded at most once per call from MBSTRG(default_detect_order_list). * persistent is handed to pecalloc() when the result list is allocated. * NOTE(review): the early return taken when estrndup() yields NULL also returns 0 but leaves return_list and return_size unwritten. * The remainder of this function lies beyond this part of the file. */static intphp_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC){ int n, l, size, bauto, ret = 1; char *p, *p1, *p2, *endp, *tmpstr; enum mbfl_no_encoding no_encoding; enum mbfl_no_encoding *src, *entry, *list; list = NULL; if (value == NULL || value_length <= 0) { if (return_list) { *return_list = NULL; } if (return_size) { *return_size = 0; } return 0; } else { enum mbfl_no_encoding *identify_list; int identify_list_size; identify_list = MBSTRG(default_detect_order_list); identify_list_size = MBSTRG(default_detect_order_list_size); /* copy the value string for work; when the value is longer than two bytes and both starts and ends with a double quote, that pair of quotes is left out of the copy */ if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) { tmpstr = (char *)estrndup(value+1, value_length-2); value_length -= 2; } else tmpstr = (char *)estrndup(value, value_length); if (tmpstr == NULL) { return 0; } /* count the number of listed encoding names (one more than the number of commas) */ endp = tmpstr + value_length; n = 1; p1 = tmpstr; while ((p2 = php_memnstr(p1, ",", 1, endp)) != NULL) { p1 = p2 + 1; n++; } size = n + identify_list_size; /* make list: sized for every listed name plus the whole default detect order. NOTE(review): the element size passed is sizeof(int) rather than the size of the enum element type - confirm they match on all targets */ list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent); if (list != NULL) { entry = list; n = 0; bauto = 0; p1 = tmpstr; do { p2 = p = php_memnstr(p1, ",", 1, endp); if (p == NULL) { p = endp; } *p = '\0'; /* trim leading and trailing spaces and tabs from the current name */ while (p1 < p && (*p1 == ' ' || 
*p1 == '\t')) { p1++; } p--; while (p > p1 && (*p == ' ' || *p == '\t')) { *p = '\0'; p--; } /* convert to the encoding number and check encoding; "auto" is tested first and the bauto flag keeps it from being expanded twice */ if (strcasecmp(p1, "auto") == 0) { if (!bauto) { bauto = 1; l = identify_list_size; src = identify_list; while (l > 0) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -