📄 regexp.c

📁 从一个开源软件中摘取的正则表达式模块
💻 C
📖 第 1 页 / 共 3 页
字号:
bool RE_compile_and_execute(const char *re_val, const char *dat, int cflags, int nmatch, regmatch_t *pmatch){	regex_t    *re;	/* Compile RE */	re = RE_compile_and_cache(re_val,cflags);	if(re == NULL)		return FALSE;	return RE_execute(re, dat, nmatch, pmatch);}/*普通匹配*/bool pg_regex_match(const char * data,const char * re_val){#ifdef DEBUG	printf("re = \"%s\"\ndata = \"%s\"\n",re_val,data);#endif        return RE_compile_and_execute( re_val,        	                       data,                        	       regex_flavor,                               	       0, NULL);}/*进行忽略大小写的匹配*/bool pg_regex_match_icase(const char * data,const char * re_val){#ifdef DEBUG	printf("re = \"%s\"\ndata = \"%s\"\n",re_val,data);#endif        return RE_compile_and_execute( re_val,        	                       data,                        	       regex_flavor | REG_ICASE,                               	       0, NULL);}/* * textregexsubstr() *              Return a substring matched by a regular expression.If the re_val has parenthesized subexpressions, use the first one ,else use the whole one. *              取子字符串,如果re_val含有分组的子表达式（用“()” 括起来的），那么只匹配第一个子表达式所匹配的内容。 *              例如：                text_regex_substr("hellolo hello world","(lo)+");                结果是："lo"                text_regex_substr("hellolo hello world","l(lo)+");                结果是："lo"				如果没有匹配字符串，返回 NULL; * *              此函数分配内存，调用函数一定要记得释放。 * */char * text_regex_substr(const char * data,const char * re_val){	char * result = NULL;        regex_t    *re;        regmatch_t      pmatch[2];        int                     so, eo;	#ifdef DEBUG	printf("Get substr \"%s\" of \"%s\"\n",re_val,data);#endif	if((data == NULL) || (re_val == NULL))		return NULL;		        /* Compile RE */        re = RE_compile_and_cache(re_val, regex_flavor);        /*         * We pass two regmatch_t structs to get info about the overall match and         * the match for the first parenthesized subexpression (if any). If there         * is a parenthesized subexpression, we return what it matched; else         * return what the whole regexp matched.         */        if (!RE_execute(re, data, 2, pmatch))                return NULL;               /* definitely no match */        if (re->re_nsub > 0)        {                /* has parenthesized subexpressions, use the first one */                so = pmatch[1].rm_so;                eo = pmatch[1].rm_eo;        }        else        {                /* no parenthesized subexpression, use whole match */                so = pmatch[0].rm_so;                eo = pmatch[0].rm_eo;        }        /*         * It is possible to have a match to the whole pattern but no match         * for a subexpression; for example 'foo(bar)?' is considered to match         * 'foo' but there is no subexpression match.  So this extra test for         * match failure is not redundant.         */        if (so < 0 || eo < 0)                return NULL;               /*  no match */	/*取子字符串,so 从0开始,而mb_substr的参数要求的字符的位置，从1开始*/	result =  mb_substr(data, so+1, eo - so);#ifdef DEBUG        printf("\tresult is:\"%s\"\n ",result);#endif	return result;}/* * charlen_to_bytelen() *      Compute the number of bytes occupied by n characters starting at *p * * It is caller's responsibility that there actually are n characters; * the string need not be null-terminated. */static intcharlen_to_bytelen(const char *p, int n){        if (pg_database_encoding_max_length() == 1)        {                /* Optimization for single-byte encodings */                return n;        }        else        {                const char *s;                for (s = p; n > 0; n--)                        s += pg_mblen(s);                return s - p;        }}/* * 在一个字符串的后面添加一个字符串。 * target:原始的字符串 * data：需要添加的字符串 * len:需要添加的字符串中需要添加的字符个数 * */static char * append_str(char * target,const char * data,size_t len){#ifdef DEBUG//	printf("\t\"%s\" append \"%s\" of %d\n",target,data,len);#endif	size_t data_length  = 0; 	size_t target_length  =0; 	if(data == NULL)		return target;	/*确定真实需要添加的字符个数*/	data_length  = null_strlen(data);		if(len > data_length)		len = data_length;	len ++ ;/*添加字符串的结尾字符*/		if(target == NULL)	{		target = (char *)malloc(len * sizeof(char));	        	if( target == NULL)      		{                	fprintf(stderr,"Memory alloc error!");        	        return NULL;	        }	}	else	{		target_length  = null_strlen(target);			target = (char *)realloc(target,(target_length+len) * sizeof(char));	                if( target == NULL)                {                        fprintf(stderr,"Memory alloc error!");                        return NULL;                }	}	        /* OK, append the data */        memcpy(target + target_length, data, len-1);	*(target + target_length + len-1) = 0;#ifdef DEBUG  //      printf("\t\tresult is: %s\n",target);#endif	return target;}/* * appendStringInfoRegexpSubstr * * Append replace_text to str, substituting regexp back references for * \n escapes.  start_ptr is the start of the match in the source string, * at logical character position data_pos. */static char *appendStringInfoRegexpSubstr(char * str, const char *replace_text,                                                         regmatch_t *pmatch,                                                       const   char *start_ptr, int data_pos){        const char *p = replace_text;        const char *p_end = p + null_strlen(replace_text);        int         eml = pg_database_encoding_max_length();        for (;;)        {                const char *chunk_start = p;                int                     so;                int                     eo;                /* Find next escape char. */                if (eml == 1)                {                        for (; p < p_end && *p != '\\'; p++)                                 /* nothing */ ;                }                else                {                        for (; p < p_end && *p != '\\'; p += pg_mblen(p))                                 /* nothing */ ;                }                /* Copy the text we just scanned over, if any. */                if (p > chunk_start)//                        appendBinaryStringInfo(str, chunk_start, p - chunk_start);                        str = append_str(str, chunk_start, p - chunk_start);                /* Done if at end of string, else advance over escape char. */                if (p >= p_end)                        break;                p++;                if (p >= p_end)                {                        /* Escape at very end of input.  Treat same as unexpected char *///                        appendStringInfoChar(str, '\\');                        str = append_str(str, "\\", 1);                        break;                }                if (*p >= '1' && *p <= '9')                {                        /* Use the back reference of regexp. */                        int                     idx = *p - '0';                        so = pmatch[idx].rm_so;                        eo = pmatch[idx].rm_eo;                        p++;                }                else if (*p == '&')                {                        /* Use the entire matched string. */                        so = pmatch[0].rm_so;                        eo = pmatch[0].rm_eo;                        p++;                }                else if (*p == '\\')                {                        /* \\ means transfer one \ to output. */                        //appendStringInfoChar(str, '\\');                        str = append_str(str, "\\", 1);                        p++;                        continue;                }                else                {                        /*                         * If escape char is not followed by any expected char, just treat                         * it as ordinary data to copy.  (XXX would it be better to throw                         * an error?)                         */                        //appendStringInfoChar(str, '\\');                        str = append_str(str, "\\", 1);                        continue;                }                if (so != -1 && eo != -1)                {                        /* *                          * Copy the text that is back reference of regexp.      Note so and eo *                                                   * are counted in characters not bytes. *                                                                            */                        const char       *chunk_start;                        int                     chunk_len;                        Assert(so >= data_pos);                        chunk_start = start_ptr;                        chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);                        chunk_len = charlen_to_bytelen(chunk_start, eo - so);//                        appendBinaryStringInfo(str, chunk_start, chunk_len);                        str = append_str(str, chunk_start, chunk_len);                }        }	return str;}#define REGEXP_REPLACE_BACKREF_CNT              10/* * replace_text_regexp * * replace text that matches to regexp in src_text to replace_text. *
💿 文件大小 118 K
👤 上传用户 kuoiai
📂 所属分类编译器/解释器
🏷️ 相关标签

#开源软件 #正 #模块 #表达式
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -