📄 regexp.c

📁 从一个开源软件中摘取的正则表达式模块
💻 C
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
 * Note: to avoid having to include regex.h in builtins.h, we declare * the regexp argument as void *, but really it's regex_t *. */static char *replace_text_regexp(const char * src_text, regex_t *re, const char *replace_text, bool glob){        size_t     src_text_len = 0;	size_t     replace_text_len = 0;	char       *result_buf = NULL;         //StringInfoData buf;        regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT];        pg_wchar   *data;        size_t     data_len;        int        search_start;        int        data_pos;        const char       *start_ptr;        bool       have_escape;		if(src_text == NULL)		return NULL;	src_text_len = null_strlen(src_text);	if(replace_text != NULL)		replace_text_len = null_strlen(replace_text);	        //initStringInfo(&buf);        /* Convert data string to wide characters. */        data = (pg_wchar *) malloc((src_text_len + 1) * sizeof(pg_wchar));        if( data == NULL)        {                fprintf(stderr,"Memory alloc error!");                return NULL;        }        data_len = pg_encoding_mb2wchar_with_len(GetDatabaseEncoding(), src_text,data,src_text_len);  /*杜英杰添加*/        /* Check whether replace_text has escape char. 
*/        have_escape = check_replace_text_has_escape_char(replace_text);        /* start_ptr points to the data_pos'th character of src_text */        start_ptr = src_text;        data_pos = 0;        search_start = 0;        while (search_start <= data_len)        {                int     regexec_result;        //        CHECK_FOR_INTERRUPTS();                regexec_result = pg_regexec(re,                                                                        data,                                                                        data_len,                                                                        search_start,                                                                        NULL,           /* no details */                                                                        REGEXP_REPLACE_BACKREF_CNT,                                                                        pmatch,                                                                        0);                if (regexec_result == REG_NOMATCH)                        break;                if (regexec_result != REG_OKAY)                {			fprintf(stderr,"regular expression failed" );			break;                }                /* * Copy the text to the left of the match position.  Note we are given character not byte indexes.  */                if (pmatch[0].rm_so - data_pos > 0)                {                        int                     chunk_len;                        chunk_len = charlen_to_bytelen(start_ptr, pmatch[0].rm_so - data_pos); /*确定字节数*/			result_buf = append_str(result_buf, start_ptr, chunk_len);//                        appendBinaryStringInfo(&buf, start_ptr, chunk_len);	/*把字符串压入缓冲区*/                        /*                         * Advance start_ptr over that text, to avoid multiple rescans of                         * it if the replace_text contains multiple back-references.                         
*/                        start_ptr += chunk_len;                        data_pos = pmatch[0].rm_so;                }                /*                 * Copy the replace_text. Process back references when the                 * replace_text has escape characters.                 */                if (have_escape)                        result_buf = appendStringInfoRegexpSubstr(result_buf, replace_text, pmatch, start_ptr, data_pos);                else                        result_buf = append_str(result_buf, replace_text, replace_text_len);//                      appendStringInfoText(&buf, replace_text);                /* Advance start_ptr and data_pos over the matched text. */                start_ptr += charlen_to_bytelen(start_ptr, pmatch[0].rm_eo - data_pos);                data_pos = pmatch[0].rm_eo;                /*                 * When global option is off, replace the first instance only.                 */                if (!glob)                        break;                /*                 * Search from next character when the matching text is zero width.                 */                search_start = data_pos;                if (pmatch[0].rm_so == pmatch[0].rm_eo)                        search_start++;        }        /*         * Copy the text to the right of the last match.         */        if (data_pos < data_len)        {                int                     chunk_len;                chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;                result_buf = append_str(result_buf, start_ptr, chunk_len);                //appendBinaryStringInfo(&buf, start_ptr, chunk_len);        }        free(data);        return result_buf;}/* * check_replace_text_has_escape_char * * check whether replace_text contains escape char. 
*/static boolcheck_replace_text_has_escape_char(const char  *replace_text){        const char *p = replace_text;        const char *p_end = NULL;	if(replace_text == NULL)		return false;        p_end = p + null_strlen(replace_text);        if (pg_database_encoding_max_length() == 1)        {                for (; p < p_end; p++)                {                        if (*p == '\\')                                return true;                }        }        else        {                for (; p < p_end; p += pg_mblen(p))                {                        if (*p == '\\')                                return true;                }        }        return false;}/* * text_regex_replace() *              Return a string matched by a regular expression, with replacement. */char * text_regex_replace(const char * s,const char * p,const char * r){#ifdef DEBUG	printf("repalce \"%s\" of \"%s\" as \"%s\":\n",p,s,r);#endif	char     *result = NULL;        regex_t  *re = RE_compile_and_cache(p,regex_flavor);        result = replace_text_regexp(s, re, r, true);#ifdef DEBUG	printf("result is\"%s\"\n",result);#endif        return result;}/* * similar_escape() 把SQL99的正则表达式模式转化成POSIX样式的正则表达式模式。也就是SIMILAR TO使用的正则表达式 * 转换结果字符串被 ***:^(?: ... )$  包裹，但是这种格式是被regex　引擎允许的，可以直接作为正则表达式使用。不需要去掉什么 * 结果进行了内存的分配，需要调用程序手动释放内存。 * Convert a SQL99 regexp pattern to POSIX style, so it can be used by our regexp engine. 
*/

/*
 * Parameters:
 *   pattern  NUL-terminated SQL99 pattern; NULL yields NULL.
 *   escape   escape string: NULL selects backslash, "" disables escaping.
 *            If it is longer than one byte a message is printed to stderr
 *            and conversion continues using its first byte.
 *
 * Returns a malloc'ed, NUL-terminated string that the caller must free(),
 * or NULL when pattern is NULL or the allocation fails.
 */
char *
similar_escape(const char *pattern, const char *escape)
{
    const char *p;
    const char *e;
    int         plen;
    int         elen;
    char       *result;
    char       *r;
    bool        afterescape = false;
    int         nquotes = 0;

    /* This function is not strict, so must test explicitly */
    if (pattern == NULL)
        return NULL;

    p = pattern;
    plen = null_strlen(pattern);

    if (escape == NULL)
    {
        /* No ESCAPE clause provided; default to backslash as escape */
        e = "\\";
        elen = 1;
    }
    else
    {
        e = escape;
        elen = null_strlen(escape);
        if (elen == 0)
            e = NULL;           /* no escape character */
        else if (elen != 1)
            fprintf(stderr, "invalid escape string\nEscape string must be empty or one character.");
    }

    /*----------
     * We surround the transformed input string with
     *          ***:^(?: ... )$
     * For example, SIMILAR_ESCAPE('ASDF','') produces ***:^(?:ASDF)$
     * which looks odd and requires some explanation.  "***:" is a director
     * that makes the regex be treated as an ARE regardless of the
     * regex_flavor setting.  "^" and "$" force the pattern to match the
     * entire input string.
     * The "(?:" and ")" are a non-capturing set of parens; we have to have
     * parens in case the string contains "|", else the "^" and "$" will
     * be bound into the first and last alternatives which is not what we
     * want, and the parens must be non capturing because we don't want them
     * to count when selecting output for SUBSTRING.
     *
     * Callers need not do anything about the wrapper, because the regex
     * module accepts it as is; e.g.
     * pg_regex_match_icase("***:^(?:ASDF)$","ASDF") returns true.
     *----------
     */

    /*
     * Each input byte expands to at most 2 output bytes (when escaped);
     * add 10 bytes for the prefix and suffix, plus the terminating NUL.
     */
    result = (char *) malloc(10 + 2 * plen + 1);
    if (result == NULL)         /* check the allocation, not the input */
    {
        fprintf(stderr, "Memory alloc error!");
        return NULL;
    }
    r = result;

    *r++ = '*';
    *r++ = '*';
    *r++ = '*';
    *r++ = ':';
    *r++ = '^';
    *r++ = '(';
    *r++ = '?';
    *r++ = ':';

    while (plen > 0)
    {
        char        pchar = *p;

        if (afterescape)
        {
            if (pchar == '"')   /* for SUBSTRING patterns */
                *r++ = ((nquotes++ % 2) == 0) ? '(' : ')';
            else
            {
                *r++ = '\\';
                *r++ = pchar;
            }
            afterescape = false;
        }
        else if (e && pchar == *e)
        {
            /* SQL99 escape character; do not send to output */
            afterescape = true;
        }
        else if (pchar == '%')
        {
            *r++ = '.';
            *r++ = '*';
        }
        else if (pchar == '_')
            *r++ = '.';
        else if (pchar == '\\' || pchar == '.' || pchar == '?' ||
                 pchar == '{')
        {
            *r++ = '\\';
            *r++ = pchar;
        }
        else
            *r++ = pchar;
        p++, plen--;
    }

    *r++ = ')';
    *r++ = '$';
    *r = '\0';

    return result;
}

/*
 * similar_to
 *
 * Convert re_val (with the given escape string) through similar_escape()
 * and return the result of pg_regex_match() on the converted pattern and
 * data.  Returns false when the conversion yields NULL.  The converted
 * pattern is freed before returning.
 */
bool
similar_to(const char *data, const char *re_val, const char *escape)
{
    bool        result;
    char       *new_re_val = similar_escape(re_val, escape);

    if (new_re_val == NULL)
        return false;

    result = pg_regex_match(new_re_val, data);
    free(new_re_val);

    return result;
}
上一页 1 23
💿 文件大小 118 K
👤 上传用户 kuoiai
📂 所属分类编译器/解释器
🏷️ 相关标签

#开源软件 #正 #模块 #表达式
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -