📄 regexp.c
字号:
* Note: to avoid having to include regex.h in builtins.h, we declare * the regexp argument as void *, but really it's regex_t *. */static char *replace_text_regexp(const char * src_text, regex_t *re, const char *replace_text, bool glob){ size_t src_text_len = 0; size_t replace_text_len = 0; char *result_buf = NULL; //StringInfoData buf; regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT]; pg_wchar *data; size_t data_len; int search_start; int data_pos; const char *start_ptr; bool have_escape; if(src_text == NULL) return NULL; src_text_len = null_strlen(src_text); if(replace_text != NULL) replace_text_len = null_strlen(replace_text); //initStringInfo(&buf); /* Convert data string to wide characters. */ data = (pg_wchar *) malloc((src_text_len + 1) * sizeof(pg_wchar)); if( data == NULL) { fprintf(stderr,"Memory alloc error!"); return NULL; } data_len = pg_encoding_mb2wchar_with_len(GetDatabaseEncoding(), src_text,data,src_text_len); /*杜英杰添加*/ /* Check whether replace_text has escape char. */ have_escape = check_replace_text_has_escape_char(replace_text); /* start_ptr points to the data_pos'th character of src_text */ start_ptr = src_text; data_pos = 0; search_start = 0; while (search_start <= data_len) { int regexec_result; // CHECK_FOR_INTERRUPTS(); regexec_result = pg_regexec(re, data, data_len, search_start, NULL, /* no details */ REGEXP_REPLACE_BACKREF_CNT, pmatch, 0); if (regexec_result == REG_NOMATCH) break; if (regexec_result != REG_OKAY) { fprintf(stderr,"regular expression failed" ); break; } /* * Copy the text to the left of the match position. Note we are given character not byte indexes. */ if (pmatch[0].rm_so - data_pos > 0) { int chunk_len; chunk_len = charlen_to_bytelen(start_ptr, pmatch[0].rm_so - data_pos); /*确定字节数*/ result_buf = append_str(result_buf, start_ptr, chunk_len);// appendBinaryStringInfo(&buf, start_ptr, chunk_len); /*把字符串压入缓冲区*/ /* * Advance start_ptr over that text, to avoid multiple rescans of * it if the replace_text contains multiple back-references. */ start_ptr += chunk_len; data_pos = pmatch[0].rm_so; } /* * Copy the replace_text. Process back references when the * replace_text has escape characters. */ if (have_escape) result_buf = appendStringInfoRegexpSubstr(result_buf, replace_text, pmatch, start_ptr, data_pos); else result_buf = append_str(result_buf, replace_text, replace_text_len);// appendStringInfoText(&buf, replace_text); /* Advance start_ptr and data_pos over the matched text. */ start_ptr += charlen_to_bytelen(start_ptr, pmatch[0].rm_eo - data_pos); data_pos = pmatch[0].rm_eo; /* * When global option is off, replace the first instance only. */ if (!glob) break; /* * Search from next character when the matching text is zero width. */ search_start = data_pos; if (pmatch[0].rm_so == pmatch[0].rm_eo) search_start++; } /* * Copy the text to the right of the last match. */ if (data_pos < data_len) { int chunk_len; chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr; result_buf = append_str(result_buf, start_ptr, chunk_len); //appendBinaryStringInfo(&buf, start_ptr, chunk_len); } free(data); return result_buf;}/* * check_replace_text_has_escape_char * * check whether replace_text contains escape char. */static boolcheck_replace_text_has_escape_char(const char *replace_text){ const char *p = replace_text; const char *p_end = NULL; if(replace_text == NULL) return false; p_end = p + null_strlen(replace_text); if (pg_database_encoding_max_length() == 1) { for (; p < p_end; p++) { if (*p == '\\') return true; } } else { for (; p < p_end; p += pg_mblen(p)) { if (*p == '\\') return true; } } return false;}/* * text_regex_replace() * Return a string matched by a regular expression, with replacement. */char * text_regex_replace(const char * s,const char * p,const char * r){#ifdef DEBUG printf("repalce \"%s\" of \"%s\" as \"%s\":\n",p,s,r);#endif char *result = NULL; regex_t *re = RE_compile_and_cache(p,regex_flavor); result = replace_text_regexp(s, re, r, true);#ifdef DEBUG printf("result is\"%s\"\n",result);#endif return result;}/* * similar_escape() 把SQL99的正则表达式模式转化成POSIX样式的正则表达式模式。也就是SIMILAR TO使用的正则表达式 * 转换结果字符串被 ***:^(?: ... )$ 包裹,但是这种格式是被regex 引擎允许的,可以直接作为正则表达式使用。不需要去掉什么 * 结果进行了内存的分配,需要调用程序手动释放内存。 * Convert a SQL99 regexp pattern to POSIX style, so it can be used by our regexp engine. */char *similar_escape(const char * pattern,const char * escape){ const char *pat_text = pattern; const char *esc_text = escape; const char *p, *e; int plen, elen; char *result; char *r; bool afterescape = false; int nquotes = 0; /* This function is not strict, so must test explicitly */ if (pattern == NULL) return NULL; p = pat_text; plen = null_strlen(pat_text); if (escape == NULL) { /* No ESCAPE clause provided; default to backslash as escape */ e = "\\"; elen = 1; } else { e = esc_text; elen = null_strlen(esc_text); if (elen == 0) e = NULL; /* no escape character */ else if (elen != 1) fprintf(stderr,"invalid escape string\nEscape string must be empty or one character."); } /*---------- * 我们在转化后的字符串的两侧包围了: ***:^(?: ... )$ * 例如:SIMILAR_ESCAPE('ASDF','') 的输出是 ***:^(?:ASDF)$ * 这种用法让人感到奇怪,需要做一些解释。 "***:" 是一个让指示符, * 让当前程序不管regex_flavor的设置为何,都把正则表达式看作是 ARE 格式。 * 使用^ 和 $ 来强迫模式匹配整个输入字符串。 * The "(?:" and ")" are a non-capturing set of parens; we have to have * parens in case the string contains "|", else the "^" and "$" will * be bound into the first and last alternatives which is not what we * want, and the parens must be non capturing because we don't want them * to count when selecting output for SUBSTRING. * * 这种模式其实不用管,因为正则表达式模块就支持这种包围的模式,例如 * pg_regex_match_icase("***:^(?:ASDF)$","ASDF") 返回是true。 *---------- */ /* 因为每个字节最多被转换成2个字节(加逃逸),再加上前缀和后缀10个字节 */ result = (char *) malloc(10 + 2 * plen + 1); if( pattern == NULL) { fprintf(stderr,"Memory alloc error!"); return NULL; } r = result; *r++ = '*';*r++ = '*'; *r++ = '*'; *r++ = ':'; *r++ = '^'; *r++ = '('; *r++ = '?'; *r++ = ':'; while (plen > 0) { char pchar = *p; if (afterescape) { if (pchar == '"') /* for SUBSTRING patterns */ *r++ = ((nquotes++ % 2) == 0) ? '(' : ')'; else { *r++ = '\\'; *r++ = pchar; } afterescape = false; } else if (e && pchar == *e) { /* SQL99 escape character; do not send to output */ afterescape = true; } else if (pchar == '%') { *r++ = '.'; *r++ = '*'; } else if (pchar == '_') *r++ = '.'; else if (pchar == '\\' || pchar == '.' || pchar == '?' || pchar == '{') { *r++ = '\\'; *r++ = pchar; } else *r++ = pchar; p++, plen--; } *r++ = ')'; *r++ = '$'; *r = '\0'; return result;}bool similar_to(const char * data,const char * re_val,const char * escape){ bool result; char * new_re_val = similar_escape(re_val,escape); if(new_re_val == NULL) return false; result = pg_regex_match(new_re_val,data); free (new_re_val); return result ;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -