📄 regexp.c
字号:
bool RE_compile_and_execute(const char *re_val, const char *dat, int cflags, int nmatch, regmatch_t *pmatch){ regex_t *re; /* Compile RE */ re = RE_compile_and_cache(re_val,cflags); if(re == NULL) return FALSE; return RE_execute(re, dat, nmatch, pmatch);}/*普通匹配*/bool pg_regex_match(const char * data,const char * re_val){#ifdef DEBUG printf("re = \"%s\"\ndata = \"%s\"\n",re_val,data);#endif return RE_compile_and_execute( re_val, data, regex_flavor, 0, NULL);}/*进行忽略大小写的匹配*/bool pg_regex_match_icase(const char * data,const char * re_val){#ifdef DEBUG printf("re = \"%s\"\ndata = \"%s\"\n",re_val,data);#endif return RE_compile_and_execute( re_val, data, regex_flavor | REG_ICASE, 0, NULL);}/* * textregexsubstr() * Return a substring matched by a regular expression.If the re_val has parenthesized subexpressions, use the first one ,else use the whole one. * 取子字符串,如果re_val含有分组的子表达式(用“()” 括起来的),那么只匹配第一个子表达式所匹配的内容。 * 例如: text_regex_substr("hellolo hello world","(lo)+"); 结果是:"lo" text_regex_substr("hellolo hello world","l(lo)+"); 结果是:"lo" 如果没有匹配字符串,返回 NULL; * * 此函数分配内存,调用函数一定要记得释放。 * */char * text_regex_substr(const char * data,const char * re_val){ char * result = NULL; regex_t *re; regmatch_t pmatch[2]; int so, eo; #ifdef DEBUG printf("Get substr \"%s\" of \"%s\"\n",re_val,data);#endif if((data == NULL) || (re_val == NULL)) return NULL; /* Compile RE */ re = RE_compile_and_cache(re_val, regex_flavor); /* * We pass two regmatch_t structs to get info about the overall match and * the match for the first parenthesized subexpression (if any). If there * is a parenthesized subexpression, we return what it matched; else * return what the whole regexp matched. */ if (!RE_execute(re, data, 2, pmatch)) return NULL; /* definitely no match */ if (re->re_nsub > 0) { /* has parenthesized subexpressions, use the first one */ so = pmatch[1].rm_so; eo = pmatch[1].rm_eo; } else { /* no parenthesized subexpression, use whole match */ so = pmatch[0].rm_so; eo = pmatch[0].rm_eo; } /* * It is possible to have a match to the whole pattern but no match * for a subexpression; for example 'foo(bar)?' is considered to match * 'foo' but there is no subexpression match. So this extra test for * match failure is not redundant. */ if (so < 0 || eo < 0) return NULL; /* no match */ /*取子字符串,so 从0开始,而mb_substr的参数要求的字符的位置,从1开始*/ result = mb_substr(data, so+1, eo - so);#ifdef DEBUG printf("\tresult is:\"%s\"\n ",result);#endif return result;}/* * charlen_to_bytelen() * Compute the number of bytes occupied by n characters starting at *p * * It is caller's responsibility that there actually are n characters; * the string need not be null-terminated. */static intcharlen_to_bytelen(const char *p, int n){ if (pg_database_encoding_max_length() == 1) { /* Optimization for single-byte encodings */ return n; } else { const char *s; for (s = p; n > 0; n--) s += pg_mblen(s); return s - p; }}/* * 在一个字符串的后面添加一个字符串。 * target:原始的字符串 * data:需要添加的字符串 * len:需要添加的字符串中需要添加的字符个数 * */static char * append_str(char * target,const char * data,size_t len){#ifdef DEBUG// printf("\t\"%s\" append \"%s\" of %d\n",target,data,len);#endif size_t data_length = 0; size_t target_length =0; if(data == NULL) return target; /*确定真实需要添加的字符个数*/ data_length = null_strlen(data); if(len > data_length) len = data_length; len ++ ;/*添加字符串的结尾字符*/ if(target == NULL) { target = (char *)malloc(len * sizeof(char)); if( target == NULL) { fprintf(stderr,"Memory alloc error!"); return NULL; } } else { target_length = null_strlen(target); target = (char *)realloc(target,(target_length+len) * sizeof(char)); if( target == NULL) { fprintf(stderr,"Memory alloc error!"); return NULL; } } /* OK, append the data */ memcpy(target + target_length, data, len-1); *(target + target_length + len-1) = 0;#ifdef DEBUG // printf("\t\tresult is: %s\n",target);#endif return target;}/* * appendStringInfoRegexpSubstr * * Append replace_text to str, substituting regexp back references for * \n escapes. start_ptr is the start of the match in the source string, * at logical character position data_pos. */static char *appendStringInfoRegexpSubstr(char * str, const char *replace_text, regmatch_t *pmatch, const char *start_ptr, int data_pos){ const char *p = replace_text; const char *p_end = p + null_strlen(replace_text); int eml = pg_database_encoding_max_length(); for (;;) { const char *chunk_start = p; int so; int eo; /* Find next escape char. */ if (eml == 1) { for (; p < p_end && *p != '\\'; p++) /* nothing */ ; } else { for (; p < p_end && *p != '\\'; p += pg_mblen(p)) /* nothing */ ; } /* Copy the text we just scanned over, if any. */ if (p > chunk_start)// appendBinaryStringInfo(str, chunk_start, p - chunk_start); str = append_str(str, chunk_start, p - chunk_start); /* Done if at end of string, else advance over escape char. */ if (p >= p_end) break; p++; if (p >= p_end) { /* Escape at very end of input. Treat same as unexpected char */// appendStringInfoChar(str, '\\'); str = append_str(str, "\\", 1); break; } if (*p >= '1' && *p <= '9') { /* Use the back reference of regexp. */ int idx = *p - '0'; so = pmatch[idx].rm_so; eo = pmatch[idx].rm_eo; p++; } else if (*p == '&') { /* Use the entire matched string. */ so = pmatch[0].rm_so; eo = pmatch[0].rm_eo; p++; } else if (*p == '\\') { /* \\ means transfer one \ to output. */ //appendStringInfoChar(str, '\\'); str = append_str(str, "\\", 1); p++; continue; } else { /* * If escape char is not followed by any expected char, just treat * it as ordinary data to copy. (XXX would it be better to throw * an error?) */ //appendStringInfoChar(str, '\\'); str = append_str(str, "\\", 1); continue; } if (so != -1 && eo != -1) { /* * * Copy the text that is back reference of regexp. Note so and eo * * are counted in characters not bytes. * */ const char *chunk_start; int chunk_len; Assert(so >= data_pos); chunk_start = start_ptr; chunk_start += charlen_to_bytelen(chunk_start, so - data_pos); chunk_len = charlen_to_bytelen(chunk_start, eo - so);// appendBinaryStringInfo(str, chunk_start, chunk_len); str = append_str(str, chunk_start, chunk_len); } } return str;}#define REGEXP_REPLACE_BACKREF_CNT 10/* * replace_text_regexp * * replace text that matches to regexp in src_text to replace_text. *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -