📄 pcre.mx
字号:
* more time analyzing it in order to speed up the time taken for matching. */ extra = pcre_study(pcre_code, 0, &err_p2); pcre_fullinfo(pcre_code, extra, PCRE_INFO_CAPTURECOUNT, &i); ovecsize = (i + 1) * 3; if ((ovector = (int *) GDKzalloc(sizeof(int) * ovecsize)) == NULL) { my_pcre_free(pcre_code); throw(MAL, "pcre_replace_bat","not enough memory"); } tmpbat = BATnew(origin_strs->htype, TYPE_str, BATcount(origin_strs)); BATloop(origin_strs, p, q) { origin_str = BUNtail(origin_strs, p); len_origin_str = strlen(origin_str); i = ncaptures = len_del = offset = 0; do { j = pcre_exec(pcre_code, extra, origin_str, len_origin_str, offset, exec_options, ovector, ovecsize); if (j > 0){ capture_offsets[i] = ovector[0]; capture_offsets[i+1] = ovector[1]; ncaptures++; i += 2; len_del += (ovector[1] - ovector[0]); offset = ovector[1]; } } while((j > 0) && (offset < len_origin_str) && (ncaptures < MAX_NR_CAPTURES)); if (ncaptures > 0){ replaced_str = GDKmalloc(len_origin_str - len_del + (len_replacement * ncaptures) + 1); if (!replaced_str) { my_pcre_free(pcre_code); GDKfree(ovector); throw(MAL, "pcre_replace_bat","not enough memory\n"); } j = k = 0; /* copy eventually the substring before the first captured * substring */ strncpy(replaced_str, origin_str, capture_offsets[j]); k = capture_offsets[j]; j++; for (i = 0; i < ncaptures - 1; i++) { strncpy(replaced_str+k, replacement, len_replacement); k += len_replacement; /* copy the substring between two captured substrings */ len = capture_offsets[j+1] - capture_offsets[j]; strncpy(replaced_str+k, origin_str+capture_offsets[j], len); k += len; j += 2; } /* replace the last captured substring */ strncpy(replaced_str+k, replacement, len_replacement); k += len_replacement; /* copy eventually the substring after the last captured substring */ len = len_origin_str - capture_offsets[j]; strncpy(replaced_str+k, origin_str+capture_offsets[j], len); k += len; replaced_str[k] = '\0'; BUNins(tmpbat, BUNhead(origin_strs, p), replaced_str, FALSE); GDKfree(replaced_str); } else { /* no captured substrings, copy the original string into new bat */ BUNins(tmpbat, BUNhead(origin_strs, p), origin_str, FALSE); } } my_pcre_free(pcre_code); GDKfree(ovector); if (origin_strs->htype == TYPE_void) { *res = BATseqbase(tmpbat, origin_strs->hseqbase); } else { *res = tmpbat; } return MAL_SUCCEED;}str pcre_init(void){ pcre_malloc = my_pcre_malloc; pcre_free = my_pcre_free; return NULL;}voidpcre_exit(void){}strpcre_match_with_flags(int *ret, str val, str pat, str flags){ const char err[BUFSIZ], *err_p = err; int errpos = 0; int options = PCRE_UTF8, i; pcre *re; for (i = 0; i < (int)strlen(flags); i++) { if (flags[i] == 'i') { options |= PCRE_CASELESS; } else if (flags[i] == 'm') { options |= PCRE_MULTILINE; } else if (flags[i] == 's') { options |= PCRE_DOTALL; } else if (flags[i] == 'x') { options |= PCRE_EXTENDED; } else { throw(MAL, "pcre_match", "unsupported flag character '%c'\n", flags[i]); } } if ((re = pcre_compile(pat, options, &err_p, &errpos, NULL)) == NULL) { throw(MAL, "pcre_match", "Compilation of regular expression (%s) failed at %d with '%s'", pat, errpos, err_p); } *ret = pcre_exec(re, NULL, val, strlen(val), 0, 0, NULL, 0); return MAL_SUCCEED;}strpcre_match(bit *ret, str val, str pat){ str msg; int errpos = 0; if ((msg = pcre_match_with_flags(&errpos, val, pat, "")) != MAL_SUCCEED) return msg; if (errpos >= 0) *ret = TRUE; else if (errpos == -1) *ret = FALSE; else throw(MAL, "pcre_match", "Matching of regular expression (%s) failed with %d", pat, errpos); return msg;}strpcre_patindex(int *ret, str val, str pat){ return pcre_match_with_flags(ret, val, pat, "");}#endifstatic intpcre_quote(str *res, str s){ str p; *res = p = GDKmalloc(strlen(s) * 2 + 1); /* certainly long enough */ if (p == NULL) return -1; /* quote all non-alphanumeric ASCII characters (i.e. leave non-ASCII and alphanumeric alone) */ while (*s) { if (!((*s & 0x80) != 0 || ('a' <= *s && *s <= 'z') || ('A' <= *s && *s <= 'Z') || ('0' <= *s && *s <= '9'))) *p++ = '\\'; *p++ = *s++; } *p = 0; return 0;}intpcre_tostr(str *tostr, int *l, pcre * p){ (void) tostr; (void) l; (void) p; return GDK_FAIL; /* don't generate */}intpcre_fromstr(str instr, int *l, pcre ** val){ (void) l; (void) instr; (void) val; return GDK_FAIL; /* don't parse */}intpcre_nequal(pcre * l, pcre * r){ if (l != r) return 0; else return 1;}hash_tpcre_hash(pcre * b){ return *(sht *) b;}pcre *pcre_null(void){ static sht nullval, *r; nullval = ~(sht) 0; r = &nullval; return ((pcre *) (r));}voidpcre_del(Heap *h, var_t *index){ HEAP_free(h, *index);}#define pcresize(val) ((sht*)val)[0]var_tpcre_put(Heap *h, var_t *bun, pcre * val){ char *base; *bun = HEAP_malloc(h, pcresize(val)); base = h->base; if (*bun) memcpy(&base[*bun], (char *) val, pcresize(val)); return *bun;}intpcre_length(pcre * p){ return (pcresize(p));}voidpcre_heap(Heap *heap, size_t capacity){ HEAP_initialize(heap, capacity, 0, (int) sizeof(var_t));}/* change SQL LIKE pattern into PCRE pattern */intsql2pcre(str *r, str pat, str esc_str) { /* change the SQL wilcards into PCRE wildcards */ int len = (int) strlen(pat); int escaped = 0; int hasWildcard = 0; char *ppat = GDKmalloc(len*2+3 /* 3 = "^'the translated regexp'$0" */); int esc = esc_str[0]; /* should change to utf8_convert() */ int specials = 0; *r = ppat; /* # the escape character can be a char which is special in a PCRE # if the user used the "+" char as escape and has "++" in # its pattern, then replacing this with "+" is not correct # but should be "\+" */ if (*esc_str && strchr( ".+*()[]", esc) != NULL) specials = 1; *ppat ++ = '^'; while (*pat) { int c = *pat++; if (c == esc) { if (escaped) { if (specials) { /* change ++ into \\+ */ *ppat++ = esc; } else { /* do not escape simple escape symbols */ ppat[-1] = esc; } escaped = 0; } else { *ppat++ = '\\'; escaped = 1; } /* not optimal but functional */ hasWildcard = 1; } else if (strchr( ".+*()[]\\", c) != NULL) { *ppat++ = '\\'; *ppat++ = c; /* not optimal but functional */ hasWildcard = 1; escaped = 0; } else if (c == '%' && !escaped) { *ppat++ = '.'; *ppat++ = '*'; hasWildcard = 1; } else if (c == '_' && !escaped) { *ppat++ = '.'; hasWildcard = 1; } else { *ppat++ = c; escaped = 0; } } /* no wildcard or escape character at end of string */ if (!hasWildcard || escaped) { GDKfree(*r); *r = GDKstrdup(str_nil); if (escaped) return -1; } else { *ppat++ = '$'; *ppat = 0; } return 0; }@+ Wrapping@c#include "mal.h"strPCREfromstr(str instr, int *l, pcre ** val){ (void) instr; (void) l; (void) val; return NULL;}strPCREreplace_wrap(str *res, str *or, str *pat, str *repl, str *flags){ return pcre_replace(res,*or,*pat,*repl,*flags);}strPCREreplace_bat_wrap(int *res, int *bid, str *pat, str *repl, str *flags){ BAT *b,*bn = NULL; str msg; if ((b = BATdescriptor(*bid)) == NULL) throw(MAL, "pcre.replace", "Cannot access descriptor"); msg = pcre_replace_bat(&bn,b,*pat,*repl,*flags); if( msg == MAL_SUCCEED){ *res= bn->batCacheid; BBPkeepref(*res); } BBPunfix(b->batCacheid); return msg;}strPCREcompile_wrap(pcre ** res, str *pattern){ return pcre_compile_wrap(res, *pattern);}strPCREexec_wrap(bit *res, pcre * pattern, str *s){#ifndef HAVE_LIBPCRE (void) res; (void) pattern; (void) s; throw(MAL, "pcre.select", "Library missing");#else return pcre_exec_wrap(res, pattern, *s);#endif}strPCREselect(int *res, str *pattern, int *bid){#ifndef HAVE_LIBPCRE (void) res; (void) pattern; (void) bid; throw(MAL, "pcre.select", "Library missing");#else BAT *bn = NULL, *strs; str msg; if ((strs = BATdescriptor(*bid)) == NULL) { throw(MAL, "pcre.select", "Cannot access descriptor"); } if ((msg = pcre_select(&bn, *pattern, strs)) != MAL_SUCCEED) { BBPunfix(strs->batCacheid); return msg; } *res = bn->batCacheid; BBPkeepref(bn->batCacheid); BBPunfix(strs->batCacheid); return msg;#endif}strPCREuselect(int *res, str *pattern, int *bid){#ifndef HAVE_LIBPCRE (void) res; (void) pattern; (void) bid; throw(MAL, "pcre.select", "Library missing");#else BAT *bn = NULL, *strs; str msg; if ((strs = BATdescriptor(*bid)) == NULL) { throw(MAL, "pcre.select", "Cannot access descriptor"); } if ((msg = pcre_uselect(&bn, *pattern, strs)) != MAL_SUCCEED) { BBPunfix(strs->batCacheid); return msg; } *res = bn->batCacheid; BBPkeepref(bn->batCacheid); BBPunfix(strs->batCacheid); return msg;#endif}strPCREmatch(bit *ret, str *val, str *pat){ return pcre_match(ret, *val, *pat);}strPCREpatindex(int *ret, str *val, str *pat){ return pcre_patindex(ret, *val, *pat);}strPCREquote(str *ret, str *val){ if (pcre_quote(ret, *val) <0) throw(MAL, "pcre.quote", "Quote failed"); return MAL_SUCCEED;}strPCREsql2pcre(str *ret, str *pat, str *esc){ if (sql2pcre(ret, *pat, *esc) <0) throw(MAL, "pcre.sql2pcre", "Pattern convert failed"); return MAL_SUCCEED;} strPCRElike3(bit *ret, str *s, str *pat, str *esc){ char *ppat = NULL; str r = PCREsql2pcre(&ppat, pat, esc); if (!r) { if (strcmp(ppat, (char*)str_nil) == 0) { *ret = FALSE; if (strcmp(*s, *pat) == 0) *ret = TRUE; } else { r = PCREmatch(ret, s, &ppat); } } if (ppat) GDKfree(ppat); return r;}strPCRElike2(bit *ret, str *s, str *pat){ char *esc = "\\"; return PCRElike3(ret, s, pat, &esc);}strPCRElike_uselect_pcre(int *ret, int *b, str *pat, str *esc){ char *ppat = NULL; str r = PCREsql2pcre(&ppat, pat, esc); if (!r) { if (strcmp(ppat, (char*)str_nil) == 0) { BAT *bp = BATdescriptor(*b); BAT *res = BATuselect(bp, *pat, *pat); *ret = res->batCacheid; BBPkeepref(res->batCacheid); BBPreleaseref(bp->batCacheid); r = MAL_SUCCEED; } else { r = PCREuselect(ret, &ppat, b); } } if (ppat) GDKfree(ppat); return r;} @}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -