📄 pcre.c
字号:
#line 112 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/pcre.mx"#include "mal_config.h"#include "mal.h"#include "mal_exception.h"#ifdef WIN32#ifndef HAVE_LIBPCRE#define pcre_export extern __declspec(dllimport)#else#define pcre_export extern __declspec(dllexport)#endif#else#define pcre_export extern#endif#ifndef HAVE_LIBPCRE#define pcre str#else#include <pcre.h>#endifpcre_export str PCREquote(str *r, str *v);pcre_export str PCREselect(int *res, str *pattern, int *bid);pcre_export str PCREuselect(int *res, str *pattern, int *bid);pcre_export str PCREmatch(bit *ret, str *val, str *pat);pcre_export str PCREpatindex(int *ret, str *val, str *pat);pcre_export str PCREfromstr(str instr, int *l, pcre ** val);pcre_export str PCREreplace_wrap(str *res, str *or, str *pat, str *repl, str *flags);pcre_export str PCREreplace_bat_wrap(int *res, int *or, str *pat, str *repl, str *flags);pcre_export str PCREcompile_wrap(pcre ** res, str *pattern);pcre_export str PCREexec_wrap(bit *res, pcre * pattern, str *s);pcre_export int pcre_tostr(str *tostr, int *l, pcre * p);pcre_export int pcre_fromstr(str instr, int *l, pcre ** val);pcre_export int pcre_nequal(pcre * l, pcre * r);pcre_export hash_t pcre_hash(pcre * b);pcre_export pcre * pcre_null(void);pcre_export void pcre_del(Heap *h, var_t *index);pcre_export int pcre_length(pcre * p);pcre_export void pcre_heap(Heap *heap, size_t capacity);pcre_export var_t pcre_put(Heap *h, var_t *bun, pcre * val);pcre_export str PCRElike3(bit *ret, str *s, str *pat, str *esc);pcre_export str PCRElike2(bit *ret, str *s, str *pat);pcre_export str PCRElike_uselect_pcre(int *ret, int *b, str *pat, str *esc);pcre_export str pcre_init(void);#ifndef HAVE_LIBPCREstrpcre_compile_wrap(pcre ** res, str pattern){ (void) res; (void) pattern; throw(MAL, "pcre_compile", "not available as required version of libpcre was not found by configure.\n");}strpcre_exec_wrap(bit *res, pcre * pattern, str s){ (void) res; (void) pattern; (void) s; throw(MAL, "pcre_exec", "not available as required version of libpcre was not found by configure.\n");}strpcre_select(BAT **res, str pattern, BAT *strs){ (void) res, (void) pattern; (void) strs; throw(MAL, "pcre_select", "not available as required version of libpcre was not found by configure.\n");}strpcre_uselect(BAT **res, str pattern, BAT *strs){ (void) res, (void) pattern; (void) strs; throw(MAL, "pcre_uselect", "not available as required version of libpcre was not found by configure.\n");}strpcre_replace(str *res, str origin_str, str pattern, str replacement, str flags){ (void) res; (void) origin_str; (void) pattern; (void) replacement; (void) flags; throw(MAL, "pcre_replace","not available as required version of libpcre was not found by configure.\n");}strpcre_replace_bat(BAT **res, BAT *origin_strs, str pattern, str replacement, str flags){ (void) res; (void) origin_strs; (void) pattern; (void) replacement; (void) flags; throw(MAL, "pcre_replace_bat","not available as required version of libpcre was not found by configure.");}str pcre_init(void){ return NULL;}voidpcre_exit(void){}strpcre_match(bit *ret, str val, str pat){ (void) ret; (void) val; (void) pat; throw(MAL, "pcre_match", "not available as required version of libpcre was not found by configure.\n");}strpcre_patindex(int *ret, str val, str pat){ (void) ret; (void) val; (void) pat; throw(MAL, "pcre_patindexmatch", "not available as required version of libpcre was not found by configure.\n");}#else#include <pcre.h>#define m2p(p) (pcre*)(((sht*)p)+1)#define p2m(p) (pcre*)(((sht*)p)-1)void *my_pcre_malloc(size_t s){ char *r = GDKmalloc(s + sizeof(sht)); sht *sz = (sht *) r; *sz = s + sizeof(sht); return (void *) (sz + 1);}voidmy_pcre_free(void *blk){ sht *sz = (sht *) blk; sz -= 1; GDKfree((void *) sz);}strpcre_compile_wrap(pcre ** res, str pattern){ pcre *r; const char err[BUFSIZ], *err_p = err; int errpos = 0; if ((r = pcre_compile(pattern, PCRE_UTF8 | PCRE_MULTILINE, &err_p, &errpos, NULL)) == NULL) { throw(MAL,"pcre.compile", "failed with\n'%s'\nat %d in\n'%s'.\n", err_p, errpos, pattern); } *(pcre **) res = p2m(r); return MAL_SUCCEED;}strpcre_exec_wrap(bit *res, pcre * pattern, str s){ if (pcre_exec(m2p(pattern), NULL, s, strlen(s), 0, 0, NULL, 0) >= 0) { *res = TRUE; return MAL_SUCCEED; } *res = FALSE; throw(MAL, "pcre.exec","failed to execute pattern match");}strpcre_select(BAT **res, str pattern, BAT *strs){ const char err[BUFSIZ], *err_p = err; int errpos = 0; BAT *r; BUN p, q; pcre *re = NULL; if (strs->htype == TYPE_void) r = BATnew(TYPE_oid, TYPE_str, BATcount(strs)); else r = BATnew(strs->htype, TYPE_str, BATcount(strs)); if ((re = pcre_compile(pattern, PCRE_UTF8 | PCRE_MULTILINE, &err_p, &errpos, NULL)) == NULL) { throw(MAL, "pcre_select", "pcre compile of pattern (%s) failed at %d with\n'%s'.", pattern, errpos, err_p); } BATloop(strs, p, q) { str s = BUNtail(strs, p); if (pcre_exec(re, NULL, s, strlen(s), 0, 0, NULL, 0) >= 0) { BUNins(r, BUNhead(strs, p), s, FALSE); } } if (!(r->batDirty&2)) r = BATsetaccess(r, BAT_READ); my_pcre_free(re); *res = r; return MAL_SUCCEED;}strpcre_uselect(BAT **res, str pattern, BAT *strs){ const char err[BUFSIZ], *err_p = err; int errpos = 0; BAT *r; BUN p, q; pcre *re = NULL; if (strs->htype == TYPE_void) r = BATnew(TYPE_oid, TYPE_void, BATcount(strs)); else r = BATnew(strs->htype, TYPE_void, BATcount(strs)); if ((re = pcre_compile(pattern, PCRE_UTF8 | PCRE_MULTILINE, &err_p, &errpos, NULL)) == NULL) { throw(MAL, "pcre_uselect", "pcre compile of pattern (%s) failed at %d with\n'%s'.", pattern, errpos, err_p); } BATloop(strs, p, q) { str s = BUNtail(strs, p); if (pcre_exec(re, NULL, s, strlen(s), 0, 0, NULL, 0) >= 0) { BUNins(r, BUNhead(strs, p), NULL, FALSE); } } my_pcre_free(re); if (!(r->batDirty&2)) r = BATsetaccess(r, BAT_READ); *res = r; return MAL_SUCCEED;}#define MAX_NR_CAPTURES 1024 /* Maximal number of captured substrings in one original string */strpcre_replace(str *res, str origin_str, str pattern, str replacement, str flags){ const char err[BUFSIZ], *err_p = err, *err_p2 = err; pcre *pcre_code = NULL; pcre_extra *extra; str tmpres; int i, j, k, len, errpos = 0, offset = 0; int compile_options = PCRE_UTF8, exec_options = PCRE_NOTEMPTY; int *ovector, ovecsize; int len_origin_str = strlen(origin_str); int len_replacement = strlen(replacement); int capture_offsets[MAX_NR_CAPTURES * 2], ncaptures = 0, len_del = 0; for (i = 0; i < (int)strlen(flags); i++) { if (flags[i] == 'e') { exec_options -= PCRE_NOTEMPTY; stream_printf(GDKout, "exec_options %d, PCRE_NOTEMPTY %d\n", exec_options, PCRE_NOTEMPTY); } else if (flags[i] == 'i') { compile_options |= PCRE_CASELESS; } else if (flags[i] == 'm') { compile_options |= PCRE_MULTILINE; } else if (flags[i] == 's') { compile_options |= PCRE_DOTALL; } else if (flags[i] == 'x') { compile_options |= PCRE_EXTENDED; } else { throw(MAL,"pcre_replace","unsupported flag character '%c'\n", flags[i]); } } if ((pcre_code = pcre_compile(pattern, compile_options, &err_p, &errpos, NULL)) == NULL) { throw(MAL,"pcre_replace","pcre compile of pattern (%s) failed at %d with\n'%s'.\n", pattern, errpos, err_p); } /* Since the compiled pattern is going to be used several times, it is * worth spending more time analyzing it in order to speed up the time * taken for matching. */ extra = pcre_study(pcre_code, 0, &err_p2); pcre_fullinfo(pcre_code, extra, PCRE_INFO_CAPTURECOUNT, &i); ovecsize = (i + 1) * 3; if ((ovector = (int *) GDKmalloc(sizeof(int) * ovecsize)) == NULL) { my_pcre_free(pcre_code); throw(MAL, "pcre_replace","not enough memory\n"); } i = 0; do { j = pcre_exec(pcre_code, extra, origin_str, len_origin_str, offset, exec_options, ovector, ovecsize); if (j > 0){ capture_offsets[i] = ovector[0]; capture_offsets[i+1] = ovector[1]; ncaptures++; i += 2; len_del += (ovector[1] - ovector[0]); offset = ovector[1]; } } while((j > 0) && (offset < len_origin_str) && (ncaptures < MAX_NR_CAPTURES)); if (ncaptures > 0){ tmpres = GDKmalloc(len_origin_str - len_del + (len_replacement * ncaptures) + 1); if (!tmpres) { my_pcre_free(pcre_code); GDKfree(ovector); throw(MAL, "pcre_replace","not enough memory\n"); } j = k = 0; /* possibly copy the substring before the first captured substring */ strncpy(tmpres, origin_str, capture_offsets[j]); k = capture_offsets[j]; j++; for (i = 0; i < ncaptures - 1; i++) { strncpy(tmpres+k, replacement, len_replacement); k += len_replacement; /* copy the substring between two captured substrings */ len = capture_offsets[j+1] - capture_offsets[j]; strncpy(tmpres+k, origin_str+capture_offsets[j], len); k += len; j += 2; } /* replace the last captured substring */ strncpy(tmpres+k, replacement, len_replacement); k += len_replacement; /* possibly copy the substring after the last captured substring */ len = len_origin_str - capture_offsets[j]; strncpy(tmpres+k, origin_str+capture_offsets[j], len); k += len; tmpres[k] = '\0'; } else { /* no captured substrings, return the original string*/ tmpres = GDKstrdup(origin_str); } my_pcre_free(pcre_code); GDKfree(ovector); *res = tmpres; return MAL_SUCCEED;}strpcre_replace_bat(BAT **res, BAT *origin_strs, str pattern, str replacement, str flags){ const char err[BUFSIZ], *err_p = err, *err_p2 = err; int i, j, k, len, errpos = 0, offset = 0; int compile_options = PCRE_UTF8, exec_options = PCRE_NOTEMPTY; pcre *pcre_code = NULL; pcre_extra *extra; BAT *tmpbat; BUN p, q; int *ovector, ovecsize; int len_origin_str, len_replacement = strlen(replacement); int capture_offsets[MAX_NR_CAPTURES * 2], ncaptures = 0, len_del = 0; str origin_str, replaced_str; for (i = 0; i < (int)strlen(flags); i++) { if (flags[i] == 'e') { exec_options |= (~PCRE_NOTEMPTY); } else if (flags[i] == 'i') { compile_options |= PCRE_CASELESS; } else if (flags[i] == 'm') { compile_options |= PCRE_MULTILINE; } else if (flags[i] == 's') { compile_options |= PCRE_DOTALL; } else if (flags[i] == 'x') { compile_options |= PCRE_EXTENDED; } else { throw(MAL,"pcre_replace_bat", "\"flags\" contains invalid character '%c'\n", flags[i]); } } if ((pcre_code = pcre_compile(pattern, compile_options, &err_p, &errpos, NULL)) == NULL) { throw(MAL,"pcre_replace_bat", "pcre compile of pattern (%s) failed at %d with\n'%s'.\n", pattern, errpos, err_p); } /* Since the compiled pattern is ging to be used several times, it is worth spending * more time analyzing it in order to speed up the time taken for matching. */ extra = pcre_study(pcre_code, 0, &err_p2); pcre_fullinfo(pcre_code, extra, PCRE_INFO_CAPTURECOUNT, &i); ovecsize = (i + 1) * 3; if ((ovector = (int *) GDKzalloc(sizeof(int) * ovecsize)) == NULL) { my_pcre_free(pcre_code); throw(MAL, "pcre_replace_bat","not enough memory"); } tmpbat = BATnew(origin_strs->htype, TYPE_str, BATcount(origin_strs)); BATloop(origin_strs, p, q) { origin_str = BUNtail(origin_strs, p); len_origin_str = strlen(origin_str); i = ncaptures = len_del = offset = 0; do { j = pcre_exec(pcre_code, extra, origin_str, len_origin_str, offset, exec_options, ovector, ovecsize); if (j > 0){ capture_offsets[i] = ovector[0]; capture_offsets[i+1] = ovector[1]; ncaptures++; i += 2; len_del += (ovector[1] - ovector[0]); offset = ovector[1]; } } while((j > 0) && (offset < len_origin_str) && (ncaptures < MAX_NR_CAPTURES)); if (ncaptures > 0){ replaced_str = GDKmalloc(len_origin_str - len_del + (len_replacement * ncaptures) + 1); if (!replaced_str) { my_pcre_free(pcre_code); GDKfree(ovector); throw(MAL, "pcre_replace_bat","not enough memory\n"); } j = k = 0; /* copy eventually the substring before the first captured * substring */ strncpy(replaced_str, origin_str, capture_offsets[j]); k = capture_offsets[j]; j++; for (i = 0; i < ncaptures - 1; i++) { strncpy(replaced_str+k, replacement, len_replacement); k += len_replacement; /* copy the substring between two captured substrings */ len = capture_offsets[j+1] - capture_offsets[j]; strncpy(replaced_str+k, origin_str+capture_offsets[j], len); k += len; j += 2; } /* replace the last captured substring */ strncpy(replaced_str+k, replacement, len_replacement); k += len_replacement;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -