re.c

来自「网络爬虫程序」· C语言 代码 · 共 303 行

C
303
字号
/***************************************************************************/
/*    This code is part of WWW grabber called pavuk                        */
/*    Copyright (c) 1997 - 2001 Stefan Ondrejicka                          */
/*    Distributed under GPL 2 or later                                     */
/***************************************************************************/

#include "config.h"

#ifdef HAVE_REGEX

#include "re.h"
#include "tools.h"

/*
 * Thin wrapper around one of several regular expression back ends
 * (POSIX, V8, BSD, GNU or PCRE), selected at build time through the
 * HAVE_*_REGEX macros.
 * NOTE(review): the code assumes exactly one HAVE_*_REGEX macro is
 * defined together with HAVE_REGEX - confirm against config.h.
 */

/*
 * Release a regular expression entry created by re_make().
 *
 * Frees the back-end specific compiled form, the stored pattern string
 * and finally the entry itself.  Passing NULL is a no-op.
 */
void re_free(re_entry * ree)
{
  if(!ree)
    return;

#ifdef HAVE_POSIX_REGEX
  regfree(&(ree->preg));
#endif
#ifdef HAVE_V8_REGEX
  _free(ree->preg);
#endif
#ifdef HAVE_GNU_REGEX
  regfree(&(ree->preg));
#endif
#ifdef HAVE_PCRE_REGEX
  _free(ree->preg);
  _free(ree->preg_extra);
#endif
  /* the BSD back end keeps no per-entry compiled state, only the pattern */
  _free(ree->pattern);
  _free(ree);
}

/*
 * Compile the pattern `str' into a newly allocated re_entry.
 *
 * On success the entry holds the compiled expression and a copy of the
 * pattern text (made with new_string()); release it with re_free().
 * On failure an error message is reported through xprintf(), the entry
 * is freed and NULL is returned.
 */
re_entry *re_make(const char *str)
{
  re_entry *rv;
  int failed = FALSE;
#if defined(HAVE_POSIX_REGEX) || defined(HAVE_PCRE_REGEX)
  int ec;
#endif
#if defined(HAVE_BSD_REGEX) || defined(HAVE_GNU_REGEX) || defined(HAVE_PCRE_REGEX)
  const char *p = NULL;
#endif

  rv = _malloc(sizeof(*rv));

#ifdef HAVE_POSIX_REGEX
  if((ec = regcomp(&(rv->preg), str, REG_EXTENDED)))
  {
    char pom[PATH_MAX];

    xprintf(0, gettext("Error compiling regular expression : %s\n"), str);
    regerror(ec, &(rv->preg), pom, sizeof(pom));
    xprintf(0, "%s\n", pom);
    /*
     * No regfree() here: after a failed regcomp() the content of preg
     * is undefined, and some C libraries already release it themselves.
     */
    failed = TRUE;
  }
#endif
#ifdef HAVE_V8_REGEX
  if(!(rv->preg = regcomp(str)))
  {
    xprintf(0, gettext("Error compiling regular expression : %s\n"), str);
    failed = TRUE;
  }
#endif
#ifdef HAVE_BSD_REGEX
  if((p = re_comp(str)))
  {
    xprintf(0, gettext("Error compiling regular expression : %s\n"), str);
    xprintf(0, "%s", p);
    failed = TRUE;
  }
#endif
#ifdef HAVE_GNU_REGEX
  /* the pattern buffer must be initialised before re_compile_pattern() */
  rv->preg.allocated = 0;
  rv->preg.buffer = NULL;
  rv->preg.fastmap = NULL;
  rv->preg.translate = NULL;
  if((p = re_compile_pattern(str, strlen(str), &rv->preg)))
  {
    xprintf(0, gettext("Error compiling regular expression : %s\n"), str);
    xprintf(0, "%s\n", p);
    regfree(&(rv->preg));
    failed = TRUE;
  }
#endif
#ifdef HAVE_PCRE_REGEX
  /* here `ec' receives the error offset reported by pcre_compile() */
  rv->preg = pcre_compile(str, 0, &p, &ec, NULL);
  if(rv->preg)
  {
    rv->preg_extra = pcre_study(rv->preg, 0, &p);
  }
  else
  {
    xprintf(0, gettext("Error compiling regular expression : %s\n"), str);
    xprintf(0, "%s\n", p);
    failed = TRUE;
  }
#endif

  if(failed)
  {
    _free(rv);
    return NULL;
  }

  rv->pattern = new_string(str);
  return rv;
}

/*
 * Test whether `str' matches the compiled expression `ree'.
 *
 * Returns non-zero on a match and zero otherwise.
 */
int re_pmatch(re_entry * ree, char *str)
{
#ifdef HAVE_POSIX_REGEX
  return !regexec(&(ree->preg), str, 0, NULL, 0);
#endif
#ifdef HAVE_V8_REGEX
  return regexec(ree->preg, str);
#endif
#ifdef HAVE_BSD_REGEX
  /* the BSD interface has one global compiled pattern: recompile first */
  re_comp(ree->pattern);
  /* re_exec() may return -1 on an internal error; only 1 is a match */
  return re_exec(str) == 1;
#endif
#ifdef HAVE_GNU_REGEX
  return re_match(&(ree->preg), str, strlen(str), 0, NULL) >= 0;
#endif
#ifdef HAVE_PCRE_REGEX
  return pcre_exec(ree->preg, ree->preg_extra, str, (int) strlen(str), 0, 0,
    NULL, 0) >= 0;
#endif
}

/*
 * Match `str' against `ree' and report the extent of subexpression `nr'
 * (0 is the whole match).
 *
 * On a successful match, *start and *end receive the offsets the back
 * end reports for that subexpression and TRUE is returned.  FALSE is
 * returned, with *start and *end untouched, when there is no match,
 * when `nr' is negative or exceeds the number of subexpressions, or
 * when the back end (V8, BSD) does not support subexpressions.
 */
int re_pmatch_sub(re_entry * ree, char *str, int nr, int *start, int *end)
{
  int rv = FALSE;

  if(nr < 0)
    return FALSE;

#ifdef HAVE_POSIX_REGEX
  if((size_t) nr <= ree->preg.re_nsub)
  {
    size_t cnt = ree->preg.re_nsub + 1;
    regmatch_t *pmatch = _malloc(cnt * sizeof(*pmatch));

    rv = !regexec(&(ree->preg), str, cnt, pmatch, 0);
    if(rv)
    {
      *start = pmatch[nr].rm_so;
      *end = pmatch[nr].rm_eo;
    }
    _free(pmatch);
  }
#endif
#ifdef HAVE_V8_REGEX
  /* not supported ! */
  rv = FALSE;
#endif
#ifdef HAVE_BSD_REGEX
  /* not supported ! */
  rv = FALSE;
#endif
#ifdef HAVE_GNU_REGEX
  if((size_t) nr <= ree->preg.re_nsub)
  {
    struct re_registers pmatch;
    size_t cnt = ree->preg.re_nsub + 1;
    int sra;

    pmatch.start = _malloc(cnt * sizeof(*pmatch.start));
    pmatch.end = _malloc(cnt * sizeof(*pmatch.end));
    pmatch.num_regs = cnt;

    /* use our own register arrays for this call, then restore the mode */
    sra = ree->preg.regs_allocated;
    ree->preg.regs_allocated = REGS_FIXED;
    rv = re_match(&(ree->preg), str, strlen(str), 0, &pmatch) >= 0;
    if(rv)
    {
      *start = pmatch.start[nr];
      *end = pmatch.end[nr];
    }
    ree->preg.regs_allocated = sra;

    _free(pmatch.start);
    _free(pmatch.end);
  }
#endif
#ifdef HAVE_PCRE_REGEX
  {
    int *pmatch;
    int pmatch_nr = 0;

    if(pcre_fullinfo(ree->preg, ree->preg_extra, PCRE_INFO_CAPTURECOUNT,
        &pmatch_nr) == 0 && pmatch_nr >= nr)
    {
      /* pcre_exec() needs three ints of work space per captured group */
      pmatch = _malloc((pmatch_nr + 1) * 3 * sizeof(int));
      rv = pcre_exec(ree->preg, ree->preg_extra, str, (int) strlen(str),
        0, 0, pmatch, (pmatch_nr + 1) * 3) >= 0;
      if(rv)
      {
        *start = pmatch[2 * nr];
        *end = pmatch[2 * nr + 1];
      }
      _free(pmatch);
    }
  }
#endif
  return rv;
}

/*
 * Match `str' against `ree' and report the extents of all
 * subexpressions.
 *
 * On a successful match TRUE is returned, *nr is set to the number of
 * reported groups (subexpressions plus one for the whole match) and
 * *subs points to a newly allocated array of 2 * (*nr) ints laid out as
 * start0, end0, start1, end1, ...  This function does not free that
 * array.  When there is no match, or the back end (V8, BSD) does not
 * support subexpressions, FALSE is returned and *nr / *subs are left
 * untouched.
 */
int re_pmatch_subs(re_entry * ree, char *str, int *nr, int **subs)
{
  int rv = FALSE;

#ifdef HAVE_POSIX_REGEX
  {
    size_t cnt = ree->preg.re_nsub + 1;
    size_t i;
    regmatch_t *pmatch = _malloc(cnt * sizeof(*pmatch));

    rv = !regexec(&(ree->preg), str, cnt, pmatch, 0);
    if(rv)
    {
      *subs = _malloc(cnt * 2 * sizeof(int));
      for(i = 0; i < cnt; i++)
      {
        (*subs)[2 * i] = pmatch[i].rm_so;
        (*subs)[2 * i + 1] = pmatch[i].rm_eo;
      }
      *nr = (int) cnt;
    }
    _free(pmatch);
  }
#endif
#ifdef HAVE_V8_REGEX
  /* not supported ! */
  rv = FALSE;
#endif
#ifdef HAVE_BSD_REGEX
  /* not supported ! */
  rv = FALSE;
#endif
#ifdef HAVE_GNU_REGEX
  {
    struct re_registers pmatch;
    size_t cnt = ree->preg.re_nsub + 1;
    size_t i;
    int sra;

    pmatch.start = _malloc(cnt * sizeof(*pmatch.start));
    pmatch.end = _malloc(cnt * sizeof(*pmatch.end));
    pmatch.num_regs = cnt;

    /* use our own register arrays for this call, then restore the mode */
    sra = ree->preg.regs_allocated;
    ree->preg.regs_allocated = REGS_FIXED;
    rv = re_match(&(ree->preg), str, strlen(str), 0, &pmatch) >= 0;
    if(rv)
    {
      *subs = _malloc(cnt * 2 * sizeof(int));
      for(i = 0; i < cnt; i++)
      {
        (*subs)[2 * i] = pmatch.start[i];
        (*subs)[2 * i + 1] = pmatch.end[i];
      }
      *nr = (int) cnt;
    }
    ree->preg.regs_allocated = sra;

    _free(pmatch.start);
    _free(pmatch.end);
  }
#endif
#ifdef HAVE_PCRE_REGEX
  {
    int *pmatch;
    int pmatch_nr = 0;
    int i;

    if(pcre_fullinfo(ree->preg, ree->preg_extra, PCRE_INFO_CAPTURECOUNT,
        &pmatch_nr) == 0)
    {
      /* pcre_exec() needs three ints of work space per captured group */
      pmatch = _malloc((pmatch_nr + 1) * 3 * sizeof(int));
      rv = pcre_exec(ree->preg, ree->preg_extra, str, (int) strlen(str),
        0, 0, pmatch, (pmatch_nr + 1) * 3) >= 0;
      if(rv)
      {
        *subs = _malloc((pmatch_nr + 1) * 2 * sizeof(int));
        for(i = 0; i < pmatch_nr + 1; i++)
        {
          (*subs)[2 * i] = pmatch[2 * i];
          (*subs)[2 * i + 1] = pmatch[2 * i + 1];
        }
        *nr = pmatch_nr + 1;
      }
      _free(pmatch);
    }
  }
#endif
  return rv;
}

#endif

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?