⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lfname.c

📁 网络爬虫程序
💻 C
📖 第 1 页 / 共 3 页
字号:
void lfname_free(lfname * lfnamep){#ifdef HAVE_REGEX  if(lfnamep->type == LFNAME_REGEX)  {#ifdef HAVE_POSIX_REGEX    regfree(&(lfnamep->preg));    _free(lfnamep->pmatch);#elif defined(HAVE_V8_REGEX)    _free(lfnamep->preg);#elif defined(HAVE_GNU_REGEX)    regfree(&lfnamep->preg);    _free(lfnamep->pmatch.start);    _free(lfnamep->pmatch.end);#elif defined(HAVE_PCRE_REGEX)    _free(lfnamep->preg);    _free(lfnamep->preg_extra);    _free(lfnamep->pmatch);#endif  }#endif  _free(lfnamep->matchstr);  _free(lfnamep->transstr);  _free(lfnamep);}lfname *lfname_new(lfname_type type, const char *mpt, const char *str){  lfname *rv;  const char *p;  rv = _malloc(sizeof(lfname));  rv->type = type;  rv->matchstr = NULL;  rv->transstr = NULL;#ifdef HAVE_REGEX  if(type == LFNAME_REGEX)  {#ifdef HAVE_POSIX_REGEX    int ec;    if((ec = regcomp(&(rv->preg), mpt, REG_EXTENDED)))    {      char pom[PATH_MAX];      xprintf(0, gettext("Error compiling regular expression : %s\n"), mpt);      regerror(ec, &(rv->preg), pom, sizeof(pom));      xprintf(0, "%s\n", pom);      regfree(&(rv->preg));      free(rv);      return NULL;    }    rv->pmatch = _malloc((rv->preg.re_nsub + 1) * sizeof(regmatch_t));#elif defined(HAVE_V8_REGEX)    if(!(rv->preg = regcomp(mpt)))    {      xprintf(0, gettext("Error compiling regular expression : %s\n"), mpt);      free(rv->preg);      free(rv);      return NULL;    }#elif defined(HAVE_BSD_REGEX)    if((p = re_comp(mpt)))    {      xprintf(0, gettext("Error compiling regular expression : %s\n"), mpt);      xprintf(0, "%s", p);      free(rv);      return NULL;    }#elif defined(HAVE_GNU_REGEX)    rv->preg.allocated = 0;    rv->preg.buffer = NULL;    rv->preg.fastmap = NULL;    re_set_syntax(r_2phase_star);    if((p = re_compile_pattern(mpt, strlen(mpt), &rv->preg)))    {      xprintf(0, gettext("Error compiling regular expression : %s\n"), mpt);      xprintf(0, "%s\n", p);      regfree(&(rv->preg));      free(rv);      return NULL;    }    rv->pmatch.start =      _malloc((rv->preg.re_nsub + 1) * sizeof(*rv->pmatch.start));    rv->pmatch.end =      _malloc((rv->preg.re_nsub + 1) * sizeof(*rv->pmatch.end));    rv->pmatch.num_regs = rv->preg.re_nsub + 1;    rv->preg.regs_allocated = REGS_FIXED;#elif defined(HAVE_PCRE_REGEX)    int errcode = 0;    if((rv->preg = pcre_compile(mpt, 0, (const char **) &p, &errcode, NULL)))    {      rv->preg_extra = pcre_study(rv->preg, 0, (const char **) &p);      pcre_fullinfo(rv->preg, rv->preg_extra, PCRE_INFO_CAPTURECOUNT,        &rv->pmatch_nr);      rv->pmatch_nr++;      rv->pmatch = (int *) _malloc(rv->pmatch_nr * 3 * sizeof(int));    }    else    {      xprintf(0, gettext("Error compiling regular expression : %s\n"), mpt);      xprintf(0, "%s\n", p);      _free(rv);      return NULL;    }#endif  }#endif  if(str[0] == '(')  {    struct lfname_lsp_var *variant;    p = str;    if((variant = lfname_lsp_analyze(&p)))    {      lfname_lsp_var_free(variant);      if(*p)      {        xprintf(0, gettext("LSP analyze error: bad token at - %s\n"), p);        lfname_free(rv);        return NULL;      }      else      {        rv->transstr = tl_strdup(str);      }    }    else    {      lfname_free(rv);      return NULL;    }  }  else    rv->transstr = tl_strdup(str);  rv->matchstr = tl_strdup(mpt);  return rv;}int lfname_match(lfname * lfnamep, const char *urlstr){#ifdef HAVE_REGEX  if(lfnamep->type == LFNAME_REGEX)  {#ifdef HAVE_POSIX_REGEX    return !regexec(&(lfnamep->preg), urlstr, lfnamep->preg.re_nsub + 1,      lfnamep->pmatch, 0);#elif defined(HAVE_V8_REGEX)    return regexec(lfnamep->preg, urlstr);#elif defined(HAVE_BSD_REGEX)    re_comp(lfnamep->matchstr);    return re_exec(urlstr);#elif defined(HAVE_GNU_REGEX)    return re_match(&(lfnamep->preg), urlstr, strlen(urlstr), 0,      &lfnamep->pmatch) >= 0;#elif defined(HAVE_PCRE_REGEX)    return pcre_exec(lfnamep->preg, lfnamep->preg_extra, urlstr,      strlen(urlstr), 0, 0, lfnamep->pmatch, 3 * lfnamep->pmatch_nr) >= 0;#endif  }  else#endif    return !fnmatch(lfnamep->matchstr, urlstr, 0);}int lfname_check_rule(const char *str){  if(str[0] == '(')  {    const char *p = str;    struct lfname_lsp_var *variant;    if((variant = lfname_lsp_analyze(&p)))    {      lfname_lsp_var_free(variant);      if(*p)      {        xprintf(0, gettext("LSP analyze error: bad token at - %s\n"), p);        return FALSE;      }      else        return TRUE;    }    else      return FALSE;  }  return TRUE;}int lfname_check_pattern(lfname_type type, const char *str){#ifdef HAVE_REGEX  if(type == LFNAME_REGEX)  {#ifdef HAVE_POSIX_REGEX    int ec;    char pom[PATH_MAX];    regex_t preg;    ec = regcomp(&preg, str, REG_EXTENDED);    if(ec)    {      xprintf(0, gettext("Error compiling regular expression : %s\n"), str);      regerror(ec, &preg, pom, sizeof(pom));      xprintf(0, "%s\n", pom);    }    regfree(&preg);    return !ec;#elif defined(HAVE_V8_REGEX)    regexp *preg;    preg = regcomp(str);    if(!preg)      xprintf(0, gettext("Error compiling regular expression : %s\n"), str);    else      free(preg);    return preg != NULL;#elif defined(HAVE_BSD_REGEX)    char *p;    p = re_comp(str);    if(p)    {      xprintf(0, gettext("Error compiling regular expression : %s\n"), str);      xprintf(0, "%s", p);    }    return p == NULL;#elif defined(HAVE_GNU_REGEX)    char *p;    struct re_pattern_buffer preg;    preg.allocated = 0;    preg.buffer = NULL;    preg.fastmap = NULL;    if((p = re_compile_pattern(str, strlen(str), &preg)))    {      xprintf(0, gettext("Error compiling regular expression : %s\n"), str);      xprintf(0, "%s\n", p);    }    regfree(&preg);    return p == NULL;#elif defined(HAVE_PCRE_REGEX)    pcre *re;    const char *errmsg = NULL;    int errcode = 0;    if(!(re = pcre_compile(str, 0, &errmsg, &errcode, NULL)))    {      xprintf(0, gettext("Error compiling regular expression : %s\n"), str);      xprintf(0, "%s\n", errmsg);      return -1;    }    else      free(re);    return re != NULL;#endif  }  else#endif    return TRUE;}const char *lfname_interp_get_macro(struct lfname_lsp_interp *interp,int macro){  switch (macro)  {  case 'i':    return interp->scheme;  case 'p':    return interp->passwd;  case 'u':    return interp->user;  case 'h':    return interp->host;  case 'm':    return interp->domain;  case 'r':    return interp->port;  case 'd':    return interp->path;  case 'n':    return interp->name;  case 'b':    return interp->basename;  case 'e':    return interp->extension;  case 's':    return interp->query;  case 'q':    return interp->post_query;  case 'U':    return interp->urlstr;  case 'o':    return interp->deflt;  case 'M':    return interp->mime_type;  case 'E':    return interp->mime_type_ext;  }  return NULL;}int lfname_check_macro(int macro){  return strchr("ipuhmrdnbesUoqEM", macro) != NULL;}static const struct{  enum lfname_lsp_type type;  enum lfname_lsp_type rettype;  char *name;  short params;  enum lfname_lsp_type p1type;  enum lfname_lsp_type p2type;  enum lfname_lsp_type p3type;} lfname_lsp_ftbl[] ={  {LF_LSP_UNKNOWN, LF_LSP_UNKNOWN, NULL, 0,      LF_LSP_UNKNOWN, LF_LSP_UNKNOWN, LF_LSP_UNKNOWN},  {LF_LSP_STR, LF_LSP_STR, NULL, 0,      LF_LSP_UNKNOWN, LF_LSP_UNKNOWN, LF_LSP_UNKNOWN},  {LF_LSP_NUM, LF_LSP_NUM, NULL, 0,      LF_LSP_UNKNOWN, LF_LSP_UNKNOWN, LF_LSP_UNKNOWN},  {LF_LSP_MACRO, LF_LSP_STR, NULL, 0,      LF_LSP_UNKNOWN, LF_LSP_UNKNOWN, LF_LSP_UNKNOWN},  {LF_LSP_SUB, LF_LSP_STR, "sp ", 1,      LF_LSP_NUM, LF_LSP_UNKNOWN, LF_LSP_UNKNOWN},  {LF_LSP_SC, LF_LSP_STR, "sc ", 2, LF_LSP_STR, LF_LSP_STR, LF_LSP_UNKNOWN},  {LF_LSP_SS, LF_LSP_STR, "ss ", 3, LF_LSP_STR, LF_LSP_NUM, LF_LSP_NUM},  {LF_LSP_HASH, LF_LSP_NUM, "hsh ", 2, LF_LSP_STR, LF_LSP_NUM, LF_LSP_UNKNOWN},  {LF_LSP_MD5, LF_LSP_STR, "md5 ", 1,      LF_LSP_STR, LF_LSP_UNKNOWN, LF_LSP_UNKNOWN},  {LF_LSP_LOWER, LF_LSP_STR, "lo ", 1,      LF_LSP_STR, LF_LSP_UNKNOWN, LF_LSP_UNKNOWN},  {LF_LSP_UPPER, LF_LSP_STR, "up ", 1,      LF_LSP_STR, LF_LSP_UNKNOWN, LF_LSP_UNKNOWN},  {LF_LSP_UENC, LF_LSP_STR, "ue ", 2, LF_LSP_STR, LF_LSP_STR, LF_LSP_UNKNOWN},  {LF_LSP_UDEC, LF_LSP_STR, "ud ", 1,      LF_LSP_STR, LF_LSP_UNKNOWN, LF_LSP_UNKNOWN},  {LF_LSP_DELCHR, LF_LSP_STR, "dc ", 2,      LF_LSP_STR, LF_LSP_STR, LF_LSP_UNKNOWN},  {LF_LSP_TRCHR, LF_LSP_STR, "tc ", 3, LF_LSP_STR, LF_LSP_STR, LF_LSP_STR},  {LF_LSP_TRSTR, LF_LSP_STR, "ts ", 3, LF_LSP_STR, LF_LSP_STR, LF_LSP_STR},  {LF_LSP_STRSPN, LF_LSP_NUM, "spn ", 2,      LF_LSP_STR, LF_LSP_STR, LF_LSP_UNKNOWN},  {LF_LSP_STRCSPN, LF_LSP_NUM, "cspn ", 2,      LF_LSP_STR, LF_LSP_STR, LF_LSP_UNKNOWN},  {LF_LSP_STRLEN, LF_LSP_NUM, "sl ", 1,      LF_LSP_STR, LF_LSP_UNKNOWN, LF_LSP_UNKNOWN},  {LF_LSP_NRSTR, LF_LSP_STR, "ns ", 2, LF_LSP_STR, LF_LSP_NUM, LF_LSP_UNKNOWN},  {LF_LSP_LCHR, LF_LSP_NUM, "lc ", 2, LF_LSP_STR, LF_LSP_STR, LF_LSP_UNKNOWN},  {LF_LSP_PLS, LF_LSP_NUM, "+ ", 2, LF_LSP_NUM, LF_LSP_NUM, LF_LSP_UNKNOWN},  {LF_LSP_MNS, LF_LSP_NUM, "- ", 2, LF_LSP_NUM, LF_LSP_NUM, LF_LSP_UNKNOWN},  {LF_LSP_MOD, LF_LSP_NUM, "% ", 2, LF_LSP_NUM, LF_LSP_NUM, LF_LSP_UNKNOWN},  {LF_LSP_MUL, LF_LSP_NUM, "* ", 2, LF_LSP_NUM, LF_LSP_NUM, LF_LSP_UNKNOWN},  {LF_LSP_DIV, LF_LSP_NUM, "/ ", 2, LF_LSP_NUM, LF_LSP_NUM, LF_LSP_UNKNOWN},  {LF_LSP_REMOVEPARAMETER, LF_LSP_STR, "rmpar ", 2,      LF_LSP_STR, LF_LSP_STR, LF_LSP_UNKNOWN},  {LF_LSP_GETVALUE, LF_LSP_STR, "getval ", 2,      LF_LSP_STR, LF_LSP_STR, LF_LSP_UNKNOWN},  {LF_LSP_SIF, LF_LSP_STR, "sif ", 3, LF_LSP_NUM, LF_LSP_STR, LF_LSP_STR},  {LF_LSP_NOT, LF_LSP_NUM, "! ", 1,      LF_LSP_NUM, LF_LSP_UNKNOWN, LF_LSP_UNKNOWN},  {LF_LSP_AND, LF_LSP_NUM, "& ", 2, LF_LSP_NUM, LF_LSP_NUM, LF_LSP_UNKNOWN},  {LF_LSP_OR, LF_LSP_NUM, "| ", 2, LF_LSP_NUM, LF_LSP_NUM, LF_LSP_UNKNOWN},  {LF_LSP_GETEXT, LF_LSP_STR, "getext ", 1,      LF_LSP_STR, LF_LSP_UNKNOWN, LF_LSP_UNKNOWN},#ifdef HAVE_MOZJS  {LF_LSP_JSF, LF_LSP_STR, "jsf ", 1,      LF_LSP_STR, LF_LSP_UNKNOWN, LF_LSP_UNKNOWN},#endif  {LF_LSP_SEQ, LF_LSP_NUM, "seq ", 2, LF_LSP_STR, LF_LSP_STR, LF_LSP_UNKNOWN}};static enum lfname_lsp_type lfname_lsp_token_type(const char **pstr){  const char *p = *pstr;  enum lfname_lsp_type retv = LF_LSP_UNKNOWN;  while(*p == ' ')    p++;  if(*p == '(')  {    int i;    for(i = 0; i < NUM_ELEM(lfname_lsp_ftbl); i++)    {      if(lfname_lsp_ftbl[i].name &&        !strncmp(p + 1, lfname_lsp_ftbl[i].name,          strlen(lfname_lsp_ftbl[i].name)))      {        retv = lfname_lsp_ftbl[i].type;        p += 1 + strlen(lfname_lsp_ftbl[i].name);        break;      }    }  }  else if(*p == '\"')  {    retv = LF_LSP_STR;    p++;  }  else if(*p == '%')  {    retv = LF_LSP_MACRO;    p++;  }  else if(tl_ascii_isdigit(*p) || *p == '-')    retv = LF_LSP_NUM;  *pstr = p;  return retv;}static struct lfname_lsp_var *lfname_lsp_var_new(enum lfname_lsp_type type){  struct lfname_lsp_var *retv = NULL;  retv = _malloc(sizeof(struct lfname_lsp_var));  retv->type = type;  retv->val.str = NULL;  retv->rettype = lfname_lsp_ftbl[type].rettype;  retv->ret_val.str = NULL;  retv->param1 = NULL;  retv->param2 = NULL;  retv->param3 = NULL;  return retv;}static void lfname_lsp_var_ret_free(struct lfname_lsp_var *var){  if(!var)    return;  lfname_lsp_var_ret_free(var->param1);  lfname_lsp_var_ret_free(var->param2);  lfname_lsp_var_ret_free(var->param3);  if(var->rettype == LF_LSP_STR)    _free(var->ret_val.str);}static void lfname_lsp_var_free(struct lfname_lsp_var *var){  if(!var)    return;  lfname_lsp_var_free(var->param1);  lfname_lsp_var_free(var->param2);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -