📄 lfname.c
字号:
/***************************************************************************/
/*    This code is part of WWW grabber called pavuk                        */
/*    Copyright (c) 1997 - 2001 Stefan Ondrejicka                          */
/*    Distributed under GPL 2 or later                                     */
/***************************************************************************/

#include "config.h"

#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>

#ifdef HAVE_FNMATCH
#include <fnmatch.h>
#else
#include "fnmatch.h"
#endif

#include "lfname.h"
#include "url.h"
#include "tools.h"
#include "tr.h"
#include "dlhash_tools.h"
#include "http.h"
#include "form.h"
#include "mime.h"
#include "jsbind.h"

/* Node kinds of the parsed "(...)" rule expressions. */
enum lfname_lsp_type
{
  LF_LSP_UNKNOWN,               /*** unknown ***/
  LF_LSP_STR,                   /*** string variable ***/
  LF_LSP_NUM,                   /*** number variable ***/
  LF_LSP_MACRO,                 /*** macro variable ***/
  LF_LSP_SUB,                   /*** subpart from regex ***/
  LF_LSP_SC,                    /*** strcat function ***/
  LF_LSP_SS,                    /*** substr function ***/
  LF_LSP_HASH,                  /*** hash function ***/
  LF_LSP_MD5,                   /*** md5 function ***/
  LF_LSP_LOWER,                 /*** lowerstr function ***/
  LF_LSP_UPPER,                 /*** upperstr function ***/
  LF_LSP_UENC,                  /*** urlencode function ***/
  LF_LSP_UDEC,                  /*** urldecode function ***/
  LF_LSP_DELCHR,                /*** delchr function ***/
  LF_LSP_TRCHR,                 /*** trchr function ***/
  LF_LSP_TRSTR,                 /*** trstr function ***/
  LF_LSP_STRSPN,                /*** strspn function ***/
  LF_LSP_STRCSPN,               /*** strcspn function ***/
  LF_LSP_STRLEN,                /*** strlen function ***/
  LF_LSP_NRSTR,                 /*** nrtostr function ***/
  LF_LSP_LCHR,                  /*** last character offset ***/
  LF_LSP_PLS,                   /*** plus ***/
  LF_LSP_MNS,                   /*** minus ***/
  LF_LSP_MOD,                   /*** mod ***/
  LF_LSP_MUL,                   /*** multiply ***/
  LF_LSP_DIV,                   /*** divide ***/
  LF_LSP_REMOVEPARAMETER,       /*** removes a parameter from an url string */
  LF_LSP_GETVALUE,              /*** reads a value from a parameter of an url string */
  LF_LSP_SIF,                   /*** if condition ***/
  LF_LSP_NOT,                   /*** logical not ***/
  LF_LSP_AND,                   /*** logical and ***/
  LF_LSP_OR,                    /*** logical or ***/
  LF_LSP_GETEXT,                /*** get extension from path ***/
#ifdef HAVE_MOZJS
  LF_LSP_JSF,                   /*** result of JavaScript function ***/
#endif
  LF_LSP_SEQ                    /*** string equal ***/
};

/* One node of a parsed rule expression: its kind, literal value, */
/* evaluated result and up to three operand sub-expressions.      */
struct lfname_lsp_var
{
  enum lfname_lsp_type type;
  union
  {
    char *str;
    int num;
    char macro;
  } val;
  enum lfname_lsp_type rettype;
  union
  {
    char *str;
    int num;
  } ret_val;
  struct lfname_lsp_var *param1;
  struct lfname_lsp_var *param2;
  struct lfname_lsp_var *param3;
};

static char *lfname_lsp_get_by_url(struct lfname_lsp_interp *);
static struct lfname_lsp_var *lfname_lsp_analyze(const char **);
static void lfname_lsp_var_free(struct lfname_lsp_var *);

/* Copy src[start..end) into dst, never writing more than dstsize bytes. */
/* dst is always NUL terminated. An invalid span (negative start or      */
/* end < start) yields an empty string; an oversized span is truncated.  */
/* Copying also stops at a NUL inside src.                               */
static void lfname_copy_span(char *dst, size_t dstsize, const char *src,
  long start, long end)
{
  size_t len;

  if(!dstsize)
    return;

  dst[0] = '\0';

  if(start < 0 || end < start)
    return;

  len = (size_t) (end - start);
  if(len >= dstsize)
    len = dstsize - 1;

  snprintf(dst, dstsize, "%.*s", (int) len, src + start);
}

/* Append one character at pos unless the buffer is full.               */
/* limit points to the last byte of the buffer, which is kept for the   */
/* terminating NUL. Returns the new end-of-string position.             */
static char *lfname_buf_putc(char *pos, const char *limit, int c)
{
  if(pos < limit)
  {
    *pos = (char) c;
    pos++;
  }
  *pos = '\0';

  return pos;
}

/* Append string s (NULL is treated as empty) at pos, silently          */
/* truncating at limit. Returns the new end-of-string position.         */
static char *lfname_buf_puts(char *pos, const char *limit, const char *s)
{
  if(s)
  {
    while(*s && pos < limit)
    {
      *pos = *s;
      pos++;
      s++;
    }
  }
  *pos = '\0';

  return pos;
}

/* Return pointer to the n-th occurrence of chr in str counted from the */
/* start, or NULL when there are fewer than n occurrences or n <= 0.    */
static char *_strfindnchr(char *str, int chr, int n)
{
  int cnt;
  char *p;

  /* n == 0 used to return str - 1, which is not a valid pointer */
  if(n <= 0)
    return NULL;

  for(p = str, cnt = 0; *p && cnt < n; p++)
  {
    if(*p == chr)
      cnt++;
  }

  if(cnt != n)
    return NULL;

  return p - 1;
}

/* Return pointer to the n-th occurrence of chr in str counted from the */
/* end, or NULL when there are fewer than n occurrences or n < 0.       */
/* For n == 0 the terminating NUL of str is returned.                   */
static char *_strrfindnchr(char *str, int chr, int n)
{
  int cnt = 0;
  size_t pos = strlen(str);

  /* scan by index so that no pointer before str is ever formed */
  while(cnt < n && pos > 0)
  {
    pos--;
    if(str[pos] == chr)
      cnt++;
  }

  if(cnt != n)
    return NULL;

  return str + pos;
}

#ifdef HAVE_REGEX
/* Return a newly allocated (tl_strdup) copy of the nr-th subexpression */
/* matched in urlstr by the regex rule lfnamep; nr == 0 is the whole    */
/* match. An empty string is returned when the rule is not a regex      */
/* rule, when nr is out of range or when the group did not match.       */
/* The result is truncated to 4095 characters.                          */
char *lfname_re_sub(lfname * lfnamep, const char *urlstr, int nr)
{
  char pom[4096];

  pom[0] = '\0';

  if(lfnamep->type != LFNAME_REGEX)
    return tl_strdup(pom);

#ifdef HAVE_POSIX_REGEX
  {
    regmatch_t *pmatch = lfnamep->pmatch;

    if(nr >= 0 && (size_t) nr <= lfnamep->preg.re_nsub)
    {
      lfname_copy_span(pom, sizeof(pom), urlstr,
        (long) pmatch[nr].rm_so, (long) pmatch[nr].rm_eo);
    }
  }
#elif defined(HAVE_V8_REGEX)
#ifdef HAVE_V8_REGSUB
  {
    char ssect[10];

    if(nr)
      snprintf(ssect, sizeof(ssect), "\\%d", nr);
    else
      strcpy(ssect, "&");
    /* NOTE(review): regsub() takes no destination size, so the */
    /* length of its output can not be limited from here        */
    regsub(lfnamep->preg, ssect, pom);
  }
#endif
#elif defined(HAVE_GNU_REGEX)
  if(nr >= 0 && (size_t) nr < lfnamep->preg.re_nsub)
  {
    lfname_copy_span(pom, sizeof(pom), urlstr,
      (long) lfnamep->pmatch.start[nr], (long) lfnamep->pmatch.end[nr]);
  }
#elif defined(HAVE_PCRE_REGEX)
  if(nr >= 0 && nr < lfnamep->pmatch_nr)
  {
    lfname_copy_span(pom, sizeof(pom), urlstr,
      (long) lfnamep->pmatch[2 * nr], (long) lfnamep->pmatch[2 * nr + 1]);
  }
#endif
  return tl_strdup(pom);
}
#endif

/* Build the local file name for a URL according to rule lfnamep.       */
/*                                                                      */
/* When the rule's transstr starts with '(' it is evaluated by          */
/* lfname_lsp_get_by_url(). Otherwise transstr is copied to the result  */
/* with the following sequences expanded:                               */
/*                                                                      */
/* \c  - character c taken literally                                    */
/* $x  - x-th match section (only with HAVE_REGEX)                      */
/* %i  - protocol id                                                    */
/* %p  - password                                                       */
/* %u  - user name                                                      */
/* %h  - host name                                                      */
/* %m  - domain name                                                    */
/* %r  - port number                                                    */
/* %t  - path as returned by get_abs_file_path()                        */
/* %d  - doc path                                                       */
/* %n  - doc name                                                       */
/* %b  - base name of document                                          */
/* %e  - extension                                                      */
/* %s  - search string                                                  */
/* %q  - POST query string                                              */
/* %M  - mime type                                                      */
/* %E  - extension by mime type                                         */
/* %o  - default doc name                                               */
/* %-x - x-th dirname from end                                          */
/* %x  - x-th dirname from start                                        */
/*                                                                      */
/* An unknown %c sequence is copied unchanged and the last character    */
/* of transstr is always copied literally. The expanded name is         */
/* truncated to 4095 characters.                                        */
/*                                                                      */
/* Returns a newly allocated string (tl_strdup) in the macro case, or   */
/* whatever lfname_lsp_get_by_url() returns for "(...)" rules;          */
/* presumably the caller releases it.                                   */
char *lfname_get_by_url(url * urlp, const char *urlstr, const char *mime_type, lfname * lfnamep)
{
  char *ps, *pd, *pp, *p1, *p2;
  char pom[4096];
  char pstr[4096];
  char *const pom_end = pom + sizeof(pom) - 1;  /* slot kept for the NUL */
  const char *val;
  int nr;
  char *n, *d, *t, *e, *b, *m, *q, *o;
  const char *mimeext;
  char *retv = NULL;

  /* work out the document path, adding the index name for directories */
  p1 = url_get_path(urlp);
  if(urlp->type == URLT_GOPHER)
  {
    if(urlp->p.gopher.selector[0] == '1')
      snprintf(pstr, sizeof(pstr), "/%s/%s", urlp->p.gopher.selector, priv_cfg.index_name);
    else
      snprintf(pstr, sizeof(pstr), "/%s", urlp->p.gopher.selector);
  }
  else if(tl_is_dirname(p1) || ((urlp->type == URLT_FTP || urlp->type == URLT_FTPS) && urlp->p.ftp.dir))
  {
    snprintf(pstr, sizeof(pstr), "%s/%s", p1, priv_cfg.index_name);
  }
  else
  {
    snprintf(pstr, sizeof(pstr), "%s", p1);
  }

  t = get_abs_file_path(pstr);
  snprintf(pstr, sizeof(pstr), "%s", t);

  /* split the path into directory, name, extension and base name */
  p1 = strrchr(pstr, '/');
  d = p1 ? tl_strndup(pstr, p1 - pstr) : tl_strdup("");
  n = p1 ? tl_strdup(p1 + 1) : tl_strdup(pstr);
  e = tl_strdup(tl_get_extension(pstr));
  p1 = strrchr(n, '.');
  if(p1)
    b = tl_strndup(n, p1 - n);
  else
    b = tl_strdup(n);

  /* domain is the site name without its first label; the site is  */
  /* treated as possibly NULL elsewhere in this function, so guard */
  /* it here as well                                               */
  m = url_get_site(urlp);
  if(!m)
    m = "";
  p1 = strchr(m, '.');
  if(p1)
    m = p1 + 1;

  /* POST query string; pstr is only used as scratch space here */
  q = NULL;
  if(urlp->status & URL_FORM_ACTION)
  {
    form_info *fi = (form_info *) urlp->extension;

    p1 = form_encode_urlencoded(fi->infos);
    if(p1)
    {
      snprintf(pstr, sizeof(pstr), "%s", p1);
      q = tl_strdup(pstr);
    }
    _free(p1);
  }
  if(!q)
    q = tl_strdup("");

  o = url_get_default_local_name(urlp);
  mimeext = mime_get_type_ext(mime_type);

  pom[0] = '\0';

  if(lfnamep->transstr[0] == '(')
  {
    struct lfname_lsp_interp interp;
    char port[16];

    interp.urlp = urlp;
    interp.urlstr = urlstr;
    interp.scheme = prottable[urlp->type].dirname;
    interp.passwd = url_get_pass(urlp, NULL) ? url_get_pass(urlp, NULL) : "";
    interp.user = url_get_user(urlp, NULL) ? url_get_user(urlp, NULL) : "";
    interp.host = url_get_site(urlp) ? url_get_site(urlp) : "";
    interp.domain = m;
    snprintf(port, sizeof(port), "%d", url_get_port(urlp));
    interp.port = port;
    interp.path = d;
    interp.name = n;
    interp.basename = b;
    interp.extension = e;
    interp.query = url_get_search_str(urlp) ? url_get_search_str(urlp) : "";
    interp.post_query = q;
    interp.deflt = o;
    interp.mime_type = mime_type;
    interp.mime_type_ext = mimeext;
    interp.orig = lfnamep;

    retv = lfname_lsp_get_by_url(&interp);
  }
  else
  {
    for(ps = lfnamep->transstr, pd = pom; *ps; ps++)
    {
      /* the last character can not start an escape or macro */
      if(!*(ps + 1))
      {
        pd = lfname_buf_putc(pd, pom_end, *ps);
        continue;
      }
      switch (*ps)
      {
      case '\\':
        ps++;
        pd = lfname_buf_putc(pd, pom_end, *ps);
        break;
#ifdef HAVE_REGEX
      case '$':
        ps++;
        nr = strtol(ps, &pp, 10);
        p1 = lfname_re_sub(lfnamep, urlstr, nr);
        pd = lfname_buf_puts(pd, pom_end, p1);
        _free(p1);
        /* the loop increment steps over the last consumed character */
        ps = pp - 1;
        break;
#endif
      case '%':
        ps++;
        pstr[0] = '\0';
        /* val is the expansion; cases needing scratch space use pstr */
        val = pstr;
        switch (*ps)
        {
        case 'i':
          val = prottable[urlp->type].dirname;
          break;
        case 'p':
          val = url_get_pass(urlp, NULL);
          break;
        case 'u':
          val = url_get_user(urlp, NULL);
          break;
        case 'h':
          val = url_get_site(urlp);
          break;
        case 'm':
          val = m;
          break;
        case 'r':
          snprintf(pstr, sizeof(pstr), "%d", url_get_port(urlp));
          break;
        case 't':
          val = t;
          break;
        case 'd':
          val = d;
          break;
        case 'n':
          val = n;
          break;
        case 'b':
          val = b;
          break;
        case 'e':
          val = e;
          break;
        case 's':
          val = url_get_search_str(urlp);
          break;
        case 'q':
          val = q;
          break;
        case 'M':
          val = mime_type;
          break;
        case 'E':
          val = mimeext;
          break;
        case 'o':
          val = o;
          break;
        case '-':
          nr = strtol(ps + 1, &pp, 10);
          p1 = _strrfindnchr(d, '/', nr);
          /* p1 != NULL bounds nr by strlen(d), so nr + 1 can not overflow */
          p2 = p1 ? _strrfindnchr(d, '/', nr + 1) : NULL;
          if(p1 && p2)
            lfname_copy_span(pstr, sizeof(pstr), d, (long) (p2 + 1 - d), (long) (p1 - d));
          ps = pp - 1;
          break;
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
          nr = strtol(ps, &pp, 10);
          p1 = _strfindnchr(d, '/', nr);
          /* p1 != NULL bounds nr by strlen(d), so nr + 1 can not overflow */
          p2 = p1 ? _strfindnchr(d, '/', nr + 1) : NULL;
          if(p1 && p2)
            lfname_copy_span(pstr, sizeof(pstr), d, (long) (p1 + 1 - d), (long) (p2 - d));
          else if(p1)
            val = p1 + 1;       /* last directory: take the rest of the path */
          ps = pp - 1;
          break;
        default:
          /* unknown macro - keep it as it is */
          pstr[0] = *(ps - 1);
          pstr[1] = *ps;
          pstr[2] = '\0';
        }
        pd = lfname_buf_puts(pd, pom_end, val);
        break;
      default:
        pd = lfname_buf_putc(pd, pom_end, *ps);
      }
    }
    retv = tl_strdup(pom);
  }

  free(e);
  free(n);
  free(b);                      /* was leaked before */
  free(t);
  free(d);
  free(q);
  free(o);

  return retv;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -