⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 url.c

📁 网络爬虫程序
💻 C
📖 第 1 页 / 共 5 页
字号:
  return p;}static char *url_get_local_name(url * urlp, const char *mime_type){  return url_get_local_name_real(urlp, mime_type, TRUE);}/******************************************************//* k danemu URL vytvori meno suboru v lokalnom strome *//* FIXME: Translate me!                               *//******************************************************/static char *url_to_filename_real(url * urlp, const char *mime_type,int lockfn){  char *p;  bool_t inserted = FALSE;  if(!urlp->local_name && prottable[urlp->type].supported)  {    p = url_get_local_name(urlp, mime_type);    if(cfg.enable_info && urlp->type != URLT_FILE &&      !(urlp->status & URL_REDIRECT))    {      char *di;      LOCK_GETLFNAME;      di = dinfo_get_unique_name(urlp, p, lockfn);      UNLOCK_GETLFNAME;      if(di)      {        _free(p);        p = di;      }    }    else if(!cfg.enable_info && cfg.unique_doc &&      urlp->type != URLT_FILE && !(urlp->status & URL_REDIRECT))    {      /*** such filename have already other URL   ***/      /*** we need to compute new unique filename ***/      char *f;      char *pom;      int i;      url *inhash;      LOCK_CFG_FILEHASH;      inhash = (url *) dlhash_find_by_key(cfg.fn_hash_tbl, (dllist_t) p);      if(!inhash && !inserted)      {        urlp->local_name = p;        dlhash_insert(cfg.fn_hash_tbl, (dllist_t) urlp);        inserted = TRUE;      }      if(inhash && url_compare(inhash, urlp))        inhash = NULL;      UNLOCK_CFG_FILEHASH;      if(inhash)      {        int pomlen = strlen(p) + 9;        LOCK_GETLFNAME;        pom = _malloc(pomlen);        f = strrchr(p, '/');        if(!f)          f = "";        else        {          *f = '\0';          f++;        }        if (cfg.remove_before_store)        {          snprintf(pom, pomlen, "%s/%s", p, f);         }        else        {           i = 0;          do          {            i++;            snprintf(pom, pomlen, "%s/%03d%s", p, i, f);            LOCK_CFG_FILEHASH;            inhash = (url *) dlhash_find_by_key(cfg.fn_hash_tbl,            (dllist_t) pom);            if(!inhash && !inserted)            {              urlp->local_name = pom;              dlhash_insert(cfg.fn_hash_tbl, (dllist_t) urlp);              inserted = TRUE;            }            UNLOCK_CFG_FILEHASH;          }          while(inhash);        }        UNLOCK_GETLFNAME;        _free(p);        p = pom;      }    }    if(!inserted)    {      LOCK_CFG_FILEHASH;      urlp->local_name = p;      dlhash_insert(cfg.fn_hash_tbl, (dllist_t) urlp);      inserted = TRUE;      UNLOCK_CFG_FILEHASH;    }  }  return urlp->local_name;}char *url_to_filename(url * urlp, int lockfn){  return url_to_filename_real(urlp, NULL, lockfn);}char *url_to_filename_with_type(url * urlp, const char *mime_type, int lockfn){  return url_to_filename_real(urlp, mime_type, lockfn);}void url_set_filename(url * urlp, char *local_name){  LOCK_CFG_FILEHASH;  urlp->local_name = local_name;  dlhash_insert(cfg.fn_hash_tbl, (dllist_t) urlp);  UNLOCK_CFG_FILEHASH;}/******************************************************//* k danemu URL vytvori meno suboru v lokalnom strome *//* FIXME: Translate me!                               *//******************************************************/void url_changed_filename(url * urlp){  url_remove_from_file_hash_tab(urlp);  _free(urlp->local_name);  url_add_to_file_hash_tab(urlp);}/****************************************************************//* k danemu URL vytvori meno docasneho suboru v lokalnom strome *//* FIXME: Translate me!                                         *//****************************************************************/char *url_to_in_filename(url * urlp){  char *pom;  char *p;  if(cfg.mode == MODE_NOSTORE || cfg.mode == MODE_FTPDIR || (cfg.dumpfd >= 0))  {    int pomlen = strlen(priv_cfg.cache_dir) + 50;    pom = _malloc(pomlen);#ifdef HAVE_MT    snprintf(pom, pomlen, "%s/.in_pavuk_nostore_%d_%ld",      priv_cfg.cache_dir, (int) getpid(), pthread_self());#else    snprintf(pom, pomlen, "%s/.in_pavuk_nostore_%d", priv_cfg.cache_dir,      (int) getpid());#endif    return pom;  }  p = url_to_filename(urlp, TRUE);  pom = _malloc(strlen(p) + 5);  strcpy(pom, p);  p = strrchr(pom, '/');  if(!p)    p = pom;  else    p++;  memmove(p + 4, p, strlen(p) + 1);  strncpy(p, ".in_", 4);  return pom;}/************************************************//* make from URL structure URL string           *//************************************************/char *url_to_urlstr(url * urlp, int wa){  char *p;  char portstr[10];  char *retv;  snprintf(portstr, sizeof(portstr), ":%d", url_get_port(urlp));  switch (urlp->type)  {  case URLT_HTTP:  case URLT_HTTPS:    retv = _malloc(strlen(prottable[urlp->type].typestr) +      (urlp->p.http.user ? strlen(urlp->p.http.user) + 1 : 0) +      (urlp->p.http.password ? strlen(urlp->p.http.password) + 1 : 0) +      strlen(urlp->p.http.host) +      (urlp->p.http.port ==        prottable[urlp->type].default_port ? 0 : strlen(portstr) + 1) +      strlen(urlp->p.http.document) +      (urlp->p.http.searchstr ? strlen(urlp->p.http.searchstr) + 1 : 0) +      (urlp->p.http.anchor_name ? strlen(urlp->p.http.anchor_name) + 1 : 0) +      1);    sprintf(retv, "%s%s%s%s%s%s%s%s%s%s%s%s", prottable[urlp->type].typestr,      urlp->p.http.user ? urlp->p.http.user : "",      urlp->p.http.password ? ":" : "",      urlp->p.http.password ? urlp->p.http.password : "",      (urlp->p.http.password || urlp->p.http.user) ? "@" : "",      urlp->p.http.host,      (urlp->p.http.port ==        prottable[urlp->type].default_port ? "" : portstr),      urlp->p.http.document, urlp->p.http.searchstr ? "?" : "",      urlp->p.http.searchstr ? urlp->p.http.searchstr : "", wa      && urlp->p.http.anchor_name ? "#" : "", wa      && urlp->p.http.anchor_name ? urlp->p.http.anchor_name : "");    if(!urlp->p.http.searchstr &&      (urlp->status & URL_FORM_ACTION) &&      (((form_info *) urlp->extension)->method == FORM_M_GET))    {      char *ss;      ss = form_encode_urlencoded(((form_info *) urlp->extension)->infos);      if(ss)        retv = tl_str_concat(retv, "?", ss, NULL);      _free(ss);    }    return retv;  case URLT_FILE:    p = get_abs_file_path(urlp->p.file.filename);    retv = _malloc(strlen(prottable[URLT_FILE].typestr) +      strlen(p) +      (urlp->p.file.searchstr ? strlen(urlp->p.file.searchstr) + 1 : 0) +      ((wa &&          urlp->p.file.anchor_name) ? strlen(urlp->p.file.anchor_name) +        1 : 0) + 1);    sprintf(retv, "%s%s%s%s%s%s", prottable[URLT_FILE].typestr, p,      urlp->p.file.searchstr ? "?" : "",      urlp->p.file.searchstr ? urlp->p.file.searchstr : "",      urlp->p.file.anchor_name ? "#" : "",      urlp->p.file.anchor_name ? urlp->p.file.anchor_name : "");    free(p);    return retv;  case URLT_FTP:  case URLT_FTPS:    retv = _malloc(strlen(prottable[urlp->type].typestr) +      (urlp->p.ftp.user ? strlen(urlp->p.ftp.user) + 1 : 0) +      (urlp->p.ftp.password ? strlen(urlp->p.ftp.password) + 1 : 0) +      strlen(urlp->p.ftp.host) +      (urlp->p.ftp.port ==        prottable[urlp->type].default_port ? 0 : strlen(portstr) + 1) +      strlen(urlp->p.ftp.path) +      (urlp->p.ftp.anchor_name ? strlen(urlp->p.ftp.anchor_name) + 1 : 0) +      1);    sprintf(retv, "%s%s%s%s%s%s%s%s%s%s", prottable[urlp->type].typestr,      urlp->p.ftp.user ? urlp->p.ftp.user : "",      urlp->p.ftp.password ? ":" : "",      urlp->p.ftp.password ? urlp->p.ftp.password : "",      (urlp->p.ftp.password || urlp->p.ftp.user) ? "@" : "",      urlp->p.ftp.host,      (urlp->p.ftp.port == prottable[urlp->type].default_port ? "" : portstr),      urlp->p.ftp.path,      wa && urlp->p.ftp.anchor_name ? "#" : "",      wa && urlp->p.ftp.anchor_name ? urlp->p.ftp.anchor_name : "");    return retv;  case URLT_GOPHER:    retv = _malloc(strlen(prottable[URLT_GOPHER].typestr) +      strlen(urlp->p.gopher.host) +      (urlp->p.gopher.port ==        prottable[urlp->type].default_port ? 0 : strlen(portstr) + 1) +      strlen(urlp->p.gopher.selector) + 2);    sprintf(retv, "%s%s%s/%s", prottable[URLT_GOPHER].typestr,      urlp->p.gopher.host,      (urlp->p.gopher.port ==        prottable[urlp->type].default_port ? "" : portstr),      urlp->p.gopher.selector);    return retv;  case URLT_UNKNOWN:    return tl_strdup(urlp->p.unsup.urlstr);  case URLT_FROMPARENT:  default:    return NULL;  }}char *url_to_request_urlstr(url * urlp, int absolute){  char *p, *s, *w, *u;  char portstr[10];  char *retv = NULL;  snprintf(portstr, sizeof(portstr), ":%d", url_get_port(urlp));  switch (urlp->type)  {  case URLT_HTTP:  case URLT_HTTPS:    p = url_encode_str_safe(urlp->p.http.document, URL_PATH_UNSAFE);    s = urlp->p.http.searchstr ?      url_encode_str_safe(urlp->p.http.searchstr, URL_QUERY_UNSAFE) : NULL;    if(absolute)      retv = tl_str_concat(NULL, prottable[urlp->type].typestr,        urlp->p.http.host,        (urlp->p.http.port ==          prottable[urlp->type].default_port ? "" : portstr), NULL);    retv = tl_str_concat(retv, p ? p : "", s ? "?" : "", s ? s : "", NULL);    _free(p);    _free(s);    if(!urlp->p.http.searchstr &&      (urlp->status & URL_FORM_ACTION) &&      (((form_info *) urlp->extension)->method == FORM_M_GET))    {      char *ss;      ss = form_encode_urlencoded(((form_info *) urlp->extension)->infos);      if(ss)        retv = tl_str_concat(retv, "?", ss, NULL);      _free(ss);    }    break;  case URLT_FTP:  case URLT_FTPS:    p = url_encode_str_safe(urlp->p.ftp.path, URL_PATH_UNSAFE);    if(absolute)    {      w = urlp->p.ftp.password ?        url_encode_str_safe(urlp->p.ftp.password, URL_AUTH_UNSAFE) : NULL;      u = urlp->p.ftp.user ?        url_encode_str_safe(urlp->p.ftp.user, URL_AUTH_UNSAFE) : NULL;      retv = tl_str_concat(NULL, prottable[urlp->type].typestr,        u ? u : "", w ? ":" : "", w ? w : "",        (w || u) ? "@" : "", urlp->p.ftp.host,        (urlp->p.ftp.port ==          prottable[urlp->type].default_port ? "" : portstr), NULL);      _free(u);      _free(w);    }    retv = tl_str_concat(retv, p, NULL);    _free(p);    break;  case URLT_GOPHER:    p = url_encode_str_safe(urlp->p.gopher.selector, URL_PATH_UNSAFE);    if(absolute)      retv = tl_str_concat(NULL, prottable[urlp->type].typestr,        urlp->p.gopher.host,        (urlp->p.gopher.port ==          prottable[urlp->type].default_port ? "" : portstr), NULL);    retv = tl_str_concat(retv, "/", urlp->p.gopher.selector, NULL);    _free(p);    break;  default:    break;  }  return retv;}/********************************************************//* z URL vrati adresu servera pre dokument              *//* FIXME: Translate me!                                 *//********************************************************/char *url_get_site(url * urlr){  switch (urlr->type)  {  case URLT_HTTP:  case URLT_HTTPS:    return urlr->p.http.host;  case URLT_FTP:  case URLT_FTPS:    return urlr->p.ftp.host;  case URLT_GOPHER:    return urlr->p.gopher.host;  default:    return NULL;  }}int url_get_port(url * urlr){  switch (urlr->type)  {  case URLT_HTTP:  case URLT_HTTPS:    return (int) urlr->p.http.port;  case URLT_FTP:  case URLT_FTPS:    return (int) urlr->p.ftp.port;  case URLT_GOPHER:    return (int) urlr->p.gopher.port;  default:    return 0;  }}char *url_get_path(url * urlr){  switch (urlr->type)  {  case URLT_HTTP:  case URLT_HTTPS:    return urlr->p.http.document;  case URLT_FTP:  case URLT_FTPS:    return urlr->p.ftp.path;  case URLT_GOPHER:    return urlr->p.gopher.selector;  case URLT_FILE:    return urlr->p.file.filename;  default:    return NULL;  }}void url_set_path(url * urlr, char *path){  switch (urlr->type)  {  case URLT_HTTP:  case URLT_HTTPS:    _free(urlr->p.http.document);    urlr->p.http.document = tl_strdup(path);    break;  case URLT_FTP:  case URLT_FTPS:    _free(urlr->p.ftp.path);    urlr->p.ftp.path = tl_strdup(path);    break;  case URLT_GOPHER:    _free(urlr->p.gopher.selector);    urlr->p.gopher.selector = tl_strdup(path);    break;  case URLT_FILE:    _free(urlr->p.file.filename);    urlr->p.file.filename = tl_strdup(path);    break;  default:    return;  }  url_changed_filename(urlr);}char *url_get_full_path(url * urlr){  char *rv = NULL;  switch (urlr->type)  {  case URLT_HTTP:  case URLT_HTTPS:    if(urlr->p.http.searchstr)    {      rv =        tl_str_concat(NULL, urlr->p.http.document, "?",        urlr->p.http.searchstr, NULL);    }    else      rv = tl_strdup(urlr->p.http.document);    break;  default:    rv = tl_strdup(url_get_path(urlr));    break;  }  return rv;}char *url_get_pass(url * urlr, char *realm){  char *pass = NULL;  authinfo *ai;  switch (urlr->type)  {  case URLT_HTTP:  case URLT_HTTPS:    pass = urlr->p.http.password;    break;  case URLT_FTP:  case URLT_FTPS:    pass = urlr->p.ftp.password;    break;  default:    return NULL;  }  if(!pass)  {    ai = authinfo_match_entry(urlr->type, url_get_site(urlr),      url_get_port(urlr), url_get_path(urlr), realm);    if(ai)      pass = ai->pass;  }  if(!pass)  {    pass = priv_cfg.passwd_auth;  }  return pass;}char *url_get_user(url * urlr, char *realm){  char *user = NULL;  authinfo *ai;  switch (urlr->type)  {  case URLT_HTTP:  case URLT_HTTPS:    user = urlr->p.http.user;    break;  case URLT_FTP:  case URLT_FTPS:    user = urlr->p.ftp.user;    break;  default:    return NULL;  }  if(!user)  {    ai = authinfo_match_entry(urlr->type, url_get_site(urlr),      url_get_port(urlr), url_get_path(urlr), realm);    if(ai)      user = ai->user;  }  if(!user)  {    user = priv_cfg.name_auth;  }  return user;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -