⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 db.c

📁 harvest是一个下载html网页的机器人
💻 C
字号:
/*  *  DEBUG: none *  AUTHOR: Simon Wilkinson (based on ideas from Judith Pluemer) * *  Harvest Indexer http://www.tardis.ed.ac.uk/harvest/ *  --------------------------------------------------- * *  The Harvest Indexer is a continued development of code developed by *  the Harvest Project. Development is carried out by numerous individuals *  in the Internet community, and is not officially connected with the *  original Harvest Project or its funding sources. *  *  Please mail harvest@tardis.ed.ac.uk if you are interested in participating *  in the development effort. * *  This program is free software; you can redistribute it and/or modify *  it under the terms of the GNU General Public License as published by *  the Free Software Foundation; either version 2 of the License, or *  (at your option) any later version. *   *  This program is distributed in the hope that it will be useful, *  but WITHOUT ANY WARRANTY; without even the implied warranty of *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *  GNU General Public License for more details. *   *  You should have received a copy of the GNU General Public License *  along with this program; if not, write to the Free Software *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/#include <stdio.h>#include <string.h>#include <stdlib.h>#include <gdbm.h>#include "util.h"#include "template.h"static char pdbfile[MAXPATHLEN + 1];static char md5dbfile[MAXPATHLEN + 1];static GDBM_FILE pdb = NULL;static GDBM_FILE md5db = NULL;/* Initialize */void urldb_init(char *dir) {  sprintf(pdbfile,"%s/PRODUCTION.gdbm", dir);  sprintf(md5dbfile,"%s/MD5.gdbm", dir);    pdb = gdbm_open(pdbfile, 0, GDBM_READER, 0644, NULL);  md5db = gdbm_open(md5dbfile, 0, GDBM_READER, 0644, NULL);    /* FIXME: Need error check here */}char * urldb_getmd5(char *url) {  datum d,k;    if (md5db == NULL) return (0);   k.dptr = url;  k.dsize = strlen(k.dptr) +1;  d = gdbm_fetch(md5db, k);  return(d.dptr);}void urldb_writesoif(char *url, FILE *out) {  datum d,k;    if (pdb == NULL) return;  k.dptr = url;  k.dsize = strlen(k.dptr) +1;   d = gdbm_fetch(pdb, k);  if (d.dptr == NULL) return;  fwrite(d.dptr,d.dsize,1,out);  xfree(d.dptr);}char *urldb_getrefs(char *url) {  datum d,k;  Template *t;  AVPair *avp;  char *ret=NULL;  if (pdb == NULL) return (NULL);  k.dptr  = url;  k.dsize = strlen(k.dptr) + 1;  d=gdbm_fetch(pdb, k);  if (d.dptr == NULL) return (NULL);  init_parse_template_string(d.dptr, d.dsize);  if ((t = parse_template()) == NULL) {    errorlog("urldb_getrefs: Corrupt SOIF object: %s\n", url);    xfree(d.dptr);  }  finish_parse_template();  xfree(d.dptr);  if ((avp = extract_AVPair(t->list, T_UREFS)) !=NULL) {    ret=xstrdup(avp->value);  }  free_template(t);    return(ret);}int urldb_getlmt(char *url) {  datum d,k;  Template *t;  AVPair *avp;  int lmt=0;  if (pdb == NULL) return (0); /* No production db */  k.dptr = url;  k.dsize = strlen(k.dptr) +1;  d=gdbm_fetch(pdb, k);  if (d.dptr == NULL) return(0); /* URL isn't in db */  init_parse_template_string(d.dptr, d.dsize);  if ((t = parse_template()) == NULL) {    errorlog("urldb_getlmt: Corrupt SOIF object: %s\n", url);    xfree(d.dptr);  }  finish_parse_template();  xfree(d.dptr);  if ((avp = extract_AVPair(t->list, 
T_LMT)) !=NULL) {    lmt=atoi(avp->value);  }  free_template(t);    return(lmt);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -