⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mfile.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
/* $Id: mfile.c,v 1.52 2003/04/05 12:32:34 adam Exp $   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003   Index Data ApsThis file is part of the Zebra server.Zebra is free software; you can redistribute it and/or modify it underthe terms of the GNU General Public License as published by the FreeSoftware Foundation; either version 2, or (at your option) any laterversion.Zebra is distributed in the hope that it will be useful, but WITHOUT ANYWARRANTY; without even the implied warranty of MERCHANTABILITY orFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public Licensefor more details.You should have received a copy of the GNU General Public Licensealong with Zebra; see the file LICENSE.zebra.  If not, write to theFree Software Foundation, 59 Temple Place - Suite 330, Boston, MA02111-1307, USA.*/ /*  * TODO: The size estimates in init may not be accurate due to  * only partially written final blocks.  */#include <sys/types.h>#include <fcntl.h>#ifdef WIN32#include <io.h>#else#include <unistd.h>#endif#include <direntz.h>#include <string.h>#include <stdlib.h>#include <stdio.h>#include <assert.h>#include <errno.h>#include <zebra-lock.h>#include <zebrautl.h>#include <mfile.h>static int scan_areadef(MFile_area ma, const char *ad, const char *base){    /*     * If no definition is given, use current directory, unlimited.     
*/    char dirname[FILENAME_MAX+1];     mf_dir **dp = &ma->dirs, *dir = *dp;    if (!ad)        ad = ".:-1b";    for (;;)    {        const char *ad0 = ad;        int i = 0, fact = 1, multi;	mfile_off_t size = 0;        while (*ad == ' ' || *ad == '\t')            ad++;        if (!*ad)            break;        if (!yaz_is_abspath(ad) && base)        {            strcpy (dirname, base);            i = strlen(dirname);            dirname[i++] = '/';        }        while (*ad)        {	    if (*ad == ':' && strchr ("+-0123456789", ad[1]))		break;            if (i < FILENAME_MAX)                dirname[i++] = *ad;            ad++;        }        dirname[i] = '\0';        if (*ad++ != ':')        {	    logf (LOG_WARN, "Missing colon after path: %s", ad0);            return -1;        }        if (i == 0)        {	    logf (LOG_WARN, "Empty path: %s", ad0);            return -1;        }        while (*ad == ' ' || *ad == '\t')            ad++;        if (*ad == '-')        {            fact = -1;            ad++;        }        else if (*ad == '+')            ad++;        size = 0;        if (*ad < '0' || *ad > '9')        {	    logf (LOG_FATAL, "Missing size after path: %s", ad0);            return -1;        }        size = 0;        while (*ad >= '0' && *ad <= '9')            size = size*10 + (*ad++ - '0');        switch (*ad)	{	case 'B': case 'b': multi = 1; break;	case 'K': case 'k': multi = 1024; break;	case 'M': case 'm': multi = 1048576; break;	case 'G': case 'g': multi = 1073741824; break;        case '\0':            logf (LOG_FATAL, "Missing unit: %s", ad0);            return -1;        default:            logf (LOG_FATAL, "Illegal unit: %c in %s", *ad, ad0);            return -1;	}        ad++;	*dp = dir = (mf_dir *) xmalloc(sizeof(mf_dir));	dir->next = 0;	strcpy(dir->name, dirname);	dir->max_bytes = dir->avail_bytes = fact * size * multi;	dp = &dir->next;    }    return 0;}static int file_position(MFile mf, int pos, int offset){    int off = 0, c = 
mf->cur_file, ps;    if ((c > 0 && pos <= mf->files[c-1].top) ||	(c < mf->no_files -1 && pos > mf->files[c].top))    {	c = 0;	while (c + 1 < mf->no_files && mf->files[c].top < pos)	{	    off += mf->files[c].blocks;	    c++;	}	assert(c < mf->no_files);    }    else    	off = c ? (mf->files[c-1].top + 1) : 0;    if (mf->files[c].fd < 0)    {        if ((mf->files[c].fd = open(mf->files[c].path,	                            mf->wr ?                                        (O_BINARY|O_RDWR|O_CREAT) :                                        (O_BINARY|O_RDONLY), 0666)) < 0)        {            if (!mf->wr && errno == ENOENT && off == 0)                return -2;    	    logf (LOG_WARN|LOG_ERRNO, "Failed to open %s", mf->files[c].path);    	     return -1;        }    }    ps = pos - off;    if (mfile_seek(mf->files[c].fd, ps * (mfile_off_t) mf->blocksize + offset,    	SEEK_SET) < 0)    {    	logf (LOG_WARN|LOG_ERRNO, "Failed to seek in %s", mf->files[c].path);        logf(LOG_WARN, "pos=%d off=%d blocksize=%d offset=%d",                       pos, off, mf->blocksize, offset);    	return -1;    }    mf->cur_file = c;    return ps;}static int cmp_part_file(const void *p1, const void *p2){    return ((part_file *)p1)->number - ((part_file *)p2)->number;}/* * Create a new area, cotaining metafiles in directories. * Find the part-files in each directory, and inventory the existing metafiles. 
*/MFile_area mf_init(const char *name, const char *spec, const char *base){    MFile_area ma = (MFile_area) xmalloc(sizeof(*ma));    mf_dir *dirp;    meta_file *meta_f;    part_file *part_f = 0;    DIR *dd;    struct dirent *dent;    int fd, number;    char metaname[FILENAME_MAX+1], tmpnam[FILENAME_MAX+1];        logf (LOG_DEBUG, "mf_init(%s)", name);    strcpy(ma->name, name);    ma->mfiles = 0;    ma->dirs = 0;    if (scan_areadef(ma, spec, base) < 0)    {    	logf (LOG_WARN, "Failed to access description of '%s'", name);    	return 0;    }    /* look at each directory */    for (dirp = ma->dirs; dirp; dirp = dirp->next)    {    	if (!(dd = opendir(dirp->name)))    	{    	    logf (LOG_WARN|LOG_ERRNO, "Failed to open directory %s",                                     dirp->name);    	    return 0;	}	/* look at each file */	while ((dent = readdir(dd)))	{	    int len = strlen(dent->d_name);	    const char *cp = strrchr (dent->d_name, '-');	    if (strchr (".-", *dent->d_name))		continue;	    if (len < 5 || !cp || strcmp (dent->d_name + len - 3, ".mf"))		continue;	    number = atoi(cp+1);	    memcpy (metaname, dent->d_name, cp - dent->d_name);	    metaname[ cp - dent->d_name] = '\0';	    for (meta_f = ma->mfiles; meta_f; meta_f = meta_f->next)	    {	    	/* known metafile */		if (!strcmp(meta_f->name, metaname))	    	{		    part_f = &meta_f->files[meta_f->no_files++];		    break;		}	    }	    /* new metafile */	    if (!meta_f)	    {	    	meta_f = (meta_file *) xmalloc(sizeof(*meta_f));        	zebra_mutex_init (&meta_f->mutex);	    	meta_f->ma = ma;	    	meta_f->next = ma->mfiles;	    	meta_f->open = 0;	    	meta_f->cur_file = -1;                meta_f->unlink_flag = 0;	    	ma->mfiles = meta_f;	    	strcpy(meta_f->name, metaname);	    	part_f = &meta_f->files[0];	    	meta_f->no_files = 1;	    }	    part_f->number = number;	    part_f->dir = dirp;	    part_f->fd = -1;	    sprintf(tmpnam, "%s/%s", dirp->name, dent->d_name);	    part_f->path = xstrdup(tmpnam);	    
/* get size */	    if ((fd = open(part_f->path, O_BINARY|O_RDONLY)) < 0)	    {	    	logf (LOG_FATAL|LOG_ERRNO, "Failed to access %s",                      dent->d_name);	    	return 0;	    }	    if ((part_f->bytes = mfile_seek(fd, 0, SEEK_END)) < 0)	    {	    	logf (LOG_FATAL|LOG_ERRNO, "Failed to seek in %s",                      dent->d_name);	    	return 0;	    }#ifndef WIN32	    fsync(fd);#endif	    close(fd);	    if (dirp->max_bytes >= 0)		dirp->avail_bytes -= part_f->bytes;	}	closedir(dd);    }    for (meta_f = ma->mfiles; meta_f; meta_f = meta_f->next)    {    	logf (LOG_DEBUG, "mf_init: %s consists of %d part(s)", meta_f->name,              meta_f->no_files);    	qsort(meta_f->files, meta_f->no_files, sizeof(part_file),              cmp_part_file);    }    return ma;}void mf_destroy(MFile_area ma){    mf_dir *dp;    meta_file *meta_f;    if (!ma)	return;    dp = ma->dirs;    while (dp)    {	mf_dir *d = dp;	dp = dp->next;	xfree (d);    }    meta_f = ma->mfiles;    while (meta_f)    {	int i;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -