⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 stor_man.c

📁 harvest是一个下载html网页得机器人
💻 C
字号:
static char rcsid[] = "$Id: stor_man.c,v 2.3 2000/01/21 17:37:33 sxw Exp $";/* *   stor_man.c -- Stores and retreives Summary Objects in the Broker. *   This storage manager is based on the UNIX file system. * *  DEBUG: section  74, level 1		Broker storage manager routines *  AUTHOR: Harvest derived * *  Harvest Indexer http://www.tardis.ed.ac.uk/harvest/ *  --------------------------------------------------- * *  The Harvest Indexer is a continued development of code developed by *  the Harvest Project. Development is carried out by numerous individuals *  in the Internet community, and is not officially connected with the *  original Harvest Project or its funding sources. * *  Please mail harvest@tardis.ed.ac.uk if you are interested in participating *  in the development effort. * *  This program is free software; you can redistribute it and/or modify *  it under the terms of the GNU General Public License as published by *  the Free Software Foundation; either version 2 of the License, or *  (at your option) any later version. * *  This program is distributed in the hope that it will be useful, *  but WITHOUT ANY WARRANTY; without even the implied warranty of *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *  GNU General Public License for more details. * *  You should have received a copy of the GNU General Public License *  along with this program; if not, write to the Free Software *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//*  ---------------------------------------------------------------------- *  Copyright (c) 1994, 1995.  All rights reserved. * *    The Harvest software was developed by the Internet Research Task *    Force Research Group on Resource Discovery (IRTF-RD): * *          Mic Bowman of Transarc Corporation. *          Peter Danzig of the University of Southern California. *          Darren R. Hardy of the University of Colorado at Boulder. *          Udi Manber of the University of Arizona. *          Michael F. Schwartz of the University of Colorado at Boulder. *          Duane Wessels of the University of Colorado at Boulder. * *    This copyright notice applies to software in the Harvest *    ``src/'' directory only.  Users should consult the individual *    copyright notices in the ``components/'' subdirectories for *    copyright information about other software bundled with the *    Harvest source code distribution. * *  TERMS OF USE * *    The Harvest software may be used and re-distributed without *    charge, provided that the software origin and research team are *    cited in any use of the system.  Most commonly this is *    accomplished by including a link to the Harvest Home Page *    (http://harvest.cs.colorado.edu/) from the query page of any *    Broker you deploy, as well as in the query result pages.  These *    links are generated automatically by the standard Broker *    software distribution. * *    The Harvest software is provided ``as is'', without express or *    implied warranty, and with no support nor obligation to assist *    in its use, correction, modification or enhancement.  We assume *    no liability with respect to the infringement of copyrights, *    trade secrets, or any patents, and are not responsible for *    consequential damages.  Proper use of the Harvest software is *    entirely the responsibility of the user. * *  DERIVATIVE WORKS * *    Users may make derivative works from the Harvest software, subject *    to the following constraints: * *      - You must include the above copyright notice and these *        accompanying paragraphs in all forms of derivative works, *        and any documentation and other materials related to such *        distribution and use acknowledge that the software was *        developed at the above institutions. * *      - You must notify IRTF-RD regarding your distribution of *        the derivative work. * *      - You must clearly notify users that your are distributing *        a modified version and not the original Harvest software. * *      - Any derivative product is also subject to these copyright *        and use restrictions. * *    Note that the Harvest software is NOT in the public domain.  We *    retain copyright, as specified above. * *  HISTORY OF FREE SOFTWARE STATUS * *    Originally we required sites to license the software in cases *    where they were going to build commercial products/services *    around Harvest.  In June 1995 we changed this policy.  We now *    allow people to use the core Harvest software (the code found in *    the Harvest ``src/'' directory) for free.  We made this change *    in the interest of encouraging the widest possible deployment of *    the technology.  The Harvest software is really a reference *    implementation of a set of protocols and formats, some of which *    we intend to standardize.  We encourage commercial *    re-implementations of code complying to this set of standards. * */#include <stdlib.h>#include "broker.h"#include "log.h"/* Global variables */extern char *DIRpath;extern char *brk_obj_url;/* Local functions */char *SM_Get_Obj_Filename();	/* exported to other modules */static fd_t new_fd_value();/* #define MAX_DIR 100 */#define MAX_DIR 256/* *  SM_Init - Initialize the Storage Manager. */int SM_Init(){	int i;	char buf[BUFSIZ];	Log("Storage Manager: Initializing...\n");	sprintf(buf, "%s/objects", DIRpath);	if ((mkdir(buf, 0775) < 0) && errno != EEXIST) {		log_errno(buf);		return ERROR;	}	for (i = 0; i < MAX_DIR; i++) {		sprintf(buf, "%s/objects/%d", DIRpath, i);		if (mkdir(buf, 0775) < 0) {			if (errno != EEXIST) {				log_errno(buf);				return ERROR;			}		}	}	return SUCCESS;}/* *  SM_Create_Obj - Creates a new object in the Storage Manager. *  Assigns a new FD and returns it. */fd_t SM_Create_Obj(){	int i;	fd_t new_fd;	/* Try at most 100 times to create the object */	for (i = 0; i < 100; i++) {		new_fd = new_fd_value();		if (SM_Exist_Obj(new_fd) == FALSE) {			return(new_fd); /* non-existant, so use it. */		}	}	/* this is very rare */	errorlog("SM_Create_Obj: Unable to generate unique FD.\n");	return ERROR;}/* *  SM_Read_Obj - Reads the object from the storage manager.  Returns *  a FILE * so that the caller can read the data.  Returns NULL on error. */FILE *SM_Read_Obj(FD)fd_t FD;{	static FILE *fp;	char *fn;	int fd;	fp = NULL;	fn = SM_Get_Obj_Filename(FD);	fd = open(fn, O_RDONLY);	if (fd < 0) {		Log("Storage Manager: Cannot read object: %d\n", FD);		log_errno(fn);	} else {		fp = fdopen(fd, "r");	}	xfree(fn);	return(fp);}/* *  SM_Write_Obj - Creates the object in the storage manager, and *  returns a FILE * so that the caller can write the data to the *  storage manger.  Returns NULL on error. */FILE *SM_Write_Obj(FD)fd_t FD;{	static FILE *fp;	char *fn;	int fd;	fp = NULL;	fn = SM_Get_Obj_Filename(FD);	fd = open(fn, O_RDWR | O_CREAT | O_TRUNC, 0664);	if (fd < 0) {		Log("Storage Manager: Cannot read object: %d\n", FD);		log_errno(fn);	} else {		fp = fdopen(fd, "w+");	}	xfree(fn);	return(fp);}/* *  SM_Destroy_Obj - Deletes an object from the storage manager. */int SM_Destroy_Obj(FD)fd_t FD;{	char *fn;	fn = SM_Get_Obj_Filename(FD);	if (unlink(fn) < 0) {		errorlog("Storage Manager: Cannot delete object: %d\n", FD);		log_errno(fn);		xfree(fn);		return ERROR;	}	xfree(fn);	return SUCCESS;}/* *  SM_Exist_Obj - See if the FD object exists in the storage manager? */int SM_Exist_Obj(FD)fd_t FD;{	char *fn;	int r;	fn = SM_Get_Obj_Filename(FD);	r = access(fn, R_OK);	xfree(fn);	return((r == 0) ? TRUE : FALSE);}/* *  new_fd_value - Grab another FD value. */static fd_t new_fd_value(){#if defined(HAVE_LRAND48)	extern long lrand48();	return ((fd_t) lrand48());#elif defined(HAVE_RANDOM)#ifndef random	extern long random();#endif	return ((fd_t) random());#else	extern int rand();	return ((fd_t) rand());#endif}/* *  SM_Get_Obj_Filename() - Returns the filename for the given object. *  Only valid for the UNIX file system storage manager. */char *SM_Get_Obj_Filename(fd)fd_t fd;{      	static char *fn;      	fn = (char *) xmalloc(MAX_FN_SIZE);	fn[0] = '\0';      	sprintf(fn, "%s/objects/%d/OBJ%d", DIRpath, fd % MAX_DIR, fd);      	return (fn);}/* *  SM_Get_Obj_URL - Returns the URL to read the object. */char *SM_Get_Obj_URL(fd)fd_t fd;{      	static char *fn;      	fn = (char *) xmalloc(MAX_FN_SIZE);	fn[0] = '\0';      	sprintf(fn, "%s/%d/OBJ%d", brk_obj_url, fd % MAX_DIR, fd);      	return (fn);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -