⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 registry.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
	/* Delete from the Registry file */	if (remove_record(tmp) == ERROR)		return ERROR;	/* Delete it from the in-memory Registry */	if (tmp == Registry) {		/* The item is the first item */		Registry = Registry->next;		if (Registry != NULL)			Registry->prev = NULL;	} else if ((tmp->next != NULL) && (tmp->prev != NULL)) {		/* The item is somewhere in the middle */		tmp->prev->next = tmp->next;		tmp->next->prev = tmp->prev;	} else if ((tmp->next == NULL) && (tmp->prev != NULL)) {		/* The item is the last item in the Registry */		tmp->prev->next = NULL;	} else {		errorlog("RG_Unregister: Fatal Internal Error! %d: %s\n",			 tmp->FD, tmp->url);		return ERROR;	}	RG_Free_Entry(tmp);		/* free memory */	RegHdr->nrecords_deleted++;	/* update Reg Header */	return SUCCESS;}/* ----------------------------------------------------------------- *   RG_Free_Registry()  -- free memory associated with the registry.   ----------------------------------------------------------------- */int RG_Free_Registry(){	reg_t *tmp, *next;	Debug(70,1,("Freeing the entire Registry\n"));	RG_hash_destroy();		/* nukes the entire hash structure */	RG_gid_destroy();		/* reset the Gatherer ID mgmt */	tmp = Registry;	while (tmp) {		next = tmp->next;		RG_Free_Entry(tmp);		tmp = next;	}	Registry = NULL;	return SUCCESS;}/* -----------------------------------------------------------------   RG_Free_Entry() -- free memory associated with a registry entry.   ----------------------------------------------------------------- */int RG_Free_Entry(tmp)reg_t *tmp;{	if (tmp) {		if (tmp->url)			xfree(tmp->url);		if (tmp->md5)			xfree(tmp->md5);		if (tmp->desc)			xfree(tmp->desc);		xfree(tmp);	}	return SUCCESS;}/* ----------------------------------------------------------------- *   RG_Count_Reg() -- count the number of objects in the registry.   
----------------------------------------------------------------- */int RG_Count_Reg(){	reg_t *tmp;	int count = 0;	for (tmp = Registry; tmp != NULL; tmp = tmp->next)		count++;	Debug(70,5,("RG_Count_Reg: Count is %d\n", count));	return (count);}/* -----------------------------------------------------------------   RG_Cleaner() -- remove expired objects and compress registry if needed.   ----------------------------------------------------------------- */int RG_Cleaner(){	reg_t *tmp, *tmp2;	time_t now;	int clnd = 0, n = 0;	LOGCLEANER;	if ((now = UTIL_Get_Time()) <= 0)		return ERROR;	Log("Starting Cleaning.\n");	/* Walk the entire Registry */	tmp = Registry;	while (tmp) {		/* This object's TTL has expired? */		if (((tmp->ttl + tmp->update_time) < now) && tmp->ttl>1) {			Debug(70,5,("RG_Cleaner: Expiring: %d, %s\n",				tmp->FD, tmp->url));			tmp2 = tmp;			tmp = tmp->next;			LOGCLEAN(tmp2);			(void)RG_Clean_Entry(tmp2);			clnd++;		} else {			tmp = tmp->next;		}                if ((n++ & 0x1F) == 0) { /* check on pending connections */                        (void)select_loop(0, 0, 0);                }	}	/* Update the Registry file header if it's changed */	if (clnd > 0)		(void)RG_Sync_Registry();	Log("Finished Cleaning, %d objects expired.\n", clnd);	return SUCCESS;}/* ----------------------------------------------------------------- *   RG_Clean_Entry() -- remove an object from the Broker   ----------------------------------------------------------------- */int RG_Clean_Entry(tmp)reg_t *tmp;{	int err = SUCCESS;	/* remove old object from the storage manager */	if (SM_Destroy_Obj(tmp->FD) == ERROR)		err = ERROR;	/* remove old object from indexer */	if (do_IND_Destroy_Obj(tmp) == ERROR)		err = ERROR;	/* unregister old object, and frees memory */	if (RG_Unregister(tmp) == ERROR)		err = ERROR;	return (err);}/* -----------------------------------------------------------------   RG_Compress() -- rewrites the Registry to removed deleted records.   
----------------------------------------------------------------- */int RG_Compress(){	char *new_fn, *old_fn, *back_fn;	reg_t *tmp;	REGISTRY_HEADER reghdr;	int ndel = 0, n = 0;	LOGCOMPRESSION;	Log("Begin Compressing Registry File...\n");	/* We don't want to write on 'Registry' until we're done */	back_fn = UTIL_make_admin_filename("Registry.old");	old_fn = UTIL_make_admin_filename("Registry");	new_fn = UTIL_make_admin_filename("Registry.compressing");      RG_Registry_Shutdown();	if (set_registry_file(new_fn) != SUCCESS) {		Log("An error occured while trying to open Registry.compressing .\n");		goto reinstate_reg;	}	/* Write a clean header */	reghdr.magic = REGISTRY_MAGIC;	reghdr.version = REGISTRY_VERSION;	reghdr.nrecords = 0;	reghdr.nrecords_deleted = 0;	reghdr.nrecords_valid = 0;	if (write_header(&reghdr) != SUCCESS) {		finish_registry_file();		(void) unlink(new_fn);		goto reinstate_reg;	}	/*	 *  Go through the entire Registry and write the valid entries,	 *  while ignoring the invalid ones.	 
*/	for (tmp = Registry; tmp; tmp = tmp->next) {		if (!IS_DELETED(tmp->flag)) {			if (write_record(tmp) != SUCCESS) {				finish_registry_file();				(void) unlink(new_fn);				goto reinstate_reg;			}			reghdr.nrecords++;			reghdr.nrecords_valid++;		} else {			ndel++;		}                if ((n++ & 0x1F) == 0) { /* check on pending connections */                        (void)select_loop(0, 0, 0);                }	}	/* Now write the correct Registry header, and close the file */	if (write_header(&reghdr) != SUCCESS) {		finish_registry_file();		(void) unlink(new_fn);		goto reinstate_reg;	}	finish_registry_file();	/* Backup the old Registry, and use the new one instead */	(void)rename(old_fn, back_fn);	if (rename(new_fn, old_fn) < 0) {		log_errno(old_fn);		(void) unlink(new_fn);		goto reinstate_reg;	}	Log("Wrote %d valid records, deleted %d records.\n",	     reghdr.nrecords_valid, ndel + RegHdr->nrecords_deleted);	Log("Finished Compressing Registry File.\n");reinstate_reg:	xfree(back_fn);	xfree(new_fn);	xfree(old_fn);	/* Now rebuild the in-mem registry from scratch */	RG_Free_Registry();	return (RG_Init());}/*********************************************************************** 		Registry services: bulk xfer by timestamp ***********************************************************************//* ----------------------------------------------------------------- *   RG_bulk_query() -- perform a bulk transfer.   ----------------------------------------------------------------- */int RG_bulk_query(rsock, ptime)int rsock;time_t ptime;{	FILE *fp;	reg_t *tmp;	if ((fp = fdopen(rsock, "w")) == NULL) {		log_errno("fdopen");		(void)QM_send_bulk_err(rsock);		return ERROR;	}/* Should be safe to fork here.  In the child process, we should      *//* be only reading from the registry, and not altering anything.      
*/#ifdef FORK_ON_BULK	if (fork() == 0) {	/* child process */		(void)close(qsock);	/* don't need main socket */#endif		QM_send_bulk_begin(rsock);		/* Send all new objects */		for (tmp = Registry; tmp != NULL; tmp = tmp->next) {			if (tmp->update_time >= ptime) {			    (void)QM_send_bulk_fd(tmp->FD, fp, tmp);			}		}		fflush(fp);		(void)QM_send_bulk_end(rsock);#ifdef FORK_ON_BULK		fclose(fp);		_exit(0);	}#endif	fclose(fp);	return SUCCESS;}/*********************************************************************** 		Registry searching: lookup by FD, URL, MD5 ***********************************************************************//* ----------------------------------------------------------------- *   RG_Get_Entry() -- find the registry entry associated with an fd.   ----------------------------------------------------------------- */reg_t *RG_Get_Entry(fd)fd_t fd;{	return (RG_hash_search_byfd(fd));}/* ----------------------------------------------------------------- *   FG_Get_URL -- find the URL associated with an fd.   ----------------------------------------------------------------- */char *RG_Get_URL(fd)fd_t fd;{	static reg_t *tmp;	if ((tmp = RG_Get_Entry(fd)) != NULL)		return (tmp->url);	return (NULL);}/* -----------------------------------------------------------------   RG_FD_Exists() -- check for duplicate fd   ----------------------------------------------------------------- */int RG_FD_Exists(fd)fd_t fd;{	return ((RG_Get_Entry(fd) != NULL) ? TRUE : FALSE);}/* *  A very safe memcmp: works for same length, equivalence only. *  Returns 0 if the two memory buffers match byte-for-byte, and *  are the same length; non-zero otherwise. */#define safe_memmatch(a,asz,b,bsz) \	((((asz) == (bsz)) && ((a) != NULL) && ((b) != NULL)) ? 
\	(memcmp((a),(b),(asz))) : 1)/* *  An object matches if: *      - the MD5 is the same, *OR* the URL is the same, and *      - the Gatherer Identifiers are the same *      (the Gatherer-Name is the same, and *       the Gatherer-Version is the same, and *       the Gatherer-Host is the same) *  Some objects in the registry might not have an MD5. *  All objects in the registry have a URL. * *  We've written this out so that it's optimized for speed. *  Generally, the Gatherer-* stuff is the same and the MD5 is *  the shortest and most different piece of information. *  Size compares are used first before memcmp's for speed. */#if 0#define RG_Object_Match(a,b) \(((a)->GID == (b)->GID) && \ (!safe_memmatch((a)->md5, (a)->md5s, (b)->md5, (b)->md5s)  \	|| \  !safe_memmatch((a)->url, (a)->urls, (b)->url, (b)->urls)))#endif/* * Match on Object URLs only */#define RG_Object_URL_Match(a,b) \  (!safe_memmatch((a)->url, (a)->urls, (b)->url, (b)->urls))#define RG_Object_Match(a,b) RG_Object_URL_Match((a),(b))/* -----------------------------------------------------------------   RG_Object_Search_Entry() -- search for an entry in the registry.   ----------------------------------------------------------------- */reg_t *RG_Object_Search_Entry(e)reg_t *e;{	static hash_link *tmp;	if (e == NULL)		return (NULL);	/*	 *  We used to traverse the entire Registry to locate an object.	 *  Now we use the URL and the MD5 hash buckets.  We must	 *  search both hash buckets since a matching object may lie	 *  anywhere in either.	 
*/#if 0	if (e->md5s > 8) {	/* md5 may be null */		tmp = RG_hash_md5_bucket(e->md5);	} else {		tmp = NULL;	}	for (/* empty */ ; tmp != NULL; tmp = tmp->next) {		Debug(70,9,("RG_Object_Search_Entry: MD5 bucket: Looking at %d, %s\n", tmp->item->FD, tmp->item->url));		if (RG_Object_Match(e, tmp->item))			return(tmp->item);	}#endif	/* url is always set */	for (tmp = RG_hash_url_bucket(e->url); tmp != NULL; tmp = tmp->next) {		Debug(70,9,("RG_Object_Search_Entry: URL bucket: Looking at %d, %s\n", tmp->item->FD, tmp->item->url));		if (RG_Object_Match(e, tmp->item)) {			return(tmp->item);		}	}	return (NULL);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -