⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 io.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
	sprintf(s, "%s.tmp", indexfile);	if ((newindexfp = fopen(s, "w")) == NULL) {		fprintf(stderr, "Can't open for writing: %s\n", s);		fclose(indexfp);		exit(2);	}	sprintf(s, "%s/%s", INDEX_DIR, MINI_FILE);	if ((minifp = fopen(s, "w")) == NULL) {		fprintf(stderr, "Can't open for writing: %s\n", s);		fclose(indexfp);		fclose(newindexfp);		exit(2);	}	fputs(indexnumber, newindexfp);	fputs(onefileperblock, newindexfp);	if (attr_num != -2) fprintf(newindexfp, "%%%d\n", attr_num);	else fprintf(newindexfp, "%%%d %s\n", attr_num, temp_rdelim);	prev_hash_value = -1;	hash_value = 0;	offset = ftell(newindexfp);	while (fgets(s, MAX_LINE_LEN, indexfp) != NULL) {		linelen = strlen(s);		t = s;		while ((*t != ' ') && (t < s + linelen)) t++;		if (t >= s + linelen) continue;		*t = '\0';		sscanf(s, "%d", &hash_value);		t ++;	/* points to first character of the beginning of s */		fputs(t, newindexfp);		if (hash_value != prev_hash_value) {			for (j=prev_hash_value + 1; j<=hash_value; j++) {				eoffset = encode32b((int)offset);				putc((eoffset & 0xff000000) >> 24, minifp);				putc((eoffset & 0xff0000) >> 16, minifp);				putc((eoffset & 0xff00) >> 8, minifp);				if (putc((eoffset & 0xff), minifp) == EOF) {					fprintf(stderr, "Error: write failed at %s:%d\n", __FILE__, __LINE__);					exit(2);				}			}			prev_hash_value = hash_value;		}		offset = ftell(newindexfp);	}	for (hash_value = prev_hash_value + 1; hash_value<MINI_ARRAY_LEN; hash_value++) {		eoffset = encode32b((int)offset);	/* end of index file */		putc((eoffset & 0xff000000) >> 24, minifp);		putc((eoffset & 0xff0000) >> 16, minifp);		putc((eoffset & 0xff00) >> 8, minifp);		if (putc((eoffset & 0xff), minifp) == EOF) {			fprintf(stderr, "Error: write failed at %s:%d\n", __FILE__, __LINE__);			exit(2);		}	}	fclose(indexfp);	fflush(newindexfp);	fclose(newindexfp);	fflush(minifp);	fclose(minifp);#if	SFS_COMPAT	unlink(indexfile);#else	sprintf(s, "exec %s '%s'", SYSTEM_RM, escapesinglequote(indexfile, es1));	system(s);#endif#if	SFS_COMPAT	
sprintf(s, "%s.tmp", indexfile);	rename(s, indexfile);#else	sprintf(s, "exec %s '%s.tmp' '%s'\n", SYSTEM_MV, escapesinglequote(indexfile, es1), escapesinglequote(indexfile, es2));	system(s);#endif	system(sync_path);	/* sync() has a BUG */}#endif	/* WORD_SORTED *//* Creates data structures that are related to the number of files present in * ".glimpse_filenames". These data structures are: * 1. index sets	-- use my_malloc * 2. index bufs	-- use my_malloc * Once this is done, this function can be called directly from glimpse/get_filenames() * and that can use all sets/bufs data structures directly. * This doesn't care how name_list() is created to be an array of arrays to be able to * add/delete dynamically from it: this uses malloc completely. * But: *	disable_list (which is used only inside glimpse_index) must be malloced separately. *	multi_dest_index_set (which is used only inside glimpse) must be malloced separately. */initialize_data_structures(files)int	files;{	FILEMASK_SIZE = ((files + 1)/(8*sizeof(int)) + 4);	REAL_PARTITION = (FILEMASK_SIZE + 4);	if (REAL_PARTITION < MAX_PARTITION + 2) REAL_PARTITION = MAX_PARTITION + 2;	REAL_INDEX_BUF = ((files + 1)  + 2*MAX_WORD_BUF + 2);	/* index line length with OneFilePerBlock (and/or ByteLevelIndex) */	if (REAL_INDEX_BUF < MAX_SORTLINE_LEN) REAL_INDEX_BUF = MAX_SORTLINE_LEN;	MAX_ALL_INDEX = (REAL_INDEX_BUF / 2);	if (src_index_set == NULL) src_index_set = (unsigned int *)my_malloc(sizeof(int)*REAL_PARTITION);	memset(src_index_set, '\0', sizeof(int) * REAL_PARTITION);	if (dest_index_set == NULL) dest_index_set = (unsigned int *)my_malloc(sizeof(int)*REAL_PARTITION);	memset(dest_index_set, '\0', sizeof(int) * REAL_PARTITION);	if (src_index_buf == NULL) src_index_buf = (unsigned char *)my_malloc(sizeof(char)*REAL_INDEX_BUF);	memset(src_index_buf, '\0', sizeof(char)*REAL_INDEX_BUF);	if (dest_index_buf == NULL) dest_index_buf = (unsigned char *)my_malloc(sizeof(char)*REAL_INDEX_BUF);	memset(dest_index_buf, '\0', 
sizeof(char)*REAL_INDEX_BUF);	if (merge_index_buf == NULL) merge_index_buf = (unsigned char *)my_malloc(sizeof(char)*REAL_INDEX_BUF);	memset(merge_index_buf, '\0', sizeof(char)*REAL_INDEX_BUF);}destroy_data_structures(){	if (src_index_set != NULL) free(src_index_set);	src_index_set = NULL;	if (dest_index_set != NULL) free(dest_index_set);	dest_index_set = NULL;	if (src_index_buf != NULL) free(src_index_buf);	src_index_buf = NULL;	if (dest_index_buf != NULL) free(dest_index_buf);	dest_index_buf = NULL;	if (merge_index_buf != NULL) free(merge_index_buf);	merge_index_buf = NULL;}/* We MUST be able to parse name as: "goodoldunixfilename firstwordofotherinfo restofotherinfo_whichifNULL_willnotbeprecededbyblanklikeitdoeshere\n" *//* len is strlen(name), being points to                   ^ and end points to ^: the firstwordofotherinfo can be used to create .glimpse_filehash when -U ON *//* Restriction: the 3 strings above cannot contain '\n' or '\0' or ' ' *//* returns 0 if parsing was successful, -1 if error *//* begin/end values are NOT stored for each file (painful!), so this function may be called multiple times for the same name: caller MUST save if reqd. */intspecial_parse_name(name, len, begin, end)	char	*name;	int	len;	int	*begin, *end;{	int	i;	int	index;	*begin = -1;	*end = -1;	if (InfoAfterFilename || ExtractInfo) {	/* Glimpse will ALWAYS terminate filename at first blank (no ' ', '\n', '\0' in filename) */		/* Trying to use FILE_END_MARK instead of blank! 
--GB 6/7/99 */		for (i=0; i<len; i++) {			if (name[i] == '\n') break;			if (name[i] == FILE_END_MARK)  {				if (*begin == -1) {					*begin = i+1;					if (!InfoAfterFilename) break;	/* don't care about URL since it doesn't exist as far as I know */				}				else {					*end = i;					break;				}			}		}		if (*begin == -1) {			*begin = 0; *end = len;			return 0;		}		else {			if (*end == -1) *end = len;			if (*begin >= *end) {				*end = *begin - 1; *begin = 0;				/* was returning -1 before, but if can't find any "firstwordofinfo", then just use the first word in buffer for indexing... */			}			return 0;		}	}	else {		*begin = 0; *end = len;		return 0;	}}/* Puts the actual name of the file in the file-system into temp (caller must pass buffer that is large enough to hold it...) */intspecial_get_name(name, len, temp)	char	*name;	int	len;	char	*temp;{	int	begin=-1, end=-1;	if (name == NULL) return -1;	if (len < 0) len = strlen(name);	if (len <= 0) {		errno = EINVAL;		return -1;	}	if (special_parse_name(name, len, &begin, &end) == -1) return -1;	if ((begin >= MAX_LINE_LEN) || (len >= MAX_LINE_LEN)) {		errno = ENAMETOOLONG;		return -1;	}	if (begin > 0) {	/* points to first element of the information (like URL) stored after filename */		memcpy(temp, name, begin-1);		temp[begin-1] = '\0';	}	else {	/* no other information stored with filename */		memcpy(temp, name, len);		temp[len] = '\0';	}	return 0;}/* Must NOT write into name or flag since they may be passed as "const" char* on some systems */FILE *my_fopen(name, flag)	char	*name;	char	*flag;{	int	len;	char	temp[MAX_LINE_LEN];	if (name == NULL) return NULL;	len = strlen(name);	if (special_get_name(name, len, temp) == -1) return NULL;	return fopen(temp, flag);}intmy_open(name, flag, mode)	char	*name;	int	flag, mode;{	int	len;	char	temp[MAX_LINE_LEN];	if (name == NULL) return -1;	len = strlen(name);	if (special_get_name(name, len, temp) == -1) return -1;	return open(temp, flag, mode);}intmy_stat(name, buf)	char	*name;	struct stat 
*buf;{	int	len;	char	temp[MAX_LINE_LEN];	if (name == NULL) return -1;	len = strlen(name);	if (special_get_name(name, len, temp) == -1) return -1;	return stat(temp, buf);}intmy_lstat(name, buf)	char	*name;	struct stat *buf;{	int	len;	char	temp[MAX_LINE_LEN];	if (name == NULL) return -1;	len = strlen(name);	if (special_get_name(name, len, temp) == -1) return -1;	return lstat(temp, buf);}/* Changed hash-routines to look at exactly that portion of the filename that occurs before the first blank character, *//* and use that to compare names: Oct/96 --- But lose efficiency since must parse name everytime: at least 1 string copy *//* Using FILE_END_MARK instead of blank. --GB 6/7/99 */name_hashelement *name_hashtable[MAX_64K_HASH];	/* if (!BigFilenameHashTable) then only the first 4K entries in it are used *//* * Returns the index of the name if the it is found amongst the set * of files in name_array; -1 otherwise. */intget_filename_index(name)	char	*name;{	int	index;	int	len;	int	i, begin=-1, end=-1;	/* int	skips=0; */	name_hashelement	*e;	char	*temp;	int	temp_len;	if (name == NULL) return -1;	len = strlen(name);	if (special_parse_name(name, len, &begin, &end) == -1) return -1;	if ((begin >= MAX_LINE_LEN) || (len >= MAX_LINE_LEN)) {		errno = ENAMETOOLONG;		return -1;	}	temp = name;	if (begin > 0) {	/* points to first element of the information (like URL) stored after filename */		temp_len = begin - 1;	}	else {	/* no other information stored with filename */		temp_len = len;	}	if (FirstWordOfInfoIsKey) index = hashNk(name, end-begin);	else {	/* hash on filename */		if (begin <= 0) index = hashNk(name, len);		else index = hashNk(name, begin-1);	}	e = name_hashtable[index];	while((e != NULL) && (strncmp(temp, e->name, temp_len))) {		/* skips ++; */		e = e->next;	}	/* fprintf(STATFILE, "skips = %d\n", skips); */	if (e == NULL) return -1;	return e->index;}insert_filename(name, name_index)	char	*name;	int	name_index;{	int	len;	int	index;	int	i, begin=-1, end=-1;	
name_hashelement **pe;	char	*temp;	int	temp_len;	if (name == NULL) return;	len = strlen(name);	if (special_parse_name(name, len, &begin, &end) == -1) return;	if ((begin >= MAX_LINE_LEN) || (len >= MAX_LINE_LEN)) {		errno = ENAMETOOLONG;		return;	}	temp = name;	if (begin > 0) {	/* points to first element of the information (like URL) stored after filename */		temp_len = begin - 1;	}	else {	/* no other information stored with filename */		temp_len = len;	}	if (FirstWordOfInfoIsKey) index = hashNk(name, end-begin);	else {	/* hash on filename */		if (begin <= 0) index = hashNk(name, len);		else index = hashNk(name, begin-1);	}	pe = &name_hashtable[index];	while((*pe != NULL) && (strncmp((*pe)->name, temp, temp_len))) pe = &(*pe)->next;	if ((*pe) != NULL) return;	if ((*pe = (name_hashelement *)my_malloc(sizeof(name_hashelement))) == NULL) {		fprintf(stderr, "malloc failure in insert_filename %s:%d\n", __FILE__, __LINE__);		exit(2);	}	(*pe)->next = NULL;#if	0	if (((*pe)->name = (char *)my_malloc(len + 2)) == NULL) {		fprintf(stderr, "malloc failure in insert_filename %s:%d\n", __FILE__, __LINE__);		exit(2);	}	strcpy((*pe)->name, name);#else	(*pe)->name = name;#endif	(*pe)->name_len = strlen(name);	(*pe)->index = name_index;}change_filename(name, len, index, newname)	char	*name;	int	len;	int	index;	char	*newname;{	name_hashelement **pe, *t;	char	temp[MAX_LINE_LEN];	int	temp_len;	if (special_get_name(name, len, temp) == -1) return;	temp_len = strlen(temp);	pe = &name_hashtable[index];	while((*pe != NULL) && (strncmp((*pe)->name, temp, temp_len))) pe = &(*pe)->next;	if ((*pe) == NULL) return;#if	0	my_free((*pe)->name);#endif	(*pe)->name = newname;	return;}delete_filename(name, name_index)	char	*name;	int	name_index;{	int	len;	int	index;	int	i, begin=-1, end=-1;	name_hashelement **pe, *t;	char	*temp;	int	temp_len;	if (name == NULL) return;	len = strlen(name);	if (special_parse_name(name, len, &begin, &end) == -1) return;	if ((begin >= MAX_LINE_LEN) || (len >= MAX_LINE_LEN)) 
{		errno = ENAMETOOLONG;		return;	}	temp = name;	if (begin > 0) {	/* points to first element of the information (like URL) stored after filename */		temp_len = begin - 1;	}	else {	/* no other information stored with filename */		temp_len = len;	}	if (FirstWordOfInfoIsKey) index = hashNk(name, end-begin);	else {	/* hash on filename */		if (begin <= 0) index = hashNk(name, len);		else index = hashNk(name, begin-1);	}	pe = &name_hashtable[index];	while((*pe != NULL) && (strncmp((*pe)->name, temp, temp_len))) pe = &(*pe)->next;	if ((*pe) == NULL) return;	t = *pe;	*pe = (*pe)->next;#if	0	my_free(t->name);#endif	my_free(t, sizeof(name_hashelement));	return;}init_filename_hashtable(){	int	i;	for (i=0; i<MAX_64K_HASH; i++) name_hashtable[i] = NULL;}int	built_filename_hashtable = 0;build_filename_hashtable(names, num)	char	**names[];	int	num;{	int	i;	init_filename_hashtable();	for (i=0; i<num; i++) insert_filename(LIST_GET(names, i), i);	built_filename_hashtable = 1;}destroy_filename_hashtable(){	int	i;	name_hashelement **pe, *t;	for (i=0; i<MAX_64K_HASH; i++) {		pe = &name_hashtable[i];		while(*pe!=NULL) {			t = *pe;			*pe = (*pe)->next;#if	0			my_free(t->name);#endif			my_free(t, sizeof(name_hashelement));		}		*pe = NULL;	}	built_filename_hashtable = 0;}longget_file_time(fp, stbuf, name, i)	FILE	*fp;	struct stat *stbuf;	char	*name;	int	i;{	CHAR	array[sizeof(long)];	int	xx;	long	ret = 0;	struct stat mystbuf;	if (fp != NULL) {		fseek(fp, i*sizeof(long), 0);		fread(array, sizeof(long), 1, fp);		for (xx=0; xx<sizeof(long); xx++) ret |= array[xx] << (8*(sizeof(long) - xx - 1));	}	else if (stbuf != NULL) {		ret = stbuf->st_mtime;	}	else {		if (my_stat(name, &mystbuf) == -1) ret = 0;		else ret = mystbuf.st_mtime;	}	return ret;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -