⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 get_filename.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
				if (num_read<st_buf.st_size) {					if ((ret = fread(tempbuf, 1, MAX_LINE_LEN, fp)) <= 0) goto endofinput;					num_read += ret;					maxcount += ret/sizeof(int);					for (i=0; i<ret; i+=sizeof(int), count++) {						readoffset = (tempbuf[i] << 24) | (tempbuf[i+1] << 16) | (tempbuf[i+2] << 8) | tempbuf[i+3];						/* printf("readoffset=%d\n", readoffset); */						if ((offset >= prevreadoffset) && (offset < readoffset)) {							/* printf("count=%d\n", count); */							if (OneFilePerBlock)								multi_dest_index_set[0][block2index(count)] |= mask_int[count % 32];							else {								for (; l<MAX_PARTITION; l++) {									if ((count >= p_table[l]) && (count < p_table[l+1])) {										multi_dest_index_set[0][l] = 1;										break;	/* out of for */									}								}								/* can't come here without break: if it does (serious!) will break out w/o setting anything */							}							prevreadoffset = readoffset;							i += sizeof(int);							count ++;							found = 1;							break;	/* out of for */						}						prevreadoffset = readoffset;					}				}				else if ((offset >= prevreadoffset) && (offset < name_list_size)) {					/* printf("count=%d\n", count); */					if (OneFilePerBlock)						multi_dest_index_set[0][block2index(count)] |= mask_int[count % 32];					else {						for (; l<MAX_PARTITION; l++) {							if ((count >= p_table[l]) && (count < p_table[l+1])) {								multi_dest_index_set[0][l] = 1;								break;	/* out of for */							}						}						/* can't come here without break: if it does (serious!) 
will break out without setting anything */					}					count ++;					found = 1;				}				else goto endofinput;	/* since this offset >= name_list_size and there's no more input after that */			}			else {				for (; i<ret; i+=sizeof(int), count++) {					readoffset = (tempbuf[i] << 24) | (tempbuf[i+1] << 16) | (tempbuf[i+2] << 8) | tempbuf[i+3];					/* printf("readoffset=%d\n", readoffset); */					if ((offset >= prevreadoffset) && (offset < readoffset)) {						/* printf("count=%d\n", count); */						if (OneFilePerBlock)							multi_dest_index_set[0][block2index(count)] |= mask_int[count % 32];						else {							for (; l<MAX_PARTITION; l++) {								if ((count >= p_table[l]) && (count < p_table[l+1])) {									multi_dest_index_set[0][l] = 1;									break;	/* out of for */								}							}							/* can't come here without break: if it does (serious!) will break out without setting anything */						}						prevreadoffset = readoffset;						i += sizeof(int);						count ++;						found = 1;						break;	/* out of for */					}					prevreadoffset = readoffset;				}			}		}	}endofinput:	/* Now AND the incoming mask with the one constructed above */	if (OneFilePerBlock) {		for (i=0; i<round(file_num, 8*sizeof(int)); i++)			index_vect[i] &= multi_dest_index_set[0][i];	}	else {		for (i=0; i<MAX_PARTITION; i++)			index_vect[i] &= multi_dest_index_set[0][i];	}	fclose(fp);	return 0;}

/*
 * get_filenames: fill the globals GTextfiles[] / GFileIndex[] / GNumfiles with
 * the file names selected by index_vect.
 *
 * The names are read from the newline-separated list that starts at
 * bigbuffer + MAX_PAT; the N-th line of that list is treated as file number N.
 *
 * index_vect  selection mask. When OneFilePerBlock is non-zero it is scanned as
 *             a bit vector: the first round(file_num, 8*sizeof(int)) entries are
 *             tested bit by bit against mask_int[j], and bit j of entry i selects
 *             file number i*8*sizeof(int) + j. Otherwise its MAX_PARTITION
 *             entries are flags, and a positive entry i selects the file numbers
 *             p_table[i] .. p_table[i+1]-1.
 *             In the MATCHFILE != OFF path, selections whose search returns
 *             ret <= 0 are cleared from index_vect (bit cleared, or entry set
 *             to 0).
 * argc, argv  passed unchanged to memagrep_init() (only in the MATCHFILE path).
 * dummylen    incoming value is not read; overwritten with the result of
 *             memagrep_init() and then passed to memagrep_search().
 * dummypat    pattern buffer handed to memagrep_init() / memagrep_search().
 * file_num    upper bound on the number of names stored: the function jumps to
 *             end_files as soon as GNumfiles reaches it.
 *
 * Every stored name is a strdup() copy. If strdup() fails, a message is printed
 * to stderr and the function returns at once; names stored so far are kept.
 *
 * NOTE(review): no return type is declared (K&R implicit int) and every exit is
 * a bare "return;", so no meaningful value is returned; confirm that no caller
 * reads the result.
 * NOTE(review): none of the scans for a newline below has a length bound; they
 * rely on the name list containing at least as many newlines as the highest
 * selected file number requires. Confirm against the code that fills bigbuffer.
 */
get_filenames(index_vect, argc, argv, dummylen, dummypat, file_num)
int  *index_vect;
int argc; /* the arguments to agrep for -F */
char *argv[];
int dummylen;
CHAR dummypat[];
int file_num;
{
	int  i=0,j, ret;
        int  start, end, k, prevk;
	int filesseen;	/* number of name-list lines already skipped or consumed */
	char *beginptr, *endptr;	/* cursors into the name list held in bigbuffer */
	char tempbuf[MAX_PAT * 3];	/* scratch copy of the head of bigbuffer (MATCHFILE path only) */

#if	BG_DEBUG
	fprintf(debug, "get_filenames(): the following partitions are ON\n");
	for(i=0; i<((OneFilePerBlock > 0) ? round(file_num, 8*sizeof(int)) : MAX_PARTITION); i++)
		if(index_vect[i]) fprintf(debug, "i=%d,%x\n", i, index_vect[i]);
#endif	/*BG_DEBUG*/

	/* start with an empty result and the cursors on the first name of the list */
	GNumfiles = 0;
	filesseen = 0;
	endptr = beginptr = bigbuffer + MAX_PAT;

	if(MATCHFILE == OFF) {	/* just copy the filenames */
	    if (OneFilePerBlock) {
		/* walk every set bit of the bit vector; each one selects exactly one file */
		for (i=0; i<round(file_num, 8*sizeof(int)); i++) {
		    if (index_vect[i] == 0) continue;
		    for (j=0; j<8*sizeof(int); j++) {
			if (!(index_vect[i] & mask_int[j])) continue;
			start = i*8*sizeof(int) + j;
			end = start + 1;
#if	BG_DEBUG
			fprintf(debug, "start=%d, end=%d\n", start, end);
#endif	/*BG_DEBUG*/
			/*
			 * skip over so many filenames and get the filenames to copy.
			 * NOTE: successive "start"s ALWAYS increase.
			 */
			while(filesseen < start) {
				while(*beginptr != '\n') beginptr ++;
				beginptr ++;	/* skip over '\n' */
				filesseen ++;
			}
			endptr = beginptr;
			while (filesseen < end) {
				while(*endptr != '\n') endptr ++;
				/*
				 * NOTE(review): beginptr is on the first character of the name here,
				 * so this test is true for a name that is exactly one character long,
				 * not for an empty line. Confirm how non-existent files are recorded.
				 */
				if (endptr == beginptr + 1) goto end_of_loop1;	/* null name of non-existent file */
				/* temporarily terminate the name in place so strdup() copies just this line */
				*endptr = '\0';
				/* return with all the names you COULD get */
				if ((GTextfiles[GNumfiles] = (CHAR *)strdup(beginptr)) == NULL) {
					*endptr = '\n';
					fprintf(stderr, "Out of memory at: %s:%d\n", __FILE__, __LINE__);
					return;
				}
				GFileIndex[GNumfiles] = i*8*sizeof(int) + j;
				*endptr = '\n';	/* restore the separator */
				if (++GNumfiles >= file_num) goto end_files;
			end_of_loop1:
				beginptr = endptr = endptr + 1;	/* skip over '\n' */
				filesseen ++;
			}
		    }
		}
	    } /* one file per block */
	    else {
		/* Just the outer for-loop and initial begin/end values are different: rest is same */
		for (i=0; i<MAX_PARTITION; i++) {
		    if(index_vect[i] > 0) {
			/* partition i covers the file numbers p_table[i] .. p_table[i+1]-1 */
			start = p_table[i];
			end = p_table[i+1];
			if (start >= end) continue;
#if	BG_DEBUG
			fprintf(debug, "start=%d, end=%d\n", start, end);
#endif	/*BG_DEBUG*/
			/*
			 * skip over so many filenames and get the filenames to copy.
			 * NOTE: successive "start"s ALWAYS increase.
			 */
			while(filesseen < start) {
				while(*beginptr != '\n') beginptr ++;
				beginptr ++;	/* skip over '\n' */
				filesseen ++;
			}
			endptr = beginptr;
			while (filesseen < end) {
				while(*endptr != '\n') endptr ++;
				/* NOTE(review): same one-character-name condition as in the loop above; confirm intent */
				if (endptr == beginptr + 1) goto end_of_loop2;	/* null name of non-existent file */
				*endptr = '\0';
				/* return with all the names you COULD get */
				if ((GTextfiles[GNumfiles] = (CHAR *)strdup(beginptr)) == NULL) {
					*endptr = '\n';
					fprintf(stderr, "Out of memory at: %s:%d\n", __FILE__, __LINE__);
					return;
				}
				GFileIndex[GNumfiles] = filesseen;	/* the line number of the name within the list */
				*endptr = '\n';
				if (++GNumfiles >= file_num) goto end_files;
			end_of_loop2:
				beginptr = endptr = endptr + 1;	/* skip over '\n' */
				filesseen ++;
			}
		    }
		}
	    }
	}
	else {	/* search and copy matched filenames */
	    extern int REGEX, FASTREGEX, D, WORDBOUND;	/* agrep global which tells us whether the pattern is a regular expression or not, and if there are errors w/ -w */
	    int myREGEX, myFASTREGEX, myD, myWORDBOUND;	/* snapshot of the agrep globals, taken after the first search below */

	    errno = 0;
	    /* a non-positive result from memagrep_init() ends the function with an empty list */
	    if ((dummylen = memagrep_init(argc, argv, MAX_PAT, dummypat)) <= 0) goto end_files;
	    /*
	     * Save the head of bigbuffer, run one search of dummylen*2 bytes starting at
	     * the name list, then restore the saved bytes. The result of this search is
	     * overwritten before it is ever examined.
	     * NOTE(review): presumably this first search exists to set the agrep globals
	     * copied just below and may modify the buffer it scans; confirm against
	     * memagrep_search().
	     */
	    memcpy(tempbuf, bigbuffer, bigbuffer_size >= MAX_PAT ? MAX_PAT*3 : MAX_PAT*2 + bigbuffer_size);
	    ret = memagrep_search(dummylen, dummypat, dummylen*2, beginptr, outputbuffer_len, outputbuffer);
	    memcpy(bigbuffer, tempbuf, bigbuffer_size >= MAX_PAT ? MAX_PAT*3 : MAX_PAT*2 + bigbuffer_size);
	    myREGEX = REGEX; myFASTREGEX = FASTREGEX; myD = D; myWORDBOUND = WORDBOUND;

	    if (OneFilePerBlock) {
		for (i=0; i<round(file_num, 8*sizeof(int)); i++) {
		    if (index_vect[i] == 0) continue;
		    for (j=0; j<8*sizeof(int); j++) {
			if (!(index_vect[i] & mask_int[j])) continue;
			start = i*8*sizeof(int) + j;
			end = start + 1;
#if	BG_DEBUG
			fprintf(debug, "start=%d, end=%d\n", start, end);
#endif	/*BG_DEBUG*/
			/*
			 * skip over so many filenames and get the region to search =
			 * beginptr to endptr: NOTE: successive "start"s ALWAYS increase.
			 */
			while(filesseen < start) {
				while(*beginptr != '\n') beginptr ++;
				beginptr ++;	/* skip over '\n' */
				filesseen ++;
			}
			/* step back one byte so that the region begins one byte before the name */
			beginptr --;	/* I need '\n' for memory search */
			endptr = beginptr+1;
			while (filesseen < end) {
				while(*endptr != '\n') endptr ++;
				endptr ++;	/* skip over '\n' */
				filesseen ++;
			}
			/* step back onto the newline that terminates the last name of the region */
			endptr --;	/* I need '\n' for memory search */
			/* here beginptr is one byte before the name, so this test does detect an empty name */
			if (endptr == beginptr + 1) goto end_of_loop3;	/* null name of non-existent file */
#if	BG_DEBUG
			*endptr = '\0';
			fprintf(debug, "From %d searching:\n%s\n", filesseen, beginptr+1);
			*endptr = '\n';
#endif	/*BG_DEBUG*/
			/* if file in the partition matches then copy it */
#if	EACHOPTION
			/* the region includes the leading delimiter byte only for these pattern modes */
			if (myREGEX || myFASTREGEX || (myD && myWORDBOUND)) ret = memagrep_search(dummylen, dummypat, endptr-beginptr + 1, beginptr, outputbuffer_len, outputbuffer);
			else ret = memagrep_search(dummylen, dummypat, endptr-beginptr/* + 1*/, beginptr+1, outputbuffer_len, outputbuffer);
#else
			ret = memagrep_search(dummylen, dummypat, endptr-beginptr+1, beginptr, outputbuffer_len, outputbuffer);
#endif
			if (ret > 0) {
#if	BG_DEBUG
			    {
				char c = outputbuffer[agrep_outpointer + 1];
				outputbuffer[agrep_outpointer + 1] = '\0';
				fprintf(debug, "OUTPUTBUFFER=%s\n", outputbuffer);
				outputbuffer[agrep_outpointer + 1] = c;
			    }
#endif	/*BG_DEBUG*/
			    /* k scans outputbuffer; prevk marks the start of the line currently being scanned */
			    k = prevk = 0;
#if	EACHOPTION
#else
			    /* skip any newlines at the very start of the search output */
			    while (outputbuffer[k] == '\n') {
				k ++; prevk ++;
			    }
#endif
			    /* copy each newline-terminated line of the search output as one file name */
			    while(k+1<agrep_outpointer) {	/* name of a file cannot have '\n' in it */
				k++;
				if (outputbuffer[k] == '\n') {
					outputbuffer[k] = '\0';
					/* return with all the names you COULD get */
					if ((GTextfiles[GNumfiles] = (CHAR *)strdup(outputbuffer+prevk)) == NULL) {
						outputbuffer[k] = '\n';
						fprintf(stderr, "Out of memory at: %s:%d\n", __FILE__, __LINE__);
						return;
					}
					outputbuffer[k] = '\n';
					GFileIndex[GNumfiles] = i*8*sizeof(int)+j;
					if (++GNumfiles >= file_num) goto end_files;
					k = prevk = k+1;
				}
			    }
			}
			else {
			    index_vect[i] &= ~mask_int[j];	/* remove it from the list: used if ByteLevelIndex */
			}
		    end_of_loop3:
			/* endptr is on the terminating newline; move both cursors to the next name */
			beginptr = endptr = endptr + 1;
		    }
		}
	    } /* one file per block */
	    else {
		/* Just the outer for-loop and initial begin/end values are different: rest is same */
		for (i=0; i<MAX_PARTITION; i++) {
		    if(index_vect[i] > 0) {
			start = p_table[i];
			end = p_table[i+1];
			if (start >= end) continue;
#if	BG_DEBUG
			fprintf(debug, "start=%d, end=%d\n", start, end);
#endif	/*BG_DEBUG*/
			/*
			 * skip over so many filenames and get the region to search =
			 * beginptr to endptr: NOTE: successive "start"s ALWAYS increase.
			 */
			while(filesseen < start) {
				while(*beginptr != '\n') beginptr ++;
				beginptr ++;	/* skip over '\n' */
				filesseen ++;
			}
			beginptr --;	/* I need '\n' for memory search */
			endptr = beginptr+1;
			/* extend the region over every name that belongs to this partition */
			while (filesseen < end) {
				while(*endptr != '\n') endptr ++;
				endptr ++;	/* skip over '\n' */
				filesseen ++;
			}
			endptr --;	/* I need '\n' for memory search */
			if (endptr == beginptr + 1) goto end_of_loop4;	/* null name of non-existent file */
#if	BG_DEBUG
			*endptr = '\0';
			fprintf(debug, "From %d searching:\n%s\n", filesseen, beginptr+1);
			*endptr = '\n';
#endif	/*BG_DEBUG*/
			/* if file in the partition matches then copy it */
#if	EACHOPTION
			if (myREGEX || myFASTREGEX || (myD && myWORDBOUND)) ret = memagrep_search(dummylen, dummypat, endptr-beginptr + 1, beginptr, outputbuffer_len, outputbuffer);
			else ret = memagrep_search(dummylen, dummypat, endptr-beginptr/* + 1*/, beginptr+1, outputbuffer_len, outputbuffer);
#else
			/* beginptr points to '\n', entptr+1 points to '\n' */
			ret = memagrep_search(dummylen, dummypat, endptr-beginptr+1, beginptr, outputbuffer_len, outputbuffer);
#endif
			if (ret > 0) {
			    k = prevk = 0;
#if	EACHOPTION
#else
			    while (outputbuffer[k] == '\n') {
				k ++; prevk ++;
			    }
#endif
			    /* copy each newline-terminated line of the search output as one file name */
			    while(k+1<agrep_outpointer) {	/* name of a file cannot have '\n' in it */
				k++;
				if (outputbuffer[k] == '\n') {
					outputbuffer[k] = '\0';
					/* return with all the names you COULD get */
					if ((GTextfiles[GNumfiles] = (CHAR *)strdup(outputbuffer+prevk)) == NULL) {
						outputbuffer[k] = '\n';
						fprintf(stderr, "Out of memory at: %s:%d\n", __FILE__, __LINE__);
						return;
					}
					outputbuffer[k] = '\n';
					GFileIndex[GNumfiles] = filesseen - 1;	/* not sure here which one but this is never used so ok to fill junk */
					if (++GNumfiles >= file_num) goto end_files;
					k = prevk = k+1;
				}
			    }
			}
			else {
			    index_vect[i] = 0;	/* mask it off */
			}
		    end_of_loop4:
			beginptr = endptr = endptr + 1;
		    }
		}
	    }
	}

	/* common exit: reached on normal completion, when file_num names are stored, or when memagrep_init() fails */
end_files:
#if	BG_DEBUG
	fprintf(debug, "The following %d filenames are ON\n", GNumfiles);
	for (i=0; i<GNumfiles; i++)
		fprintf(debug, "\t%s\n", GTextfiles[i]);
#endif	/*BG_DEBUG*/
	return;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -