⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 partition.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
	p_table[2] = file_num;	part_num = 2;	return 0;    }    MinPartNum = (200 < file_num/2)? 200 : file_num/2;    while(part_num < MinPartNum) {        pdx = 0;	i = 0;        subtotal = 0;        while ((i < file_num) && (pdx < MAX_PARTITION)) {	    if((pdx == 0) || (pdx == '\n')) {		/*		 * So that there cannot be a partition #'\n' and a '\n' can indicate		 * the end of the list of partition#s after the WORD_END_MARK.		 * Also, partition#0 is not accessed so that sort does not		 * ignore the partitions after partition# 0!		 */		p_table[pdx++] = i;		continue;	    }	    p_table[pdx++] = i;            while(subtotal < part_size) {                subtotal += LIST_GET(size_list, i);		i++;            }#ifdef	SW_DEBUG	    printf("pdx=%d part_num=%d i=%d subtotal=%d\n", pdx, part_num, i, subtotal);#endif            subtotal = 0;        }	part_num = pdx;#if	0	printf("part_num = %d part_size = %d\n", part_num, part_size);#endif	part_size = part_size * 0.9;        LIST_ADD(size_list, file_num, part_size, int);    }    p_table[pdx] = file_num;    /* Calculate partition sizes for later output into statistics */    for (i=0; i<= part_num; i++)	for (j = p_table[i]; j<p_table[i+1]; j++)	    p_size_list[i] += LIST_GET(size_list, j);    return 0;}int printed_warning = 0;/* * Difference from above: does not build a new partition table: * adds to the existing one (see glimpse.c, options -a and -f). 
* -- added on dec 7th '93 */oldpartition(dir_num, dir_name)char **dir_name;int  dir_num;{    int num_pat=0;    int num_inc=0;    int len;    int  pat_len[MAX_EXCLUSIVE];    int  inc_len[MAX_EXCLUSIVE];    CHAR *inc[MAX_INCLUSIVE];	/* store the patterns used to mask in files */    CHAR *pat[MAX_EXCLUSIVE];     /* store the patterns that are used to				     mask out those files that are not to				     be indexed  */    int i=0;    FILE *patfile; 	/* file descriptor for prohibit pattern file */    FILE *incfile;	/* file descriptor for include pattern file */    char *current_dir;	/* must have '\n' before directory name */    char s[MAX_LINE_LEN];    char working_dir[MAX_LINE_LEN];    struct stat sbuf;    current_dir_buf[0] = '\n';    current_dir_buf[1] = '\0';    current_dir = &current_dir_buf[1];    if ((dummyout = fopen("/dev/null", "w")) == NULL) return -1;    exin_argv[0] = "glimpseindex";    exin_argv[1] = "dummypat";    exin_argc = 2;    if ((dummylen = memagrep_init(exin_argc, exin_argv, MAX_PAT, dummypat)) <= 0) return -1;	/* exclude/include pattern search */    sprintf(s, "%s/%s", INDEX_DIR, PROHIBIT_LIST);    patfile = fopen(s, "r");    if(patfile == NULL) {	/* fprintf(stderr, "can't open exclude-pattern file\n"); -- no need! */	num_pat = 0;    }    else {	stat(s, &excstbuf);	while((num_pat < MAX_EXCLUSIVE) && fgets(patbuf, MAX_PAT, patfile)) {		if ((len = strlen(patbuf)) < 1) continue;		patbuf[len-1] = '\0';		if ((pat_len[num_pat] = convert2agrepregexp(patbuf, len-1)) == 0) continue;		pat[num_pat++] = (unsigned char *) strdup(patbuf);	}	fclose(patfile);    }#if	0    printf("num_pat %d\n", num_pat);    for(i=0; i<num_pat; i++) printf("len=%d pat=%s\n", pat_len[i], pat[i]);#endif    sprintf(s, "%s/%s", INDEX_DIR, INCLUDE_LIST);    incfile = fopen(s, "r");    if(incfile == NULL) {	/* fprintf(stderr, "can't open include-pattern file\n"); -- no need! 
*/	num_inc = 0;    }    else {	stat(s, &incstbuf);	while((num_inc < MAX_INCLUSIVE) && fgets(patbuf, MAX_PAT, incfile)) {		if ((len = strlen(patbuf)) < 1) continue;		patbuf[len-1] = '\0';		if ((inc_len[num_inc] = convert2agrepregexp(patbuf, len-1)) == 0) continue;		inc[num_inc++] = (unsigned char *) strdup(patbuf);	}	fclose(incfile);    }#if	0    printf("num_inc %d\n", num_inc);    for(i=0; i<num_inc; i++) printf("len=%d inc=%s\n", inc_len[i], inc[i]);#endif#ifdef	SW_DEBUG    printf("dir_num = %d part_num = %d", dir_num-1, part_num);#endif    if (!OneFilePerBlock) {	/* Worry about partitions */	files_per_partition = ((file_num - 1)/part_num) + 1;	/* approximate only but gives a fair idea... */	files_in_partition = 0;	new_partition = part_num;	/* part_num itself is guaranteed to be <= MaxNumPartition */	if (new_partition + 1 > MaxNumPartition) {	    printed_warning = 1;	    if (AddToIndex) {		fprintf(MESSAGEFILE, "Warning: partition-table overflow! Fresh indexing recommended.\n");	    }	    else {		fprintf(MESSAGEFILE, "Warning: partition-table overflow! 
Commencing fresh indexing...\n");		return partition(dir_num, dir_name);	    }	}    }    if ((dir_num <= 1) && FilenamesOnStdin) while (fgets(current_dir, MAX_LINE_LEN, stdin) == current_dir) {	current_dir[strlen(current_dir)-1] = '\0';	/* overwrite \n with \0 */	/* Get absolute path name of the directory or file being indexed */	if (-1 == my_stat(current_dir, &sbuf)) {		fprintf(stderr, "permission denied or non-existent: %s\n", current_dir);		continue;	}	if ((S_ISDIR(sbuf.st_mode)) && (current_dir[0] != '/')) {	    getcwd(working_dir, MAX_LINE_LEN - 1);	    if (-1 == chdir(current_dir)) {		fprintf(stderr, "Cannot chdir to %s\n", current_dir);		continue;	    }	    getcwd(current_dir, MAX_LINE_LEN - 1);	    chdir(working_dir);	}	if (!DeleteFromIndex) printf("Indexing \"%s\" ...\n", current_dir);    	fsize(current_dir, pat, pat_len, num_pat, inc, inc_len, num_inc, 0); /* the file names will be in name_list[]: NOT TOP LEVEL!!! Mar/11/96 */    }    else for(i=1; i<dir_num; i++) {    	strcpy(current_dir, dir_name[i]);	/* Get absolute path name of the directory or file being indexed */	if (-1 == my_stat(current_dir, &sbuf)) {		fprintf(stderr, "permission denied or non-existent: %s\n", current_dir);		continue;	}	if ((S_ISDIR(sbuf.st_mode)) && (current_dir[0] != '/')) {	    getcwd(working_dir, MAX_LINE_LEN - 1);	    if (-1 == chdir(current_dir)) {		fprintf(stderr, "Cannot chdir to %s\n", current_dir);		continue;	    }	    getcwd(current_dir, MAX_LINE_LEN - 1);	    chdir(working_dir);	}	if (!DeleteFromIndex) printf("Indexing \"%s\" ...\n", current_dir);    	if (-1 == fsize(current_dir, pat, pat_len, num_pat, inc, inc_len, num_inc, 1)) { /* the file names will be in name_list[] */	    return -1;	}    }    if (!OneFilePerBlock) {	p_table[new_partition] = file_num;	part_num = new_partition;    }    for (i=0; i<num_inc; i++) {#if	BG_DEBUG	memory_usage -= strlen(inc) + 2;#endif	/*BG_DEBUG*/	my_free(inc[i], 0);    }    for (i=0; i<num_pat; i++) {#if	BG_DEBUG	memory_usage -= 
strlen(pat) + 2;#endif	/*BG_DEBUG*/	my_free(pat[i], 0);    }    for(i=0; i<file_num; i++) total_size += LIST_GET(size_list, i);    for(i=0; i<file_num; i++) if (LIST_GET(name_list, i) == NULL) total_deleted ++;    if (DeleteFromIndex) {	if (total_size <= 0) {	    fprintf(STATFILE, "#of files being deleted = %d, Total #of files = %d\n", total_deleted, file_num - total_deleted);	    printf("\n#of files being deleted = %d, Total #of files = %d\n", total_deleted, file_num - total_deleted);	/* the only output the user sees */	}	else {	    fprintf(STATFILE, "Size of files being indexed = %d B, #of files being deleted = %d, Total #of files = %d\n", total_size, total_deleted, file_num - total_deleted);	    printf("\nSize of files being indexed = %d B, #of files being deleted = %d, Total #of files = %d\n", total_size, total_deleted, file_num - total_deleted);	/* the only output the user sees */	}    }    else {	fprintf(STATFILE, "Size of files being indexed = %d B, Total #of files = %d\n", total_size, file_num);	printf("\nSize of files being indexed = %d B, Total #of files = %d\n", total_size, file_num);	/* the only output the user sees */    }#ifdef	SW_DEBUG    for (i=0; i<file_num; i++)	printf("name_list[%d] = %s, size=%d\n", i, LIST_GET(name_list, i), LIST_GET(size_list, i));#endif	/*SW_DEBUG*/    return 0;}save_data_structures(){    int	i, hashtablesize;    char s[MAX_LINE_LEN], s1[MAX_LINE_LEN], es1[MAX_LINE_LEN], es2[MAX_LINE_LEN], es3[MAX_LINE_LEN], temp_rdelim[MAX_LINE_LEN];    FILE *f_out;    FILE *p_out;    int j;    unsigned char c;    FILE *i_in;    FILE *i_out;    int offset, index;    char indexnumberbuf[256];    int	onefileperblock, structuredindex;    int name_list_size = file_num;    name_hashelement *e;    ATLEASTONEFILE = 0;#if	0	fflush(stdout);	printf("BEFORE SAVE_DATA_STRUCTURES:\n");	sprintf(s, "exec %s -lg .glimpse_*", SYSTEM_LS);	system(s);	sprintf(s, "exec %s .glimpse_index", SYSTEM_HEAD);	system(s);	getchar();#endif	/*0*/    if ((new_file_num >= 0) 
&& (new_file_num <= file_num)) file_num = new_file_num;	/* only if purge_index() was called: -f/-a/-d only */    /* Dump attributes */    if (StructuredIndex && (attr_num > 0)) {	int	ret;	sprintf(s, "%s/%s", INDEX_DIR, ATTRIBUTE_FILE);	if (-1 == (ret = attr_dump_names(s))) {	    fprintf(stderr, "can't open %s for writing\n", s);	    exit(2);	}    }    /* Dump partition table; change index if necessary */    sprintf(s, "%s/%s", INDEX_DIR, P_TABLE);    if((p_out = fopen(s, "w")) == NULL) {	fprintf(stderr, "can't open for writing: %s\n", s);	exit(2);    }    if (!OneFilePerBlock) {#ifdef SW_DEBUG	printf("part_num = %d, part_size = %d\n", part_num, part_size);#endif	for(i=0; i<=part_num; i++) {	    /* Assumes sizeof(int) is 32bits, which is true even for ALPHA */	    putc((p_table[i] & 0xff000000) >> 24, p_out);	    putc((p_table[i] & 0x00ff0000) >> 16, p_out);	    putc((p_table[i] & 0x0000ff00) >> 8, p_out);	    if (putc((p_table[i] & 0x000000ff), p_out) == EOF) {		fprintf(stderr, "Error: write failed at %s:%d\n", __FILE__, __LINE__);		exit(2);	    }	    if (i==part_num) break;	    if (p_table[i] == p_table[i+1]) {		fprintf(STATFILE, "part_num = %d, files = none, part_size = 0\n",i);		continue;	    }	    fprintf(STATFILE, "part_num = %d, files = %d .. 
%d, part_size = %d\n",		i, p_table[i], p_table[i+1] - 1, p_size_list[i]);	}	if (StructuredIndex) {	/* check if we can reduce default 2B attributeids to smaller ones */	    sprintf(s, "%s/.glimpse_split.%d", INDEX_DIR, getpid());	    if((i_out = fopen(s, "w")) == NULL) {		fprintf(stderr, "can't open %s for writing\n", s);		exit(2);	    }	    sprintf(s, "%s/%s", INDEX_DIR, INDEX_FILE);	    if((i_in = fopen(s, "r")) == NULL) {		fprintf(stderr, "can't open %s for reading\n", s);		exit(2);	    }	    /* modified the original in glimpse's main.c */	    fgets(indexnumberbuf, 256, i_in);	    fputs(indexnumberbuf, i_out);	    fscanf(i_in, "%%%d\n", &onefileperblock);	    fprintf(i_out, "%%%d\n", onefileperblock);	/* If #of files change, then they are added to a new partition, which is updated above */	    if ( !fscanf(i_in, "%%%d\n", &structuredindex) ) /* temp_rdelim may not be present in new-style indexes.  Fixed by mhubin 10/25/99 --GV */                 fscanf(i_in, "%%%d%s\n", &structuredindex, temp_rdelim);	    if (structuredindex <= 0) structuredindex = 0;	    if (RecordLevelIndex) fprintf(i_out, "%%-2 %s\n", old_rdelim);	/* robint@zedcor.com (CANNOT HAPPEN SINCE RecordLevel AND Strucured ARE NOT COMPATIBLE!!!) 
*/	    else fprintf(i_out, "%%%d\n", attr_num);	/* attributes might have been added during last merge */	    while(fgets(src_index_buf, REAL_INDEX_BUF, i_in)) {		j = 0;		while ((j < REAL_INDEX_BUF) && (src_index_buf[j] != WORD_END_MARK) && (src_index_buf[j] != ALL_INDEX_MARK) && (src_index_buf[j] != '\0') && (src_index_buf[j] != '\n')) j++;		if ((j >= REAL_INDEX_BUF) || (src_index_buf[j] == '\0') || (src_index_buf[j] == '\n')) continue;		/* else it is WORD_END_MARK or ALL_INDEX_MARK */		c = src_index_buf[j+1];		src_index_buf[j+1] = '\0';		fputs(src_index_buf, i_out);		src_index_buf[j+1] = c;		index=decode16b((src_index_buf[j+1] << 8) | (src_index_buf[j+2]));		if ((attr_num > 0) && (attr_num < MaxNum8bPartition - 1)) {		    putc(encode8b(index), i_out);		}		else if (attr_num > 0) {		    putc(src_index_buf[j+1], i_out);		    putc(src_index_buf[j+2], i_out);		}		j += 3;		if (fputs(src_index_buf+j, i_out) == EOF) {	/* Rest of the partitions information */		    fprintf(stderr, "Error: write failed at %s:%d\n", __FILE__, __LINE__);		    exit(2);		}	    }	    fclose(i_in);	    fflush(i_out);	    fclose(i_out);#if	SFS_COMPAT	    sprintf(s, "%s/.glimpse_split.%d", INDEX_DIR, getpid());	    sprintf(s1, "%s/%s", INDEX_DIR, INDEX_FILE);	    rename(s, s1);#else	    sprintf(s, "exec %s '%s/.glimpse_split.%d' '%s/%s'", SYSTEM_MV, escapesinglequote(INDEX_DIR, es1), getpid(), escapesinglequote(INDEX_DIR, es2), INDEX_FILE);	    system(s);#endif	}    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -