⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 partition.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
    else {	/* Don't care about individual file sizes in statistics since the user can look at it anyway by ls -l! */	sprintf(s, "%s/.glimpse_split.%d", INDEX_DIR, getpid());        if((i_out = fopen(s, "w")) == NULL) {	    fprintf(stderr, "can't open %s for writing\n", s);	    exit(2);        }	sprintf(s, "%s/%s", INDEX_DIR, INDEX_FILE);        if((i_in = fopen(s, "r")) == NULL) {	    fprintf(stderr, "can't open %s for reading\n", s);	    exit(2);        }        /* modified the original in glimpse's main.c */        fgets(indexnumberbuf, 256, i_in);        fputs(indexnumberbuf, i_out);        fscanf(i_in, "%%%d\n", &onefileperblock);        if (ByteLevelIndex) fprintf(i_out, "%%-%d\n", file_num);	/* #of files might have changed due to -f/-a */	else fprintf(i_out, "%%%d\n", file_num);	/* This was the stupidest thing of all! */        if ( !fscanf(i_in, "%%%d\n", &structuredindex) ) /* 10/25/99 as per mhubin --GV */               fscanf(i_in, "%%%d%s\n", &structuredindex, temp_rdelim);	if (structuredindex <= 0) structuredindex = 0;	if (RecordLevelIndex) fprintf(i_out, "%%-2 %s\n", old_rdelim);	/* robint@zedcor.com */        else fprintf(i_out, "%%%d\n", attr_num);	/* attributes might have been added during last merge */	part_size = 0;	/* current offset in the p_table file */	while(fgets(src_index_buf, REAL_INDEX_BUF, i_in)) {	    j = 0;	    while ((j < REAL_INDEX_BUF) && (src_index_buf[j] != WORD_END_MARK) && (src_index_buf[j] != ALL_INDEX_MARK) && (src_index_buf[j] != '\n')) j++;	    if ((j >= REAL_INDEX_BUF) || (src_index_buf[j] == '\n')) continue;	    /* else it is WORD_END_MARK or ALL_INDEX_MARK */	    c = src_index_buf[j+1];	    src_index_buf[j+1] = '\0';	    fputs(src_index_buf, i_out);	    src_index_buf[j+1] = c;	    c = src_index_buf[j];	    if (StructuredIndex) {		index = decode16b((src_index_buf[j+1] << 8) | (src_index_buf[j+2]));		if ((attr_num > 0) && (attr_num < MaxNum8bPartition - 1)) {		    putc(encode8b(index), i_out);		}		else if (attr_num > 0) {		  
  putc(src_index_buf[j+1], i_out);		    putc(src_index_buf[j+2], i_out);		}		j += 2;	    }	    if (c == ALL_INDEX_MARK) {		putc(DONT_CONFUSE_SORT, i_out);		if (putc('\n', i_out) == EOF) {		    fprintf(stderr, "Error: write failed at %s:%d\n", __FILE__, __LINE__);		    exit(2);		}		continue;	    }	    offset = encode32b(part_size);	    putc((offset & 0xff000000) >> 24, i_out);	/* force big-endian */	    putc((offset & 0x00ff0000) >> 16, i_out);	    putc((offset & 0x0000ff00) >> 8, i_out);	    putc((offset & 0x000000ff), i_out);	    if (putc('\n', i_out) == EOF) {		fprintf(stderr, "Error: write failed at %s:%d\n", __FILE__, __LINE__);		exit(2);	    }	    j++;	/* @first byte of the block numbers */	    while((src_index_buf[j] != '\n') && (src_index_buf[j] != '\0')) {		putc(src_index_buf[j++], p_out);		part_size ++;	    }	    if (putc('\n', p_out) == EOF) {		fprintf(stderr, "Error: write failed at %s:%d\n", __FILE__, __LINE__);		exit(2);	    }	    part_size ++;	}	fclose(i_in);	fflush(i_out);	fclose(i_out);#if	SFS_COMPAT	sprintf(s, "%s/.glimpse_split.%d", INDEX_DIR, getpid());	sprintf(s1, "%s/%s", INDEX_DIR, INDEX_FILE);	rename(s, s1);#else	sprintf(s, "exec %s '%s/.glimpse_split.%d' '%s/%s'", SYSTEM_MV, escapesinglequote(INDEX_DIR, es1), getpid(), escapesinglequote(INDEX_DIR, es2), INDEX_FILE);	system(s);#endif	system(sync_path);	/* sync() has a BUG */	sprintf(s, "%s/%s", INDEX_DIR, INDEX_FILE);	if (BuildTurbo) dump_mini(s);    }    fflush(p_out);    fclose(p_out);    /* Dump file names */    if (KeepFilenames) {	sprintf(s, "exec %s '%s/%s' '%s/%s.prev'", SYSTEM_CP, escapesinglequote(INDEX_DIR, es1), NAME_LIST, escapesinglequote(INDEX_DIR, es2), NAME_LIST);	system(s);	sprintf(s, "exec %s '%s/%s' '%s/%s.prev'", SYSTEM_CP, escapesinglequote(INDEX_DIR, es1), NAME_LIST_INDEX, escapesinglequote(INDEX_DIR, es2), NAME_LIST_INDEX);	system(s);    }    sprintf(s, "%s/%s", INDEX_DIR, NAME_LIST);    if((f_out = fopen(s, "w")) == NULL) {        fprintf(stderr, "can't open %s for 
writing\n", s);        exit(2);    }    sprintf(s, "%s/%s", INDEX_DIR, NAME_LIST_INDEX);    if((i_out = fopen(s, "w")) == NULL) {        fprintf(stderr, "can't open %s for writing\n", s);        exit(2);    }    fprintf(f_out, "%d\n", file_num);    for(i=0,offset=ftell(f_out); i<name_list_size; i++) {	if ((LIST_GET(name_list, i) != NULL) && (name_list[0] != '\0')) {		ATLEASTONEFILE = 1;		putc((offset&0xff000000) >> 24, i_out);		putc((offset&0xff0000) >> 16, i_out);		putc((offset&0xff00) >> 8, i_out);		putc((offset&0xff), i_out);		fputs(LIST_GET(name_list, i), f_out);		putc('\n', f_out);		offset += strlen(LIST_GET(name_list, i)) + 1;	}	else {	/* else empty line to indicate file that was removed = HOLE */		if (name_list_size == file_num) {			putc((offset&0xff000000) >> 24, i_out);			putc((offset&0xff0000) >> 16, i_out);			putc((offset&0xff00) >> 8, i_out);			putc((offset&0xff), i_out);			putc('\n', f_out);			offset += 1;		}	}	/* else there are no holes since index was purged, so don't put anything */    }    if (!ATLEASTONEFILE) {	fprintf(MESSAGEFILE, "Warning: number of files in the index is zero!\n");    }    fflush(f_out);    fclose(f_out);    fflush(i_out);    fclose(i_out);    if (GenerateHash) {    /* Dump file hash: don't want to keep filenames in hash-order like index since adding a file can shift many hash-values and change the whole index! 
*/    if (KeepFilenames) {	sprintf(s, "exec %s '%s/%s' '%s/%s.prev'", SYSTEM_CP, escapesinglequote(INDEX_DIR, es1), NAME_HASH, escapesinglequote(INDEX_DIR, es2), NAME_HASH);	system(s);	sprintf(s, "exec %s '%s/%s' '%s/%s.prev'", SYSTEM_CP, escapesinglequote(INDEX_DIR, es1), NAME_HASH_INDEX, escapesinglequote(INDEX_DIR, es2), NAME_HASH_INDEX);	system(s);    }    sprintf(s, "%s/%s", INDEX_DIR, NAME_HASH);    if((f_out = fopen(s, "w")) == NULL) {        fprintf(stderr, "can't open %s for writing\n", s);        exit(2);    }    sprintf(s, "%s/%s", INDEX_DIR, NAME_HASH_INDEX);    if((i_out = fopen(s, "w")) == NULL) {        fprintf(stderr, "can't open %s for writing\n", s);        exit(2);    }    if (!built_filename_hashtable) build_filename_hashtable(name_list, file_num);    hashtablesize = (BigFilenameHashTable ? MAX_64K_HASH : MAX_4K_HASH);    for (i=0,offset=ftell(f_out); i<hashtablesize; i++) {	putc((offset&0xff000000) >> 24, i_out);	putc((offset&0xff0000) >> 16, i_out);	putc((offset&0xff00) >> 8, i_out);	putc((offset&0xff), i_out);	e = name_hashtable[i];	while(e!=NULL) {		if ((index = get_new_index(deletedlist, e->index)) < 0) {			e = e->next;			continue;		}		putc(((index)&0xff000000)>>24, f_out);		putc(((index)&0xff0000)>>16, f_out);		putc(((index)&0xff00)>>8, f_out);		putc(((index)&0xff), f_out);		offset += 4;		fputs(e->name, f_out);		fputc('\0', f_out);	/* so that I can do direct strcmp */		offset += strlen(e->name) + 1;		e = e->next;	}    }    fflush(f_out);    fclose(f_out);    fflush(i_out);    fclose(i_out);    }#if	0	fflush(stdout);	printf("AFTER SAVE_DATA_STRUCTURES:\n");	sprintf(s, "exec %s -lg .glimpse_*", SYSTEM_LS);	system(s);	sprintf(s, "exec %s .glimpse_index", SYSTEM_WC);	system(s);	getchar();#endif	/*0*/    return 0;}

/*
 * Merges the index split by save_data_structures into a single index.
 *
 * Reads INDEX_DIR/INDEX_FILE (the word list) and INDEX_DIR/P_TABLE, writes
 * the merged index to INDEX_DIR/.glimpse_merge.<pid>, and finally moves
 * that temporary file over INDEX_DIR/INDEX_FILE.
 *
 * The three header lines are copied through first: the index-number line
 * verbatim, then "%<onefileperblock>", then either "%<structuredindex>" or,
 * when the value read was -2, "%-2 <record delimiter>".
 *
 * For every following line that contains WORD_END_MARK or ALL_INDEX_MARK,
 * the word up to and including the mark is copied, attribute bytes are
 * written as two bytes when structuredindex is set, and the remainder of
 * the line is replaced by whatever get_block_numbers() places into
 * dest_index_buf.  Lines without either mark are dropped.
 *
 * Returns 0.  Calls exit(2) when a file cannot be opened or when a checked
 * write to the output file fails.
 */
int
merge_splits()
{
	FILE *i_in;
	FILE *p_in;
	FILE *i_out;
	char s[MAX_LINE_LEN], s1[MAX_LINE_LEN], es1[MAX_LINE_LEN], es2[MAX_LINE_LEN], es3[MAX_LINE_LEN], temp_rdelim[MAX_LINE_LEN];
	int j, index;
	unsigned char c;
	char indexnumberbuf[256];
	/*
	 * All three start at 0 so that a short or malformed header cannot make
	 * the code below read indeterminate values.  recordlevelindex in
	 * particular is only ever assigned when structuredindex == -2.
	 */
	int onefileperblock = 0, structuredindex = 0, recordlevelindex = 0;

#if	0
	fflush(stdout);
	printf("BEFORE MERGE_SPLITS:\n");
	sprintf(s, "exec %s -lg .glimpse_*", SYSTEM_LS);
	system(s);
	sprintf(s, "exec %s .glimpse_index", SYSTEM_HEAD);
	system(s);
	getchar();
#endif	/*0*/

	temp_rdelim[0] = '\0';  /* Initialize in case not read. 10/25/99 --GV */
	indexnumberbuf[0] = '\0';	/* fgets() leaves the buffer untouched on an empty file */

	sprintf(s, "%s/%s", INDEX_DIR, P_TABLE);
	if ((p_in = fopen(s, "r")) == NULL) {
		fprintf(stderr, "cannot open for reading: %s\n", s);
		exit(2);
	}
	sprintf(s, "%s/%s", INDEX_DIR, INDEX_FILE);
	if ((i_in = fopen(s, "r")) == NULL) {
		fprintf(stderr, "cannot open for reading: %s\n", s);
		exit(2);
	}
	sprintf(s, "%s/.glimpse_merge.%d", INDEX_DIR, getpid());
	if ((i_out = fopen(s, "w")) == NULL) {
		fprintf(stderr, "cannot open for writing: %s\n", s);
		exit(2);
	}

	/* modified the original in glimpse's main.c */
	fgets(indexnumberbuf, 256, i_in);
	fputs(indexnumberbuf, i_out);
	fscanf(i_in, "%%%d\n", &onefileperblock);
	fprintf(i_out, "%%%d\n", onefileperblock);
	/*
	 * NOTE(review): the %s conversion in the fallback is unbounded while
	 * temp_rdelim holds MAX_LINE_LEN bytes.
	 */
	if ( !fscanf(i_in, "%%%d\n", &structuredindex) )    /* 10/25/99 as per mhubin --GV */
		fscanf(i_in, "%%%d%s\n", &structuredindex, temp_rdelim);
	if (structuredindex == -2) recordlevelindex = 1;
	if (structuredindex <= 0) structuredindex = 0;
	if (recordlevelindex) fprintf(i_out, "%%-2 %s\n", temp_rdelim);
	else fprintf(i_out, "%%%d\n", structuredindex);
	printf("merge: %s\n", temp_rdelim);

#if	!WORD_SORTED
	if (!DeleteFromIndex || FastIndex) {	/* a new index is going to be built in this case: must sort by word */
		fclose(i_in);
		sprintf(s, "%s/%s", INDEX_DIR, MINI_FILE);
		if ((i_in = fopen(s, "r")) != NULL) {	/* minifile exists */
#if	DONTUSESORT_T_OPTION || SFS_COMPAT
			sprintf(s, "exec %s '%s/%s' > '%s/%s.tmp'", SYSTEM_SORT, escapesinglequote(INDEX_DIR, es1), INDEX_FILE, escapesinglequote(INDEX_DIR, es2), INDEX_FILE);
#else
			sprintf(s, "exec %s -T '%s' '%s/%s' > '%s/%s.tmp'", SYSTEM_SORT, escapesinglequote(INDEX_DIR, es1), escapesinglequote(INDEX_DIR, es2), INDEX_FILE, escapesinglequote(INDEX_DIR, es3), INDEX_FILE);
#endif
			system(s);
#if	SFS_COMPAT
			sprintf(s, "%s/%s.tmp", INDEX_DIR, INDEX_FILE);
			sprintf(s1, "%s/%s", INDEX_DIR, INDEX_FILE);
			rename(s, s1);
#else
			sprintf(s, "exec %s '%s/%s.tmp' '%s/%s'", SYSTEM_MV, escapesinglequote(INDEX_DIR, es1), INDEX_FILE, escapesinglequote(INDEX_DIR, es2), INDEX_FILE);
			system(s);
#endif
			system(sync_path);	/* sync() has a BUG */
			fclose(i_in);
		}
		sprintf(s, "%s/%s", INDEX_DIR, INDEX_FILE);
		if ((i_in = fopen(s, "r")) == NULL) {
			fprintf(stderr, "cannot open for reading: %s\n", s);
			exit(2);
		}
		/* skip the 1st 3 lines which might get jumbled up */
		fgets(s, MAX_LINE_LEN, i_in);
		fgets(s, MAX_LINE_LEN, i_in);
		fgets(s, MAX_LINE_LEN, i_in);
	}
#endif	/* !WORD_SORTED */

	while (fgets(src_index_buf, REAL_INDEX_BUF, i_in)) {
		/* find the mark that ends the word; lines without one are skipped */
		j = 0;
		while ((j < REAL_INDEX_BUF) && (src_index_buf[j] != WORD_END_MARK) && (src_index_buf[j] != ALL_INDEX_MARK) && (src_index_buf[j] != '\0') && (src_index_buf[j] != '\n')) j++;
		if ((j >= REAL_INDEX_BUF) || (src_index_buf[j] == '\0') || (src_index_buf[j] == '\n')) continue;

		/* else it is WORD_END_MARK or ALL_INDEX_MARK */
		/* temporarily terminate right after the mark so fputs() emits "word<mark>" only */
		c = src_index_buf[j+1];
		src_index_buf[j+1] = '\0';
		fputs(src_index_buf, i_out);
		src_index_buf[j+1] = c;
		c = src_index_buf[j];	/* remember which mark this line carries */

		if (structuredindex) {	/* convert all attributes to 2B to make merge_in()s easy in build_in.c */
			if (structuredindex < MaxNum8bPartition - 1) {
				index = encode16b(decode8b(src_index_buf[j+1]));
				putc((index & 0x0000ff00) >> 8, i_out);
				putc(index & 0x000000ff, i_out);
				j ++;
			}
			else {
				putc(src_index_buf[j+1], i_out);
				putc(src_index_buf[j+2], i_out);
				j += 2;
			}
		}

		if (c == ALL_INDEX_MARK) {
			putc(DONT_CONFUSE_SORT, i_out);
			/* checked like the same branch in save_data_structures */
			if (putc('\n', i_out) == EOF) {
				fprintf(stderr, "Error: write failed at %s:%d\n", __FILE__, __LINE__);
				exit(2);
			}
			continue;
		}

		/* src_index_buf[j+1] points to the first byte of the offset */
		get_block_numbers(&src_index_buf[j+1], &dest_index_buf[0], p_in);
		j = 0;	/* first byte of the block numbers */
		while ((dest_index_buf[j] != '\n') && (dest_index_buf[j] != '\0')) {
			putc(dest_index_buf[j], i_out);
			dest_index_buf[j] = '\0';	/* clear each byte once it has been written */
			j++;
		}
		if (putc('\n', i_out) == EOF) {
			fprintf(stderr, "Error: write failed at %s:%d\n", __FILE__, __LINE__);
			exit(2);
		}
	}

	fclose(i_in);
	fclose(p_in);
	fflush(i_out);
	fclose(i_out);

	/* replace the old index with the merged temporary file */
#if	SFS_COMPAT
	sprintf(s, "%s/.glimpse_merge.%d", INDEX_DIR, getpid());
	sprintf(s1, "%s/%s", INDEX_DIR, INDEX_FILE);
	rename(s, s1);
#else
	sprintf(s, "exec %s '%s/.glimpse_merge.%d' '%s/%s'", SYSTEM_MV, escapesinglequote(INDEX_DIR, es1), getpid(), escapesinglequote(INDEX_DIR, es2), INDEX_FILE);
	system(s);
#endif

#if	0
	fflush(stdout);
	printf("AFTER MERGE_SPLITS:\n");
	sprintf(s, "exec %s -lg .glimpse_*", SYSTEM_LS);
	system(s);
	sprintf(s, "exec %s .glimpse_index", SYSTEM_HEAD);
	system(s);
	getchar();
#endif	/*0*/

	return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -