⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 invlib.c

📁 tvapp用于播放tv程序
💻 C
📖 第 1 页 / 共 3 页
字号:
/*=========================================================================== Copyright (c) 1998-2000, The Santa Cruz Operation  All rights reserved.  Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: *Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. *Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. *Neither name of The Santa Cruz Operation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  =========================================================================*/#include <ctype.h>#include <stdio.h>#include <stdlib.h>#if SHARE#include <sys/types.h>#include <sys/ipc.h>#include <sys/shm.h>#define ERR  -1#endif#include "invlib.h"#include "global.h"#include <assert.h>#define	DEBUG		0	/* debugging code and realloc messages */#define BLOCKSIZE	2 * BUFSIZ	/* logical block size */#define	LINEMAX		1000	/* sorted posting line max size */#define	POSTINC		10000	/* posting buffer size increment */#define SEP		' '	/* sorted posting field separator */#define	SETINC		100	/* posting set size increment */#define	STATS		0	/* print statistics */#define	SUPERINC	10000	/* super index size increment */#define	TERMMAX		512	/* term max size */#define	FMTVERSION	1	/* inverted index format version */#define	ZIPFSIZE	200	/* zipf curve size */static char const rcsid[] = "$Id: invlib.c,v 1.15 2001/08/13 15:31:22 broeker Exp $";#if DEBUG/* FIXME HBB 20010705: nowhere in the source is `invbreak' ever set to * a value other than the (silent) initialization to zero. Pretty * useless, that looks */int	invbreak;#endifstatic	int	boolready(void);static	int	invnewterm(void);static	void	invstep(INVCONTROL *invcntl);static	void	invcannotalloc(unsigned n);static	void	invcannotopen(char *file);static	void	invcannotwrite(char *file);#if STATSint	showzipf;	/* show postings per term distribution */#endifstatic	POSTING	*item, *enditem, *item1 = NULL, *item2 = NULL;static	unsigned setsize1, setsize2;static	long	numitems, totterm, zerolong;static	char	*indexfile, *postingfile;static	FILE	*outfile, *fpost;static	unsigned supersize = SUPERINC, supintsize;static	int	numpost, numlogblk, amtused, nextpost,		lastinblk, numinvitems;static	POSTING	*POST, *postptr;static	unsigned long	*SUPINT, *supint, nextsupfing;static	char	*SUPFING, *supfing;static	char	thisterm[TERMMAX];typedef union logicalblk {	long	invblk[BLOCKSIZE / sizeof(long)];	char	chrblk[BLOCKSIZE];} t_logicalblk;static t_logicalblk logicalblk;#if DEBUG || STATSstatic	long	totpost;#endif#if STATSstatic	int	zipf[ZIPFSIZE + 1];#endiflonginvmake(char *invname, char *invpost, FILE *infile){	unsigned char	*s;	long	num;	int	i;	long	fileindex = 0;	/* initialze, to avoid warning */	unsigned postsize = POSTINC * sizeof(POSTING);	unsigned long	*intptr;	char	line[LINEMAX];	long	tlong;	PARAM	param;	POSTING	posting;	char 	temp[BLOCKSIZE];#if STATS	int	j;	unsigned maxtermlen = 0;#endif	/* output file */	if ((outfile = vpfopen(invname, "w+b")) == NULL) {		invcannotopen(invname);		return(0);	}	indexfile = invname;	(void) fseek(outfile, (long) BUFSIZ, 0);	/* posting file  */	if ((fpost = vpfopen(invpost, "wb")) == NULL) {		invcannotopen(invpost);		return(0);	}	postingfile = invpost;	nextpost = 0;	/* get space for the postings list */	if ((POST = malloc(postsize)) == NULL) {		invcannotalloc(postsize);		return(0);	}	postptr = POST;	/* get space for the superfinger (superindex) */	if ((SUPFING = malloc(supersize)) == NULL) {		invcannotalloc(supersize);		return(0);	}	supfing = SUPFING;	/* FIXME HBB: magic number alert (40) */	supintsize = supersize / 40;	/* also for the superfinger index */	if ((SUPINT = malloc(supintsize * sizeof(long))) == NULL) {		invcannotalloc(supintsize * sizeof(long));		return(0);	}	supint = SUPINT;	supint++; /* leave first term open for a count */	/* initialize using an empty term */	(void) strcpy(thisterm, "");	*supint++ = 0;	*supfing++ = ' ';	*supfing++ = '\0';	nextsupfing = 2;#if DEBUG || STATS	totpost = 0L;#endif	totterm = 0L;	numpost = 1;	/* set up as though a block had come and gone, i.e., set up for new block  */	/* FIXME HBB: magic number alert (16) */	amtused = 16; /* leave no space - init 3 words + one for luck */	numinvitems = 0;	numlogblk = 0;	lastinblk = sizeof(t_logicalblk);	/* now loop as long as more to read (till eof)  */	while (fgets(line, LINEMAX, infile) != NULL) {#if DEBUG || STATS		++totpost;#endif		s = strchr(line, SEP);		if (s != NULL) {			*s = '\0';		}		else {			continue;		}#if STATS		if ((i = strlen(line)) > maxtermlen) {			maxtermlen = i;		}#endif#if DEBUG		(void) printf("%ld: %s ", totpost, line);		(void) fflush(stdout);#endif		if (strcmp(thisterm, line) == 0) {			if (postptr + 10 > POST + postsize / sizeof(POSTING)) {				i = postptr - POST;				postsize += POSTINC * sizeof(POSTING);				if ((POST = realloc(POST, postsize)) == NULL) {					invcannotalloc(postsize);					return(0);				}				postptr = i + POST;#if DEBUG				(void) printf("reallocated post space to %u, totpost=%ld\n",				    postsize, totpost);#endif			}			numpost++;		} else {			/* have a new term */			if (!invnewterm()) {				return(0);			}			(void) strcpy(thisterm, line);			numpost = 1;			postptr = POST;			fileindex = 0;		}		/* get the new posting */		num = *++s - '!';		i = 1;		do {			num = BASE * num + *++s - '!';		} while (++i < PRECISION);		posting.lineoffset = num;		while (++fileindex < nsrcoffset && num > srcoffset[fileindex]) {			;		}		posting.fileindex = --fileindex;		posting.type = *++s;		num = *++s - '!';		if (*s != '\n') {			num = *++s - '!';			while (*++s != '\n') {				num = BASE * num + *s - '!';			}			posting.fcnoffset = num;		}		else {			posting.fcnoffset = 0;		}		*postptr++ = posting;#if DEBUG		(void) printf("%ld %ld %ld %ld\n", posting.fileindex,		    posting.fcnoffset, posting.lineoffset, posting.type);		(void) fflush(stdout);#endif	}	if (!invnewterm()) {		return(0);	}	/* now clean up final block  */	logicalblk.invblk[0] = numinvitems;	/* loops pointer around to start */	logicalblk.invblk[1] = 0;	logicalblk.invblk[2] = numlogblk - 1;	if (fwrite(&logicalblk, sizeof(t_logicalblk), 1, outfile) == 0) {		goto cannotwrite;	}	numlogblk++;	/* write out block to save space. what in it doesn't matter */	if (fwrite(&logicalblk, sizeof(t_logicalblk), 1, outfile) == 0) {		goto cannotwrite;	}	/* finish up the super finger */	*SUPINT = numlogblk;	/* add to the offsets the size of the offset pointers */	intptr = (SUPINT + 1);	i = (char *)supint - (char *)SUPINT;	while (intptr < supint)		*intptr++ += i;	/* write out the offsets (1 for the N at start) and the super finger */	if (fwrite(SUPINT, sizeof(*SUPINT), numlogblk + 1, outfile) == 0 ||	    fwrite(SUPFING, 1, supfing - SUPFING, outfile) == 0) {		goto cannotwrite;	}	/* save the size for reference later */	nextsupfing = sizeof(long) + sizeof(long) * numlogblk + (supfing - SUPFING);	/* make sure the file ends at a logical block boundary.  This is 	necessary for invinsert to correctly create extended blocks 	 */	i = nextsupfing % sizeof(t_logicalblk);	/* write out junk to fill log blk */	if (fwrite(temp, sizeof(t_logicalblk) - i, 1, outfile) == 0 ||	    fflush(outfile) == EOF) {	/* rewind doesn't check for write failure */		goto cannotwrite;	}	/* write the control area */	rewind(outfile);	param.version = FMTVERSION;	param.filestat = 0;	param.sizeblk = sizeof(t_logicalblk);	param.startbyte = (numlogblk + 1) * sizeof(t_logicalblk) + BUFSIZ;;	param.supsize = nextsupfing;	param.cntlsize = BUFSIZ;	param.share = 0;	if (fwrite(&param, sizeof(param), 1, outfile) == 0) {		goto cannotwrite;	}	for (i = 0; i < 10; i++)	/* for future use */		if (fwrite(&zerolong, sizeof(zerolong), 1, outfile) == 0) {			goto cannotwrite;		}	/* make first block loop backwards to last block */	if (fflush(outfile) == EOF) {	/* fseek doesn't check for write failure */		goto cannotwrite;	}	(void) fseek(outfile, (long)BUFSIZ + 8, 0); /* get to second word first block */	tlong = numlogblk - 1;	if (fwrite(&tlong, sizeof(tlong), 1, outfile) == 0 ||	    fclose(outfile) == EOF) {	cannotwrite:		invcannotwrite(invname);		return(0);	}	if (fclose(fpost) == EOF) {		invcannotwrite(postingfile);		return(0);	}	--totterm;	/* don't count null term */#if STATS	(void) printf("logical blocks = %d, postings = %ld, terms = %ld, max term length = %d\n",	    numlogblk, totpost, totterm, maxtermlen);	if (showzipf) {		(void) printf("\n*************   ZIPF curve  ****************\n");		for (j = ZIPFSIZE; j > 1; j--)			if (zipf[j]) 				break;		for (i = 1; i < j; ++i) {			(void) printf("%3d -%6d ", i, zipf[i]);			if (i % 6 == 0) (void) putchar('\n');		}		(void) printf(">%d-%6d\n", ZIPFSIZE, zipf[0]);	}#endif	/* free all malloc'd memory */	free(POST);	free(SUPFING);	free(SUPINT);	return(totterm);}/* add a term to the data base */static intinvnewterm(void){	int	backupflag, i, j, maxback, holditems, gooditems, howfar;	int	len, numwilluse, wdlen;	char	*tptr, *tptr2, *tptr3;	union {		unsigned long	packword[2];		ENTRY		e;	} iteminfo;	gooditems = 0;		/* initialize, to avoid warning */	totterm++;#if STATS	/* keep zipfian info on the distribution */	if (numpost <= ZIPFSIZE)		zipf[numpost]++;	else		zipf[0]++;#endif	len = strlen(thisterm);	wdlen = (len + (sizeof(long) - 1)) / sizeof(long);	numwilluse = (wdlen + 3) * sizeof(long);	/* new block if at least 1 item in block */	if (numinvitems && numwilluse + amtused > sizeof(t_logicalblk)) {		/* set up new block */		if (supfing + 500 > SUPFING + supersize) {			i = supfing - SUPFING;			supersize += 20000;			if ((SUPFING = (char *)realloc(SUPFING, supersize)) == NULL) {				invcannotalloc(supersize);				return(0);			}			supfing = i + SUPFING;#if DEBUG			(void) printf("reallocated superfinger space to %d, totpost=%ld\n", 			    supersize, totpost);#endif		}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -