⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 makedb.c

📁 一个与传统电子字典不同的字典
💻 C
📖 第 1 页 / 共 2 页
字号:
		}#ifdef FOOP		if (ss->frames) hasframes = 1; /* add in verb frames */#endif		if (!printflag) {		    fprintf(fs, "%s %c %d ",			    strlower(sym->label),			    partchars[part],			    (sym->sensecnt[part] != NOSENSE			     ? sym->sensecnt[part] : 0));		    printflag = 1;		}	    }	    if (printflag) {		/* output pointer count and types */		fprintf(fs, "%d ", ptr_cnt);#ifdef FOOP		fprintf(fs, "%d ", ptr_cnt + hasframes);		if (hasframes)	/* print verb frame symbol */		    fprintf(fs, "+ ");#endif		if (ptr_cnt)		    for (j = 1; j <= LASTTYPE; j++)			if (ptrused[j])			    fprintf(fs, "%s ", ptrsymbols[j]);		/* output synset count and offsets */		fprintf(fs, "%d ", synset_cnt);		/* if ADJ, output cluseter heads, then fans */		if (part == ADJ) {		    for (sl = sym->syns; sl; sl = sl->snext) {			ss = sl->psyn->ss;			if (ss->part == ADJ)			    if ((idx + 1) >= MAXSENSE) {				fprintf(logfile,					"%s: %s %s has > MAXSENSE senses\n",					Argv[0],					partnames[ss->part], sym->label);			    } else				sl_list[idx++] = sl;		    }		    for (sl = sym->syns; sl; sl = sl->snext) {			ss = sl->psyn->ss;			if (ss->part == SATELLITE)			    if ((idx + 1) >= MAXSENSE) {				fprintf(logfile,					"%s: %s %s has > MAXSENSE senses\n",					Argv[0],					partnames[ss->part], sym->label);			    } else				sl_list[idx++] = sl;		    }		} else {		    for (sl = sym->syns; sl; sl = sl->snext) {			ss = sl->psyn->ss;			if (ss->part == part || ss->part == part2)			    if ((idx + 1) >= MAXSENSE) {				fprintf(logfile,					"%s: %s %s has > MAXSENSE senses\n",					Argv[0],					partnames[ss->part], sym->label);			    } else				sl_list[idx++] = sl;		    }		}		attest_cnt = 0;		/* Sort senses from most to least frequently tagged. */		if (ordersenses) {		    done = 0;		    while (!done) {			done = 1;			for (j = 0; j < idx - 1; j++) {			    if (sl_list[j]->psyn->tagcnt <				sl_list[j + 1]->psyn->tagcnt) {				tmp = sl_list[j];				sl_list[j] = sl_list[j + 1];				sl_list[j + 1] = tmp;				done = 0;			    }			}		    }		    if (sl_list[0]->psyn->tagcnt == 0) {			attest_cnt = 0;		    } else {			for (j = 1; j < idx; j++) {			    if (sl_list[j]->psyn->tagcnt == 0)				break;			}			attest_cnt = j;		    }		}		fprintf(fs, "%d ", attest_cnt);		/* Output offsets and store sense number for sense index */		for (j = 0; j < idx; j++) {		    fprintf(fs, "%08d ", sl_list[j]->psyn->ss->filepos);		    sl_list[j]->psyn->wnsensenum = j + 1;		}		fprintf(fs, " \n");	    }	}    }}void DumpSenseIndex(){    char cmd[100];    FILE *ofs, *fp;        fprintf(logfile, "Dumping sense index...\n");        if ((fp = fopen("index.sense", "w")) == NULL) {	fprintf(logfile, "Cannot open file: index.sense\n");	exit(-1);    }    sprintf(cmd, "sort +0 -1 >> index.sense");    if ((ofs = popen(cmd, "w")) == NULL) {	fprintf(logfile, "Cannot open pipe: %s\n", cmd);	exit(-1);    }    DumpSenses(ofs);    pclose(ofs);    fprintf(logfile, "Done dumping sense index...\n");}static void DumpSenses(FILE *fs){    register int i;    register G_Synset s;    register Symbol sym;    register SynList sl;    for (i = 0; i < HASHSIZE; i++) {	for (sym = hashtab[i]; sym; sym = sym->symnext) {	    for (sl = sym->syns; sl; sl = sl->snext) {		s = sl->psyn->ss;		if (s->part != SATELLITE)		    fprintf(fs, "%s%%%-1.1d:%-2.2d:%-2.2d::",			    strlower(sym->label),			    s->part,			    s->filenum,			    sl->psyn->sensenum);		else#ifdef FOOP		    fprintf(fs, "%s%%%-1.1d:%-2.2d:%-2.2d:%s%s:%-2.2d",#endif			    /* don't print adj class */		    fprintf(fs, "%s%%%-1.1d:%-2.2d:%-2.2d:%s:%-2.2d",			    strlower(sym->label),			    s->part,			    s->filenum,			    sl->psyn->sensenum,			    s->fans->syns->word->label,#ifdef FOOP			    adjclass[s->fans->syns->adjclass],#endif			    s->fans->syns->sensenum);		/* Dump byte offset, WordNet sense number and		   number of tags in semantic concordance. */		fprintf(fs, " %-8.8d %d %d\n",			s->filepos, sl->psyn->wnsensenum, sl->psyn->tagcnt);	    }	}    }}#define BUFSIZE		4196static void FixLastRecord(int part){    char fn[16];    FILE *fp;    char buf[BUFSIZE];    int i = BUFSIZE;    int lastlen, secondtolastlen, pad, svpos;    /* Funky routine to pad the second to the last record of the       index file to be longer than the last record so the binary       search in the search code works properly. */    sprintf(fn, "index.%s", partnames[part]);    if ((fp = fopen(fn, "r+")) == NULL) {	fprintf(logfile, "Cannot open file: %s\n", fn);	exit(-1);    }    /* Move to 2 characters from the end of file (don't read final       newline).  Read the last entry and save it in a buffer.       Record it's starting location in the buffer.  Then read the       previous record.  Calculate the length of both records.  If       the second to last record is shorter than (or equal to) the       last record, pad spaces onto the end of the second to last       record, then ouput the last record which was saved in the buffer. */    buf[--i] = '\0';    fseek(fp, -2L, 2);    while ((buf[--i] = getc(fp)) != '\n')	fseek(fp, -2L, 1);    fseek(fp, -2L, 1);    svpos = i;			/* save location of last record */    while (getc(fp) != '\n') {	--i;	fseek(fp, -2L, 1);    }    secondtolastlen = svpos - i;    lastlen = BUFSIZE - svpos;    pad = lastlen - secondtolastlen;    if (pad > 0) {	fseek(fp, (long)(secondtolastlen - 1), 1);	while(pad--)	    putc(' ', fp);	fprintf(fp, "\n%s\n", buf + svpos + 1);    }    fclose(fp);}void ReadCntlist(){    FILE *fp;    register Symbol sym;    register SynList sl;    register Synonym syn;    int cnt, id, pos, filenum, hid, wnsns;    char *lparen;    char wd[100], buf[100], head[100];    if ((fp = fopen("cntlist", "r")) == NULL) {	fprintf(logfile, "Cannot open file: cntlist\n");	fprintf(logfile, "Cannot order senses\n");	return;    }    fprintf(logfile, "Reading cntlist...\n");    while (fscanf(fp, "%d %[^%]%%%1d:%2d:%2d:%s %d\n",		  &cnt, wd, &pos, &filenum, &id, buf, &wnsns) != EOF) {	if (pos == SATELLITE) {	    sscanf(buf, "%[^:]:%2d", head, &hid);	    if ((lparen = strchr(head, '(')) != NULL) {	        *lparen = '\0';	/* truncate string at '(' if present */	    }	} else	    hid = -1;	sym = FindSymbol(wd);	if (sym != NULL) {	    for (sl = sym->syns; sl; sl = sl->snext) {		syn = sl->psyn;		if (syn->sensenum == id &&		    syn->ss->part == pos &&		    syn->ss->filenum == filenum) {		    if (hid == -1) {			syn->tagcnt = cnt;			break;		    } else {			if (syn->ss->fans->syns->sensenum == hid &&			    !strcmp(syn->ss->fans->syns->word->label, head)) {			    syn->tagcnt = cnt;			    break;			}		    }		}	    }	}    }    fprintf(logfile, "Done reading cntlist\n");}/*  Revision log: (since version 1.5)  $Log: makedb.c,v $  Revision 1.42  2005/01/31 20:03:36  wn  cleaned up and consolidated include files  Revision 1.41  2005/01/27 15:51:34  wn  *** empty log message ***  Revision 1.40  2004/01/16 18:12:30  wn  updated dumpwords to check for new instance/instances pointer  Revision 1.39  2003/06/19 17:57:36  wn  fixed bug in dumpindex  Revision 1.38  2003/04/15 17:07:09  wn  fixed temporary loss of domains  Revision 1.37  2003/04/15 13:52:36  wn  *** empty log message ***  Revision 1.36  2001/09/14 13:40:38  wn  added code to skip generation of DUPLICATE pointers  Revision 1.35  2001/09/06 17:55:38  wn  added code for synset keys  Revision 1.34  2001/05/24 16:25:58  wn  removed adjective marker from satellite in sense key when printing  sense index  Revision 1.33  2000/07/17 19:54:20  wn  in FixLastRecord, changed "a+" to "r+"  Revision 1.32  1999/07/22 16:10:12  wn  fixed bug in ordering senses - wasn't working for SATELLITE with markers  in 'head' field.  Revision 1.31  1997/10/09 18:39:16  wn  fixed bug in calculating number of attested senses  Revision 1.30  1997/09/04 20:15:59  wn  *** empty log message *** * Revision 1.29  1997/08/08  19:15:04  wn * added "attest_cnt" field to index file * * Revision 1.28  1997/08/05  14:26:01  wn * cleanups * * Revision 1.27  1996/05/30  20:56:48  wn * *** empty log message *** * * Revision 1.26  1995/06/23  16:39:23  wn * removed nonlocal stuff * * Revision 1.25  1995/02/07  20:46:50  wn * changed FANSS to SATELLITE * * Revision 1.24  1994/11/04  16:49:01  wn * fixed bug in readcntlist for satellites * * Revision 1.23  1994/09/27  19:52:33  wn * changed to use new filelist structure * * Revision 1.22  1994/08/18  19:05:42  wn * fixed bug in ReadCntlist for satellites (naturally) * * Revision 1.21  1994/07/14  17:54:46  wn * changed sense index code to new fmt * * Revision 1.20  1994/07/14  16:46:51  wn * changed ReadCntlist to read new format * * Revision 1.19  1994/07/01  19:26:52  wn * *** empty log message *** * * Revision 1.18  94/03/24  09:53:31  wn * *** empty log message *** *  * Revision 1.17  94/03/03  11:57:06  wn * added ReadCntlist and code to sort senses by most freq. tagged sense *  * Revision 1.16  94/02/03  10:59:09  wn * changed sort cmmand - took out -z5120 *  * Revision 1.15  93/05/05  14:41:19  wn * changed pos field from 2 to 1 byte in DumpSenses *  * Revision 1.14  93/04/13  13:24:48  wn * added code for index.sense *  * Revision 1.1  91/09/11  14:49:42  wn * Initial revision * */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -