⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 makedb.c

📁 一个与传统电子字典不同的字典
💻 C
📖 第 1 页 / 共 2 页
字号:
/*  makedb.c - Build/dump WordNet database*/#include <stdio.h>#include <string.h>#include "wngrind.h"static char *Id = "$Id: makedb.c,v 1.42 2005/01/31 20:03:36 wn Rel $";#define PADLEN	3static int GetSize(Synonym, Pointer, Framelist, char *);void FindOffsets(){    register G_Synset ss, fan;    int count[NUMPARTS + 1];	/* no part 0 */    int i;        fprintf(logfile, "Figuring out byte offsets...\n");    for (i = 1; i <= NUMPARTS; i++)	count[i] = DBLICENSE_SIZE - 1;    for (ss = headss; ss; ss = ss->ssnext) {	ss->filepos = count[ss->part];	count[ss->part] += GetSize(ss->syns, ss->ptrs, ss->frames, ss->defn);	/* Find size of cluster fans */	for (fan = ss->fans; fan; fan = fan->ssnext) {	    fan->filepos = count[ADJ];	    count[ADJ] += GetSize(fan->syns, fan->ptrs, NULL, fan->defn);	}    }}#define	SSMINSIZE	23#define SSFIXSIZE	3#define PTRSIZE		17#define DEFNFIXSIZE	2#define FRAMESIZE	8#define FIXFRAMESIZE	4static int GetSize(Synonym s, Pointer p, Framelist f, char *defn){    register int total = SSMINSIZE;    int flag = 0;    /* Find size of synonyms */        for (; s; s = s->synnext) {	total += (SSFIXSIZE + strlen(s->word->label));	if (s->adjclass)	    total += strlen(adjclass[s->adjclass]);    }        /* Add size of pointers */        for (; p; p = p->pnext) {	switch (p->status) {	case RESOLVED:	    total += (PTRSIZE + strlen(ptrsymbols[p->ptype]));	    break;	case UNRESOLVED:	    fprintf(logfile, "Unresolved pointers exist. Aborting.\n");	    exit(-1);	case DUPLICATE:	    break;	}    }        /* If there's a definition, add its length */        if (defn && *defn)	total += (DEFNFIXSIZE + strlen(defn));        /* For each framelist, add in its size */        for (; f; f = f->fnext) {	int i;		if (f->frames) {	    for (i = 0; i < NUMFRAMES; i++)		if (1 << (i % 32) & f->frames[i / 32]) {		    total += FRAMESIZE;		    flag = 1;		}	}    }    if (flag)	total += FIXFRAMESIZE;    else 	total++;		/* kludge - off by one if no frames */        return(total);}static FILE *ofp;static FILE *keyfp;static void DumpSynset(G_Synset);static void DumpSynonyms(Synonym);static void DumpPointers(Pointer);static void DumpFrames(Framelist);static void DumpDefn(char *);static void DumpKey(unsigned int, long, char);static void DumpWords(FILE *, int);static void FixLastRecord(int);static void DumpSenses(FILE *);void DumpData(){    register G_Synset ss;    FILE *ofile[NUMPARTS + 1];	/* no part 0 */    char tempfn[100];    int i;        fprintf(logfile, "Dumping data files...\n");    for (i = 1; i <= NUMPARTS; i++)	ofile[i] = NULL;        for (i = 1; i <= NUMPARTS; i++) {	if (partseen[i]) {	    sprintf(tempfn,"data.%s",partnames[i]);	    if ((ofile[i] = fopen(tempfn,"w")) == NULL) {		fprintf(logfile,			"%s: Cannot open output file %s\n", Argv[0], tempfn);		goto abortdump;	    }	    fprintf(ofile[i], "%s", dblicense);	}    }    /* Generate "index.key" file */    if (synsetkeys) {	if ((keyfp = popen("env LC_ALL=C sort +0 -1 > index.key", "w")) == NULL) {	    fprintf(logfile,		    "%s: Cannot open pipe to output keyfile index.key\n",		    Argv[0]);	    exit(-1);	}    }    for (ss = headss; ss; ss = ss->ssnext) {	if (ftell(ofile[ss->part]) != ss->filepos) {	    fprintf(logfile,		    "%s: sanity error - actual pos %d != assigned pos %d!\n",		    Argv[0], ftell(ofile[ss->part]), ss->filepos);	    goto abortdump;	}	ofp = ofile[ss->part];	DumpSynset(ss);    }abortdump:    for (i = 1; i <= NUMPARTS; i++)	if (partseen[i] && ofile[i])	    fclose(ofile[i]);    if (synsetkeys) 	pclose(keyfp);    fprintf(logfile, "Done dumping data files...\n");}static void DumpSynset(G_Synset ss){    register G_Synset fan;    /* BYTE_OFFSET FILENUMBER [n | v | a] */    fprintf(ofp,"%8.8d %2.2d %c ",	    ss->filepos, ss->filenum, partchars[ss->part]);    DumpSynonyms(ss->syns);    DumpPointers(ss->ptrs);    DumpFrames(ss->frames);    DumpDefn(ss->defn);    DumpKey(ss->key, ss->filepos, partchars[ss->part]);    fprintf(ofp,"  \n");    for (fan = ss->fans; fan; fan = fan->ssnext) {	/* pos is 's' for satellite */	fprintf(ofp,"%8.8d %2.2d s ",		fan->filepos, fan->fans->filenum);	DumpSynonyms(fan->syns);	DumpPointers(fan->ptrs);	DumpDefn(fan->defn);	DumpKey(fan->key, fan->filepos, partchars[fan->part]);	fprintf(ofp,"  \n");    }}static void DumpSynonyms(Synonym syns){    register Synonym s;    register int count;    /* NUMBER_OF_SYNONYMS */    count = 0;    for (s = syns; s; s = s->synnext) count++;    fprintf(ofp, "%2.2x ", count);    /* list of synonyms */    for (s = syns; s; s = s->synnext) {	if (s->adjclass)	    fprintf(ofp, "%s%s ",		    s->label ? s->label : s->word->label,		    adjclass[s->adjclass]);	else	    fprintf(ofp, "%s ", (s->label ? s->label : s->word->label));	fprintf(ofp, "%1.1x ", s->sensenum % 16);    }}static void DumpPointers(Pointer ptrs){    register Pointer p;    register int count;    /* NUMBER_OF_POINTERS */    count = 0;    for (p = ptrs; p; p = p->pnext)	if (p->status == RESOLVED) { count++; }    fprintf(ofp, "%3.3d ", count);        /* list of pointers */    for (p = ptrs; p; p = p->pnext)	if (p->status == RESOLVED) {	    fprintf(ofp,"%s %8.8d %c %2.2x%2.2x ",		    ptrsymbols[p->ptype],		    p->psynset->filepos,		    partchars[p->psynset->part],		    (p->fromwdnum < 0 ? -p->fromwdnum : p->fromwdnum),		    (p->towdnum < 0 ? -p->towdnum : p->towdnum));	}}static void DumpFrames(Framelist frames){    register Framelist f;    register int count;    register unsigned int i;    /* NUMBER_OF_VERBFRAMES list of verb frames*/    count = 0;    for(f = frames; f; f = f->fnext) {	if (f->frames) {	    for (i=0;i<NUMFRAMES; i++)		if ((1<<(i%32)) & f->frames[i/32]) count++;	}    }    if (count) {	fprintf(ofp, "%2.2d ", count);	for(f = frames; f; f = f->fnext) {	    if (f->frames) {		for (i=0;i<NUMFRAMES; i++)		    if ((1<<(i%32)) & f->frames[i/32])			fprintf(ofp,"+ %2.2d %2.2x ", i + 1, f->frwdnum);	    }	}    }}static void DumpDefn(char *defn){    /* | DEFINITION */    if (defn)	fprintf(ofp, "| %s", defn);}static void DumpKey(unsigned int key, long offset, char pos){    /* unique synset key gets printed to file along with offset and pos */    if (synsetkeys && key != 0) {	fprintf(keyfp, "%6.6d %c:%8.8d\n", key, pos, offset);    }}void DumpIndex(){    int i;    char cmd[100], fn[16];    FILE *ofs, *fp;    fprintf(logfile, "Dumping index files...\n");    for (i = 1; i <= NUMPARTS; i++) {	if (partseen[i]) {	    sprintf(fn, "index.%s", partnames[i]);	    if ((fp = fopen(fn, "w")) == NULL) {		fprintf(logfile, "Cannot open file: %s\n", fn);		exit(-1);	    }	    	    fprintf(fp, "%s", dblicense);	    fflush(fp);	    sprintf(cmd, "sort +0 -1 >> %s", fn);	    if ((ofs = popen(cmd, "w")) == NULL) {		fprintf(logfile, "Cannot open pipe: %s\n", cmd);		exit(-1);	    }	    DumpWords(ofs, i);	    pclose(ofs);	    FixLastRecord(i);	/* make 2nd to last record longer than last */	}    }    fprintf(logfile, "Done dumping index files...\n");}static void DumpWords(FILE *fs, int part){    register int i, j;    register Symbol sym;    register SynList sl;    Pointer p;    G_Synset ss;    int printflag, ptr_cnt, synset_cnt, hasframes, attest_cnt;    char ptrused[LASTTYPE + 1];    int part2, idx, done;    SynList sl_list[MAXSENSE], tmp; /* array of pointers to senses */        part2 = (part == ADJ ? SATELLITE : part);    for (i = 0; i < HASHSIZE; i++) {	for (sym = hashtab[i]; sym; sym = sym->symnext) {	    printflag = synset_cnt = ptr_cnt = hasframes = idx = 0;	    for (j = 1; j <= LASTTYPE; j++)		ptrused[j] = 0;	    for (sl = sym->syns; sl; sl = sl->snext) { 		/* First, get the synset that this word belongs to */		ss = sl->psyn->ss;		if (ss->part != part && ss->part != part2)		    continue;	/* wrong part of speech */		synset_cnt++;	/* count this synset */		/* Find all the pointers that come from this		   word.  If this is the first pointer of this type		   found, increment the type counter and set a flag		   for this pointer type.  Later the pointer count		   and list of pointer characters is output. */		for (p = ss->ptrs; p; p = p->pnext) {		    if (p->status == RESOLVED &&			(p->fromwdnum == ALLWORDS ||			 p->fromwdnum == sl->psyn->sswdnum)) {			if ( (p->ptype >= CLASSIF_START) &&			    (p->ptype <= CLASSIF_END) &&			    !ptrused[CLASSIFICATION] ) {			    ptrused[CLASSIFICATION] = 1;			    ptr_cnt++;			} else if ( (p->ptype >= CLASS_START) &&				   (p->ptype <= CLASS_END) &&				   !ptrused[CLASS] ) {			    ptrused[CLASS] = 1;			    ptr_cnt++;			} else if ( p->ptype == INSTANCE &&				    !ptrused[HYPERPTR] ) {			    ptrused[HYPERPTR] = 1;			    ptr_cnt++;			} else if ( p->ptype == INSTANCES &&				    !ptrused[HYPOPTR] ) {			    ptrused[HYPOPTR] = 1;			    ptr_cnt++;			} else if ( p->ptype <= LASTTYPE  &&				   !ptrused[p->ptype]) {			    ptrused[p->ptype] = 1;			    ptr_cnt++;			}		    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -