📄 makedb.c
字号:
/* makedb.c - Build/dump WordNet database*/#include <stdio.h>#include <string.h>#include "wngrind.h"static char *Id = "$Id: makedb.c,v 1.42 2005/01/31 20:03:36 wn Rel $";#define PADLEN 3static int GetSize(Synonym, Pointer, Framelist, char *);void FindOffsets(){ register G_Synset ss, fan; int count[NUMPARTS + 1]; /* no part 0 */ int i; fprintf(logfile, "Figuring out byte offsets...\n"); for (i = 1; i <= NUMPARTS; i++) count[i] = DBLICENSE_SIZE - 1; for (ss = headss; ss; ss = ss->ssnext) { ss->filepos = count[ss->part]; count[ss->part] += GetSize(ss->syns, ss->ptrs, ss->frames, ss->defn); /* Find size of cluster fans */ for (fan = ss->fans; fan; fan = fan->ssnext) { fan->filepos = count[ADJ]; count[ADJ] += GetSize(fan->syns, fan->ptrs, NULL, fan->defn); } }}#define SSMINSIZE 23#define SSFIXSIZE 3#define PTRSIZE 17#define DEFNFIXSIZE 2#define FRAMESIZE 8#define FIXFRAMESIZE 4static int GetSize(Synonym s, Pointer p, Framelist f, char *defn){ register int total = SSMINSIZE; int flag = 0; /* Find size of synonyms */ for (; s; s = s->synnext) { total += (SSFIXSIZE + strlen(s->word->label)); if (s->adjclass) total += strlen(adjclass[s->adjclass]); } /* Add size of pointers */ for (; p; p = p->pnext) { switch (p->status) { case RESOLVED: total += (PTRSIZE + strlen(ptrsymbols[p->ptype])); break; case UNRESOLVED: fprintf(logfile, "Unresolved pointers exist. Aborting.\n"); exit(-1); case DUPLICATE: break; } } /* If there's a definition, add its length */ if (defn && *defn) total += (DEFNFIXSIZE + strlen(defn)); /* For each framelist, add in its size */ for (; f; f = f->fnext) { int i; if (f->frames) { for (i = 0; i < NUMFRAMES; i++) if (1 << (i % 32) & f->frames[i / 32]) { total += FRAMESIZE; flag = 1; } } } if (flag) total += FIXFRAMESIZE; else total++; /* kludge - off by one if no frames */ return(total);}static FILE *ofp;static FILE *keyfp;static void DumpSynset(G_Synset);static void DumpSynonyms(Synonym);static void DumpPointers(Pointer);static void DumpFrames(Framelist);static void DumpDefn(char *);static void DumpKey(unsigned int, long, char);static void DumpWords(FILE *, int);static void FixLastRecord(int);static void DumpSenses(FILE *);void DumpData(){ register G_Synset ss; FILE *ofile[NUMPARTS + 1]; /* no part 0 */ char tempfn[100]; int i; fprintf(logfile, "Dumping data files...\n"); for (i = 1; i <= NUMPARTS; i++) ofile[i] = NULL; for (i = 1; i <= NUMPARTS; i++) { if (partseen[i]) { sprintf(tempfn,"data.%s",partnames[i]); if ((ofile[i] = fopen(tempfn,"w")) == NULL) { fprintf(logfile, "%s: Cannot open output file %s\n", Argv[0], tempfn); goto abortdump; } fprintf(ofile[i], "%s", dblicense); } } /* Generate "index.key" file */ if (synsetkeys) { if ((keyfp = popen("env LC_ALL=C sort +0 -1 > index.key", "w")) == NULL) { fprintf(logfile, "%s: Cannot open pipe to output keyfile index.key\n", Argv[0]); exit(-1); } } for (ss = headss; ss; ss = ss->ssnext) { if (ftell(ofile[ss->part]) != ss->filepos) { fprintf(logfile, "%s: sanity error - actual pos %d != assigned pos %d!\n", Argv[0], ftell(ofile[ss->part]), ss->filepos); goto abortdump; } ofp = ofile[ss->part]; DumpSynset(ss); }abortdump: for (i = 1; i <= NUMPARTS; i++) if (partseen[i] && ofile[i]) fclose(ofile[i]); if (synsetkeys) pclose(keyfp); fprintf(logfile, "Done dumping data files...\n");}static void DumpSynset(G_Synset ss){ register G_Synset fan; /* BYTE_OFFSET FILENUMBER [n | v | a] */ fprintf(ofp,"%8.8d %2.2d %c ", ss->filepos, ss->filenum, partchars[ss->part]); DumpSynonyms(ss->syns); DumpPointers(ss->ptrs); DumpFrames(ss->frames); DumpDefn(ss->defn); DumpKey(ss->key, ss->filepos, partchars[ss->part]); fprintf(ofp," \n"); for (fan = ss->fans; fan; fan = fan->ssnext) { /* pos is 's' for satellite */ fprintf(ofp,"%8.8d %2.2d s ", fan->filepos, fan->fans->filenum); DumpSynonyms(fan->syns); DumpPointers(fan->ptrs); DumpDefn(fan->defn); DumpKey(fan->key, fan->filepos, partchars[fan->part]); fprintf(ofp," \n"); }}static void DumpSynonyms(Synonym syns){ register Synonym s; register int count; /* NUMBER_OF_SYNONYMS */ count = 0; for (s = syns; s; s = s->synnext) count++; fprintf(ofp, "%2.2x ", count); /* list of synonyms */ for (s = syns; s; s = s->synnext) { if (s->adjclass) fprintf(ofp, "%s%s ", s->label ? s->label : s->word->label, adjclass[s->adjclass]); else fprintf(ofp, "%s ", (s->label ? s->label : s->word->label)); fprintf(ofp, "%1.1x ", s->sensenum % 16); }}static void DumpPointers(Pointer ptrs){ register Pointer p; register int count; /* NUMBER_OF_POINTERS */ count = 0; for (p = ptrs; p; p = p->pnext) if (p->status == RESOLVED) { count++; } fprintf(ofp, "%3.3d ", count); /* list of pointers */ for (p = ptrs; p; p = p->pnext) if (p->status == RESOLVED) { fprintf(ofp,"%s %8.8d %c %2.2x%2.2x ", ptrsymbols[p->ptype], p->psynset->filepos, partchars[p->psynset->part], (p->fromwdnum < 0 ? -p->fromwdnum : p->fromwdnum), (p->towdnum < 0 ? -p->towdnum : p->towdnum)); }}static void DumpFrames(Framelist frames){ register Framelist f; register int count; register unsigned int i; /* NUMBER_OF_VERBFRAMES list of verb frames*/ count = 0; for(f = frames; f; f = f->fnext) { if (f->frames) { for (i=0;i<NUMFRAMES; i++) if ((1<<(i%32)) & f->frames[i/32]) count++; } } if (count) { fprintf(ofp, "%2.2d ", count); for(f = frames; f; f = f->fnext) { if (f->frames) { for (i=0;i<NUMFRAMES; i++) if ((1<<(i%32)) & f->frames[i/32]) fprintf(ofp,"+ %2.2d %2.2x ", i + 1, f->frwdnum); } } }}static void DumpDefn(char *defn){ /* | DEFINITION */ if (defn) fprintf(ofp, "| %s", defn);}static void DumpKey(unsigned int key, long offset, char pos){ /* unique synset key gets printed to file along with offset and pos */ if (synsetkeys && key != 0) { fprintf(keyfp, "%6.6d %c:%8.8d\n", key, pos, offset); }}void DumpIndex(){ int i; char cmd[100], fn[16]; FILE *ofs, *fp; fprintf(logfile, "Dumping index files...\n"); for (i = 1; i <= NUMPARTS; i++) { if (partseen[i]) { sprintf(fn, "index.%s", partnames[i]); if ((fp = fopen(fn, "w")) == NULL) { fprintf(logfile, "Cannot open file: %s\n", fn); exit(-1); } fprintf(fp, "%s", dblicense); fflush(fp); sprintf(cmd, "sort +0 -1 >> %s", fn); if ((ofs = popen(cmd, "w")) == NULL) { fprintf(logfile, "Cannot open pipe: %s\n", cmd); exit(-1); } DumpWords(ofs, i); pclose(ofs); FixLastRecord(i); /* make 2nd to last record longer than last */ } } fprintf(logfile, "Done dumping index files...\n");}static void DumpWords(FILE *fs, int part){ register int i, j; register Symbol sym; register SynList sl; Pointer p; G_Synset ss; int printflag, ptr_cnt, synset_cnt, hasframes, attest_cnt; char ptrused[LASTTYPE + 1]; int part2, idx, done; SynList sl_list[MAXSENSE], tmp; /* array of pointers to senses */ part2 = (part == ADJ ? SATELLITE : part); for (i = 0; i < HASHSIZE; i++) { for (sym = hashtab[i]; sym; sym = sym->symnext) { printflag = synset_cnt = ptr_cnt = hasframes = idx = 0; for (j = 1; j <= LASTTYPE; j++) ptrused[j] = 0; for (sl = sym->syns; sl; sl = sl->snext) { /* First, get the synset that this word belongs to */ ss = sl->psyn->ss; if (ss->part != part && ss->part != part2) continue; /* wrong part of speech */ synset_cnt++; /* count this synset */ /* Find all the pointers that come from this word. If this is the first pointer of this type found, increment the type counter and set a flag for this pointer type. Later the pointer count and list of pointer characters is output. */ for (p = ss->ptrs; p; p = p->pnext) { if (p->status == RESOLVED && (p->fromwdnum == ALLWORDS || p->fromwdnum == sl->psyn->sswdnum)) { if ( (p->ptype >= CLASSIF_START) && (p->ptype <= CLASSIF_END) && !ptrused[CLASSIFICATION] ) { ptrused[CLASSIFICATION] = 1; ptr_cnt++; } else if ( (p->ptype >= CLASS_START) && (p->ptype <= CLASS_END) && !ptrused[CLASS] ) { ptrused[CLASS] = 1; ptr_cnt++; } else if ( p->ptype == INSTANCE && !ptrused[HYPERPTR] ) { ptrused[HYPERPTR] = 1; ptr_cnt++; } else if ( p->ptype == INSTANCES && !ptrused[HYPOPTR] ) { ptrused[HYPOPTR] = 1; ptr_cnt++; } else if ( p->ptype <= LASTTYPE && !ptrused[p->ptype]) { ptrused[p->ptype] = 1; ptr_cnt++; } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -