📄 odidx.c
字号:
if(!odclose(odeum)){ pdperror(name); err = TRUE; } if(err){ printfinfo("%s: registration was over%s", name, fatal ? " with fatal error" : ""); } else { printfinfo("%s: registration completed successfully", name); } return err ? 1 : 0;}/* find and index files in a directory */int indexdir(ODEUM *odeum, VILLA *mtdb, const char *name, const char *dir, int wmax, int ft, const CBLIST *tsuflist, const CBLIST *hsuflist){ CBLIST *files; const char *file; char path[PATHBUFSIZ]; int i, isroot, isdir, err; if(!(files = cbdirlist(dir))){ printferror("%s: directory cannot be opened", dir); return FALSE; } isroot = dir[0] == PATHCHR && dir[1] == '\0'; err = FALSE; for(i = 0; i < cblistnum(files); i++){ if(sigterm){ printferror("aborting due to a termination signal"); cblistclose(files); return FALSE; } file = cblistval(files, i, NULL); if(!strcmp(file, CDIRSTR) || !strcmp(file, PDIRSTR)) continue; if(isroot){ sprintf(path, "%s%s", dir, file); } else { sprintf(path, "%s%c%s", dir, PATHCHR, file); } if(!cbfilestat(path, &isdir, NULL, NULL)){ printferror("%s: file does not exist", file); err = TRUE; continue; } if(isdir){ if(!indexdir(odeum, mtdb, name, path, wmax, ft, tsuflist, hsuflist)) err = TRUE; } else { if(!indexfile(odeum, mtdb, name, path, wmax, ft, tsuflist, hsuflist)) err = TRUE; } } cblistclose(files); return err ? FALSE : TRUE;}/* index a file into the database */int indexfile(ODEUM *odeum, VILLA *mtdb, const char *name, const char *file, int wmax, int ft, const CBLIST *tsuflist, const CBLIST *hsuflist){ static int cnt = 0; char *vbuf, *buf, *uri; const char *title; int size, mtime, hot, vsiz, wnum, bnum; ODDOC *doc; if(!cbfilestat(file, NULL, &size, &mtime)){ printferror("%s: file does not exist", file); return FALSE; } hot = TRUE; if((vbuf = vlget(mtdb, file, -1, &vsiz)) != NULL){ if(vsiz == sizeof(int) && mtime <= *(int *)vbuf) hot = FALSE; free(vbuf); } if(!hot){ printfinfo("%s: passed", file); return TRUE; } doc = NULL; uri = filetouri(file); if(bwimatchlist(file, tsuflist)){ if(!(buf = cbreadfile(file, NULL))){ printferror("%s: file cannot be opened", file); return FALSE; } doc = makedocplain(uri, buf, datestr(mtime)); free(buf); } else if(bwimatchlist(file, hsuflist)){ if(!(buf = cbreadfile(file, NULL))){ printferror("%s: file cannot be opened", file); return FALSE; } doc = makedochtml(uri, buf, datestr(mtime)); free(buf); } free(uri); if(doc){ if(ft && (!(title = oddocgetattr(doc, "title")) || strlen(title) < 1)){ if((title = strrchr(file, PATHCHR)) != NULL){ title++; } else { title = file; } oddocaddattr(doc, "title", title); } if(odput(odeum, doc, wmax, TRUE) && vlput(mtdb, file, -1, (char *)&mtime, sizeof(int), VL_DOVER)){ printfinfo("%s: registered: id=%d wnum=%d", file, oddocid(doc), cblistnum(oddocnwords(doc))); cnt++; } else { pdperror(file); } oddocclose(doc); } wnum = odwnum(odeum); bnum = odbnum(odeum); if(wnum != -1 && bnum != -1 && (double)wnum / (double)bnum > MAXLOAD){ printfinfo("%s: optimizing started: fsiz=%d dnum=%d wnum=%d bnum=%d", name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum)); if(!odoptimize(odeum)){ pdperror(file); return FALSE; } printfinfo("%s: optimizing completed: fsiz=%d dnum=%d wnum=%d bnum=%d", name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum)); } if(cnt >= 256){ printfinfo("%s: database status: fsiz=%d dnum=%d wnum=%d bnum=%d", name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum)); cnt = 0; } return TRUE;}/* make the url from file path */char *filetouri(const char *file){ CBLIST *list; char str[PATHBUFSIZ], *wp, *enc; const char *name; int i, nsiz; sprintf(str, "%c", PATHCHR); list = cbsplit(file, -1, str); wp = str; for(i = 0; i < cblistnum(list); i++){ if(i > 0) *(wp++) = '/'; name = cblistval(list, i, &nsiz); enc = cburlencode(name, nsiz); wp += sprintf(wp, "%s", enc); free(enc); } cblistclose(list); *wp = '\0'; return cbmemdup(str, -1);}/* make a document of plain text */ODDOC *makedocplain(const char *uri, const char *text, const char *date){ ODDOC *doc; CBLIST *awords; const char *asis; char *normal; int i; doc = oddocopen(uri); if(date) oddocaddattr(doc, "date", date); awords = odbreaktext(text); for(i = 0; i < cblistnum(awords); i++){ asis = cblistval(awords, i, NULL); normal = odnormalizeword(asis); oddocaddword(doc, normal, asis); free(normal); } cblistclose(awords); return doc;}/* make a document of HTML */ODDOC *makedochtml(const char *uri, const char *html, const char *date){ static CBMAP *pairs = NULL; ODDOC *doc; CBLIST *elems, *awords; const char *text, *asis; char kbuf[8], vbuf[8], *rtext, *normal; int i, j, body; if(!pairs){ pairs = cbmapopen(); cbglobalgc(pairs, (void (*)(void *))cbmapclose); cbmapput(pairs, "&", -1, "&", 1, TRUE); cbmapput(pairs, "<", -1, "<", 1, TRUE); cbmapput(pairs, ">", -1, ">", 1, TRUE); cbmapput(pairs, """, -1, "\"", 1, TRUE); cbmapput(pairs, "'", -1, "'", 1, TRUE); cbmapput(pairs, " ", -1, " ", 1, TRUE); cbmapput(pairs, "©", -1, "(C)", -1, TRUE); cbmapput(pairs, "®", -1, "(R)", -1, TRUE); cbmapput(pairs, "™", -1, "(TM)", -1, TRUE); for(i = 1; i <= 127; i++){ sprintf(vbuf, "%c", i); sprintf(kbuf, "&#%d;", i); cbmapput(pairs, kbuf, -1, vbuf, 1, TRUE); } } doc = oddocopen(uri); if(date) oddocaddattr(doc, "date", date); elems = htmllist(html); body = FALSE; for(i = 0; i < cblistnum(elems); i++){ text = cblistval(elems, i, NULL); if(fwimatch(text, "<title")){ i++; if(i < cblistnum(elems)){ text = cblistval(elems, i, NULL); if(text[0] == '<') text = ""; rtext = cbreplace(text, pairs); for(j = 0; rtext[j] != '\0'; j++){ if(strchr("\t\n\v\f\r", rtext[j])) rtext[j] = ' '; } while(--j >= 0){ if(rtext[j] != ' ') break; rtext[j] = '\0'; } for(j = 0; rtext[j] != '\0'; j++){ if(rtext[j] != ' ') break; } oddocaddattr(doc, "title", rtext + j); awords = odbreaktext(rtext); for(j = 0; j < cblistnum(awords); j++){ asis = cblistval(awords, j, NULL); normal = odnormalizeword(asis); oddocaddword(doc, normal, ""); free(normal); } cblistclose(awords); free(rtext); } } else if(fwimatch(text, "<body")){ body = TRUE; } else if(body && text[0] != '<'){ rtext = cbreplace(text, pairs); awords = odbreaktext(rtext); for(j = 0; j < cblistnum(awords); j++){ asis = cblistval(awords, j, NULL); normal = odnormalizeword(asis); oddocaddword(doc, normal, asis); free(normal); } cblistclose(awords); free(rtext); } } if(!body){ for(i = 0; i < cblistnum(elems); i++){ text = cblistval(elems, i, NULL); if(fwimatch(text, "<title")){ i++; } else if(text[0] != '<'){ rtext = cbreplace(text, pairs); awords = odbreaktext(rtext); for(j = 0; j < cblistnum(awords); j++){ asis = cblistval(awords, j, NULL); normal = odnormalizeword(asis); oddocaddword(doc, normal, asis); free(normal); } cblistclose(awords); free(rtext); } } } cblistclose(elems); return doc;}/* break HTML into elements */CBLIST *htmllist(const char *html){ CBLIST *list; int i, pv, tag; char *ep; list = cblistopen(); i = 0; pv = 0; tag = FALSE; while(TRUE){ if(html[i] == '\0'){ if(i > pv) cblistpush(list, html + pv, i - pv); break; } else if(fwimatch(html + i, "<!--")){ if(i > pv) cblistpush(list, html + pv, i - pv); if((ep = strstr(html + i, "-->")) != NULL){ i = ep - html + 2; pv = i + 1; } } else if(!tag && html[i] == '<'){ if(i > pv) cblistpush(list, html + pv, i - pv); tag = TRUE; pv = i; } else if(tag && html[i] == '>'){ if(i > pv) cblistpush(list, html + pv, i - pv + 1); tag = FALSE; pv = i + 1; } i++; } return list;}/* register scores of documents */int procrelate(const char *name){ ODEUM *odeum; DEPOT *scdb; ODDOC *doc; CBMAP *scores; const char *file; char path[PATHBUFSIZ], *mbuf; int err, fatal, id, msiz; printfinfo("%s: relating started", name); if(!(odeum = odopen(name, OD_OWRITER))){ pdperror(name); return 1; } sprintf(path, "%s%c%s", name, PATHCHR, SCDBNAME); if(!(scdb = dpopen(path, OD_OWRITER | OD_OCREAT, SCDBBNUM))){ pdperror(name); odclose(odeum); return 1; } printfinfo("%s: database opened: fsiz=%d dnum=%d wnum=%d bnum=%d", name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum)); err = FALSE; if(!oditerinit(odeum)){ pdperror(name); err = TRUE; } else { while(TRUE){ if(sigterm){ printferror("aborting due to a termination signal"); err = TRUE; break; } if(!(doc = oditernext(odeum))){ if(dpecode != DP_ENOITEM){ pdperror(name); err = TRUE; } break; } file = oddocuri(doc); id = oddocid(doc); scores = oddocscores(doc, KEYNUM, odeum); mbuf = cbmapdump(scores, &msiz); if(!dpput(scdb, (char *)&id, sizeof(int), mbuf, msiz, DP_DOVER)){ pdperror(name); err = TRUE; } else { printfinfo("%s: related", file); } free(mbuf); cbmapclose(scores); oddocclose(doc); if(err) break; } } fatal = odfatalerror(odeum); printfinfo("%s: database closing: fsiz=%d dnum=%d wnum=%d bnum=%d", name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum)); if(!dpclose(scdb)){ pdperror(name); err = TRUE; } if(!odclose(odeum)){ pdperror(name); err = TRUE; } if(err){ printfinfo("%s: relating was over%s", name, fatal ? " with fatal error" : ""); } else { printfinfo("%s: relating completed successfully", name); } return err ? 1 : 0;}/* purge documents which is not existing. */int procpurge(const char *name){ ODEUM *odeum; ODDOC *doc; const char *file; int err, fatal; printfinfo("%s: purging started", name); if(!(odeum = odopen(name, OD_OWRITER))){ pdperror(name); return 1; } printfinfo("%s: database opened: fsiz=%d dnum=%d wnum=%d bnum=%d", name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum)); err = FALSE; if(!oditerinit(odeum)){ pdperror(name); err = TRUE; } else { while(TRUE){ if(sigterm){ printferror("aborting due to a termination signal"); err = TRUE; break; } if(!(doc = oditernext(odeum))){ if(dpecode != DP_ENOITEM){ pdperror(name); err = TRUE; } break; } file = oddocuri(doc); if(cbfilestat(file, NULL, NULL, NULL)){ printfinfo("%s: passed", file); } else { if(!odout(odeum, file)){ pdperror(file); err = TRUE; } printfinfo("%s: purged", file); } oddocclose(doc); } } fatal = odfatalerror(odeum); printfinfo("%s: database closing: fsiz=%d dnum=%d wnum=%d bnum=%d", name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum)); if(!odclose(odeum)){ pdperror(name); err = TRUE; } if(err){ printfinfo("%s: purging was over%s", name, fatal ? " with fatal error" : ""); } else { printfinfo("%s: purging completed successfully", name); } return err ? 1 : 0;}/* END OF FILE */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -