📄 odidx.c
字号:
/*************************************************************************************************
 * Utility for indexing document files into a database of Odeum
 *                                                      Copyright (C) 2000-2003 Mikio Hirabayashi
 * This file is part of QDBM, Quick Database Manager.
 * QDBM is free software; you can redistribute it and/or modify it under the terms of the GNU
 * Lesser General Public License as published by the Free Software Foundation; either version
 * 2.1 of the License or any later version.  QDBM is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
 * details.
 * You should have received a copy of the GNU Lesser General Public License along with QDBM; if
 * not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 * 02111-1307 USA.
 *************************************************************************************************/


/* Each preprocessor directive must start its own line; when several share one physical line
   only the first is honoured and the rest are treated as stray tokens. */
#include <depot.h>
#include <cabin.h>
#include <odeum.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdarg.h>
#include <time.h>
#include <signal.h>

#undef TRUE
#define TRUE           1                 /* boolean true */
#undef FALSE
#define FALSE          0                 /* boolean false */

#define PATHCHR        '/'               /* delimiter character of path */
#define EXTCHR         '.'               /* delimiter character of extension */
#define CDIRSTR        "."               /* string of current directory */
#define PDIRSTR        ".."
/* string of parent directory */#define MTDBNAME "_mtime" /* name of the database for last modified times */#define MTDBLRM 81 /* records in a leaf node of time database */#define MTDBNIM 192 /* records in a non-leaf node of time database */#define MTDBLCN 64 /* number of leaf cache of time database */#define MTDBNCN 32 /* number of non-leaf cache of time database */#define SCDBNAME "_score" /* name of the database for scores */#define SCDBBNUM 32749 /* bucket number of score database */#define PATHBUFSIZ 2048 /* size of a path buffer */#define MAXLOAD 0.85 /* max ratio of bucket loading */#define KEYNUM 32 /* number of keywords to store *//* for Win32 and RISC OS */#if defined(_WIN32)#undef PATHCHR#define PATHCHR '\\'#undef EXTCHR#define EXTCHR '.'#undef CDIRSTR#define CDIRSTR "."#undef PDIRSTR#define PDIRSTR ".."#elif defined(__riscos__) || defined(__riscos)#include <unixlib/local.h>int __riscosify_control = __RISCOSIFY_NO_PROCESS;#undef PATHCHR#define PATHCHR '.'#undef EXTCHR#define EXTCHR '/'#undef CDIRSTR#define CDIRSTR "@"#undef PDIRSTR#define PDIRSTR "^"#endif/* global variables */const char *progname; /* program name */int sigterm; /* flag for termination signal *//* function prototypes */int main(int argc, char **argv);void setsignals(void);void sigtermhandler(int num);void usage(void);int runregister(int argc, char **argv);int runrelate(int argc, char **argv);int runpurge(int argc, char **argv);int fwimatch(const char *str, const char *key);int bwimatch(const char *str, const char *key);int bwimatchlist(const char *str, const CBLIST *keys);char *fgetl(FILE *IN);void pdperror(const char *name);void printferror(const char *format, ...);void printfinfo(const char *format, ...);const char *datestr(int t);int proclist(const char *name, const char *lfile, int wmax, int ft, const CBLIST *tsuflist, const CBLIST *hsuflist);int procdir(const char *name, const char *dir, int wmax, int ft, const CBLIST *tsuflist, const CBLIST *hsuflist);int indexdir(ODEUM *odeum, 
VILLA *mtdb, const char *name, const char *dir, int wmax, int ft, const CBLIST *tsuflist, const CBLIST *hsuflist);int indexfile(ODEUM *odeum, VILLA *mtdb, const char *name, const char *file, int wmax, int ft, const CBLIST *tsuflist, const CBLIST *hsuflist);char *filetouri(const char *file);ODDOC *makedocplain(const char *uri, const char *text, const char *date);ODDOC *makedochtml(const char *uri, const char *html, const char *date);CBLIST *htmllist(const char *html);int procrelate(const char *name);int procpurge(const char *name);/* main routine */int main(int argc, char **argv){ int rv; progname = argv[0]; sigterm = FALSE; setsignals(); if(argc < 2) usage(); rv = 0; if(!strcmp(argv[1], "register")){ rv = runregister(argc, argv); } else if(!strcmp(argv[1], "relate")){ rv = runrelate(argc, argv); } else if(!strcmp(argv[1], "purge")){ rv = runpurge(argc, argv); } else { usage(); } return rv;}/* set signal handlers */void setsignals(void){ signal(1, sigtermhandler); signal(2, sigtermhandler); signal(3, sigtermhandler); signal(13, sigtermhandler); signal(15, sigtermhandler);}/* handler of termination signal */void sigtermhandler(int num){ signal(num, SIG_DFL); sigterm = TRUE; printfinfo("the termination signal %d catched", num);}/* print the usage and exit */void usage(void){ fprintf(stderr, "%s: indexer of document files\n", progname); fprintf(stderr, "\n"); fprintf(stderr, "usage:\n"); fprintf(stderr, " %s register [-l file] [-wmax num] [-tsuf sufs] [-hsuf sufs] [-ft]" " name [dir]\n", progname); fprintf(stderr, " %s relate name\n", progname); fprintf(stderr, " %s purge name\n", progname); exit(1);}/* parse arguments of register command */int runregister(int argc, char **argv){ char *name, *dir, *lfile, *tsuf, *hsuf, path[PATHBUFSIZ]; int i, wmax, ft, plen, rv; CBLIST *tsuflist, *hsuflist; name = NULL; dir = NULL; lfile = NULL; tsuf = NULL; hsuf = NULL; wmax = -1; ft = FALSE; for(i = 2; i < argc; i++){ if(!name && argv[i][0] == '-'){ if(!strcmp(argv[i], "-l")){ 
if(++i >= argc) usage(); lfile = argv[i]; } else if(!strcmp(argv[i], "-wmax")){ if(++i >= argc) usage(); wmax = atoi(argv[i]); } else if(!strcmp(argv[i], "-tsuf")){ if(++i >= argc) usage(); tsuf = argv[i]; } else if(!strcmp(argv[i], "-hsuf")){ if(++i >= argc) usage(); hsuf = argv[i]; } else if(!strcmp(argv[i], "-ft")){ ft = TRUE; } else { usage(); } } else if(!name){ name = argv[i]; } else if(!dir){ dir = argv[i]; } else { usage(); } } if(!name) usage(); if(!dir) dir = CDIRSTR; plen = sprintf(path, "%s", dir); if(plen > 1 && path[plen-1] == PATHCHR) path[plen-1] = '\0'; tsuflist = cbsplit(tsuf ? tsuf : ".txt,.text", -1, ","); hsuflist = cbsplit(hsuf ? hsuf : ".html,.htm", -1, ","); if(lfile){ rv = proclist(name, lfile, wmax, ft, tsuflist, hsuflist); } else { rv = procdir(name, path, wmax, ft, tsuflist, hsuflist); } cblistclose(hsuflist); cblistclose(tsuflist); return rv;}/* parse arguments of relate command */int runrelate(int argc, char **argv){ char *name; int i, rv; name = NULL; for(i = 2; i < argc; i++){ if(!name && argv[i][0] == '-'){ usage(); } else if(!name){ name = argv[i]; } else { usage(); } } if(!name) usage(); rv = procrelate(name); return rv;}/* parse arguments of purge command */int runpurge(int argc, char **argv){ char *name; int i, rv; name = NULL; for(i = 2; i < argc; i++){ if(!name && argv[i][0] == '-'){ usage(); } else if(!name){ name = argv[i]; } else { usage(); } } if(!name) usage(); rv = procpurge(name); return rv;}/* case insensitive forward matching */int fwimatch(const char *str, const char *key){ int len, i; len = strlen(key); for(i = 0; i < len; i++){ if(tolower(str[i]) != tolower(key[i]) || str[i] == '\0') return FALSE; } return TRUE;}/* case insensitive backward matching */int bwimatch(const char *str, const char *key){ int slen, klen, i; slen = strlen(str); klen = strlen(key); for(i = 1; i <= klen; i++){ if(tolower(str[slen-i]) != tolower(key[klen-i]) || i > slen) return FALSE; } return TRUE;}/* case insensitive backward matching with 
a list */int bwimatchlist(const char *str, const CBLIST *keys){ int i; for(i = 0; i < cblistnum(keys); i++){ if(bwimatch(str, cblistval(keys, i, NULL))) return TRUE; } return FALSE;}/* read a line */char *fgetl(FILE *IN){ char *buf; int c, len, blen; buf = NULL; len = 0; blen = 256; while((c = fgetc(IN)) != EOF){ if(blen <= len) blen *= 2; buf = cbrealloc(buf, blen + 1); if(c == '\n') c = '\0'; buf[len++] = c; if(c == '\0') break; } if(!buf) return NULL; buf[len] = '\0'; return buf;}/* print an error message */void pdperror(const char *name){ printf("%s: ERROR: %s: %s\n", progname, name, dperrmsg(dpecode)); fflush(stdout);}/* print formatted error string and flush the buffer */void printferror(const char *format, ...){ va_list ap; va_start(ap, format); printf("%s: ERROR: ", progname); vprintf(format, ap); putchar('\n'); fflush(stdout); va_end(ap);}/* print formatted information string and flush the buffer */void printfinfo(const char *format, ...){ va_list ap; va_start(ap, format); printf("%s: INFO: ", progname); vprintf(format, ap); putchar('\n'); fflush(stdout); va_end(ap);}/* get static string of the date */const char *datestr(int t){ static char buf[32]; struct tm *stp; time_t tt; tt = (time_t)t; if(!(stp = localtime(&tt))) return "0000/00/00 00:00:00"; sprintf(buf, "%04d/%02d/%02d %02d:%02d:%02d", stp->tm_year + 1900, stp->tm_mon + 1, stp->tm_mday, stp->tm_hour, stp->tm_min, stp->tm_sec); return buf;}/* processing with finding files in a list file */int proclist(const char *name, const char *lfile, int wmax, int ft, const CBLIST *tsuflist, const CBLIST *hsuflist){ ODEUM *odeum; VILLA *mtdb; FILE *IN; char *line, path[PATHBUFSIZ]; int err, fatal; if(!strcmp(lfile, "-")){ IN = stdin; } else { if(!(IN = fopen(lfile, "rb"))){ printferror("%s: file cannot be opened", lfile); return 1; } } printfinfo("%s: registration started", name); if(!(odeum = odopen(name, OD_OWRITER | OD_OCREAT))){ pdperror(name); if(IN != stdin) fclose(IN); return 1; } sprintf(path, "%s%c%s", 
name, PATHCHR, MTDBNAME); if(!(mtdb = vlopen(path, VL_OWRITER | VL_OCREAT, VL_CMPLEX))){ pdperror(name); odclose(odeum); if(IN != stdin) fclose(IN); return 1; } vlsettuning(mtdb, MTDBLRM, MTDBNIM, MTDBLCN, MTDBNCN); printfinfo("%s: database opened: fsiz=%d dnum=%d wnum=%d bnum=%d", name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum)); err = FALSE; while((line = fgetl(IN)) != NULL){ if(sigterm){ printferror("aborting due to a termination signal"); free(line); err = TRUE; break; } if(!indexfile(odeum, mtdb, name, line, wmax, ft, tsuflist, hsuflist)) err = TRUE; free(line); } fatal = odfatalerror(odeum); printfinfo("%s: database closing: fsiz=%d dnum=%d wnum=%d bnum=%d", name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum)); if(!vlclose(mtdb)){ pdperror(name); err = TRUE; } if(!odclose(odeum)){ pdperror(name); err = TRUE; } if(IN != stdin) fclose(IN); if(err){ printfinfo("%s: registration was over%s", name, fatal ? " with fatal error" : ""); } else { printfinfo("%s: registration completed successfully", name); } return err ? 
1 : 0;}/* processing with finding files in a directory */int procdir(const char *name, const char *dir, int wmax, int ft, const CBLIST *tsuflist, const CBLIST *hsuflist){ ODEUM *odeum; VILLA *mtdb; char path[PATHBUFSIZ]; int err, fatal; printfinfo("%s: registration started", name); if(!(odeum = odopen(name, OD_OWRITER | OD_OCREAT))){ pdperror(name); return 1; } sprintf(path, "%s%c%s", name, PATHCHR, MTDBNAME); if(!(mtdb = vlopen(path, VL_OWRITER | VL_OCREAT, VL_CMPLEX))){ pdperror(name); odclose(odeum); return 1; } vlsettuning(mtdb, MTDBLRM, MTDBNIM, MTDBLCN, MTDBNCN); printfinfo("%s: database opened: fsiz=%d dnum=%d wnum=%d bnum=%d", name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum)); err = FALSE; if(!indexdir(odeum, mtdb, name, dir, wmax, ft, tsuflist, hsuflist)) err = TRUE; fatal = odfatalerror(odeum); printfinfo("%s: database closing: fsiz=%d dnum=%d wnum=%d bnum=%d", name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum)); if(!vlclose(mtdb)){ pdperror(name); err = TRUE; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -