/* File: selectd.c */
/* (code-viewer control, not part of the program: font size) */
mm = UC_MAX; else if (!strcmp (co->type, "multi_pair_maxwins")) mm = PAIR_MAX_WINS; else { fprintf (stderr, "Error: Unknown classifier type: %s\n", co->type); return NULL; } /* Create tokenizer */ tok = tokenizer_new (co->tok); if (!tok) { fprintf (stderr, "Error: Unknown tokenizer type: %s\n", co->tok); return NULL; } tokenizer_set_languages (tok, langs); /* Create vectorizer */ if (!strcmp (co->vec, "tfidf")) vec = vectorizer_tfidf_create (tok); else if (!strcmp (co->vec, "tf")) vec = vectorizer_tf_create (tok); else if (!strcmp (co->vec, "bool")) vec = vectorizer_bool_create (tok); else { fprintf (stderr, "Error: Unknown vectorizer: %s\n", co->vec); return NULL; } if (co->sel) vectorizer_set_selector (vec, selector_new ()); /* Create normalizer */ if (co->nor) { if (!strcmp (co->nor, "cosine")) vectorizer_set_normalizer (vec, vector_cosine_normalize); else { fprintf (stderr, "Error: Unknown normalizer: %s\n", co->nor); return NULL; } } vectorizer_set_autobias (vec, 1); mf = holders_find_classifier (co->cls); if (!mf) { fprintf (stderr, "Error: Cannot find classifier: %s\n", co->cls); return NULL; } if (mf->new_db) cl = multi_new (mm, mf, mf->new_db (co->options), 0); else cl = multi_new (mm, mf, NULL, 0); dof = classifier_get_doc_classifier_functions (); return doc_classifier_new (cl, dof, vec, VECTOR); }}/** * Add a language. * * @param name language name * @param locale language locale */static voidaddlang (const char *name, const char *locale) { langentry *le; le = my_malloc (sizeof(langentry)); le->name = my_strdup (name); le->locale = my_strdup (locale); le->next = langlist; langlist = le;}/** * Read command line options. * * @param argc argument count * @param argv argument vector * @return Zero if ok, or nonzero otherwise. 
*/static intread_opts (int argc, char *argv[]) { int i, retval; retval = 0; while ((i = getopt_long (argc, argv, "a:df:Q:T:", longopts, NULL)) != EOF) { switch (i) { case 'a': adr_str = optarg; break; case 'd': //daemon = 1; break; case 'f': break; case 'Q': // Just for my convenience, should be removed if (!strcmp (optarg, "rocchio")) { cls_opts = my_realloc (cls_opts, (noc + 1) * sizeof(cls_opt)); cls_opts[noc].name = NULL; cls_opts[noc].cls = "Rocchio"; cls_opts[noc].type = "multi_one"; cls_opts[noc].options = NULL; cls_opts[noc].vec = "tfidf"; cls_opts[noc].tok = "alpha"; cls_opts[noc].sel = NULL; cls_opts[noc].nor = NULL; noc++; } else if (!strcmp (optarg, "naivebayes")) { cls_opts = my_realloc (cls_opts, (noc + 1) * sizeof(cls_opt)); cls_opts[noc].name = NULL; cls_opts[noc].cls = "NaiveBayes"; cls_opts[noc].type = "multi_one"; cls_opts[noc].options = NULL; cls_opts[noc].vec = "tfidf"; cls_opts[noc].tok = "alpha"; cls_opts[noc].sel = NULL; cls_opts[noc].nor = NULL; noc++; } else if (!strcmp (optarg, "from")) { cls_opts = my_realloc (cls_opts, (noc + 1) * sizeof(cls_opt)); cls_opts[noc].name = NULL; cls_opts[noc].cls = "From"; cls_opts[noc].type = "document"; cls_opts[noc].options = "n=200"; cls_opts[noc].vec = NULL; cls_opts[noc].tok = NULL; cls_opts[noc].sel = NULL; cls_opts[noc].nor = NULL; noc++; } else if (!strcmp (optarg, "reply")) { cls_opts = my_realloc (cls_opts, (noc + 1) * sizeof(cls_opt)); cls_opts[noc].name = NULL; cls_opts[noc].cls = "Reply"; cls_opts[noc].type = "document"; cls_opts[noc].options = "n=200"; cls_opts[noc].vec = NULL; cls_opts[noc].tok = NULL; cls_opts[noc].sel = NULL; cls_opts[noc].nor = NULL; noc++; } else if (!strcmp (optarg, "en")) { addlang ("en", "C"); } else if (!strcmp (optarg, "sv")) { addlang ("sv", "sv_SE"); } else if (!strcmp (optarg, "sven")) { addlang ("sven", "sv_SE"); } else if (!strcmp (optarg, "id")) { use_id = 1; } else if (!strcmp (optarg, "stem")) { use_stemmer = 1; } else if (!strcmp (optarg, "stop")) { 
use_stopwords = 1; } else { fprintf (stderr, "Error: Unknown option: %s\n", optarg); return -1; } break; case 'T': // Quickoption for select_test break; default: printf ("Usage: selectd [-a] [-d] [-f <configfile>] " "[-Q <option>]...\n"); return -1; } } return retval;}/** * Read configuration file. * * @param file filename * @return Zero if ok, or nonzero otherwise. */static intread_config (const char *file) { FILE *fd; char *str; int retval; conf_pair *cp; fd = fopen (file, "r"); if (!fd) { fprintf (stderr, "Error: Cannot open configuration file\n"); return -1; } retval = 0; cp = my_malloc (sizeof(conf_pair)); noc = -1; while (get_next_configuration (fd, cp)) { if (cp->key[0] == '[') { noc++; cls_opts = my_realloc (cls_opts, (noc + 1) * sizeof(cls_opt)); cls_opts[noc].name = NULL; cls_opts[noc].cls = NULL; cls_opts[noc].type = my_strdup ("multi_one"); cls_opts[noc].options = NULL; cls_opts[noc].vec = my_strdup ("tf"); cls_opts[noc].tok = my_strdup ("alpha"); cls_opts[noc].sel = NULL; cls_opts[noc].nor = NULL; continue; } if (noc >= 0) { // Classifier section if (!strcmp (cp->key, "name")) { if (cp->value) { free (cls_opts[noc].name); cls_opts[noc].name = my_strdup (cp->value); } } else if (!strcmp (cp->key, "classifier")) { if (cp->value) { free (cls_opts[noc].cls); cls_opts[noc].cls = my_strdup (cp->value); } } else if (!strcmp (cp->key, "type")) { if (cp->value) { free (cls_opts[noc].type); cls_opts[noc].type = my_strdup (cp->value); } } else if (!strcmp (cp->key, "options")) { if (cp->value) { free (cls_opts[noc].options); cls_opts[noc].options = my_strdup (cp->value); } } else if (!strcmp (cp->key, "vectorizer")) { if (cp->value) { free (cls_opts[noc].vec); cls_opts[noc].vec = my_strdup (cp->value); } } else if (!strcmp (cp->key, "tokenizer")) { if (cp->value) { free (cls_opts[noc].tok); cls_opts[noc].tok = my_strdup (cp->value); } } else if (!strcmp (cp->key, "selector")) { if (cp->value) { free (cls_opts[noc].tok); cls_opts[noc].tok = my_strdup (cp->value); 
} } else if (!strcmp (cp->key, "normalizer")) { if (cp->value) { free (cls_opts[noc].nor); cls_opts[noc].nor = my_strdup (cp->value); } } else { fprintf (stderr, "Warning: Unknown classifier option: %s\n", cp->key); } continue; } if (!strcmp (cp->key, "address")) { if (cp->value) adr_str = my_strdup (cp->value); } else if (!strcmp (cp->key, "data_dir")) { if (cp->value) data_dir = my_strdup (cp->value); } else if (!strcmp (cp->key, "share_dir")) { if (cp->value) share_dir = my_strdup (cp->value); } else if (!strcmp (cp->key, "plugin_dir")) { if (cp->value) plugin_dir = my_strdup (cp->value); } else if (!strcmp (cp->key, "language")) { if (!strcmp (cp->value, "none")) { use_id = 0; use_stemmer = 0; use_stopwords = 0; } else { for (str = cp->value; *str != '\0'; str++) if (isspace (*str)) { *str = '\0'; break; } for (str++; *str != '\0' && isspace (*str); str++) ; if (*str == '\0') { fprintf (stderr, "Warning: No locale for language: %s\n", cp->value); str = "C"; } addlang (cp->value, str); } } else if (!strcmp (cp->key, "identifier")) use_id = !strcmp (cp->value, "on"); else if (!strcmp (cp->key, "stemming")) use_stemmer = !strcmp (cp->value, "on"); else if (!strcmp (cp->key, "stopwords")) use_stopwords = !strcmp (cp->value, "on"); else { fprintf (stderr, "Error: Unknown configuration key: %s\n", cp->key); retval = -1; break; } } noc++; free (cp); fclose (fd); return retval;}/** * Signal handler. 
*/static voidsigint_handler (int si) { FILE *f; char buf[200]; int i; if (adr_str && *adr_str) unlink (adr_str); if (data_dir) { fprintf (stderr, "Exiting: Saving database\n"); /* Save classifiers */ for (i = 0; i < noc; i++) if (save_doc_classifier (classifiers[i].cls, &cls_opts[i])) fprintf (stderr, "Error: Cannot save classifier\n"); /* Save folders */ i = strlen (data_dir); memcpy (buf, data_dir, i); strcpy (&buf[i], "/saved.folders"); f = fopen (buf, "w"); if (f) { fprintf (f, "folders "); dict_save (f, folders); fclose (f); } else fprintf (stderr, "Error: Cannot save folders\n"); } exit (0);}/** * Main program. */intmain (int argc, char *argv[]) { FILE *f; char buf[200]; char *str, *conf_file; int i, j; langentry *le, *le2; languages *langs; stemmer_functions *sf; vectorizer *vec; word_stopper *ws; /* Configuration */ adr_str = NULL; share_dir = "/usr/local/share/select"; plugin_dir = "/usr/local/lib/select"; use_subject = 1; use_id = 0; use_stopwords = 1; use_stemmer = 0; langlist = NULL; /* Find configuration file */ conf_file = NULL; for (i = 1; i < argc; i++) if (!strcmp (argv[i], "-f")) { if (argc > i + 1 && *argv[i + 1] != '-') conf_file = argv[i + 1]; else fprintf (stderr, "Error in option -f\n"); } else if (!strncmp (argv[i], "--conf-file=", 12)) { if (argv[i][12] != '\0') conf_file = &argv[i][12]; else fprintf (stderr, "Error in option --conf-file\n"); } if (!conf_file) { str = getenv ("HOME"); if (str) { i = strlen (str); conf_file = my_malloc (i + 22); memcpy (conf_file, str, i); strcpy (&conf_file[i], "/.select/selectd.conf"); } else { fprintf (stderr, "Error: Cannot determine location of " "configuration file\n"); return 1; } } /* Read configuration */ if (read_config (conf_file)) { fprintf (stderr, "Error: Cannot read configuration\n"); return 1; } /* Read command line options */ if (read_opts (argc, argv)) { fprintf (stderr, "Error: Cannot read command line options\n"); return 1; } /* Initialization */ pdata = protocol_s_new (15000, adr_str); 
holders_load (plugin_dir); /* Language identifier */ if (use_id) { id = identifier_new (); if (!id) { fprintf (stderr, "Error: Cannot create language identifier\n"); return -1; } i = strlen (share_dir); for (le = langlist; le; le = le->next) { str = my_malloc (i + 9 + strlen (le->name)); memcpy (str, share_dir, i); memcpy (&str[i], "/langid.", 8); strcpy (&str[i + 8], le->name); if (identifier_load_language (id, str, le->name)) { fprintf (stderr, "Error: Cannot read language data\n"); return -1; }; free (str); } } /* Languages */ if (langlist) langs = languages_create (); else langs = NULL; i = strlen (share_dir); for (le = langlist; le; le = le->next) { ws = NULL; sf = NULL; if (use_stopwords) { ws = stopword_new (); str = my_malloc (i + 12 + strlen (le->name)); memcpy (str, share_dir, i); memcpy (&str[i], "/stopwords.", 11); strcpy (&str[i + 11], le->name); if (stopword_load (ws, str)) fprintf (stderr, "Warning: Cannot read stopwords: %s!\n", le->name); free (str); } if (use_stemmer) { sf = holders_find_stemmer (le->name); if (!sf) fprintf (stderr, "Warning: Cannot find stemmer: %s!\n", le->name); else if (sf->init) sf->init (); } languages_add (langs, le->name, le->locale, ws, sf); if (!le->next) languages_set_unknown (langs, le->name); } for (le = langlist; le; le = le2) { le2 = le->next; free (le); } /* Classifiers */ if (noc == 0) { fprintf (stderr, "Error: No classifiers specified\n"); return 1; } classifiers = my_malloc (sizeof(cls_entry) * noc); for (i = 0; i < noc; i++) { classifiers[i].name = cls_opts[i].name; if (data_dir) { j = strlen (data_dir); memcpy (buf, data_dir, j); memcpy (&buf[j], "/saved.", 7); strcpy (&buf[j + 7], classifiers[i].name); j = strlen (buf); strcpy (&buf[j], ".db"); f = fopen (buf, "r"); if (f) { fclose (f); if (strcmp (cls_opts[i].type, "document")) { strcpy (&buf[j], ".vect"); f = fopen (buf, "r"); vec = vectorizer_load (f, langs); fclose (f); if (!vec) { fprintf (stderr, "Error: Cannot load vectorizer " "database\n"); return 
1; } } else vec = NULL; strcpy (&buf[j], ".db"); f = fopen (buf, "r"); classifiers[i].cls = load_classifier (f, &cls_opts[i], langs, vec); fclose (f); if (!classifiers[i].cls) { fprintf (stderr, "Error: Cannot load classifier " "database\n"); return 1; } } else { fprintf (stderr, "No database found, creating new\n"); classifiers[i].cls = create_classifier (&cls_opts[i], langs); if (!classifiers[i].cls) return -1; } } else { classifiers[i].cls = create_classifier (&cls_opts[i], langs); if (!classifiers[i].cls) return -1; } free (cls_opts[i].cls); free (cls_opts[i].type); free (cls_opts[i].options); free (cls_opts[i].tok); free (cls_opts[i].vec); free (cls_opts[i].sel); free (cls_opts[i].nor); } free (cls_opts); //holders_free (); /* Folders */ if (data_dir) { j = strlen (data_dir); memcpy (buf, data_dir, j); strcpy (&buf[j], "/saved.folders"); f = fopen (buf, "r"); if (f) { fscanf (f, "folders "); folders = dict_load (f); fclose (f); } else { fprintf (stderr, "No folder database found, creating new\n"); folders = dict_new (); } } else folders = dict_new (); /* Initialize signal handlers */ signal (SIGINT, sigint_handler); signal (SIGTERM, sigint_handler); /* Create socket and accept connections */ protocol_s_loop (pdata, &handle_session); return 0;}
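/*
 * Code-viewer keyboard shortcuts (web page residue, not part of selectd.c):
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Switch theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */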