⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 selectd.c

📁 使用具有增量学习的监控式学习方法。包括几个不同的分类算法。
💻 C
📖 第 1 页 / 共 2 页
字号:
	    mm = UC_MAX;
	else if (!strcmp (co->type, "multi_pair_maxwins"))
	    mm = PAIR_MAX_WINS;
	else {
	    fprintf (stderr, "Error: Unknown classifier type: %s\n", co->type);
	    return NULL;
	}

	/* Create tokenizer */
	tok = tokenizer_new (co->tok);
	if (!tok) {
	    fprintf (stderr, "Error: Unknown tokenizer type: %s\n", co->tok);
	    return NULL;
	}
	tokenizer_set_languages (tok, langs);

	/* Create vectorizer; the configured name selects the constructor */
	if (!strcmp (co->vec, "tfidf"))
	    vec = vectorizer_tfidf_create (tok);
	else if (!strcmp (co->vec, "tf"))
	    vec = vectorizer_tf_create (tok);
	else if (!strcmp (co->vec, "bool"))
	    vec = vectorizer_bool_create (tok);
	else {
	    fprintf (stderr, "Error: Unknown vectorizer: %s\n", co->vec);
	    return NULL;
	}
	/* Any non-NULL selector setting attaches a selector_new () selector */
	if (co->sel)
	    vectorizer_set_selector (vec, selector_new ());

	/* Create normalizer; "cosine" is the only name accepted here */
	if (co->nor) {
	    if (!strcmp (co->nor, "cosine"))
		vectorizer_set_normalizer (vec, vector_cosine_normalize);
	    else {
		fprintf (stderr, "Error: Unknown normalizer: %s\n", co->nor);
		return NULL;
	    }
	}
	vectorizer_set_autobias (vec, 1);

	/* Look up the classifier implementation by its configured name */
	mf = holders_find_classifier (co->cls);
	if (!mf) {
	    fprintf (stderr, "Error: Cannot find classifier: %s\n", co->cls);
	    return NULL;
	}
	/* Pass the result of new_db (built from the option string) when the
	 * implementation provides that hook, and NULL otherwise */
	if (mf->new_db)
	    cl = multi_new (mm, mf, mf->new_db (co->options), 0);
	else
	    cl = multi_new (mm, mf, NULL, 0);
	dof = classifier_get_doc_classifier_functions ();
	return doc_classifier_new (cl, dof, vec, VECTOR);
    }
}

/**
 * Add a language.
 *
 * Allocates a new entry, stores copies of both strings in it (made with
 * my_strdup) and pushes it onto the front of the global langlist, so the
 * list holds the languages in reverse order of insertion.
 *
 * @param name    language name
 * @param locale  language locale
 */
static void
addlang (const char *name, const char *locale) {
    langentry *le;

    le = my_malloc (sizeof(langentry));
    le->name = my_strdup (name);
    le->locale = my_strdup (locale);
    /* Link in at the head of the list */
    le->next = langlist;
    langlist = le;
}

/**
 * Read command line options.
 *
 * @param argc  argument count
 * @param argv  argument vector
 * @return Zero if ok, or nonzero otherwise.
*/static intread_opts (int argc, char *argv[]) {    int i, retval;    retval = 0;    while ((i = getopt_long (argc, argv, "a:df:Q:T:",			     longopts, NULL)) != EOF) {        switch (i) {        case 'a':	    adr_str = optarg;            break;	case 'd':	    //daemon = 1;	    break;	case 'f':	    break;        case 'Q':	// Just for my convenience, should be removed	    if (!strcmp (optarg, "rocchio")) {		cls_opts = my_realloc (cls_opts, (noc + 1) * sizeof(cls_opt));		cls_opts[noc].name = NULL;		cls_opts[noc].cls = "Rocchio";		cls_opts[noc].type = "multi_one";		cls_opts[noc].options = NULL;		cls_opts[noc].vec = "tfidf";		cls_opts[noc].tok = "alpha";		cls_opts[noc].sel = NULL;		cls_opts[noc].nor = NULL;		noc++;	    } else if (!strcmp (optarg, "naivebayes")) {		cls_opts = my_realloc (cls_opts, (noc + 1) * sizeof(cls_opt));		cls_opts[noc].name = NULL;		cls_opts[noc].cls = "NaiveBayes";		cls_opts[noc].type = "multi_one";		cls_opts[noc].options = NULL;		cls_opts[noc].vec = "tfidf";		cls_opts[noc].tok = "alpha";		cls_opts[noc].sel = NULL;		cls_opts[noc].nor = NULL;		noc++;	    } else if (!strcmp (optarg, "from")) {		cls_opts = my_realloc (cls_opts, (noc + 1) * sizeof(cls_opt));		cls_opts[noc].name = NULL;		cls_opts[noc].cls = "From";		cls_opts[noc].type = "document";		cls_opts[noc].options = "n=200";		cls_opts[noc].vec = NULL;		cls_opts[noc].tok = NULL;		cls_opts[noc].sel = NULL;		cls_opts[noc].nor = NULL;		noc++;	    } else if (!strcmp (optarg, "reply")) {		cls_opts = my_realloc (cls_opts, (noc + 1) * sizeof(cls_opt));		cls_opts[noc].name = NULL;		cls_opts[noc].cls = "Reply";		cls_opts[noc].type = "document";		cls_opts[noc].options = "n=200";		cls_opts[noc].vec = NULL;		cls_opts[noc].tok = NULL;		cls_opts[noc].sel = NULL;		cls_opts[noc].nor = NULL;		noc++;	    } else if (!strcmp (optarg, "en")) {		addlang ("en", "C");	    } else if (!strcmp (optarg, "sv")) {		addlang ("sv", "sv_SE");	    } else if (!strcmp (optarg, "sven")) {		addlang ("sven", "sv_SE");	    } else if 
(!strcmp (optarg, "id")) {		use_id = 1;	    } else if (!strcmp (optarg, "stem")) {		use_stemmer = 1;	    } else if (!strcmp (optarg, "stop")) {		use_stopwords = 1;	    } else {		fprintf (stderr, "Error: Unknown option: %s\n", optarg);		return -1;	    }	    break;	case 'T':	// Quickoption for select_test	    break;	default:	    printf ("Usage: selectd [-a] [-d] [-f <configfile>] "		    "[-Q <option>]...\n");	    return -1;        }    }    return retval;}/** * Read configuration file. * * @param file  filename * @return Zero if ok, or nonzero otherwise. */static intread_config (const char *file) {    FILE *fd;    char *str;    int retval;    conf_pair *cp;    fd = fopen (file, "r");    if (!fd) {	fprintf (stderr, "Error: Cannot open configuration file\n");	return -1;    }    retval = 0;    cp = my_malloc (sizeof(conf_pair));    noc = -1;    while (get_next_configuration (fd, cp)) {	if (cp->key[0] == '[') {	    noc++;	    cls_opts = my_realloc (cls_opts, (noc + 1) * sizeof(cls_opt));	    cls_opts[noc].name = NULL;	    cls_opts[noc].cls = NULL;	    cls_opts[noc].type = my_strdup ("multi_one");	    cls_opts[noc].options = NULL;	    cls_opts[noc].vec = my_strdup ("tf");	    cls_opts[noc].tok = my_strdup ("alpha");	    cls_opts[noc].sel = NULL;	    cls_opts[noc].nor = NULL;	    continue;	}	if (noc >= 0) {		// Classifier section	    if (!strcmp (cp->key, "name")) {		if (cp->value) {		    free (cls_opts[noc].name);		    cls_opts[noc].name = my_strdup (cp->value);		}	    } else if (!strcmp (cp->key, "classifier")) {		if (cp->value) {		    free (cls_opts[noc].cls);		    cls_opts[noc].cls = my_strdup (cp->value);		}	    } else if (!strcmp (cp->key, "type")) {		if (cp->value) {		    free (cls_opts[noc].type);		    cls_opts[noc].type = my_strdup (cp->value);		}	    } else if (!strcmp (cp->key, "options")) {		if (cp->value) {		    free (cls_opts[noc].options);		    cls_opts[noc].options = my_strdup (cp->value);		}	    } else if (!strcmp (cp->key, "vectorizer")) {		if (cp->value) 
{		    free (cls_opts[noc].vec);		    cls_opts[noc].vec = my_strdup (cp->value);		}	    } else if (!strcmp (cp->key, "tokenizer")) {		if (cp->value) {		    free (cls_opts[noc].tok);		    cls_opts[noc].tok = my_strdup (cp->value);		}	    } else if (!strcmp (cp->key, "selector")) {		if (cp->value) {		    free (cls_opts[noc].tok);		    cls_opts[noc].tok = my_strdup (cp->value);		}	    } else if (!strcmp (cp->key, "normalizer")) {		if (cp->value) {		    free (cls_opts[noc].nor);		    cls_opts[noc].nor = my_strdup (cp->value);		}	    } else {		fprintf (stderr, "Warning: Unknown classifier option: %s\n",			 cp->key);	    }	    continue;	}	if (!strcmp (cp->key, "address")) {	    if (cp->value)		adr_str = my_strdup (cp->value);	} else if (!strcmp (cp->key, "data_dir")) {	    if (cp->value)		data_dir = my_strdup (cp->value);	} else if (!strcmp (cp->key, "share_dir")) {	    if (cp->value)		share_dir = my_strdup (cp->value);	} else if (!strcmp (cp->key, "plugin_dir")) {	    if (cp->value)		plugin_dir = my_strdup (cp->value);	} else if (!strcmp (cp->key, "language")) {	    if (!strcmp (cp->value, "none")) {		use_id = 0;		use_stemmer = 0;		use_stopwords = 0;	    } else {		for (str = cp->value; *str != '\0'; str++)		    if (isspace (*str)) {			*str = '\0';			break;		    }		for (str++; *str != '\0' && isspace (*str); str++)		    ;		if (*str == '\0') {		    fprintf (stderr, "Warning: No locale for language: %s\n",			     cp->value);		    str = "C";		}		addlang (cp->value, str);	    }	} else if (!strcmp (cp->key, "identifier"))	    use_id = !strcmp (cp->value, "on");	else if (!strcmp (cp->key, "stemming"))	    use_stemmer = !strcmp (cp->value, "on");	else if (!strcmp (cp->key, "stopwords"))	    use_stopwords = !strcmp (cp->value, "on");	else {	    fprintf (stderr, "Error: Unknown configuration key: %s\n",		     cp->key);	    retval = -1;	    break;	}    }    noc++;    free (cp);    fclose (fd);    return retval;}/** * Signal handler. 
*/static voidsigint_handler (int si) {    FILE *f;    char buf[200];    int i;    if (adr_str && *adr_str)	unlink (adr_str);    if (data_dir) {	fprintf (stderr, "Exiting: Saving database\n");	/* Save classifiers */	for (i = 0; i < noc; i++)	    if (save_doc_classifier (classifiers[i].cls, &cls_opts[i]))		fprintf (stderr, "Error: Cannot save classifier\n");	/* Save folders */	i = strlen (data_dir);	memcpy (buf, data_dir, i);	strcpy (&buf[i], "/saved.folders");	f = fopen (buf, "w");	if (f) {	    fprintf (f, "folders ");	    dict_save (f, folders);	    fclose (f);	} else	    fprintf (stderr, "Error: Cannot save folders\n");    }    exit (0);}/** * Main program. */intmain (int argc, char *argv[]) {    FILE *f;    char buf[200];    char *str, *conf_file;    int i, j;    langentry *le, *le2;    languages *langs;    stemmer_functions *sf;    vectorizer *vec;    word_stopper *ws;    /* Configuration */    adr_str = NULL;    share_dir = "/usr/local/share/select";    plugin_dir = "/usr/local/lib/select";    use_subject = 1;    use_id = 0;    use_stopwords = 1;    use_stemmer = 0;    langlist = NULL;    /* Find configuration file */    conf_file = NULL;    for (i = 1; i < argc; i++)	if (!strcmp (argv[i], "-f")) {	    if (argc > i + 1 && *argv[i + 1] != '-')		conf_file = argv[i + 1];	    else		fprintf (stderr, "Error in option -f\n");	} else if (!strncmp (argv[i], "--conf-file=", 12)) {	    if (argv[i][12] != '\0')		conf_file = &argv[i][12];	    else		fprintf (stderr, "Error in option --conf-file\n");	}    if (!conf_file) {	str = getenv ("HOME");	if (str) {	    i = strlen (str);	    conf_file = my_malloc (i + 22);	    memcpy (conf_file, str, i);	    strcpy (&conf_file[i], "/.select/selectd.conf");	} else {	    fprintf (stderr, "Error: Cannot determine location of "		     "configuration file\n");	    return 1;	}    }    /* Read configuration */    if (read_config (conf_file)) {	fprintf (stderr, "Error: Cannot read configuration\n");	return 1;    }    /* Read command line 
options */    if (read_opts (argc, argv)) {	fprintf (stderr, "Error: Cannot read command line options\n");	return 1;    }    /* Initialization */    pdata = protocol_s_new (15000, adr_str);    holders_load (plugin_dir);    /* Language identifier */    if (use_id) {	id = identifier_new ();	if (!id) {	    fprintf (stderr, "Error: Cannot create language identifier\n");	    return -1;	}	i = strlen (share_dir);	for (le = langlist; le; le = le->next) {	    str = my_malloc (i + 9 + strlen (le->name));	    memcpy (str, share_dir, i);	    memcpy (&str[i], "/langid.", 8);	    strcpy (&str[i + 8], le->name);	    if (identifier_load_language (id, str, le->name)) {		fprintf (stderr, "Error: Cannot read language data\n");		return -1;	    };	    free (str);	}    }    /* Languages */    if (langlist)	langs = languages_create ();    else	langs = NULL;    i = strlen (share_dir);    for (le = langlist; le; le = le->next) {	ws = NULL;	sf = NULL;	if (use_stopwords) {	    ws = stopword_new ();	    str = my_malloc (i + 12 + strlen (le->name));	    memcpy (str, share_dir, i);	    memcpy (&str[i], "/stopwords.", 11);	    strcpy (&str[i + 11], le->name);	    if (stopword_load (ws, str))		fprintf (stderr, "Warning: Cannot read stopwords: %s!\n",			 le->name);	    free (str);	}	if (use_stemmer) {	    sf = holders_find_stemmer (le->name);	    if (!sf)		fprintf (stderr, "Warning: Cannot find stemmer: %s!\n",			 le->name);	    else if (sf->init)		sf->init ();	}	languages_add (langs, le->name, le->locale, ws, sf);	if (!le->next)	    languages_set_unknown (langs, le->name);    }    for (le = langlist; le; le = le2) {	le2 = le->next;	free (le);    }    /* Classifiers */    if (noc == 0) {	fprintf (stderr, "Error: No classifiers specified\n");	return 1;    }    classifiers = my_malloc (sizeof(cls_entry) * noc);    for (i = 0; i < noc; i++) {	classifiers[i].name = cls_opts[i].name;	if (data_dir) {	    j = strlen (data_dir);	    memcpy (buf, data_dir, j);	    memcpy (&buf[j], "/saved.", 7);	    strcpy 
(&buf[j + 7], classifiers[i].name);	    j = strlen (buf);	    strcpy (&buf[j], ".db");	    f = fopen (buf, "r");	    if (f) {		fclose (f);		if (strcmp (cls_opts[i].type, "document")) {		    strcpy (&buf[j], ".vect");		    f = fopen (buf, "r");		    vec = vectorizer_load (f, langs);		    fclose (f);		    if (!vec) {			fprintf (stderr, "Error: Cannot load vectorizer "				 "database\n");			return 1;		    }		} else		    vec = NULL;		strcpy (&buf[j], ".db");		f = fopen (buf, "r");		classifiers[i].cls = load_classifier (f, &cls_opts[i], langs,						      vec);		fclose (f);		if (!classifiers[i].cls) {		    fprintf (stderr, "Error: Cannot load classifier "			     "database\n");		    return 1;		}	    } else {		fprintf (stderr, "No database found, creating new\n");		classifiers[i].cls = create_classifier (&cls_opts[i], langs);		if (!classifiers[i].cls)		    return -1;	    }	} else {	    classifiers[i].cls = create_classifier (&cls_opts[i], langs);	    if (!classifiers[i].cls)		return -1;	}	free (cls_opts[i].cls);	free (cls_opts[i].type);	free (cls_opts[i].options);	free (cls_opts[i].tok);	free (cls_opts[i].vec);	free (cls_opts[i].sel);	free (cls_opts[i].nor);    }    free (cls_opts);    //holders_free ();    /* Folders */    if (data_dir) {	j = strlen (data_dir);	memcpy (buf, data_dir, j);	strcpy (&buf[j], "/saved.folders");	f = fopen (buf, "r");	if (f) {	    fscanf (f, "folders ");	    folders = dict_load (f);	    fclose (f);	} else {	    fprintf (stderr, "No folder database found, creating new\n");	    folders = dict_new ();	}    } else	folders = dict_new ();    /* Initialize signal handlers */    signal (SIGINT, sigint_handler);    signal (SIGTERM, sigint_handler);    /* Create socket and accept connections */    protocol_s_loop (pdata, &handle_session);    return 0;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -