⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 select_textstat.c

📁 使用具有增量学习的监督式学习方法。包括几个不同的分类算法。
💻 C
字号:
/* Copyright (C) 2001-2002  Mikael Ylikoski * See the accompanying file "README" for the full copyright notice *//** * @file * Text statistics program. * Counts number of n-grams or words of a text. * * @author  Mikael Ylikoski * @date    2001-2002 */#include <ctype.h>#include <locale.h>#include <stdio.h>#include <stdlib.h>#include <string.h>#include "dictionary.h"#include "utility.h"#include "vector.h"static int verbose = 0;		/**< If !0 print verbose descriptions. */static dict *dt;		/**< Dictionary */static vector *v;		/**< Counts vector */static voidprint_word (gpointer key, gpointer value, gpointer data) {    int i;    char *wd;    wd = (char *)key;    for (i = 0; wd[i] != '\0'; i++)	if (wd[i] == ' ')	    putchar ('_');	else	    putchar (wd[i]);    i = vector_get_value (v, *(int *)value);    printf (" %d\n", i);}static intcount_ngrams (int min, int max, int lowercase, int alpha, char *lang) {    int i, j, k;    char d;    char *buf;    k = 0;    buf = my_malloc (max + 1);    dt = dict_new ();    if (!dt) {	free (buf);	return 1;    }    v = vector_new (100);    if (!v) {	free (buf);	free (dt);	return 1;    }    if (alpha)	memset (buf, ' ', max);    else {	for (i = 0; i < max - 1; i++) {	    if ((i = getchar ()) == EOF) {		max = i;		break;	    }	    buf[i] = (char)i;	}	/* R鋕na h鋜 */    }    while ((i = getchar ()) != EOF) {	/* Set character */	if (lowercase)	    i = tolower (i);	if (alpha) {	    if (!isalpha (i))		i = ' ';	}	buf[max - 1] = (char)i;	/* Count n-grams */	if (alpha)	    for (k = i = 0; i < min; i++)		if (buf[i] == ' ')		    k++;	for (i = min; i <= max; i++) {	    d = buf[i];	    if (alpha)		if (d == ' ')		    if (++k > 2)			break;	    buf[i] = '\0';	    j = dict_insert_word (dt, buf);	    vector_inc_value (v, j);	    buf[i] = d;	}	/* Shift characters */	for (i = 0; i < max; i++)	    buf[i] = buf[i + 1];    };    /* Print results */    /*printf ("# min=%d max=%d lang=%s\n", min, max, lang);      printf ("!%s\n", lang);*/    dict_for_each (dt, 
print_word);    return 0;}static intcount_words (int min, int max) {    printf ("Error: Not implemented yet!\n");    return 0;}/** * Main program */intmain (int argc, char *argv[]) {    int i, j, k;    /* Configuration */    verbose = 1;    setlocale (LC_CTYPE, "");    switch (argc) {    case 3:	i = sscanf (argv[1], "%d", &j);	i = sscanf (argv[2], "%d", &k);	return count_words (j, k);    case 4:	i = sscanf (argv[1], "%d", &j);	i = sscanf (argv[2], "%d", &k);	return count_ngrams (j, k, 1, 1, argv[3]);    default:	printf ("Wrong number of arguments!\n");	printf ("Usage:\n  To count words:  %s min max < input\n", argv[0]);	printf ("  To count ngrams: %s min max lang < input\n", argv[0]);	return 1;    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -