📄 selector.c
字号:
/* Copyright (C) 2002 Mikael Ylikoski * See the accompanying file "README" for the full copyright notice *//** * @file * Feature selector. (s/selector/weighter/ ?) * * normalizer: cannot learn, can change weights on vectors * weighter: can learn, can change weights on vectors * selector: can learn, can remove features from vectorizer dictionary * * @author Mikael Ylikoski * @date 2002 */#include <math.h>#include <stdlib.h>#include <string.h>#include "utility.h"#include "vector.h"#include "selector.h"/** * Feature selector. */struct selector_ { //vector *tdf; /**< Term Document Frequency vector */ //int *tcf; /**< Term Class Frequency */ int nod; /**< Number Of Documents */ //int noc; /**< Number Of Classes */ int *tf; /**< Term Frequency */ int *tb; /**< Term Birthtime */ int not; /**< Number Of Terms */};/** * Create a new selector. * * @return A new selector. */selector *selector_new (void) { selector *sel; sel = my_malloc (sizeof(selector)); //sel->tdf = vector_new (100); //sel->tcf = my_calloc (1, sizeof(int)); //sel->noc = 1; sel->nod = 0; sel->tf = NULL; sel->tb = NULL; sel->not = 0; return sel;}/** * Update the selector statistics. * * @param sel selector to update */intselector_update (selector *sel, vector *v, int class) { int i, j, k; //vector_add_v (sel->tdf, v, 1); //sel->tcf[class]++; sel->nod++; if (v->nel > 0) { j = v->name[v->nel - 1]; if (j >= sel->not) { k = j + 1; sel->tf = my_realloc (sel->tf, k * sizeof(int)); sel->tb = my_realloc (sel->tb, k * sizeof(int)); memset (&sel->tf[sel->not], 0, (k - sel->not) * sizeof(int)); memset (&sel->tb[sel->not], 0, (k - sel->not) * sizeof(int)); sel->not = k; } } for (i = 0; i < v->nel; i++) { j = v->name[i]; if (sel->tf[j] == 0) sel->tb[j] = sel->nod; sel->tf[j]++; // += v->value[i] } return 0;}/** * Reweight a vector. */voidselector_weight (selector *sel, vector *v) {}/** * Used by vectorizer to remove terms. * * @param sel selector to use * @return A boolean vector with ones for all features to remove * (if autobias == 1 then feature 0 may never be removed) */vector *selector_get_removable (selector *sel) { int i; vector *v; v = vector_new (10); for (i = 1; i < sel->not; i++) if (sel->tf[i] && sel->tf[i] < log (sel->nod - sel->tb[i]) * 1 - 1) { /* ifile use * (1 / log (2)) = 1.4427 */ sel->tf[i] = 0; vector_append (v, i); } if (v->nel < 1) { vector_free (v); v = NULL; } //else //vector_print (v); return v;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -