📄 tokenizer.h
字号:
#ifndef TOKENIZER_H#define TOKENIZER_H#include <stdio.h>#include "languages.h"#include "stemmer.h"#include "stopword.h"typedef struct tokenizer_ tokenizer;tokenizer *tokenizer_new (const char *str);tokenizer *tokenizer_alpha_new (void);tokenizer *tokenizer_ws_new (void);tokenizer *tokenizer_ngram_new (void);tokenizer *tokenizer_null_new (void);inttokenizer_set_minmax (tokenizer *tok, int min, int max);voidtokenizer_set_stopwords (tokenizer *tok, word_stopper *ws);voidtokenizer_set_stemmer (tokenizer *tok, stemmer_functions *sf);voidtokenizer_set_languages (tokenizer *tok, languages *langs);inttokenizer_set_language (tokenizer *tok, const char *lang);voidtokenizer_set_text (tokenizer *tok, const char *text, int size, const char *code);const char *tokenizer_next_token (tokenizer *tok);inttokenizer_save (tokenizer *tok, FILE *f);#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -