⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 stem.h

📁 根据潜在语义分析进行查询。将文本中的特征集合做LSI变换。
💻 H
字号:
/**************************************************** * *  TEXT INDEXING * *  Program  : stem.h *  Function : Header file for stem.c * *  Original Author : C. Fox, 1990 *  Modified By     : Choon Eng, You *  Year            : 1999 * *****************************************************/#include <stdio.h>#include <string.h>#include <ctype.h>/******** Private Defines and Data Structures **********************/#define FALSE   0#define TRUE    1#define EOS   '\0'#define IsVowel(c) ('A'==(c)||'E'==(c)||'I'==(c)||'O'==(c)||'U'==(c))typedef struct {  int id;  char *old_end;  char *new_end;  int old_offset;  int new_offset;  int min_root_size;  int (*condition)();} RuleList;static char LAMBDA[1] = "";static char *end;/******** Private Function Declarations ****************************/#ifdef __STDC__static int WordSize(char *word);static int ContainsVowel(char *word);static int EndsWithCVC(char *word);static int AddAnE(char *word);static int RemoveAnE(char *word);static int ReplaceEnd(char *word, RuleList *rule);#elsestatic int WordSize (/* word */);static int ContainsVowel(/* word */);static int EndsWithCVC(/* word */);static int AddAnE(/* word */);static int RemoveAnE(/* word */);static int ReplaceEnd(/* word, rule */);#endif/******** Initialized Private Data Structures **********************/static RuleList step1a_rules[] ={   101, "SSES",     "SS",   3,  1, -1, NULL,   102, "IES",      "I",    2,  0, -1, NULL,   103, "SS",       "SS",   1,  1, -1, NULL,   104, "S",        LAMBDA, 0, -1, -1, NULL,   000, NULL,       NULL,   0,  0,  0, NULL,};static RuleList step1b_rules[] ={   105, "EED",      "EE",   2,  1,  0, NULL,   106, "ED",       LAMBDA, 1, -1, -1, ContainsVowel,   107, "ING",      LAMBDA, 2, -1, -1, ContainsVowel,   000, NULL,       NULL,   0,  0,  0, NULL,};static RuleList step1b1_rules[] ={   108, "AT",       "ATE",  1,  2, -1, NULL,   109, "BL",       "BLE",  1,  2, -1, NULL,   110, "IZ",       "IZE",  1,  2, -1, NULL,   111, "BB",       "B",    1,  0, -1, NULL,   112, "DD",       "D",    1,  0, -1, NULL,   113, "FF",       "F",    1,  0, -1, NULL,   114, "GG",       "G",    1,  0, -1, NULL,   115, "MM",       "M",    1,  0, -1, NULL,   116, "NN",       "N",    1,  0, -1, NULL,   117, "PP",       "P",    1,  0, -1, NULL,   118, "RR",       "R",    1,  0, -1, NULL,   119, "TT",       "T",    1,  0, -1, NULL,   120, "WW",       "W",    1,  0, -1, NULL,   121, "XX",       "X",    1,  0, -1, NULL,   122, LAMBDA,     "E",   -1,  0, -1, AddAnE,   000, NULL,       NULL,   0,  0,  0, NULL,};static RuleList step1c_rules[] ={   123, "Y",        "I",    0,  0, -1, ContainsVowel,   000, NULL,       NULL,   0,  0,  0, NULL,};static RuleList step2_rules[] ={   203, "ATIONAL",  "ATE",  6,  2,  0, NULL,   204, "TIONAL",   "TION", 5,  3,  0, NULL,   205, "ENCI",     "ENCE", 3,  3,  0, NULL,   206, "ANCI",     "ANCE", 3,  3,  0, NULL,   207, "IZER",     "IZE",  3,  2,  0, NULL,   208, "ABLI",     "ABLE", 3,  3,  0, NULL,   209, "ALLI",     "AL",   3,  1,  0, NULL,   210, "ENTLI",    "ENT",  4,  2,  0, NULL,   211, "ELI",      "E",    2,  0,  0, NULL,   213, "OUSLI",    "OUS",  4,  2,  0, NULL,   214, "IZATION",  "IZE",  6,  2,  0, NULL,   215, "ATION",    "ATE",  4,  2,  0, NULL,   216, "ATOR",     "ATE",  3,  2,  0, NULL,   217, "ALISM",    "AL",   4,  1,  0, NULL,   218, "IVENESS",  "IVE",  6,  2,  0, NULL,   219, "FULNESS",  "FUL",  5,  2,  0, NULL,   220, "OUSNESS",  "OUS",  6,  2,  0, NULL,   221, "ALITI",    "AL",   4,  1,  0, NULL,   222, "IVITI",    "IVE",  4,  2,  0, NULL,   223, "BILITI",   "BLE",  5,  2,  0, NULL,   000, NULL,       NULL,   0,  0,  0, NULL,};static RuleList step3_rules[] ={   301, "ICATE",    "IC",   4,  1,  0, NULL,   302, "ATIVE",    LAMBDA, 4, -1,  0, NULL,   303, "ALIZE",    "AL",   4,  1,  0, NULL,   304, "ICITI",    "IC",   4,  1,  0, NULL,   305, "ICAL",     "IC",   3,  1,  0, NULL,   308, "FUL",      LAMBDA, 2, -1,  0, NULL,   309, "NESS",     LAMBDA, 3, -1,  0, NULL,   000, NULL,       NULL,   0,  0,  0, NULL,};static RuleList step4_rules[] ={   401, "AL",       LAMBDA, 1, -1,  1, NULL,   402, "ANCE",     LAMBDA, 3, -1,  1, NULL,   403, "ENCE",     LAMBDA, 3, -1,  1, NULL,   405, "ER",       LAMBDA, 1, -1,  1, NULL,   406, "IC",       LAMBDA, 1, -1,  1, NULL,   407, "ABLE",     LAMBDA, 3, -1,  1, NULL,   408, "IBLE",     LAMBDA, 3, -1,  1, NULL,   409, "ANT",      LAMBDA, 2, -1,  1, NULL,   410, "EMENT",    LAMBDA, 4, -1,  1, NULL,   411, "MENT",     LAMBDA, 3, -1,  1, NULL,   412, "ENT",      LAMBDA, 2, -1,  1, NULL,   423, "SION",     "S",    3,  0,  1, NULL,   424, "TION",     "T",    3,  0,  1, NULL,   415, "OU",       LAMBDA, 1, -1,  1, NULL,   416, "ISM",      LAMBDA, 2, -1,  1, NULL,   417, "ATE",      LAMBDA, 2, -1,  1, NULL,   418, "ITI",      LAMBDA, 2, -1,  1, NULL,   419, "OUS",      LAMBDA, 2, -1,  1, NULL,   420, "IVE",      LAMBDA, 2, -1,  1, NULL,   421, "IZE",      LAMBDA, 2, -1,  1, NULL,   000, NULL,       NULL,   0,  0,  0, NULL,};static RuleList step5a_rules[] ={   501, "E",        LAMBDA, 0, -1,  1, NULL,   502, "E",        LAMBDA, 0, -1, -1, RemoveAnE,   000, NULL,       NULL,   0,  0,  0, NULL,};static RuleList step5b_rules[] ={   503, "LL",       "L",    1,  0,  1, NULL,   000, NULL,       NULL,   0,  0,  0, NULL,};/******** Public Function Declarations ************************/int stem(word);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -