📄 stem.c
字号:
/****************************************************
 *
 * TEXT INDEXING
 *
 * Program         : stem.c
 * Function        : Implementation of the Porter stemming
 *                   algorithm
 *
 * Original Author : C. Fox, 1990
 * Modified By     : Choon Eng, You
 * Year            : 1999
 *
 *****************************************************/
#include "stem.h"

/*
 * NOTE(review): this file uses several names it does not define itself --
 * EOS, TRUE, FALSE, IsVowel(), RuleList, the step*_rules tables and the
 * shared end-of-word cursor `end`.  They are presumably supplied by stem.h;
 * confirm against the header.
 */

/******* Private Functions ***************************/

/*
 * WordSize -- count the vowel-run -> non-vowel transitions in `word`.
 *
 * A small three-state machine is run over the string:
 *   0 = nothing read yet,
 *   1 = previous character was a vowel,
 *   2 = previous character was not a vowel.
 * The count is bumped every time state 1 is left for state 2.  After a
 * non-vowel, 'Y' is treated as a vowel as well.
 *
 * word   : NUL-terminated string, not modified.
 * return : number of transitions found (0 for an empty string).
 */
static int WordSize(char *word)
{
    int result = 0;
    int state = 0;

    for (; EOS != *word; word++) {
        switch (state) {
        case 0:
            state = (IsVowel(*word)) ? 1 : 2;
            break;
        case 1:
            state = (IsVowel(*word)) ? 1 : 2;
            if (2 == state) {
                result++;
            }
            break;
        case 2:
            state = (IsVowel(*word) || ('Y' == *word)) ? 1 : 2;
            break;
        default:
            /* unreachable: state is only ever 0, 1 or 2 */
            break;
        }
    }

    return result;
}

/*
 * ContainsVowel -- rule condition: does `word` contain a vowel?
 *
 * The first character is tested with IsVowel(); every later character
 * additionally counts when it is 'Y'.
 *
 * return : FALSE for an empty string, otherwise non-zero iff a vowel is found.
 */
static int ContainsVowel(char *word)
{
    if (EOS == *word) {
        return FALSE;
    }

    return IsVowel(*word) || (NULL != strpbrk(word + 1, "AEIOUY"));
}

/*
 * EndsWithCVC -- do the last three characters of `word` form the pattern
 * consonant, vowel, consonant, where the final consonant is none of
 * 'W', 'X', 'Y'?  ('Y' counts as a vowel in the middle position.)
 *
 * Words shorter than three characters cannot match and yield FALSE; the
 * check must not look at memory in front of the string.
 *
 * A local pointer is used for the scan so that the file-wide `end` cursor,
 * which ReplaceEnd keeps using after a failed rule condition, stays intact.
 *
 * return : 1 if the pattern matches, 0 otherwise.
 */
static int EndsWithCVC(char *word)
{
    size_t length = strlen(word);
    const char *last;

    if (length < 3) {
        return FALSE;
    }

    last = word + length - 1;
    return (NULL == strchr("AEIOUWXY", last[0]))
        && (NULL != strchr("AEIOUY", last[-1]))
        && (NULL == strchr("AEIOU", last[-2]));
}

/*
 * AddAnE -- rule condition: WordSize(word) is exactly 1 and the word ends
 * consonant-vowel-consonant (see EndsWithCVC).
 */
static int AddAnE(char *word)
{
    return (1 == WordSize(word)) && EndsWithCVC(word);
}

/*
 * RemoveAnE -- rule condition: WordSize(word) is exactly 1 and the word does
 * NOT end consonant-vowel-consonant (see EndsWithCVC).
 */
static int RemoveAnE(char *word)
{
    return (1 == WordSize(word)) && !EndsWithCVC(word);
}

/*
 * ReplaceEnd -- apply the first matching rule of a rule list to `word`.
 *
 * word   : NUL-terminated word, edited in place.  The file-wide cursor `end`
 *          must point at its last character on entry.
 * rule   : array of rules terminated by an entry whose id is 0.
 *
 * For each rule, the suffix that starts at `end - old_offset` is compared
 * with old_end.  On a match the suffix is cut off temporarily; the rule
 * fires if the remaining root has WordSize greater than min_root_size and
 * the optional condition function accepts the root.  Firing appends new_end
 * to the root and moves `end` to the new last character.  If the rule does
 * not fire, the cut character is put back and the next rule is tried.
 *
 * new_end may be longer than the suffix it replaces, so the caller's buffer
 * needs spare room; that is not checked here.
 *
 * return : id of the rule that fired, or 0 if none did.
 */
static int ReplaceEnd(char *word, RuleList *rule)
{
    for (; 0 != rule->id; rule++) {
        char *ending = end - rule->old_offset;
        char saved;

        /* suffix would be longer than the word itself */
        if (ending < word) {
            continue;
        }
        if (0 != strcmp(ending, rule->old_end)) {
            continue;
        }

        /* cut the suffix off so the helpers below see only the root */
        saved = *ending;
        *ending = EOS;

        if (rule->min_root_size < WordSize(word)
            && (!rule->condition || (*rule->condition)(word))) {
            (void)strcat(word, rule->new_end);
            end = ending + rule->new_offset;
            break;
        }

        /* rule did not fire: restore the word exactly as it was */
        *ending = saved;
    }

    return rule->id;
}
/*
 * stem -- reduce `word` to its stem in place by running the rule lists
 * step1a .. step5b over it in order.
 *
 * word   : NUL-terminated, writable buffer holding the word.  ReplaceEnd may
 *          append a replacement ending, so the buffer needs some spare room.
 *          The helper predicates in this file compare against upper-case
 *          letters, so the word is presumably expected in upper case (the
 *          lower-casing step of the original code is disabled) -- confirm
 *          with the callers.
 *
 * return : FALSE if `word` is NULL or contains a non-alphabetic character
 *          (the word is left unchanged), TRUE otherwise.
 *
 * Side effect: uses the file-wide cursor `end` to track the last character
 * of the word while the rules are applied.
 */
int stem(char *word)
{
    int rule;

    if (NULL == word) {
        return (FALSE);
    }

    /* Reject anything that is not purely alphabetic; leaves `end` on the
     * terminator.  The cast keeps isalpha() defined for negative char values. */
    for (end = word; *end != EOS; end++) {
        if (!isalpha((unsigned char)*end)) {
            return (FALSE);
        }
    }

    /* Empty word: nothing to strip, and stepping `end` back would leave the
     * buffer. */
    if (end == word) {
        return (TRUE);
    }
    end--;

    ReplaceEnd(word, step1a_rules);
    rule = ReplaceEnd(word, step1b_rules);
    /* Rule ids 106 and 107 of step1b_rules trigger the step1b1 clean-up pass;
     * NOTE(review): presumably these are the suffix-stripping rules of Porter
     * step 1b -- confirm against the rule table. */
    if ((106 == rule) || (107 == rule)) {
        ReplaceEnd(word, step1b1_rules);
    }
    ReplaceEnd(word, step1c_rules);
    ReplaceEnd(word, step2_rules);
    ReplaceEnd(word, step3_rules);
    ReplaceEnd(word, step4_rules);
    ReplaceEnd(word, step5a_rules);
    ReplaceEnd(word, step5b_rules);

    return (TRUE);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -