⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 stem.c

📁 潜在语义准备
💻 C
字号:
/****************************************************
 *
 *  TEXT INDEXING
 *
 *  Program  : stem.c
 *  Function : Implementation of the Porter stemming
 *             algorithm
 *
 *  Original Author : C. Fox, 1990
 *  Modified By     : Choon Eng, You
 *  Year            : 1999
 *
 *****************************************************/

#include "stem.h"

/******* Private Functions ***************************/

/*
 * WordSize -- count the vowel-to-consonant transitions in a word.
 *
 * The word is scanned with a three-state machine:
 *   state 0 : nothing seen yet
 *   state 1 : previous character was a vowel
 *   state 2 : previous character was a consonant
 * The count is bumped each time the scan moves from state 1 to state 2.
 * 'Y' is additionally accepted as a vowel when it follows a consonant.
 *
 * word : NUL-terminated string (not modified).
 * Returns the number of vowel->consonant transitions found.
 */
static int WordSize(char *word)
{
   int result = 0;
   int state = 0;

   while (EOS != *word) {
      switch (state) {
         case 0:
            state = (IsVowel(*word)) ? 1 : 2;
            break;
         case 1:
            state = (IsVowel(*word)) ? 1 : 2;
            if (2 == state)
               result++;   /* a vowel run has just been closed by a consonant */
            break;
         case 2:
            state = (IsVowel(*word) || ('Y' == *word)) ? 1 : 2;
            break;
         default:
            break;
      }
      word++;
   }

   return (result);
}

/*
 * ContainsVowel -- report whether a word has at least one vowel.
 *
 * The first character is tested with IsVowel(); every later character
 * is matched against "AEIOUY", so 'Y' counts as a vowel anywhere but in
 * first position.
 *
 * Returns non-zero if a vowel is found, FALSE for an empty word or a
 * word with no vowel.
 */
static int ContainsVowel(char *word)
{
   if (EOS == *word)
      return (FALSE);

   return (IsVowel(*word) || (NULL != strpbrk(word + 1, "AEIOUY")));
}

/*
 * EndsWithCVC -- report whether a word ends consonant-vowel-consonant,
 * where the final consonant is not 'W', 'X' or 'Y'.
 *
 * Returns 1 if the last three characters match the pattern, 0 otherwise
 * (including for any word shorter than three characters).
 */
static int EndsWithCVC(char *word)
{
   int length;
   char *last;

   /* Three characters are inspected, so anything shorter cannot match.
    * (Accepting length 2 here would read the byte before the buffer.) */
   if ((length = (int)strlen(word)) < 3)
      return (FALSE);

   /* Use a local cursor rather than the file-scope `end` pointer: that
    * pointer is the word-end marker ReplaceEnd() relies on, and it must
    * still be valid for the following rules when this test fails. */
   last = word + length - 1;

   return (   (NULL == strchr("AEIOUWXY", last[0]))     /* consonant */
           && (NULL != strchr("AEIOUY",   last[-1]))    /* vowel     */
           && (NULL == strchr("AEIOU",    last[-2])));  /* consonant */
}

/*
 * AddAnE -- rule condition: true when the word has a WordSize() of
 * exactly 1 and ends consonant-vowel-consonant (see EndsWithCVC).
 */
static int AddAnE(char *word)
{
   return ((1 == WordSize(word)) && EndsWithCVC(word));
}

/*
 * RemoveAnE -- rule condition: true when the word has a WordSize() of
 * exactly 1 and does NOT end consonant-vowel-consonant.
 */
static int RemoveAnE(char *word)
{
   return ((1 == WordSize(word) && !EndsWithCVC(word)));
}

/*
 * ReplaceEnd -- apply the first matching rule of a rule list to a word.
 *
 * word : NUL-terminated, writable buffer; modified in place when a rule
 *        fires.
 * rule : array of rules terminated by an entry whose id is 0.
 *
 * Relies on the file-scope `end` pointer addressing the last character
 * of the word, and updates it when a rule fires.
 *
 * For each rule, the candidate suffix starts at end - old_offset.  If it
 * equals the rule's old_end, the word is temporarily cut at that point so
 * the remaining stem can be measured with WordSize() and passed to the
 * rule's optional condition function.  When both checks pass, new_end is
 * appended to the stem; otherwise the cut is undone and the next rule is
 * tried.
 *
 * Returns the id of the rule that fired, or 0 if no rule applied.
 */
static int ReplaceEnd(char *word, RuleList *rule)
{
   char *ending;   /* start of the candidate suffix inside word */
   char tmp_ch;    /* character overwritten by the temporary cut */

   while (0 != rule->id) {
      ending = end - rule->old_offset;

      /* `word <= ending` rejects suffixes longer than the word itself. */
      if ((word <= ending) && (0 == strcmp(ending, rule->old_end))) {
         tmp_ch = *ending;
         *ending = EOS;   /* cut: word now holds only the stem */

         if ((rule->min_root_size < WordSize(word))
             && (!rule->condition || (*rule->condition)(word))) {
            (void)strcat(word, rule->new_end);
            end = ending + rule->new_offset;
            break;
         }

         *ending = tmp_ch;   /* rule rejected: restore the suffix */
      }
      rule++;
   }

   return (rule->id);
}
/*
 * stem -- reduce a word to its stem, in place, by running the rule
 * steps 1a, 1b (+1b1), 1c, 2, 3, 4, 5a and 5b in order.
 *
 * word : NUL-terminated, writable buffer holding a single word.
 *        No case folding is performed; the vowel tables used by the
 *        helper routines in this file are upper-case.
 *        NOTE(review): ReplaceEnd() appends replacement suffixes with
 *        strcat(); confirm against the rule tables that the result can
 *        never outgrow the caller's buffer.
 *
 * Returns FALSE, leaving the word untouched, if it contains any
 * non-alphabetic character; TRUE otherwise (an empty word is returned
 * unchanged with TRUE).
 *
 * Side effect: uses the file-scope `end` pointer as the word-end cursor
 * shared with ReplaceEnd().
 */
int stem(char *word)
{
   int rule;

   /* Validate the word and leave `end` on its terminator.  The cast is
    * required: passing a negative char value to isalpha() is undefined. */
   for (end = word; *end != EOS; end++) {
      if (!isalpha((unsigned char)*end))
         return(FALSE);
   }

   /* Nothing to stem; also avoids stepping `end` to before the buffer. */
   if (end == word)
      return(TRUE);

   end--;   /* `end` now addresses the last character of the word */

   ReplaceEnd(word, step1a_rules);

   rule = ReplaceEnd(word, step1b_rules);

   /* Rules 106 and 107 of step 1b require the step 1b1 follow-up pass
    * (presumably the suffix-stripping rules -- confirm against
    * step1b_rules). */
   if ((106 == rule) || (107 == rule))
      ReplaceEnd(word, step1b1_rules);

   ReplaceEnd(word, step1c_rules);

   ReplaceEnd(word, step2_rules);

   ReplaceEnd(word, step3_rules);

   ReplaceEnd(word, step4_rules);

   ReplaceEnd(word, step5a_rules);

   ReplaceEnd(word, step5b_rules);

   return(TRUE);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -