📄 porterstemmer.cs
字号:
case 'l':
if (Ends("bli"))
{
R("ble"); break;
}
if (Ends("alli"))
{
R("al"); break;
}
if (Ends("entli"))
{
R("ent"); break;
}
if (Ends("eli"))
{
R("e"); break;
}
if (Ends("ousli"))
{
R("ous"); break;
}
break;
case 'o':
if (Ends("ization"))
{
R("ize"); break;
}
if (Ends("ation"))
{
R("ate"); break;
}
if (Ends("ator"))
{
R("ate"); break;
}
break;
case 's':
if (Ends("alism"))
{
R("al"); break;
}
if (Ends("iveness"))
{
R("ive"); break;
}
if (Ends("fulness"))
{
R("ful"); break;
}
if (Ends("ousness"))
{
R("ous"); break;
}
break;
case 't':
if (Ends("aliti"))
{
R("al"); break;
}
if (Ends("iviti"))
{
R("ive"); break;
}
if (Ends("biliti"))
{
R("ble"); break;
}
break;
case 'g':
if (Ends("logi"))
{
R("log"); break;
}
break;
}
}
/* Step4() handles the -ic-, -ful, -ness family of suffixes, using the same
   strategy as Step3(): look at the last character b[k], then test the
   candidate suffixes for that character in order with Ends(). The first
   suffix that matches is handed to R() together with its replacement:
       icate -> ic    ative -> (nothing)    alize -> al
       iciti -> ic    ical  -> ic           ful   -> (nothing)
       ness  -> (nothing)
   R() is defined elsewhere in this class; whether it actually applies the
   replacement is decided there. */
private void Step4()
{
    char last = b[k];
    if (last == 'e')
    {
        if (Ends("icate"))
        {
            R("ic");
        }
        else if (Ends("ative"))
        {
            R("");
        }
        else if (Ends("alize"))
        {
            R("al");
        }
    }
    else if (last == 'i')
    {
        if (Ends("iciti"))
        {
            R("ic");
        }
    }
    else if (last == 'l')
    {
        if (Ends("ical"))
        {
            R("ic");
        }
        else if (Ends("ful"))
        {
            R("");
        }
    }
    else if (last == 's')
    {
        if (Ends("ness"))
        {
            R("");
        }
    }
}
/* Step5() takes off -ant, -ence etc., in context <c>vcvc<v>.
   It dispatches on the second-to-last character b[k - 1] and tests the
   suffixes belonging to that character in a fixed order. If none matches
   the method returns without touching k. If one matches and M() > 1, the
   word is cut back by setting k = j (j is presumably left by Ends() at the
   last character of the stem -- Ends() is defined elsewhere). */
private void Step5()
{
    if (k == k0)
    {
        return; /* for Bug 1: a one-character word has no b[k - 1] */
    }

    bool suffixFound;
    switch (b[k - 1])
    {
        case 'a':
            suffixFound = Ends("al");
            break;
        case 'c':
            suffixFound = Ends("ance") || Ends("ence");
            break;
        case 'e':
            suffixFound = Ends("er");
            break;
        case 'i':
            suffixFound = Ends("ic");
            break;
        case 'l':
            suffixFound = Ends("able") || Ends("ible");
            break;
        case 'n':
            /* "ement" is tried before "ment" and "ent":
               element etc. not stripped before the m */
            suffixFound = Ends("ant") || Ends("ement") || Ends("ment") || Ends("ent");
            break;
        case 'o':
            /* -ion only counts when preceded by 's' or 't';
               the j >= 0 test fixes Bug 2 (guards the b[j] access).
               "ou" takes care of -ous. */
            suffixFound = (Ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't'))
                          || Ends("ou");
            break;
        case 's':
            suffixFound = Ends("ism");
            break;
        case 't':
            suffixFound = Ends("ate") || Ends("iti");
            break;
        case 'u':
            suffixFound = Ends("ous");
            break;
        case 'v':
            suffixFound = Ends("ive");
            break;
        case 'z':
            suffixFound = Ends("ize");
            break;
        default:
            suffixFound = false;
            break;
    }

    if (!suffixFound)
    {
        return;
    }
    if (M() > 1)
    {
        k = j;
    }
}
/* Step6() tidies the end of the word in two passes:
   1. A final 'e' is dropped (k--) when M() > 1, or when M() == 1 and
      Cvc(k - 1) is false.
   2. A final 'l' is dropped (k--) when Doublec(k) holds and M() > 1.
   j is set to k first; M(), Cvc() and Doublec() are defined elsewhere in
   this class. */
private void Step6()
{
    j = k;

    if (b[k] == 'e')
    {
        int measure = M();
        bool dropFinalE = (measure > 1) || (measure == 1 && !Cvc(k - 1));
        if (dropFinalE)
        {
            k--;
        }
    }

    if (b[k] == 'l')
    {
        if (Doublec(k) && M() > 1)
        {
            k--;
        }
    }
}
/// <summary>Stems a word supplied as a String and returns the result as a String.
/// When the char-array overload reports that stemming changed the word, the
/// stemmer's ToString() value is returned; otherwise the original
/// <paramref name="s"/> instance is returned unchanged.
/// </summary>
/// <param name="s">The word to stem.</param>
/// <returns>The stemmed word, or <paramref name="s"/> itself if nothing changed.</returns>
public virtual System.String Stem(System.String s)
{
    bool changed = Stem(s.ToCharArray(), s.Length);
    return changed ? ToString() : s;
}
/// <summary>Stems the word that occupies the whole of <paramref name="word"/>.
/// Delegates to the (char[], int) overload using the array's full length.
/// After the call the stemmed text can be read back with ToString().
/// </summary>
/// <param name="word">Characters of the word to stem.</param>
/// <returns>true if the stemming process resulted in a word different from the input.</returns>
public virtual bool Stem(char[] word)
{
    int wordLen = word.Length;
    return Stem(word, wordLen);
}
/// <summary>Stems a word contained in a portion of a char[] array.
/// The stemmer is Reset(), the internal buffer is replaced by a larger one
/// (wordLen + EXTRA) if it cannot hold the word, the characters
/// wordBuffer[offset .. offset + wordLen - 1] are copied in, and the
/// stemming steps are run via Stem(0). After the call the stemmed text can
/// be read back with ToString().
/// </summary>
/// <param name="wordBuffer">Array holding the word.</param>
/// <param name="offset">Index in <paramref name="wordBuffer"/> of the word's first character.</param>
/// <param name="wordLen">Number of characters in the word.</param>
/// <returns>true if the stemming process resulted in a word different from the input.</returns>
public virtual bool Stem(char[] wordBuffer, int offset, int wordLen)
{
    Reset();

    // Grow the working buffer only when the incoming word does not fit.
    if (wordLen > b.Length)
    {
        b = new char[wordLen + EXTRA];
    }

    int pos = 0;
    while (pos < wordLen)
    {
        b[pos] = wordBuffer[offset + pos];
        pos++;
    }

    i = wordLen;
    return Stem(0);
}
/// <summary>Stems a word contained in the leading portion of a char[] array.
/// Delegates to the (char[], int, int) overload with a start offset of 0.
/// After the call the stemmed text can be read back with ToString().
/// </summary>
/// <param name="word">Array whose first <paramref name="wordLen"/> characters form the word.</param>
/// <param name="wordLen">Number of characters in the word.</param>
/// <returns>true if the stemming process resulted in a word different from the input.</returns>
public virtual bool Stem(char[] word, int wordLen)
{
    const int startOffset = 0;
    return Stem(word, startOffset, wordLen);
}
/// <summary>Stems the word already placed into the stemmer's buffer through
/// calls to Add(). Delegates to Stem(int) with a start index of 0.
/// After the call the stemmed text can be read back with ToString().
/// </summary>
/// <returns>true if the stemming process resulted in a word different from the input.</returns>
public virtual bool Stem()
{
    const int start = 0;
    return Stem(start);
}
/// <summary>Runs the stemming steps over the characters currently in the
/// buffer. k is set to the last index (i - 1) and k0 to
/// <paramref name="i0"/>; Step1() through Step6() run only when
/// k &gt; k0 + 1. Afterwards i is updated to the stemmed length (k + 1).
/// </summary>
/// <param name="i0">Index of the first character of the word in the buffer.</param>
/// <returns>The dirty flag: set here when the length changed; it may also
/// have been set by the individual steps (not visible in this method).</returns>
public virtual bool Stem(int i0)
{
    k = i - 1;
    k0 = i0;

    bool longEnoughToStem = k > k0 + 1;
    if (longEnoughToStem)
    {
        Step1();
        Step2();
        Step3();
        Step4();
        Step5();
        Step6();
    }

    // Also, a word is considered dirty if we lopped off letters
    // Thanks to Ifigenia Vairelles for pointing this out.
    int stemmedLength = k + 1;
    if (stemmedLength != i)
    {
        dirty = true;
    }
    i = stemmedLength;
    return dirty;
}
/// <summary>Test program for demonstrating the Stemmer. It reads each file
/// named on the command line byte by byte, stems every run of letters
/// (lower-cased first) and writes the result to standard out; every
/// non-letter byte is echoed unchanged.
/// Usage: Stemmer file-name
/// </summary>
/// <param name="args">Paths of the files to process.</param>
[STAThread]
public static void Main(System.String[] args)
{
    PorterStemmer s = new PorterStemmer();
    for (int i = 0; i < args.Length; i++)
    {
        try
        {
            // The using statement guarantees the file handle is released even
            // when a read fails with an IOException part-way through the file
            // (previously Close() was only reached on the success path).
            using (System.IO.BinaryReader in_Renamed = new System.IO.BinaryReader(System.IO.File.Open(args[i], System.IO.FileMode.Open, System.IO.FileAccess.Read)))
            {
                byte[] buffer = new byte[1024];
                int bufferLen, offset, ch;
                bufferLen = in_Renamed.Read(buffer, 0, buffer.Length);
                offset = 0;
                s.Reset();
                while (true)
                {
                    if (offset < bufferLen)
                    {
                        ch = buffer[offset++];
                    }
                    else
                    {
                        // Buffer exhausted: refill it, or signal end of input with -1.
                        bufferLen = in_Renamed.Read(buffer, 0, buffer.Length);
                        offset = 0;
                        if (bufferLen <= 0)
                        {
                            ch = -1;
                        }
                        else
                        {
                            ch = buffer[offset++];
                        }
                    }

                    if (System.Char.IsLetter((char) ch))
                    {
                        s.Add(System.Char.ToLower((char) ch));
                    }
                    else
                    {
                        // A non-letter (or end of input) terminates the current word.
                        s.Stem();
                        System.Console.Out.Write(s.ToString());
                        s.Reset();
                        if (ch < 0)
                        {
                            break;
                        }
                        else
                        {
                            System.Console.Out.Write((char) ch);
                        }
                    }
                }
            }
        }
        catch (System.IO.IOException)
        {
            System.Console.Out.WriteLine("error reading " + args[i]);
        }
    }
}
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -