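// File: tokenizertest.cs
// (The "font size" label that followed here was code-viewer page residue, not part of the program.)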
using System;
using System.Collections.Generic;
using System.Text;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis;
using Lucene.Net.Index;
using Lucene.Net.Documents;
using System.IO;
using Lucene.Net.Analysis.Cn;
using Lucene.Net.Analysis.CJK;
//date:11-02-2007
//home page:http://www.cnblogs.com/xuanfeng
//author:peizunyou
namespace TokenizerTest
{
/// <summary>
/// Console demo that feeds one sample string through several Lucene.Net
/// tokenizers/analyzers and prints the tokens each of them produces.
/// </summary>
class TokenizerTest
{
    /// <summary>
    /// Entry point: prints the sample text, then the token output of each
    /// tokenizer in turn, and finally waits for console input.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        // The sample mixes Chinese (with and without spaces), English, an IP
        // address, an e-mail address, bare symbols and numbers.
        string testText = "我们是中国人; 我们 是 人;we are chinese; 172.16.34.172;youpeizun@126.com;#$*;85*34;58 69";
        Console.WriteLine("测试文字:"+testText);
        Console.WriteLine("测试StandardTokenizer的分词情况如下:");
        TestStandardTokenizer(testText);
        Console.WriteLine("测试CJKTokenizer的分词情况如下:");
        TestCJKTokenizer(testText);
        Console.WriteLine("测试ChinessTokenizer的分词情况如下:");
        TestChinessTokenizer(testText);
        Console.WriteLine("测试LowerCaseTokenizer的分词情况如下:");
        TestLowerCaseTokenizer(testText);
        Console.WriteLine("测试WhitespaceTokenizer的分词情况如下:");
        TestWhitespaceTokenizer(testText);
        // Blocks until input is available (presumably to keep the console window open).
        Console.Read();
    }

    /// <summary>
    /// Tokenizes <paramref name="text"/> with <c>StandardTokenizer</c> and prints
    /// every token followed by "/ ".
    /// </summary>
    /// <param name="text">Text to tokenize.</param>
    static void TestStandardTokenizer(string text)
    {
        using (TextReader tr = new StringReader(text))
        {
            // Print the token returned by Next() rather than peeking at the
            // tokenizer's internal parser state.
            PrintTokens(new StandardTokenizer(tr), "/ ");
        }
    }

    /// <summary>
    /// Tokenizes <paramref name="text"/> through <c>CJKAnalyzer</c> and prints
    /// every token followed by "/ ".
    /// </summary>
    /// <param name="text">Text to tokenize.</param>
    static void TestCJKTokenizer(string text)
    {
        using (TextReader tr = new StringReader(text))
        {
            CJKAnalyzer cjkA = new CJKAnalyzer();
            // Iterate until the stream is exhausted instead of comparing end
            // offsets with text.Length: the stream can end before the last
            // offset reaches the text length, and Next() then returns null.
            PrintTokens(cjkA.TokenStream(tr), "/ ");
        }
    }

    /// <summary>
    /// Tokenizes <paramref name="text"/> with <c>ChineseTokenizer</c> and prints
    /// every token followed by "/ ".
    /// </summary>
    /// <param name="text">Text to tokenize.</param>
    static void TestChinessTokenizer(string text)
    {
        using (TextReader tr = new StringReader(text))
        {
            // Same null-terminated iteration as the other tests; no reliance on
            // the final token's end offset matching text.Length.
            PrintTokens(new ChineseTokenizer(tr), "/ ");
        }
    }

    /// <summary>
    /// Tokenizes <paramref name="text"/> through <c>SimpleAnalyzer</c> and prints
    /// every token followed by "/".
    /// </summary>
    /// <param name="text">Text to tokenize.</param>
    static void TestLowerCaseTokenizer(string text)
    {
        using (TextReader tr = new StringReader(text))
        {
            // SimpleAnalyzer uses the LowerCaseTokenizer as its tokenizer.
            SimpleAnalyzer sA = new SimpleAnalyzer();
            PrintTokens(sA.TokenStream(tr), "/");
        }
    }

    /// <summary>
    /// Tokenizes <paramref name="text"/> through <c>WhitespaceAnalyzer</c> and
    /// prints every token followed by "/".
    /// </summary>
    /// <param name="text">Text to tokenize.</param>
    static void TestWhitespaceTokenizer(string text)
    {
        using (TextReader tr = new StringReader(text))
        {
            WhitespaceAnalyzer sA = new WhitespaceAnalyzer();
            PrintTokens(sA.TokenStream(tr), "/");
        }
    }

    /// <summary>
    /// Writes the term text of every token in <paramref name="stream"/> to the
    /// console, each followed by <paramref name="separator"/>, then ends the line.
    /// </summary>
    /// <param name="stream">Token stream to drain; read until Next() returns null.</param>
    /// <param name="separator">Text appended after each token.</param>
    private static void PrintTokens(TokenStream stream, string separator)
    {
        // Fully qualified: Lucene.Net.Analysis.Standard also declares a Token type.
        Lucene.Net.Analysis.Token t;
        while ((t = stream.Next()) != null)
        {
            Console.Write(t.TermText() + separator);
        }
        Console.WriteLine();
    }
}
}
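// Code-viewer keyboard shortcuts (page residue, not part of the program):
//   Copy code:          Ctrl + C
//   Search code:        Ctrl + F
//   Full screen:        F11
//   Toggle theme:       Ctrl + Shift + D
//   Show shortcuts:     ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -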