📄 isolatin1accentfilter.cs
字号:
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
namespace Lucene.Net.Analysis
{
/// <summary>
/// A filter that replaces accented characters in the ISO Latin 1 character set
/// (ISO-8859-1) by their unaccented equivalent. The case will not be altered.
/// <para>
/// For instance, 'à' (U+00E0) will be replaced by 'a'.
/// </para>
/// </summary>
/// <remarks>
/// Some characters expand to two letters (for example U+00C6 becomes "AE" and
/// U+00DF becomes "ss"), so the filtered term can be longer than the original.
/// U+0152, U+0153 and U+0178 are mapped as well, although they lie outside the
/// U+0000..U+00FF range covered by ISO-8859-1.
/// </remarks>
public class ISOLatin1AccentFilter : TokenFilter
{
    /// <summary>Creates a filter that reads its tokens from <paramref name="input"/>.</summary>
    /// <param name="input">The token stream to wrap; it is passed to the base constructor.</param>
    public ISOLatin1AccentFilter(TokenStream input) : base(input)
    {
    }

    /// <summary>
    /// Fetches the next token from the wrapped stream and replaces its term text
    /// with the result of <see cref="RemoveAccents"/>.
    /// </summary>
    /// <returns>
    /// The token returned by the wrapped stream (modified in place), or
    /// <c>null</c> when the wrapped stream returns <c>null</c>.
    /// </returns>
    public override Token Next()
    {
        Token t = input.Next();
        if (t != null)
        {
            t.SetTermText(RemoveAccents(t.TermText()));
        }
        return t;
    }

    /// <summary> To replace accented characters in a String by unaccented equivalents.</summary>
    /// <param name="input">The text to convert. May be <c>null</c> or empty.</param>
    /// <returns>
    /// The text with every mapped character replaced. When <paramref name="input"/>
    /// is <c>null</c>, empty, or contains no mapped character, it is returned
    /// unchanged (the same instance).
    /// </returns>
    public static System.String RemoveAccents(System.String input)
    {
        // Nothing to scan: hand null/empty back untouched rather than
        // dereferencing a null reference.
        if (input == null || input.Length == 0)
        {
            return input;
        }

        // The builder is created lazily at the first replaced character, so text
        // without any mapped character costs no allocation at all.
        System.Text.StringBuilder output = null;
        for (int i = 0; i < input.Length; i++)
        {
            char c = input[i];
            System.String replacement = GetReplacement(c);
            if (replacement != null)
            {
                if (output == null)
                {
                    // Replacements are one or two characters long, so reserve a
                    // little headroom beyond the input length.
                    output = new System.Text.StringBuilder(input.Length + 8);
                    // Copy the untouched prefix that was skipped so far.
                    output.Append(input, 0, i);
                }
                output.Append(replacement);
            }
            else if (output != null)
            {
                output.Append(c);
            }
        }
        return output == null ? input : output.ToString();
    }

    /// <summary>
    /// Looks up the unaccented replacement for a single character.
    /// </summary>
    /// <param name="c">The character to look up.</param>
    /// <returns>The replacement text, or <c>null</c> when the character is not mapped.</returns>
    private static System.String GetReplacement(char c)
    {
        switch (c)
        {
            case '\u00C0': // A with grave
            case '\u00C1': // A with acute
            case '\u00C2': // A with circumflex
            case '\u00C3': // A with tilde
            case '\u00C4': // A with diaeresis
            case '\u00C5': // A with ring above
                return "A";
            case '\u00C6': // AE ligature
                return "AE";
            case '\u00C7': // C with cedilla
                return "C";
            case '\u00C8': // E with grave
            case '\u00C9': // E with acute
            case '\u00CA': // E with circumflex
            case '\u00CB': // E with diaeresis
                return "E";
            case '\u00CC': // I with grave
            case '\u00CD': // I with acute
            case '\u00CE': // I with circumflex
            case '\u00CF': // I with diaeresis
                return "I";
            case '\u00D0': // capital Eth
                return "D";
            case '\u00D1': // N with tilde
                return "N";
            case '\u00D2': // O with grave
            case '\u00D3': // O with acute
            case '\u00D4': // O with circumflex
            case '\u00D5': // O with tilde
            case '\u00D6': // O with diaeresis
            case '\u00D8': // O with stroke
                return "O";
            case '\u0152': // OE ligature
                return "OE";
            case '\u00DE': // capital Thorn
                return "TH";
            case '\u00D9': // U with grave
            case '\u00DA': // U with acute
            case '\u00DB': // U with circumflex
            case '\u00DC': // U with diaeresis
                return "U";
            case '\u00DD': // Y with acute
            case '\u0178': // Y with diaeresis
                return "Y";
            case '\u00E0': // a with grave
            case '\u00E1': // a with acute
            case '\u00E2': // a with circumflex
            case '\u00E3': // a with tilde
            case '\u00E4': // a with diaeresis
            case '\u00E5': // a with ring above
                return "a";
            case '\u00E6': // ae ligature
                return "ae";
            case '\u00E7': // c with cedilla
                return "c";
            case '\u00E8': // e with grave
            case '\u00E9': // e with acute
            case '\u00EA': // e with circumflex
            case '\u00EB': // e with diaeresis
                return "e";
            case '\u00EC': // i with grave
            case '\u00ED': // i with acute
            case '\u00EE': // i with circumflex
            case '\u00EF': // i with diaeresis
                return "i";
            case '\u00F0': // small eth
                return "d";
            case '\u00F1': // n with tilde
                return "n";
            case '\u00F2': // o with grave
            case '\u00F3': // o with acute
            case '\u00F4': // o with circumflex
            case '\u00F5': // o with tilde
            case '\u00F6': // o with diaeresis
            case '\u00F8': // o with stroke
                return "o";
            case '\u0153': // oe ligature
                return "oe";
            case '\u00DF': // sharp s
                return "ss";
            case '\u00FE': // small thorn
                return "th";
            case '\u00F9': // u with grave
            case '\u00FA': // u with acute
            case '\u00FB': // u with circumflex
            case '\u00FC': // u with diaeresis
                return "u";
            case '\u00FD': // y with acute
            case '\u00FF': // y with diaeresis
                return "y";
            default:
                // Not an accented character handled by this filter.
                return null;
        }
    }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -