📄 sharpictclas分词系统简介(1)读取词典库 - first we try, then we trust - 博客园.mht
字号:
style=3D"COLOR: #0000ff">public</SPAN> ArrayChainItem next; =
<BR> }=20
<BR><BR> <SPAN style=3D"COLOR: #0000ff">public</SPAN> <SPAN=20
style=3D"COLOR: #0000ff">class</SPAN> WordResult <BR> {=20
<BR> <SPAN style=3D"COLOR: #008000">//The =
word =20
</SPAN><BR> <SPAN=20
style=3D"COLOR: #0000ff">public</SPAN> <SPAN style=3D"COLOR: =
#0000ff">string</SPAN>=20
sWord; <BR><BR> <SPAN style=3D"COLOR: =
#008000">//the=20
POS of the word </SPAN><BR> <SPAN=20
style=3D"COLOR: #0000ff">public</SPAN> <SPAN style=3D"COLOR: =
#0000ff">int</SPAN>=20
nPOS; <BR><BR> <SPAN style=3D"COLOR: =
#008000">//The=20
-log(frequency/MAX) </SPAN><BR> <SPAN=20
style=3D"COLOR: #0000ff">public</SPAN> <SPAN style=3D"COLOR: =
#0000ff">double</SPAN>=20
dValue; <BR> } <BR><BR> <SPAN=20
style=3D"COLOR: =
#008000">//--------------------------------------------------=20
</SPAN><BR> <SPAN style=3D"COLOR: #008000">// data structure =
for word=20
item </SPAN><BR> <SPAN=20
style=3D"COLOR: =
#008000">//--------------------------------------------------=20
</SPAN><BR> <SPAN style=3D"COLOR: #0000ff">public</SPAN> =
<SPAN=20
style=3D"COLOR: #0000ff">class</SPAN> WordItem <BR> {=20
<BR> <SPAN style=3D"COLOR: =
#0000ff">public</SPAN>=20
<SPAN style=3D"COLOR: #0000ff">int</SPAN> nWordLen;=20
<BR><BR> <SPAN style=3D"COLOR: =
#008000">//The=20
word </SPAN><BR> <SPAN=20
style=3D"COLOR: #0000ff">public</SPAN> <SPAN style=3D"COLOR: =
#0000ff">string</SPAN>=20
sWord; <BR><BR> <SPAN style=3D"COLOR: =
#008000">//the=20
part of speech (POS) of the word=20
</SPAN><BR> <SPAN=20
style=3D"COLOR: #0000ff">public</SPAN> <SPAN style=3D"COLOR: =
#0000ff">int</SPAN>=20
nPOS; <BR><BR> <SPAN style=3D"COLOR: =
#008000">//The=20
number of times the word appears </SPAN><BR> <SPAN=20
style=3D"COLOR: #0000ff">public</SPAN> <SPAN style=3D"COLOR: =
#0000ff">int</SPAN>=20
nFrequency; <BR> } <BR><BR> <SPAN=20
style=3D"COLOR: =
#008000">//--------------------------------------------------=20
</SPAN><BR> <SPAN style=3D"COLOR: #008000">//data structure =
for=20
dictionary index table item </SPAN><BR> <SPAN=20
style=3D"COLOR: =
#008000">//--------------------------------------------------=20
</SPAN><BR> <SPAN style=3D"COLOR: #0000ff">public</SPAN> =
<SPAN=20
style=3D"COLOR: #0000ff">class</SPAN> IndexTableItem <BR> {=20
<BR> <SPAN style=3D"COLOR: #008000">//The =
count=20
of words whose initial letter is sInit=20
</SPAN><BR> <SPAN=20
style=3D"COLOR: #0000ff">public</SPAN> <SPAN style=3D"COLOR: =
#0000ff">int</SPAN>=20
nCount; <BR><BR> <SPAN=20
style=3D"COLOR: #008000">//The head of word items=20
</SPAN><BR> <SPAN=20
style=3D"COLOR: #0000ff">public</SPAN> WordItem[] WordItems; =
<BR> }=20
<BR><BR> <SPAN=20
style=3D"COLOR: =
#008000">//--------------------------------------------------=20
</SPAN><BR> <SPAN style=3D"COLOR: #008000">//data structure =
for word=20
item chain </SPAN><BR> <SPAN=20
style=3D"COLOR: =
#008000">//--------------------------------------------------=20
</SPAN><BR> <SPAN style=3D"COLOR: #0000ff">public</SPAN> =
<SPAN=20
style=3D"COLOR: #0000ff">class</SPAN> WordChain <BR> {=20
<BR> <SPAN style=3D"COLOR: =
#0000ff">public</SPAN>=20
WordItem data; <BR> <SPAN=20
style=3D"COLOR: #0000ff">public</SPAN> WordChain next; <BR> =
}=20
<BR><BR> <SPAN=20
style=3D"COLOR: =
#008000">//--------------------------------------------------=20
</SPAN><BR> <SPAN style=3D"COLOR: #008000">//data structure =
for=20
dictionary modify table item </SPAN><BR> <SPAN=20
style=3D"COLOR: =
#008000">//--------------------------------------------------=20
</SPAN><BR> <SPAN style=3D"COLOR: #0000ff">public</SPAN> =
<SPAN=20
style=3D"COLOR: #0000ff">class</SPAN> ModifyTableItem <BR> { =
<BR> <SPAN style=3D"COLOR: #008000">//The =
count=20
of words whose initial letter is sInit=20
</SPAN><BR> <SPAN=20
style=3D"COLOR: #0000ff">public</SPAN> <SPAN style=3D"COLOR: =
#0000ff">int</SPAN>=20
nCount; <BR><BR> <SPAN=20
style=3D"COLOR: #008000">//The number of deleted items in the index =
table=20
</SPAN><BR> <SPAN=20
style=3D"COLOR: #0000ff">public</SPAN> <SPAN style=3D"COLOR: =
#0000ff">int</SPAN>=20
nDelete; <BR><BR> <SPAN=20
style=3D"COLOR: #008000">//The head of word items=20
</SPAN><BR> <SPAN=20
style=3D"COLOR: #0000ff">public</SPAN> WordChain pWordItemHead; =
<BR> =20
} <BR>} <BR></DIV></DIV>
<P>=E5=85=B6=E4=B8=ADModifyTableItem=E7=94=A8=E4=BA=8E=E7=BB=84=E6=88=90M=
odifyTable=EF=BC=8C=E4=BD=86=E5=9C=A8=E5=AE=9E=E9=99=85=E5=88=86=E8=AF=8D=
=E6=97=B6=EF=BC=8C=E8=AF=8D=E5=BA=93=E5=BE=80=E5=BE=80=E5=A4=84=E4=BA=8E=E2=
=80=9C=E5=8F=AA=E8=AF=BB=E2=80=9D=E7=8A=B6=E6=80=81=EF=BC=8C=E5=9B=A0=E6=AD=
=A4=E7=94=A8=E4=BA=8E=E4=BF=AE=E6=94=B9=E8=AF=8D=E5=BA=93=E7=9A=84ModifyT=
able=E5=AE=9E=E9=99=85=E4=B8=8A=E8=B5=B7=E7=9A=84=E4=BD=9C=E7=94=A8=E5=B9=
=B6=E4=B8=8D=E5=A4=A7=E3=80=82=E5=9B=A0=E6=AD=A4=E5=9C=A8=E5=90=8E=E9=9D=A2=
=E6=88=91=E5=B0=86ModifyTable=E7=9A=84=E4=BB=A3=E7=A0=81=E6=9A=82=E6=97=B6=
=E7=9C=81=E7=95=A5=E3=80=82</P>
<P>=E6=9C=89=E4=BA=86=E5=9F=BA=E6=9C=AC=E5=85=83=E7=B4=A0=E7=9A=84=E5=AE=9A=
=E4=B9=89=E5=90=8E=EF=BC=8C=E5=B0=B1=E8=AF=A5=E5=AE=9A=E4=B9=89=E2=80=9C=E8=
=AF=8D=E5=85=B8=E2=80=9D=E7=B1=BB=E4=BA=86=E3=80=82=E5=8E=9F=E6=9C=89C++=E4=
=BB=A3=E7=A0=81=E4=B8=AD=E6=89=80=E6=9C=89=E7=B1=BB=E5=90=8D=E5=9D=87=E4=BB=
=A5=E5=A4=A7=E5=86=99=E7=9A=84=E2=80=9CC=E2=80=9D=E6=89=93=E5=A4=B4=EF=BC=
=8C=E8=AF=8D=E5=85=B8=E7=B1=BB=E5=90=8D=E4=B8=BACDictionary=EF=BC=8C=E5=9C=
=A8SharpICTCLAS=E4=B8=AD=EF=BC=8C=E6=88=91=E5=8E=BB=E6=8E=89=E4=BA=86=E5=BC=
=80=E5=A4=B4=E7=9A=84=E2=80=9CC=E2=80=9D=EF=BC=8C=E5=B9=B6=E4=B8=94=E4=B8=
=BA=E4=BA=86=E9=98=B2=E6=AD=A2=E5=92=8C=E7=B3=BB=E7=BB=9F=E7=9A=84Diction=
ary=E7=B1=BB=E9=87=8D=E5=90=8D=EF=BC=8C=E7=89=B9=E8=B5=B7=E5=90=8D=E4=B8=BA=
=E2=80=9CWordDictionary=E2=80=9D=E7=B1=BB=E3=80=82=E8=AF=A5=E7=B1=BB=E4=B8=
=BB=E8=A6=81=E8=B4=9F=E8=B4=A3=E5=AE=8C=E6=88=90=E8=AF=8D=E5=85=B8=E5=BA=93=
=E7=9A=84=E8=AF=BB=E3=80=81=E5=86=99=E4=BB=A5=E5=8F=8A=E6=A3=80=E7=B4=A2=E6=
=93=8D=E4=BD=9C=E3=80=82=E8=AE=A9=E6=88=91=E4=BB=AC=E7=9C=8B=E7=9C=8B=E5=A6=
=82=E4=BD=95=E8=AF=BB=E5=8F=96=E8=AF=8D=E5=85=B8=E5=BA=93=EF=BC=9A</P>
<DIV class=3Dcode>
<DIV class=3Dtitle>
<DIV style=3D"FLOAT: right"><IMG class=3DcopyCodeImage=20
src=3D"http://www.cnblogs.com/images/cnblogs_com/zhenyulu/200701/copycode=
.gif"=20
align=3DabsMiddle name=3DccImage> <A onclick=3DCopyCode(this) =
href=3D"javascript:">Copy=20
Code</A></DIV>
<DIV style=3D"CLEAR: =
none">=E8=AF=8D=E5=85=B8=E5=BA=93=E7=9A=84=E8=AF=BB=E5=8F=96=EF=BC=9A</DI=
V></DIV>
<DIV class=3Dcontent><SPAN style=3D"COLOR: #0000ff">public</SPAN> <SPAN=20
style=3D"COLOR: #0000ff">class</SPAN> WordDictionary <BR>{ =
<BR> <SPAN=20
style=3D"COLOR: #0000ff">public</SPAN> <SPAN style=3D"COLOR: =
#0000ff">bool</SPAN>=20
bReleased =3D <SPAN style=3D"COLOR: #0000ff">true</SPAN>; =
<BR><BR> <SPAN=20
style=3D"COLOR: #0000ff">public</SPAN> IndexTableItem[] indexTable;=20
<BR> <SPAN style=3D"COLOR: #0000ff">public</SPAN> =
ModifyTableItem[]=20
modifyTable; <BR><BR> <SPAN style=3D"COLOR: =
#0000ff">public</SPAN>=20
<SPAN style=3D"COLOR: #0000ff">bool</SPAN> Load(<SPAN=20
style=3D"COLOR: #0000ff">string</SPAN> sFilename) <BR> {=20
<BR> <SPAN style=3D"COLOR: =
#0000ff">return</SPAN>=20
Load(sFilename, <SPAN style=3D"COLOR: #0000ff">false</SPAN>); =
<BR> }=20
<BR><BR> <SPAN style=3D"COLOR: #0000ff">public</SPAN> <SPAN=20
style=3D"COLOR: #0000ff">bool</SPAN> Load(<SPAN=20
style=3D"COLOR: #0000ff">string</SPAN> sFilename, <SPAN=20
style=3D"COLOR: #0000ff">bool</SPAN> bReset) <BR> {=20
<BR> <SPAN style=3D"COLOR: =
#0000ff">int</SPAN>=20
frequency, wordLength, pos; <SPAN=20
style=3D"COLOR: =
#008000">//=E9=A2=91=E7=8E=87=E3=80=81=E8=AF=8D=E9=95=BF=E3=80=81=E8=AF=BB=
=E5=8F=96=E8=AF=8D=E6=80=A7 </SPAN><BR> =20
<SPAN style=3D"COLOR: #0000ff">bool</SPAN> isSuccess =3D <SPAN=20
style=3D"COLOR: #0000ff">true</SPAN>; <BR> =
FileStream fileStream =3D <SPAN style=3D"COLOR: #0000ff">null</SPAN>;=20
<BR> BinaryReader binReader =3D <SPAN=20
style=3D"COLOR: #0000ff">null</SPAN>; =
<BR><BR> <SPAN=20
style=3D"COLOR: #0000ff">try</SPAN> <BR> { =
<BR> fileStream =3D =
<SPAN=20
style=3D"COLOR: #0000ff">new</SPAN> FileStream(sFilename, FileMode.Open, =
FileAccess.Read); <BR> =
<SPAN=20
style=3D"COLOR: #0000ff">if</SPAN> (fileStream =3D=3D <SPAN=20
style=3D"COLOR: #0000ff">null</SPAN>)=20
<BR> =
<SPAN=20
style=3D"COLOR: #0000ff">return</SPAN> <SPAN style=3D"COLOR: =
#0000ff">false</SPAN>;=20
<BR><BR> binReader =3D =
<SPAN=20
style=3D"COLOR: #0000ff">new</SPAN> BinaryReader(fileStream,=20
Encoding.GetEncoding(<SPAN style=3D"COLOR: #ff00ff">"gb2312"</SPAN>));=20
<BR><BR> indexTable =3D =
<SPAN=20
style=3D"COLOR: #0000ff">new</SPAN> IndexTableItem[Predefine.CC_NUM];=20
<BR><BR> bReleased =3D =
<SPAN=20
style=3D"COLOR: #0000ff">false</SPAN>;=20
<BR> <SPAN=20
style=3D"COLOR: #0000ff">for</SPAN> (<SPAN style=3D"COLOR: =
#0000ff">int</SPAN> i =3D=20
0; i < Predefine.CC_NUM; i++)=20
<BR> {=20
<BR> =
<SPAN=20
style=3D"COLOR: =
#008000">//=E8=AF=BB=E5=8F=96=E4=BB=A5=E8=AF=A5=E6=B1=89=E5=AD=97=E6=89=93=
=E5=A4=B4=E7=9A=84=E8=AF=8D=E6=9C=89=E5=A4=9A=E5=B0=91=E4=B8=AA=20
</SPAN><BR> &n=
bsp;=20
indexTable[i] =3D <SPAN style=3D"COLOR: #0000ff">new</SPAN> =
IndexTableItem();=20
<BR> =20
indexTable[i].nCount =3D binReader.ReadInt32();=20
<BR><BR>  =
; <SPAN=20
style=3D"COLOR: #0000ff">if</SPAN> (indexTable[i].nCount <=3D 0)=20
<BR> &nb=
sp; =20
<SPAN style=3D"COLOR: #0000ff">continue</SPAN>;=20
<BR><BR>  =
;=20
indexTable[i].WordItems =3D <SPAN style=3D"COLOR: #0000ff">new</SPAN>=20
WordItem[indexTable[i].nCount];=20
<BR><BR>  =
; <SPAN=20
style=3D"COLOR: #0000ff">for</SPAN> (<SPAN style=3D"COLOR: =
#0000ff">int</SPAN> j =3D=20
0; j < indexTable[i].nCount; j++)=20
<BR> { =
<BR> &nb=
sp; =20
indexTable[i].WordItems[j] =3D <SPAN style=3D"COLOR: #0000ff">new</SPAN> =
WordItem();=20
<BR><BR>  =
; =20
frequency =3D binReader.ReadInt32(); <SPAN=20
style=3D"COLOR: #008000">//=E8=AF=BB=E5=8F=96=E9=A2=91=E7=8E=87=20
</SPAN><BR> &n=
bsp; =20
wordLength =3D binReader.ReadInt32(); <SPAN style=3D"COLOR: =
#008000">//=E8=AF=BB=E5=8F=96=E8=AF=8D=E9=95=BF=20
</SPAN><BR> &n=
bsp; =20
pos =3D binReader.ReadInt32(); <SPAN=20
style=3D"COLOR: #008000">//=E8=AF=BB=E5=8F=96=E8=AF=8D=E6=80=A7=20
</SPAN><BR><BR> &nbs=
p; =20
<SPAN style=3D"COLOR: #0000ff">if</SPAN> (wordLength > 0)=20
<BR> &nb=
sp; =20
indexTable[i].WordItems[j].sWord =3D=20
Utility.ByteArray2String(binReader.ReadBytes(wordLength));=20
<BR> &nb=
sp; =20
<SPAN style=3D"COLOR: #0000ff">else</SPAN>=20
<BR> &nb=
sp; =20
indexTable[i].WordItems[j].sWord =3D <SPAN style=3D"COLOR: =
#ff00ff">""</SPAN>;=20
<BR><BR>  =
; =20
<SPAN style=3D"COLOR: #008000">//Reset the frequency=20
</SPAN><BR> &n=
bsp; =20
<SPAN style=3D"COLOR: #0000ff">if</SPAN> (bReset)=20
<BR> &nb=
sp; =20
indexTable[i].WordItems[j].nFrequency =3D 0;=20
<BR> &nb=
sp; =20
<SPAN style=3D"COLOR: #0000ff">else</SPAN>=20
<BR> &nb=
sp; =20
indexTable[i].WordItems[j].nFrequency =3D frequency;=20
<BR><BR>  =
; =20
indexTable[i].WordItems[j].nWordLen =3D wordLength;=20
<BR> &nb=
sp; =20
indexTable[i].WordItems[j].nPOS =3D pos;=20
<BR> } =
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -