📄 sharpictclas分词系统简介(6)segment - first we try, then we trust - 博客园.mht
字号:
=E7=94=A8=E6=A0=B9=E6=9C=AC=E4=B8=8D=E9=9C=80=E8=A6=81=E5=BB=BA=E7=AB=8B=E5=
=AF=B9=E8=B1=A1=EF=BC=8C=E8=BF=99=E4=BA=9B=E8=BF=87=E7=A8=8B=E4=BB=85=E4=BB=
=85=E5=AE=8C=E6=88=90=E4=BE=8B=E8=A1=8C=E8=AE=A1=E7=AE=97=E8=80=8C=E5=B7=B2=
=EF=BC=8C=E5=9B=A0=E6=AD=A4=E5=B0=86=E8=BF=99=E4=BA=9B=E6=96=B9=E6=B3=95=E5=
=A3=B0=E6=98=8E=E4=B8=BA=E9=9D=99=E6=80=81=E6=96=B9=E6=B3=95=E6=9B=B4=E5=90=
=88=E9=80=82=EF=BC=8C=E4=BD=95=E5=86=B5=E9=9D=99=E6=80=81=E6=96=B9=E6=B3=95=
=E7=9A=84=E8=B0=83=E7=94=A8=E6=95=88=E7=8E=87=E6=AF=94=E5=AE=9E=E4=BE=8B=E6=
=96=B9=E6=B3=95=E9=AB=98=E3=80=82=E5=9B=A0=E6=AD=A4=E6=9C=AC=E4=BA=BA=E5=9C=
=A8=E5=B0=86ICTCLAS=E7=A7=BB=E6=A4=8D=E5=88=B0C#=E5=B9=B3=E5=8F=B0=E4=B8=8A=
=E6=97=B6=EF=BC=8C=E5=B0=86=E5=B0=BD=E5=8F=AF=E8=83=BD=E5=A4=9A=
=E7=9A=84=E6=96=B9=E6=
=B3=95=E5=AE=9A=E4=B9=89=E6=88=90=E9=9D=99=E6=80=81=E6=96=B9=E6=B3=95=E3=80=
=82</P>
<P>=E4=B8=8B=E9=9D=A2=E6=88=91=E5=B0=B1=E8=AF=B4=E8=AF=B4SharpICTCLAS=E4=B8=
=ADSegment=E7=B1=BB=E7=9A=84=E4=B8=80=E4=BA=9B=E4=B8=BB=E8=A6=81=E5=86=85=
=E5=AE=B9=EF=BC=9A</P>
<H3>1=E3=80=81=E4=B8=BB=E4=BD=93=E9=83=A8=E5=88=86</H3>
<P>=E6=AF=94=E8=BE=83=E5=85=B8=E5=9E=8B=E7=9A=84=E4=B8=80=E4=B8=AA=E8=BF=90=
=E7=AE=97=E8=BF=87=E7=A8=8B=E5=8F=AF=E4=BB=A5=E5=8F=82=E8=80=83BiSegment=E6=
=96=B9=E6=B3=95=EF=BC=8C=E4=BB=A3=E7=A0=81=EF=BC=88=E7=BB=8F=E8=BF=87=E7=AE=
=80=E5=8C=96=EF=BC=89=E5=A6=82=E4=B8=8B=EF=BC=9A</P>
<DIV class=3Dcode>
<DIV class=3Dtitle>
<DIV style=3D"FLOAT: right"><IMG class=3DcopyCodeImage=20
src=3D"http://www.cnblogs.com/images/cnblogs_com/zhenyulu/200701/copycode=
.gif"=20
align=3DabsMiddle name=3DccImage> <A onclick=3DCopyCode(this) =
href=3D"javascript:">Copy=20
Code</A></DIV>
<DIV style=3D"CLEAR: =
none">Segment=E7=B1=BB=E7=9A=84BiSegment=E6=96=B9=E6=B3=95</DIV></DIV>
<DIV class=3Dcontent><SPAN style=3D"COLOR: #0000ff">public</SPAN> <SPAN=20
style=3D"COLOR: #0000ff">int</SPAN> BiSegment(<SPAN=20
style=3D"COLOR: #0000ff">string</SPAN> sSentence, <SPAN=20
style=3D"COLOR: #0000ff">double</SPAN> smoothPara, <SPAN=20
style=3D"COLOR: #0000ff">int</SPAN> nKind) <BR>{ <BR> =
WordResult[]=20
tmpResult; <BR> WordLinkedArray linkedArray; =
<BR> =20
m_pWordSeg =3D <SPAN style=3D"COLOR: #0000ff">new</SPAN> =
List&lt;WordResult[]&gt;();=20
<BR> m_graphOptimum =3D <SPAN style=3D"COLOR: =
#0000ff">new</SPAN>=20
RowFirstDynamicArray&lt;ChainContent&gt;(); <BR><BR> <SPAN=20
style=3D"COLOR: #008000">//---=E5=8E=9F=E5=AD=90=E5=88=86=E8=AF=8D =
</SPAN><BR> <FONT=20
color=3D#ff0000>atomSegment =3D AtomSegment(sSentence); =
</FONT><BR><BR> =20
<SPAN style=3D"COLOR: =
#008000">//---=E6=A3=80=E7=B4=A2=E8=AF=8D=E5=BA=93=EF=BC=8C=E5=8A=A0=E5=85=
=A5=E6=89=80=E6=9C=89=E5=8F=AF=E8=83=BD=E5=88=86=E8=AF=8D=E6=96=B9=E6=A1=88=
=E5=B9=B6=E5=AD=98=E5=85=A5=E9=93=BE=E8=A1=A8=E7=BB=93=E6=9E=84 =
</SPAN><BR> =20
segGraph =3D GenerateWordNet(atomSegment, coreDict); =
<BR><BR> <SPAN=20
style=3D"COLOR: =
#008000">//---=E6=A3=80=E7=B4=A2=E6=89=80=E6=9C=89=E5=8F=AF=E8=83=BD=E7=9A=
=84=E4=B8=A4=E4=B8=A4=E7=BB=84=E5=90=88 </SPAN><BR> =
biGraphResult =3D=20
BiGraphGenerate(segGraph, smoothPara, biDict, coreDict); =
<BR><BR> =20
<SPAN style=3D"COLOR: #008000">//---N =
=E6=9C=80=E7=9F=AD=E8=B7=AF=E5=BE=84=E8=AE=A1=E7=AE=97=E5=87=BA=E5=A4=9A=E4=
=B8=AA=E5=88=86=E8=AF=8D=E6=96=B9=E6=A1=88 </SPAN><BR> =20
NShortPath.Calculate(biGraphResult, nKind); <BR> =
List&lt;<SPAN=20
style=3D"COLOR: #0000ff">int</SPAN>[]&gt; spResult =3D=20
NShortPath.GetNPaths(Predefine.MAX_SEGMENT_NUM); <BR><BR> =
<SPAN=20
style=3D"COLOR: =
#008000">//---=E5=AF=B9=E7=BB=93=E6=9E=9C=E8=BF=9B=E8=A1=8C=E4=BC=98=E5=8C=
=96=EF=BC=8C=E4=BE=8B=E5=A6=82=E5=90=88=E5=B9=B6=E6=97=A5=E6=9C=9F=E7=AD=89=
=E5=B7=A5=E4=BD=9C </SPAN><BR> <SPAN=20
style=3D"COLOR: #0000ff">for</SPAN> (<SPAN style=3D"COLOR: =
#0000ff">int</SPAN> i =3D=20
0; i < spResult.Count; i++) <BR> {=20
<BR> <FONT color=3D#ff0000>linkedArray =3D =
BiPath2LinkedArray(spResult[i], segGraph, atomSegment);=20
</FONT><BR> tmpResult =3D =
GenerateWord(spResult[i],=20
linkedArray, m_graphOptimum); <BR><BR> =
<SPAN=20
style=3D"COLOR: #0000ff">if</SPAN> (tmpResult !=3D <SPAN=20
style=3D"COLOR: #0000ff">null</SPAN>)=20
<BR> =
m_pWordSeg.Add(tmpResult);=20
<BR> } <BR><BR> <SPAN=20
style=3D"COLOR: #0000ff">return</SPAN> m_pWordSeg.Count; =
<BR>}</DIV></DIV>
<P>=E4=BB=8E=E4=B8=8A=E9=9D=A2=E4=BB=A3=E7=A0=81=E5=8F=AF=E4=BB=A5=E7=9C=8B=
=E5=87=BA=EF=BC=8C=E5=B7=B2=E7=BB=8F=E5=B0=86=E5=8E=9F=E6=9C=89ICTCLAS=E7=
=9A=84=E5=8E=9F=E5=AD=90=E5=88=86=E8=AF=8D=E5=8A=9F=E8=83=BD=E5=90=88=E5=B9=
=B6=E5=85=A5Segment=E7=B1=BB=E4=BA=86=E3=80=82</P>
<P>=E5=B0=B1=E6=8B=BF=E2=80=9C<FONT =
color=3D#0000ff>=E4=BB=96=E5=9C=A81=E6=9C=88=E4=BB=BD=E5=A4=A7=E4=BC=9A=E4=
=B8=8A=E8=AF=B4=E7=9A=84=E7=A1=AE=E5=AE=9E=E5=9C=A8=E7=90=86</FONT>=E2=80=
=9D=E8=BF=99=E5=8F=A5=E8=AF=9D=E6=9D=A5=E8=AF=B4=EF=BC=8C=E4=B8=8A=E9=9D=A2=
=E5=87=A0=E4=B8=AA=E6=AD=A5=E9=AA=A4=E5=BE=97=E5=88=B0=E7=9A=84=E4=B8=AD=E9=
=97=B4=E7=BB=93=E6=9E=9C=E5=A6=82=E4=B8=8B=EF=BC=9A</P>
<DIV class=3Dcode>
<DIV class=3Dtitle>
<DIV style=3D"FLOAT: right"><IMG class=3DcopyCodeImage=20
src=3D"http://www.cnblogs.com/images/cnblogs_com/zhenyulu/200701/copycode=
.gif"=20
align=3DabsMiddle name=3DccImage> <A onclick=3DCopyCode(this) =
href=3D"javascript:">Copy=20
Code</A></DIV>
<DIV style=3D"CLEAR: none">=E7=A8=8B=E5=BA=8F</DIV></DIV>
<DIV class=3Dcontent><SPAN style=3D"COLOR: #008000">//=3D=3D=3D=3D =
=E5=8E=9F=E5=A7=8B=E5=8F=A5=E5=AD=90=EF=BC=9A=20
</SPAN><BR><BR>=E4=BB=96=E5=9C=A81=E6=9C=88=E4=BB=BD=E5=A4=A7=E4=BC=9A=E4=
=B8=8A=E8=AF=B4=E7=9A=84=E7=A1=AE=E5=AE=9E=E5=9C=A8=E7=90=86 =
<BR><BR><BR><SPAN style=3D"COLOR: #008000">//=3D=3D=3D=3D=20
=E5=8E=9F=E5=AD=90=E5=88=87=E5=88=86=EF=BC=9A =
</SPAN><BR><BR>=E5=A7=8B##=E5=A7=8B, =E4=BB=96, =E5=9C=A8, 1, =E6=9C=88, =
=E4=BB=BD, =E5=A4=A7, =E4=BC=9A, =E4=B8=8A, =E8=AF=B4, =E7=9A=84, =
=E7=A1=AE, =E5=AE=9E, =E5=9C=A8, =E7=90=86, =E6=9C=AB##=E6=9C=AB,=20
<BR><BR><BR><SPAN style=3D"COLOR: #008000">//=3D=3D=3D=3D =
=E7=94=9F=E6=88=90 segGraph=EF=BC=9A=20
</SPAN><BR><BR>row: 0, col: 1, eWeight:=20
329805.00, nPOS: =
1, =20
sWord:=E5=A7=8B##=E5=A7=8B <BR>row: 1, col: 2, =
eWeight: =20
19823.00, nPOS: 0, =
sWord:=E4=BB=96=20
<BR>row: 2, col: 3, eWeight: =
78484.00, =20
nPOS: 0, sWord:=E5=9C=A8 =
<BR>row: =20
3, col: 4, eWeight: =20
0.00, nPOS: -27904, sWord:=E6=9C=AA##=E6=95=B0 =
<BR>row: 4, =20
col: 5, eWeight: 1900.00, =20
nPOS: 0, sWord:=E6=9C=88 =
<BR>row: =20
4, col: 6, eWeight: =
11.00, =20
nPOS: 28160, sWord:=E6=9C=88=E4=BB=BD <BR>row: =
5, col: =20
6, eWeight: 1234.00, =20
nPOS: 0, sWord:=E4=BB=BD =
<BR>row: =20
6, col: 7, eWeight: 14536.00, =20
nPOS: 0, sWord:=E5=A4=A7 =
<BR>row: =20
6, col: 8, eWeight: 1333.00, =20
nPOS: 28160, sWord:=E5=A4=A7=E4=BC=9A <BR>row: =
7, col: =20
8, eWeight: 6136.00, =20
nPOS: 0, sWord:=E4=BC=9A =
<BR>row: =20
7, col: 9, eWeight: =
469.00, =20
nPOS: 0, =
sWord:=E4=BC=9A=E4=B8=8A <BR>row: =20
8, col: 9, eWeight: 23706.00, =20
nPOS: 0, sWord:=E4=B8=8A =
<BR>row: =20
9, col: 10, eWeight: 17649.00, =20
nPOS: 0, sWord:=E8=AF=B4 =
<BR>row: 10, =20
col: 11, eWeight: 358156.00, =20
nPOS: 0, sWord:=E7=9A=84 =
<BR>row: 10, =20
col: 12, eWeight: 210.00, =
nPOS: =20
25600, sWord:=E7=9A=84=E7=A1=AE <BR>row: 11, col: =
12, =20
eWeight: 181.00, =20
nPOS: 0, sWord:=E7=A1=AE =
<BR>row: 11, =20
col: 13, eWeight: 361.00, =20
nPOS: 0, =
sWord:=E7=A1=AE=E5=AE=9E <BR>row: 12, =20
col: 13, eWeight: 357.00, =20
nPOS: 0, sWord:=E5=AE=9E =
<BR>row: 12, =20
col: 14, eWeight: 295.00, =20
nPOS: 0, =
sWord:=E5=AE=9E=E5=9C=A8 <BR>row: 13, =20
col: 14, eWeight: 78484.00, =20
nPOS: 0, sWord:=E5=9C=A8 =
<BR>row: 13, =20
col: 15, eWeight: 3.00, =20
nPOS: 24832, sWord:=E5=9C=A8=E7=90=86 <BR>row: =
14, col: 15, =20
eWeight: 129.00, =20
nPOS: 0, sWord:=E7=90=86 =
<BR>row: 15, =20
col: 16, eWeight:2079997.00, =20
nPOS: 4, =
sWord:=E6=9C=AB##=E6=9C=AB <BR><BR><BR><SPAN=20
style=3D"COLOR: #008000">//=3D=3D=3D=3D =E7=94=9F=E6=88=90 =
biSegGraph=EF=BC=9A </SPAN><BR><BR>row: 0, =20
col: 1, eWeight: =
3.37, =20
nPOS: 1, =
sWord:=E5=A7=8B##=E5=A7=8B@=E4=BB=96 <BR>row: =20
1, col: 2, eWeight: =20
3.37, nPOS: 0, =
sWord:=E4=BB=96@=E5=9C=A8=20
<BR>row: 2, col: 3, =20
eWeight: 3.74, =20
nPOS: 0, =
sWord:=E5=9C=A8@=E6=9C=AA##=E6=95=B0 <BR>row: =20
3, col: 4, eWeight: -27898.79, nPOS:=20
-27904, sWord:=E6=9C=AA##=E6=95=B0@=E6=9C=88 <BR>row: =
3, col: 5, =20
eWeight: -27898.75, nPOS: -27904, =
sWord:=E6=9C=AA##=E6=95=B0@=E6=9C=88=E4=BB=BD=20
<BR>row: 4, col: 6, =20
eWeight: 9.33, =20
nPOS: 0, =
sWord:=E6=9C=88@=E4=BB=BD <BR>row: =20
5, col: 7, eWeight: =
13.83, =20
nPOS: 28160, sWord:=E6=9C=88=E4=BB=BD@=E5=A4=A7 =
<BR>row: 6, col: =20
7, eWeight: 9.76, =20
nPOS: 0, =
sWord:=E4=BB=BD@=E5=A4=A7 <BR>row: =20
5, col: 8, eWeight: =
13.83, =20
nPOS: 28160, =
sWord:=E6=9C=88=E4=BB=BD@=E5=A4=A7=E4=BC=9A <BR>row: 6, =
col: =20
8, eWeight: 9.76, =20
nPOS: 0, =
sWord:=E4=BB=BD@=E5=A4=A7=E4=BC=9A <BR>row: =20
7, col: 9, eWeight: =20
7.30, nPOS: 0, =
sWord:=E5=A4=A7@=E4=BC=9A=20
<BR>row: 7, col: 10, =
eWeight: =20
7.30, nPOS: 0, =
sWord:=E5=A4=A7@=E4=BC=9A=E4=B8=8A=20
<BR>row: 8, col: 11, =
eWeight: =20
2.11, nPOS: 28160, =
sWord:=E5=A4=A7=E4=BC=9A@=E4=B8=8A <BR>row: =20
9, col: 11, eWeight: =
8.16, =20
nPOS: 0, =
sWord:=E4=BC=9A@=E4=B8=8A <BR>row: 10, =20
col: 12, eWeight: 3.42, =20
nPOS: 0, =
sWord:=E4=BC=9A=E4=B8=8A@=E8=AF=B4 <BR>row: 11, =20
col: 12, eWeight: 4.07, =20
nPOS: 0, =
sWord:=E4=B8=8A@=E8=AF=B4 <BR>row: 12, =20
col: 13, eWeight: 4.05, =20
nPOS: 0, =
sWord:=E8=AF=B4@=E7=9A=84 <BR>row: 12, =20
col: 14, eWeight: 7.11, =20
nPOS: 0, =
sWord:=E8=AF=B4@=E7=9A=84=E7=A1=AE <BR>row: 13, =20
col: 15, eWeight: 4.10, =20
nPOS: 0, =
sWord:=E7=9A=84@=E7=A1=AE <BR>row: 13, =20
col: 16, eWeight: 4.10, =20
nPOS: 0, =
sWord:=E7=9A=84@=E7=A1=AE=E5=AE=9E <BR>row: 14, =20
col: 17, eWeight: 11.49, =
nPOS: =20
25600, sWord:=E7=9A=84=E7=A1=AE@=E5=AE=9E <BR>row: 15, =
col: 17, =20
eWeight: 11.63, =20
nPOS: 0, =
sWord:=E7=A1=AE@=E5=AE=9E <BR>row: 14, =20
col: 18, eWeight: 11.49, =
nPOS: =20
25600, sWord:=E7=9A=84=E7=A1=AE@=E5=AE=9E=E5=9C=A8 <BR>row: =
15, col: 18, =20
eWeight: 11.63, =20
nPOS: 0, =
sWord:=E7=A1=AE@=E5=AE=9E=E5=9C=A8 <BR>row: 16, =20
col: 19, eWeight: 3.92, =20
nPOS: 0, =
sWord:=E7=A1=AE=E5=AE=9E@=E5=9C=A8 <BR>row: 17, =20
col: 19, eWeight: 10.98, =20
nPOS: 0, =
sWord:=E5=AE=9E@=E5=9C=A8 <BR>row: 16, =20
col: 20, eWeight: 10.97, =20
nPOS: 0, =
sWord:=E7=A1=AE=E5=AE=9E@=E5=9C=A8=E7=90=86 <BR>row:=20
17, col: 20, eWeight: =
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -