📄 parsing.cpp
字号:
#include "stdafx.h"
#include "PcfgParser.h"
#include "parsing.h"
CObArray rules,edges;
int wordNum;
CRule::CRule(CString Line)
{
int i=Line.Find("->");
Ls=Line.Left(i);
Ls.TrimRight();
Ls.TrimLeft();
Line=Line.Mid(i+3);
Line.TrimLeft();
i=Line.Find(' '); // 找空格
if(i<0) {
Rs1=Line;
Rs2="";
}
else {
Rs1=Line.Left(i);
Line=Line.Mid(i+1);
Line.TrimLeft();
Rs2=Line;
}
}
CEdge::CEdge(CString wt, int wid)
{// 为词语建立一个局部分析
int i=wt.Find('/');
if(i<0)
Root=wt;
else
Root=wt.Mid(i+1)+'('+wt.Left(i)+')';
First=Last=wid;
Sub1=Sub2=-1;
}
CEdge::CEdge(CEdge *p,int pid, CString r)
{// 用提升规则建立一个局部分析
Root=r;
First=p->First;
Last=p->Last;
Sub1=pid;
Sub2=-1;
}
CEdge::CEdge(CEdge *p1,CEdge *p2,int pid1,int pid2,CString r)
{// 用捆绑规则建立一个局部分析
Root=r;
First=p1->First;
Last=p2->Last;
Sub1=pid1;
Sub2=pid2;
}
// 全程函数 --> 分析句子(自底向上)
BOOL GetRule(CString &ls,CString rs1, CString rs2)
{ // 查找规则
CRule *r;//规则指针
for(int i=0;i<rules.GetSize();i++) {
r=(CRule *)rules[i];
if(r->Rs1==rs1 && r->Rs2==rs2) {
ls=r->Ls;
return TRUE;
}
}
return FALSE;
}
void Expanding()
{// 根据规则,增加局部分析
int i=edges.GetSize()-1, j=0;
CString nr;
CEdge *e, *e2;
BOOL Raising=FALSE;
while(i<edges.GetSize()) {
e=(CEdge *)edges[i];
e2=(CEdge *)edges[j];
if(!Raising && GetRule(nr,e->GetRoot())) {
edges.Add(new CEdge(e,i,nr));
Raising=TRUE;
}
if(e2->Last+1==e->First && GetRule(nr,e2->GetRoot(),e->GetRoot()))
edges.Add(new CEdge(e2,e,j,i,nr));
j++;
if(j>=i) {
i++;
j=0;
Raising=FALSE;
}
}
}
CString Parsing(CString s)
{// 分析一个句子
CEdge * e=NULL; // 局部分析指针
int wid=1;
CString t;
for(int i=0;i<edges.GetSize();i++)
if(edges[i]!=NULL)
delete edges[i];
edges.RemoveAll(); // 以上是清除分析上一个句子留下的局部分析结果
while(s.GetLength()>0) {
int i=s.Find(' ');
if(i<0) {
t=s;
s="";
}
else {
t=s.Left(i);
s=s.Mid(i+1);
s.TrimLeft();
}
e=new CEdge(t,wid);
edges.Add(e);
wid++;
Expanding();
}
e->WordNumber=wid-1; // 句子中的词数
wordNum = e->WordNumber;
return GetTrees(wid-1);
}
CString GetTrees(int wid)
{ // 获取分析结果树
CString s="",tmp;
CEdge *e;
for(int i=0;i<edges.GetSize();i++) {
e=(CEdge *)edges[i];
if(e->First==1 && e->Last==wid && e->GetRoot()=="S")
s+=GetOneTree(e)+"\n\n";
}
tmp.Format("局部分析%d个;",edges.GetSize());
if(s.IsEmpty())
s=tmp+"句子不合语法"+"\n\n";
else
s=tmp+'\n'+s;
return s;
}
CString GetOneTree(CEdge *e)
{ // 获取一棵分析结果树
if(e->Sub1==-1 && e->Sub2==-1)
return e->Root;
CEdge *e1,*e2;
CString s=e->Root+'(';
e1=(CEdge *)edges[e->Sub1];
s+=GetOneTree(e1);
if(e->Sub2>=0) {
e2=(CEdge *)edges[e->Sub2];
s+='+'+GetOneTree(e2);
}
s+=')';
return s;
}
// 概率语法类成员函数
CProbRule::CProbRule(CString Line)
{
DesireCount=0.0; // 期望次数赋初值
int i=Line.Find("->");
Ls=Line.Left(i);
Ls.TrimRight();
Ls.TrimLeft(); // 规则左部
Line=Line.Mid(i+2); // 原书中为i+3,有误
Line.TrimLeft();
i=Line.ReverseFind(' '); // 找空格
if(i<0)
return;
Prob=atof(Line.Mid(i+1)); // 规则概率
Line=Line.Left(i);
Line.TrimRight();
i=Line.Find(' ');
if(i<0) {
Rs1=Line;
return;
}
Rs1=Line.Left(i);
Line=Line.Mid(i+1);
Line.TrimLeft();
if(Line.GetLength()>0)
Rs2=Line;
else
Rs2="";
}
CProbEdge::CProbEdge(CProbEdge *p,int pid,int Rid)
{
RuleId=Rid;
OutsideProb=0.0;
Parent="";
CProbRule *r=(CProbRule *)rules[Rid];
Root=r->Ls;
First=p->First;
Last=p->Last;
Sub1=pid;
Sub2=-1;
InsideProb=InProbAddUp=(r->Prob)*(p->InProbAddUp);
}
CProbEdge::CProbEdge(CProbEdge *p1,CProbEdge *p2,int pid1,int pid2,int Rid)
{
RuleId=Rid;
OutsideProb=0.0;
Parent="";
CProbRule * r=(CProbRule *)rules[Rid];
Root=r->Ls;
First=p1->First;
Last=p2->Last;
Sub1=pid1;
Sub2=pid2;
InsideProb=InProbAddUp=(r->Prob)*(p1->InProbAddUp)*(p2->InProbAddUp);
}
// 全程函数 -> 概率语法分析
CString ProbParsing(CString s, double & sProb)
{
int i,j,pid,rid,wn=0; // wm是语句s中词的个数
CString w;
CProbEdge *e;
CProbRule *r;
for(i=0;i<edges.GetSize();i++)
if(edges[i]!=NULL)
delete edges[i];
edges.RemoveAll();
while(!s.IsEmpty()) {
wn++;
i=s.Find(' ');
if(i<0) {
w=s;
s="";
}
else {
w=s.Left(i);
s=s.Mid(i);
s.TrimLeft();
}
e=new CProbEdge(w,wn);
edges.Add(e);
pid=edges.GetSize()-1;
rid=GetProbRule(e->GetRoot());
if(rid>=0) {
r=(CProbRule *) rules[rid];
edges.Add(new CProbEdge(e,pid,rid));
}
}
for(j=1;j<=wn;j++) // 原书j=1
for(i=1;i+j<=wn;i++) // 原书i=1
InsertEdges(i,i+j);
e->WordNumber=wn; // 句子中的词数
wordNum = e->WordNumber;
sProb=0.0;
for(i=edges.GetSize()-1;i>=0;i--) {
e=(CProbEdge *)edges[i];
if(e->Root=="S" && e->First==1 && e->Last==wn)
sProb+=e->InsideProb;
else
break;
}
if(sProb>0.0) {
GetOutsideProb(wn);
GetDesireCount(sProb);
}
CString t=GetProbTrees(wn);
return t;
}
int GetProbRule(CString rs1,CString rs2)
{
CProbRule *r;
for(int i=0;i<rules.GetSize();i++) {
r=(CProbRule *) rules[i];
if(r->Rs1==rs1 && r->Rs2==rs2)
return i;
}
return -1;
}
void GetRuleNewProb()
{// 获取新的规则参数
int i,j,start=0; // start 是每组规则的第一条规则的位置
CProbRule *r, *r0;
r=(CProbRule *) rules[0];
double sumCount=r->DesireCount;
CString Ls=r->Ls;
for(i=1;i<rules.GetSize();i++) {
r=(CProbRule *) rules[i];
if(r->Ls==Ls)
sumCount+=r->DesireCount;
else {
for(j=start;j<i;j++) {
r0=(CProbRule *)rules[j];
if (sumCount!=0) // 各条同类规则使用期望数之和不为0,0分母无意义
r0->Prob=(r0->DesireCount)/sumCount;
else
r0->Prob=0;
}
start=i;
sumCount=r->DesireCount;
Ls=r->Ls;
}
}
for(j=start;j<rules.GetSize();j++) {
r0=(CProbRule *) rules[j];
if (sumCount!=0)
r0->Prob=(r0->DesireCount)/sumCount;
else
r0->Prob=0;
}
}
CString GetProbTrees(int wn)
{ // 获取概率分析的结果树
CString s="",tmp,tmpprob;
CProbEdge *e;
for(int i=0;i<edges.GetSize();i++) {
e=(CProbEdge *)edges[i];
if(e->First==1 && e->Last==wn && e->GetRoot()=="S") { // 如果想输出每一个局部分析结果,把这个 { 去掉即可
tmpprob.Format("%7.5f: ",e->InsideProb);
s+=tmpprob+GetOneProbTree(e)+"\n\n";
} // 如果想输出每一个局部分析结果,把这个 } 去掉即可
}
tmp.Format("局部分析%d个;\n\n",edges.GetSize());
if(s.IsEmpty())
s=tmp+"句子不合语法"+"\n\n";
else
s=tmp+s;
return s;
}
CString GetOneProbTree(CProbEdge *e)
{
if(e->Sub1==-1 && e->Sub2==-1)
return e->Root;
CProbEdge *e1,*e2;
CString s=e->Root+'(';
e1=(CProbEdge *)edges[e->Sub1];
s+=GetOneProbTree(e1);
if(e->Sub2>=0) {
e2=(CProbEdge *)edges[e->Sub2];
s+='+'+GetOneProbTree(e2);
}
s+=')';
return s;
}
void InsertEdges(int first,int last)
{
int i,j,rid;
CProbEdge *e, *e1, *e2, *e0;
for(i=0;i<edges.GetSize();i++) {
e1=(CProbEdge *) edges[i];
if(e1->First!=first)
continue;
if(e1->Last==last && (rid=GetProbRule(e1->GetRoot()))>=0) {
e=new CProbEdge(e1,i,rid);
edges.Add(e);
for(int k=edges.GetSize()-2;k>=0;k--) {
e0=(CProbEdge *) edges[k];
if(e0->First!=e->First || e0->Last !=e->Last)
break;
if(e0->GetRoot()==e->GetRoot()) {
e0->InProbAddUp+=e->InsideProb;
e->InProbAddUp+=e0->InsideProb;
}
}
continue;
}
for(j=0;j<edges.GetSize();j++) {
e2=(CProbEdge *) edges[j];
if(e2->Last != last || e1->Last+1 != e2->First)
continue;
rid=GetProbRule(e1->GetRoot(),e2->GetRoot());
if(rid<0)
continue;
e=new CProbEdge(e1,e2,i,j,rid);
edges.Add(e);
for(int k=edges.GetSize()-2;k>=0;k--) {
e0=(CProbEdge *) edges[k];
if(e0->First != e->First || e0->Last != e->Last)
break;
if(e0->GetRoot() == e->GetRoot()) {
e0->InProbAddUp+=e->InsideProb;
e->InProbAddUp+=e0->InsideProb;
}
}
}
}
}
void GetOutsideProb(int wn)
{// 向外算法
int i,j,pid,bid;
CString tmp;
CProbEdge *e, *pe, *be;
CProbRule *r;
for(i=edges.GetSize()-1;i>=0;i--) {
e=(CProbEdge *) edges[i];
e->Parent.TrimRight();
if(e->First==1 && e->Last==wn && e->Root == "S")
e->OutsideProb=1.0;
else
if(e->Sub1<0 || e->Parent.IsEmpty())
continue;
else
while(!(e->Parent.IsEmpty())) {
j=e->Parent.Find(' ');
if(j>0) {
tmp=e->Parent.Left(j);
e->Parent=e->Parent.Mid(j+1);
}
else {
tmp=e->Parent;
e->Parent="";
}
pid = atoi((const char *)tmp);
pe=(CProbEdge *) edges[pid];
r=(CProbRule *) rules[pe->RuleId];
double outProb=(pe->OutsideProb) * (r->Prob);
if(pe->Sub1>=0 && pe->Sub1!=i)
bid=pe->Sub1;
else
if(pe->Sub2>=0 && pe->Sub2!=i)
bid=pe->Sub2;
else
bid=-1;
if(bid>=0) {
be=(CProbEdge *)edges[bid];
outProb *=be->InProbAddUp;
}
e->OutsideProb+=outProb;
}
if(e->Sub1>=0) {
CProbEdge * s1=(CProbEdge *) edges[e->Sub1];
tmp.Format("%d",i);
s1->Parent=tmp; // 原书为+=
}
if(e->Sub2>=0) {
CProbEdge * s2=(CProbEdge *) edges[e->Sub2];
tmp.Format("%d",i);
s2->Parent=tmp;
}
}
}
void GetDesireCount(double sProb)
{ // 计算规则的期望次数
CProbRule *r;
CProbEdge *e,*e1,*e2;
double dCount;
for(int i=0;i<edges.GetSize();i++) {
e=(CProbEdge *)edges[i];
if(e->OutsideProb==0.0)
continue;
r=(CProbRule *)rules[e->RuleId];
dCount=r->Prob *e->OutsideProb;
if(e->Sub1>=0) {
e1=(CProbEdge *) edges[e->Sub1];
dCount *= e1->InProbAddUp;
}
if(e->Sub2>=0) {
e2=(CProbEdge *)edges[e->Sub2];
dCount *=e2->InProbAddUp;
}
dCount/=sProb;
r->DesireCount+=dCount;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -