⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dictment.cpp

📁 计算机英汉机器翻译系统中的英语词性标注方法实现
💻 CPP
📖 第 1 页 / 共 3 页
字号:
	ObWord* pObject;

	if ( !pSlot->m_bIsTranRule ) { // 不是规则
		if ( pSlot->m_pszSlotValue != NULL ) { // 槽值不为空
			if ( m_mapSlotName.Lookup(pSlot->m_pszSlotName,( CObject*& )pObject) == FALSE ) {
				// Not Found
				CString strMsg;
				strMsg.Format("ERROR 23, %s 第%d行: 槽名错,没有 %s 这个槽名",
					m_pszDicName,m_nCurrReadLineNum,pSlot->m_pszSlotName);
				if ( m_bIsWriteLog ) {
					fputs(strMsg,m_fpErrLogFile);
					fputs("\r\n",m_fpErrLogFile);
				} else
					AfxMessageBox(strMsg);

				if ( m_bIsWriteErrToDic ) {
					fputs(strMsg,m_fpTarDic);
					fputs("\r\n",m_fpTarDic);
				}
				return FALSE;
			} else { // 若槽名正确
				if ( pObject->m_nIndex != ADDQUALFR ) {
					if ( m_mapSlotValue[pObject->m_nIndex].Lookup(pSlot->m_pszSlotValue,( CObject*& )pObject) == FALSE ) {
						CString strMsg;
						strMsg.Format("ERROR 24,第%d行: 槽值错,槽名%s项没有 %s 这个槽值",
							m_pszDicName,m_nCurrReadLineNum,pSlot->m_pszSlotName,
							pSlot->m_pszSlotValue);
						if ( m_bIsWriteLog ) {
							fputs(strMsg,m_fpErrLogFile);
							fputs("\r\n",m_fpErrLogFile);
						} else
							AfxMessageBox(strMsg);

						if ( m_bIsWriteErrToDic ) {
							fputs(strMsg,m_fpTarDic);
							fputs("\r\n",m_fpTarDic);
						}
						return FALSE;
					}
				}
			}
		} else { // 槽值为空
			if ( m_mapNoValueSlot.Lookup(pSlot->m_pszSlotName,( CObject*& )pObject) == FALSE ) {
				CString strMsg;
				strMsg.Format("ERROR 25,第%d行: 没有 %s 这个保留字",
					m_pszDicName,m_nCurrReadLineNum,pSlot->m_pszSlotName);
				if ( m_bIsWriteLog ) {
					fputs(strMsg,m_fpErrLogFile);
					fputs("\r\n",m_fpErrLogFile);
				} else
					AfxMessageBox(strMsg);

				if ( m_bIsWriteErrToDic ) {
					fputs(strMsg,m_fpTarDic);
					fputs("\r\n",m_fpTarDic);
				}
				return FALSE;
			}
		}
	}

	pSlot->m_pNextSlot = NULL;
	return pSlot;
}

// Parses the attribute list that follows the Chinese text of a dictionary
// line and records the result on m_pCurrChinese.
//
// pszLeft: the part of the source line left over after the English and the
//          Chinese text. It is read as a sequence of fields separated by
//          ',' where the last field is ended by a carriage return (0x0d).
//
// Each field is dispatched on its shape:
//   Cate=<value>  -> SetCateOfWordItem(<value>)
//   Head=<value>  -> SetHeadOfWordItem(<value>)
//   @...          -> translation rule; the whole field goes to MakeSlot
//   <name>=<value>-> slot with a value
//   <name>        -> slot without a value
// Slots are appended, in order, to the m_pFirstSlot/m_pLastSlot list of
// m_pCurrChinese, which is reset to empty on entry.
//
// Returns TRUE once the field ended by the carriage return has been
// processed, FALSE as soon as a field cannot be processed. Errors detected
// here are written to the error log (or shown in a message box when logging
// is off) and, if m_bIsWriteErrToDic is set, also into the target dictionary.
BOOL COneWord::SetSoltOfWordItem(LPSTR pszLeft)
{
	BOOL bIsFinishLine = FALSE;  // set once the field ended by 0x0d is reached
	BOOL bHaveExistRule = FALSE; // a rule containing '=' has already been seen

	m_pCurrChinese->m_pFirstSlot = NULL;
	m_pCurrChinese->m_pLastSlot = NULL;
	m_pCurrChinese->m_pCurrSlot = NULL;

	do {
		// Locate the end of the current field once: the next ',' or, for the
		// last field, the carriage return. Every branch below works on the
		// same terminator, so there is no need to search for it again.
		LPSTR pszEnd = strchr(pszLeft,',');
		if ( pszEnd == NULL ) {
			bIsFinishLine = TRUE;
			pszEnd = strchr(pszLeft,0x0d);
			if ( pszEnd == NULL ) {
				// Neither ',' nor 0x0d: the line is not terminated.
				CString strMsg;
				strMsg.Format("ERROR 12, %s 第%d行",
					m_pszDicName,m_nCurrReadLineNum);
				if ( m_bIsWriteLog ) {
					fputs(strMsg,m_fpErrLogFile);
					fputs("\r\n",m_fpErrLogFile);
				} else
					AfxMessageBox(strMsg);

				if ( m_bIsWriteErrToDic ) {
					fputs(strMsg,m_fpTarDic);
					fputs("\r\n",m_fpTarDic);
				}
				return FALSE;
			}
		}

		// An '=' only belongs to this field if it comes before the terminator.
		LPSTR pszEq = strchr(pszLeft,'=');
		if ( pszEq != NULL && pszEq > pszEnd )
			pszEq = NULL;

		// NOTE(review): strncmp is limited to the length of the field name, so
		// an empty name or a prefix such as "Ca" / "He" also matches here.
		// Kept as-is; confirm whether abbreviations are meant to be accepted.
		if ( pszEq != NULL && strncmp(pszLeft,"Cate",pszEq-pszLeft) == 0 ) {
			if ( !SetCateOfWordItem(pszEq+1,pszEnd-pszEq-1) )
				return FALSE;
		} else if ( pszEq != NULL && strncmp(pszLeft,"Head",pszEq-pszLeft) == 0 ) {
			if ( !SetHeadOfWordItem(pszEq+1,pszEnd-pszEq-1) )
				return FALSE;
		} else {
			Slot *pSlot;
			if ( pszLeft[0] == '@' ) { // translation rule
				// NOTE(review): only rules that contain '=' take part in the
				// duplicate check; a rule without '=' is accepted any number
				// of times. Kept as-is; confirm this is intended.
				if ( pszEq != NULL ) {
					if ( bHaveExistRule ) {
						CString strMsg;
						strMsg.Format("ERROR 27, %s 第%d行: 规则重复",
							m_pszDicName,m_nCurrReadLineNum);
						if ( m_bIsWriteLog ) {
							fputs(strMsg,m_fpErrLogFile);
							fputs("\r\n",m_fpErrLogFile);
						} else
							AfxMessageBox(strMsg);

						if ( m_bIsWriteErrToDic ) {
							fputs(strMsg,m_fpTarDic);
							fputs("\r\n",m_fpTarDic);
						}
						return FALSE;
					}
					bHaveExistRule = TRUE;
				}
				// The rule text is the whole field, including any '='.
				pSlot = MakeSlot(TRUE,pszLeft,pszEnd-pszLeft,
						NULL,0);
			} else if ( pszEq != NULL ) { // <name>=<value>
				pSlot = MakeSlot(FALSE,pszLeft,pszEq-pszLeft,
						pszEq+1,pszEnd-pszEq-1);
			} else { // <name> only
				pSlot = MakeSlot(FALSE,pszLeft,pszEnd-pszLeft,
						NULL,0);
			}

			// Do not link a null slot: doing so would clear m_pLastSlot and
			// the slots created afterwards would never be chained to the list.
			// Presumably MakeSlot has already reported why it failed.
			if ( pSlot == NULL )
				return FALSE;

			// Append to the slot list of the current Chinese part.
			if ( m_pCurrChinese->m_pFirstSlot == NULL )
				m_pCurrChinese->m_pFirstSlot = pSlot;

			if ( m_pCurrChinese->m_pLastSlot != NULL )
				m_pCurrChinese->m_pLastSlot->m_pNextSlot = pSlot;

			m_pCurrChinese->m_pLastSlot = pSlot;
			m_pCurrChinese->m_pCurrSlot = pSlot;
		}

		// Continue right after the terminator of the field just handled.
		pszLeft = pszEnd + 1;
	} while ( bIsFinishLine == FALSE );
	return TRUE;
}

// Parses the Chinese part of a dictionary line.
//
// pszChiPart: text starting at the Chinese translation. Everything up to the
//             first ',' is passed to SetChiTextOfWordItem; everything after
//             that ',' is passed to SetSoltOfWordItem.
//
// Returns TRUE when both steps succeed. Returns FALSE when there is no ','
// (reported as ERROR 14 to the error log or a message box, and optionally to
// the target dictionary) or when either step returns FALSE.
BOOL COneWord::SetChiPartOfWordItem(LPSTR pszChiPart)
{
	LPSTR pszComma = strchr(pszChiPart,',');
	if ( pszComma == NULL ) {
		CString strMsg;
		// The format string must consume both arguments: dictionary name
		// first, then the line number.
		strMsg.Format("ERROR 14, %s 第%d行: 找不到 , ",
			m_pszDicName,m_nCurrReadLineNum);
		if ( m_bIsWriteLog ) {
			fputs(strMsg,m_fpErrLogFile);
			fputs("\r\n",m_fpErrLogFile);
		} else
			AfxMessageBox(strMsg);

		if ( m_bIsWriteErrToDic ) {
			fputs(strMsg,m_fpTarDic);
			fputs("\r\n",m_fpTarDic);
		}
		return FALSE;
	}

	// Chinese text is the span before the comma.
	if ( !SetChiTextOfWordItem(pszChiPart,pszComma - pszChiPart) )
		return FALSE;

	// The remainder of the line holds the Cate/Head/slot fields.
	if ( !SetSoltOfWordItem(pszComma + 1) )
		return FALSE;
	return TRUE;
}

BOOL COneWord::ReadWordFromSouDict(FILE *fpSouFile,LPSTR pszSouName,
								   FILE *fpTarDic,BOOL &bIsEof)
{
	static FILE *fpLastFile = NULL;
	if ( fpSouFile != fpLastFile ) {
		m_nCurrReadLineNum = 0;
		fpLastFile = fpSouFile;
	}
	
	m_pszDicName = pszSouName;
	m_fpTarDic = fpTarDic;

	char szSouLine[MAX_SOU_LINE_LEN];
	BOOL bNoFinishCurWord = TRUE;
	BOOL bIsFirstGetChiWhenHaveOrig = TRUE;

	do {
		fgets(szSouLine,MAX_SOU_LINE_LEN,fpSouFile);

		if ( m_bIsWriteErrToDic )
			fputs(szSouLine,m_fpTarDic);

		m_nCurrReadLineNum ++;

		if ( feof(fpSouFile) ) {
			bIsEof = TRUE;
			return TRUE;
		} else {
			bIsEof = FALSE;
		}
		
		LPSTR pszTep;
		LPSTR pszCurrSite;
		switch ( szSouLine[0] ) {
		case '#':
			pszTep = strstr(szSouLine+1,"\\");
			if ( pszTep == NULL ) {
				CString strMsg;
				strMsg.Format("ERROR 15,第%d行",
					m_pszDicName,m_nCurrReadLineNum);
				if ( m_bIsWriteLog ) {
					fputs(strMsg,m_fpErrLogFile);
					fputs("\r\n",m_fpErrLogFile);
				} else
					AfxMessageBox(strMsg);

				if ( m_bIsWriteErrToDic ) {
					fputs(strMsg,m_fpTarDic);
					fputs("\r\n",m_fpTarDic);
				}
				return FALSE;
			}
			switch ( pszTep[1] ) {
			case '*':
				m_bIsExistOrig = FALSE;
				m_bIsExistAmbig = FALSE;

				if ( pszTep - szSouLine - 1 <= 0 ) {
					CString strMsg;
					strMsg.Format("ERROR 16,第%d行",
						m_pszDicName,m_nCurrReadLineNum);
					if ( m_bIsWriteLog ) {
						fputs(strMsg,m_fpErrLogFile);
						fputs("\r\n",m_fpErrLogFile);
					} else
						AfxMessageBox(strMsg);

					if ( m_bIsWriteErrToDic ) {
						fputs(strMsg,m_fpTarDic);
						fputs("\r\n",m_fpTarDic);
					}
					return FALSE;
				}
				if ( !SetEnglishOfWordItem(szSouLine + 1,pszTep - szSouLine - 1) )
					return FALSE;
				pszCurrSite = pszTep + 2;

				m_pFirstChinese = new ChinesePart;
				
				m_pFirstChinese->m_pszChinese = NULL;
				m_pFirstChinese->m_pszCate = NULL;
				m_pFirstChinese->m_pszHead = NULL;

				m_pFirstChinese->m_nChineseLen = 0;
				m_pFirstChinese->m_pFirstSlot = NULL;
				m_pFirstChinese->m_pLastSlot = NULL;
				m_pFirstChinese->m_pCurrSlot = NULL;

				m_pFirstChinese->m_pNextPart = NULL;

				m_pLastChinese = m_pFirstChinese;
				
				m_pCurrChinese = m_pFirstChinese;

				if ( !SetChiPartOfWordItem(pszCurrSite) )
					return FALSE;
				break;
			case '^':
				if ( pszTep - szSouLine - 1 <= 0 ) {
					CString strMsg;
					strMsg.Format("ERROR 17,第%d行",
						m_pszDicName,m_nCurrReadLineNum);
					if ( m_bIsWriteLog ) {
						fputs(strMsg,m_fpErrLogFile);
						fputs("\r\n",m_fpErrLogFile);
					} else
						AfxMessageBox(strMsg);

					if ( m_bIsWriteErrToDic ) {
						fputs(strMsg,m_fpTarDic);
						fputs("\r\n",m_fpTarDic);
					}
					return FALSE;
				}
				if ( !SetEnglishOfWordItem(szSouLine + 1,pszTep - szSouLine - 1) )
					return FALSE;
				
				pszCurrSite = pszTep + 2;
				if ( !SetOrigWord(pszCurrSite) )
					return FALSE;
				break;
			case 'A':
				if ( pszTep - szSouLine - 1 <= 0 ) {
					CString strMsg;
					strMsg.Format("ERROR 18,第%d行",
						m_pszDicName,m_nCurrReadLineNum);
					if ( m_bIsWriteLog ) {
						fputs(strMsg,m_fpErrLogFile);
						fputs("\r\n",m_fpErrLogFile);
					} else
						AfxMessageBox(strMsg);

					if ( m_bIsWriteErrToDic ) {
						fputs(strMsg,m_fpTarDic);
						fputs("\r\n",m_fpTarDic);
					}
					return FALSE;
				}
				if ( !SetEnglishOfWordItem(szSouLine + 1,pszTep - szSouLine - 1) )
					return FALSE;

				pszCurrSite = pszTep + 2;
				if ( !SetAmbig(pszCurrSite) )
					return FALSE;
				break;
			default:
				CString strMsg;
				strMsg.Format("ERROR 20,第%d行: \\后跟未知字符",
					m_pszDicName,m_nCurrReadLineNum);
				if ( m_bIsWriteLog ) {
					fputs(strMsg,m_fpErrLogFile);
					fputs("\r\n",m_fpErrLogFile);
				} else
					AfxMessageBox(strMsg);

				if ( m_bIsWriteErrToDic ) {
					fputs(strMsg,m_fpTarDic);
					fputs("\r\n",m_fpTarDic);
				}
				return FALSE;
			}
			break;
		case '*':
			if (   (!m_bIsExistOrig && !m_bIsExistAmbig)
				|| !bIsFirstGetChiWhenHaveOrig ) {
				// 本词没有原形和兼类
				pszCurrSite = szSouLine + 1;
				m_pCurrChinese = new ChinesePart;
				
				m_pCurrChinese->m_pszChinese = NULL;
				m_pCurrChinese->m_pszCate = NULL;
				m_pCurrChinese->m_pszHead = NULL;

				m_pCurrChinese->m_nChineseLen = 0;
				m_pCurrChinese->m_pFirstSlot = NULL;
				m_pCurrChinese->m_pLastSlot = NULL;
				m_pCurrChinese->m_pCurrSlot = NULL;

				m_pCurrChinese->m_pNextPart = NULL;
				
				m_pLastChinese->m_pNextPart = m_pCurrChinese;
				m_pLastChinese = m_pCurrChinese;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -