sentseg.cpp
来自「汉字字频统计工具。功能很简单。」· C++ 代码 · 共 133 行
CPP
133 行
// SentSeg.cpp: implementation of the CSentSeg class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "CPT.h"
#include "SentSeg.h"
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Builds a segmenter bound to a document and a root directory.
//   pDoc    - document object whose counters are updated while scanning.
//   dirpath - directory that DoSentSeg() hands to FindAllFiles() as the
//             starting point of the search.
// The output file pointer starts out null and the sentence buffer index at 0.
CSentSeg::CSentSeg(CCPTDoc * pDoc, CString dirpath)
	: m_pDoc(pDoc),
	  m_sDir(dirpath),
	  m_fOut(NULL),
	  m_nIdx(0)
{
}
// Releases the output file object if one is still allocated.
CSentSeg::~CSentSeg()
{
	// Deleting a null pointer is a no-op, so no explicit check is required.
	delete m_fOut;
}
void CSentSeg::DoSentSeg()
{
m_pDoc->ClearAllCounters();
m_saFiles.RemoveAll();
FindAllFiles(m_sDir, "*.htm;*.html;*.txt;");
m_fOut = new CFile;
if (!(m_fOut->Open("sentence.txt", CFile::modeCreate | CFile::modeWrite)))
{
delete m_fOut;
m_fOut = NULL;
return;
}
for (int i=0; i<m_saFiles.GetSize(); i++,m_pDoc->IncNumFiles())
{
try
{
ChChar cc;
UINT n;
CFile f;
f.Open(m_saFiles[i], CFile::modeRead);
while ((n=f.Read(cc, 1))==1)
{
if (cc[0] >= 128)
{
n=f.Read(cc+1, 1);
if (n==1 && IsChineseChar(cc))
{
if (m_nIdx == 1023)
{
delete m_fOut;
m_fOut = NULL;
return;
}
m_ccBuf[m_nIdx][0]=cc[0];
m_ccBuf[m_nIdx++][1]=cc[1];
m_pDoc->IncNumChars();
continue;
}
}
OutputSentence();
}
OutputSentence();
f.Close();
}
catch (CFileException *e) {}
}
delete m_fOut;
m_fOut = NULL;
}
// Flushes the characters collected in m_ccBuf to the output file as one line.
// Does nothing when the buffer is empty. Otherwise the sentence counter of the
// document is incremented, a zero byte is stored right after the last buffered
// entry, 2 * m_nIdx bytes are written from the start of the buffer followed by
// a single "\n", and the buffer index is reset to 0.
// Expects m_fOut to be non-null and m_nIdx to be below 1024 (debug asserts).
void CSentSeg::OutputSentence()
{
	ASSERT(m_fOut);
	ASSERT(m_nIdx < 1024);

	if (m_nIdx != 0)
	{
		m_pDoc->IncNumSent();

		// Terminate the buffered data in memory; the terminator itself is not
		// part of what gets written below.
		unsigned char *pTerminator = reinterpret_cast<unsigned char *>(m_ccBuf + m_nIdx);
		*pTerminator = 0;

		m_fOut->Write(m_ccBuf, m_nIdx * 2);
		m_fOut->Write("\n", 1);

		m_nIdx = 0;
	}
}
// Appends to m_saFiles the full path of every file in sDir that matches one of
// the wildcard patterns, then repeats the search in every sub-directory.
//   sDir     - directory to search; search paths are built as sDir + "\" + pattern.
//   sPattern - list of wildcard patterns separated by ',' or ';'
//              (e.g. "*.htm;*.html;*.txt;"). Empty entries are ignored.
// Directories are never added, and a file that matches several patterns is
// added only once.
void CSentSeg::FindAllFiles(CString sDir, CString sPattern)
{
	// Split the pattern list into its individual patterns.
	CStringArray sPat;
	CString sPatterns = sPattern;
	int i;
	while ((i = sPatterns.FindOneOf(",;")) != -1)
	{
		if (i > 0)
			sPat.Add(sPatterns.Left(i));
		sPatterns = sPatterns.Mid(i + 1);
	}
	if (!sPatterns.IsEmpty())
		sPat.Add(sPatterns);

	// Entries from this index on belong to the current directory; only those
	// need to be checked when looking for duplicates.
	const int nFirst = static_cast<int>(m_saFiles.GetSize());

	CFileFind finder;
	BOOL bWorking;
	for (i = 0; i < sPat.GetSize(); i++)
	{
		bWorking = finder.FindFile(sDir + "\\" + sPat[i]);
		while (bWorking)
		{
			bWorking = finder.FindNextFile();

			// A directory whose name happens to match is not an input file.
			if (finder.IsDirectory())
				continue;

			// Patterns can overlap (Windows wildcard matching may also match
			// a file's 8.3 short name, so "*.htm" can hit "x.html"); keep each
			// file once so it is not processed and counted twice.
			const CString sPath = finder.GetFilePath();
			bool bSeen = false;
			for (int j = nFirst; j < m_saFiles.GetSize() && !bSeen; j++)
				bSeen = (m_saFiles[j] == sPath);
			if (!bSeen)
				m_saFiles.Add(sPath);
		}
	}

	// Descend into every real sub-directory ("." and ".." are skipped).
	bWorking = finder.FindFile(sDir + "\\" + "*");
	while (bWorking)
	{
		bWorking = finder.FindNextFile();
		if (finder.IsDirectory() && !finder.IsDots())
			FindAllFiles(finder.GetFilePath(), sPattern);
	}
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?