// chinese_get.cpp
#include<iostream.h>
#include<string.h>
#include<fstream.h>
// Keyword alphabet of the tag scanner: 11 symbols in lower case followed by
// the same 11 symbols in upper case. check_key() looks a character up in the
// first 22 entries and reduces the index modulo 11, so both cases share one
// column index. Declared pointer-to-const because it points at a string
// literal, which must never be written through.
const char *statekey="tileh12p<>/TILEH12P<>/";
// Characters that are filtered out of the extracted text. The 32 listed
// characters plus the terminating NUL fill the array exactly; judgedelchar()
// scans all 33 entries, so NUL is treated as a character to drop as well.
char delchar[33]="0123456789&#@ %^=-\"<>/;:!(+)?,._";
// State transition matrix of the tag scanner.
//   row    = current state (0..11); state 0 is the start state
//   column = symbol index returned by check_key(), i.e. the position of the
//            character in statekey modulo 11:  t i l e h 1 2 p < > /
//   value  = next state
// Following the table, the sequences "<title>", "<h1>", "<h2>" and "<p>"
// all end in state 10, and "</" reached from state 1 leads to state 11.
// NOTE(review): the original note named 7, 10, 12 and 15 as final states, but
// this table only has states 0..11 - confirm which states are meant.
int state_switch[12][11]={
//    t   i   l   e   h   1   2   p   <   >   /
    { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 0 , 0  },   // state 0
    { 2 , 0 , 0 , 0 , 7 , 0 , 0 , 9 , 1 , 0 , 11 },   // state 1
    { 0 , 3 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 0 , 0  },   // state 2
    { 4 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 0 , 0  },   // state 3
    { 0 , 0 , 5 , 0 , 0 , 0 , 0 , 0 , 1 , 0 , 0  },   // state 4
    { 0 , 0 , 0 , 6 , 0 , 0 , 0 , 0 , 1 , 0 , 0  },   // state 5
    { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 10, 0  },   // state 6
    { 0 , 0 , 0 , 0 , 0 , 8 , 8 , 0 , 1 , 0 , 0  },   // state 7
    { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 10, 0  },   // state 8
    { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 10, 0  },   // state 9
    { 10, 10, 10, 10, 10, 10, 10, 10, 1 , 10, 10 },   // state 10
    { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 0 , 0  }    // state 11
};
// Table-driven scanner that copies selected text from numbered input files
// to numbered output files (see operate()).
class character_pickde
{
public:
    // Most recently read character. Two bytes are reserved so the buffer can
    // hold one character plus a terminating NUL when it is treated as a C
    // string; a one-byte array is overrun by such use.
    char current_ch[2];
    int state;          // current state (row index into state_switch)
    int befor_state;    // state before the most recent character
    character_pickde(char,int);     // sets the current character and the initial state
    int check_key(char);            // column index of a character in the keyword table, or -1
    int judgeabc(char);             // 1 if the character is an ASCII letter, else 0
    int judgedelchar(char);         // 1 if the character must be dropped, else 0
    char *exchange(int);            // converts a number to a '#'-padded digit buffer
    char *string_join(char*,char*); // builds "<prefix><digits>.txt"
    int judge_char(char*,char*);    // 1 if two 2-byte (Chinese) characters are equal, else 0
    void operate(int);              // runs the extraction for one numbered file
};
//-------------------------------------------------------------------
// Creates a scanner whose current character is `a`; both the current and the
// previous state start out as `b`.
character_pickde::character_pickde(char a,int b)
    : state(b), befor_state(b)
{
    current_ch[0]=a;
}
//-------------------------------------------------------------------
// Looks `e` up in the first 22 entries of statekey (11 symbols, lower case
// then upper case). Returns the matching position modulo 11, so both cases of
// a symbol yield the same column index; returns -1 when `e` is not a keyword
// symbol.
int character_pickde::check_key(char e)
{
    int pos=0;
    while(pos<22)
    {
        if(statekey[pos]==e)
            return pos%11;
        ++pos;
    }
    return -1;
}
//--------------------------------------------------------------------
// Returns 1 when `e` is an ASCII letter ('a'..'z' or 'A'..'Z'), 0 otherwise.
int character_pickde::judgeabc(char e)
{
    const bool is_lower=(e>='a'&&e<='z');
    const bool is_upper=(e>='A'&&e<='Z');
    return (is_lower||is_upper)?1:0;
}
//--------------------------------------------------------------------
// Returns 1 when `e` equals any of the 33 bytes stored in delchar, 0
// otherwise. All 33 array entries are compared.
int character_pickde::judgedelchar(char e)
{
    const char *const last=delchar+33;
    for(const char *p=delchar;p!=last;++p)
    {
        if(*p==e)
            return 1;
    }
    return 0;
}
//----------------------------------------------------------------------
// Compares the first two bytes of `a` and `b` (one double-byte character).
// Returns 1 when both bytes match, 0 otherwise. The second byte is only
// examined when the first one matches.
int character_pickde::judge_char(char *a,char *b)
{
    if(a[0]!=b[0])
        return 0;
    if(a[1]!=b[1])
        return 0;
    return 1;
}
//--------------------------------------------------------------------
// Converts a non-negative number to its decimal digits.
//
// The result is a heap buffer allocated with new[]; the caller owns it and
// must release it with delete[]. The digits are stored most significant
// first and padded on the right with '#' to at least three characters, e.g.
// 7 -> "7##", 42 -> "42#", 123 -> "123". The buffer is NUL-terminated after
// the padding.
//
// 0 yields "0##". Numbers with more than three digits get a correspondingly
// longer buffer; callers that only look at the first three characters will
// see just the leading digits. Negative values produce no digits ("###").
char *character_pickde::exchange(int a)
{
    // Digits are produced least significant first; 3 characters per byte of
    // int is a safe upper bound on the decimal length.
    char digits[3*sizeof(int)+1];
    int count=0;
    if(a==0)
        digits[count++]='0';
    while(a>0)
    {
        digits[count++]=static_cast<char>('0'+a%10);
        a/=10;
    }

    const int width=(count<3)?3:count;
    char *word=new char[width+1];
    int i;
    for(i=0;i<width;i++)
        word[i]=(i<count)?digits[count-1-i]:'#';   // reverse digits, then pad
    word[width]='\0';
    return word;
}
//---------------------------------------------------------------------
// Builds the file name "<a><digits>.txt".
//
// a: NUL-terminated prefix (typically a directory path ending in a separator).
// b: digit buffer of up to three characters; the digits end at the first '#'
//    or NUL, whichever comes first. At most b[0..2] is read.
//
// Returns a NUL-terminated heap buffer allocated with new[]; the caller owns
// it and must release it with delete[].
//
// Only the suffix `b` is searched for the '#' padding, so a '#' inside the
// prefix no longer truncates the name.
char *character_pickde::string_join(char *a,char *b)
{
    const size_t prefix_len=strlen(a);
    // prefix + up to 3 digits + ".txt" (4) + terminating NUL (1)
    char *stem=new char[prefix_len+3+4+1];
    memcpy(stem,a,prefix_len);

    size_t m=prefix_len;
    unsigned int i;
    for(i=0;i<3&&b[i]!='#'&&b[i]!='\0';i++)
        stem[m++]=b[i];

    memcpy(stem+m,".txt",5);   // 5 bytes: includes the terminating NUL
    return stem;
}
//---------------------------------------------------------------------
// Processes input file number `num_name`: reads "<input dir><num>.txt" one
// byte at a time, drives the state machine in state_switch, and writes the
// selected text to "<output dir><num>.txt".
//
// A byte is copied to the output while the `flag` is set and the byte is not
// an ASCII letter, not in delchar and not a newline. The flag is set on the
// character after state 10 has been reached and is cleared (after emitting a
// newline) when state 11 is reached.
//
// If either file cannot be opened, a message is written to cerr and nothing
// is processed.
//
// NOTE(review): state and befor_state are not reset here, so whatever state
// the previous call ended in is the starting state of this call.
void character_pickde::operate(int num_name)
{
    // Arrays rather than pointers to literals: string_join() takes char*.
    char in_dir[]="E:\\中文web页面分类\\程序\\财经\\";
    char out_dir[]="E:\\中文web页面分类\\程序\\处理后的财经\\";

    // exchange() and string_join() return new[]-allocated buffers that the
    // caller must free.
    char *number=exchange(num_name);
    char *in_path=string_join(in_dir,number);
    char *out_path=string_join(out_dir,number);
    delete[] number;

    ifstream infile(in_path);
    ofstream outfile(out_path);
    if(!infile||!outfile)
    {
        cerr<<"cannot open "<<(!infile?in_path:out_path)<<endl;
        delete[] in_path;
        delete[] out_path;
        return;
    }
    delete[] in_path;
    delete[] out_path;

    char e;
    int flag=0;
    // Reading through get(char&) ends the loop on end of file as well as on
    // any read error, and never writes past the one-character member buffer.
    while(infile.get(e))
    {
        current_ch[0]=e;
        const int check=check_key(e);
        if(check!=-1)
            state=state_switch[state][check];   // non-keyword bytes keep the state
        if(befor_state==10)
            flag=1;
        if(flag==1&&judgeabc(e)==0&&judgedelchar(e)==0&&e!='\n')
            outfile.put(e);
        if(flag==1&&state==11)
        {
            outfile.put('\n');
            flag=0;
        }
        befor_state=state;
    }
    infile.close();
    outfile.close();
}
//-----------------------------------------------------------------------
// Asks for the first file number and the number of files, then runs the
// extraction for each file number in [start_n, start_n + file_number).
// Returns 0 on success and 1 when the input is not two integers.
int main()
{
    int start_n=1,file_number=1;
    cout<<"请输入所要提取的开始文件和文件数:";
    cin>>start_n;
    cin>>file_number;
    if(!cin)
    {
        cerr<<"invalid input: expected two integers"<<endl;
        return 1;
    }
    for(int i=start_n;i<start_n+file_number;i++)
    {
        // A fresh scanner per file, so the state left at the end of one file
        // cannot influence the next one.
        character_pickde ch_pi('0',0);
        ch_pi.operate(i);
    }
    cout<<"结束";
    return 0;
}
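/*
 * Code-viewer page residue (not part of the program) - keyboard shortcuts:
 *   copy code:        Ctrl + C
 *   search code:      Ctrl + F
 *   full screen:      F11
 *   switch theme:     Ctrl + Shift + D
 *   show shortcuts:   ?
 *   larger font:      Ctrl + =
 *   smaller font:     Ctrl + -
 */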