📄 mycode.h
字号:
#include<stdio.h>
#include<string.h>
#include<locale.h>
#include<stdlib.h>
#include<malloc.h>
#include<io.h>
#include<wchar.h>
// One entry of the word table passed to GetItems/GetItems2.
// NOTE(review): no visible code fills these fields (the only assignment is
// commented out), so the meanings below are inferred from the names.
typedef struct word_item{
wchar_t *word; // presumably the word text
unsigned freq; // presumably the number of occurrences of the word
}WordItem;
// Delimiter characters (space, newline, carriage return, tab) that
// GetUnicStringArray looks for when scanning backwards to the start of a word.
wchar_t *Puncs=L" \n\r\t";
int LoadUnicText(wchar_t * &text, const wchar_t *filename);// Load a file into text; the file must have been saved from Word in Unicode encoding
int wcs_cmp(const void *p,const void *q);// Sort comparator (the author describes it as phonetic-order sorting; it calls wcscmp)
unsigned int gbk2uni(const wchar_t *inname,const wchar_t *outname);
// Reads a GBK file and writes a Unicode file
wchar_t **GetUnicStringArray(wchar_t *TEXT,int char_num);// Build the index of word starts
int stringcmp (const wchar_t *string1,const wchar_t *string2);
// Compare the word entries of two strings and give the length of the shared entry
int stringcmp2 (const wchar_t *string1,const wchar_t *string2);
// Compare word entry plus part-of-speech tag of two strings and give the shared length, e.g. a word followed by "/n"
void GetItems(WordItem *items,wchar_t**array,int array_num,FILE *output);
// Collect the entries and write them out in sorted order. Parameters: word-table struct array, array of string pointers, number of entries, output FILE
void GetItems2(WordItem *items,wchar_t**array,int array_num,FILE *output);
// Same as GetItems, but entries are grouped and written together with their part-of-speech tag. Parameters: word-table struct array, array of string pointers, number of entries, output FILE
int writefile(const wchar_t *filename,FILE *out);
// Write the text of the input file to out
int countfilenum(const wchar_t *filename);// Count the characters in the input file
//////////////////////////////////////////////////
// qsort() comparator for an array of wide-string pointers.
// p and q each point at one array element (a wchar_t *); the two strings are
// ordered with wcscmp, i.e. by wide-character value.
int wcs_cmp(const void *p,const void *q)
{
    const wchar_t *const *left=(const wchar_t *const *)p;
    const wchar_t *const *right=(const wchar_t *const *)q;
    return wcscmp(*left,*right);
}
// Load a UTF-16 file (e.g. saved from Word as "Unicode") into a newly
// allocated, NUL-terminated wide string.
//
// text     - in: must be NULL (a non-NULL value is rejected so that a buffer
//            the caller already owns is never overwritten and leaked);
//            out: the loaded text, with a leading byte-order mark (U+FEFF)
//            removed. The pointer is the allocation base, so the caller may
//            release it with free().
// filename - path of the file to read.
//
// Returns the number of characters stored (excluding the byte-order mark and
// the terminating NUL), or 0 on failure after showing a message box.
int LoadUnicText(wchar_t * &text, const wchar_t *filename)
{
    FILE *in=_wfopen(filename,L"rb");
    if(in==NULL){
        AfxMessageBox(L"Can't open file!");
        return 0;
    }

    // _filelength reports -1 on error; treat that as an empty file.
    long byte_len=_filelength(_fileno(in));
    size_t capacity=0;
    if(byte_len>0){
        capacity=(size_t)byte_len/sizeof(wchar_t);
    }

    // One extra element for the terminating NUL.
    wchar_t *buf=NULL;
    if(text==NULL){
        buf=(wchar_t*)calloc(capacity+1,sizeof(wchar_t));
    }
    if(buf==NULL){
        AfxMessageBox(L"内存分配失败!\n");
        fclose(in);   // close only our own stream, not every open stream
        return 0;
    }

    size_t count=0;
    int at_start=1;
    while(count<capacity){
        wint_t ch=fgetwc(in);
        // WEOF can collide with a real 0xFFFF code unit; confirm with the
        // stream state before stopping.
        if(ch==WEOF && (feof(in) || ferror(in))){
            break;
        }
        if(at_start){
            at_start=0;
            if(ch==0xFEFF){
                continue;   // drop the byte-order mark instead of storing it
            }
        }
        buf[count++]=(wchar_t)ch;
    }
    buf[count]=L'\0';

    fclose(in);
    text=buf;
    return (int)count;
}
// Convert a GBK-encoded text file into a UTF-16 file.
//
// The input is opened in text mode under the "chs" locale so that fgetwc
// decodes multibyte characters; the output is opened in binary mode and starts
// with a byte-order mark (U+FEFF).
//
// inname  - path of the file to read.
// outname - path of the file to create.
//
// Returns the number of characters copied (not counting the byte-order mark),
// or 0 if either file cannot be opened.
// Side effect: changes the process locale via setlocale(LC_ALL,"chs").
unsigned int gbk2uni(const wchar_t *inname,const wchar_t *outname)
{
    setlocale(LC_ALL,"chs");

    FILE *in=_wfopen(inname,L"rt");
    if(in==NULL){
        AfxMessageBox(L"Cant open file!");
        return 0;
    }
    FILE *out=_wfopen(outname,L"wb");
    if(out==NULL){
        AfxMessageBox(L"Cant open file!");
        fclose(in);   // do not leak the input stream
        return 0;
    }

    // Upper bound on the number of characters: the byte length of the input.
    long length=_filelength(_fileno(in));

    fputwc(0xFEFF,out);   // byte-order mark

    unsigned int copied=0;
    while((long)copied<length){
        wint_t ch=fgetwc(in);
        if(ch==WEOF){
            break;
        }
        fputwc((wchar_t)ch,out);
        copied++;
    }

    // Close only the two streams opened here; fcloseall() would also close
    // streams owned by the caller.
    fclose(out);
    fclose(in);
    return copied;
}
// Build a sorted index of the tagged tokens in TEXT.
//
// A token is recognised by a '/' that is followed by a character below 128
// (the part-of-speech tag). For each such '/', the start of the word is found
// by scanning backwards to the nearest delimiter in Puncs, or to the start of
// TEXT if no delimiter precedes the word.
//
// TEXT     - the text to index; must be non-NULL, non-empty and readable up to
//            index char_num (the terminating NUL).
// char_num - number of characters of TEXT to scan.
//
// Returns a calloc'ed array of char_num+1 pointers into TEXT. The first k
// entries (k is reported in a message box) point at token starts and are
// sorted with wcs_cmp; the remaining entries are NULL except the last, which
// points at the literal "END!". Returns NULL on empty input or allocation
// failure. The caller owns the array (not the strings it points to).
wchar_t **GetUnicStringArray(wchar_t *TEXT,int char_num)
{
    if(TEXT==NULL || *TEXT==L'\0') {
        AfxMessageBox(L"GetUnicStringArray_源文本为空!\n");
        return NULL;
    }

    wchar_t **pp=NULL;
    if(char_num>=0){
        pp=(wchar_t**)calloc((size_t)char_num+1,sizeof(wchar_t*));
    }
    if(!pp){
        AfxMessageBox(L"row内存分配失败!\n");
        return NULL;
    }
    pp[char_num]=(wchar_t*)L"END!";   // end marker in the last slot

    int count=0;
    for(int j=0;j<char_num;j++){
        if(TEXT[j]==L'/' && TEXT[j+1]<128){
            // Walk back to the delimiter before the word, but never step
            // before TEXT[0] when the text does not start with a delimiter.
            int n_head=1;
            while(n_head<=j && !wcschr(Puncs,TEXT[j-n_head])){
                n_head++;
            }
            pp[count]=&TEXT[j-n_head+1];   // first character of the token
            count++;
        }
    }

    CString msg;
    msg.Format(L"文本中有词例_%d个",count);
    AfxMessageBox(msg);

    qsort(pp,count,sizeof *pp,wcs_cmp);
    return pp;
}
/*
int stringcmp (const wchar_t *string1,const wchar_t *string2)
{//比较两个字符串中相同词条的长度
if(!string1 || !string2) return -1;
wchar_t *p=(wchar_t *)string1,*q=(wchar_t *)string2;
while(p && q){
if(*p!=*q || *(p)==L'/' || *(q)==L'/') return p-string1;
p++,q++;
}
return 0;
}
*/
// Length of a "word/tag" item at the start of string.
//
// The item runs from string[0] through the '/' and the tag that follows it,
// and ends at the first space after the '/' or at the end of the string,
// whichever comes first.
//
// Returns the number of characters in the item (the index of the terminating
// space or NUL), 0 if the string contains no '/', or -1 if string is NULL.
int itemlen(const wchar_t *string)
{
    if(!string) return -1;

    const wchar_t *p=string;
    while(*p!=L'\0' && *p!=L'/'){
        p++;
    }
    if(*p==L'\0'){
        return 0;   // no '/': not a tagged item
    }

    p++;   // step over the '/'
    while(*p!=L'\0' && *p!=L' '){
        p++;
    }
    return (int)(p-string);
}
// Compare the words (the text before the first '/') of two tagged items.
//
// Returns the word length when both strings contain a '/' and the words are
// identical, 0 when the words differ, are empty, or either string has no '/',
// and -1 when either argument is NULL.
int stringcmp(const wchar_t *string1,const wchar_t *string2)
{
    if(!string1 || !string2) return -1;

    // Measure each word, stopping at the NUL so that a string without '/'
    // (such as an end marker) is never read past its end.
    int len1=0;
    while(string1[len1]!=L'\0' && string1[len1]!=L'/'){
        len1++;
    }
    int len2=0;
    while(string2[len2]!=L'\0' && string2[len2]!=L'/'){
        len2++;
    }
    if(string1[len1]!=L'/' || string2[len2]!=L'/') return 0;
    if(len1!=len2) return 0;

    for(int k=0;k<len1;k++){
        if(string1[k]!=string2[k]) return 0;
    }
    return len1;
}
// Compare two tagged items including their part-of-speech tag.
//
// The extent of each item is taken from itemlen(). Returns that length when
// both items have the same length and identical characters, 0 otherwise, and
// -1 when either argument is NULL.
int stringcmp2 (const wchar_t *string1,const wchar_t *string2)
{
    if(string1==NULL || string2==NULL){
        return -1;
    }

    const int first_len=itemlen(string1);
    const int second_len=itemlen(string2);
    if(first_len!=second_len){
        return 0;
    }
    if(first_len<0){
        return 0;
    }

    int matched=0;
    for(;matched<first_len;matched++){
        if(string1[matched]!=string2[matched]){
            return 0;
        }
    }
    return matched;
}
// Write one line per distinct word of a sorted token array: the word, a tab,
// and the number of consecutive tokens that share that word.
//
// items     - unused; kept for interface compatibility.
// array     - token pointers, sorted so that equal words are adjacent; each
//             token has the form "word/tag". NULL entries are skipped.
// array_num - number of entries in array.
// output    - stream that receives the "word<TAB>count" lines.
//
// Every entry, including the last one, is counted, and array[array_num] is
// never read. At most 20 characters of a word are written.
// Side effect: changes the process locale via setlocale(LC_ALL,"chs").
void GetItems(WordItem *items,wchar_t**array,int array_num,FILE *output)
{
    enum { MAX_WORD_CHARS=20 };
    (void)items;

    setlocale(LC_ALL,"chs");
    if(!array || !output) return;

    for(int i=0;i<array_num;i++){
        if(!array[i]) continue;

        // Absorb the run of following tokens that carry the same word.
        int freq=1;
        while(i+1<array_num && stringcmp(array[i],array[i+1])>=1){
            i++;
            freq++;
        }

        // Print the word: everything before the '/', bounded by the NUL.
        const wchar_t *word=array[i];
        for(int j=0;j<MAX_WORD_CHARS && word[j]!=L'\0' && word[j]!=L'/';j++){
            fprintf(output,"%lc",(wint_t)word[j]);
        }
        fprintf(output,"\t%d\n",freq);
    }
}
// Write one line per distinct "word/tag" item of a sorted token array: the
// word, a tab, the part-of-speech tag, a tab, and the number of consecutive
// tokens that share that word and tag.
//
// items     - unused; kept for interface compatibility.
// array     - token pointers, sorted so that equal items are adjacent; each
//             token has the form "word/tag " (tag ended by a space). NULL
//             entries are skipped.
// array_num - number of entries in array.
// output    - stream that receives the "word<TAB>tag<TAB>count" lines.
//
// Every entry, including the last one, is counted, and array[array_num] is
// never read. At most 20 characters of a word are written, and the tag scan
// stops at index 30.
// Side effect: changes the process locale via setlocale(LC_ALL,"chs").
void GetItems2(WordItem *items,wchar_t**array,int array_num,FILE *output)
{
    enum { MAX_WORD_CHARS=20, MAX_ITEM_CHARS=30 };
    (void)items;

    setlocale(LC_ALL,"chs");
    if(!array || !output) return;

    for(int i=0;i<array_num;i++){
        if(!array[i]) continue;

        // Absorb the run of following tokens with the same word and tag.
        int freq=1;
        while(i+1<array_num && stringcmp2(array[i],array[i+1])>=1){
            i++;
            freq++;
        }

        const wchar_t *entry=array[i];

        // Word: everything before the '/', bounded by the NUL.
        int j=0;
        for(;j<MAX_WORD_CHARS && entry[j]!=L'\0' && entry[j]!=L'/';j++){
            fprintf(output,"%lc",(wint_t)entry[j]);
        }
        fprintf(output,"\t");

        // Tag: skip the character the word scan stopped on, then print up to
        // the next space. Nothing follows if the string ended in the word.
        if(entry[j]!=L'\0'){
            for(j++;j<MAX_ITEM_CHARS && entry[j]!=L'\0' && entry[j]!=L' ';j++){
                fprintf(output,"%lc",(wint_t)entry[j]);
            }
        }
        fprintf(output,"\t%d\n",freq);
    }
}
// Number of wchar_t-sized units in a file, computed from its byte length.
// A byte-order mark, if present, is included in the count.
//
// filename - path of the file to measure.
//
// Returns the byte length divided by sizeof(wchar_t), or 0 if the file cannot
// be opened (after showing a message box) or its length cannot be determined.
int countfilenum(const wchar_t *filename)
{
    FILE *in=_wfopen(filename,L"rb");
    if(in==NULL){
        AfxMessageBox(L"Can't open file!");
        return 0;
    }

    long byte_len=_filelength(_fileno(in));
    fclose(in);

    // _filelength reports -1 on error; dividing that as an unsigned value
    // would produce a huge bogus count.
    if(byte_len<0){
        return 0;
    }
    return (int)((size_t)byte_len/sizeof(wchar_t));
}
// Append the wide characters of a file to an already open stream.
// The file is read in binary mode, so a leading byte-order mark is copied too.
//
// filename - path of the file to read.
// out      - destination stream; nothing is done when it is NULL.
//
// Returns the number of characters copied, or 0 if the file cannot be opened
// (after showing a message box).
int writefile(const wchar_t *filename,FILE *out)
{
    if(out==NULL){
        return 0;
    }

    FILE *in=_wfopen(filename,L"rb");
    if(in==NULL){
        AfxMessageBox(L"Can't open file!");
        return 0;
    }

    // _filelength reports -1 on error; copy nothing in that case.
    long byte_len=_filelength(_fileno(in));
    int char_num=0;
    if(byte_len>0){
        char_num=(int)((size_t)byte_len/sizeof(wchar_t));
    }

    int copied=0;
    while(copied<char_num){
        wint_t ch=fgetwc(in);
        // Stop on a real end-of-file or read error rather than writing the
        // WEOF value into the output.
        if(ch==WEOF && (feof(in) || ferror(in))){
            break;
        }
        fputwc((wchar_t)ch,out);
        copied++;
    }

    fclose(in);
    return copied;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -