⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 chinese.c

📁 unix 下c语言拆解汉字字符
💻 C
字号:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * chinese.c - helpers for recognising GBK / GB 2312 double-byte characters
 * in a byte string and for cutting such a string into segments without
 * splitting a double-byte character in half.
 *
 * Define CHINESE_NO_MAIN when compiling to leave out the demo main(), so
 * the helpers can be linked into another program or a test harness.
 */

/* Geometry of the segment table that unicodestr() fills. */
enum {
    UNICODESTR_MAX_ROWS = 10,
    UNICODESTR_ROW_SIZE = 300
};

/*
 * Tell whether ch can be the first byte of a double-byte character.
 *   GB 2312-80 first byte: 0xA1 - 0xFE
 *   GBK        first byte: 0x81 - 0xFE
 * ch must be the byte value as an unsigned quantity (0..255).
 * Returns non-zero when ch is in 0x81..0xFE, 0 otherwise.
 */
int ischn_fb(int ch)
{
    return ch >= 0x81 && ch <= 0xFE;
}

/*
 * Tell whether the byte pair (ch1, ch2) forms a double-byte character.
 *
 *   GBK/1  0xA1A1-0xA9FE
 *   GBK/2  0xB0A1-0xF7FE
 *   GBK/3  0x8140-0xA0FE
 *   GBK/4  0xAA40-0xFEA0
 *   GBK/5  0xA840-0xA9A0
 *   EUDC/1 0xAAA1-0xAFFE
 *   EUDC/2 0xF8A1-0xFEFE
 *   EUDC/3 0xA140-0xA7A0
 *
 *   first byte   0x81-0xFE
 *   second byte  0x40-0x7E, 0x80-0xFE
 *
 * Returns non-zero when both bytes are in range, 0 otherwise.
 */
int ischn_2b(int ch1, int ch2)
{
    return ch1 >= 0x81 && ch1 <= 0xFE &&
           ch2 >= 0x40 && ch2 <= 0xFE && ch2 != 0x7F;
}

/*
 * Classify the byte at index pos of str (len bytes long).
 * Returns:
 *   0  not part of a double-byte character (also for NULL str or pos
 *      outside 0..len-1)
 *   1  first byte of a double-byte character
 *   2  second byte of a double-byte character
 */
int ischn_instr(char *str, int len, int pos)
{
    const unsigned char *ustr = (const unsigned char *)str;
    int i = 0;

    if (str == NULL)
        return 0;
    if (pos < 0 || pos >= len)
        return 0;
    /* Bytes below 0x40 can be neither a first nor a second byte. */
    if (ustr[pos] < 0x40)
        return 0;

    /* Walk from the start so that character boundaries are known. */
    while (i < pos) {
        if (ischn_2b(ustr[i], ustr[i + 1])) {
            i += 2;
            if (i > pos)    /* the pair just consumed ends exactly on pos */
                return 2;
        } else {
            i++;
        }
    }
    return ischn_fb(ustr[pos]);
}

/*
 * Truncate str in place to at most len bytes. When the byte at len-1 is
 * the first byte of a double-byte character, the cut is moved one byte
 * earlier so that no half character is left at the end.
 * A negative len is treated as 0. Returns str (NULL when str is NULL).
 */
char *ChineseStr(char *str, int len)
{
    size_t slen;

    if (str == NULL)
        return NULL;
    if (len < 0)
        len = 0;

    slen = strlen(str);
    if (slen > (size_t)len) {
        if (len > 0 && ischn_instr(str, (int)slen, len - 1) == 1)
            str[len - 1] = '\0';
        else
            str[len] = '\0';
    }
    return str;
}

/*
 * Append n bytes of src to the NUL-terminated string stored in dst, whose
 * total capacity is cap bytes. Returns 0 on success, -1 when dst holds no
 * terminator within cap bytes or the result would not fit.
 */
static int append_bounded(unsigned char *dst, size_t cap,
                          const unsigned char *src, size_t n)
{
    const unsigned char *end = memchr(dst, '\0', cap);
    size_t used;

    if (end == NULL)
        return -1;
    used = (size_t)(end - dst);
    if (n >= cap - used)
        return -1;
    memcpy(dst + used, src, n);
    dst[used + n] = '\0';
    return 0;
}

/*
 * Split _bufstr into segments and separate its double-byte and
 * single-byte characters.
 *
 * Parameters:
 *   _bufstr   NUL-terminated source string.
 *   _cutlen   Segment size; must be > 0. When the string contains at
 *             least one double-byte character, each segment holds
 *             _cutlen characters (a double-byte character counts as one).
 *             When it contains none, each segment holds 2 * _cutlen bytes.
 *   _cutflag  Out: number of segments written to _outbuf. An empty source
 *             string yields one empty segment.
 *   _outbuf   Segment table. Segments are appended to whatever string each
 *             row already holds, so rows must be NUL-terminated (normally
 *             zero-filled) on entry.
 *   _chistr   Receives the double-byte characters, appended to its current
 *             contents. May be NULL to skip. Its size is not known here;
 *             the caller must provide room for strlen(_bufstr) more bytes.
 *   _engstr   Same as _chistr, for the single-byte characters.
 *
 * Returns the number of bytes when the string has no double-byte
 * character, otherwise twice the number of characters. Returns -1 when an
 * argument is invalid, more than 10 segments are needed, or a segment does
 * not fit in its 300-byte row; *_cutflag then holds the number of segments
 * completely written (0 for invalid arguments).
 */
int unicodestr(unsigned char *_bufstr, int _cutlen, int *_cutflag,
               unsigned char _outbuf[10][300], unsigned char *_chistr,
               unsigned char *_engstr)
{
    size_t buflen;
    size_t i;
    size_t nbytes;
    int nchars = 0;
    int has_chinese = 0;
    int row;
    int in_row;

    if (_cutflag == NULL)
        return -1;
    *_cutflag = 0;
    if (_bufstr == NULL || _outbuf == NULL || _cutlen <= 0)
        return -1;

    buflen = strlen((const char *)_bufstr);

    /* Pass 1: count characters and sort them into _chistr / _engstr. */
    for (i = 0; i < buflen; i += nbytes) {
        if (ischn_fb(_bufstr[i])) {
            /* A lone first byte at the very end is taken as is. */
            nbytes = (buflen - i >= 2) ? 2 : 1;
            has_chinese = 1;
            if (_chistr != NULL)
                strncat((char *)_chistr, (const char *)_bufstr + i, nbytes);
        } else {
            nbytes = 1;
            if (_engstr != NULL)
                strncat((char *)_engstr, (const char *)_bufstr + i, 1);
        }
        nchars++;
    }

    if (!has_chinese) {
        /* Single-byte only: fixed-width segments of 2 * _cutlen bytes. */
        size_t seg = (size_t)_cutlen * 2;
        size_t off = 0;

        row = 0;
        do {
            size_t n = (buflen - off < seg) ? buflen - off : seg;

            if (row >= UNICODESTR_MAX_ROWS ||
                append_bounded(_outbuf[row], UNICODESTR_ROW_SIZE,
                               _bufstr + off, n) != 0) {
                *_cutflag = row;
                return -1;
            }
            row++;
            off += n;
        } while (off < buflen);

        *_cutflag = row;
        return nchars;
    }

    /* Contains double-byte characters: _cutlen characters per segment. */
    row = 0;
    in_row = 0;
    for (i = 0; i < buflen; i += nbytes) {
        if (in_row >= _cutlen) {
            row++;
            in_row = 0;
        }
        nbytes = (ischn_fb(_bufstr[i]) && buflen - i >= 2) ? 2 : 1;
        if (row >= UNICODESTR_MAX_ROWS ||
            append_bounded(_outbuf[row], UNICODESTR_ROW_SIZE,
                           _bufstr + i, nbytes) != 0) {
            *_cutflag = row;
            return -1;
        }
        in_row++;
    }

    *_cutflag = row + 1;
    return nchars * 2;
}

#ifndef CHINESE_NO_MAIN
/*
 * Demo driver: cuts a sample string into segments of two characters and
 * prints every segment.
 *
 * NOTE(review): the helpers test for GBK byte ranges, so the sample
 * literal only exercises them as intended when the compiler emits it in
 * GBK (source saved as GBK, or an equivalent execution-charset option) -
 * confirm the build settings.
 *
 * Other inputs worth trying: "2第-一9字s节", "12335588776655443322".
 */
int main(int argc, char **argv)
{
    unsigned char bufstr[51];
    unsigned char outbuf[10][300];
    unsigned char chistr[300];
    unsigned char engstr[300];
    int cutlen = 2;
    int cutflag = 0;
    int i;

    (void)argc;
    (void)argv;

    memset(bufstr, 0x00, sizeof(bufstr));
    memset(chistr, 0x00, sizeof(chistr));
    memset(engstr, 0x00, sizeof(engstr));
    memset(outbuf, 0x00, sizeof(outbuf));

    /* Bounded copy: never reads past the end of the literal. */
    snprintf((char *)bufstr, sizeof(bufstr), "%s", "i其3了41了22连接02");

    printf("the str is [%s]\n", bufstr);
    if (unicodestr(bufstr, cutlen, &cutflag, outbuf, chistr, engstr) < 0) {
        fprintf(stderr, "unicodestr failed\n");
        return EXIT_FAILURE;
    }

    printf("\nmain _the cutflag is =[%d]\n", cutflag);
    for (i = 0; i < cutflag; i++) {
        printf("outstr[%d]---[%s]\n", i, outbuf[i]);
    }
    return 0;
}
#endif /* CHINESE_NO_MAIN */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -