⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 utf.c

📁 一个UTF的源代码,可以提供参考
💻 C
字号:
#ifdef PLAN9#include	<u.h>#include	<libc.h>#include	<bio.h>#else#include	<sys/types.h>#include	<stdio.h>#include	<stdlib.h>#include	<string.h>#include	<unistd.h>#include	<errno.h>#include	"plan9.h"#endif#include	"hdr.h"/*	the our_* routines are implementations for the corresponding library	routines. for a while, i tried to actually name them wctomb etc	but stopped that after i found a system which made wchar_t an	unsigned char.*/int our_wctomb(char *s, unsigned long wc);int our_mbtowc(wchar_t *p, char *s, unsigned n);enum{	Char1	= Runeself,	Rune1	= Runeself,	Char21	= 0xA1,		Rune21	= 0x0100,	Char22	= 0xF6,		Rune22	= 0x4016,	Char3	= 0xFC,		Rune3	= 0x10000,	/* really 0x38E2E */	Esc	= 0xBE,		Bad	= Runeerror};#ifdef PLAN9int	errno;#endifenum{	T1	= 0x00,	Tx	= 0x80,	T2	= 0xC0,	T3	= 0xE0,	T4	= 0xF0,	T5	= 0xF8,	T6	= 0xFC,	Bit1	= 7,	Bitx	= 6,	Bit2	= 5,	Bit3	= 4,	Bit4	= 3,	Bit5	= 2,	Bit6	= 2,	Mask1	= (1<<Bit1)-1,	Maskx	= (1<<Bitx)-1,	Mask2	= (1<<Bit2)-1,	Mask3	= (1<<Bit3)-1,	Mask4	= (1<<Bit4)-1,	Mask5	= (1<<Bit5)-1,	Mask6	= (1<<Bit6)-1,	Wchar1	= (1UL<<Bit1)-1,	Wchar2	= (1UL<<(Bit2+Bitx))-1,	Wchar3	= (1UL<<(Bit3+2*Bitx))-1,	Wchar4	= (1UL<<(Bit4+3*Bitx))-1,	Wchar5	= (1UL<<(Bit5+4*Bitx))-1#ifndef	EILSEQ	, /* we hate ansi c's comma rules */	EILSEQ	= 123#endif /* PLAN9 */};intour_wctomb(char *s, unsigned long wc){	if(s == 0)		return 0;		/* no shift states */	if(wc & ~Wchar2) {		if(wc & ~Wchar4) {			if(wc & ~Wchar5) {				/* 6 bytes */				s[0] = T6 | ((wc >> 5*Bitx) & Mask6);				s[1] = Tx | ((wc >> 4*Bitx) & Maskx);				s[2] = Tx | ((wc >> 3*Bitx) & Maskx);				s[3] = Tx | ((wc >> 2*Bitx) & Maskx);				s[4] = Tx | ((wc >> 1*Bitx) & Maskx);				s[5] = Tx |  (wc & Maskx);				return 6;			}			/* 5 bytes */			s[0] = T5 |  (wc >> 4*Bitx);			s[1] = Tx | ((wc >> 3*Bitx) & Maskx);			s[2] = Tx | ((wc >> 2*Bitx) & Maskx);			s[3] = Tx | ((wc >> 1*Bitx) & Maskx);			s[4] = Tx |  (wc & Maskx);			return 5;		}		if(wc & ~Wchar3) {			/* 4 bytes */			s[0] = T4 |  (wc >> 3*Bitx);			s[1] = Tx | ((wc >> 2*Bitx) & Maskx);			s[2] = Tx | ((wc >> 1*Bitx) & Maskx);			s[3] = Tx |  (wc & Maskx);			return 4;		}		/* 3 bytes */		s[0] = T3 |  (wc >> 2*Bitx);		s[1] = Tx | ((wc >> 1*Bitx) & Maskx);		s[2] = Tx |  (wc & Maskx);		return 3;	}	if(wc & ~Wchar1) {		/* 2 bytes */		s[0] = T2 | (wc >> 1*Bitx);		s[1] = Tx | (wc & Maskx);		return 2;	}	/* 1 byte */	s[0] = T1 | wc;	return 1;}intour_mbtowc(wchar_t *p, char *s, unsigned n){	uchar *us;	int c0, c1, c2, c3, c4, c5;	unsigned long wc;	wchar_t wchar;	if(s == 0)		return 0;		/* no shift states */	if(n < 1)		goto badlen;	us = (uchar*)s;	c0 = us[0];	if(c0 >= T3) {		if(n < 3)			goto badlen;		c1 = us[1] ^ Tx;		c2 = us[2] ^ Tx;		if((c1|c2) & T2)			goto bad;		if(c0 >= T5) {			if(n < 5)				goto badlen;			c3 = us[3] ^ Tx;			c4 = us[4] ^ Tx;			if((c3|c4) & T2)				goto bad;			if(c0 >= T6) {				/* 6 bytes */				if(n < 6)					goto badlen;				c5 = us[5] ^ Tx;				if(c5 & T2)					goto bad;				wc = ((((((((((c0 & Mask6) << Bitx) |					c1) << Bitx) | c2) << Bitx) |					c3) << Bitx) | c4) << Bitx) | c5;				if(wc <= Wchar5)					goto bad;				*p = wchar = wc;				return 6;			}			/* 5 bytes */			wc = ((((((((c0 & Mask5) << Bitx) |				c1) << Bitx) | c2) << Bitx) |				c3) << Bitx) | c4;			if(wc <= Wchar4)				goto bad;			*p = wchar = wc;			return 5;		}		if(c0 >= T4) {			/* 4 bytes */			if(n < 4)				goto badlen;			c3 = us[3] ^ Tx;			if(c3 & T2)				goto bad;			wc = ((((((c0 & Mask4) << Bitx) |				c1) << Bitx) | c2) << Bitx) |				c3;			if(wc <= Wchar3)				goto bad;			*p = wchar = wc;			return 4;		}		/* 3 bytes */		wc = ((((c0 & Mask3) << Bitx) |			c1) << Bitx) | c2;		if(wc <= Wchar2)			goto bad;		*p = wchar = wc;		return 3;	}	if(c0 >= T2) {		/* 2 bytes */		if(n < 2)			goto badlen;		c1 = us[1] ^ Tx;		if(c1 & T2)			goto bad;		wc = ((c0 & Mask2) << Bitx) |			c1;		if(wc <= Wchar1)			goto bad;		*p = wchar = wc;		return 2;	}	/* 1 byte */	if(c0 >= Tx)		goto bad;	*p = wchar = c0;	return 1;bad:	errno = EILSEQ;	return -1;badlen:	return -2;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -