euc_jp_and_sjis.c

来自「PostgreSQL7.4.6 for Linux」· C语言 代码 · 共 457 行

C
457
字号
/*-------------------------------------------------------------------------
 *
 *	  EUC_JP, SJIS and MULE_INTERNAL
 *
 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c,v 1.6 2003/08/04 02:40:07 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"

/*
 * SJIS alternative code.
 * This code is emitted when no mapping EUC -> SJIS is defined.
 * PGEUCALTCODE plays the same role in the SJIS -> EUC direction.
 */
#define PGSJISALTCODE 0x81ac
#define PGEUCALTCODE 0xa2ae

/*
 * conversion table between SJIS UDC (IBM kanji) and EUC_JP
 */
#include "sjis.map"

/*
 * Multiplier used to size the intermediate MULE_INTERNAL buffer relative
 * to the length of the source string.
 */
#define ENCODING_GROWTH_RATE 4

PG_FUNCTION_INFO_V1(euc_jp_to_sjis);
PG_FUNCTION_INFO_V1(sjis_to_euc_jp);
PG_FUNCTION_INFO_V1(euc_jp_to_mic);
PG_FUNCTION_INFO_V1(mic_to_euc_jp);
PG_FUNCTION_INFO_V1(sjis_to_mic);
PG_FUNCTION_INFO_V1(mic_to_sjis);

extern Datum euc_jp_to_sjis(PG_FUNCTION_ARGS);
extern Datum sjis_to_euc_jp(PG_FUNCTION_ARGS);
extern Datum euc_jp_to_mic(PG_FUNCTION_ARGS);
extern Datum mic_to_euc_jp(PG_FUNCTION_ARGS);
extern Datum sjis_to_mic(PG_FUNCTION_ARGS);
extern Datum mic_to_sjis(PG_FUNCTION_ARGS);

/* ----------
 * conv_proc(
 *		INTEGER,	-- source encoding id
 *		INTEGER,	-- destination encoding id
 *		CSTRING,	-- source string (null terminated C string)
 *		CSTRING,	-- destination string (null terminated C string)
 *		INTEGER		-- source string length
 * ) returns VOID;
 * ----------
 */

static void sjis2mic(unsigned char *sjis, unsigned char *p, int len);
static void mic2sjis(unsigned char *mic, unsigned char *p, int len);
static void euc_jp2mic(unsigned char *euc, unsigned char *p, int len);
static void mic2euc_jp(unsigned char *mic, unsigned char *p, int len);

/*
 * EUC_JP ---> SJIS
 *
 * Converts in two steps through a temporary MULE_INTERNAL buffer:
 * EUC_JP -> MIC -> SJIS.  The result is written to the destination
 * string (argument 3) and is NUL terminated.
 */
Datum
euc_jp_to_sjis(PG_FUNCTION_ARGS)
{
	unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
	int			len = PG_GETARG_INT32(4);
	unsigned char *buf;

	Assert(PG_GETARG_INT32(0) == PG_EUC_JP);
	Assert(PG_GETARG_INT32(1) == PG_SJIS);
	Assert(len >= 0);

	/* +1 leaves room for the terminating NUL even when len is 0 */
	buf = palloc(len * ENCODING_GROWTH_RATE + 1);
	euc_jp2mic(src, buf, len);
	mic2sjis(buf, dest, strlen((char *) buf));
	pfree(buf);

	PG_RETURN_VOID();
}

/*
 * SJIS ---> EUC_JP
 *
 * Converts in two steps through a temporary MULE_INTERNAL buffer:
 * SJIS -> MIC -> EUC_JP.  The result is written to the destination
 * string (argument 3) and is NUL terminated.
 */
Datum
sjis_to_euc_jp(PG_FUNCTION_ARGS)
{
	unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
	int			len = PG_GETARG_INT32(4);
	unsigned char *buf;

	Assert(PG_GETARG_INT32(0) == PG_SJIS);
	Assert(PG_GETARG_INT32(1) == PG_EUC_JP);
	Assert(len >= 0);

	/* +1 leaves room for the terminating NUL even when len is 0 */
	buf = palloc(len * ENCODING_GROWTH_RATE + 1);
	sjis2mic(src, buf, len);
	mic2euc_jp(buf, dest, strlen((char *) buf));
	pfree(buf);

	PG_RETURN_VOID();
}

/*
 * EUC_JP ---> MIC (fmgr entry point, thin wrapper around euc_jp2mic)
 */
Datum
euc_jp_to_mic(PG_FUNCTION_ARGS)
{
	unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
	int			len = PG_GETARG_INT32(4);

	Assert(PG_GETARG_INT32(0) == PG_EUC_JP);
	Assert(PG_GETARG_INT32(1) == PG_MULE_INTERNAL);
	Assert(len >= 0);

	euc_jp2mic(src, dest, len);

	PG_RETURN_VOID();
}

/*
 * MIC ---> EUC_JP (fmgr entry point, thin wrapper around mic2euc_jp)
 */
Datum
mic_to_euc_jp(PG_FUNCTION_ARGS)
{
	unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
	int			len = PG_GETARG_INT32(4);

	Assert(PG_GETARG_INT32(0) == PG_MULE_INTERNAL);
	Assert(PG_GETARG_INT32(1) == PG_EUC_JP);
	Assert(len >= 0);

	/* the destination encoding is EUC_JP, so the EUC_JP converter is used */
	mic2euc_jp(src, dest, len);

	PG_RETURN_VOID();
}

/*
 * SJIS ---> MIC (fmgr entry point, thin wrapper around sjis2mic)
 */
Datum
sjis_to_mic(PG_FUNCTION_ARGS)
{
	unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
	int			len = PG_GETARG_INT32(4);

	Assert(PG_GETARG_INT32(0) == PG_SJIS);
	Assert(PG_GETARG_INT32(1) == PG_MULE_INTERNAL);
	Assert(len >= 0);

	sjis2mic(src, dest, len);

	PG_RETURN_VOID();
}

/*
 * MIC ---> SJIS (fmgr entry point, thin wrapper around mic2sjis)
 */
Datum
mic_to_sjis(PG_FUNCTION_ARGS)
{
	unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
	int			len = PG_GETARG_INT32(4);

	Assert(PG_GETARG_INT32(0) == PG_MULE_INTERNAL);
	Assert(PG_GETARG_INT32(1) == PG_SJIS);
	Assert(len >= 0);

	mic2sjis(src, dest, len);

	PG_RETURN_VOID();
}

/*
 * SJIS ---> MIC
 *
 * sjis: source string; conversion stops at a NUL byte or once len bytes
 *		 have been consumed, whichever comes first.
 * p:	 destination buffer; always NUL terminated on return.  The caller
 *		 must supply a buffer large enough for the result.
 * len:	 number of source bytes to convert.
 */
static void
sjis2mic(unsigned char *sjis, unsigned char *p, int len)
{
	int			c1,
				c2,
/* Eiji Tokuya patched begin */
				i,
				k,
				k2;

/* Eiji Tokuya patched end */

	/* "len > 0": nothing more is read once len bytes have been consumed */
	while (len > 0 && (c1 = *sjis++))
	{
		if (c1 >= 0xa1 && c1 <= 0xdf)
		{
			/* JIS X0201 (1 byte kana) */
			len--;
			*p++ = LC_JISX0201K;
			*p++ = c1;
		}
		else if (c1 > 0x7f)
		{
			/*
			 * JIS X0208, X0212, user defined extended characters
			 */
			c2 = *sjis++;
			k = (c1 << 8) + c2;

			/*
			 * Two source bytes were consumed whichever branch below is
			 * taken, so account for them once here.
			 */
			len -= 2;

/* Eiji Tokuya patched begin */
			if (k >= 0xed40 && k < 0xf040)
			{
				/*
				 * NEC selection IBM kanji: rewrite the code to the SJIS
				 * value listed in ibmkanji, if there is one.  The table
				 * is terminated by an entry holding 0xffff.
				 */
				for (i = 0;; i++)
				{
					k2 = ibmkanji[i].nec;
					if (k2 == 0xffff)
						break;
					if (k2 == k)
					{
						k = ibmkanji[i].sjis;
						c1 = (k >> 8) & 0xff;
						c2 = k & 0xff;
						/* stop here so the rewritten k is not matched again */
						break;
					}
				}
			}

			if (k < 0xeb3f)
/* Eiji Tokuya patched end */
			{
				/* JIS X0208 */
				*p++ = LC_JISX0208;
				*p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e);
				*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
			}
/* Eiji Tokuya patched begin */
			else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc))
			{
				/*
				 * NEC selection IBM kanji and other codes without a
				 * defined mapping: emit the EUC alternative code.
				 */
/* Eiji Tokuya patched end */
				*p++ = LC_JISX0208;
				*p++ = PGEUCALTCODE >> 8;
				*p++ = PGEUCALTCODE & 0xff;
			}
			else if (k >= 0xf040 && k < 0xf540)
			{
				/*
				 * UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 -
				 * 0x7e7e EUC 0xf5a1 - 0xfefe
				 */
				*p++ = LC_JISX0208;
				c1 -= 0x6f;
				*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
				*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
			}
			else if (k >= 0xf540 && k < 0xfa40)
			{
				/*
				 * UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 -
				 * 0x7e7e EUC 0x8ff5a1 - 0x8ffefe
				 */
				*p++ = LC_JISX0212;
				c1 -= 0x74;
				*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
				*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
			}
			else if (k >= 0xfa40)
			{
				/*
				 * mapping IBM kanji to X0208 and X0212
				 *
				 * If the code is not listed in ibmkanji, nothing is
				 * written for this character.
				 */
				for (i = 0;; i++)
				{
					k2 = ibmkanji[i].sjis;
					if (k2 == 0xffff)
						break;
					if (k2 == k)
					{
						k = ibmkanji[i].euc;
						if (k >= 0x8f0000)
						{
							*p++ = LC_JISX0212;
							*p++ = 0x80 | ((k & 0xff00) >> 8);
							*p++ = 0x80 | (k & 0xff);
						}
						else
						{
							*p++ = LC_JISX0208;
							*p++ = 0x80 | (k >> 8);
							*p++ = 0x80 | (k & 0xff);
						}
						/* one source character yields one output character */
						break;
					}
				}
			}
			/* k == 0xeb3f matches no branch: the two bytes are dropped */
		}
		else
		{
			/* should be ASCII */
			len--;
			*p++ = c1;
		}
	}
	*p = '\0';
}

/*
 * MIC ---> SJIS
 *
 * mic: source string in MULE_INTERNAL; conversion stops at a NUL byte or
 *		once len bytes have been accounted for, whichever comes first.
 * p:	destination buffer; always NUL terminated on return.
 * len: number of source bytes to convert.
 *
 * Characters that cannot be expressed in SJIS are replaced by
 * PGSJISALTCODE.
 */
static void
mic2sjis(unsigned char *mic, unsigned char *p, int len)
{
	int			c1,
				c2,
				k;

	/* "len > 0": nothing more is read once len bytes have been consumed */
	while (len > 0 && (c1 = *mic))
	{
		/* charge the whole character to len, then step past its lead byte */
		len -= pg_mic_mblen(mic++);

		if (c1 == LC_JISX0201K)
			*p++ = *mic++;
		else if (c1 == LC_JISX0208)
		{
			c1 = *mic++;
			c2 = *mic++;
			k = (c1 << 8) | (c2 & 0xff);
			if (k >= 0xf5a1)
			{
				/* UDC1 */
				c1 -= 0x54;
				*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f;
			}
			else
				*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1);
			*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
		}
		else if (c1 == LC_JISX0212)
		{
			int			i,
						k2;

			c1 = *mic++;
			c2 = *mic++;
			k = c1 << 8 | c2;
			if (k >= 0xf5a1)
			{
				/* UDC2 */
				c1 -= 0x54;
				*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74;
				*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
			}
			else
			{
				/*
				 * IBM kanji: search ibmkanji for the low 16 bits of the
				 * EUC code; fall back to the alternative code when the
				 * end-of-table marker is reached.
				 */
				for (i = 0;; i++)
				{
					k2 = ibmkanji[i].euc & 0xffff;
					if (k2 == 0xffff)
					{
						*p++ = PGSJISALTCODE >> 8;
						*p++ = PGSJISALTCODE & 0xff;
						break;
					}
					if (k2 == k)
					{
						k = ibmkanji[i].sjis;
						*p++ = k >> 8;
						*p++ = k & 0xff;
						break;
					}
				}
			}
		}
		else if (c1 > 0x7f)
		{
			/*
			 * cannot convert to SJIS!
			 *
			 * NOTE(review): only the lead byte has been stepped over
			 * here, so any trailing bytes of this character are examined
			 * again by the next loop iterations -- confirm this is
			 * intended.
			 */
			*p++ = PGSJISALTCODE >> 8;
			*p++ = PGSJISALTCODE & 0xff;
		}
		else
		{
			/* should be ASCII */
			*p++ = c1;
		}
	}
	*p = '\0';
}

/*
 * EUC_JP ---> MIC
 *
 * euc: source string; conversion stops at a NUL byte or once len bytes
 *		have been consumed, whichever comes first.
 * p:	destination buffer; always NUL terminated on return.
 * len: number of source bytes to convert.
 */
static void
euc_jp2mic(unsigned char *euc, unsigned char *p, int len)
{
	int			c1;

	/* "len > 0": nothing more is read once len bytes have been consumed */
	while (len > 0 && (c1 = *euc++))
	{
		if (c1 == SS2)
		{
			/* 1 byte kana? */
			len -= 2;
			*p++ = LC_JISX0201K;
			*p++ = *euc++;
		}
		else if (c1 == SS3)
		{
			/* JIS X0212 kanji? */
			len -= 3;
			*p++ = LC_JISX0212;
			*p++ = *euc++;
			*p++ = *euc++;
		}
		else if (c1 & 0x80)
		{
			/* kanji? */
			len -= 2;
			*p++ = LC_JISX0208;
			*p++ = c1;
			*p++ = *euc++;
		}
		else
		{
			/* should be ASCII */
			len--;
			*p++ = c1;
		}
	}
	*p = '\0';
}

/*
 * MIC ---> EUC_JP
 *
 * mic: source string in MULE_INTERNAL; conversion stops at a NUL byte or
 *		once len bytes have been accounted for, whichever comes first.
 * p:	destination buffer; always NUL terminated on return.
 * len: number of source bytes to convert.
 *
 * Lead bytes that have no EUC_JP equivalent are handed to
 * pg_print_bogus_char().
 */
static void
mic2euc_jp(unsigned char *mic, unsigned char *p, int len)
{
	int			c1;

	/* "len > 0": nothing more is read once len bytes have been consumed */
	while (len > 0 && (c1 = *mic))
	{
		/* charge the whole character to len, then step past its lead byte */
		len -= pg_mic_mblen(mic++);

		if (c1 == LC_JISX0201K)
		{
			*p++ = SS2;
			*p++ = *mic++;
		}
		else if (c1 == LC_JISX0212)
		{
			*p++ = SS3;
			*p++ = *mic++;
			*p++ = *mic++;
		}
		else if (c1 == LC_JISX0208)
		{
			*p++ = *mic++;
			*p++ = *mic++;
		}
		else if (c1 > 0x7f)
		{
			/* cannot convert to EUC_JP! */
			mic--;				/* back up to the lead byte for the helper */
			pg_print_bogus_char(&mic, &p);
		}
		else
		{
			/* should be ASCII */
			*p++ = c1;
		}
	}
	*p = '\0';
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?