utf8.c

来自「tcpmp.src.0.72RC1 优秀的多媒体播放器TCPMP的源代码」· C语言代码 · 共 320 行
320 行
/* * Copyright (C) 2001 Peter Harris <peter.harris@hummingbird.com> * Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. *  * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. *  * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA *//* * Convert a string between UTF-8 and the locale's charset. */#include <stdlib.h>#include <string.h>#ifdef HAVE_CONFIG_H#include <config.h>#endif#include "utf8.h"#include "charset.h"#ifdef _WIN32	/* Thanks to Peter Harris <peter.harris@hummingbird.com> for this win32	 * code.	 */#include <stdio.h>#include <windows.h>static unsigned char *make_utf8_string(const wchar_t *unicode){    int size = 0, index = 0, out_index = 0;    unsigned char *out;    unsigned short c;    /* first calculate the size of the target string */    c = unicode[index++];    while(c) {        if(c < 0x0080) {            size += 1;        } else if(c < 0x0800) {            size += 2;        } else {            size += 3;        }        c = unicode[index++];    }	    out = malloc(size + 1);    if (out == NULL)        return NULL;    index = 0;    c = unicode[index++];    while(c)    {        if(c < 0x080) {            out[out_index++] = (unsigned char)c;        } else if(c < 0x800) {            out[out_index++] = 0xc0 | (c >> 6);            out[out_index++] = 0x80 | (c & 0x3f);        } else {            out[out_index++] = 0xe0 | (c >> 12);            out[out_index++] = 0x80 | ((c >> 6) & 0x3f);            out[out_index++] = 0x80 | (c & 0x3f);        }        c = unicode[index++];    }    out[out_index] = 0x00;    return out;}static wchar_t *make_unicode_string(const unsigned char *utf8){    int size = 0, index = 0, out_index = 0;    wchar_t *out;    unsigned char c;    /* first calculate the size of the target string */    c = utf8[index++];    while(c) {        if((c & 0x80) == 0) {            index += 0;        } else if((c & 0xe0) == 0xe0) {            index += 2;        } else {            index += 1;        }        size += 1;        c = utf8[index++];    }	    out = malloc((size + 1) * sizeof(wchar_t));    if (out == NULL)        return NULL;    index = 0;    c = utf8[index++];    while(c)    {        if((c & 0x80) == 0) {            out[out_index++] = c;        } else if((c & 0xe0) == 0xe0) {            out[out_index] = (c & 0x1F) << 12;	        c = utf8[index++];            out[out_index] |= (c & 0x3F) << 6;	        c = utf8[index++];            out[out_index++] |= (c & 0x3F);        } else {            out[out_index] = (c & 0x3F) << 6;	        c = utf8[index++];            out[out_index++] |= (c & 0x3F);        }        c = utf8[index++];    }    out[out_index] = 0;    return out;}int utf8_encode(const char *from, char **to){	wchar_t *unicode;	int wchars, err;	wchars = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from,			strlen(from), NULL, 0);	if(wchars == 0)	{		fprintf(stderr, "Unicode translation error %d\n", GetLastError());		return -1;	}	unicode = calloc(wchars + 1, sizeof(unsigned short));	if(unicode == NULL) 	{		fprintf(stderr, "Out of memory processing string to UTF8\n");		return -1;	}	err = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from, 			strlen(from), unicode, wchars);	if(err != wchars)	{		free(unicode);		fprintf(stderr, "Unicode translation error %d\n", GetLastError());		return -1;	}	/* On NT-based windows systems, we could use WideCharToMultiByte(), but 	 * MS doesn't actually have a consistent API across win32.	 */	*to = make_utf8_string(unicode);	free(unicode);	return 0;}int utf8_decode(const char *from, char **to){    wchar_t *unicode;    int chars, err;    /* On NT-based windows systems, we could use MultiByteToWideChar(CP_UTF8), but      * MS doesn't actually have a consistent API across win32.     */    unicode = make_unicode_string(from);    if(unicode == NULL)     {        fprintf(stderr, "Out of memory processing string from UTF8 to UNICODE16\n");        return -1;    }    chars = WideCharToMultiByte(GetConsoleCP(), WC_COMPOSITECHECK, unicode,            -1, NULL, 0, NULL, NULL);    if(chars == 0)    {        fprintf(stderr, "Unicode translation error %d\n", GetLastError());        free(unicode);        return -1;    }    *to = calloc(chars + 1, sizeof(unsigned char));    if(*to == NULL)     {        fprintf(stderr, "Out of memory processing string to local charset\n");        free(unicode);        return -1;    }    err = WideCharToMultiByte(GetConsoleCP(), WC_COMPOSITECHECK, unicode,             -1, *to, chars, NULL, NULL);    if(err != chars)    {        fprintf(stderr, "Unicode translation error %d\n", GetLastError());        free(unicode);        free(*to);        *to = NULL;        return -1;    }    free(unicode);    return 0;}#else /* End win32. Rest is for real operating systems */#ifdef HAVE_LANGINFO_CODESET#include <langinfo.h>#endifint iconvert(const char *fromcode, const char *tocode,	     const char *from, size_t fromlen,	     char **to, size_t *tolen);static char *current_charset = 0; /* means "US-ASCII" */void convert_set_charset(const char *charset){#ifdef HAVE_LANGINFO_CODESET  if (!charset)    charset = nl_langinfo(CODESET);#endif  if (!charset)    charset = getenv("CHARSET");  free(current_charset);  current_charset = 0;  if (charset && *charset)    current_charset = strdup(charset);}static int convert_buffer(const char *fromcode, const char *tocode,			  const char *from, size_t fromlen,			  char **to, size_t *tolen){  int ret = -1;#ifdef HAVE_ICONV  ret = iconvert(fromcode, tocode, from, fromlen, to, tolen);  if (ret != -1)    return ret;#endif#ifndef HAVE_ICONV /* should be ifdef USE_CHARSET_CONVERT */  ret = charset_convert(fromcode, tocode, from, fromlen, to, tolen);  if (ret != -1)    return ret;#endif  return ret;}static int convert_string(const char *fromcode, const char *tocode,			  const char *from, char **to, char replace){  int ret;  size_t fromlen;  char *s;  fromlen = strlen(from);  ret = convert_buffer(fromcode, tocode, from, fromlen, to, 0);  if (ret == -2)    return -1;  if (ret != -1)    return ret;  s = malloc(fromlen + 1);  if (!s)    return -1;  strcpy(s, from);  *to = s;  for (; *s; s++)    if (*s & ~0x7f)      *s = replace;  return 3;}int utf8_encode(const char *from, char **to){  char *charset;  if (!current_charset)    convert_set_charset(0);  charset = current_charset ? current_charset : "US-ASCII";  return convert_string(charset, "UTF-8", from, to, '#');}int utf8_decode(const char *from, char **to){  char *charset;  if (!current_charset)    convert_set_charset(0);  charset = current_charset ? current_charset : "US-ASCII";  return convert_string("UTF-8", charset, from, to, '?');}#endif
utf8.c - 源码说明

本页面展示了「tcpmp.src.0.72RC1 优秀的多媒体播放器TCPMP的源代码」中的 utf8.c 源码文件，采用 C语言编程语言编写，共 320 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与tcpmp相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?