📄 mad_utf8.c
字号:
/* $Author: peltotal $ $Date: 2006/02/17 08:07:17 $ $Revision: 1.2 $ */
/*
 *   MAD-FLUTELIB: Implementation of FLUTE protocol.
 *   Copyright (c) 2003-2006 TUT - Tampere University of Technology
 *   main authors/contacts: jani.peltotalo@tut.fi and sami.peltotalo@tut.fi
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "flute_inc.h"

/* Map from the most-significant 6 bits of the first byte to the total number
   of bytes in a UTF-8 character. A value of 0 marks a continuation byte
   (10xxxxxx), which is erroneous as the first byte of a character. The
   entries 5 and 6 cover the legacy 5- and 6-byte lead bytes. */
static const unsigned char UTF8_2_ISO_8859_1_len[] = {
    1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, /* erroneous */
    2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 4, 4, 5, 6
};

/* Mask selecting the payload bits of the first byte, indexed by the value
   taken from UTF8_2_ISO_8859_1_len (0 = erroneous, 1..6 = sequence length). */
static const unsigned char UTF8_2_ISO_8859_1_mask[] = {
    0x3F, 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01
};

/* Convert a UTF-8 string to an ISO-8859-1 string.

   mbstr    destination buffer; if it is null (or 'count' is 0) nothing is
            written and only the length of the converted string is computed
   utf8str  null-terminated UTF-8 input
   count    capacity of 'mbstr' in bytes

   No more than 'count' bytes will be written to the output buffer. Every
   input character produces exactly one output byte: its code point if it is
   a well-formed, shortest-form encoding of U+0001..U+00FF, otherwise '?'
   (0x3F). Malformed input (stray continuation bytes, truncated sequences,
   overlong forms) therefore never injects a NUL or a half-decoded value into
   the output.

   Return the size of the converted string in bytes, excl null terminator.
   The terminator is only written if it still fits, so when the return value
   equals 'count' the output is NOT null-terminated. */
int x_utf8s_to_iso_8859_1s(char *mbstr, const char *utf8str, size_t count)
{
    /* Work on unsigned bytes so that shifts and masks do not depend on
       whether plain char is signed on this platform. */
    const unsigned char *src = (const unsigned char *)utf8str;
    const int store = (mbstr != 0 && count != 0);
    int res = 0;

    while (*src != '\0') {
        const int len = UTF8_2_ISO_8859_1_len[*src >> 2];
        /* Number of continuation bytes still expected. An erroneous first
           byte (len == 0) swallows up to four following continuation bytes
           so that one damaged run yields a single replacement character. */
        int missing = (len == 0) ? 4 : len - 1;
        unsigned long u = (unsigned long)(*src & UTF8_2_ISO_8859_1_mask[len]);

        /* The 10xxxxxx test also fails on the terminating NUL and on the
           unexpected start of a new character, which ends the sequence. */
        for (++src; missing > 0 && (*src & 0xC0) == 0x80; ++src, --missing) {
            u = (u << 6) | (unsigned long)(*src & 0x3F);
        }

        if (store) {
            char out = '?';

            /* be sure there is enough space left in the destination buffer;
               compare as size_t so that counts above INT_MAX work */
            if ((size_t)res >= count) {
                return res;
            }

            /* Only ASCII (one byte) and complete two-byte sequences encoding
               U+0080..U+00FF are representable in ISO-8859-1. */
            if (len == 1 ||
                (len == 2 && missing == 0 && u >= 0x80 && u <= 0xFF)) {
                out = (char)(unsigned char)u;
            }
            *mbstr++ = out;
        }
        ++res;
    }

    /* add the terminating null character */
    if (store) {
        /* be sure there is enough space left in the destination buffer */
        if ((size_t)res >= count) {
            return res;
        }
        *mbstr = '\0';
    }

    return res;
}

/* Convert an ISO-8859-1 string to a UTF-8 string. No more than 'count' bytes
   will be written to the output buffer. Return the size of the converted
   string in bytes, excl null terminator.
/* Convert an ISO-8859-1 string to a UTF-8 string.

   utf8str  destination buffer; if it is null (or 'count' is 0) nothing is
            written and only the length of the converted string is computed
   mbstr    null-terminated ISO-8859-1 input
   count    capacity of 'utf8str' in bytes

   No more than 'count' bytes will be written to the output buffer. Bytes
   below 0x80 are copied unchanged; bytes 0x80..0xFF become the two-byte
   sequence 110000xx 10xxxxxx. A two-byte sequence is never split: if it does
   not fit completely, conversion stops in front of it.

   Return the size of the converted string in bytes, excl null terminator.
   The terminator is written whenever a byte is still free after the last
   converted character, so when the return value equals 'count' the output
   is NOT null-terminated. */
int x_iso_8859_1s_to_utf8s(char *utf8str, const char *mbstr, size_t count)
{
    /* Work on unsigned bytes so that the shift below does not depend on
       whether plain char is signed on this platform. */
    const unsigned char *src = (const unsigned char *)mbstr;
    const int store = (utf8str != 0 && count != 0);
    int res = 0;

    /* loop until we reach the end of the ISO-8859-1 string */
    for (; *src != '\0'; ++src) {
        const unsigned char c = *src;

        if (c < 0x80) {
            /* the character needs no mapping if the highest bit is not set */
            if (store) {
                /* compare as size_t so that counts above INT_MAX work */
                if ((size_t)res >= count) {
                    return res;
                }
                *utf8str++ = (char)c;
            }
            ++res;
        } else {
            /* otherwise the character maps to two UTF-8 bytes */
            if (store) {
                if ((size_t)res + 2 > count) {
                    /* The pair does not fit; terminate the output if the
                       one remaining byte allows it. */
                    if ((size_t)res < count) {
                        *utf8str = '\0';
                    }
                    return res;
                }
                *utf8str++ = (char)(0xC0 | (c >> 6));
                *utf8str++ = (char)(0x80 | (c & 0x3F));
            }
            res += 2;
        }
    }

    /* add the terminating null character */
    if (store) {
        /* be sure there is enough space left in the destination buffer */
        if ((size_t)res >= count) {
            return res;
        }
        *utf8str = '\0';
    }

    return res;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -