📄 utf8.c
字号:
/**************************************************
 *
 * utf8.c
 *
 * CVS ID:   $Id: utf8.c,v 1.10 2006/10/23 14:29:21 trubac Exp $
 * Author:   Ondrej Trubac [OT] - STM
 * Date:     $Date: 2006/10/23 14:29:21 $
 * Revision: $Revision: 1.10 $
 *
 * Description:
 *
 *   Widechar/UTF8 conversion functions
 *
 ***************************************************
 *
 * COPYRIGHT (C) ST Microelectronics 2005
 *            All Rights Reserved
 *
 ****************************************************
 *
 * STM CVS Log:
 *
 * $Log: utf8.c,v $
 * Revision 1.10  2006/10/23 14:29:21  trubac
 * new function for utf8 to utf16 conversion
 *
 * Revision 1.9  2006/10/17 10:00:12  trubac
 * error codes are redefined in gendef.h
 *
 * Revision 1.8  2006/09/18 09:55:22  belardi
 * Corrected CVS keyword usage
 *
 * Revision 1.7  2006/09/18 09:24:02  belardi
 * Added Log CVS keyword into file header
 *
 *
 ***************************************************/

#include "apdevsys.h"
#include "accordoptypes.h"
#include "utf8.h"
#include "gendef.h"
#include <string.h>

// Encodes one 16-bit character as UTF-8.
//   wch - character to encode
//   pt  - address of the write pointer; the encoded bytes are stored at *pt
//         and *pt is advanced past them
// Returns the number of bytes written (1, 2 or 3).
// Every value >= 0x800 is written as a 3-byte sequence; surrogate code units
// are not combined into a single 4-byte sequence.
uint8 wc2utf8(uint16 wch, uint8 **pt)
{
  uint8 *out = *pt;
  uint8 n;

  if (wch < 0x80) {
    out[0] = (uint8)wch;
    n = 1;
  } else if (wch < 0x800) {
    out[0] = (uint8)(0xC0 | (wch >> 6));
    out[1] = (uint8)(0x80 | (wch & 0x3F));
    n = 2;
  } else {
    out[0] = (uint8)(0xE0 | (wch >> 12));
    out[1] = (uint8)(0x80 | ((wch >> 6) & 0x3F));
    out[2] = (uint8)(0x80 | (wch & 0x3F));
    n = 3;
  }

  *pt = out + n;
  return n;
}

// Assembles one 16-bit character from the two bytes at src, using the byte
// order selected by pp->be (0 = little endian, otherwise big endian).
static uint16 read_wide_char(const uint8 *src, const t_conv_param *pp)
{
  if (pp->be)
    return (uint16)(((uint16)src[0] << 8) | src[1]);
  return (uint16)(((uint16)src[1] << 8) | src[0]);
}

// Appends wch as UTF-8 at *dst and adds the byte count to *len.
// When pp->limit is non-zero the character is first encoded into a scratch
// buffer and copied only if the running length stays within the limit.
// Returns 1 when the character was written, 0 when it did not fit (in which
// case *dst and *len are left untouched).
static uint8 append_utf8(uint16 wch, uint8 **dst, uint8 *len, const t_conv_param *pp)
{
  uint8 scratch[4];
  uint8 *sp = scratch;
  uint8 n;

  if (!pp->limit) {
    *len += wc2utf8(wch, dst);
    return 1;
  }

  n = wc2utf8(wch, &sp);
  if ((*len + n) > pp->limit)
    return 0;

  memcpy(*dst, scratch, n);
  *dst += n;
  *len += n;
  return 1;
}

// Converts a wide-character string to UTF-8.
//   wide      - pointer to the wide-character string (2 bytes per character)
//   utf       - destination memory for the UTF-8 string
//   wlength   - number of wide characters in the wide string
//   pp->be    - 0 = Little Endian, otherwise Big Endian
//   pp->trunc - maximum number of characters converted, 0 = ignored
//   pp->limit - maximum length of the UTF-8 output in bytes, 0 = ignored
// Returns the number of bytes written to utf; conversion stops before the
// first character that would exceed pp->limit. For wlength == 0 a single
// '\0' is stored and E_INVALID_PARAMETER is returned.
// The output is NOT null terminated.
// NOTE(review): the byte counter is a uint8, so (assuming uint8 is 8 bits wide)
// the returned length wraps for outputs longer than 255 bytes when no limit
// is set.
GRESULT WideStringToUTF8(uint8 *wide, uint8 *utf, uint8 wlength, t_conv_param *pp)
{
  const uint8 *src = wide;
  uint8 *dst = utf;
  uint8 len = 0;
  uint8 count;
  uint8 i;

  if (!wlength) {
    *utf = '\0';
    return E_INVALID_PARAMETER;
  }

  // convert everything unless a non-zero trunc is smaller than the input
  count = wlength;
  if (pp->trunc && (wlength > pp->trunc))
    count = pp->trunc;

  for (i = 0; i < count; i++, src += 2) {
    if (!append_utf8(read_wide_char(src, pp), &dst, &len, pp))
      break;  // next character would exceed pp->limit
  }

  return len;
}

// Converts a wide-character file name to UTF-8, keeping the extension when
// the name has to be shortened.
//   wide      - pointer to the wide-character string (2 bytes per character)
//   utf       - destination memory for the UTF-8 string
//   wlength   - number of wide characters in the wide string
//   pp->be    - 0 = Little Endian, otherwise Big Endian
//   pp->trunc - maximum number of characters in the UTF-8 name, 0 = ignored
//   pp->limit - maximum length of the UTF-8 output in bytes, 0 = ignored
// When wlength exceeds a non-zero pp->trunc, the output is the first
// (pp->trunc - extension length) characters followed by the extension, where
// the extension starts at the last '.' of the name. An extension longer than
// pp->trunc is still written in full, with no name part in front of it.
// Returns the number of bytes written to utf; conversion stops before the
// first character that would exceed pp->limit. For wlength == 0 a single
// '\0' is stored and E_INVALID_PARAMETER is returned.
// The output is NOT null terminated.
GRESULT WideFileNameToUTF8(uint8 *wide, uint8 *utf, uint8 wlength, t_conv_param *pp)
{
  const uint8 *src = wide;
  uint8 *dst = utf;
  uint8 len = 0;
  uint8 exlen = 0;    // characters in the extension including the dot, 0 = no extension
  uint8 namelen;
  uint8 i;

  if (!wlength) {
    *utf = '\0';
    return E_INVALID_PARAMETER;
  }

  if (!pp->trunc || (wlength <= pp->trunc)) {
    // no shortening needed, the dot position does not matter
    for (i = 0; i < wlength; i++, src += 2) {
      if (!append_utf8(read_wide_char(src, pp), &dst, &len, pp))
        return len;
    }
    return len;
  }

  // Find the last '.' by character index. Comparing the complete 16-bit value
  // keeps characters whose low byte happens to be 0x2E from matching, and
  // indexing from wlength-1 down to 0 never reads outside the input.
  i = wlength;
  while (i > 0) {
    i--;
    if (read_wide_char(wide + 2 * i, pp) == '.') {
      exlen = (uint8)(wlength - i);
      break;
    }
  }

  // characters left for the name part once the extension is accounted for
  namelen = (pp->trunc > exlen) ? (uint8)(pp->trunc - exlen) : 0;

  for (i = 0; i < namelen; i++, src += 2) {
    if (!append_utf8(read_wide_char(src, pp), &dst, &len, pp))
      return len;
  }

  src = wide + 2 * (wlength - exlen);  // first character of the extension (the dot)
  for (i = 0; i < exlen; i++, src += 2) {
    if (!append_utf8(read_wide_char(src, pp), &dst, &len, pp))
      return len;
  }

  return len;
}

#if (HAVE_UTF16==1)
// Converts a UTF-8 string to big-endian UTF-16.
//   utf8    - input bytes
//   utf16   - output buffer, two bytes are written per character
//   utf8len - number of input bytes
//   limit   - in: maximum length of the output in bytes
//             out: real length of the output in bytes (not modified on error)
// Returns the number of input bytes whose characters were written to utf16.
// This is smaller than utf8len when the output is full or when the input ends
// in the middle of a multi-byte sequence.
// Returns E_INVALID_PARAMETER for utf8len == 0 and E_WRONG_FORMAT for a stray
// continuation byte, a lead byte inside an unfinished sequence, or a lead byte
// of a sequence longer than 3 bytes.
GRESULT utf8_2_utf16be(uint8 *utf8, uint8 *utf16, uint32 utf8len, uint32 *limit)
{
  uint32 incnt = 0;     // input bytes read so far
  uint32 done = 0;      // input bytes belonging to characters already written
  uint32 outcnt = 0;    // output bytes written so far
  uint32 wchar = 0;     // character being assembled
  int pending = 0;      // continuation bytes still expected
  uint8 b;

  if (!utf8len)
    return E_INVALID_PARAMETER;

  // outcnt never exceeds *limit, so the subtraction cannot wrap; a character
  // is only started/continued while two more output bytes still fit
  while ((incnt < utf8len) && ((*limit - outcnt) >= 2)) {
    b = utf8[incnt++];

    if (b < 0x80) {
      // single byte character; an unfinished sequence in front of it is dropped
      wchar = b;
    } else if ((b & 0xC0) == 0x80) {
      if (!pending)
        return E_WRONG_FORMAT;
      wchar = (wchar << 6) | (b & 0x3F);
      if (--pending)
        continue;  // more continuation bytes to come
    } else if ((b & 0xE0) == 0xC0) {
      if (pending)  // no adjacent bytes should be awaited
        return E_WRONG_FORMAT;
      pending = 1;
      wchar = b & 0x1F;
      continue;
    } else if ((b & 0xF0) == 0xE0) {
      if (pending)  // no adjacent bytes should be awaited
        return E_WRONG_FORMAT;
      pending = 2;
      wchar = b & 0x0F;
      continue;
    } else {
      return E_WRONG_FORMAT;
    }

    // a complete character is available: store it high byte first
    pending = 0;
    *(utf16++) = (uint8)((wchar >> 8) & 0xFF);
    *(utf16++) = (uint8)(wchar & 0xFF);
    outcnt += 2;
    done = incnt;
  }

  *limit = outcnt;
  return (GRESULT)done;
}
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -