📄 unicode.c
字号:
/***************************************************************************
 *   copyright            : (C) 2002 by Hendrik Sattler                    *
 *   mail                 : post@hendrik-sattler.de                        *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

#include "charsets.h"
#include "helpers.h"
#include "common.h"
#include <stdio.h>
#include <iconv.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#include "depincludes.h"

/*
 * Convert a byte string to the internal wide character representation.
 *
 * from_code: iconv name of the charset that input is encoded in
 * input:     bytes to convert
 * insize:    number of bytes at input
 *
 * After the charset conversion, backslash escapes are expanded:
 *   \n     becomes a line feed (0x0a)
 *   \\     becomes a single backslash
 *   \XXXX  becomes the character with the hexadecimal value XXXX
 *
 * Returns a newly allocated, 0-terminated wide string. It is obtained from
 * mem_alloc()/mem_realloc(); this file releases such buffers with free().
 * Conversion failures are reported through errexit().
 * NOTE(review): errexit() is presumably fatal (does not return) - confirm.
 */
wchar_t* convert_to_internal (char* from_code, char* input, size_t insize) {
  iconv_t cd;
  wchar_t* outbuf;
  char* outptr;
  size_t outsize;
  size_t inlen;
  wchar_t* retval;
  wchar_t buffer[4+1]; /* four hex digits and the terminator */
  char* convbuf;
  int i;

  if ((cd=iconv_open(INTERNAL_CHARSET,from_code))==(iconv_t)-1) {
    errexit ("Error on text conversion: %s\n", strerror(errno));
  }

  /* reserving space for the wide characters (iconv counts in bytes) */
  outsize=insize*sizeof(wchar_t);
  outbuf=mem_alloc((outsize+1)*sizeof(wchar_t),1);
  outptr=(char*)outbuf;
  if (iconv(cd,&input,&insize,&outptr,&outsize)==(size_t)-1) {
    /*
     * No special treatment because wchar_t should be big enough for
     * normal unicode (else go and fix your system)
     */
    errexit ("Error on text conversion from %s to system's %s: %s\n",
             from_code, INTERNAL_CHARSET, strerror(errno));
  }
  iconv_close(cd);

  /*
   * iconv() does not write a terminator. Set it explicitly instead of
   * depending on the allocator having zeroed the buffer; the allocation
   * above always leaves room for it.
   */
  outbuf[(size_t)(outptr-(char*)outbuf)/sizeof(wchar_t)]=0;

  /*
   * Now we have to handle all direct \XXXX character inputs.
   * insize is reused as read index into outbuf and
   * outsize as write index into retval.
   */
  inlen=wcslen(outbuf); /* outbuf is not modified below, so compute once */
  outsize=0;
  retval=mem_alloc((inlen+1)*sizeof(wchar_t),1);
  for (insize=0;insize<inlen;outsize++) {
    switch (outbuf[insize]) {
    case 0x5c: // '\'
      /* insize+1 is at most the index of the terminator */
      switch (outbuf[insize+1]) {
      case 0x6e: // 'n'
        retval[outsize]=0x0a;
        insize+=2;
        break;
      case 0x5c: // '\'
        retval[outsize]=0x5c;
        insize+=2;
        break;
      default:
        /* collect up to four digits, never reading past the terminator */
        for (i=0;i<4;i++) {
          if (insize+1+i<inlen) {
            buffer[i]=outbuf[insize+1+i];
          } else {
            buffer[i]=0;
          }
        }
        buffer[4]=0;
        convbuf=convert_from_internal("ANSI_X3.4-1968",buffer,0);
        retval[outsize]=(wchar_t)hexstr2int(convbuf,4)&(~((wchar_t)0));
        free(convbuf);
        /* may step past the end for a truncated escape; that ends the loop */
        insize+=5;
        break;
      }
      break;
    default:
      retval[outsize]=outbuf[insize];
      ++insize;
      break;
    }
  }
  /* every round consumes at least one input character: outsize<=inlen */
  retval[outsize]=0;

  /* the intermediate conversion buffer is not needed anymore */
  free(outbuf);

  /* shrink the result to what is actually used */
  return mem_realloc(retval,(wcslen(retval)+1)*sizeof(wchar_t));
}

/*
 * Convert a wide string from the internal representation to another charset.
 *
 * to_code:      iconv name of the target charset
 * input:        0-terminated wide string to convert
 * replace_mode: what to do with characters that iconv() rejects:
 *                 2: write a \XXXX replacement (lowest 16 bits as hex),
 *                    used for all output that we might read again
 *                 1: write a '?', used for output that we may not read again
 *                 any other value: report the character through errexit()
 *
 * Returns a newly allocated, 0-terminated byte string. It is obtained from
 * mem_alloc()/mem_realloc(); this file releases such buffers with free().
 * NOTE(review): errexit() is presumably fatal (does not return) - confirm.
 */
char* convert_from_internal (char* to_code, wchar_t* input, int replace_mode) {
  iconv_t cd;
  char* outbuf;
  char* outptr;
  size_t outsize;  /* capacity of outbuf, not counting the terminator */
  size_t outsize2; /* bytes still free in outbuf */
  size_t insize;
  size_t inlen;
  size_t used;
  char* input_cast;

  if ((cd=iconv_open(to_code,INTERNAL_CHARSET))==(iconv_t)-1) {
    errexit ("Error on text conversion: %s\n", strerror(errno));
  }

  inlen=wcslen(input);
  insize=inlen*sizeof(wchar_t); //those are counted in bytes, not characters
  input_cast=(char*)input;
  outsize=6*inlen; //this should be enough even for 6 Bytes of UTF-8
  outsize2=outsize;
  outbuf=mem_alloc(outsize+1,1);
  outptr=outbuf;
  while (iconv(cd,&input_cast,&insize,&outptr,&outsize2)==(size_t)-1) {
    if (errno==E2BIG) {
      /* We would have to resize outbuf */
      errexit ("Coding error: insufficient memory on unicode decoding. Please report as bug.\n");
    } else if (errno==EINVAL) {
      /* because we encode the wchar_t internally, this can never happen */
      myprintf(0,"Incomplete wide character at end of sequence\n");
      break;
    } else {
      switch(replace_mode) {
      case 2:
        /* replace all unknown character with a \XXXX replacement
         * it is used for all output that we might read again
         */
        if (outsize2<5) {
          errexit ("Coding error: insufficient memory on unicode decoding. Please report as bug.\n");
        }
        /* 5 characters plus terminator; outbuf has one spare byte for it */
        snprintf(outptr,outsize2+1,"\\%04X",
                 (unsigned int)(*((wchar_t*)input_cast) & 0xFFFF));
        input_cast+=sizeof(wchar_t);
        insize-=sizeof(wchar_t);
        outptr+=5;
        outsize2-=5;
        break;
      case 1:
        /*
         * replace all unknown character with a '?'
         * it is used for all output that we may not read again
         */
        if (outsize2<1) {
          /* the buffer may move: remember the write offset, not the pointer */
          used=(size_t)(outptr-outbuf);
          outsize+=inlen;
          outsize2+=inlen;
          outbuf=mem_realloc(outbuf,outsize+1);
          outptr=outbuf+used;
        }
        *outptr='?';
        input_cast+=sizeof(wchar_t);
        insize-=sizeof(wchar_t);
        outptr+=1;
        outsize2-=1;
        break;
      default:
        /* %lx needs an unsigned long, wchar_t may be a narrower type */
        errexit("Wide character 0x%lx cannot be converted to %s with current replacement mode.\n",
                (unsigned long)*((wchar_t*)input_cast), to_code);
        break;
      }
    }
  }
  iconv_close(cd);

  /*
   * Terminate explicitly: memory added by mem_realloc() is not zeroed and
   * outptr never points beyond outbuf+outsize, which is inside the buffer.
   */
  *outptr='\0';
  return outbuf;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -