📄 unicode.c
字号:
{ free( utf_entry ); goto error; } tab = newtab; tab[num++] = utf_entry; } vlc_closedir_wrapper( dir ); if( compar != NULL ) qsort( tab, num, sizeof( tab[0] ), (int (*)( const void *, const void *))compar ); *namelist = tab; return num; error:{ unsigned i; for( i = 0; i < num; i++ ) free( tab[i] ); if( tab != NULL ) free( tab ); return -1;} }}static int utf8_statEx( const char *filename, void *buf, vlc_bool_t deref ){#if defined (WIN32) || defined (UNDER_CE) /* retrieve Windows OS version */ if( GetVersion() < 0x80000000 ) { /* for Windows NT and above */ wchar_t wpath[MAX_PATH + 1]; if( !MultiByteToWideChar( CP_UTF8, 0, filename, -1, wpath, MAX_PATH ) ) { errno = ENOENT; return -1; } wpath[MAX_PATH] = L'\0'; return _wstati64( wpath, (struct _stati64 *)buf ); }#endif#ifdef HAVE_SYS_STAT_H const char *local_name = ToLocale( filename ); if( local_name != NULL ) { int res = deref ? stat( local_name, (struct stat *)buf ) : lstat( local_name, (struct stat *)buf ); LocaleFree( local_name ); return res; } errno = ENOENT;#endif return -1;}int utf8_stat( const char *filename, void *buf){ return utf8_statEx( filename, buf, VLC_TRUE );}int utf8_lstat( const char *filename, void *buf){ return utf8_statEx( filename, buf, VLC_FALSE );}/** * utf8_*printf: *printf with conversion from UTF-8 to local encoding */static int utf8_vasprintf( char **str, const char *fmt, va_list ap ){ char *utf8; int res = vasprintf( &utf8, fmt, ap ); if( res == -1 ) return -1; *str = ToLocaleDup( utf8 ); free( utf8 ); return res;}int utf8_vfprintf( FILE *stream, const char *fmt, va_list ap ){ char *str; int res = utf8_vasprintf( &str, fmt, ap ); if( res == -1 ) return -1; fputs( str, stream ); free( str ); return res;}int utf8_fprintf( FILE *stream, const char *fmt, ... 
){ va_list ap; int res; va_start( ap, fmt ); res = utf8_vfprintf( stream, fmt, ap ); va_end( ap ); return res;}static char *CheckUTF8( char *str, char rep )#define isutf8cont( c ) (((c) >= 0x80) && ((c) <= 0xBF)) { unsigned char *ptr, c; assert (str != NULL); ptr = (unsigned char *)str; while( (c = *ptr) != '\0' ) { /* US-ASCII, 1 byte */ if( c <= 0x7F ) ptr++; /* OK */ else /* 2 bytes */ if( ( c >= 0xC2 ) && ( c <= 0xDF ) ) { c = ptr[1]; if( isutf8cont( c ) ) ptr += 2; /* OK */ else goto error; } else /* 3 bytes */ if( c == 0xE0 ) { c = ptr[1]; if( ( c >= 0xA0 ) && ( c <= 0xBF ) ) { c = ptr[2]; if( isutf8cont( c ) ) ptr += 3; /* OK */ else goto error; } else goto error; } else if( ( ( c >= 0xE1 ) && ( c <= 0xEC ) ) || ( c == 0xEC ) || ( c == 0xEE ) || ( c == 0xEF ) ) { c = ptr[1]; if( isutf8cont( c ) ) { c = ptr[2]; if( isutf8cont( c ) ) ptr += 3; /* OK */ else goto error; } else goto error; } else if( c == 0xED ) { c = ptr[1]; if( ( c >= 0x80 ) && ( c <= 0x9F ) ) { c = ptr[2]; if( isutf8cont( c ) ) ptr += 3; /* OK */ else goto error; } else goto error; } else /* 4 bytes */ if( c == 0xF0 ) { c = ptr[1]; if( ( c >= 0x90 ) && ( c <= 0xBF ) ) { c = ptr[2]; if( isutf8cont( c ) ) { c = ptr[3]; if( isutf8cont( c ) ) ptr += 4; /* OK */ else goto error; } else goto error; } else goto error; } else if( ( c >= 0xF1 ) && ( c <= 0xF3 ) ) { c = ptr[1]; if( isutf8cont( c ) ) { c = ptr[2]; if( isutf8cont( c ) ) { c = ptr[3]; if( isutf8cont( c ) ) ptr += 4; /* OK */ goto error; } else goto error; } else goto error; } else if( c == 0xF4 ) { c = ptr[1]; if( ( c >= 0x80 ) && ( c <= 0x8F ) ) { c = ptr[2]; if( isutf8cont( c ) ) { c = ptr[3]; if( isutf8cont( c ) ) ptr += 4; /* OK */ else goto error; } else goto error; } else goto error; } else goto error; continue;error: if( rep == 0 ) return NULL; *ptr++ = '?'; str = NULL; } return str;}/** * EnsureUTF8: replaces invalid/overlong UTF-8 sequences with question marks * Note that it is not possible to convert from Latin-1 to UTF-8 on the 
fly, * so we don't try that, even though it would be less disruptive. * * @return str if it was valid UTF-8, NULL if not. */char *EnsureUTF8( char *str ){ return CheckUTF8( str, '?' );}/** * IsUTF8: checks whether a string is a valid UTF-8 byte sequence. * * @param str nul-terminated string to be checked * * @return str if it was valid UTF-8, NULL if not. */const char *IsUTF8( const char *str ){ return CheckUTF8( (char *)str, 0 );}/** * UTF32toUTF8(): converts an array from UTF-32 (host byte order) * to UTF-8. * * @param src the UTF-32 table to be converted * @param len the number of code points to be converted from src * (ie. the number of uint32_t in the table pointed to by src) * @param newlen an optional pointer. If not NULL, *newlen will * contain the total number of bytes written. * * @return the result of the conversion (must be free'd()) * or NULL on error (in that case, *newlen is undefined). */static char *UTF32toUTF8( const uint32_t *src, size_t len, size_t *newlen ){ char *res, *out; /* allocate memory */ out = res = (char *)malloc( 6 * len ); if( res == NULL ) return NULL; while( len > 0 ) { uint32_t uv = *src++; len--; if( uv < 0x80 ) { *out++ = uv; continue; } else if( uv < 0x800 ) { *out++ = (( uv >> 6) | 0xc0); *out++ = (( uv & 0x3f) | 0x80); continue; } else if( uv < 0x10000 ) { *out++ = (( uv >> 12) | 0xe0); *out++ = (((uv >> 6) & 0x3f) | 0x80); *out++ = (( uv & 0x3f) | 0x80); continue; } else if( uv < 0x110000 ) { *out++ = (( uv >> 18) | 0xf0); *out++ = (((uv >> 12) & 0x3f) | 0x80); *out++ = (((uv >> 6) & 0x3f) | 0x80); *out++ = (( uv & 0x3f) | 0x80); continue; } else { free( res ); return NULL; } } len = out - res; res = realloc( res, len ); if( newlen != NULL ) *newlen = len; return res;}/** * FromUTF32(): converts an UTF-32 string to UTF-8. * * @param src UTF-32 bytes sequence, aligned on a 32-bits boundary. * * @return the result of the conversion (must be free()'d), * or NULL in case of error. 
*/
char *FromUTF32( const uint32_t *src )
{
    /* Start at 1 so that the terminating zero is counted and therefore
     * converted too, which nul-terminates the result. */
    size_t len = 1;

    while( src[len - 1] != 0 )
        len++;

    return UTF32toUTF8( src, len, NULL );
}

/**
 * UTF16toUTF8: converts UTF-16 (host byte order) to UTF-8
 *
 * Unpaired surrogates (a high surrogate that is not followed by a low
 * one, or a low surrogate on its own) are each replaced by a question
 * mark; the code unit that follows is then converted normally.
 *
 * @param in UTF-16 bytes sequence, aligned on a 16-bits boundary
 * @param len number of uint16_t to convert
 * @param newlen an optional pointer. If not NULL, *newlen will
 *               contain the total number of bytes written.
 *
 * @return the result of the conversion (must be free()'d),
 *         or NULL if memory could not be allocated.
 */
static char *UTF16toUTF8( const uint16_t *in, size_t len, size_t *newlen )
{
    char *res, *out;
    size_t outlen;

    /* One code unit yields at most 3 bytes and a surrogate pair (two
     * units) yields 4, so 3 bytes per unit is always enough. Refuse
     * lengths for which that computation would wrap around. */
    if( len > (size_t)-1 / 3 )
        return NULL;

    /* Always request at least one byte, so that an empty input is not
     * confused with an allocation failure. */
    out = res = malloc( len ? 3 * len : 1 );
    if( res == NULL )
        return NULL;

    while( len > 0 )
    {
        uint32_t uv = *in++;
        len--;

        if( uv < 0x80 )
        {
            *out++ = (char)uv;
            continue;
        }
        if( uv < 0x800 )
        {
            *out++ = (char)(( uv >>  6)         | 0xc0);
            *out++ = (char)(( uv        & 0x3f) | 0x80);
            continue;
        }

        if( ( uv >= 0xd800 ) && ( uv <= 0xdbff ) )
        {
            /* High surrogate: it is only meaningful when a low surrogate
             * follows. Look at the next unit (in host byte order, like
             * every other unit) without consuming it, and only if there
             * is one left. */
            if( ( len == 0 ) || ( *in < 0xdc00 ) || ( *in > 0xdfff ) )
            {
                *out++ = '?'; /* Malformed surrogate */
                continue;
            }
            uv = ((uv - 0xd800) << 10) + ((uint32_t)*in - 0xdc00) + 0x10000;
            in++;
            len--;
        }
        else if( ( uv >= 0xdc00 ) && ( uv <= 0xdfff ) )
        {
            /* Low surrogate without a preceding high surrogate */
            *out++ = '?';
            continue;
        }

        if( uv < 0x10000 )
        {
            *out++ = (char)(( uv >> 12)         | 0xe0);
            *out++ = (char)(((uv >>  6) & 0x3f) | 0x80);
            *out++ = (char)(( uv        & 0x3f) | 0x80);
        }
        else
        {
            *out++ = (char)(( uv >> 18)         | 0xf0);
            *out++ = (char)(((uv >> 12) & 0x3f) | 0x80);
            *out++ = (char)(((uv >>  6) & 0x3f) | 0x80);
            *out++ = (char)(( uv        & 0x3f) | 0x80);
        }
    }

    outlen = (size_t)(out - res);
    if( outlen > 0 )
    {
        /* Give back the unused tail. If shrinking fails, the original
         * (larger) buffer is still valid, so keep using it. */
        char *shrunk = realloc( res, outlen );
        if( shrunk != NULL )
            res = shrunk;
    }
    if( newlen != NULL )
        *newlen = outlen;
    return res;
}

/**
 * FromUTF16(): converts an UTF-16 string to UTF-8.
 *
 * @param src UTF-16 bytes sequence, aligned on a 16-bits boundary.
 *
 * @return the result of the conversion (must be free()'d),
 *         or NULL in case of error.
 */
char *FromUTF16( const uint16_t *src )
{
    /* Start at 1 so that the terminating zero is counted and therefore
     * converted too, which nul-terminates the result. */
    size_t len = 1;

    while( src[len - 1] != 0 )
        len++;

    return UTF16toUTF8( src, len, NULL );
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -