📄 tdbconvertencode.h
字号:
/*
* Copyright (C) 2006, Dung-Bang Tsai
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*
* ( If you want to use this library commercially, feel free to
* contact me. For a fee, I can sell you the code without the GPL
* license, so that you can use it in your product without
* publishing your source code. )
*
* Authors:
* Tsai, Dung-Bang <dbtsai@gmail.com>
*
* 2006/03/05 at NCKU physics
*/
#ifndef _TDBConvertEncode_h
#define _TDBConvertEncode_h
#include "enum_code_page.h"
#include "TDBConvertEngine.h"
class TDBdetect_code_page
{
/*將來源或目地的raw char的資料用SetSrcChar接上,
ex若要由char轉ucs4,則將原始char[]array接上,然後測試data屬性
或已知,然後目的sting定義為TDBstring, 需用SetResultString接上。
然後使用ConvertFromSrctoResultString去轉成ucs4, 要注意的是,
他的回傳值為讀入幾個raw data,因為有可能被截斷而後面有缺陷。
同理,若有一TDBstring要轉成char[] raw data, 則需先將代轉的sting
用SetResultString接上,然後ConvertFromResultStringtoSrc即可轉回來。
記得執行完上面在執行下面時,上面的原始資料會被破壞,要小心使用。
*/
public:
TDBdetect_code_page()
{test_n_bytes=0;}
void SetSrcChar(unsigned char* src_buf)
{src = src_buf;}
void SetSrcBytes(size_t src_bytes)
{src_n_bytes = src_bytes;}
void SetTestBytes(size_t test_bytes)
{test_n_bytes= test_bytes;}
void SetOutputString(TDBstring& mystring){output_string = &mystring;}
// Define in enum_code_page
size_t ConvertFromSrctoResultString(int code_page);
bool IsBinaryData();
bool IsASCII();
// Unicode BOM test function
int UnicodeBOM_test(); // The return value is defined in enum_code_page.h
bool IsUTF8_BOM_test();
bool IsUTF16LE_BOM_test();
bool IsUTF16BE_BOM_test();
bool IsUTF32LE_BOM_test();
bool IsUTF32BE_BOM_test();
// Code page signature test function
int Unicode_signature_test();
bool UTF8_signature_test();
bool UTF16LE_signature_test();
bool UTF16BE_signature_test();
bool UTF32LE_signature_test();
bool UTF32BE_signature_test();
// Local code page test
int Chinese_code_test();
private:
unsigned char *src;
TDBstring *output_string;
// Only input the bytes you want test,
// generally ony 20k could termin the code page
size_t src_n_bytes;
size_t test_n_bytes; // if equal to zero, test all bytes.
//######################## Convert charset ####################################
// Unicode series.
//Return how many bytes outbuf use
static size_t a_UCS4toUTF32BE(const unsigned int& src, unsigned char *outbuf);
static size_t a_UCS4toUTF32LE(const unsigned int& src, unsigned char *outbuf);
static size_t a_UCS4toUTF16BE(const unsigned int& src, unsigned char *outbuf);
static size_t a_UCS4toUTF16LE(const unsigned int& src, unsigned char *outbuf);
static size_t a_UCS4toUTF8(const unsigned int& src, unsigned char *outbuf);
//Return how many bytes input buffer read, and the last argument
// is the source string's len, for avoid memory lock.
// 而若回傳-n, 則是src剩餘空間為len小於該字所需讀入的大小(len < n)。為了避免讀到非法記憶體區域
// 所以直接跳過終止。若回傳0, 則是有錯誤,跳到下個byte再嘗試。
static int a_UTF8toUCS4 (const unsigned char* src, unsigned int& outbuf, size_t len);
static int a_UTF32BEtoUCS4(const unsigned char* src, unsigned int& outbuf, size_t len);
static int a_UTF32LEtoUCS4(const unsigned char* src, unsigned int& outbuf, size_t len);
static int a_UTF16BEtoUCS4(const unsigned char* src, unsigned int& outbuf, size_t len);
static int a_UTF16LEtoUCS4(const unsigned char* src, unsigned int& outbuf, size_t len);
// Local charset
//Big5 series
static int a_Big5toUCS4(const unsigned char* src, unsigned int& outbuf, size_t len);
static int a_Big5UAOtoUCS4(const unsigned char* src, unsigned int& outbuf, size_t len);
static int a_Big52003toUCS4(const unsigned char* src, unsigned int& outbuf, size_t len);
static int a_Big5HKSCStoUCS4(const unsigned char* src, unsigned int& outbuf, size_t len);
};
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -