📄 tdbconvertengine.h
字号:
/*
* Copyright (C) 2006, Dung-Bang Tsai
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*
* ( If you want to use this library for commercial use,
* feel free to contact me, just cost some money, I could sell
* you the code without GPL license, so you could use this code
* for your product without public your source code. )
*
* Authors:
* Tsai, Dung-Bang <dbtsai@gmail.com>
*
* 2006/03/08 at NCKU physics
*/
#ifndef _TDBConvertEngine_H_
#define _TDBConvertEngine_H_
#include "wx/wx.h"
#include <deque>
#include <iostream>
#include <list>
#include <vector>
//using namespace std;
// Flag bits; several flags may be combined with the | operator.
enum
{
    map        = 1 << 0, // whether the character is mapped directly by table lookup
    correction = 1 << 1, // vocabulary correction
    dest_exit  = 1 << 2  // whether the target encoding has this character
};
// Newline conventions; the numeric values match the NewLineType codes
// described in the comments of TDBstring's private section.
enum
{
    Unix = 0x1, // 0x0A
    Mac  = 0x2, // 0x0D
    DOS  = 0x3  // 0x0D 0x0A
};
// Sequence of UCS-4 code units with line/word bookkeeping and
// Traditional/Simplified Chinese conversion entry points.
// Only the inline members are defined here; push_back() and the conversion
// and mapping routines are defined elsewhere.
class TDBstring
{
public:
    // Raw storage. Note: the topmost 3 bits of each element are used as
    // flags, as defined by the flag enum (map / correction / dest_exit).
    std::vector<unsigned int> data;

    // Current line counter (starts at 1).
    size_t get_n_line() const { return n_line; }
    // Current word counter (starts at 0).
    size_t get_n_word() const { return n_word; }
    // Number of stored elements.
    size_t Len() const { return data.size(); }

    void push_back( unsigned int& ucs4);
    void Traditionalized();
    void Simplized();
    void StoT_mapping();
    void TtoS_mapping();

    // Newline type of the most recently seen line break (see NewLineType).
    int GetNewLineType() const { return NewLineType; }

    // Returns element i with everything above the low 20 bits masked off.
    // No bounds checking is performed.
    unsigned int operator[](unsigned int i) const { return (data[i] & 0x000FFFFF); }

    // Initializers are listed in member declaration order, which is the
    // order in which they actually run.
    TDBstring() : NewLineType(1), if_0D_flag(false), n_line(1), n_word(0) {}

    // Writes the whole string to std::cout encoded as UTF-8.
    // Each element is masked to its low 20 bits before encoding. Bytes are
    // emitted with an explicit length, so U+0000 is written as a single
    // 0x00 byte instead of being treated as a C-string terminator.
    // Returns the total number of UTF-8 bytes written.
    unsigned int display() const
    {
        unsigned int bytes_written = 0;
        for (std::vector<unsigned int>::const_iterator it = data.begin(); it != data.end(); ++it)
        {
            const unsigned int src = *it & 0x000FFFFF;
            char outbuf[4];
            unsigned int len;
            if (src < 0x80)
            {
                outbuf[0] = static_cast<char>(src);
                len = 1;
            }
            else if (src < 0x800)
            {
                outbuf[0] = static_cast<char>(0xC0 | (src >> 6));
                outbuf[1] = static_cast<char>(0x80 | (src & 0x3F));
                len = 2;
            }
            else if (src < 0x10000)
            {
                outbuf[0] = static_cast<char>(0xE0 | (src >> 12));
                outbuf[1] = static_cast<char>(0x80 | ((src >> 6) & 0x3F));
                outbuf[2] = static_cast<char>(0x80 | (src & 0x3F));
                len = 3;
            }
            else
            {
                // After the 20-bit mask src is at most 0xFFFFF, so it always
                // fits the four-byte form.
                outbuf[0] = static_cast<char>(0xF0 | (src >> 18));
                outbuf[1] = static_cast<char>(0x80 | ((src >> 12) & 0x3F));
                outbuf[2] = static_cast<char>(0x80 | ((src >> 6) & 0x3F));
                outbuf[3] = static_cast<char>(0x80 | (src & 0x3F));
                len = 4;
            }
            std::cout.write(outbuf, len);
            bytes_written += len;
        }
        return bytes_written;
    }

private:
    // 1 = Unix(0x0A), 2 = Mac(0x0D), 3 = DOS(0x0D 0x0A)
    // This is the type of the most recent line break. If the raw data is
    // needed, the type is recorded in the otherwise unused topmost two bits
    // of the UCS-4 value, i.e. the two leftmost bits of the stored 0x0A are
    // 01 = Unix(0x0A), 10 = Mac(0x0D), 11 = DOS(0x0D 0x0A)
    char NewLineType ;
    // A DOS line break is 0x0D 0x0A and a Mac line break is 0x0D, so after
    // seeing 0x0D the next code must be inspected as well. The flag is
    // turned on first; if the next code is 0x0A the newline type is changed,
    // otherwise the flag is reset to false.
    bool if_0D_flag;
    size_t n_line;
    size_t n_word;
};
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -