00000004.htm
来自「水木清华BBS」· HTM 代码 · 共 113 行
HTM
113 行
/*
 * Conversion between Unicode Chinese characters and GB internal codes.
 *
 * Origin: digest archive of BBS SMTH (Tsinghua University). Posted by
 * intranetworm to the Java board on Wed Aug 27 13:44:45 1997 under the
 * subject "Conversion between Unicode Chinese characters and GB internal
 * codes" (source: bbs.net.tsinghua.edu.cn).
 *
 * Author's usage note:
 *   1. Save the code table given earlier to a file, for example table.txt.
 *   2. Create an instance: new GBUnicode("table.txt").
 *   3. Afterwards call GB2Uni and Uni2GB to convert between the encodings.
 *      Note that a GB internal code is represented by two bytes.
 */
import java.io.*;
import java.util.Hashtable;

/**
 * Bidirectional lookup between two-byte GB codes and Unicode characters,
 * driven by a code table loaded from a file.
 *
 * <p>Table format, as parsed by the constructor: each entry is the two raw
 * bytes of a GB code, any bytes up to and including a {@code ':'}, optional
 * blanks, and then the Unicode value as a run of decimal digits. Blanks
 * (byte values 0..32) between entries are skipped. Exactly 6763 entries are
 * read. Entry number {@code n} in the file must be the character whose GB
 * index (see {@link #getGBindex}) is {@code n}, because {@link #GB2Uni}
 * looks characters up by that computed index.
 */
class GBUnicode {
    /** Number of entries in the table: 39 * 94 + 89 level-1 plus 32 * 94 level-2. */
    private static final int TABLE_SIZE = 6763;
    /** Number of columns (trail bytes 0xA1..0xFE) in a full row. */
    private static final int ROW_WIDTH = 94;
    /** Row offset (lead byte 0xD7) of the last, partially filled, level-1 row. */
    private static final int LAST_L1_ROW = 39;
    /** Number of characters present in the last level-1 row. */
    private static final int LAST_L1_ROW_LENGTH = 89;
    /** Row offset (lead byte 0xF7) of the last level-2 row. */
    private static final int LAST_ROW = 71;

    /** GB lead and trail byte of every table entry, in file order. */
    byte high[] = new byte[TABLE_SIZE], low[] = new byte[TABLE_SIZE];
    /** Unicode character of every table entry, in file order. */
    char unichar[] = new char[TABLE_SIZE];
    /** Maps a Unicode character to its entry number in the arrays above. */
    Hashtable<Character, Integer> UniGB;

    /**
     * Loads the code table.
     *
     * @param table_file path of the code table file
     * @throws EOFException if the file ends before all entries were read
     * @throws IOException if the file cannot be opened or read
     * @throws NumberFormatException if an entry has no decimal digits after
     *         the colon, or its value does not fit in a {@code char}
     */
    public GBUnicode(String table_file) throws IOException {
        UniGB = new Hashtable<Character, Integer>(7000, 1);
        // Buffered: the parser consumes the file one byte at a time.
        InputStream tables = new BufferedInputStream(new FileInputStream(table_file));
        try {
            for (int n = 0; n < TABLE_SIZE; n++) {
                int b;

                // The first non-blank byte is the GB lead byte; the trail byte follows it.
                do {
                    b = readByte(tables);
                } while (b <= ' ');
                high[n] = (byte) b;
                low[n] = (byte) readByte(tables);

                // Skip everything up to and including the ':' separator.
                do {
                    b = readByte(tables);
                } while (b != ':');

                // Skip blanks in front of the number.
                do {
                    b = readByte(tables);
                } while (b <= ' ');

                // Accumulate the decimal digits. End of file is a valid
                // terminator here so the last entry needs no trailing newline.
                int value = 0;
                int digits = 0;
                while (b >= '0' && b <= '9') {
                    value = value * 10 + (b - '0');
                    if (value > Character.MAX_VALUE) {
                        throw new NumberFormatException(
                                "Unicode value out of range in table entry " + n);
                    }
                    digits++;
                    b = tables.read();
                }
                if (digits == 0) {
                    throw new NumberFormatException(
                            "Missing Unicode value in table entry " + n);
                }

                unichar[n] = (char) value;
                Character key = Character.valueOf(unichar[n]);
                if (UniGB.get(key) != null) {
                    System.out.println("Duplicated : " + unichar[n]);
                }
                UniGB.put(key, Integer.valueOf(n));
            }
        } finally {
            tables.close();
        }
    }

    /** Reads one byte as 0..255, failing with EOFException instead of returning -1. */
    private static int readByte(InputStream in) throws IOException {
        int b = in.read();
        if (b < 0) {
            throw new EOFException(
                    "Code table ended before all " + TABLE_SIZE + " entries were read");
        }
        return b;
    }

    /**
     * Computes the table index of a GB code.
     *
     * @param high lead byte; valid values are 0xB0..0xF7
     * @param low trail byte; valid values are 0xA1..0xFE (0xA1..0xF9 when
     *        the lead byte is 0xD7)
     * @return the index in 0..6762, or -1 if the pair is outside those ranges
     */
    private int getGBindex(byte high, byte low) {
        // Work on unsigned values so out-of-range bytes cannot wrap into range.
        int row = (high & 0xff) - 0xb0;
        int col = (low & 0xff) - 0xa1;
        if (row < 0 || row > LAST_ROW || col < 0 || col >= ROW_WIDTH) {
            return -1;
        }
        if (row < LAST_L1_ROW) { // level-1 Chinese, full rows
            return row * ROW_WIDTH + col;
        }
        if (row == LAST_L1_ROW) { // the last 89 level-1 characters
            if (col >= LAST_L1_ROW_LENGTH) {
                return -1;
            }
            return row * ROW_WIDTH + col;
        }
        // Level-2 Chinese: the short level-1 row leaves a gap of 94 - 89 = 5 slots.
        return row * ROW_WIDTH + col - (ROW_WIDTH - LAST_L1_ROW_LENGTH);
    }

    /**
     * Converts a Unicode character to its GB code.
     *
     * @param unicode the character to convert
     * @return a new two-element array {lead byte, trail byte} as stored in
     *         the table, or {@code null} if the character is not in the table
     */
    public byte[] Uni2GB(char unicode) {
        Integer index = UniGB.get(Character.valueOf(unicode));
        if (index == null) {
            return null;
        }
        int n = index.intValue();
        return new byte[] {high[n], low[n]};
    }

    /**
     * Converts a GB code to its Unicode character.
     *
     * @param high lead byte of the GB code
     * @param low trail byte of the GB code
     * @return the Unicode character, or the character with value 0 if the
     *         byte pair is not a GB Chinese character
     */
    public char GB2Uni(byte high, byte low) {
        int index = getGBindex(high, low);
        if (index == -1) { // not GB Chinese
            return 0;
        }
        return unichar[index];
    }
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?