📄 wmlcparser.java
字号:
package wmlcparser;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Stack;
/**
* History:
* 2007-11-23
* - Moved the test classes out of the wmlcparser package; classes outside
*   wmlcparser are not included when packaging.
* - Added method: getCharsetStr()
* - Changed getStrInCharset() to handle exceptions itself.
*
* 2007-11-20
* - Fixed fetching a string from the string table by index
*   (see the comment "2007-11-20 fay: forget to reset it" in the code).
*
* 2007-11-08
* - From Peter: convert UTF-16 to UTF-8 manually.
*
* 2007-11-07:
* - Added handling of mb_int32 (added the buffer_4_mb_int32 buffer).
* - Added charset handling; four are currently supported:
*   "UTF-8", "UTF-16", "UTF-32", and the platform default encoding.
*
*
* - Two kinds of token: global tokens and application tokens.
* - Application tokens comprise tag tokens and attribute tokens.
* - Attribute Start (< 128) and Attribute Value (> 128).
*
*
*
* @author Fay
*
*/
public class WmlcParser
{
// Values of currentStatus: parseWmlc routes each application byte to
// handle_Tag_Status or handle_Attribute_Status depending on which one is set.
static final int STATUS_TAG = 1;
static final int STATUS_ATTRIBUTE = 2;
// Consulted by handle_Tag_Status when an END token arrives
// (presumably holds the currently open tags - confirm against the tag handlers).
private Stack tagStack = new Stack();
// Current parsing mode; one of STATUS_TAG / STATUS_ATTRIBUTE. Starts in tag mode.
private int currentStatus = STATUS_TAG;
// Text buffer: handleGlobalToken appends inline string bytes and resolved string-table text here.
private ByteArrayOutputStream baos = new ByteArrayOutputStream(); // buffer
// Collects the bytes of a multi-byte integer (mb_u_int32) until its final byte has been seen.
private ByteArrayOutputStream buffer_4_mb_int32 = new ByteArrayOutputStream(); // the buffer for handling the mb_int32
// Global token whose operand bytes are still being consumed; -1 when no global token is pending.
private int preGlobalToken = -1;
// NOTE(review): not used in the visible part of the file; presumably the last attribute-start token, -1 when none.
private int preAttributeStart = -1;
// First byte of the input, stored by parseWmlc; -1 until a document has been parsed.
private byte wbxmlVersion = -1;
// NOTE(review): presumably filled in by parseWmlVersion / parseWmlCharset - confirm in those methods.
private int xmlVersion = -1;
private int charset = -1;
// NOTE(review): presumably the raw string-table bytes set by parseWmlTableStr; null when the document has no table.
private byte[] stringTable = null;
// NOTE(review): presumably the charset name derived from the charset code by parseWmlCharset - confirm.
private String charsetStr = null;
/* public Tag parseWmlc(byte[] bs) throws IOException
{
if(bs == null || bs.length == 0) { return null; }
//#ifdef debug
//# System.out.println("parseWmlc, bs.length == " + bs.length);
//#endif
// 0. wbxml version
// 1. wml version (mb_u_int32)
// 2. charset (mb_u_int32)
// 3. table length (mb_u_int32)
// 4. table(if table length != 0)
wbxmlVersion = bs[0]; // 0. wbxml version
//#ifdef debug
//# System.out.println("wbxmlVersion == " + wbxmlVersion);
//#endif
int pos = 1;
// 1. wml version (mb_u_int32)
pos = parseWmlVersion(bs, pos);
//#ifdef debug
//# System.out.println("xmlVersion == " + xmlVersion);
//#endif
// 2. charset (mb_u_int32)
pos = parseWmlCharset(bs, pos);
//#ifdef debug
//# System.out.println("charset == " + charsetStr);
//#endif
// 3. table length (mb_u_int32)
// 4. table(if table length != 0)
pos = parseWmlTableStr(bs, pos);
// for debug!
//#ifdef debug
//# System.out.println("tbl == " + (stringTable==null?0:stringTable.length));
//# for(int i = 0 ; stringTable!= null && i < stringTable.length; i ++)
//# {
//# System.out.println("stringTable[" + i + "]=" + stringTable[i]);
//# }
//#endif
int b = 0;
for(int i = pos; i < bs.length ; i ++)
{
// 处理byte
b = bs[i];
if(b < 0)
{
b += 256;
}
//#ifdef debug
//# System.out.println("bs[" + i + "]=" + Integer.toHexString(b));
//#endif
if(preGlobalToken != -1) // 有global token未处理
{
handleGlobalToken(b);
//#ifdef debug
//# System.out.println("continue 0");
//#endif
continue;
}
// 是需要处理(有follow的)global token -> continue
if(Token.isGlobalTokenNeedHandle(b))
{
preGlobalToken = b;
//#ifdef debug
//# System.out.println("continue 1");
//#endif
continue;
}
// 是Global Token 而且不是END(1), 就Continue, END会由后面处理
if(Token.isGlobalToken(b) && (b != Token.GLOBAL_END))
{
//#ifdef debug
//# System.out.println("continue 2");
//#endif
continue;
}
// 处理byte
switch(currentStatus)
{
case STATUS_TAG:
//#ifdef debug
//# System.out.println("handleTag -> " + b);
//#endif
handle_Tag_Status(b);
break;
case STATUS_ATTRIBUTE:
//#ifdef debug
//# System.out.println("handle attribute -> " + b);
//#endif
handle_Attribute_Status(b);
break;
}
}
return rootTag;
}
*/
/**
 * Parses a WMLC (WBXML-encoded WML) byte array and returns the textual form
 * of the resulting root tag (2007-11-22: returns Tag.toString() instead of the Tag).
 * <br>
 * Header layout, read in this order:
 * <br>
 * 0. wbxml version (one byte)
 * <br>
 * 1. wml version (mb_u_int32)
 * <br>
 * 2. charset (mb_u_int32)
 * <br>
 * 3. string table length (mb_u_int32)
 * <br>
 * 4. string table (only if the table length != 0)
 * <br>
 * Every byte after the header is fed, as an unsigned value, either to the
 * pending global token or to the handler for the current status.
 *
 * @param bs the raw WMLC bytes
 * @return rootTag.toString(), or null when bs is null/empty or no root tag was produced
 * @throws Exception whatever the header parsers or token handlers throw
 */
public String parseWmlc(byte[] bs) throws Exception
{
    if(bs == null || bs.length == 0)
    {
        return null;
    }
    //#ifdef debug
    //# System.out.println("parseWmlc, bs.length == " + bs.length);
    //#endif

    // 0. wbxml version
    wbxmlVersion = bs[0];
    //#ifdef debug
    //# System.out.println("wbxmlVersion == " + wbxmlVersion);
    //#endif

    // 1. wml version (mb_u_int32), starts right after the version byte
    int pos = parseWmlVersion(bs, 1);
    //#ifdef debug
    //# System.out.println("xmlVersion == " + xmlVersion);
    //#endif

    // 2. charset (mb_u_int32)
    pos = parseWmlCharset(bs, pos);
    //#ifdef debug
    //# System.out.println("charset == " + charsetStr);
    //#endif

    // 3. table length (mb_u_int32) and 4. the table itself (if table length != 0)
    pos = parseWmlTableStr(bs, pos);
    // for debug!
    //#ifdef debug
    //# System.out.println("tbl == " + (stringTable==null?0:stringTable.length));
    //# for(int i = 0 ; stringTable!= null && i < stringTable.length; i ++)
    //# {
    //# System.out.println("stringTable[" + i + "]=" + stringTable[i]);
    //# }
    //#endif

    for(int i = pos; i < bs.length; i++)
    {
        // widen the signed byte to its unsigned value (0..255)
        int b = bs[i] & 0xFF;
        //#ifdef debug
        //# System.out.println("bs[" + i + "]=" + Integer.toHexString(b));
        //#endif

        if(preGlobalToken != -1)
        {
            // a global token is still waiting for its operand bytes
            handleGlobalToken(b);
            //#ifdef debug
            //# System.out.println("continue 0");
            //#endif
        }
        else if(Token.isGlobalTokenNeedHandle(b))
        {
            // global token with follow-up bytes: remember it, the next bytes belong to it
            preGlobalToken = b;
            //#ifdef debug
            //# System.out.println("continue 1");
            //#endif
        }
        else if(Token.isGlobalToken(b) && (b != Token.GLOBAL_END))
        {
            // any other global token except END is skipped; END is handled by the status handlers
            //#ifdef debug
            //# System.out.println("continue 2");
            //#endif
        }
        else if(currentStatus == STATUS_TAG)
        {
            //#ifdef debug
            //# System.out.println("handleTag -> " + b);
            //#endif
            handle_Tag_Status(b);
        }
        else if(currentStatus == STATUS_ATTRIBUTE)
        {
            //#ifdef debug
            //# System.out.println("handle attribute -> " + b);
            //#endif
            handle_Attribute_Status(b);
        }
    }

    return (rootTag == null) ? null : rootTag.toString();
}
/**
 * Consumes one operand byte of the pending global token (preGlobalToken)
 * and appends the resulting text to the buffer (baos).
 * <br>
 * Each global token is followed by a fixed kind of operand: either a
 * zero-terminated string or a multi-byte integer (mb_u_int32).
 * <br>
 * Once the operand is complete, preGlobalToken is set back to -1 so that
 * parseWmlc resumes normal token dispatch.
 * <br>
 * A string-table lookup that yields null writes nothing to the buffer, both
 * for STR_T and for EXT_T_0..2 (previously EXT_T wrote the literal "$(null)").
 *
 * @param b the current input byte as an unsigned value
 * @throws Exception propagated from the buffer writes and from the helper methods
 */
private void handleGlobalToken(int b) throws Exception
{
    //#ifdef debug
    //# System.out.println("handleGlobalToken -> " + b);
    //#endif
    switch(preGlobalToken)
    {
        case Token.GLOBAL_ENTITY:  // operand is an mb_u_int32; its value is not used
        case Token.GLOBAL_LITERAL: // operand is an mb_u_int32; its value is not used
            // 2007-11-07, fay: multi-byte integers - keep consuming until the last byte
            if(isMB_U_INT32End(b))
            {
                preGlobalToken = -1;
            }
            break;

        case Token.GLOBAL_STRI_I: // followed by a termstr
            if(b == 0) // b == 0 means string end
            {
                //#ifdef debug
                //# System.out.println("STRI END -> " + getStrInCharset());
                //#endif
                preGlobalToken = -1;
            }
            else
            {
                baos.write(b);
            }
            break;

        case Token.GLOBAL_EXT_I_0: // (termStr)
        case Token.GLOBAL_EXT_I_1:
        case Token.GLOBAL_EXT_I_2:
            if(b == 0)
            {
                // string complete: replace the buffer content with "$(content)"
                String str = getStrInCharset();
                baos.reset();
                // getBytes() encodes with the platform default charset
                baos.write(("$(" + str + ")").getBytes()); // TODO handle the charset
                preGlobalToken = -1;
            }
            else
            {
                baos.write(b);
            }
            break;

        case Token.GLOBAL_EXT_T_0: // (mb_u_int32)
        case Token.GLOBAL_EXT_T_1: // str = table[index], buffer += "$(str)"
        case Token.GLOBAL_EXT_T_2:
        {
            // 2007-11-07, fay: multi-byte integers - collect bytes until the last one
            buffer_4_mb_int32.write(b);
            if(!isMB_U_INT32End(b))
            {
                break; // index not complete yet, wait for the next byte
            }
            int tableIndex = mergeMB_U_INT32(buffer_4_mb_int32.toByteArray());
            String tableData = getStrFromStringTable(tableIndex);
            //#ifdef debug
            //# System.out.println("tableData == " + tableData);
            //#endif
            if(tableData != null)
            {
                // same null guard as STR_T below: never emit the literal text "$(null)"
                baos.write(("$(" + tableData + ")").getBytes()); // TODO handle the charset
            }
            //#ifdef debug
            //# System.out.println("buffer == " + getStrInCharset());
            //#
            //#endif
            buffer_4_mb_int32.reset(); // reset the buffer, it's important!
            preGlobalToken = -1;
            break;
        }

        case Token.GLOBAL_STR_T: // (mb_u_int32)
        {
            // 2007-11-07, fay: multi-byte integers - collect bytes until the last one
            buffer_4_mb_int32.write(b);
            if(!isMB_U_INT32End(b))
            {
                break; // index not complete yet, wait for the next byte
            }
            int tableIndex = mergeMB_U_INT32(buffer_4_mb_int32.toByteArray());
            String tableData = getStrFromStringTable(tableIndex);
            if(tableData == null)
            {
                //#ifdef debug
                //# System.out.println("tableData == null");
                //#endif
            }
            else
            {
                baos.write(tableData.getBytes()); // TODO handle the charset
            }
            // 2007-11-20 fay: the index buffer must be reset here as well
            buffer_4_mb_int32.reset();
            preGlobalToken = -1;
            break;
        }
    }
}
// parseWmlc returns rootTag.toString(), or null while this is still null.
// NOTE(review): presumably assigned by the tag handlers when the first tag is opened - confirm.
Tag rootTag = null;
/**
* handle the b when it's TAG_STATUS
* @param b
* @throws UnsupportedEncodingException
*/
private void handle_Tag_Status(int b) throws Exception
{
// Tag结束标记
if(b == Token.GLOBAL_END)
{
if(tagStack.size() == 0)
{
// error, 没有tag, 但碰到了 TAG END
//#ifdef debug
//# System.out.println("error, 没有tag, 但碰到了 TAG END");
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -