📄 stanzaparserutf16.java
字号:
/**
* Copyright © 2006 广州乐言信息科技有限公司.
* All rights reserved.
* Created at 2006-8-18
*/
package com.hiany.xml;
import java.util.Iterator;
import org.apache.log4j.Logger;
import com.hiany.util.Strings;
/**
 * Parser/serializer for XML stanzas, targeting UTF-16LE strings.
 *
 * <p>Markup produced by the {@code buildString} methods is passed through
 * {@code Strings.utf8ToUtf16le}; names and attribute values read by
 * {@link #parse(StringBuffer)} are passed through {@code Strings.utf16Trim}
 * and {@code Strings.utf16ToUtf8}. The exact semantics of those helpers are
 * defined elsewhere in the project.
 */
public class StanzaParserUtf16 implements StanzaParser {
    static Logger logger = Logger.getLogger(StanzaParserUtf16.class);

    /**
     * Serializes a stanza: the opening tag with its non-null attributes and,
     * if the stanza is closed, its text followed by the closing tag.
     *
     * <p>Attribute values are written between single quotes without escaping.
     * The text is appended as-is (it is not passed through
     * {@code Strings.utf8ToUtf16le}).
     *
     * @param stanza the stanza to serialize
     * @return the serialized stanza
     */
    public String buildString(Stanza stanza) {
        String str = buildOpeningTag(stanza);
        if (stanza.isClosed()) { // only a closed stanza gets its body and end tag
            str += stanza.getText();
            str += Strings.utf8ToUtf16le("</" + stanza.getName() + ">");
        }
        logger.debug("str(UTF-16):" + str);
        return str;
    }

    /**
     * Serializes a single part of a stanza.
     *
     * <ul>
     * <li>{@code Stanza.Part.FRONT}: the opening tag with its attributes;</li>
     * <li>{@code Stanza.Part.BODY}: the text, appended as-is;</li>
     * <li>any other value: the closing tag if the stanza is closed, otherwise
     * the conversion of an empty string.</li>
     * </ul>
     *
     * @param stanza the stanza to serialize
     * @param part   which part to produce
     * @return the serialized part
     */
    public String buildString(Stanza stanza, int part) {
        String str = "";
        if (part == Stanza.Part.FRONT) {
            str = buildOpeningTag(stanza);
        } else if (part == Stanza.Part.BODY) {
            str += stanza.getText();
        } else {
            if (stanza.isClosed()) { // the end tag exists only for a closed stanza
                str += "</" + stanza.getName() + ">";
            }
            str = Strings.utf8ToUtf16le(str);
        }
        logger.debug("str(UTF-16):" + str);
        return str;
    }

    /**
     * Parses the first stanza found in {@code sb}.
     *
     * <p>Three shapes are recognized, based on the first {@code <...>} tag:
     * <ul>
     * <li>{@code </name>}: a closing tag; everything before it becomes the
     * text and the stanza is marked closed;</li>
     * <li>{@code <name ... />}: a self-closing tag; the text is empty and the
     * stanza is marked closed;</li>
     * <li>{@code <name ...>}: an opening tag; the buffer is then searched for
     * the first closing tag with the same name (nesting depth is not
     * tracked). If none is found the stanza is returned unclosed.</li>
     * </ul>
     *
     * <p>The consumed portion is deleted from the buffer being parsed.
     * NOTE(review): {@code sb} is first replaced by the result of
     * {@code Strings.utf16Trim(sb)}; whether deletions reach the caller's
     * buffer depends on that helper returning the same instance - confirm.
     *
     * @param sb the buffer holding the raw input
     * @return the parsed stanza, or {@code null} if the (trimmed) buffer has
     *         at most 3 characters, contains no complete {@code <...>} tag,
     *         or the first tag is empty
     */
    public Stanza parse(StringBuffer sb) {
        sb = Strings.utf16Trim(sb);
        logger.debug("str:" + sb);
        if (sb.length() <= 3) {
            return null;
        }
        int p1 = sb.indexOf("<");
        logger.debug("p1:" + p1);
        if (p1 < 0) {
            return null;
        }
        int p2 = sb.indexOf(">", p1);
        logger.debug("p2:" + p2);
        if (p2 < p1) { // no '>' after the '<'
            return null;
        }
        // Tag content: from '<' up to (not including) the first '>'.
        String frontStr = sb.substring(p1 + 1, p2).trim();
        if (frontStr.length() < 1) {
            return null;
        }
        logger.debug("frontStr:" + frontStr);

        Stanza stanza = new Stanza();
        if (frontStr.startsWith("/")) {
            // Leading '/': this is the closing half of <name>text</name>.
            stanza.setClosed(true);
            stanza.setText(sb.substring(0, p1));
            frontStr = frontStr.substring(1);
        } else if (frontStr.endsWith("/")) {
            // Trailing '/': self-closing <name /> form.
            stanza.setClosed(true);
            stanza.setText("");
            frontStr = frontStr.substring(0, frontStr.length() - 1).trim();
        }

        // Split the tag content into the name and the attribute section.
        String name;
        String attrStr;
        int blankPos = frontStr.indexOf(' ');
        if (blankPos > 0) {
            name = frontStr.substring(0, blankPos);
            if (name.length() > 1 && name.charAt(0) == '/') { // drop a leading slash
                name = name.substring(1);
            }
            attrStr = frontStr.substring(blankPos + 1);
        } else {
            name = frontStr;
            attrStr = null;
        }
        // Convert the name with the project helpers, then trim it.
        name = Strings.utf16Trim(new StringBuffer(name)).toString();
        name = Strings.utf16ToUtf8(name);
        stanza.setName(name.trim());

        if (attrStr != null && attrStr.trim().length() > 0) {
            parseAttributes(stanza, attrStr);
        }

        if (stanza.isClosed()) {
            // Parsing is complete: drop the consumed tag from the buffer.
            sb.delete(0, p2 + 1);
        } else {
            consumeBodyAndClosingTag(sb, stanza, p2);
        }
        logger.debug("str:" + sb);
        return stanza;
    }

    /**
     * Builds {@code <name attr='value' ...>} for the stanza, skipping
     * attributes whose value is null, and converts the result with
     * {@code Strings.utf8ToUtf16le}.
     */
    private String buildOpeningTag(Stanza stanza) {
        StringBuilder tag = new StringBuilder();
        tag.append("<").append(stanza.getName());
        Iterator<?> names = stanza.getAttributeNames().iterator();
        while (names.hasNext()) {
            String attr = (String) names.next();
            String value = stanza.getAttribute(attr);
            if (value != null) {
                tag.append(" ").append(attr).append("='").append(value).append("'");
            }
        }
        tag.append(">");
        return Strings.utf8ToUtf16le(tag.toString());
    }

    /**
     * Reads {@code name=value} pairs from the attribute section of a tag and
     * stores them on the stanza. Values may be single-quoted, double-quoted,
     * or unquoted (terminated by a space or the end of the section).
     */
    private void parseAttributes(Stanza stanza, String attrStr) {
        final int len = attrStr.length();
        int pos = 0;
        while (pos < len) {
            int eqPos = attrStr.indexOf('=', pos);
            if (eqPos < 0) { // no '=' left: nothing more to parse
                break;
            }
            logger.debug("pos:" + pos);
            if (eqPos == 0) {
                // '=' with no attribute name at the very start: malformed.
                // Stop here; previously this case never advanced and looped forever.
                break;
            }
            String attr = attrStr.substring(pos, eqPos);
            attr = Strings.utf16Trim(new StringBuffer(attr)).toString();
            attr = Strings.utf16ToUtf8(attr);
            attr = attr.trim();

            int valStart = eqPos + 1;
            logger.debug("attr:" + attr + ",pos:" + pos + ",pos1:" + valStart);
            // Skip blanks between '=' and the value, without running off the end.
            while (valStart < len && attrStr.charAt(valStart) == ' ') {
                valStart++;
            }

            String val;
            if (valStart >= len) {
                // '=' was the last meaningful character: treat the value as empty.
                val = "";
                pos = len;
            } else {
                char first = attrStr.charAt(valStart);
                char delimiter;
                String label;
                if (first == '\'') {
                    delimiter = '\'';
                    label = "sQuot";
                    valStart++;
                } else if (first == '"') {
                    delimiter = '"';
                    label = "dQuot";
                    valStart++;
                } else {
                    delimiter = ' ';
                    label = "blank";
                }
                int valEnd = attrStr.indexOf(delimiter, valStart);
                logger.debug(label + ", pos1:" + valStart + ",pos2:" + valEnd);
                if (valEnd >= valStart) {
                    // '>=' so that an empty quoted value ('' or "") is accepted.
                    val = attrStr.substring(valStart, valEnd);
                    pos = valEnd + 1;
                } else {
                    // No terminator: the value runs to the end of the section.
                    val = attrStr.substring(valStart);
                    pos = len;
                }
            }
            val = Strings.utf16Trim(new StringBuffer(val)).toString();
            val = Strings.utf16ToUtf8(val);
            stanza.setAttribute(attr, val);
            logger.debug("attr:" + attr + ",val:" + val);
        }
    }

    /**
     * Searches {@code sb} after the opening tag (which ends at {@code p2})
     * for a closing tag whose name equals the stanza name. On a match the
     * stanza is closed, its text set to the content in between, and
     * everything through the closing tag is deleted. If no further complete
     * tag exists, only the opening tag is deleted and the stanza stays open.
     */
    private void consumeBodyAndClosingTag(StringBuffer sb, Stanza stanza, int p2) {
        int p3 = p2;
        logger.debug("p2:" + p2);
        while (true) {
            p3 = sb.indexOf("<", p3 + 1);
            logger.debug("p3:" + p3);
            if (p3 <= 0) {
                // No closing half yet: drop the parsed opening tag and stop for now.
                sb.delete(0, p2 + 1);
                return;
            }
            int p4 = sb.indexOf(">", p3 + 1);
            logger.debug("p4:" + p4);
            if (p4 <= 0) {
                // Tag not terminated yet: drop the parsed opening tag and stop for now.
                sb.delete(0, p2 + 1);
                return;
            }
            String backStr = sb.substring(p3 + 1, p4).trim();
            logger.debug("backStr:" + backStr + ",name:" + stanza.getName());
            // An empty tag ("<>") or a non-closing tag cannot end this stanza.
            if (backStr.length() == 0 || backStr.charAt(0) != '/') {
                continue;
            }
            String maybeName = backStr.substring(1);
            maybeName = Strings.utf16Trim(new StringBuffer(maybeName)).toString();
            maybeName = Strings.utf16ToUtf8(maybeName);
            logger.debug("maybeName:" + maybeName + ",name:" + stanza.getName());
            if (stanza.getName().equals(maybeName)) {
                stanza.setClosed(true);
                stanza.setText(sb.substring(p2 + 1, p3));
                // Parsing is complete: drop everything through the closing tag.
                sb.delete(0, p4 + 1);
                return;
            }
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -