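// Source file: articledomparser.java
// (The "Font size:" label that followed was code-viewer page residue, not part of the source.)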
/**
* Created by IntelliJ IDEA.
* User: Administrator
* Date: Jul 17, 2003
* Time: 11:14:38 AM
* To change this template use Options | File Templates.
*/
package Classification;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Element;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import java.util.Vector;
import Kernel.Article;
/**
 * Loads one BBS thread XML file and flattens it into a list of {@link Article} objects:
 * the root post first, followed by one entry per <code>&lt;RE&gt;</code> element.
 *
 * <p>Layout read by this class: a <code>&lt;BBSInfo&gt;</code> element containing
 * <code>&lt;BBS&gt;</code>, <code>&lt;Board&gt;</code> and an <code>&lt;Article&gt;</code>
 * element; the article and every <code>&lt;RE&gt;</code> element contain
 * <code>&lt;Author&gt;</code>, <code>&lt;Alias&gt;</code>, <code>&lt;Content&gt;</code>,
 * <code>&lt;Date&gt;</code> and <code>&lt;IP&gt;</code> (the article also has
 * <code>&lt;Title&gt;</code>).
 *
 * <p>All work happens in the constructor. Callers should check {@link #isContinue()} before
 * using {@link #getArticleVec()}.
 */
public class ArticleDomParser {
    /** Parsed DOM tree; assigned by {@link #freshFromFile()}. */
    private Document doc;
    /** Location handed to the XML parser: {@code ArticleBatch.NewArticlePath + FileName}. */
    private String FilePath;
    /** Bare file name as passed to the constructor; stored on every created Article. */
    private String FileName;
    /** Parsed articles in document order (root post, then replies). Elements are Article. */
    private Vector ArticleVec = new Vector();
    /** False when the XML file could not be loaded. */
    private boolean isContinue = true;

    /**
     * Loads and parses the given file immediately.
     *
     * @param filename name of the XML file, relative to {@code ArticleBatch.NewArticlePath}
     */
    public ArticleDomParser(String filename) {
        this.FileName = filename;
        this.FilePath = ArticleBatch.NewArticlePath + filename;
        System.out.println(FilePath);
        if (freshFromFile()) {
            doParse();
        } else {
            isContinue = false;
        }
    }

    /**
     * @return {@code false} if the XML file could not be loaded; {@code true} otherwise
     *         (including when loading succeeded but extracting the articles failed part-way)
     */
    public boolean isContinue() {
        return isContinue;
    }

    /**
     * Parses {@link #FilePath} into {@link #doc} and normalizes the tree.
     *
     * @return {@code true} on success, {@code false} if the parser could not be created or
     *         the file could not be read or parsed
     */
    private boolean freshFromFile() {
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            DocumentBuilder builder = factory.newDocumentBuilder();
            doc = builder.parse(FilePath);
            doc.normalize();
            return true;
        } catch (Exception ex) {
            // Report the cause instead of discarding it; the caller still only sees "false".
            System.err.println("ArticleDomParser: cannot load " + FilePath + ": " + ex);
            return false;
        }
    }

    /**
     * Extracts the root post and all replies from {@link #doc} into {@link #ArticleVec}.
     * Any failure is printed and stops extraction; entries added before the failure are kept.
     */
    private void doParse() {
        try {
            Element bbsInfo = (Element) doc.getElementsByTagName("BBSInfo").item(0);
            if (bbsInfo == null) {
                throw new IllegalStateException("Missing <BBSInfo> element");
            }
            String bbs = requiredText(bbsInfo, "BBS");
            String board = requiredText(bbsInfo, "Board");

            Element post = (Element) bbsInfo.getElementsByTagName("Article").item(0);
            if (post == null) {
                throw new IllegalStateException("Missing <Article> element");
            }
            String author = requiredText(post, "Author");
            String alias = tidyString(optionalText(post, "Alias", " "));
            String title = tidyString(optionalText(post, "Title", " "));
            String content = tidyString(optionalText(post, "Content", " "));
            String date = requiredText(post, "Date");
            String ip = optionalText(post, "IP", "");
            // NOTE(review): the leading boolean presumably marks the root post (true) versus
            // a reply (false) -- confirm against Kernel.Article.
            ArticleVec.add(new Article(true, bbs, board, author, alias, title, content, date, ip, FileName));

            NodeList replies = doc.getElementsByTagName("RE");
            for (int i = 0; i < replies.getLength(); i++) {
                Element reply = (Element) replies.item(i);
                String reAuthor = requiredText(reply, "Author");
                // Replies get the same sanitising as the root post so every entry in
                // ArticleVec is treated alike.
                String reAlias = tidyString(optionalText(reply, "Alias", " "));
                String reContent = tidyString(optionalText(reply, "Content", " "));
                String reDate = requiredText(reply, "Date");
                // An empty <IP/> is tolerated here exactly as it is for the root post.
                String reIp = optionalText(reply, "IP", "");
                // Replies carry the root post's (already tidied) title.
                ArticleVec.add(new Article(false, bbs, board, reAuthor, reAlias, title, reContent, reDate, reIp, FileName));
            }
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Returns the value of the first child node of the first {@code tag} descendant of
     * {@code parent}.
     *
     * @param parent   element to search under
     * @param tag      tag name to look up
     * @param fallback value returned when the element is absent, has no children, or its
     *                 first child has no node value
     * @return the text found, or {@code fallback}
     */
    private static String optionalText(Element parent, String tag, String fallback) {
        NodeList matches = parent.getElementsByTagName(tag);
        if (matches.getLength() == 0 || matches.item(0).getFirstChild() == null) {
            return fallback;
        }
        String value = matches.item(0).getFirstChild().getNodeValue();
        return value == null ? fallback : value;
    }

    /**
     * Like {@link #optionalText(Element, String, String)} but for mandatory fields.
     *
     * @param parent element to search under
     * @param tag    tag name to look up
     * @return the text of the element
     * @throws IllegalStateException if the element is absent or has no text
     */
    private static String requiredText(Element parent, String tag) {
        String value = optionalText(parent, tag, null);
        if (value == null) {
            throw new IllegalStateException("Missing or empty <" + tag + "> element");
        }
        return value;
    }

    /**
     * @return the live internal list of parsed articles (root post first); not a copy
     */
    public Vector getArticleVec() {
        return this.ArticleVec;
    }

    /**
     * Substitutes selected characters in {@code str}. None of the patterns contains a regex
     * metacharacter, so {@code replaceAll} acts as a plain literal replacement here.
     *
     * @param str text to sanitise; must not be null
     * @return the text with the substitutions applied
     */
    private String tidyString(String str) {
        // NOTE(review): as written, the '#' and '%' substitutions map a character to itself.
        // Possibly the intended replacement characters were lost -- confirm before removing.
        str = str.replaceAll("#","#");
        str = str.replaceAll("@","◎");
        str = str.replaceAll("'","‘");
        str = str.replaceAll("%","%");
        return str;
    }

    /** Manual smoke test: parses one sample file and prints how many articles were found. */
    public static void main(String args[]) {
        ArticleDomParser domparser = new ArticleDomParser("BDWM_SecondHand_2003-07-17 19-25-18.xml");
        System.out.println(domparser.ArticleVec.size());
    }
}
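/*
 * Code-viewer page residue (not part of the Java source) -- keyboard shortcuts:
 *   Copy code:            Ctrl + C
 *   Search code:          Ctrl + F
 *   Fullscreen mode:      F11
 *   Toggle theme:         Ctrl + Shift + D
 *   Show shortcuts:       ?
 *   Increase font size:   Ctrl + =
 *   Decrease font size:   Ctrl + -
 */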