📄 articleclassify.java
字号:
/**
* Created by IntelliJ IDEA.
* User: Administrator
* Date: Jul 17, 2003
* Time: 11:08:34 AM
* To change this template use Options | File Templates.
*/
package Classification;
import Kernel.Article;
import Kernel.TradeInfo;
import java.util.Vector;
import java.util.Enumeration;
import java.util.StringTokenizer;
/**
 * Classifies the first article found in one article XML file and stores the
 * resulting trade records (plus the article itself) through the project's
 * {@code TradeInfo} / {@code Article} persistence methods.
 *
 * <p>Trade type codes used throughout this class:
 * 0 = sale, 1 = buy, 2 = lease, 3 = wantlease, -1 = unknown.
 */
public class ArticleClassify {
    private Classifier classifier;
    private String FileName;

    /**
     * @param filename name of the article XML file to classify
     */
    public ArticleClassify(String filename) {
        this.classifier = new Classifier();
        this.FileName = filename;
    }

    /**
     * Parses the file, then classifies and stores its first article.
     *
     * @return {@code true} if every trade record was inserted; {@code false} if
     *         the file could not be parsed, contained no article, or any trade
     *         record insert failed
     */
    public boolean DoClassify() {
        ArticleDomParser domParser = new ArticleDomParser(FileName);
        if (!domParser.isContinue()) {
            System.out.println(ArticleBatch.moveErrorFile(FileName));
            System.err.println("File "+FileName+" Dom Parse Error and Move into ErrorArticle .");
            return false;
        }
        Vector ArticleVec = domParser.getArticleVec();
        // A file that parses but yields no article must not crash on get(0).
        if (ArticleVec == null || ArticleVec.isEmpty()) {
            System.err.println("File "+FileName+" contains no article .");
            return false;
        }
        Article article = (Article) ArticleVec.get(0);
        return storeInfo(article);
    }

    /**
     * Maps the classifier's verdict on the article title to a trade type code.
     *
     * @return 0 (sale), 1 (buy), 2 (lease), 3 (wantlease), or -1 when the
     *         classifier result is null or not one of the known labels
     */
    private short getTradeType(Article article) {
        String title = article.getTitle();
        String result = classifier.checkTradeType(title);
        // Constant-first equals keeps a null classifier result from throwing.
        if ("SALE".equals(result)) {
            return 0;
        } else if ("BUY".equals(result)) {
            return 1;
        } else if ("LEASE".equals(result)) {
            return 2;
        } else if ("WANTLEASE".equals(result)) {
            return 3;
        }
        return -1;
    }

    /**
     * Looks for commodities in the title first and falls back to the content
     * when the title yields none.
     *
     * @return the commodity items reported by the classifier
     */
    private Vector getCommodity(Article article) {
        String title = article.getTitle();
        String content = article.getContent();
        Vector titleHits = classifier.checkTitleCommodity(title);
        if (titleHits.size() == 0) {
            return classifier.checkContentCommodity(content);
        }
        // Called only for its side effect; presumably this fills
        // classifier.contactVec, which storeInfo reads afterwards (confirm in Classifier).
        classifier.checkContentContact(content);
        return titleHits;
    }

    /**
     * Builds and inserts one trade record per detected commodity (or a single
     * "NULL" commodity record when none was detected), then inserts the article
     * itself. If any trade insert fails, all trade records for the article's
     * XML file are deleted again.
     *
     * @return {@code true} if all trade record inserts succeeded
     */
    private boolean storeInfo(Article article) {
        boolean isSuccess = true;
        // Classify the trade information. A -1 (unknown) type is still stored;
        // the early rejection was disabled in the original code.
        short TradeType = getTradeType(article);
        // getCommodity must run before contactVec is read (see getCommodity).
        Vector Commodity = getCommodity(article);
        String contactStr = buildContactString(classifier.contactVec);

        if (Commodity != null && Commodity.size() != 0) {
            // Once a house commodity has been seen, later non-house items are skipped.
            boolean HouseOnly = false;
            for (Enumeration e = Commodity.elements(); e.hasMoreElements();) {
                String commodityItem = (String) e.nextElement();
                // Items are expected in the form "commodity#description".
                StringTokenizer st = new StringTokenizer(commodityItem, "#");
                if (!st.hasMoreTokens()) {
                    System.err.println("Trade Info "+article.getXMLFile()+" skipped malformed commodity item .");
                    continue;
                }
                String commodity = st.nextToken();
                // A missing description used to throw NoSuchElementException.
                String description = st.hasMoreTokens() ? st.nextToken() : "";

                boolean isHouse = containsIgnoreCase(
                        ClassifyConfig.getInstance().HouseCommodityWords, commodity);
                if (isHouse) {
                    HouseOnly = true;
                } else if (HouseOnly) {
                    continue;
                }

                // The lease adjustment applies to this item only, so one
                // lease-only commodity no longer changes the type of later items.
                short itemTradeType = adjustForLeaseOnly(TradeType, commodity);

                if (!insertTrade(itemTradeType, commodity, description, contactStr, article)) {
                    isSuccess = false;
                }
            }
        } else {
            // No commodity detected: store title and content under a placeholder commodity.
            String commodity = "NULL";
            String description = article.getTitle()+"<$TITLE$>"+article.getContent();
            if (!insertTrade(TradeType, commodity, description, contactStr, article)) {
                isSuccess = false;
            }
        }

        if (!isSuccess) {
            // Roll back whatever trade records were written for this file.
            TradeInfo.delTradeInfo(article.getXMLFile());
        } else {
            // Insert the original BBS article into the database.
            if (article.insertBBSInfo()) {
                System.out.println("Article "+article.getXMLFile()+" Info Insert successfully !");
            } else {
                System.err.println("Article "+article.getXMLFile()+" Info Insert Failed !");
            }
        }
        return isSuccess;
    }

    /** Joins the contact entries into one space-separated string; a null vector yields " ". */
    private String buildContactString(Vector contacts) {
        StringBuffer joined = new StringBuffer(" ");
        if (contacts != null) {
            for (Enumeration e = contacts.elements(); e.hasMoreElements();) {
                joined.append((String) e.nextElement()).append(" ");
            }
        }
        return joined.toString();
    }

    /** Returns true if {@code words} contains {@code candidate}, ignoring case. */
    private boolean containsIgnoreCase(Vector words, String candidate) {
        for (Enumeration e = words.elements(); e.hasMoreElements();) {
            if (candidate.equalsIgnoreCase((String) e.nextElement())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Converts sale (0) to lease (2) and buy (1) to wantlease (3) when the
     * commodity contains any of the configured lease-only words; other trade
     * types are returned unchanged.
     */
    private short adjustForLeaseOnly(short tradeType, String commodity) {
        for (Enumeration e = ClassifyConfig.getInstance().OnlyLeaseWords.elements(); e.hasMoreElements();) {
            String leaseWord = (String) e.nextElement();
            if (commodity.indexOf(leaseWord) != -1) {
                if (tradeType == 0) {
                    return 2;
                } else if (tradeType == 1) {
                    return 3;
                }
            }
        }
        return tradeType;
    }

    /** Inserts one trade record and logs the outcome; returns whether the insert succeeded. */
    private boolean insertTrade(short tradeType, String commodity, String description,
                                String contactStr, Article article) {
        TradeInfo tradeinfo = new TradeInfo(tradeType, commodity,
                description, contactStr, article.getDate(), article.getXMLFile());
        if (tradeinfo.insertTradeInfo()) {
            System.out.println("Trade Info "+article.getXMLFile()+" Info Insert successfully !");
            return true;
        }
        System.err.println("Trade Info "+article.getXMLFile()+" Info Insert Failed !");
        return false;
    }

    /** Manual smoke test: classifies one hard-coded sample file and prints the result. */
    public static void main(String args[]) {
        System.out.println((new ArticleClassify("bbs.pku.edu.cn_SecondHand_2003-09-14 19-43-19.xml")).DoClassify());
    }
}
/*
private Vector getTitleFromFile(String filepath)
{
Vector vec=new Vector();
try{
BufferedReader reader=new BufferedReader(new FileReader(filepath));
String str=null;
while ((str=reader.readLine())!=null){
vec.add(str);
}
reader.close();
}
catch (Exception e) {
System.err.println("ERROR:Can not read file "+filepath+" !");
e.printStackTrace();
return null;
}
return vec;
}
*/
/*
public Vector getCommodity(){
String title = article.getTitle();
//String content = article.getContent();
TestSlicer tSlicer=new TestSlicer(); //直接读入一个字符串
Vector TitleWords = tSlicer.doSlicer(title);
Vector kinds = new Vector();
int wordNum = 0;
int goodsNum = -1;
for (Enumeration e = TitleWords.elements();e.hasMoreElements();){
Word word = (Word)e.nextElement();
wordNum++;
if (word.getAttri().equalsIgnoreCase("n")) {
String result = ClassifyConfig.getInstance().checkCommodity(word.getContent());
if (result != null){
if (goodsNum == wordNum-1){
kinds.set(kinds.size()-1,result);
}else{
kinds.add(result);
}
goodsNum = wordNum;
}
}
if (word.getAttri().equalsIgnoreCase("u")){
if (kinds.size()>0)
kinds.remove(kinds.size()-1);
}
}
return kinds;
}
*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -