📄 classifier.java
字号:
/**
* Created by IntelliJ IDEA.
* User: Administrator
* Date: Jul 21, 2003
* Time: 4:57:30 PM
* To change this template use Options | File Templates.
*/
package Classification;
import java.util.Enumeration;
import java.util.Vector;
import java.util.StringTokenizer;
/**
 * Keyword-driven classifier for posting titles and bodies.
 *
 * <p>All word lists are read from {@code ClassifyConfig.getInstance()} on every
 * call. Lines that look like contact information are appended to
 * {@link #contactVec}; this class only ever adds to that vector and never
 * clears it, so it accumulates across calls on the same instance.
 */
public class Classifier {

    /** Value returned by the trade-type lookups when nothing matches. */
    private static final String NO_TRADE_TYPE = "NULL";

    /** Upper bound on the number of sentences produced by {@link #divideSentence}. */
    private static final int MAX_SENTENCES = 30;

    /** Contact lines (as {@code String}s) collected by the check* methods. */
    public Vector contactVec = new Vector();

    /**
     * Maps a trade-type keyword to its category name.
     *
     * <p>The buy, sale, lease and want-lease word lists are consulted in that
     * order; the first list containing an exact match decides the result.
     *
     * @param keyword the keyword to look up
     * @return {@code "BUY"}, {@code "SALE"}, {@code "LEASE"}, {@code "WANTLEASE"},
     *         or the string {@code "NULL"} when no list contains the keyword
     */
    public String getTradeTypeName(String keyword) {
        if (containsWord(ClassifyConfig.getInstance().BuyTypeWords.elements(), keyword)) {
            return "BUY";
        }
        if (containsWord(ClassifyConfig.getInstance().SaleTypeWords.elements(), keyword)) {
            return "SALE";
        }
        if (containsWord(ClassifyConfig.getInstance().LeaseTypeWords.elements(), keyword)) {
            return "LEASE";
        }
        if (containsWord(ClassifyConfig.getInstance().WantLeaseTypeWords.elements(), keyword)) {
            return "WANTLEASE";
        }
        return NO_TRADE_TYPE;
    }

    /** Returns true when any {@code String} produced by {@code words} equals {@code keyword}. */
    private static boolean containsWord(Enumeration words, String keyword) {
        while (words.hasMoreElements()) {
            String word = (String) words.nextElement();
            if (word.equals(keyword)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Finds the commodity whose keyword list contains the given keyword.
     *
     * @param keyword the keyword to look up; {@code null} yields {@code null}
     * @return the first commodity key (in the map's enumeration order) whose
     *         keyword vector contains {@code keyword}, or {@code null} if none does
     */
    public String getCommodityName(String keyword) {
        if (keyword == null) {
            return null;
        }
        for (Enumeration e = ClassifyConfig.getInstance().CommodityKeyWords.keys(); e.hasMoreElements();) {
            String commodity = (String) e.nextElement();
            Vector keyVec = (Vector) ClassifyConfig.getInstance().CommodityKeyWords.get(commodity);
            // Vector.contains compares with keyword.equals(element), same as the explicit loop.
            if (keyVec.contains(keyword)) {
                return commodity;
            }
        }
        return null;
    }

    /**
     * Determines the trade type of a title.
     *
     * <p>The title is lower-cased, then the first entry of the configured
     * all-type word list that occurs in it as a substring is mapped through
     * {@link #getTradeTypeName}.
     *
     * @param title the title to inspect
     * @return the trade-type name, or the string {@code "NULL"} when no type word occurs
     */
    public String checkTradeType(String title) {
        String lowered = title.toLowerCase();
        for (Enumeration e = ClassifyConfig.getInstance().AllTypeWords.elements(); e.hasMoreElements();) {
            String keyword = (String) e.nextElement();
            if (lowered.indexOf(keyword) != -1) {
                return getTradeTypeName(keyword);
            }
        }
        return NO_TRADE_TYPE;
    }

    /**
     * Replaces the {@code oldword.length()} characters of {@code str} starting
     * at {@code pos} with {@code newword}.
     */
    private static String replaceStr(int pos, String oldword, String newword, String str) {
        String head = str.substring(0, pos);
        String tail = str.substring(pos + oldword.length());
        return head + newword + tail;
    }

    /** Returns true for the ASCII digits '0' through '9' only. */
    private static boolean isAsciiDigit(char c) {
        return c >= '0' && c <= '9';
    }

    /**
     * Splits text into trimmed sentences.
     *
     * <p>Every configured punctuation string is rewritten to {@code '#'} and
     * the result is tokenized on {@code '#'}; at most {@value #MAX_SENTENCES}
     * sentences are returned.
     */
    private Vector divideSentence(String str) {
        for (int i = 0; i < ClassifyConfig.getInstance().punctuation.length; i++) {
            String mark = ClassifyConfig.getInstance().punctuation[i];
            if (mark.length() == 0) {
                continue; // an empty mark matches at every position and would never terminate
            }
            int npos = str.indexOf(mark);
            while (npos != -1) {
                str = replaceStr(npos, mark, "#", str);
                // Resume after the inserted '#', so a mark that itself contains '#' cannot loop forever.
                npos = str.indexOf(mark, npos + 1);
            }
        }

        Vector sentences = new Vector();
        StringTokenizer st = new StringTokenizer(str, "#");
        while (st.hasMoreTokens() && sentences.size() < MAX_SENTENCES) {
            sentences.add(st.nextToken().trim());
        }
        return sentences;
    }

    /**
     * Decides whether a line looks like contact information.
     *
     * <p>A line qualifies when any of the following holds, checked in order:
     * <ol>
     *   <li>it contains one of the configured contact keywords;</li>
     *   <li>some occurrence of a configured "number before" word is directly
     *       preceded by an ASCII digit;</li>
     *   <li>it contains a run of consecutive ASCII digits whose length reaches
     *       one of the configured number lengths.</li>
     * </ol>
     *
     * @param lineText the line to test
     * @return {@code lineText} itself when it qualifies, otherwise {@code null}
     * @throws NumberFormatException if a configured number length is not an integer
     */
    public String checkContactLine(String lineText) {
        for (Enumeration e = ClassifyConfig.getInstance().ContactKeyWords.elements(); e.hasMoreElements();) {
            String keyword = (String) e.nextElement();
            if (lineText.indexOf(keyword) != -1) {
                return lineText;
            }
        }

        for (Enumeration e = ClassifyConfig.getInstance().ContactNumberBefore.elements(); e.hasMoreElements();) {
            String numberBefore = (String) e.nextElement();
            if (numberBefore.length() == 0) {
                continue; // an empty word carries no information and would match everywhere
            }
            // Look at every occurrence, not just the first: a later one may follow a digit.
            for (int pos = lineText.indexOf(numberBefore);
                 pos != -1;
                 pos = lineText.indexOf(numberBefore, pos + 1)) {
                if (pos > 0 && isAsciiDigit(lineText.charAt(pos - 1))) {
                    return lineText;
                }
            }
        }

        for (Enumeration e = ClassifyConfig.getInstance().ContactNumberLength.elements(); e.hasMoreElements();) {
            int numberLength = Integer.parseInt((String) e.nextElement());
            int run = 0; // length of the digit run ending at the current character
            for (int i = 0; i < lineText.length(); i++) {
                if (isAsciiDigit(lineText.charAt(i))) {
                    run++;
                    if (run == numberLength) {
                        return lineText;
                    }
                } else {
                    run = 0;
                }
            }
        }
        return null;
    }

    /**
     * Splits {@code text} into sentences and appends every sentence accepted
     * by {@link #checkContactLine} to {@link #contactVec}.
     *
     * @param text the body text to scan
     */
    public void checkContentContact(String text) {
        Vector sentences = divideSentence(text);
        for (Enumeration e = sentences.elements(); e.hasMoreElements();) {
            String contactStr = checkContactLine((String) e.nextElement());
            if (contactStr != null) {
                contactVec.add(contactStr);
            }
        }
    }

    /**
     * Splits {@code text} into sentences and runs {@link #checkTitleCommodity}
     * on each one, concatenating the results.
     *
     * @param text the body text to scan
     * @return a vector of {@code "commodity#sentence"} strings, possibly empty
     */
    public Vector checkContentCommodity(String text) {
        Vector commodityVec = new Vector();
        Vector sentences = divideSentence(text);
        for (Enumeration e = sentences.elements(); e.hasMoreElements();) {
            commodityVec.addAll(checkTitleCommodity((String) e.nextElement()));
        }
        return commodityVec;
    }

    /**
     * Extracts the commodities mentioned in a title.
     *
     * <p>Side effect: if the title passes {@link #checkContactLine} it is
     * appended to {@link #contactVec}.
     *
     * <p>Algorithm: each configured keyword that occurs in the lower-cased
     * title gets the next index {@code n}, and all of its occurrences are
     * replaced by the marker {@code @n@}. A keyword is then discarded when one
     * of its markers is immediately followed by another {@code '@'} (normally
     * the start of the next marker), or by one of the configured {@code DE}
     * connectors and then {@code '@'}. The commodities of the surviving
     * keywords are returned.
     *
     * <p>Known limitation: markers live in the same string as the title, so a
     * later keyword consisting only of digits can still match inside an
     * earlier marker.
     *
     * @param title the title (or sentence) to analyse
     * @return a vector of distinct {@code "commodity#title"} strings, in
     *         keyword-match order; empty when nothing is found
     */
    public Vector checkTitleCommodity(String title) {
        String contactStr = checkContactLine(title);
        if (contactStr != null) {
            contactVec.add(contactStr);
        }

        String tempTitle = title.toLowerCase();
        // goods.get(n) is the commodity behind marker "@n@", or null once discarded.
        // A growable vector replaces the former 50-slot array, which overflowed
        // as soon as a 51st keyword matched.
        Vector goods = new Vector();
        for (Enumeration e = ClassifyConfig.getInstance().AllKeyWords.elements(); e.hasMoreElements();) {
            String keyword = (String) e.nextElement();
            if (keyword.length() == 0) {
                continue; // an empty keyword matches everywhere and would never terminate
            }
            int pos = tempTitle.indexOf(keyword);
            if (pos == -1) {
                continue;
            }
            String marker = "@" + goods.size() + "@";
            goods.add(getCommodityName(keyword));
            while (pos != -1) {
                tempTitle = replaceStr(pos, keyword, marker, tempTitle);
                // Resume after the marker so a keyword such as "0" cannot
                // re-match inside its own marker forever.
                pos = tempTitle.indexOf(keyword, pos + marker.length());
            }
        }

        // A marker directly followed by '@' is dropped in favour of what follows.
        tempTitle = dropMarkersBefore("@@", tempTitle, goods);
        // Likewise for "<marker><DE><marker>".
        for (int i = 0; i < ClassifyConfig.getInstance().DE.length; i++) {
            tempTitle = dropMarkersBefore("@" + ClassifyConfig.getInstance().DE[i] + "@", tempTitle, goods);
        }

        Vector vec = new Vector();
        for (int i = 0; i < goods.size(); i++) {
            String commodity = (String) goods.get(i);
            if (commodity != null) {
                String entry = commodity + "#" + title;
                if (!vec.contains(entry)) {
                    vec.add(entry);
                }
            }
        }
        return vec;
    }

    /**
     * Removes every marker whose closing {@code '@'} is the first character of
     * an occurrence of {@code pattern}, clearing the matching slot in
     * {@code goods}.
     *
     * <p>Occurrences not preceded by a valid marker (for example a literal
     * {@code "@@"} typed in the title) are skipped instead of being parsed
     * blindly.
     *
     * @param pattern a string starting with {@code '@'}
     * @param text    the marker-annotated title
     * @param goods   commodity per marker index; cleared slots are set to {@code null}
     * @return {@code text} with the affected markers removed
     */
    private static String dropMarkersBefore(String pattern, String text, Vector goods) {
        int from = 0;
        int hit;
        while ((hit = text.indexOf(pattern, from)) != -1) {
            int open = markerStart(text, hit);
            int index = (open == -1) ? -1 : markerIndex(text.substring(open + 1, hit));
            if (index >= 0 && index < goods.size()) {
                goods.set(index, null);
                text = text.substring(0, open) + text.substring(hit + 1);
                from = 0; // the text changed; rescan from the start
            } else {
                from = hit + 1; // not a marker: step past this occurrence
            }
        }
        return text;
    }

    /**
     * Returns the position of the opening {@code '@'} of a marker
     * {@code @digits@} whose closing {@code '@'} sits at {@code closePos}, or
     * -1 when the preceding characters do not form such a marker.
     */
    private static int markerStart(String text, int closePos) {
        int i = closePos - 1;
        while (i >= 0 && isAsciiDigit(text.charAt(i))) {
            i--;
        }
        boolean hasDigits = i < closePos - 1;
        if (hasDigits && i >= 0 && text.charAt(i) == '@') {
            return i;
        }
        return -1;
    }

    /**
     * Parses the digits of a marker; returns -1 unless they are the canonical
     * decimal form of a non-negative int (no leading zeros, no overflow).
     */
    private static int markerIndex(String digits) {
        if (digits.length() == 0 || digits.length() > 9) {
            return -1;
        }
        int value = Integer.parseInt(digits);
        return String.valueOf(value).equals(digits) ? value : -1;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -