📄 documentclassifier.java
字号:
/*
* Created on 08.04.2005
*
*/
package de.uni_bremen.informatik.p2p.plugins.filesharing.control.PeerGroupSearch;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Locale;
import java.util.TreeMap;
import org.apache.log4j.Logger;
import de.uni_bremen.informatik.p2p.plugins.filesharing.data.FilesharingState;
import de.uni_bremen.informatik.p2p.plugins.filesharing.data.Share;
/**
 * Classifies the shared documents by file extension.
 *
 * When run, the classifier walks {@code FilesharingState.sharedfiles},
 * collects the extension of every non-directory file, counts how many
 * fall into each category and stores the resulting category bit mask in
 * {@code FilesharingState.categories}.
 *
 * The known extensions are loaded once, in the constructor, from the
 * classpath resources {@code /audio.txt}, {@code /video.txt} and
 * {@code /picture.txt}. Everything that is not listed there counts as
 * {@link #MISC}.
 *
 * @author NTB
 */
public class DocumentClassifier extends Thread {

    /** Logger for warnings, debugs and fatals */
    private static final Logger log = Logger.getLogger(DocumentClassifier.class);

    /** Categories. The values are distinct bits so they can be OR-ed into one mask. */
    public static final int AUDIO = 1;
    public static final int MISC = 2;
    public static final int VIDEO = 4;
    public static final int PIC = 8;

    /**
     * Share of files (0..1) a category has to exceed to be set.
     * With 0, a single file of a category is enough.
     */
    private static final float CATEGORYTHRESHOLD = 0;

    /** Extensions (without the dot) of the shared files gathered during a run */
    private LinkedList extensionList = new LinkedList();

    /** Category counters */
    private int audioCnt = 0;
    private int miscCnt = 0;
    private int videoCnt = 0;
    private int picCnt = 0;

    /** Time of start for debug purposes */
    private long startTime;

    /** Maps holding the known extensions (upper-cased extension -> description) */
    private final TreeMap audioExtensionsMap;
    private final TreeMap videoExtensionsMap;
    private final TreeMap pictureExtensionsMap;

    /**
     * Creates a classifier and loads the known extensions from the
     * classpath. A resource that is missing or unreadable is logged and
     * results in an empty (or partial) map instead of an exception.
     */
    public DocumentClassifier() {
        audioExtensionsMap = readExtensionFiles("/audio.txt");
        videoExtensionsMap = readExtensionFiles("/video.txt");
        pictureExtensionsMap = readExtensionFiles("/picture.txt");
    }

    /**
     * Gathers the extensions of all shared files, classifies them and
     * publishes the result. Any exception is logged and ends the run.
     *
     * @see java.lang.Runnable#run()
     */
    public void run() {
        startTime = System.currentTimeMillis();
        try {
            // Start from a clean state so a repeated call does not
            // accumulate the results of an earlier one.
            this.init();

            // Gather extensions from share list
            Iterator it = FilesharingState.sharedfiles.iterator();
            while (it.hasNext()) {
                Share share = (Share) it.next();
                File file = share.file;
                if (file != null && !file.isDirectory()) {
                    // Files without any extension are not counted at all.
                    String extension = extractExtension(file.getName());
                    if (extension != null) {
                        extensionList.add(extension);
                    }
                }
            }

            // start classification
            this.startClassification();

            // evaluate results and take action
            this.evaluateResultsAndCategorize();
        } catch (Exception e) {
            // Hand the throwable to the logger so the stack trace ends up
            // in the log instead of on stderr.
            log.fatal("DocumentClassifier failed: " + e, e);
        }
        log.debug("DocumentClassifier stopped (" + (System.currentTimeMillis() - startTime) + "ms)");
    }

    /**
     * Evaluates the percentage of each category, builds the category bit
     * mask from all categories above {@link #CATEGORYTHRESHOLD}, stores it
     * in {@code FilesharingState.categories} and triggers
     * {@code remanageCategories()} on the network handler.
     */
    private void evaluateResultsAndCategorize() {
        int nrOfFiles = audioCnt + videoCnt + picCnt + miscCnt;
        float audioPercentage = percentage(audioCnt, nrOfFiles);
        float videoPercentage = percentage(videoCnt, nrOfFiles);
        float picPercentage = percentage(picCnt, nrOfFiles);
        float miscPercentage = percentage(miscCnt, nrOfFiles);

        int categories = 0;
        if (audioPercentage > CATEGORYTHRESHOLD) {
            categories = categories | AUDIO;
        }
        if (videoPercentage > CATEGORYTHRESHOLD) {
            categories = categories | VIDEO;
        }
        if (picPercentage > CATEGORYTHRESHOLD) {
            categories = categories | PIC;
        }
        if (miscPercentage > CATEGORYTHRESHOLD) {
            categories = categories | MISC;
        }

        log.debug("\n\n\n Document Classification Results");
        log.debug("\nAudio: " + Float.toString(audioPercentage));
        log.debug("\nVideo: " + Float.toString(videoPercentage));
        log.debug("\nPic: " + Float.toString(picPercentage));
        log.debug("\nMisc: " + Float.toString(miscPercentage));
        log.debug("\nThreshold: " + CATEGORYTHRESHOLD);
        log.debug("Category Audio: " + ((categories & AUDIO) == AUDIO));
        log.debug("Category Video: " + ((categories & VIDEO) == VIDEO));
        log.debug("Category Pic: " + ((categories & PIC) == PIC));
        log.debug("Category Misc: " + ((categories & MISC) == MISC));

        FilesharingState.categories = categories;
        FilesharingState.networkHandler.remanageCategories();
    }

    /**
     * Returns the fraction {@code count / total}, or 0 when there are no
     * files at all (avoids a NaN result from dividing by zero).
     */
    private static float percentage(int count, int total) {
        if (total == 0) {
            return 0f;
        }
        return (float) count / total;
    }

    /**
     * Resets the gathered extensions and all category counters.
     */
    private void init() {
        extensionList.clear();
        audioCnt = 0;
        miscCnt = 0;
        videoCnt = 0;
        picCnt = 0;
    }

    /**
     * Counts the gathered extensions per category.
     */
    private void startClassification() {
        Iterator it = extensionList.iterator();
        while (it.hasNext()) {
            String extension = (String) it.next();
            switch (getExtensionType(extension)) {
                case AUDIO:
                    audioCnt++;
                    break;
                case MISC:
                    miscCnt++;
                    break;
                case VIDEO:
                    videoCnt++;
                    break;
                case PIC:
                    picCnt++;
                    break;
            }
        }
    }

    /**
     * Returns the type (AUDIO, VIDEO, PIC, MISC) of the given filename.
     *
     * @param fileName name of the file; {@code null} or a name without an
     *        extension is classified as {@link #MISC}
     * @return one of {@link #AUDIO}, {@link #VIDEO}, {@link #PIC}, {@link #MISC}
     */
    public int getFileType(String fileName) {
        String extension = (fileName == null) ? null : extractExtension(fileName);
        if (extension == null) {
            extension = "";
        }
        return getExtensionType(extension);
    }

    /**
     * Returns the text after the last dot of the given file name, or
     * {@code null} if the name contains no dot.
     */
    private static String extractExtension(String fileName) {
        int extPos = fileName.lastIndexOf(".");
        if (extPos == -1) {
            return null;
        }
        return fileName.substring(extPos + 1);
    }

    /**
     * Returns the type of the given extension. The lookup is
     * case-insensitive; audio is checked first, then video, then picture.
     *
     * @param extension extension without the leading dot
     * @return one of AUDIO, VIDEO, PIC, or MISC if the extension is unknown
     */
    private int getExtensionType(String extension) {
        // Fixed locale: with the default locale the result depends on the
        // user's settings (e.g. Turkish maps 'i' to a dotted capital I) and
        // would no longer match the keys stored by readExtensionFiles.
        extension = extension.toUpperCase(Locale.ENGLISH);
        if (audioExtensionsMap.containsKey(extension)) {
            return AUDIO;
        }
        if (videoExtensionsMap.containsKey(extension)) {
            return VIDEO;
        }
        if (pictureExtensionsMap.containsKey(extension)) {
            return PIC;
        }
        // Standard clause
        return MISC;
    }

    /**
     * Reads the known extensions from a classpath resource. Each line is
     * expected to be {@code extension<TAB>description}; lines without a
     * tab are skipped.
     *
     * @param extensionsfile resource name, resolved relative to this class
     * @return map from upper-cased extension to description; empty if the
     *         resource does not exist, partial if reading fails midway
     */
    private TreeMap readExtensionFiles(String extensionsfile) {
        TreeMap treeMap = new TreeMap();
        InputStream is = DocumentClassifier.class.getResourceAsStream(extensionsfile);
        if (is == null) {
            // getResourceAsStream returns null for a missing resource;
            // wrapping that in a reader would throw a NullPointerException.
            log.fatal("FileSharing.DocumentClassifier.readExtensionFiles: resource not found: "
                    + extensionsfile);
            return treeMap;
        }
        BufferedReader r = new BufferedReader(new InputStreamReader(is));
        try {
            String line;
            while ((line = r.readLine()) != null) {
                String[] split = line.split("\t");
                if (split.length > 1) {
                    String extension = split[0].toUpperCase(Locale.ENGLISH);
                    String description = split[1];
                    treeMap.put(extension, description);
                }
            }
        } catch (IOException e) {
            log.fatal("FileSharing.DocumentClassifier.readExtensionFiles: " + e, e);
        } finally {
            // Always release the underlying stream.
            try {
                r.close();
            } catch (IOException e) {
                log.warn("FileSharing.DocumentClassifier.readExtensionFiles: could not close "
                        + extensionsfile, e);
            }
        }
        return treeMap;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -