// Source file: entropy.java
// (Code-viewer residue: "Font size:")
package yus.excerse1;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.StringTokenizer;
/**
 * Computes the word-level entropy, in bits, of whitespace-tokenized text.
 *
 * <p>Every line is split with {@link StringTokenizer}'s default delimiters,
 * each token is lower-cased, and the entropy is
 * {@code -sum(p(w) * log2(p(w)))} over the distinct tokens {@code w}, where
 * {@code p(w)} is the token's count divided by the total number of tokens.
 */
public class Entropy {

    /** Location of the Chinese corpus read by {@link #retChnEntropy()}. */
    private static final String CHN_PATH = "D:\\postGraduate\\chn.txt";

    /** Location of the English corpus read by {@link #retEngEntropy()}. */
    private static final String ENG_PATH = "D:\\postGraduate\\eng.txt";

    /**
     * Prints the entropy of both corpora followed by the elapsed wall-clock
     * time in milliseconds.
     *
     * @param args ignored
     * @throws IOException if either corpus file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        long start = System.currentTimeMillis();
        double chnEntropy = retChnEntropy();
        System.out.println("chnEntropy:" + chnEntropy);
        double engEntropy = retEngEntropy();
        System.out.println("engEntropy:" + engEntropy);
        long end = System.currentTimeMillis();
        System.out.println("time lasts " + (end - start) + "ms");
    }

    /**
     * Returns the word entropy of the Chinese corpus file.
     *
     * @return entropy in bits
     * @throws IOException if the file cannot be opened or read
     */
    public static double retChnEntropy() throws IOException {
        return entropyOfFile(CHN_PATH);
    }

    /**
     * Returns the word entropy of the English corpus file.
     *
     * @return entropy in bits
     * @throws IOException if the file cannot be opened or read
     */
    public static double retEngEntropy() throws IOException {
        return entropyOfFile(ENG_PATH);
    }

    /**
     * Computes the word entropy of all text available from {@code reader}.
     *
     * <p>Tokens are lower-cased with {@link Locale#ROOT} so the result does
     * not depend on the default locale of the machine running the program.
     * The reader is consumed to the end but not closed; closing it remains
     * the caller's responsibility.
     *
     * @param reader source of the text
     * @return entropy in bits; {@code 0} when the input contains no tokens
     * @throws IOException if reading from {@code reader} fails
     */
    public static double entropyOf(Reader reader) throws IOException {
        BufferedReader lines = reader instanceof BufferedReader
                ? (BufferedReader) reader
                : new BufferedReader(reader);

        Map<String, Integer> counts = new HashMap<String, Integer>();
        long total = 0;
        for (String line = lines.readLine(); line != null; line = lines.readLine()) {
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                String word = tokenizer.nextToken().toLowerCase(Locale.ROOT);
                Integer seen = counts.get(word);
                counts.put(word, seen == null ? 1 : seen + 1);
                total++;
            }
        }

        double bits = 0;
        for (int count : counts.values()) {
            double p = (double) count / total;
            bits -= p * log(p, 2);
        }
        return bits;
    }

    /**
     * Returns the logarithm of {@code value} in the given {@code base}.
     *
     * @param value number whose logarithm is taken
     * @param base  base of the logarithm
     * @return {@code Math.log(value) / Math.log(base)}
     */
    static public double log(double value, double base) {
        return Math.log(value) / Math.log(base);
    }

    /**
     * Opens {@code path}, computes its word entropy and closes the file even
     * when reading fails part-way through.
     */
    private static double entropyOfFile(String path) throws IOException {
        // The platform default charset is kept on purpose: the encoding of the
        // corpus files is not known from this class.
        BufferedReader br = new BufferedReader(new InputStreamReader(
                new FileInputStream(path)));
        try {
            return entropyOf(br);
        } finally {
            br.close();
        }
    }
}
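// Code-viewer residue (keyboard shortcut help), kept as comments:
//   Copy code:          Ctrl + C
//   Search code:        Ctrl + F
//   Full-screen mode:   F11
//   Toggle theme:       Ctrl + Shift + D
//   Show shortcuts:     ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -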