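// File: getword.java
// (A "font size" control label from the web code viewer followed here; it is page residue, not program text.)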
package test.poi;
import java.io.*;
import org.textmining.text.extraction.WordExtractor;
/**
 * Scans the text of a Word document for a fixed list of disallowed words and
 * prints, for every word that occurs, how many times it occurs and a short
 * excerpt around each occurrence.
 */
public class getWord {

    /** Words and punctuation marks that are reported when they occur in the text. */
    private static final String[] FILTER_WORDS = { "!", "我们", "你们", "他们", "我", "你", "他",
            "它", "它们", "这儿", "email", "mail", "WIN", "windows", "WINDOWS",
            "座标", "缺省值", "唯一", "帐号", "做为", "图象", "存贮", "其它", "这里", "?" };

    /**
     * Size of the excerpt window: up to this many characters before the start
     * of a match, and up to this many characters counted from the start of the
     * match (so the matched word itself is part of the second half).
     */
    private static final int CONTEXT_CHARS = 20;

    /** Document analysed by {@link #main(String[])} when no path is given on the command line. */
    private static final String DEFAULT_DOCUMENT = "E:\\myweb\\hibernate\\目录和正文\\第2章.doc";

    /**
     * Opens the given file and returns the text that {@code WordExtractor}
     * extracts from it.
     *
     * @param filename path of the document to read
     * @return the extracted text, or {@code null} if opening the file or
     *         extracting its text raised an exception (the failure is reported
     *         on {@code System.err})
     */
    public static String run(String filename) {
        FileInputStream in = null;
        try {
            in = new FileInputStream(filename);
            WordExtractor extractor = new WordExtractor();
            System.out.println("开始分析文件-->" + filename);
            System.out.println("分析结果如下:\n");
            return extractor.extractText(in);
        } catch (Exception ex) {
            // Callers treat null as "no text"; report the cause instead of
            // dropping it silently.
            System.err.println("读取文件失败-->" + filename + ": " + ex);
            return null;
        } finally {
            // Always release the file handle, whether extraction succeeded or not.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
    }

    /**
     * Checks the text against every entry of the filter list. For each entry
     * that occurs, prints a numbered line with the word and its occurrence
     * count, followed by an excerpt for each occurrence.
     *
     * @param text the text to check; when {@code null} an error line is
     *             written to {@code System.err} and nothing else happens
     */
    public static void findWord(String text) {
        if (text == null) {
            System.err.println("给的字符串是空!");
            return;
        }
        int hitNumber = 0;
        for (int i = 0; i < FILTER_WORDS.length; i++) {
            String word = FILTER_WORDS[i];
            if (text.indexOf(word) < 0) {
                continue;
            }
            hitNumber++;
            int count = Counts(text, word);
            System.out.println(hitNumber + ".含有不合法的词汇<-----" + word
                    + "------>,共" + count + "个。");
            printInstr(text, word);
        }
    }

    /**
     * Prints one excerpt line for every non-overlapping occurrence of
     * {@code igStr} in {@code source}. Each excerpt covers up to
     * {@link #CONTEXT_CHARS} characters before the match and at least the
     * whole matched word after its start, clamped to the bounds of the text.
     * <p>
     * The scan is iterative and steps by the word length, so the number of
     * excerpts equals the value returned by {@link #Counts(String, String)}.
     *
     * @param source the text to search; {@code null} or empty prints nothing
     * @param igStr  the word to locate; {@code null} or empty prints nothing
     */
    public static void printInstr(String source, String igStr) {
        if (source == null || source.length() == 0
                || igStr == null || igStr.length() == 0) {
            return;
        }
        int wordLength = igStr.length();
        // Make sure the matched word is never cut out of its own excerpt.
        int charsFromMatch = Math.max(CONTEXT_CHARS, wordLength);
        int matchAt = source.indexOf(igStr);
        while (matchAt >= 0) {
            int from = Math.max(0, matchAt - CONTEXT_CHARS);
            int to = Math.min(source.length(), matchAt + charsFromMatch);
            System.out.println("@@@@@出现不法字符的大体位置为:……"
                    + source.substring(from, to) + "……\n");
            matchAt = source.indexOf(igStr, matchAt + wordLength);
        }
    }

    /**
     * Counts the non-overlapping occurrences of {@code SubStr} in
     * {@code ParentStr}, scanning from left to right.
     *
     * @param ParentStr the text to search
     * @param SubStr    the word to count
     * @return the number of occurrences; 0 when either argument is
     *         {@code null} or {@code SubStr} is empty
     */
    public static int Counts(String ParentStr, String SubStr) {
        if (ParentStr == null || SubStr == null || SubStr.length() == 0) {
            return 0;
        }
        int step = SubStr.length();
        int count = 0;
        int pos = ParentStr.indexOf(SubStr);
        while (pos >= 0) {
            count++;
            pos = ParentStr.indexOf(SubStr, pos + step);
        }
        return count;
    }

    /**
     * Returns a copy of {@code source} in which every non-overlapping
     * occurrence of {@code oldString} is replaced by {@code newString}.
     *
     * @param source    the text to process; {@code null} yields {@code null}
     * @param oldString the text to look for; when {@code null} or empty the
     *                  source is returned unchanged (an empty search string
     *                  matches at every position and would never advance)
     * @param newString the replacement text
     * @return the text with all replacements applied
     */
    public static String Replace(String source, String oldString,
            String newString) {
        if (source == null) {
            return null;
        }
        if (oldString == null || oldString.length() == 0) {
            return source;
        }
        StringBuffer output = new StringBuffer();
        int oldLength = oldString.length();
        int copyFrom = 0;
        int pos = source.indexOf(oldString, copyFrom);
        while (pos >= 0) {
            output.append(source.substring(copyFrom, pos));
            output.append(newString);
            copyFrom = pos + oldLength;
            pos = source.indexOf(oldString, copyFrom);
        }
        // Copy whatever follows the last match (or the whole text if none matched).
        if (copyFrom < source.length()) {
            output.append(source.substring(copyFrom));
        }
        return output.toString();
    }

    /**
     * Analyses one document and prints the findings.
     *
     * @param args optional; {@code args[0]} is the path of the document to
     *             analyse. Without arguments the built-in default path is used.
     */
    public static void main(String[] args) {
        String path = DEFAULT_DOCUMENT;
        if (args != null && args.length > 0) {
            path = args[0];
        }
        findWord(getWord.run(path));
    }
}
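/*
 * Web code-viewer page residue (not part of the program) - keyboard shortcuts:
 *   Copy code:          Ctrl + C
 *   Search code:        Ctrl + F
 *   Full-screen mode:   F11
 *   Toggle theme:       Ctrl + Shift + D
 *   Show shortcuts:     ?
 *   Increase font size: Ctrl + =
 *   Decrease font size: Ctrl + -
 */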