⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 getword.java

📁 spring+hibernate+struts 范例代码
💻 JAVA
字号:
package test.poi;

import java.io.*;

import org.textmining.text.extraction.WordExtractor;

/**
 * Command-line checker that extracts the text of a Word document and reports
 * every occurrence of a fixed list of discouraged words and punctuation marks.
 *
 * <p>Text extraction is delegated to {@code WordExtractor}; all findings are
 * written to standard output.
 */
public class getWord {

	/** Words and punctuation marks that are reported when found in the text. */
	private static final String[] FILTER_WORDS = { "!", "我们", "你们", "他们", "我", "你", "他",
			"它", "它们", "这儿", "email", "mail", "WIN", "windows", "WINDOWS",
			"座标", "缺省值", "唯一", "帐号", "做为", "图象", "存贮", "其它", "这里","?"};

	/** Number of characters shown before a match and from the match start onwards. */
	private static final int CONTEXT_CHARS = 20;

	/** Document analysed by {@link #main(String[])} when no argument is given. */
	private static final String DEFAULT_DOCUMENT = "E:\\myweb\\hibernate\\目录和正文\\第2章.doc";

	/**
	 * Reads a Word document and returns its text.
	 *
	 * @param filename path of the document to read
	 * @return the extracted text, or {@code null} when the file cannot be
	 *         opened or the extraction fails (the failure is reported on
	 *         standard error)
	 */
	public static String run(String filename) {
		FileInputStream in = null;
		try {
			in = new FileInputStream(filename);
			WordExtractor extractor = new WordExtractor();
			System.out.println("开始分析文件-->" + filename);
			System.out.println("分析结果如下:\n");
			return extractor.extractText(in);
		} catch (Exception ex) {
			// Callers treat null as "no text", so report the cause instead of
			// silently dropping it and keep the null return.
			System.err.println("读取文件失败: " + filename + " (" + ex + ")");
			return null;
		} finally {
			// Always release the file handle, including on the failure path.
			if (in != null) {
				try {
					in.close();
				} catch (IOException ignored) {
					// Nothing useful can be done if closing fails.
				}
			}
		}
	}

	/**
	 * Scans the text for every filtered word and prints, for each word that
	 * occurs, a numbered summary line with its occurrence count followed by
	 * the approximate position of each occurrence.
	 *
	 * @param text the text to scan; when {@code null} a message is printed to
	 *             standard error and nothing is scanned
	 */
	public static void findWord(String text) {
		if (text == null) {
			System.err.println("给的字符串是空!");
			return;
		}

		// Running number of filtered words that were actually found.
		int found = 0;
		for (int i = 0; i < FILTER_WORDS.length; i++) {
			String word = FILTER_WORDS[i];
			if (text.indexOf(word) == -1) {
				continue;
			}
			found++;

			int count = Counts(text, word);
			System.out.println(found + ".含有不合法的词汇<-----" + word
					+ "------>,共" + count + "个。");

			printInstr(text, word);
		}
	}

	/**
	 * Prints one line of surrounding context for every non-overlapping
	 * occurrence of {@code igStr} in {@code source}.
	 *
	 * <p>Each context window starts up to {@link #CONTEXT_CHARS} characters
	 * before the match and extends up to {@link #CONTEXT_CHARS} characters
	 * from the match start (or to the end of the match, if longer), clamped to
	 * the bounds of {@code source}.
	 *
	 * @param source the text to search; {@code null} or empty prints nothing
	 * @param igStr  the word to locate; {@code null} or empty prints nothing
	 */
	public static void printInstr(String source, String igStr) {
		if (source == null || source.length() == 0) {
			return;
		}
		if (igStr == null || igStr.length() == 0) {
			return;
		}

		int sourceLength = source.length();
		int wordLength = igStr.length();
		// Iterate over the original string instead of recursing on substrings:
		// this keeps the leading context intact and cannot exhaust the stack.
		int pos = source.indexOf(igStr);
		while (pos != -1) {
			int from = Math.max(0, pos - CONTEXT_CHARS);
			int to = Math.min(sourceLength, pos + Math.max(CONTEXT_CHARS, wordLength));

			System.out.println("@@@@@出现不法字符的大体位置为:……"
					+ source.substring(from, to) + "……\n");

			// Skip past the whole match so the lines printed here agree with
			// the non-overlapping total returned by Counts.
			pos = source.indexOf(igStr, pos + wordLength);
		}
	}

	/**
	 * Counts the non-overlapping occurrences of {@code SubStr} in
	 * {@code ParentStr}, scanning from left to right.
	 *
	 * @param ParentStr the text to search
	 * @param SubStr    the text to look for
	 * @return the number of occurrences; {@code 0} when either argument is
	 *         {@code null} or {@code SubStr} is empty
	 */
	public static int Counts(String ParentStr, String SubStr) {
		if (ParentStr == null || SubStr == null || SubStr.length() == 0) {
			return 0;
		}
		int step = SubStr.length();
		int total = 0;
		int pos = ParentStr.indexOf(SubStr);
		while (pos != -1) {
			total++;
			pos = ParentStr.indexOf(SubStr, pos + step);
		}
		return total;
	}

	/**
	 * Replaces every non-overlapping occurrence of {@code oldString} in
	 * {@code source} with {@code newString}, matching literally (no regular
	 * expressions).
	 *
	 * @param source    the text to process
	 * @param oldString the text to replace
	 * @param newString the replacement text
	 * @return the resulting text; {@code null} when {@code source} is
	 *         {@code null}; {@code source} unchanged when {@code oldString} is
	 *         {@code null} or empty
	 */
	public static String Replace(String source, String oldString,
			String newString) {
		if (source == null) {
			return null;
		}
		// An empty search string matches at every position without advancing,
		// which would loop forever; there is nothing meaningful to replace.
		if (oldString == null || oldString.length() == 0) {
			return source;
		}

		StringBuffer output = new StringBuffer();
		int lengOfsource = source.length();
		int lengOfold = oldString.length();
		int posStart = 0;
		int pos;
		while ((pos = source.indexOf(oldString, posStart)) >= 0) {
			output.append(source.substring(posStart, pos));
			output.append(newString);
			posStart = pos + lengOfold;
		}
		// Copy whatever follows the last match.
		if (posStart < lengOfsource) {
			output.append(source.substring(posStart));
		}
		return output.toString();
	}

	/**
	 * Entry point: analyses the document named by the first argument, or the
	 * built-in default document when no argument is supplied.
	 *
	 * @param args optional; {@code args[0]} is the path of the document
	 */
	public static void main(String[] args) {
		String filename = DEFAULT_DOCUMENT;
		if (args != null && args.length > 0) {
			filename = args[0];
		}

		findWord(getWord.run(filename));
	}

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -