📄 util.java
字号:
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Locale;
import java.util.Random;
import org.htmlparser.Parser;
import org.htmlparser.lexer.Lexer;
import org.htmlparser.nodes.AbstractNode;
import org.htmlparser.util.ParserException;
import java.util.regex.*;
/**
 * Miscellaneous static helpers: reading search terms, fetching local and
 * remote pages, cleaning scraped strings, and parsing loosely formatted dates.
 *
 * <p>Most public methods are {@code static synchronized}, so they all share
 * the {@code Util.class} monitor and run one at a time.
 *
 * @author james
 */
public class Util {

    /** Size, in chars, of the buffer used when draining a reader. */
    private static final int READ_BUFFER_SIZE = 2048;

    /**
     * Shared generator for {@link #getARand()}. It keeps the historical fixed
     * seed so a run stays reproducible, but is created only once so that
     * successive calls yield successive values.
     */
    private static final Random RANDOM = new Random(1);

    /** Leading or trailing run of non-word characters. */
    private static final Pattern EDGE_NON_WORD = Pattern
            .compile("(\\A[^\\w]+)|([^\\w]+\\Z)");

    /** First run of decimal digits. */
    private static final Pattern FIRST_NUMBER = Pattern.compile("\\d+");

    /** A word of 3-15 letters followed by a four-digit year, e.g. "March 2005". */
    private static final Pattern MONTH_YEAR = Pattern
            .compile("[a-zA-Z]{3,15}\\s+\\d{4}");

    /** {@code yyyy}, {@code yyyy-MM} or {@code yyyy-MM-dd}. */
    private static final Pattern ISO_DATE = Pattern
            .compile("\\d{4}(-\\d{2})?(-\\d{2})?");

    /**
     * Small demo: URL-decodes a sample cached-search link and prints it.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String href = "http://66.218.69.11/search/cache?ipc=1&.intl=&u=www.linkedin.com/pub/0/4/049&d=DjurWHDuP_RL&p=http%3A%2F%2Fwww.linkedin.com%2Fpub%2F0%2F4";
        try {
            System.out.println(java.net.URLDecoder.decode(href, "utf-8"));
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the next pseudo-random float in {@code [0, 1)} from a shared,
     * fixed-seed generator.
     *
     * @return the next value of the shared sequence
     */
    public static float getARand() {
        return RANDOM.nextFloat();
    }

    /**
     * Writes {@code FileContent} to {@code fileName}, replacing any existing
     * file. This is best-effort: an {@link IOException} is reported on stderr
     * and not rethrown. The writer is always closed.
     *
     * @param fileName    path of the file to write
     * @param FileContent text to write
     */
    public static synchronized void saveAFile(String fileName,
            String FileContent) {
        FileWriter writer = null;
        try {
            writer = new FileWriter(fileName);
            writer.write(FileContent);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // close() also flushes; a failure here is reported like a write failure.
            close(writer);
        }
    }

    /**
     * Reads a whole local file into a string using the platform default
     * charset.
     *
     * @param pageLink path of the file to read
     * @return the file's full content
     * @throws java.io.FileNotFoundException if the file cannot be opened
     * @throws IOException                   if reading fails
     */
    public static synchronized String getAPageLocal(String pageLink)
            throws IOException {
        // A missing file is reported to the caller instead of being swallowed
        // and then surfacing as a NullPointerException from the reader.
        return readFully(new BufferedReader(new FileReader(pageLink)));
    }

    /**
     * Fetches a page over HTTP and returns it only if it looks like a complete,
     * non-"403 Forbidden" HTML document.
     *
     * <p>The check is done separately for all-lowercase and all-uppercase
     * tags: the page must contain the opening and closing {@code html} tags
     * and must not contain the "403 Forbidden" title, in one consistent case.
     *
     * @param pageLink URL of the page; must resolve to an HTTP(S) connection
     * @param timeout  connect and read timeout, in milliseconds
     * @return the page text, decoded with the platform default charset
     * @throws IOException if the fetch fails, or if the response does not pass
     *                     the HTML check (the message is then the response
     *                     body)
     */
    public static synchronized String getAPage(String pageLink, int timeout)
            throws IOException {
        URL url = new URL(pageLink);
        HttpURLConnection urlCon = (HttpURLConnection) url.openConnection();
        urlCon.setRequestProperty("User-agent", "IE/6.0");
        urlCon.setReadTimeout(timeout);
        urlCon.setConnectTimeout(timeout);

        String page = readFully(new BufferedReader(new InputStreamReader(
                urlCon.getInputStream())));

        boolean lowerCaseHtml = page.contains("<html")
                && page.contains("</html>")
                && !page.contains("<title>403 Forbidden</title>");
        boolean upperCaseHtml = page.contains("<HTML")
                && page.contains("</HTML>")
                && !page.contains("<TITLE>403 Forbidden</TITLE>");
        if (lowerCaseHtml || upperCaseHtml) {
            return page;
        }
        throw new IOException(page);
    }

    /**
     * Fetches the content behind a URL without any validation of the result.
     *
     * @param pageLink URL to read
     * @return the full response text, decoded with the platform default
     *         charset
     * @throws IOException if the URL is malformed or reading fails
     */
    public static synchronized String getARawPage(String pageLink)
            throws IOException {
        URL url = new URL(pageLink);
        return readFully(new BufferedReader(new InputStreamReader(
                url.openStream())));
    }

    /**
     * Reads a file of search terms, one term per line.
     *
     * <p>This is best-effort: if the file is missing or a read fails, the
     * error is reported on stderr and the terms read so far (possibly none)
     * are returned.
     *
     * @param fileName path of the terms file
     * @return the lines of the file, in order; never {@code null}
     */
    public static synchronized ArrayList<String> getSearchTerms(String fileName) {
        ArrayList<String> terms = new ArrayList<String>();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(fileName));
            String line;
            // readLine() == null is the end-of-file signal; ready() only says
            // whether a read would block and must not be used for that.
            while ((line = reader.readLine()) != null) {
                terms.add(line);
            }
        } catch (IOException ioe) {
            ioe.printStackTrace();
        } finally {
            close(reader);
        }
        return terms;
    }

    /**
     * Decodes the {@code &amp;} and {@code &quot;} HTML entities and strips
     * leading and trailing non-word characters.
     *
     * <p>{@code &amp;} is decoded first, so a double-escaped
     * {@code &amp;quot;} also ends up as a quote.
     *
     * @param rawString text to clean; may be {@code null}
     * @return the cleaned text, or {@code ""} for {@code null} or empty input
     */
    public static synchronized String cleanString(String rawString) {
        if (rawString == null || rawString.length() == 0) {
            return "";
        }
        String cleaned = rawString.replace("&amp;", "&");
        cleaned = cleaned.replace("&quot;", "\"");
        return EDGE_NON_WORD.matcher(cleaned).replaceAll("");
    }

    /**
     * Same contract as {@link #cleanString(String)}; kept for existing callers.
     *
     * @param rawString text to clean; may be {@code null}
     * @return the cleaned text, or {@code ""} for {@code null} or empty input
     */
    public static synchronized String cleanString2(String rawString) {
        return cleanString(rawString);
    }

    /**
     * Extracts the first run of decimal digits from a string.
     *
     * @param rawString text to search; may be {@code null}
     * @return the number found, or {@code 0} if the input is {@code null},
     *         contains no digits, or the digits do not fit in an {@code int}
     */
    public static synchronized int getNumberFromString(String rawString) {
        if (rawString == null) {
            return 0;
        }
        Matcher m = FIRST_NUMBER.matcher(rawString);
        if (!m.find()) {
            return 0;
        }
        try {
            return Integer.parseInt(m.group());
        } catch (NumberFormatException tooLarge) {
            // Only reachable when the digit run overflows an int.
            return 0;
        }
    }

    /**
     * Parses a date out of free text. The text is first passed through
     * {@link #cleanString(String)}; then a "Month yyyy" form is tried, and if
     * that yields nothing, a {@code yyyy[-MM[-dd]]} form.
     *
     * @param theDateString text that may contain a date; may be {@code null}
     * @return the parsed date (missing month/day default to the first), or
     *         {@code null} if neither form is found
     */
    public static synchronized Date parseDate(String theDateString) {
        String cleaned = Util.cleanString(theDateString);
        Date theDate = parseDate1(cleaned);
        if (theDate != null) {
            return theDate;
        }
        return parseDate2(cleaned);
    }

    /**
     * Tries to parse the first "word + four digits" occurrence as
     * {@code MMMM yyyy} in the US locale.
     *
     * @param theDateString cleaned text
     * @return the first day of that month, or {@code null} if there is no such
     *         occurrence or the word is not a month name
     */
    private static Date parseDate1(String theDateString) {
        Matcher m = MONTH_YEAR.matcher(theDateString);
        if (!m.find()) {
            return null;
        }
        try {
            // SimpleDateFormat is not thread-safe, so a fresh one is used per call.
            return new SimpleDateFormat("MMMM yyyy", Locale.US).parse(m
                    .group());
        } catch (ParseException notAMonth) {
            // Expected for any ordinary word followed by a number; the caller
            // falls back to the numeric form, so this is not reported.
            return null;
        }
    }

    /**
     * Tries to parse the first {@code yyyy}, {@code yyyy-MM} or
     * {@code yyyy-MM-dd} occurrence, padding a missing month or day with
     * {@code 01}.
     *
     * @param theDateString cleaned text
     * @return the parsed date, or {@code null} if no such occurrence exists
     */
    private static Date parseDate2(String theDateString) {
        Matcher m = ISO_DATE.matcher(theDateString);
        if (!m.find()) {
            return null;
        }
        String isoDate = m.group();
        if (isoDate.length() == 4) {
            isoDate = isoDate + "-01-01";
        } else if (isoDate.length() == 7) {
            isoDate = isoDate + "-01";
        }
        try {
            return new SimpleDateFormat("yyyy-MM-dd", Locale.US)
                    .parse(isoDate);
        } catch (ParseException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Drains {@code reader} into a string and closes it, whether or not the
     * read succeeds.
     *
     * @param reader source to drain; closed on return
     * @return everything read from {@code reader}
     * @throws IOException if reading fails
     */
    private static String readFully(BufferedReader reader) throws IOException {
        try {
            StringBuilder sb = new StringBuilder();
            char[] buf = new char[READ_BUFFER_SIZE];
            int count;
            while ((count = reader.read(buf, 0, buf.length)) != -1) {
                sb.append(buf, 0, count);
            }
            return sb.toString();
        } finally {
            close(reader);
        }
    }

    /**
     * Closes {@code resource} if it is not {@code null}, reporting a failure
     * on stderr instead of throwing.
     *
     * @param resource resource to close; may be {@code null}
     */
    private static void close(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -