filesearch.java
来自「java ftp 操作代码,程序可以直接运行」· Java 代码 · 共 536 行
JAVA
536 行
/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
package net.sf.jftp.tools;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.InterruptedIOException;
import java.io.OutputStreamWriter;
import java.net.Socket;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.StringTokenizer;
import java.util.Vector;

import net.sf.jftp.system.LocalIO;
import net.sf.jftp.system.logging.Log;


/**
 * Small recursive HTTP spider: starting from one page it follows
 * <code>href</code>/<code>src</code> links, rates every fetched page against
 * a list of search terms and logs (optionally downloads) links whose name
 * ends with one of the wanted file types.
 *
 * URLs are handled internally WITHOUT the leading <code>http://</code>, i.e.
 * in the form <code>host/path</code>.
 */
public class FileSearch
{
    /** Recursion depth of the page currently being crawled (0 = start page). */
    private int currentDepth = 0;

    /** URLs that were already examined; used as a set (the values are unused). */
    private Hashtable checked = new Hashtable();

    /** When true, only hits and the per-page summary are logged. */
    public static boolean quiet = true;

    /** When true, the per-page summary line is suppressed as well. */
    public static boolean ultraquiet = false;

    /** Directory handed to {@link Getter} for downloaded files. */
    String localDir = ".";

    /** Maximum recursion depth. */
    int MAX = 999999;

    /** Minimum score the search terms alone must reach for a page to count. */
    int MIN_TERM = 1;

    /** Minimum total page score required to follow the links of a page. */
    int MIN_FACTOR = 1;

    /** Whether hits should be downloaded instead of only being logged. */
    boolean LOAD = false;

    /** File name endings that count as a hit ("*" matches everything). */
    String[] typeArray = { "" };

    /** Search terms; each one found in a page adds 3 to its score. */
    String[] termArray = { "" };

    /** Optional terms; each one found in a page adds 1 to its score. */
    String[] optArray = { "" };

    /** URLs containing one of these fragments are rejected. */
    String[] ignoreArray = { "" };

    /** URL endings that are considered worth fetching and scanning. */
    String[] scanArray = { "" };

    /**
     * Demo entry point: searches Google for "linux kernel" and crawls the
     * result page for archive files.
     *
     * @param argv ignored
     */
    public static void main(String argv[])
    {
        String[] typeArray = { ".gz", ".bz2", ".zip", ".rar" };
        String[] termArray = { "linux", "kernel" };
        String[] optArray = { "download", "file", "mirror", "location" };
        String[] ignoreArray = 
                               {
                                   ".gif", ".jpg", ".png", ".swf", ".jar",
                                   ".class", ".google."
                               };
        String[] scanArray = 
                             {
                                 ".html", ".htm", "/", ".jsp", ".jhtml",
                                 ".phtml", ".asp", ".xml", ".js", ".cgi"
                             };

        String url = "http://www.google.de/search?hl=de&q=";

        for(int i = 0; i < termArray.length; i++)
        {
            url += termArray[i] + "+";
        }

        FileSearch search = new FileSearch();
        search.typeArray = typeArray;
        search.termArray = termArray;
        search.optArray = optArray;
        search.ignoreArray = ignoreArray;
        search.scanArray = scanArray;
        search.MIN_TERM = 1;

        search.spider(url);
    }

    /**
     * Starts a crawl at the given URL. Any exception raised while crawling
     * is printed and ends the crawl.
     *
     * @param url start URL, with or without a leading "http://"
     */
    private void spider(String url)
    {
        try
        {
            // Strip the scheme BEFORE testing for a slash: "http://host"
            // contains slashes only inside the scheme, so testing first
            // left "host" without the trailing slash every later step needs.
            url = clear(url);

            if(url.indexOf("/") < 0)
            {
                url = url + "/";
            }

            Log.out(">>> URL: " + url);
            Log.out(">>> Scanning for ");

            for(int i = 0; i < typeArray.length; i++)
            {
                Log.out(typeArray[i] + " ");
            }

            Log.out("");
            Log.out("Fetching initial HTML file...");

            Getter urlGetter = new Getter(localDir);
            urlGetter.fetch(url, true);

            Log.out("Searching for links...");
            LocalIO.pause(500);

            crawl(url);
        }
        catch(Exception ex)
        {
            ex.printStackTrace();
        }
    }

    /**
     * Removes a leading "http://" from the URL.
     *
     * @param url URL to normalize
     * @return the URL without the scheme prefix, or the URL unchanged when
     *         it does not start with "http://"
     */
    private String clear(String url)
    {
        // Only a PREFIX may be stripped; cutting 7 characters because
        // "http://" shows up somewhere later (e.g. in a query parameter)
        // would destroy the URL.
        if(url.startsWith("http://"))
        {
            return url.substring(7);
        }

        return url;
    }

    /**
     * Appends all elements of <code>x</code> to <code>v</code>.
     *
     * @param v vector that receives the elements
     * @param x vector whose elements are appended
     * @return <code>v</code>
     */
    private Vector addVector(Vector v, Vector x)
    {
        v.addAll(x);

        return v;
    }

    /**
     * Scores page content: 3 points per search term found, and - provided
     * the term score reaches {@link #MIN_TERM} - 1 additional point per
     * optional term found.
     *
     * @param content page content
     * @return the score, or 0 when the search terms score below MIN_TERM
     */
    private int rate(String content)
    {
        int score = 0;

        for(int i = 0; i < termArray.length; i++)
        {
            if(content.indexOf(termArray[i]) >= 0)
            {
                score += 3;
            }
        }

        if(score < MIN_TERM)
        {
            return 0;
        }

        for(int i = 0; i < optArray.length; i++)
        {
            if(content.indexOf(optArray[i]) >= 0)
            {
                score++;
            }
        }

        return score;
    }

    /**
     * Decides whether a URL should be fetched and scanned.
     *
     * @param url URL without scheme
     * @return -1 when the URL matches the ignore list, was already seen or
     *         does not look scannable; 1 otherwise
     */
    private int checkForResult(String url)
    {
        for(int i = 0; i < ignoreArray.length; i++)
        {
            if(url.indexOf(ignoreArray[i]) >= 0)
            {
                return -1;
            }
        }

        if(!checkForScanableUrl(url))
        {
            return -1;
        }

        return 1;
    }

    /**
     * Records the URL as seen and tells whether it is worth scanning.
     *
     * @param url URL without scheme
     * @return false when the URL was seen before or ends with none of the
     *         {@link #scanArray} endings; true otherwise
     */
    private boolean checkForScanableUrl(String url)
    {
        if(checked.containsKey(url))
        {
            return false;
        }

        checked.put(url, "");

        for(int i = 0; i < scanArray.length; i++)
        {
            if(url.endsWith(scanArray[i]))
            {
                return true;
            }
        }

        return false;
    }

    /**
     * Tells whether a link begins with something that looks like a host
     * name, i.e. the part before the first "/" is non-empty and contains a
     * dot that is not its first character.
     */
    private boolean startsWithHost(String link)
    {
        int slash = link.indexOf("/");

        return (slash > 0) && (link.substring(0, slash).indexOf(".") > 0);
    }

    /**
     * Fetches one page, rates it and - when it rates high enough - logs
     * every link matching {@link #typeArray} and recurses into all links
     * that start with a host name.
     *
     * @param url page URL; must contain a "/" once the scheme is removed
     * @throws Exception whatever the link processing raises
     */
    private void crawl(String url) throws Exception
    {
        url = clear(url);

        int urlRating = checkForResult(url);

        if(!quiet)
        {
            Log.out("URL-Rating: " + url + " -> " + urlRating + " @" +
                    currentDepth);
        }

        // A rejected URL is skipped - except for the start page (depth 0),
        // which is always scanned.
        if((urlRating < 0) && (currentDepth > 0))
        {
            if(!quiet)
            {
                Log.out("SKIP " + url);
            }

            return;
        }

        Getter urlGetter = new Getter(localDir);
        String content = urlGetter.fetch(url);

        int factor = rate(content);

        if(!quiet)
        {
            Log.out("Content-Rating: " + url + " -> " + factor + " @" +
                    currentDepth);
        }

        if(factor < MIN_FACTOR)
        {
            if(!quiet)
            {
                Log.out("DROP: " + url);
            }

            return;
        }

        if(!ultraquiet)
        {
            Log.out("Url: " + url + " -> " + urlRating + ":" + factor + "@" +
                    currentDepth);
        }

        // Relative links are resolved against the page's directory.
        String base = url.substring(0, url.lastIndexOf("/"));

        Vector m = sort(content, base, "href=\"");
        m = addVector(m, sort(content, base, "src=\""));
        m = addVector(m, sort(content, base, "HREF=\""));
        m = addVector(m, sort(content, base, "SRC=\""));

        Enumeration links = m.elements();

        while(links.hasMoreElements())
        {
            String next = (String) links.nextElement();

            if(!quiet)
            {
                Log.out("PROCESS: " + next);
            }

            boolean hostLike = startsWithHost(next);

            for(int i = 0; i < typeArray.length; i++)
            {
                if(next.endsWith(typeArray[i]) ||
                       typeArray[i].trim().equals("*"))
                {
                    Log.out("HIT: " + url + " -> " + next);

                    // NOTE(review): this tests the PAGE url, not the link.
                    // The page is normally already recorded in 'checked',
                    // so the download branch looks almost unreachable -
                    // confirm the intent before changing it.
                    if(LOAD && checkForScanableUrl(url) && hostLike)
                    {
                        Getter urlGetter2 = new Getter(localDir);
                        urlGetter2.fetch(next, false);
                    }
                }
            }

            if((currentDepth < MAX) && hostLike)
            {
                currentDepth++;

                try
                {
                    crawl(next);
                }
                finally
                {
                    // keep the depth counter consistent even on failure
                    currentDepth--;
                }
            }
        }
    }

    /**
     * Extracts the values of all attributes introduced by
     * <code>index</code> (e.g. <code>href="</code>) and converts them to
     * absolute URLs. Despite its name this method does not sort anything.
     *
     * @param content page content to scan
     * @param url base URL used to resolve relative links
     * @param index attribute prefix including the opening double quote
     * @return the links in order of appearance
     */
    private Vector sort(String content, String url, String index)
    {
        Vector res = new Vector();
        int from = 0;

        while(true)
        {
            int start = content.indexOf(index, from);

            if(start < 0)
            {
                return res;
            }

            start += index.length();

            int end = content.indexOf("\"", start);

            if(end < 0)
            {
                // Unterminated attribute (e.g. truncated page): nothing
                // more can be extracted. Previously this threw a
                // StringIndexOutOfBoundsException that aborted the crawl.
                return res;
            }

            String was = createAbsoluteUrl(content.substring(start, end), url);
            res.add(was);

            if(!quiet)
            {
                Log.out("ADD: " + was);
            }

            // continue right behind the attribute prefix just handled
            from = start;
        }
    }

    /**
     * Splits a string at every "-".
     *
     * @param auswahl string to split
     * @return the tokens between the dashes; empty tokens are dropped
     */
    private String[] check(String auswahl)
    {
        StringTokenizer tokenizer = new StringTokenizer(auswahl, "-", false);
        String[] strArr = new String[tokenizer.countTokens()];
        int tmp = 0;

        while(tokenizer.hasMoreElements())
        {
            strArr[tmp] = (String) tokenizer.nextElement();
            tmp++;
        }

        return strArr;
    }

    /**
     * Resolves a link found on a page against the page's base URL.
     *
     * @param newLink link as found in the page
     * @param baseUrl directory of the page, without scheme
     * @return the link in <code>host/path</code> form where it could be
     *         resolved; links matching none of the rules are returned
     *         as they are (minus a leading "http://")
     */
    private String createAbsoluteUrl(String newLink, String baseUrl)
    {
        newLink = clear(newLink);

        if(newLink.startsWith(baseUrl))
        {
            return newLink;
        }

        if(newLink.startsWith("/") && (baseUrl.indexOf("/") > 0))
        {
            // host-relative link: keep only the host part of the base
            newLink = baseUrl.substring(0, baseUrl.indexOf("/")) + newLink;
        }
        else if(newLink.startsWith("/") && (baseUrl.indexOf("/") < 0))
        {
            // base is a bare host
            newLink = baseUrl + newLink;
        }
        else if((newLink.indexOf(".") > 0))
        {
            int idx = newLink.indexOf("/");
            String tmp = "";

            if(idx >= 0)
            {
                tmp = newLink.substring(0, idx);
            }

            // first path element contains a dot -> treated as a host name,
            // so the link is taken to be absolute already
            if((tmp.indexOf(".") > 0))
            {
                return clear(newLink);
            }

            if(baseUrl.endsWith("/"))
            {
                newLink = baseUrl + newLink;
            }
            else
            {
                newLink = baseUrl + "/" + newLink;
            }
        }

        return newLink;
    }
}


/**
 * Minimal HTTP/1.0 client on port 80 used by {@link FileSearch}, either to
 * read a page into a string or to download a resource into a local
 * directory. URLs are expected in <code>host/path</code> form (no scheme).
 */
class Getter
{
    /** Directory that receives downloaded files. */
    private String localDir = null;

    /**
     * @param localDir directory that receives downloaded files
     */
    public Getter(String localDir)
    {
        this.localDir = localDir;
    }

    /**
     * Fetches a URL and returns the raw response (status line, headers and
     * body) with all lines concatenated WITHOUT line separators.
     *
     * @param url URL in <code>host/path</code> form
     * @return the response text; whatever was received so far when a read
     *         times out after 5 seconds; "" when the request fails
     */
    public String fetch(String url)
    {
        Socket deal = null;

        try
        {
            String host = url.substring(0, url.indexOf("/"));

            deal = new Socket(host, 80);
            deal.setSoTimeout(5000);

            BufferedWriter out = new BufferedWriter(new OutputStreamWriter(deal.getOutputStream()));
            BufferedReader in = new BufferedReader(new InputStreamReader(deal.getInputStream()));

            // HTTP lines are terminated by CRLF
            out.write("GET http://" + url + " HTTP/1.0\r\n\r\n");
            out.flush();

            StringBuffer result = new StringBuffer();

            try
            {
                // Read until the server closes the connection. Polling
                // ready() instead cut the page off whenever the next
                // network packet had not arrived yet.
                String line;

                while((line = in.readLine()) != null)
                {
                    result.append(line);
                }
            }
            catch(InterruptedIOException timeout)
            {
                // read timed out: keep what was received so far
            }

            return result.toString();
        }
        catch(Exception ex)
        {
            if(!FileSearch.quiet)
            {
                ex.printStackTrace();
            }
        }
        finally
        {
            if(deal != null)
            {
                try
                {
                    // closing the socket also closes both of its streams
                    deal.close();
                }
                catch(IOException ex)
                {
                    if(!FileSearch.quiet)
                    {
                        ex.printStackTrace();
                    }
                }
            }
        }

        return "";
    }

    /**
     * Downloads a URL into the local directory. The file is named after
     * the last path element of the URL ("index.html" when the URL ends
     * with "/"); the response headers are not stored. Failures are only
     * reported when {@link FileSearch#quiet} is off.
     *
     * @param url URL in <code>host/path</code> form
     * @param force when false an already existing file is left untouched
     *        and nothing is downloaded
     */
    public void fetch(String url, boolean force)
    {
        Socket deal = null;
        BufferedOutputStream localOut = null;

        try
        {
            String host = url.substring(0, url.indexOf("/"));
            String wo = url.substring(url.indexOf("/"));

            if(!FileSearch.quiet)
            {
                Log.debug(">>> " + host + wo);
            }

            File d = new File(localDir);
            d.mkdir();

            String name = wo.substring(wo.lastIndexOf("/") + 1);

            if(name.length() == 0)
            {
                // directory URL: an empty name would denote localDir itself
                name = "index.html";
            }

            // File(parent, child) inserts the separator; plain string
            // concatenation turned "." + "x" into the sibling file ".x".
            File f = new File(d, name);

            if(f.exists() && !force)
            {
                if(!FileSearch.quiet)
                {
                    Log.debug(">>> file already exists...");
                }

                return;
            }
            else
            {
                f.delete();
            }

            deal = new Socket(host, 80);

            BufferedWriter out = new BufferedWriter(new OutputStreamWriter(deal.getOutputStream()));

            // DataInputStream allows reading the header line by line and
            // the body as raw bytes from the same buffered stream.
            DataInputStream in = new DataInputStream(new BufferedInputStream(deal.getInputStream()));
            localOut = new BufferedOutputStream(new FileOutputStream(f));

            // HTTP lines are terminated by CRLF
            out.write("GET http://" + url + " HTTP/1.0\r\n\r\n");
            out.flush();

            // consume the header block, up to and including the empty line
            StringBuffer header = new StringBuffer();
            boolean headerComplete = false;
            String x;

            while((x = in.readLine()) != null)
            {
                header.append(x).append("\n");

                if(x.equals(""))
                {
                    headerComplete = true;

                    break;
                }
            }

            if(!headerComplete)
            {
                // The stream ended without a blank line, so there was no
                // header/body split: store what was read. readLine() maps
                // each byte to one char, ISO-8859-1 maps it back.
                localOut.write(header.toString().getBytes("ISO-8859-1"));
            }
            else
            {
                byte[] alu = new byte[2048];
                int len;

                while((len = in.read(alu)) != -1)
                {
                    localOut.write(alu, 0, len);

                    // short pause per chunk, kept from the original
                    // implementation (presumably a deliberate throttle)
                    chill(10);
                }
            }

            localOut.flush();
        }
        catch(Exception ex)
        {
            if(!FileSearch.quiet)
            {
                ex.printStackTrace();
            }
        }
        finally
        {
            if(localOut != null)
            {
                try
                {
                    localOut.close();
                }
                catch(IOException ex)
                {
                    if(!FileSearch.quiet)
                    {
                        ex.printStackTrace();
                    }
                }
            }

            if(deal != null)
            {
                try
                {
                    // closing the socket also closes both of its streams
                    deal.close();
                }
                catch(IOException ex)
                {
                    if(!FileSearch.quiet)
                    {
                        ex.printStackTrace();
                    }
                }
            }
        }
    }

    /**
     * Sleeps for the given time. Negative values are ignored. When the
     * thread is interrupted the method returns early and leaves the
     * interrupt flag set for the caller.
     *
     * @param time time to sleep in milliseconds
     */
    public static void chill(int time)
    {
        if(time < 0)
        {
            return;
        }

        try
        {
            Thread.sleep(time);
        }
        catch(InterruptedException ex)
        {
            Thread.currentThread().interrupt();
        }
    }
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?