📄 crawlerweb.java
字号:
if (!ok) { i++; if (i == URLAddress.length() - 1) { return URLAddress; } else if (i == 0) { return "/"; } else { while (j <= i) { temp = temp + URLAddress.charAt(j); j++; } if (VERBOSE) { err_out.write("remFileName: This is returned: " + temp); err_out.newLine(); err_out.flush(); } return temp; } } else { if (VERBOSE) { err_out.write("remFileName: This is returned: " + URLAddress); err_out.newLine(); err_out.flush(); } return ""; } } if (VERBOSE) { err_out.write("remFileName: This is returned: " + URLAddress); err_out.newLine(); err_out.flush(); } return URLAddress; }

    /*
      Converts absolute URL to relative by cutting the first occurrence of
      (domain + curr_dir) out of s.

      The prefix is matched literally. URLs routinely contain characters that
      are regex metacharacters ('.', '?', '+', '['), so a regex-based
      replacement would either match the wrong text or throw.

      s        - the absolute URL
      curr_dir - directory part to strip, appended to domain
      domain   - domain part to strip
      returns  - s without the first occurrence of domain + curr_dir, or s
                 unchanged if that text does not occur
    */
    private String AbsToRel(String s, String curr_dir, String domain) throws IOException {
        String prefix = domain + curr_dir;
        int at = s.indexOf(prefix);
        if (at < 0) {
            return s;
        }
        return s.substring(0, at) + s.substring(at + prefix.length());
    }

    /*
      Returns true if URL is on the same domain, false if not.

      An unset DOMAIN (null or empty) never matches. The emptiness test is done
      on the content of the string, not on its identity: every string starts
      with "", so an empty DOMAIN must not reach startsWith().
    */
    private boolean sameDomain(String s) throws IOException {
        if (DOMAIN == null || DOMAIN.length() == 0) {
            return false;
        }
        return s.startsWith(DOMAIN);
    }

    /*
      Takes an URL as argument, and returns same URL pointing to parent directory.
      (We use this when we have an "../blah/blah.jpg" type of URL)

      The result is everything before the last '/' that is not the final
      character of s, so it carries no trailing slash. Returns "" when s has
      no such slash.
    */
    private String OneDirUp(String s) throws IOException {
        // Start the backwards search one character before the end so that a
        // trailing '/' is skipped.
        int slash = s.lastIndexOf('/', s.length() - 2);
        if (slash < 0) {
            return "";
        }
        return s.substring(0, slash);
    }

    /*
      Removes domain from an absolute URL.

      Looks for "://" and returns what follows the first '/' after it (the
      slash itself is dropped). Returns "" when there is no such slash, and
      returns url unchanged when it contains no "://".
    */
    private String remDomain(String url) throws IOException {
        int scheme = QuickSearch(url, "://");
        if (scheme == -1) {
            return url;
        }
        int slash = url.indexOf('/', scheme + 3);
        if (slash < 0) {
            return "";
        }
        return url.substring(slash + 1);
    }

    /* Takes an URL, makes all necessary directories and returns absolute file path on disk. 
*/ private String makeFilePath(String url, String curr_dir) throws IOException { String type = typeOfURL(url); String path = "", temp_path = ""; char temp_char; int i = -1; File temp_dir = null; boolean ok; if (type != "") { if (type.equals("absolute")) { i = QuickSearch(url, "://"); if (i != -1) { i += 3; path = url.substring(i); } } else if (type.equals("relative")) { if (url.startsWith(".")) { path = curr_dir + url.substring(2); // we don't need './' from url } else if (url.startsWith("/")) { path = curr_dir + url.substring(1); // we don't need '/' from url } else if (url.startsWith("..")) { do { url = url.substring(3); curr_dir = OneDirUp(curr_dir); } while (url.startsWith("..")); path = curr_dir + url; } else { path = curr_dir + url; } } if (!ROOT_DIR.getAbsolutePath().endsWith(File.separator) & !path.startsWith("/")) path = ROOT_DIR.getAbsolutePath() + File.separator + path; for (i = 0; i <= path.length()-1; i++) { temp_char = path.charAt(i); if (temp_char == '/') temp_char = File.separatorChar; temp_path = temp_path + temp_char; } path = temp_path; temp_dir = new File(path); ok = temp_dir.mkdirs(); path = temp_dir.getAbsolutePath(); if (ok) return path; else return path; } else return ""; } /* Makes current directory (directory on the site where URL is) from URL and current directory of the file in which we've found this URL. 
*/ private String makeCurrDir(String url, String curr_dir) throws IOException { String type = typeOfURL(url); String dir = "", temp = ""; int i; if (VERBOSE) { err_out.write("makeCurrDir: url: " + url + " curr_dir: " + curr_dir); err_out.newLine(); err_out.flush(); } if (type.equals("absolute")) { if (VERBOSE) { err_out.write("makeCurrDir: url is absolute"); err_out.newLine(); err_out.flush(); } i = QuickSearch(url, "://"); if (i != -1) { i += 3; dir = remFileName(url.substring(i)); } else { dir = remFileName(remDomain(url)); } } else if (type.equals("relative")) { if (VERBOSE) { err_out.write("makeCurrDir: url is relative"); err_out.newLine(); err_out.flush(); } if (url.startsWith("./")) { temp = remFileName(url); if (temp.length() > 1) { if (curr_dir.endsWith("/")) dir = curr_dir + temp.substring(2); else dir = curr_dir + "/" + temp.substring(2); } } else if (url.startsWith("/")) { temp = remFileName(url); if (temp.length() > 0) { if (curr_dir.endsWith("/")) dir = curr_dir + temp.substring(1); else dir = curr_dir + "/" + temp.substring(1); } } else if (url.startsWith("..")) { do { url = url.substring(3); curr_dir = OneDirUp(curr_dir); } while (url.startsWith("..")); dir = curr_dir + remFileName(url); } else { temp = remFileName(url); if (!curr_dir.endsWith("/") & !temp.startsWith("/")) dir = curr_dir + "/" + temp; else dir = curr_dir + temp; } } if (VERBOSE) { err_out.write("makeCurrDir: This is returned: " + dir); err_out.newLine(); err_out.flush(); } return dir; } /* Simple method which replaces first occurence of target with replacement in string s. 
*/ private String Replace(String s, String target, String replacement) throws IOException { int i; String ret = "", temp1 = "", temp2 = ""; i = QuickSearch(s, target); if (i != -1) { temp1 = s.substring(0, i); temp2 = s.substring(i + target.length(), s.length()); ret = temp1 + replacement + temp2; } return ret; } private boolean NoFileExtension(String s) { int i = s.length(); boolean no_extension = false, dot_exists = false, slash_exists = false; do { i--; if (i < 0) no_extension = true; else if (s.charAt(i) == '.') dot_exists = true; else if (s.charAt(i) == '/') slash_exists = true; } while (!no_extension & !dot_exists & !slash_exists); if (no_extension) { return true; } else if (slash_exists) { return true; } else if (dot_exists) { return false; } else return true; } /* Makes an absolute address from relative URL using relative URL and current directory of a file in which we've found this URL. We use this when we want to retrieve this relative URL from server. */ private String makeAbsAddr(String addr, String curr_dir) throws IOException { String temp = ""; int i; boolean from_root = false; if (VERBOSE) { err_out.write("makeAbsAddr: addr: " + addr + " curr_dir: " + curr_dir); err_out.newLine(); err_out.flush(); } if (addr.startsWith("..") & (addr.length() >= 3)) { do { curr_dir = OneDirUp(curr_dir); addr = addr.substring(3); } while (addr.startsWith("..") & (addr.length() >= 3)); } else if (addr.startsWith("./")) { addr = addr.substring(2); } else if (addr.startsWith("/")) { from_root = true; addr = addr.substring(1); } if (!curr_dir.endsWith("/") & !addr.startsWith("/")) temp = "http://" + curr_dir + "/" + addr; else temp = "http://" + curr_dir + addr; if (VERBOSE) { err_out.write("makeAbsAddr: This is returned: " + temp); err_out.newLine(); err_out.flush(); } return temp; } /* In fact, simply searches for hash sign (#) in an URL, and returns true if # found. 
*/ private boolean ContainsLocalAnchor(String addr) throws IOException { int i; i = QuickSearch(addr, "#"); if (i != -1) return true; else return false; } private boolean ContainsVariables(String addr) throws IOException { int i; i = QuickSearch(addr, "?"); if (i != -1) return true; else return false; } private String RemVariables(String addr) throws IOException { int i; i = QuickSearch(addr, "?"); if (i != -1) return addr.substring(0, i); else return addr; } /* Removes anything after hash sign (#), including that sign. */ private String RemLocalAnchor(String addr) throws IOException { int i; i = QuickSearch(addr, "#"); if (i != -1) return addr.substring(0, i); else return addr; } /* Searches for single or double quotes in a string. */ private boolean HasMoreQuotes(String s) throws IOException { int i; i = QuickSearch(s, "\""); if (i != -1) return true; else { i = QuickSearch(s, "\'"); if (i != -1) return true; else return false; } } /* This method searches for all strings in quotes, contained in string s. Quotes can be single or double. This method does not recognize quotes within quotes (which is often used in javascript). All found strings are pushed to the vector, which is returned at the end. 
*/ private Vector EverythingInQuotes(String s) throws IOException { int i = 0; boolean end_of_string = false, single_quotes, added, ok = true; Vector strings = new Vector(50); String temp_str = ""; char temp_char = 'z'; while (!end_of_string) { temp_str = ""; ok = true; if (i == s.length() - 1) { end_of_string = true; } else { while ((i <= s.length() - 1) & ok) { if ((s.charAt(i) == '\'') | (s.charAt(i) == '\"')) ok = false; i++; } i--; if (ok) { end_of_string = true; } else { temp_char = s.charAt(i); if (temp_char == '\'') single_quotes = true; else if (temp_char == '\"') single_quotes = false; else single_quotes = false; temp_char = 'z'; i++; if (single_quotes) { while ((i <= s.length() - 1) & (temp_char != '\'') ) { temp_char = s.charAt(i); temp_str = temp_str + temp_char; i++; } } else if (!single_quotes) { while ((i <= s.length() - 1) & (temp_char != '\"') ) { temp_char = s.charAt(i); temp_str = temp_str + temp_char; i++; } } temp_char = 'z'; if (i == s.length()) end_of_string = true; else { temp_str = temp_str.substring(0, temp_str.length()-1); if ((temp_str.length() > 0) & (temp_str != " ")) added = strings.add(temp_str); i++; } } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -