📄 testurl.java
字号:
/*
* This program tests whether each given URL exists, then prints out an overall result.
* This is the main class.
* @author Peng Xu
* @version 4.0
*/
class testURL {
    /*
     * Entry point: every command-line argument is treated as one URL.
     * For each URL that passes the existence check the page is downloaded
     * to "source.html", a report "output<N>.html" is produced (N is the
     * 1-based position of the URL on the command line) and the downloaded
     * copy is removed again.
     * @param args the URLs to test
     */
    public static void main(String[] args) {
        int reportNumber = 0;
        for (String candidate : args) {
            reportNumber++;

            // a fresh helper per URL, with the proxy settings applied first
            expURL checker = new expURL();
            checker.setProxy();

            // unreachable URLs are only reported, nothing is downloaded
            if (!checker.checks(candidate)) {
                System.out.println("This URL doesn't exits !");
                continue;
            }

            System.out.println("This URL exists !");
            // fetch the page to the local machine
            checker.downloadURL(candidate);
            // analyse the local copy and write the numbered report
            checker.outprint("source.html", reportNumber);
            // remove the local copy
            checker.deleteFile("source.html");
        }
    }
}
/*
* This class provides the methods used by the main class.
* @author Peng Xu
* @version 4.0
*/
//import package
import java.net.*;
import java.io.*;
import java.util.*;
/**
 * Helper that extracts links from a locally saved HTML page, resolves them
 * against the page's own address, probes each one and writes an HTML report.
 */
class expURL {
    /**
     * Prefixes that stop a link containing '#' from being treated as a
     * label. Compared with {@code startsWith}, so e.g. "ftp" also matches a
     * relative path that merely begins with those letters.
     */
    private static final String[] NON_LABEL_PREFIXES = {
        "http://", "https://", "mailto", "ftp", "telnet", "news", "gopher", "file:"
    };

    /** Address handed to the most recent {@link #downloadURL(String)} call. */
    private String url;

    /** Label links collected by every {@link #getAbsoluteLinks(String)} call on this object. */
    private ArrayList<String> labels = new ArrayList<String>();

    /**
     * Collects the {@code href} targets found in a local file. Targets that
     * contain '#' and do not start with one of the known scheme prefixes are
     * stored as labels (see {@link #getlabel(String)}); all others are
     * returned.
     *
     * @param uriStr local file name
     * @return the non-label href targets in file order; empty when the file
     *         cannot be read
     */
    public ArrayList<String> getAbsoluteLinks(String uriStr) {
        ArrayList<String> results = new ArrayList<String>();
        ArrayList<String> targets = attributeValues(uriStr, "href");
        for (int i = 0; i < targets.size(); i++) {
            String target = targets.get(i);
            if (isLabel(target)) {
                labels.add(target);
            } else {
                results.add(target);
            }
        }
        return results;
    }

    /**
     * Returns the label links gathered so far by
     * {@link #getAbsoluteLinks(String)} on this object.
     *
     * @param uriStr local file name; not used, the labels come from the
     *               earlier getAbsoluteLinks call(s)
     * @return a copy of the collected label links
     */
    public ArrayList<String> getlabel(String uriStr) {
        // hand out a copy so callers cannot modify the internal list
        return new ArrayList<String>(labels);
    }

    /**
     * Collects the {@code src} targets found in a local file.
     *
     * @param uriStr local file name
     * @return the src targets in file order; empty when the file cannot be read
     */
    public ArrayList<String> getImageLinks(String uriStr) {
        return attributeValues(uriStr, "src");
    }

    /**
     * Collects the {@code action} targets found in a local file.
     *
     * @param uriStr local file name
     * @return the action targets in file order; empty when the file cannot be read
     */
    public ArrayList<String> getFormLinks(String uriStr) {
        return attributeValues(uriStr, "action");
    }

    /**
     * Rewrites every link as seen from the base address.
     * Links starting with "about:" and links that are not valid URIs are
     * left exactly as given; a malformed link no longer stops the remaining
     * links from being resolved.
     *
     * @param Url  the address the links are resolved against
     * @param urls the links to rewrite
     * @return an array with one entry per element of {@code urls}, in the same order
     */
    public String[] Links(String Url, ArrayList<String> urls) {
        String[] linkset = urls.toArray(new String[urls.size()]);
        if (Url == null) {
            System.err.println("No base URL given, links are left unresolved");
            return linkset;
        }
        URI base;
        try {
            base = new URI(Url).normalize();
        } catch (URISyntaxException e) {
            System.err.println("Invalid base URL " + Url + " : " + e.getMessage());
            return linkset;
        }
        for (int i = 0; i < linkset.length; i++) {
            String link = linkset[i];
            if (link == null || link.startsWith("about:")) {
                continue;
            }
            try {
                URI target = new URI(link).normalize();
                linkset[i] = base.resolve(base.relativize(target)).toString();
            } catch (URISyntaxException e) {
                // keep the raw text; the existence check will report it as broken
                System.err.println("Can't resolve link " + link + " : " + e.getMessage());
            }
        }
        return linkset;
    }

    /**
     * Tests whether something can be fetched from the given address.
     * The address counts as existing when the first header field contains
     * "200" or "text", or, when there is no such field, when the content
     * type starts with "text" or "image".
     *
     * @param Url the address to test
     * @return true if the address exists, otherwise false (also for
     *         malformed addresses and connection failures)
     */
    public boolean checks(String Url) {
        URLConnection connection = null;
        try {
            connection = new URL(Url).openConnection();
            connection.connect();
            String contentType = connection.getContentType();
            String head = connection.getHeaderField(0);
            if (head != null) {
                return head.indexOf("200") != -1 || head.indexOf("text") != -1;
            }
            // the content type may be missing as well
            return contentType != null
                    && (contentType.startsWith("text") || contentType.startsWith("image"));
        } catch (IOException e) {
            // includes MalformedURLException
            return false;
        } catch (ClassCastException e) {
            return false;
        } catch (IllegalArgumentException e) {
            // NOTE(review): the HTTP handler is reported to raise this for
            // addresses without a host - confirm on the target JDK
            return false;
        } finally {
            if (connection instanceof HttpURLConnection) {
                ((HttpURLConnection) connection).disconnect();
            }
        }
    }

    /**
     * Downloads the page at the given address into the local file
     * "source.html" and remembers the address for
     * {@link #outprint(String, int)}. Failures are reported on the console.
     *
     * @param Url the address of the page to download
     */
    public void downloadURL(String Url) {
        url = Url; // remembered even if the download fails
        Scanner page = null;
        PrintWriter download = null;
        try {
            URLConnection con = new URI(url).toURL().openConnection();
            page = new Scanner(new InputStreamReader(con.getInputStream()));
            download = new PrintWriter("source.html");
            while (page.hasNextLine()) {
                download.println(page.nextLine());
            }
            // PrintWriter never throws on write errors, so ask explicitly
            if (download.checkError()) {
                System.err.println("Error while writing source.html");
            }
        } catch (MalformedURLException e) {
            System.out.println(e.getMessage());
        } catch (URISyntaxException e) {
            System.out.println(e.getMessage());
        } catch (IllegalArgumentException e) {
            // URI.toURL() rejects addresses that are not absolute
            System.out.println(e.getMessage());
        } catch (IOException e) {
            System.err.println(e);
        } finally {
            if (page != null) {
                page.close();
            }
            if (download != null) {
                download.close();
            }
        }
    }

    /**
     * Deletes a file and reports the outcome on standard output.
     *
     * @param filename the file name
     */
    public void deleteFile(String filename) {
        File file = new File(filename);
        if (!file.exists()) {
            System.out.println("Can't find the file : " + filename);
            return;
        }
        if (file.delete()) {
            System.out.println(filename + " was deleted !");
        } else {
            System.out.println(filename + " can't be deleted !");
        }
    }

    /**
     * Sets the http, ftp and https proxy and the http user agent as system
     * properties, so the values apply to the whole JVM.
     */
    public void setProxy() {
        Properties systemProperties = System.getProperties();
        systemProperties.setProperty("http.proxyHost", "proxy.dcu.ie");
        systemProperties.setProperty("http.proxyPort", "8080");
        systemProperties.setProperty("ftp.proxyHost", "proxy.dcu.ie");
        systemProperties.setProperty("ftp.proxyPort", "8080");
        systemProperties.setProperty("https.proxyHost", "proxy.dcu.ie");
        systemProperties.setProperty("https.proxyPort", "8080");
        System.setProperty("http.agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)");
    }

    /**
     * Analyses a local HTML file: extracts href, label, image and form
     * links, resolves them against the address remembered by
     * {@link #downloadURL(String)}, checks each one and writes the report
     * "output&lt;m&gt;.html". Progress is printed on standard output.
     *
     * @param filename the local file that needs processing
     * @param m        the number used in the report file name
     */
    public void outprint(String filename, int m) {
        // a separate object is used for parsing so the labels start out empty
        expURL parser = new expURL();
        String[] absoluteURL = parser.Links(url, parser.getAbsoluteLinks(filename));
        ArrayList<String> label = parser.getlabel(filename);
        String[] labelLinks = parser.Links(url, label);
        ArrayList<String> imageLink = parser.getImageLinks(filename);
        String[] imageLinks = parser.Links(url, imageLink);
        ArrayList<String> formLinks = parser.getFormLinks(filename);
        String[] formLink = parser.Links(url, formLinks);

        PrintWriter writer = null;
        try {
            writer = new PrintWriter("output" + m + ".html");
            writer.println("<HTML><HEAD><TITLE>OutPut</TITLE></HEAD>"
                    + "<body><h1><b>The Link is " + anchor(url) + "</b></h1><br><br>");

            ArrayList<String> work = new ArrayList<String>();
            ArrayList<String> notwork = new ArrayList<String>();
            // plain href links only count towards the overall totals
            checkAll(absoluteURL, work, notwork);
            int labelsOk = checkAll(labelLinks, work, notwork);
            int imagesOk = checkAll(imageLinks, work, notwork);
            int formsOk = checkAll(formLink, work, notwork);

            writer.println("<br><br><br><h2><B>Doesn't work well Links<br></B></h2><br><br>");
            for (int i = 0; i < notwork.size(); i++) {
                writer.println(anchor(notwork.get(i)) + "<br>");
            }
            writer.println("<h2><B>Works well Links<br></B></h2><br><br>");
            for (int i = 0; i < work.size(); i++) {
                writer.println(anchor(work.get(i)) + "<br>");
            }

            writer.println("<h3><B>Number of links in the file = "
                    + (work.size() + notwork.size()) + "<br>");
            writer.println("Worked = " + work.size() + "<br>");
            writer.println("Failed = " + notwork.size() + "<br><br>");
            writer.println("Label = " + label.size() + "<br>");
            writer.println("exists = " + labelsOk + "<br>");
            writer.println("doesn't exist = " + (labelLinks.length - labelsOk) + "<br><br>");
            writer.println("Image = " + imageLink.size() + "<br>");
            writer.println("exists = " + imagesOk + "<br>");
            writer.println("doesn't exist = " + (imageLinks.length - imagesOk) + "<br><br>");
            writer.println("Form action = " + formLinks.size() + "<br>");
            writer.println("exists = " + formsOk + "<br>");
            writer.println("doesn't exist = " + (formLink.length - formsOk) + "<br></B></h3>");
            writer.println("</BODY></html>");
        } catch (FileNotFoundException e) {
            // the report file could not be created
            System.out.println(e.getMessage());
        } finally {
            if (writer != null) {
                writer.close();
            }
        }
    }

    /**
     * Scans a file token by token (tokens are separated by whitespace) and
     * returns the quoted value of every token that starts with
     * {@code attribute="}, ignoring the case of the attribute name. A value
     * is only accepted when its closing quote is inside the same token, so
     * values containing whitespace are skipped.
     */
    private static ArrayList<String> attributeValues(String fileName, String attribute) {
        ArrayList<String> values = new ArrayList<String>();
        if (fileName == null) {
            return values;
        }
        String opener = attribute + "=\"";
        Scanner tokens = null;
        try {
            tokens = new Scanner(new FileReader(fileName));
            while (tokens.hasNext()) {
                String token = tokens.next();
                if (!token.regionMatches(true, 0, opener, 0, opener.length())) {
                    continue;
                }
                int start = opener.length();
                int end = token.indexOf('"', start);
                if (end >= 0) {
                    values.add(token.substring(start, end));
                }
            }
            // Scanner hides read errors; surface them instead of silently truncating
            IOException readFailure = tokens.ioException();
            if (readFailure != null) {
                System.err.println("Error while reading " + fileName + " : " + readFailure);
            }
        } catch (FileNotFoundException e) {
            System.err.println("Can't read " + fileName + " : " + e.getMessage());
        } finally {
            // closing the scanner also closes the underlying file reader
            if (tokens != null) {
                tokens.close();
            }
        }
        return values;
    }

    /** Tells whether an href target is a label: it contains '#' and has none of the known prefixes. */
    private static boolean isLabel(String target) {
        if (target.indexOf('#') == -1) {
            return false;
        }
        for (int i = 0; i < NON_LABEL_PREFIXES.length; i++) {
            if (target.startsWith(NON_LABEL_PREFIXES[i])) {
                return false;
            }
        }
        return true;
    }

    /** Checks every link, sorts it into the working or broken list and returns how many exist. */
    private int checkAll(String[] links, ArrayList<String> work, ArrayList<String> notwork) {
        int reachable = 0;
        for (int i = 0; i < links.length; i++) {
            String link = links[i];
            if (checks(link)) {
                work.add(link);
                System.out.println(link + " exists !");
                reachable++;
            } else {
                notwork.add(link);
                System.out.println(link + " doesn't exist !");
            }
        }
        return reachable;
    }

    /** Builds the report's anchor element for a link, with the link text escaped for HTML. */
    private static String anchor(String link) {
        String safe = escapeHtml(link);
        return "<a href = \"" + safe + "\" >" + safe + "</a>";
    }

    /** Replaces the characters &amp;, &lt;, &gt; and the double quote by their HTML entities. */
    private static String escapeHtml(String text) {
        String raw = String.valueOf(text); // a missing link is printed as "null"
        StringBuilder out = new StringBuilder(raw.length() + 16);
        for (int i = 0; i < raw.length(); i++) {
            char c = raw.charAt(i);
            switch (c) {
                case '&':
                    out.append("&amp;");
                    break;
                case '<':
                    out.append("&lt;");
                    break;
                case '>':
                    out.append("&gt;");
                    break;
                case '"':
                    out.append("&quot;");
                    break;
                default:
                    out.append(c);
            }
        }
        return out.toString();
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -