⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 testurl.java

📁 This program tests whether URLs exist, then prints out an overall result in *.html style.
💻 JAVA
字号:
/*
* This program tests whether each given URL exists, then prints out an overall result.
* This is the main class.
* @author  Peng Xu  
* @version 4.0
*/
/**
 * Command-line entry point of the link checker.
 *
 * Every command-line argument is treated as the address of a web page. For
 * each address that can be reached, the page is downloaded to "source.html",
 * a report is written by expURL.outprint (numbered 1, 2, ... in argument
 * order) and the downloaded copy is deleted again.
 */
class testURL{
	/**
	 * Checks every URL given on the command line.
	 *
	 * @param args the URLs to test; when empty a usage line is printed and
	 *             nothing else happens
	 */
	public static void main(String[] args){
		if(args.length == 0){
			// Nothing to check: say how the program is meant to be called
			// instead of terminating silently.
			System.out.println("Usage: java testURL <url> [<url> ...]");
			return;
		}
		// take as many URLs as were given as command-line arguments
		for(int m=0;m<args.length;m++){
			// a fresh helper per URL, so nothing collected for one page
			// leaks into the report of the next one
			expURL expurl = new expURL();
			// set proxy
			expurl.setProxy();
			// check whether the link exists
			if(expurl.checks(args[m])){
				System.out.println("This URL exists !");
				// download the web page to the local machine
				expurl.downloadURL(args[m]);
				// process the downloaded file; reports are numbered from 1
				expurl.outprint("source.html",(m+1));
				// delete the downloaded file
				expurl.deleteFile("source.html");
			}
			else{
				System.out.println("This URL doesn't exist !");
			}
		}
	}
}
/*
* This class contains the methods used by the main class.
* @author  Peng Xu  
* @version 4.0
*/
//import package
import java.net.*;
import java.io.*;
import java.util.*;
/**
 * Link-checking helper: downloads a web page to a local file, extracts the
 * links found in it (anchors, images and form actions), tests whether each
 * link can be reached and writes the outcome as an HTML report.
 *
 * The object keeps state between calls: downloadURL remembers the page
 * address (used later as the base for relative links) and getAbsoluteLinks
 * collects the in-page label links that getlabel hands out. No
 * synchronization is performed.
 */
class expURL{
	/** Local file that downloadURL writes the fetched page to. */
	private static final String DOWNLOAD_FILE = "source.html";
	/** A link starting with one of these is never treated as an in-page label. */
	private static final String[] NON_LABEL_PREFIXES = {
		"http://", "https://", "mailto", "ftp", "telnet", "news", "gopher", "file:"
	};

	private String url;	// the original url, set by downloadURL
	// label links collected by getAbsoluteLinks; grows with every call
	private ArrayList<String> labels = new ArrayList<String>();

	/**
	 * Collects the href values of a local file.
	 *
	 * Values that contain '#' and do not start with one of the known scheme
	 * prefixes are regarded as label links: they are remembered in this
	 * object (see getlabel) and are NOT part of the returned list. Despite
	 * the method name the returned values are not made absolute here; that
	 * is done by Links.
	 *
	 * @param uriStr name of the local file to scan
	 * @return every non-label href value, in file order; empty when the file
	 *         cannot be read
	 */
	public ArrayList<String> getAbsoluteLinks(String uriStr){
		ArrayList<String> results = new ArrayList<String>();
		for(String link : readAttributeValues(uriStr, "href")){
			if(isLabel(link)){
				labels.add(link);
			}
			else{
				results.add(link);
			}
		}
		return results;
	}

	/**
	 * Returns the label links collected so far by getAbsoluteLinks.
	 *
	 * @param uriStr not used; kept so existing callers keep compiling
	 * @return the internal label list itself (not a copy)
	 */
	public ArrayList<String> getlabel(String uriStr){
		return labels;
	}

	/**
	 * Collects the src values of a local file.
	 *
	 * @param uriStr name of the local file to scan
	 * @return every src value, in file order; empty when the file cannot be read
	 */
	public ArrayList<String> getImageLinks(String uriStr){
		return readAttributeValues(uriStr, "src");
	}

	/**
	 * Collects the action values (form targets) of a local file.
	 *
	 * @param uriStr name of the local file to scan
	 * @return every action value, in file order; empty when the file cannot be read
	 */
	public ArrayList<String> getFormLinks(String uriStr){
		return readAttributeValues(uriStr, "action");
	}

	/**
	 * Rewrites links (labels, relative, opaque and hierarchical ones) against
	 * the address of the page they were found on.
	 *
	 * Relative links and lone fragments are resolved against the base;
	 * links that already carry a scheme are only normalized. Links starting
	 * with "about:" and links that are not valid URIs are returned unchanged,
	 * and a bad link does not stop the remaining ones from being rewritten.
	 *
	 * @param Url  address of the page, used as base
	 * @param urls the links to rewrite
	 * @return an array with one entry per element of urls, in the same order;
	 *         the raw links when Url is null or not a valid URI
	 */
	public String[] Links(String Url,ArrayList<String> urls){
		String[] linkset = urls.toArray(new String[urls.size()]);
		if(Url == null){
			System.err.println("No base address given, links are left as they are");
			return linkset;
		}
		URI base;
		try{
			base = new URI(Url).normalize();
		}
		catch(URISyntaxException e){
			System.err.println("Can't use " + Url + " as base address : " + e.getMessage());
			return linkset;
		}
		for(int i = 0; i < linkset.length; i++){
			String link = linkset[i];
			// "about:" links are deliberately left as they are
			if(link == null || link.startsWith("about:")){
				continue;
			}
			try{
				// Plain resolve() is enough: it returns links that have their own
				// scheme untouched and completes relative ones. Going through
				// relativize() first mangles links below a base path that has
				// no trailing slash.
				linkset[i] = base.resolve(new URI(link).normalize()).toString();
			}
			catch(URISyntaxException e){
				// keep the raw text so the link still shows up in the report
				System.err.println("Can't rewrite link " + link + " : " + e.getMessage());
			}
		}
		return linkset;
	}

	/**
	 * Tests whether an address can be reached.
	 *
	 * The address counts as existing when the first header field contains
	 * "200" or "text", or - if there is no such header field - when the
	 * content type starts with "text" or "image".
	 *
	 * @param Url the address to test
	 * @return true if the address exists, otherwise false (also for malformed
	 *         addresses and connection failures)
	 */
	public boolean checks(String Url){
		URLConnection c = null;
		try{
			c = new URL(Url).openConnection();
			c.connect();
			String contentType = c.getContentType();
			String head = c.getHeaderField(0);
			if(head != null){
				return head.indexOf("200") != -1 || head.indexOf("text") != -1;
			}
			// no header field available: fall back to the content type,
			// which may be unknown (null) as well
			return contentType != null
				&& (contentType.startsWith("text") || contentType.startsWith("image"));
		}
		catch(IOException e){
			// includes MalformedURLException
			return false;
		}
		catch(RuntimeException e){
			// Covers the ClassCastException the original code guarded against.
			// NOTE(review): protocol handlers may reject odd addresses with other
			// unchecked exceptions; such a link is reported as missing rather
			// than aborting the whole run.
			return false;
		}
		finally{
			if(c instanceof HttpURLConnection){
				((HttpURLConnection) c).disconnect();
			}
		}
	}

	/**
	 * Downloads a web page into the local file "source.html" and remembers
	 * its address as base for later link rewriting.
	 *
	 * Failures are reported on the console; the method itself does not throw
	 * for malformed addresses or I/O problems.
	 *
	 * @param Url address of the page to download
	 */
	public void downloadURL(String Url){
		url = Url;// store the url address
		Scanner infile = null;
		PrintWriter download = null;
		try{
			URLConnection con = new URI(url).toURL().openConnection();
			infile = new Scanner(new InputStreamReader(con.getInputStream()));
			download = new PrintWriter(DOWNLOAD_FILE);
			// copy the page line by line
			while(infile.hasNextLine()){
				download.println(infile.nextLine());
			}
		}catch (MalformedURLException e){
			System.out.println(e.getMessage());
		}catch (URISyntaxException e){
			System.out.println(e.getMessage());
		}catch (IOException e){
			System.err.println(e);
		}finally{
			// release both ends even when the copy failed half way
			if(infile != null){
				infile.close();
			}
			if(download != null){
				download.close();
			}
		}
	}

	/**
	 * Deletes a file and prints the outcome on the console.
	 *
	 * @param filename name of the file to delete
	 */
	public void deleteFile(String filename){
		File file = new File(filename);
		if(!file.exists()){
			System.out.println("Can't find the file : "+ filename);
			return;
		}
		if(file.delete()){
			System.out.println(filename +" was deleted !");
		}
		else{
			System.out.println(filename +" can't be deleted !");
		}
	}

	/**
	 * Routes http, https and ftp traffic through the default proxy
	 * proxy.dcu.ie:8080 and sets the user agent.
	 */
	public void setProxy(){
		setProxy("proxy.dcu.ie", "8080");
	}

	/**
	 * Routes http, https and ftp traffic through the given proxy and sets the
	 * user agent. The settings are JVM-wide system properties.
	 *
	 * @param host proxy host name, must not be null
	 * @param port proxy port as text, must not be null
	 */
	public void setProxy(String host,String port){
		String[] schemes = {"http", "ftp", "https"};
		for(String scheme : schemes){
			System.setProperty(scheme + ".proxyHost", host);
			System.setProperty(scheme + ".proxyPort", port);
		}
		System.setProperty("http.agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)");
	}

	/**
	 * Extracts all links of a downloaded page, tests every one of them and
	 * writes the report "output" + m + ".html".
	 *
	 * Relative links are completed with the address remembered by
	 * downloadURL. Progress is printed on the console; problems are reported
	 * there as well instead of being thrown.
	 *
	 * @param filename local file that holds the downloaded page
	 * @param m        number used in the name of the report file
	 */
	public void outprint(String filename,int m){
		PrintWriter writer = null;
		try{
			// A separate helper object is used for parsing so that labels
			// collected by earlier calls on this object don't end up in
			// this report.
			expURL parser = new expURL();
			String[] absoluteURL = parser.Links(url, parser.getAbsoluteLinks(filename));
			ArrayList<String> label = parser.getlabel(filename);
			String[] labelLinks = parser.Links(url, label);
			ArrayList<String> imageLink = parser.getImageLinks(filename);
			String[] imageLinks = parser.Links(url, imageLink);
			ArrayList<String> formLinks = parser.getFormLinks(filename);
			String[] formLink = parser.Links(url, formLinks);

			writer = new PrintWriter("output"+m+".html");
			String page = escapeHtml(url);
			writer.println("<HTML><HEAD><TITLE>OutPut</TITLE></HEAD>" +
							"<body><h1><b>The Link is <a href = \""+
							page+"\" >"+page+"</a></b></h1><br><br>");

			// test every link; each call returns {exists, doesn't exist}
			ArrayList<String> work = new ArrayList<String>();
			ArrayList<String> notwork = new ArrayList<String>();
			classify(absoluteURL, work, notwork);
			int[] labelCount = classify(labelLinks, work, notwork);
			int[] imageCount = classify(imageLinks, work, notwork);
			int[] formCount = classify(formLink, work, notwork);

			writer.println("<br><br><br><h2><B>Doesn't work well Links<br></B></h2><br><br>");
			printLinks(writer, notwork);
			writer.println("<h2><B>Works well Links<br></B></h2><br><br>");
			printLinks(writer, work);

			// summary
			writer.println("<h3><B>Number of links in the file = " +
							(work.size()+notwork.size())+"<br>");
			writer.println("Worked = " + work.size()+"<br>");
			writer.println("Failed = " + notwork.size()+"<br><br>");
			writer.println("Label = "+ label.size()+"<br>");
			writer.println("exists = "+ labelCount[0]+"<br>");
			writer.println("doesn't exist = "+ labelCount[1]+"<br><br>");
			writer.println("Image = "+ imageLink.size()+"<br>");
			writer.println("exists = "+ imageCount[0]+"<br>");
			writer.println("doesn't exist = "+ imageCount[1]+"<br><br>");
			writer.println("Form action = "+formLinks.size()+"<br>");
			writer.println("exists = "+ formCount[0]+"<br>");
			writer.println("doesn't exist = "+ formCount[1]+"<br></B></h3>");
			writer.println("</BODY></html>");
		}
		catch(Exception e){
			System.out.println(e.getMessage());
		}
		finally{
			// always flush what was written, even after a failure
			if(writer != null){
				writer.close();
			}
		}
	}

	/** Tests each link, files it under work or notwork and returns {exists, doesn't exist} counts. */
	private int[] classify(String[] links,ArrayList<String> work,ArrayList<String> notwork){
		int[] counts = new int[2];
		for(String link : links){
			if(checks(link)){
				work.add(link);
				System.out.println(link+" exists !");
				counts[0]++;
			}
			else{
				notwork.add(link);
				System.out.println(link+" doesn't exist !");
				counts[1]++;
			}
		}
		return counts;
	}

	/** Writes one HTML anchor line per link; link text is escaped because it comes from a foreign page. */
	private static void printLinks(PrintWriter writer,ArrayList<String> links){
		for(String link : links){
			String safe = escapeHtml(link);
			writer.println("<a href = \""+safe+"\" >"+safe+"</a><br>");
		}
	}

	/** Replaces the characters that are special in HTML text and attribute values; null becomes "null". */
	private static String escapeHtml(String text){
		if(text == null){
			return "null";
		}
		StringBuilder escaped = new StringBuilder(text.length());
		for(int i = 0; i < text.length(); i++){
			char ch = text.charAt(i);
			switch(ch){
				case '&': escaped.append("&amp;"); break;
				case '<': escaped.append("&lt;"); break;
				case '>': escaped.append("&gt;"); break;
				case '"': escaped.append("&quot;"); break;
				case '\'': escaped.append("&#39;"); break;
				default: escaped.append(ch);
			}
		}
		return escaped.toString();
	}

	/** Tells whether a href value is an in-page label: it contains '#' and has none of the known scheme prefixes. */
	private static boolean isLabel(String link){
		if(link.indexOf('#') == -1){
			return false;
		}
		for(String prefix : NON_LABEL_PREFIXES){
			if(link.startsWith(prefix)){
				return false;
			}
		}
		return true;
	}

	/** Reads a file token by token (split on whitespace) and returns the values of every token of the form attribute="value". */
	private static ArrayList<String> readAttributeValues(String fileName,String attribute){
		ArrayList<String> values = new ArrayList<String>();
		if(fileName == null){
			return values;
		}
		Scanner infile = null;
		try{
			infile = new Scanner(new FileReader(fileName));
			while(infile.hasNext()){
				String value = attributeValue(infile.next(), attribute);
				if(value != null){
					values.add(value);
				}
			}
		}
		catch(FileNotFoundException e){
			System.err.println("Can't read " + fileName + " : " + e.getMessage());
		}
		finally{
			// close the file so it can be deleted afterwards
			if(infile != null){
				infile.close();
			}
		}
		return values;
	}

	/**
	 * Returns the text between attribute=" and the next double quote, or null
	 * when the token does not start that way or has no closing quote. The
	 * attribute name is matched without regard to case.
	 */
	private static String attributeValue(String token,String attribute){
		String opening = attribute + "=\"";
		if(!token.regionMatches(true, 0, opening, 0, opening.length())){
			return null;
		}
		int end = token.indexOf('"', opening.length());
		if(end < 0){
			return null;
		}
		return token.substring(opening.length(), end);
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -