⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xpathcounter.java

📁 Generate all XPaths for a group of XML Files.
💻 JAVA
字号:
/**
 * collects all XPaths, encodes all xml files
 * @author Michal Karolczak
 */
import java.io.*;
import java.util.*;
import javax.xml.parsers.*;
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Command-line driver that gathers the XPaths found in a set of XML files,
 * gives every distinct "xpath: count" entry a global number, and records for
 * each file which of those numbers it contains.
 *
 * <p>Input directory ({@code C:\smalldoc}) and output file ({@code C:\out.txt})
 * are hard-coded.
 */
public class XPathCounter
{
   /** Number of milliseconds in one minute, used by the elapsed-time report. */
   private static final long MILLIS_PER_MINUTE = 60000L;

   /**
    * Runs the whole pipeline: parse every listed file, number the distinct
    * XPath entries, write them to the output file, then save the encoded file
    * list and build the distance matrix.
    *
    * @param argv command-line arguments; not used
    * @throws IOException if the output file cannot be opened, written or closed
    */
   public static void main(String argv[]) throws IOException
   {
      System.out.println("App started");
      long start = System.currentTimeMillis();

      // Output file for all XPaths. It is opened up front so an unwritable
      // path fails before the (potentially long) parsing phase starts.
      File outp = new File("C:\\out.txt");
      Writer output = new BufferedWriter(new FileWriter(outp));

      // Declared outside the try block because it is still needed after the
      // writer has been closed.
      BitEncodedFilesList bitEncodedFilesList = new BitEncodedFilesList();
      try
      {
         // Initial capacity of the table of all XPaths (133324 entries in total
         // were observed by the original author).
         int initialCapacity = 200000;
         // Next global XPath number to hand out.
         int xPathNr = 0;

         System.out.println("Gathering information about files...");
         // NOTE(review): presumably lists the "xml" files under C:\smalldoc;
         // GetFilesList is defined elsewhere -- confirm.
         GetFilesList filesList = new GetFilesList("C:\\smalldoc","xml");

         // Keyed by the full "xpath: count" string. The previous version keyed
         // by String.hashCode(), which merged distinct entries whenever two
         // strings happened to share a hash code. LinkedHashMap keeps the
         // insertion order, i.e. the order of the global numbers.
         Map<String, XPathInfo> allXPathsTable = new LinkedHashMap<String, XPathInfo>(initialCapacity);

         // One factory is enough for all files; a fresh parser is still
         // created per file.
         SAXParserFactory factory = SAXParserFactory.newInstance();

         System.out.println("Encoding...");
         // Visit every entry of the list. The previous bound (size()-1)
         // skipped the last file.
         int fileCount = filesList.listOfFiles.size();
         int i;
         for (i = 0; i < fileCount; i++)
         {
            String fileName = filesList.listOfFiles.get(i).toString();
            // Collects the XPath counts of the current document.
            XPathCounterSaxHandler handler = new XPathCounterSaxHandler();
            // Holds the file name and the bits set for the current document.
            BitEncodedFileInfo bitEncodedFileInfo = new BitEncodedFileInfo(fileName);
            try
            {
               SAXParser saxParser = factory.newSAXParser();
               saxParser.parse(new File(filesList.listOfFiles.get(i)), handler);

               Map<String, Integer> xPathCounts = handler.getXPathCounts();
               if (xPathCounts.isEmpty())
               {
                  System.out.println(fileName + " contains no XPaths or it's not well formed.");
               }
               else
               {
                  // Walk the XPaths in sorted order so numbering does not
                  // depend on the order of elements inside the document.
                  for (String xPath : new TreeSet<String>(xPathCounts.keySet()))
                  {
                     String xPathWithCount = xPath + ": " + xPathCounts.get(xPath);
                     XPathInfo known = allXPathsTable.get(xPathWithCount);
                     if (known == null)
                     {
                        // First occurrence: register it under the next number.
                        allXPathsTable.put(xPathWithCount, new XPathInfo(xPathWithCount, xPathNr));
                        bitEncodedFileInfo.SetBit(xPathNr);
                        xPathNr++;
                     }
                     else
                     {
                        bitEncodedFileInfo.SetBit(known.xPathNr);
                     }
                  }
               }
            }
            catch (Exception e)
            {
               // Best effort: report the file and carry on with the next one.
               // JVM Errors (e.g. OutOfMemoryError) are deliberately not caught.
               System.out.println(fileName + " contains no XPaths or is not well formed");
               e.printStackTrace();
            }
            // The file is recorded even when parsing failed (with whatever
            // bits were set up to that point).
            bitEncodedFilesList.AddNewFileInfo(bitEncodedFileInfo);
         }
         System.out.println("Encoding finished!");
         System.out.println("Nr of files: " + i);
         System.out.println("Nr of XPaths: " + xPathNr);
         printElapsed(start);

         // One line per distinct entry, in numbering order.
         for (XPathInfo info : allXPathsTable.values())
         {
            output.append(info.xPath + " " + info.xPathNr + "\n");
         }
      }
      finally
      {
         // Always release the file handle, including on unexpected failures.
         output.close();
      }

      // Saves encoded files list to file
      bitEncodedFilesList.SaveToFile();
      start = System.currentTimeMillis();
      System.out.println("Creating distance matrix...");
      bitEncodedFilesList.CreateDistanceMatrix();
      System.out.println("Distance matrix created!");
      printElapsed(start);
   }   //   main.

   /**
    * Prints the time elapsed since {@code startMillis} in milliseconds and in
    * whole minutes.
    *
    * @param startMillis start timestamp as returned by
    *                    {@link System#currentTimeMillis()}
    */
   private static void printElapsed(long startMillis)
   {
      long time = System.currentTimeMillis() - startMillis;
      System.out.println("Time [ms]: " + time);
      // Exact integer division; the former factor 0.0000166 under-reported
      // (60000 ms came out as 0 minutes).
      System.out.println("Time [min]: " + (time / MILLIS_PER_MINUTE));
   }
}   //   XPathCounter


/**
 * SAX handler that counts how many times each element path and attribute path
 * occurs in one document.
 *
 * <p>Keys have the form {@code <path>:<depth>}: an element under the root
 * {@code r} is stored as {@code /r/a:2}, and its attribute {@code id} as
 * {@code /r/a/@id:3} (attributes are counted one level deeper than their
 * element).
 */
class XPathCounterSaxHandler extends DefaultHandler
{
	/** Current nesting depth: 0 outside the root element, 1 inside it, and so on. */
	int deep=0;

	/** All paths found so far and their counts, in first-seen order. */
	private LinkedHashMap<String, Integer> xPathCounts  = new LinkedHashMap<String, Integer>();

	/** Paths of the currently open elements; the top is the innermost one. */
	private Stack<String> xPaths = new Stack<String> ();

	/** Creates a handler with no paths recorded. */
	public XPathCounterSaxHandler()
	{
	}

	/**
	 * Returns the live map of path keys to occurrence counts.
	 *
	 * @return the handler's internal map (not a copy)
	 */
	public LinkedHashMap<String, Integer> getXPathCounts()
	{
		return this.xPathCounts;
	}   //   getXPathCounts

	/**
	 * Records the opened element and each of its attributes.
	 *
	 * @param namespaceURI namespace URI of the element; not used
	 * @param lName        local name; when empty, {@code qName} is used instead
	 * @param qName        qualified name of the element
	 * @param attrs        attributes of the element; may be {@code null}
	 * @throws SAXException declared by the SAX contract; not thrown here
	 */
	@Override
	public void startElement(String namespaceURI,String lName,String qName,Attributes attrs) throws SAXException
	{
		// The local name is empty when the parser is not namespace-aware.
		String eName = "".equals(lName) ? qName : lName;

		// Build this element's path from its parent's path.
		String parentXPath = xPaths.empty() ? "" : xPaths.peek();
		String currentPath = parentXPath + "/" + eName;
		xPaths.push(currentPath);
		deep++;

		increment(currentPath + ":" + deep);

		if (attrs != null)
		{
			for (int i = 0; i < attrs.getLength(); i++)
			{
				String aName = attrs.getLocalName(i);
				if ("".equals(aName)) aName = attrs.getQName(i);
				// Repeated attribute paths are now counted; previously the
				// count was re-stored unchanged and never rose above 1.
				increment(currentPath + "/@" + aName + ":" + (deep + 1));
			}
		}
	}   //   end - startElement

	/**
	 * Leaves the current element: drops its path and decreases the depth.
	 *
	 * @param namespaceURI namespace URI of the element; not used
	 * @param lName        local name of the element; not used
	 * @param qName        qualified name of the element; not used
	 * @throws SAXException declared by the SAX contract; not thrown here
	 */
	@Override
	public void endElement(String namespaceURI, String lName,String qName) throws SAXException
	{
		xPaths.pop();
		deep--;
	}   // end -  endElement

	/** Adds one to the count stored under {@code key}, starting from zero. */
	private void increment(String key)
	{
		Integer previous = xPathCounts.get(key);
		xPathCounts.put(key, Integer.valueOf(previous == null ? 1 : previous.intValue() + 1));
	}
}   //   XPathCounterSaxHandler

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -