📄 parser.java
字号:
package sim;import java.net.*;import java.io.*;import java.util.*;/** * parser: this class should read a URL and * parse the content for the waplet. * * waplet classes are not currently in a package, because * of strange browser/jar behavior with packages. * * This class has been added to the sim package for use * with the simulator class. * * For license information please see Simulator.java. * * 02/29/00 (that's right): fixing the parser a little bit; * it's still a nickel and dime parser, but I think I can * make it a little smoother without bloating it. the problem * is that newlines inside tags cause problems, and cocoon is * a little unpredictable with newlines. * * 03/02/00: not using cocoon at the moment, and now there's * a spacing problem: this class seems to be passing along spaces * where it shouldn't. -- FIXED */public class Parser{ /** * this is a static set of escape strings that need to be * rendered for WML display. */ static final String ESCAPES[] = { "'", "'", "<", "<", ">", ">", "&", "&", " ", " ", """, "\"" }; /** * this hash is used as a lookup table for escape-sequence * substitution. using two arrays would probably be more efficient. */ static Hashtable escapeCharacters; /** default constructor, sets the escape string lookup table. */ public Parser(){ escapeCharacters = new Hashtable(); int len = ESCAPES.length; for(int i = 0; i< len; i += 2){ escapeCharacters.put(ESCAPES[i], ESCAPES[i+1]); } } /** * read a URL, parse it into the vector. this is a rather * dumb way of doing it, but for the time being... * if it's going to throw a murle, it's going to do it here. * * 03/03/00: there have been a lot of problems with this method. * it can't use URLConnections properly, because Netscape doesn't * support any of the useful properties in that class. It can't * use sockets, because they'll fail when it's going through a * proxy. the temporary solution is to add a parameter to the * request, and have the servlets check that parameter as well as the * headers. * * note also that java 1.1 doesn't have built in support for * SSL, so hitting SSL URLs won't work without 1.2. * * the method takes an additional hash parameter, cookiejar, that * is used for managing cookies. by default all cookies are always * exchanged to everyone. this should probably be changed. * */ public Vector readURL(String url, Hashtable cookiejar) throws MalformedURLException, IOException{ URL page = null; URLConnection uconn = null; BufferedReader br = null; StringBuffer sb = new StringBuffer(); String line = null; int idx = 0; String anchor = ""; // debug // System.out.println( "url: " + url); // add a parm to the resource-- this is used because // netscape's broken implementation of url connection. // this won't be necessary if you're not keying off // user-agent (or if you're not using netscape). // things to note: when adding a random parameter, start // with a ?, and & after parms, but in either case // before an #. if( ( idx = url.indexOf( '#')) > -1){ anchor = url.substring( idx); url = url.substring( 0, idx - 1); } if( -1 == url.indexOf( "?")) url = url + "?User-agent=pagea-WAPlet" + anchor; else url = url + "&User-agent=pagea-WAPlet" + anchor; // debug // System.out.println("loading content: " + url); page = new URL(url); uconn = page.openConnection(); //String resource = page.getHost(); //String file = page.getFile(); // debug //System.out.println( "resource: " + resource); //System.out.println( "file: " + file); uconn.setRequestProperty( "User-agent", "pagea-WAPlet"); br = new BufferedReader( new InputStreamReader( uconn.getInputStream())); while( null != (line = br.readLine()) && !line.equals( "0")){ line = line.trim(); // append a space to the end of the line, in // place of the newline character. this should // help solve the waplet display error. sb.append( line.trim() + " "); } br.close(); br = null; return readString( sb.toString()); } /** * read (parse) one line string. */ public void parseString(String s, Vector v){ String tmp = null; if(s.length() > 0){ // put the line in, but chop it first. for(Enumeration e = (cutLine(s)).elements(); e.hasMoreElements(); ){ v.addElement((tmp = (String)(e.nextElement()))); } } } /** * read a multilined string, basically a substitute for * readURL(). */ public Vector readString(String s){ Vector v = new Vector(); parseString( s, v); return v; } /** * cut up a line. all tags are on their own line; * non-tags are on their own line too. this is just a * convience for the parser. */ public Vector cutLine(String s){ StringBuffer sb = new StringBuffer(""); String tmp = null; Vector vec = new Vector(); int index = 0; int len = s.length(); char c = ' '; // the loop. if it's a <, it goes on the // next line; if it's a > it's the _last_ // char on this line. while( index < len){ c = s.charAt(index++); if( c == '<'){ if((tmp = (sb.toString().trim())).length() > 0) vec.addElement(tmp); sb = new StringBuffer(); sb.append(c); } else if( c == '>'){ sb.append(c); if((tmp = (sb.toString().trim())).length() > 0) vec.addElement(tmp); sb = new StringBuffer(); } else sb.append(c); } tmp = sb.toString().trim(); if(tmp.length() > 0) vec.addElement(tmp); return vec; } /** this method returns a tag, or null if it's not a tag. */ public Tag makeTag(Object o){ Tag t = new Tag((String)o); return (t.getState() == Tag.UNDEFINED) ? null : t; } /** * this method builds the document tree. it'll throw an * exception on a document error, which is handy for * validating page code. it should probably display a * specific error screen, but for now it just dumps to * std out. * * store the tree: put it all in a vector. don't worry * about level indices, because it'll fail here if it's bad. * start text with a ' ('). * * catch exceptions, for a better stack trace, but then * throw them back. */ public Vector tree( Vector v) throws Exception{ // maintain a hash, keyed by 'level', with values // of the tag names. fail on a bad match. Hashtable tree = new Hashtable(); Vector document = new Vector(); // temporary tag, string ref, object holder Tag tag = null; String tmp = null; Object obj = null; // current level int currentLevel = -1; for(Enumeration e = v.elements(); e.hasMoreElements(); ){ tmp = ( null == ( obj = e.nextElement())) ? "" : (String)obj; //if( null == obj) System.out.println("null element!"); //System.out.println("el: " + tmp); if(null != (tag = makeTag(tmp))){ if(tag.getState() == tag.TAG_OPEN){ currentLevel++; tree.put(new Integer(currentLevel), tag); document.addElement(tag); } else if(tag.getState() == tag.TAG_CLOSE){ if( null == tree.get( new Integer( currentLevel))){ String xmessage = " attempted close of </" + tag.getName() + "> at root level"; throw new Exception( xmessage); } else if(tag.getName().equals(((Tag)(tree.get( new Integer(currentLevel)))).getName())){ currentLevel--; document.addElement(tag); } else{ String xmessage = " attempted close of </" + tag.getName() + "> in body of <" + ((Tag)(tree.get( new Integer(currentLevel)))).getName() + ">"; throw new Exception( xmessage); } } else if(tag.getState() == tag.TAG_SELFCONTAINED){ document.addElement(tag); } } // it's text... else document.addElement(new String("'" + cleanString(tmp))); } return document; } /** * this method handles escaped chars in the string, and * also the double $$ substitutions. the renderer just takes * plain text. */ public String cleanString(String s){ // first, translate $$ to $... int idx = 0; while((idx = s.indexOf( "$$")) > -1){ s = s.substring(0, idx) + s.substring(idx+1); } StringBuffer sb = new StringBuffer(""); StringBuffer character = new StringBuffer(""); Object obj = null; int len = s.length(); idx = 0; char c = ' '; boolean esc = false; // loop keys on & and ; to parse escaped characters. // it then does a lookup to swap the strings. while(idx < len){ c = s.charAt(idx); if( esc){ character.append(c); if( c == ';'){ obj = escapeCharacters.get( character.toString()); if( null != obj) sb.append((String)obj); esc = false; } } else if( c == '&'){ character = new StringBuffer(); character.append(c); esc = true; } else{ sb.append(c); } idx++; } return sb.toString(); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -