⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 parser.java

📁 Mobile 应用程序使用 Java Micro Edition (Java ME) 平台
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
		// true.		TagElement t = makeTag(elem);		handleText(t);		attributes.addAttribute(HTML.Attribute.ENDTAG, "true");		handleEmptyTag(makeTag(elem));		unknown = false;		return;	    }	    // find the corresponding start tag	    // A commonly occuring error appears to be the insertion	    // of extra end tags in a table.  The intent here is ignore	    // such extra end tags.	    //	    if (!strict) {		String stackElem = stack.elem.getName();		if (stackElem.equals("table")) {		    // If it isnt a valid end tag ignore it and return		    //		    if (!elem.getName().equals(stackElem)) {			error("tag.ignore", elem.getName());			return;		    }		}		if (stackElem.equals("tr") ||		    stackElem.equals("td")) {		    if ((!elem.getName().equals("table")) &&			(!elem.getName().equals(stackElem))) {			error("tag.ignore", elem.getName());			return;		    }		}	    }	    TagStack sp = stack;	    while ((sp != null) && (elem != sp.elem)) {		sp = sp.next;	    }	    if (sp == null) {		error("unmatched.endtag", elem.getName());		return;	    } 	    // People put font ending tags in the darndest places. 	    // Don't close other contexts based on them being between 	    // a font tag and the corresponding end tag.  Instead, 	    // ignore the end tag like it doesn't exist and allow the end	    // of the document to close us out.	    String elemName = elem.getName(); 	    if (stack != sp &&		(elemName.equals("font") ||		 elemName.equals("center"))) {		// Since closing out a center tag can have real wierd		// effects on the formatting,  make sure that tags		// for which omitting an end tag is legimitate		// get closed out.		//		if (elemName.equals("center")) {		    while(stack.elem.omitEnd() && stack != sp) {			endTag(true);		    }		    if (stack.elem == elem) {			endTag(false);		    }		}		return;	    }	    // People do the same thing with center tags.  In this	    // case we would like to close off the center tag but	    // not necessarily all enclosing tags.	    
// end tags	    while (stack != sp) {		endTag(true);	    }	    endTag(false);	    return;	  case -1:	    error("eof");	    return;	}	// start tag [14] 314:1	if (!parseIdentifier(true)) {	    elem = recent;	    if ((ch != '>') || (elem == null)) {		error("expected.tagname");		return;	    }	} else {	    String elemStr = getString(0);	    if (elemStr.equals("image")) {		elemStr = new String("img");	    }	    /* determine if this element is part of the dtd. */	    if (!dtd.elementExists(elemStr)) {		//		parseInvalidTag();		error("tag.unrecognized ", elemStr);		elem = dtd.getElement("unknown");		elem.name = elemStr;		unknown = true;	    } else {		elem = dtd.getElement(elemStr);	    }	}	// Parse attributes	parseAttributeSpecificationList(elem);	switch (ch) {	  case '/':	    net = true;	  case '>':	    ch = readCh();            if (ch == '>' && net) {                ch = readCh();            }	  case '<':	    break;	  default:	    error("expected", "'>'");	    break;	}	if (!strict) {	  if (elem.getName().equals("script")) {	    error("javascript.unsupported");	  }	}	// ignore RE after start tag	//	if (!elem.isEmpty())  {	    if (ch == '\n') {		ln++;		lfCount++;		ch = readCh();	    } else if (ch == '\r') {		ln++;		if ((ch = readCh()) == '\n') {		    ch = readCh();		    crlfCount++;		}		else {		    crCount++;		}	    }	}	// ensure a legal context for the tag	TagElement tag = makeTag(elem, false);	/** In dealing with forms, we have decided to treat	    them as legal in any context.  Also, even though	    they do have a start and an end tag, we will	    not put this tag on the stack.  This is to deal	    several pages in the web oasis that choose to	    start and end forms in any possible location. 
**/        /*	if (!strict && elem.getName().equals("form")) {	    if (lastFormSent == null) {		lastFormSent = tag;	    } else {		handleEndTag(lastFormSent);		lastFormSent = tag;	    }	} else {        */	    // Smlly, if a tag is unknown, we will apply	    // no legalTagContext logic to it.	    //	    if (!unknown) {		legalTagContext(tag);		// If skip tag is true,  this implies that		// the tag was illegal and that the error		// recovery strategy adopted is to ignore		// the tag.		if (!strict && skipTag) {		    skipTag = false;		    return;		}	    }            /*	}            */	startTag(tag);	if (!elem.isEmpty()) {	    switch (elem.getType()) {	      case CDATA:		parseLiteral(false);		break;	      case RCDATA:		parseLiteral(true);		break;	      default:		if (stack != null) {		    stack.net = net;		}		break;	    }	}    }    private static final String START_COMMENT = "<!--";    private static final String END_COMMENT = "-->";    private static final char[] SCRIPT_END_TAG = "</script>".toCharArray();    private static final char[] SCRIPT_END_TAG_UPPER_CASE =                                         "</SCRIPT>".toCharArray();        void parseScript() throws IOException {        char[] charsToAdd = new char[SCRIPT_END_TAG.length];                /* Here, ch should be the first character after <script> */        while (true) {            int i = 0;            while (i < SCRIPT_END_TAG.length                        && (SCRIPT_END_TAG[i] == ch                            || SCRIPT_END_TAG_UPPER_CASE[i] == ch)) {                charsToAdd[i] = (char) ch;                ch = readCh();                i++;            }            if (i == SCRIPT_END_TAG.length) {                                /*  '</script>' tag detected */                /* Here, ch == '>' */                ch = readCh();                /* Here, ch == the first character after </script> */                return;            } else {                                /* To account for extra read()'s that happened 
*/                for (int j = 0; j < i; j++) {                    addString(charsToAdd[j]);                }                                switch (ch) {                case -1:                    error("eof.script");                    return;                case '\n':                    ln++;                    ch = readCh();                    lfCount++;                    addString('\n');                    break;                case '\r':                    ln++;                    if ((ch = readCh()) == '\n') {                        ch = readCh();                        crlfCount++;                    } else {                        crCount++;                    }                    addString('\n');                    break;                default:                    addString(ch);                    ch = readCh();                    break;                } // switch            }        } // while    }        /**     * Parse Content. [24] 320:1     */    void parseContent() throws IOException {	Thread curThread = Thread.currentThread();	for (;;) {	    if (curThread.isInterrupted()) {                curThread.interrupt(); // resignal the interrupt                break;            }            int c = ch;	    currentBlockStartPos = currentPosition;                        if (recent == dtd.script) { // means: if after starting <script> tag                /* Here, ch has to be the first character after <script> */                parseScript();                last = makeTag(dtd.getElement("comment"), true);                                /* Remove leading and trailing HTML comment declarations */                String str = new String(getChars(0)).trim();                int minLength = START_COMMENT.length() + END_COMMENT.length();                if (str.startsWith(START_COMMENT) && str.endsWith(END_COMMENT)                        && str.length() >= (minLength)) {                    str = str.substring(START_COMMENT.length(),                                      
 str.length() - END_COMMENT.length());                }                                /* Handle resulting chars as comment */                handleComment(str.toCharArray());                endTag(false);                lastBlockStartPos = currentPosition;            } else {                switch (c) {                  case '<':                    parseTag();                    lastBlockStartPos = currentPosition;                    continue;                  case '/':                    ch = readCh();                    if ((stack != null) && stack.net) {                        // null end tag.                        endTag(false);                        continue;                    }                    break;                  case -1:                    return;                  case '&':                    if (textpos == 0) {                        if (!legalElementContext(dtd.pcdata)) {                            error("unexpected.pcdata");                        }                        if (last.breaksFlow()) {                            space = false;                        }                    }                    char data[] = parseEntityReference();                    if (textpos + data.length + 1 > text.length) {                        char newtext[] = new char[Math.max(textpos + data.length + 128, text.length * 2)];                        System.arraycopy(text, 0, newtext, 0, text.length);                        text = newtext;                    }                    if (space) {                        space = false;                        text[textpos++] = ' ';                    }                    System.arraycopy(data, 0, text, textpos, data.length);                    textpos += data.length;                    ignoreSpace = false;                    continue;                  case '\n':                    ln++;                    lfCount++;                    ch = readCh();                    if ((stack != null) && stack.pre) {                     
   break;                    }                    if (textpos == 0) {                        lastBlockStartPos = currentPosition;                    }                    if (!ignoreSpace) {                        space = true;                    }                    continue;                  case '\r':                    ln++;                    c = '\n';                    if ((ch = readCh()) == '\n') {                        ch = readCh();                        crlfCount++;                    }                    else {                        crCount++;                    }                    if ((stack != null) && stack.pre) {                        break;                    }                    if (textpos == 0) {                        lastBlockStartPos = currentPosition;                    }                    if (!ignoreSpace) {                        space = true;                    }                    continue;                  case '\t':                  case ' ':                    ch = readCh();                    if ((stack != null) && stack.pre) {                        break;                    }                    if (textpos == 0) {                        lastBlockStartPos = currentPosition;                    }                    if (!ignoreSpace) {                        space = true;                    }                    continue;                  default:                    if (textpos == 0) {                        if (!legalElementContext(dtd.pcdata)) {                            error("unexpected.pcdata");                        }                        if (last.breaksFlow()) {                            space = false;                        }                    }                    ch = readCh();                    break;                }            }                 	    // enlarge buffer if needed	    if (textpos + 2 > text.length) {		char newtext[] = new char[text.length + 128];		System.arraycopy(text, 0, newtext, 0, text.length);		
text = newtext;	    }	    // output pending space	    if (space) {		if (textpos == 0) {		    lastBlockStartPos--;		}		text[textpos++] = ' ';		space = false;	    }	    text[textpos++] = (char)c;            ignoreSpace = false;	}    }    /**     * Returns the end of line string. This will return the end of line     * string that has been encountered the most, one of \r, \n or \r\n.     */    String getEndOfLineString() {	if (crlfCount >= crCount) {	    if (lfCount >= crlfCount) {		return "\n";	    }	    else {		return "\r\n";	    }	}	else {	    if (crCount > lfCount) {		return "\r";	    }	    else {		return "\n";	    }	}    }    /**     * Parse an HTML stream, given a DTD.     */    public synchronized void parse(Reader in) throws IOException {	this.in = in;	this.ln = 1;	seenHtml = false;	seenHead = false;	seenBody = false;	crCount = lfCount = crlfCount = 0;	try {            ch = readCh();            text = new char[1024];            str = new char[128];                        parseContent();            // NOTE: interruption may have occurred.  Control flows out            // of here normally.            while (stack != null) {                endTag(true);            }            in.close();	} catch (IOException e) {	    errorContext();	    error("ioexception");	    throw e;	} catch (Exception e) {	    errorContext();	    error("exception", e.getClass().getName(), e.getMessage());	    e.printStackTrace();	} catch (ThreadDeath e) {	    errorContext();	    error("terminated");	    e.printStackTrace();	    throw e;	} finally {	    for (; stack != null ; stack = stack.next) {		handleEndTag(stack.tag);	    }	    text = null;	    str = null;	}    }    /*     * Input cache.  This is much faster than calling down to a synchronized     * method of BufferedReader for each byte.  
Measurements done 5/30/97
     * show that there's no point in having a bigger buffer:  Increasing
     * the buffer to 8192 had no measurable impact for a program discarding
     * one character at a time (reading from an http URL to a local machine).
     * NOTE: If the current encoding is bogus, and we read too much
     * (past the content-type) we may suffer a MalformedInputException. For
     * this reason the initial size is 1 and when the body is encountered the
     * size is adjusted to 256.
     */
    private char buf[] = new char[1];
    private int pos;
    private int len;

    /*
        tracks position relative to the beginning of the
        document.
    */
    private int currentPosition;

    /**
     * Returns the next character of the input, refilling the cache from the
     * reader once every cached character has been handed out.
     *
     * @return the next character, or -1 when the reader delivers no more
     *         characters
     * @throws IOException if the underlying reader fails; an
     *         InterruptedIOException raised by the reader is passed on
     *         unchanged
     */
    private final int readCh() throws IOException {
        boolean cacheExhausted = (pos >= len);

        if (cacheExhausted) {
            // Any exception from the reader simply propagates to the caller.
            len = in.read(buf);
            if (len <= 0) {
                return -1;      // eof
            }
            pos = 0;
        }

        currentPosition++;
        int next = buf[pos];
        pos++;
        return next;
    }

    /**
     * Returns the number of characters consumed from the input so far.
     */
    protected int getCurrentPos() {
        return currentPosition;
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -