📄 parser.java
字号:
attrValue = buffer.toString(); break; // read unquoted attribute. case NUMTOKEN : value = next; optional(WS); // Check maybe the opening quote is missing. next = getTokenAhead(); if (bQUOTING.get(next.kind)) { hTag = next; error("The value without opening quote is closed with '" + next.getImage() + "'" ); } attrValue = value.getImage(); break; default : break attributeReading; } attributes.addAttribute(name.getImage(), attrValue); optional(WS); } else // The '=' is missing: attribute without value. { noValueAttribute(element, name.getImage()); } } } /** * Return string, corresponding the given named entity. * The name is passed with the preceeding &, but without * the ending semicolon. */ protected String resolveNamedEntity(final String a_tag) { // Discard & if (!a_tag.startsWith("&")) throw new AssertionError("Named entity " + a_tag + " must start witn '&'." ); String tag = a_tag.substring(1); try { Entity entity = dtd.getEntity(tag); if (entity != null) return entity.getString(); entity = dtd.getEntity(tag.toLowerCase()); if (entity != null) { error("The name of this entity should be in lowercase", a_tag); return entity.getString(); } } catch (IndexOutOfBoundsException ibx) { /* The error will be reported. */ } error("Unknown named entity", a_tag); return a_tag; } /** * Return char, corresponding the given numeric entity. * The name is passed with the preceeding &#, but without * the ending semicolon. */ protected char resolveNumericEntity(final String a_tag) { // Discard &# if (!a_tag.startsWith("&#")) throw new AssertionError("Numeric entity " + a_tag + " must start witn '&#'." ); String tag = a_tag.substring(2); try { // Determine the encoding type: char cx = tag.charAt(0); if (cx == 'x' || cx == 'X') // Hexadecimal &#Xnnn; return (char) Integer.parseInt(tag.substring(1), 16); return (char) Integer.parseInt(tag); } /* The error will be reported. 
*/ catch (NumberFormatException nex) { } catch (IndexOutOfBoundsException ix) { } error("Invalid numeric entity", a_tag); return '?'; } /** * Reset all fields into the intial default state, preparing the * parset for parsing the next document. */ protected void restart() { documentTags.clear(); titleHandled = false; titleOpen = false; buffer.setLength(0); title.setLength(0); validator.restart(); } /** * The method is called when the HTML opening tag ((like <table>) * is found or if the parser concludes that the one should be present * in the current position. The method is called immediately before * calling the handleStartTag. * @param The tag */ protected void startTag(TagElement tag) throws ChangedCharSetException { } /** * Handle a complete element, when the tag content is already present in the * buffer and both starting and heading tags behind. This is called * in the case when the tag text must not be parsed for the nested * elements (elements STYLE and SCRIPT). */ private void _handleCompleteElement(TagElement tag) { _handleStartTag(tag); // Suppress inclusion of the SCRIPT ans STYLE texts into the title. HTML.Tag h = tag.getHTMLTag(); if (h == HTML.Tag.SCRIPT || h == HTML.Tag.STYLE) { boolean tmp = titleOpen; titleOpen = false; _handleText(); titleOpen = tmp; } else _handleText(); _handleEndTag(tag); } /** * A hooks for operations, preceeding call to handleEmptyTag(). * Handle the tag with no content, like <br>. As no any * nested tags are expected, the tag validator is not involved. * @param The tag being handled. */ private void _handleEmptyTag(TagElement tag) { try { validator.validateTag(tag, attributes); handleEmptyTag(tag); } catch (ChangedCharSetException ex) { error("Changed charset exception:", ex.getMessage()); } } /** * A hooks for operations, preceeding call to handleEndTag(). * The method is called when the HTML closing tag * is found. Calls handleTitle after closing the 'title' tag. 
* @param The tag */ private void _handleEndTag(TagElement tag) { validator.closeTag(tag); _handleEndTag_remaining(tag); } /** * Actions that are also required if the closing action was * initiated by the tag validator. * Package-private to avoid an accessor method. */ void _handleEndTag_remaining(TagElement tag) { HTML.Tag h = tag.getHTMLTag(); handleEndTag(tag); endTag(tag.fictional()); if (h.isPreformatted()) preformatted--; if (preformatted < 0) preformatted = 0; if (h == HTML.Tag.TITLE) { titleOpen = false; titleHandled = true; char[] a = new char[ title.length() ]; title.getChars(0, a.length, a, 0); handleTitle(a); } } /** * A hooks for operations, preceeding call to handleStartTag(). * The method is called when the HTML opening tag ((like <table>) * is found. * Package-private to avoid an accessor method. * @param The tag */ void _handleStartTag(TagElement tag) { validator.openTag(tag, attributes); startingTag(tag); handleStartTag(tag); HTML.Tag h = tag.getHTMLTag(); if (h.isPreformatted()) preformatted++; if (h == HTML.Tag.TITLE) { if (titleHandled) error("Repetetive <TITLE> tag"); titleOpen = true; titleHandled = false; } } /** * Resume parsing after heavy errors in HTML tag structure. * @throws ParseException */ private void forciblyCloseTheTag() throws ParseException { int closeAt = 0; buffer.setLength(0); ahead: for (int i = 1; i < 100; i++) { t = getTokenAhead(i - 1); if (t.kind == EOF || t.kind == BEGIN) break ahead; if (t.kind == END) { /* Closing '>' found. */ closeAt = i; break ahead; } } if (closeAt > 0) { buffer.append("Ignoring '"); for (int i = 1; i <= closeAt; i++) { t = getNextToken(); append(t); } buffer.append('\''); error(buffer.toString()); } } /** * Handle comment in string buffer. You can avoid allocating a char * array each time by processing your comment directly here. 
*/ private void handleComment() { char[] a = new char[ buffer.length() ]; buffer.getChars(0, a.length, a, 0); handleComment(a); } private TagElement makeTagElement(String name, boolean isSupposed) { Element e = (Element) dtd.elementHash.get(name.toLowerCase()); if (e == null) { error("Unknown tag <" + name + ">"); e = dtd.getElement(name); e.name = name.toUpperCase(); e.index = -1; } if (!documentTags.contains(e.name)) { markFirstTime(e); documentTags.add(e.name); } return makeTag(e, isSupposed); } /** * Read till the given token, resolving entities. Consume the given * token without adding it to buffer. * @param till The token to read till * @throws ParseException */ private void readTillTokenE(int till) throws ParseException { buffer.setLength(0); read: while (true) { t = getNextToken(); if (t.kind == Constants.ENTITY) { resolveAndAppendEntity(t); } else if (t.kind == EOF) { error("unexpected eof", t); break read; } else if (t.kind == till) break read; else if (t.kind == WS) { // Processing whitespace in accordance with CDATA rules: String s = t.getImage(); char c; for (int i = 0; i < s.length(); i++) { c = s.charAt(i); if (c == '\r') buffer.append(' '); // CR replaced by space else if (c == '\n') ; // LF ignored else if (c == '\t') buffer.append(' '); // Tab replaced by space else buffer.append(c); } } else append(t); } } /** * Resolve the entity and append it to the end of buffer. * @param entity */ private void resolveAndAppendEntity(Token entity) { switch (entity.category) { case ENTITY_NAMED : buffer.append(resolveNamedEntity(entity.getImage())); break; case ENTITY_NUMERIC : buffer.append(resolveNumericEntity(entity.getImage())); break; default : throw new AssertionError("Invalid entity category " + entity.category ); } } /** * Handle the remaining of HTML tags. This is a common end for * TAG, SCRIPT and STYLE. * @param closing True for closing tags ( </TAG> ). 
* @param name Name of element
     * @param start Token where element has started
     * @throws ParseException
     */
    private void restOfTag(boolean closing, Token name, Token start)
        throws ParseException {
        optional(WS);
        readAttributes(name.getImage());
        optional(WS);

        final Token ahead = getTokenAhead();
        final boolean properlyClosed = (ahead.kind == END);
        if (properlyClosed) {
            mustBe(END);
        }

        hTag = new Token(start, ahead);

        attributes.setResolveParent(defaulter.getDefaultParameters(name.getImage()));

        if (!properlyClosed) {
            // The tag body contains errors. If additionally the tag
            // name is not valid, this construction is treated as text.
            boolean unknownTag =
                dtd.elementHash.get(name.getImage().toLowerCase()) == null;

            if (unknownTag && backupMode) {
                error("Errors in tag body and unknown tag name. " +
                      "Treating the tag as a text."
                     );
                reset();

                hTag = mustBe(BEGIN);
                buffer.setLength(0);
                buffer.append(hTag.getImage());
                CDATA(false);
                return;
            }

            error("Forcibly closing invalid parameter list");
            forciblyCloseTheTag();
        }

        if (closing) {
            endTag(false);
            _handleEndTag(makeTagElement(name.getImage(), false));
            return;
        }

        TagElement opened = makeTagElement(name.getImage(), false);
        if (opened.getElement().type == DTDConstants.EMPTY) {
            _handleEmptyTag(opened);
        } else {
            _handleStartTag(opened);
        }
    }

    /**
     * Invokes the startTag() hook for the given tag. A
     * ChangedCharSetException thrown by the hook is not propagated; it is
     * reported through error() instead.
     *
     * @param tag the tag being opened
     */
    private void startingTag(TagElement tag) {
        try {
            startTag(tag);
        } catch (ChangedCharSetException charsetChange) {
            error("Invalid change of charset");
        }
    }

    /**
     * Reports that whitespace was found where it is not permitted.
     */
    private void ws_error() {
        error("Whitespace here is not permitted");
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -