📄 parser.java
字号:
textpos += data.length; continue; case '\n': ln++; ch = readCh(); lfCount++; break; case '\r': ln++; if ((ch = readCh()) == '\n') { ch = readCh(); crlfCount++; } else { crCount++; } c = '\n'; break; default: ch = readCh(); break; } // output character if (textpos == text.length) { char newtext[] = new char[text.length + 128]; System.arraycopy(text, 0, newtext, 0, text.length); text = newtext; } text[textpos++] = (char)c; } }

    /**
     * Parse attribute value. [33] 331:1
     * <p>
     * Starting at the current character {@code ch}, collects characters with
     * {@code addString} until the value ends and then returns
     * {@code getString(0)}. A value that opens with {@code '} or {@code "}
     * ends at the matching quote or at end of input. An unquoted value ends
     * at a space, tab, line break, {@code <}, {@code >} or end of input.
     * Line breaks update {@code ln} and the {@code lfCount} /
     * {@code crCount} / {@code crlfCount} counters.
     *
     * @param lower when true, the ASCII letters {@code A-Z} (including those
     *              coming out of an entity reference) are folded to lower case
     * @return the string obtained from {@code getString(0)} once the value ends
     * @throws IOException declared for the character-reading calls made here
     */
    String parseAttributeValue(boolean lower) throws IOException {
        // An opening quote, when present, is consumed and remembered as the
        // terminator; -1 means the value is unquoted.
        int delim = -1;
        if (ch == '\'' || ch == '"') {
            delim = ch;
            ch = readCh();
        }
        final boolean unquoted = (delim < 0);

        // Parse the rest of the value
        for (;;) {
            int cur = ch;
            switch (cur) {
              case '\n':
                ln++;
                ch = readCh();
                lfCount++;
                if (unquoted) {
                    return getString(0);
                }
                break;

              case '\r':
                ln++;
                ch = readCh();
                if (ch == '\n') {
                    // CR LF is consumed as one line break
                    ch = readCh();
                    crlfCount++;
                } else {
                    crCount++;
                }
                if (unquoted) {
                    return getString(0);
                }
                break;

              case '\t':
              case ' ':
                // A blank ends an unquoted value; inside quotes it is kept
                // unchanged.
                ch = readCh();
                if (unquoted) {
                    return getString(0);
                }
                break;

              case '>':
              case '<':
                // Left unconsumed when they end an unquoted value.
                if (unquoted) {
                    return getString(0);
                }
                ch = readCh();
                break;

              case '\'':
              case '"':
                ch = readCh();
                if (cur == delim) {
                    return getString(0);
                }
                if (unquoted) {
                    error("attvalerr");
                    if (strict || ch == ' ') {
                        return getString(0);
                    }
                    // Lenient mode: the stray quote is dropped and the value
                    // continues.
                    continue;
                }
                // The other kind of quote inside a quoted value is literal.
                break;

              case '=':
                if (unquoted) {
                    /* In SGML a construct like <img src=/cgi-bin/foo?x=1>
                       is considered invalid since an = sign can only be
                       contained in an attribute's value if the string is
                       quoted. */
                    error("attvalerr");
                    /* If strict is true then we return with the string we
                       have thus far. Otherwise we accept the = sign as part
                       of the attribute's value and process the rest of the
                       tag. */
                    if (strict) {
                        return getString(0);
                    }
                }
                ch = readCh();
                break;

              case '&': {
                if (strict && unquoted) {
                    // Strict mode: '&' in an unquoted value is taken literally.
                    ch = readCh();
                    break;
                }
                char[] expansion = parseEntityReference();
                for (int k = 0; k < expansion.length; k++) {
                    int out = expansion[k];
                    if (lower && (out >= 'A') && (out <= 'Z')) {
                        out = 'a' + out - 'A';
                    }
                    addString(out);
                }
                continue;
              }

              case -1:
                return getString(0);

              default:
                if (lower && (cur >= 'A') && (cur <= 'Z')) {
                    cur = 'a' + cur - 'A';
                }
                ch = readCh();
                break;
            }
            addString(cur);
        }
    }

    /**
     * Parse attribute specification List. [31] 327:17
     * <p>
     * Reads attribute specifications until the current character is
     * {@code /}, {@code >}, {@code <} or end of input, and records each one
     * in {@code attributes}. A {@code --} inside the list is handed to
     * {@code parseComment}. When {@code strict} is false a few malformed
     * forms are tolerated: comma separators, a quoted attribute name, and a
     * leading {@code =} (treated as a value for an attribute named after the
     * element when no attribute has been collected yet).
     *
     * @param elem the element whose attribute declarations are consulted
     * @throws IOException declared for the character-reading calls made here
     */
    void parseAttributeSpecificationList(Element elem) throws IOException {

        while (true) {
            skipSpace();

            switch (ch) {
              case '/':
              case '>':
              case '<':
              case -1:
                return;

              case '-':
                if ((ch = readCh()) == '-') {
                    ch = readCh();
                    parseComment();
                    strpos = 0;
                } else {
                    error("invalid.tagchar", "-", elem.getName());
                    ch = readCh();
                }
                continue;
            }

            AttributeList att = null;
            String attname = null;
            String attvalue = null;

            if (parseIdentifier(true)) {
                attname = getString(0);
                skipSpace();
                if (ch == '=') {
                    ch = readCh();
                    skipSpace();
                    att = elem.getAttribute(attname);
                    // Bug ID 4102750
                    // Load the NAME of an Attribute Case Sensitive
                    // The case of the NAME must be intact
                    // MG 021898
                    // NOTE(review): the other two parseAttributeValue calls
                    // below that test att.type do not exclude NAME; confirm
                    // whether that difference is intended.
                    attvalue = parseAttributeValue((att != null) &&
                                                   (att.type != CDATA) &&
                                                   (att.type != NOTATION) &&
                                                   (att.type != NAME));
                } else {
                    // Bare word: first try it as a declared value, then as
                    // an attribute name.
                    attvalue = attname;
                    att = elem.getAttributeByValue(attvalue);
                    if (att == null) {
                        att = elem.getAttribute(attname);
                        if (att != null) {
                            attvalue = att.getValue();
                        } else {
                            // Make it null so that NULL_ATTRIBUTE_VALUE is
                            // used
                            attvalue = null;
                        }
                    }
                }
            } else if (!strict && ch == ',') {
                // allows for comma separated attribute-value pairs
                ch = readCh();
                continue;
            } else if (!strict && ch == '"') {
                // allows for quoted attributes
                ch = readCh();
                skipSpace();
                if (parseIdentifier(true)) {
                    attname = getString(0);
                    if (ch == '"') {
                        ch = readCh();
                    }
                    skipSpace();
                    if (ch == '=') {
                        ch = readCh();
                        skipSpace();
                        att = elem.getAttribute(attname);
                        attvalue = parseAttributeValue((att != null) &&
                                                       (att.type != CDATA) &&
                                                       (att.type != NOTATION));
                    } else {
                        attvalue = attname;
                        att = elem.getAttributeByValue(attvalue);
                        if (att == null) {
                            att = elem.getAttribute(attname);
                            if (att != null) {
                                attvalue = att.getValue();
                            }
                        }
                    }
                } else {
                    error("invalid.tagchar", String.valueOf((char) ch), elem.getName());
                    ch = readCh();
                    continue;
                }
            } else if (!strict && (attributes.isEmpty()) && (ch == '=')) {
                // '=' before any attribute: use the element's own name as
                // the attribute name.
                ch = readCh();
                skipSpace();
                attname = elem.getName();
                att = elem.getAttribute(attname);
                attvalue = parseAttributeValue((att != null) &&
                                               (att.type != CDATA) &&
                                               (att.type != NOTATION));
            } else if (!strict && (ch == '=')) {
                // Stray '=': the value is consumed and discarded, an error
                // is reported, and the list ends here.
                ch = readCh();
                skipSpace();
                attvalue = parseAttributeValue(true);
                error("attvalerr");
                return;
            } else {
                error("invalid.tagchar", String.valueOf((char) ch), elem.getName());
                if (strict) {
                    return;
                }
                ch = readCh();
                continue;
            }

            if (att != null) {
                attname = att.getName();
            } else {
                error("invalid.tagatt", attname, elem.getName());
            }

            // Check out the value
            if (attributes.isDefined(attname)) {
                error("multi.tagatt", attname, elem.getName());
            }
            if (attvalue == null) {
                attvalue = ((att != null) && (att.value != null)) ? att.value :
                    HTML.NULL_ATTRIBUTE_VALUE;
            } else if ((att != null) && (att.values != null) && !att.values.contains(attvalue)) {
                error("invalid.tagattval", attname, elem.getName());
            }
            HTML.Attribute attkey = HTML.getAttributeKey(attname);
            if (attkey == null) {
                attributes.addAttribute(attname, attvalue);
            } else {
                attributes.addAttribute(attkey, attvalue);
            }
        }
    }

    /**
     * Parses the Document Type Declaration markup declaration.
     * <p>
     * Reads from the input until the closing {@code >} (which is consumed)
     * or end of input (reported as {@code "invalid.markup"}). Line breaks
     * and double-quote characters are skipped; every other character is
     * appended to the result. The caller visible in this file ignores the
     * returned text.
     *
     * @return the characters read, without line breaks and double quotes
     * @throws IOException declared for the character-reading calls made here
     */
    public String parseDTDMarkup() throws IOException {

        StringBuffer strBuff = new StringBuffer();
        ch = readCh();
        while (true) {
            switch (ch) {
              case '>':
                ch = readCh();
                return strBuff.toString();

              case -1:
                error("invalid.markup");
                return strBuff.toString();

              case '\n':
                ln++;
                ch = readCh();
                lfCount++;
                break;

              case '"':
                ch = readCh();
                break;

              case '\r':
                ln++;
                if ((ch = readCh()) == '\n') {
                    ch = readCh();
                    crlfCount++;
                } else {
                    crCount++;
                }
                break;

              default:
                // Append the character unmasked. The previous "& 0xFF"
                // dropped the high byte of any character above U+00FF,
                // unlike the plain (char) casts used elsewhere in this file.
                strBuff.append((char) ch);
                ch = readCh();
                break;
            }
        }
    }

    /**
     * Parse markup declarations.
* Currently only handles the Document Type Declaration markup. * Returns true if it is a markup declaration false otherwise. */ protected boolean parseMarkupDeclarations(StringBuffer strBuff) throws IOException { /* Currently handles only the DOCTYPE */ if ((strBuff.length() == "DOCTYPE".length()) && (strBuff.toString().toUpperCase().equals("DOCTYPE"))) { parseDTDMarkup(); return true; } return false; } /** * Parse an invalid tag. */ void parseInvalidTag() throws IOException { // ignore all data upto the close bracket '>' while (true) { skipSpace(); switch (ch) { case '>': case -1: ch = readCh(); return; case '<': return; default: ch = readCh(); } } } /** * Parse a start or end tag. */ void parseTag() throws IOException { Element elem = null; boolean net = false; boolean warned = false; boolean unknown = false; switch (ch = readCh()) { case '!': switch (ch = readCh()) { case '-': // Parse comment. [92] 391:7 while (true) { if (ch == '-') { if (!strict || ((ch = readCh()) == '-')) { ch = readCh(); if (!strict && ch == '-') { ch = readCh(); } // send over any text you might see // before parsing and sending the // comment if (textpos != 0) { char newtext[] = new char[textpos]; System.arraycopy(text, 0, newtext, 0, textpos); handleText(newtext); lastBlockStartPos = currentBlockStartPos; textpos = 0; } parseComment(); last = makeTag(dtd.getElement("comment"), true); handleComment(getChars(0)); continue; } else if (!warned) { warned = true; error("invalid.commentchar", "-"); } } skipSpace(); switch (ch) { case '-': continue; case '>': ch = readCh(); case -1: return; default: ch = readCh(); if (!warned) { warned = true; error("invalid.commentchar", String.valueOf((char)ch)); } break; } } default: // deal with marked sections StringBuffer strBuff = new StringBuffer(); while (true) { strBuff.append((char)ch); if (parseMarkupDeclarations(strBuff)) { return; } switch(ch) { case '>': ch = readCh(); case -1: error("invalid.markup"); return; case '\n': ln++; ch = readCh(); 
lfCount++; break; case '\r': ln++; if ((ch = readCh()) == '\n') { ch = readCh(); crlfCount++; } else { crCount++; } break; default: ch = readCh(); break; } } } case '/': // parse end tag [19] 317:4 switch (ch = readCh()) { case '>': ch = readCh(); case '<': // empty end tag. either </> or </< if (recent == null) { error("invalid.shortend"); return; } elem = recent; break; default: if (!parseIdentifier(true)) { error("expected.endtagname"); return; } skipSpace(); switch (ch) { case '>': ch = readCh(); case '<': break; default: error("expected", "'>'"); while ((ch != -1) && (ch != '\n') && (ch != '>')) { ch = readCh(); } if (ch == '>') { ch = readCh(); } break; } String elemStr = getString(0); if (!dtd.elementExists(elemStr)) { error("end.unrecognized", elemStr); // Ignore RE before end tag if ((textpos > 0) && (text[textpos-1] == '\n')) { textpos--; } elem = dtd.getElement("unknown"); elem.name = elemStr; unknown = true; } else { elem = dtd.getElement(elemStr); } break; } // If the stack is null, we're seeing end tags without any begin // tags. Ignore them. if (stack == null) { error("end.extra.tag", elem.getName()); return; } // Ignore RE before end tag if ((textpos > 0) && (text[textpos-1] == '\n')) { // In a pre tag, if there are blank lines // we do not want to remove the newline // before the end tag. Hence this code. // if (stack.pre) { if ((textpos > 1) && (text[textpos-2] != '\n')) { textpos--; } } else { textpos--; } } // If the end tag is a form, since we did not put it // on the tag stack, there is no corresponding start // start tag to find. Hence do not touch the tag stack. // /* if (!strict && elem.getName().equals("form")) { if (lastFormSent != null) { handleEndTag(lastFormSent); return; } else { // do nothing. return; } } */ if (unknown) { // we will not see a corresponding start tag // on the the stack. If we are seeing an // end tag, lets send this on as an empty // tag with the end tag attribute set to
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -