📄 parsecommon.cxx
字号:
// Copyright (c) 1994 James Clark// See the file COPYING for copying permission.#include "splib.h"#include "Parser.h"#include "token.h"#include "MessageArg.h"#include "ParserMessages.h"#include "constant.h"#include "NumericCharRefOrigin.h"#include "macros.h"#ifdef SP_NAMESPACEnamespace SP_NAMESPACE {#endifBoolean Parser::parseProcessingInstruction(){ currentInput()->startToken(); Location location(currentLocation()); StringC buf; for (;;) { Token token = getToken(piMode); if (token == tokenPic) break; switch (token) { case tokenEe: message(ParserMessages::processingInstructionEntityEnd); return 0; case tokenUnrecognized: reportNonSgmlCharacter(); // fall through case tokenChar: buf += *currentInput()->currentTokenStart(); if (buf.size()/2 > syntax().pilen()) { message(ParserMessages::processingInstructionLength, NumberMessageArg(syntax().pilen())); message(ParserMessages::processingInstructionClose); return 0; } break; } } if (buf.size() > syntax().pilen()) message(ParserMessages::processingInstructionLength, NumberMessageArg(syntax().pilen())); if (options().warnPiMissingName) { size_t i = 0; if (buf.size() && syntax().isNameStartCharacter(buf[0])) { for (i = 1; i < buf.size(); i++) if (!syntax().isNameCharacter(buf[i])) break; } if (i == 0 || (i < buf.size() && !syntax().isS(buf[i]))) message(ParserMessages::piMissingName); } noteMarkup(); eventHandler().pi(new (eventAllocator()) ImmediatePiEvent(buf, location)); return 1;}Boolean Parser::parseLiteral(Mode litMode, Mode liteMode, size_t maxLength, const MessageType1 &tooLongMessage, unsigned flags, Text &text){ unsigned startLevel = inputLevel(); Mode currentMode = litMode; // If the literal gets to be longer than this, then we assume // that the closing delimiter has been omitted if we're at the end // of a line and at the starting input level. size_t reallyMaxLength = (maxLength > size_t(-1)/2 ? size_t(-1) : maxLength * 2); text.clear(); Location startLoc(currentLocation()); if (flags & literalDelimInfo) text.addStartDelim(currentLocation()); for (;;) { Token token = getToken(currentMode); switch (token) { case tokenEe: if (inputLevel() == startLevel) { message(ParserMessages::literalLevel); return 0; } text.addEntityEnd(currentLocation()); popInputStack(); if (inputLevel() == startLevel) currentMode = litMode; break; case tokenUnrecognized: if (reportNonSgmlCharacter()) break; message(ParserMessages::literalMinimumData, StringMessageArg(currentToken())); break; case tokenRs: text.ignoreChar(currentChar(), currentLocation()); break; case tokenRe: if (text.size() > reallyMaxLength && inputLevel() == startLevel) {#if 0 message(tooLongMessage, NumberMessageArg(maxLength));#endif // guess that the closing delimiter has been omitted Messenger::setNextLocation(startLoc); message(ParserMessages::literalClosingDelimiter); return 0; } // fall through case tokenSepchar: if ((flags & literalSingleSpace) && (text.size() == 0 || text.lastChar() == syntax().space())) text.ignoreChar(currentChar(), currentLocation()); else text.addChar(syntax().space(), Location(new ReplacementOrigin(currentLocation(), currentChar()), 0)); break; case tokenSpace: if ((flags & literalSingleSpace) && (text.size() == 0 || text.lastChar() == syntax().space())) text.ignoreChar(currentChar(), currentLocation()); else text.addChar(currentChar(), currentLocation()); break; case tokenCroDigit: case tokenHcroHexDigit: { Char c; Location loc; if (!parseNumericCharRef(token== tokenHcroHexDigit, c, loc)) return 0; Boolean isSgmlChar; if (!translateNumericCharRef(c, isSgmlChar)) break; if (!isSgmlChar) { if (flags & literalNonSgml) text.addNonSgmlChar(c, loc); else message(ParserMessages::numericCharRefLiteralNonSgml, NumberMessageArg(c)); break; } if (flags & literalDataTag) { if (!syntax().isSgmlChar(c)) message(ParserMessages::dataTagPatternNonSgml); else if (syntax().charSet(Syntax::functionChar)->contains(c)) message(ParserMessages::dataTagPatternFunction); } if ((flags & literalSingleSpace) && c == syntax().space() && (text.size() == 0 || text.lastChar() == syntax().space())) text.ignoreChar(c, loc); else text.addChar(c, loc); } break; case tokenCroNameStart: if (!parseNamedCharRef()) return 0; break; case tokenEroGrpo: message(inInstance() ? ParserMessages::eroGrpoStartTag : ParserMessages::eroGrpoProlog); break; case tokenLit: case tokenLita: if (flags & literalDelimInfo) text.addEndDelim(currentLocation(), token == tokenLita); goto done; case tokenPeroNameStart: if (options().warnInternalSubsetLiteralParamEntityRef && inputLevel() == 1) message(ParserMessages::internalSubsetLiteralParamEntityRef); // fall through case tokenEroNameStart: { ConstPtr<Entity> entity; Ptr<EntityOrigin> origin; if (!parseEntityReference(token == tokenPeroNameStart, (flags & literalNoProcess) ? 2 : 0, entity, origin)) return 0; if (!entity.isNull()) entity->litReference(text, *this, origin, (flags & literalSingleSpace) != 0); if (inputLevel() > startLevel) currentMode = liteMode; } break; case tokenPeroGrpo: message(ParserMessages::peroGrpoProlog); break; case tokenCharDelim: message(ParserMessages::dataCharDelim, StringMessageArg(StringC(currentInput()->currentTokenStart(), currentInput()->currentTokenLength()))); // fall through case tokenChar: if (text.size() > reallyMaxLength && inputLevel() == startLevel && currentChar() == syntax().standardFunction(Syntax::fRE)) {#if 0 message(tooLongMessage, NumberMessageArg(maxLength));#endif // guess that the closing delimiter has been omitted Messenger::setNextLocation(startLoc); message(ParserMessages::literalClosingDelimiter); return 0; } text.addChar(currentChar(), currentLocation()); break; } } done: if ((flags & literalSingleSpace) && text.size() > 0 && text.lastChar() == syntax().space()) text.ignoreLastChar(); if (text.size() > maxLength) { switch (litMode) { case alitMode: case alitaMode: case talitMode: case talitaMode: if (AttributeValue::handleAsUnterminated(text, *this)) return 0; default: break; } message(tooLongMessage, NumberMessageArg(maxLength)); } return 1;}Boolean Parser::parseNamedCharRef(){ if (options().warnNamedCharRef) message(ParserMessages::namedCharRef); InputSource *in = currentInput(); Index startIndex = currentLocation().index(); in->discardInitial(); extendNameToken(syntax().namelen(), ParserMessages::nameLength); Char c; Boolean valid; StringC name; getCurrentToken(syntax().generalSubstTable(), name); if (!syntax().lookupFunctionChar(name, &c)) { message(ParserMessages::functionName, StringMessageArg(name)); valid = 0; } else { valid = 1; if (wantMarkup()) getCurrentToken(name); // the original name } NamedCharRef::RefEndType refEndType; switch (getToken(refMode)) { case tokenRefc: refEndType = NamedCharRef::endRefc; break; case tokenRe: refEndType = NamedCharRef::endRE; if (options().warnRefc) message(ParserMessages::refc); break; default: refEndType = NamedCharRef::endOmitted; if (options().warnRefc) message(ParserMessages::refc); break; } in->startToken(); if (valid) in->pushCharRef(c, NamedCharRef(startIndex, refEndType, name)); return 1;}Boolean Parser::parseNumericCharRef(Boolean isHex, Char &ch, Location &loc){ InputSource *in = currentInput(); Location startLocation = currentLocation(); in->discardInitial(); Boolean valid = 1; Char c = 0; if (isHex) { extendHexNumber(); const Char *lim = in->currentTokenEnd(); for (const Char *p = in->currentTokenStart(); p < lim; p++) { int val = sd().hexDigitWeight(*p); if (c <= charMax/16 && (c *= 16) <= charMax - val) c += val; else { message(ParserMessages::characterNumber, StringMessageArg(currentToken())); valid = 0; break; } } } else { extendNumber(syntax().namelen(), ParserMessages::numberLength); const Char *lim = in->currentTokenEnd(); for (const Char *p = in->currentTokenStart(); p < lim; p++) { int val = sd().digitWeight(*p); if (c <= charMax/10 && (c *= 10) <= charMax - val) c += val;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -