📄 parsesd.cxx
字号:
"IGNORE", "IMPLICIT", "IMPLIED", "INCLUDE", "INITIAL", "LINK", "LINKTYPE", "MD", "MS", "NAME", "NAMES", "NDATA", "NMTOKEN", "NMTOKENS", "NOTATION", "NUMBER", "NUMBERS", "NUTOKEN", "NUTOKENS", "O", "PCDATA", "PI", "POSTLINK", "PUBLIC", "RCDATA", "RE", "REQUIRED", "RESTORE", "RS", "SDATA", "SHORTREF", "SIMPLE", "SPACE", "STARTTAG", "SUBDOC", "SYSTEM", "TEMP", "USELINK", "USEMAP" }; for (int i = 0; i < Syntax::nNames; i++) { switch (i) { case Syntax::rDATA: case Syntax::rIMPLICIT: if (!www) break; // fall through case Syntax::rALL: if (!www && options().errorAfdr) break; // fall through default: { StringC docName(internalCharset.execToDesc(referenceNames[i])); Syntax::ReservedName tem; if (syntax.lookupReservedName(docName, &tem)) message(ParserMessages::nameReferenceReservedName, StringMessageArg(docName)); if (syntax.reservedName(Syntax::ReservedName(i)).size() == 0) syntax.setName(i, docName); break; } } }}Boolean Parser::addRefDelimShortref(Syntax &syntax, const CharsetInfo &syntaxCharset, const CharsetInfo &internalCharset, CharSwitcher &switcher){ // Column 2 from Figure 4 static const char delimShortref[][3] = { { 9 }, { 13 }, { 10 }, { 10, 66 }, { 10, 13 }, { 10, 66, 13 }, { 66, 13 }, { 32 }, { 66, 66 }, { 34 }, { 35 }, { 37 }, { 39 }, { 40 }, { 41 }, { 42 }, { 43 }, { 44 }, { 45 }, { 45, 45 }, { 58 }, { 59 }, { 61 }, { 64 }, { 91 }, { 93 }, { 94 }, { 95 }, { 123 }, { 124 }, { 125 }, { 126 }, }; ISet<WideChar> missing; for (size_t i = 0; i < SIZEOF(delimShortref); i++) { StringC delim; size_t j; for (j = 0; j < 3 && delimShortref[i][j] != '\0'; j++) { Char c; UnivChar univChar = translateUniv(delimShortref[i][j], switcher, syntaxCharset); if (univToDescCheck(internalCharset, univChar, c)) delim += c; else missing += univChar; } if (delim.size() == j) { if (switcher.nSwitches() > 0 && syntax.isValidShortref(delim)) message(ParserMessages::duplicateDelimShortref, StringMessageArg(delim)); else syntax.addDelimShortref(delim, internalCharset); } } if (!missing.isEmpty()) message(ParserMessages::missingSignificant646, CharsetMessageArg(missing)); return 1;}// Determine whether the document starts with an SGML declaration.// There is no current syntax at this point.Boolean Parser::scanForSgmlDecl(const CharsetInfo &initCharset){ Char rs; if (!univToDescCheck(initCharset, UnivCharsetDesc::rs, rs)) return 0; Char re; if (!univToDescCheck(initCharset, UnivCharsetDesc::re, re)) return 0; Char space; if (!univToDescCheck(initCharset, UnivCharsetDesc::space, space)) return 0; Char tab; if (!univToDescCheck(initCharset, UnivCharsetDesc::tab, tab)) return 0; InputSource *in = currentInput(); Xchar c = in->get(messenger()); while (c == rs || c == space || c == re || c == tab) c = in->tokenChar(messenger()); if (c != initCharset.execToDesc('<')) return 0; if (in->tokenChar(messenger()) != initCharset.execToDesc('!')) return 0; c = in->tokenChar(messenger()); if (c != initCharset.execToDesc('S') && c != initCharset.execToDesc('s')) return 0; c = in->tokenChar(messenger()); if (c != initCharset.execToDesc('G') && c != initCharset.execToDesc('g')) return 0; c = in->tokenChar(messenger()); if (c != initCharset.execToDesc('M') && c != initCharset.execToDesc('m')) return 0; c = in->tokenChar(messenger()); if (c != initCharset.execToDesc('L') && c != initCharset.execToDesc('l')) return 0; c = in->tokenChar(messenger()); // Don't recognize this if SGML is followed by a name character. if (c == InputSource::eE) return 1; in->endToken(in->currentTokenLength() - 1); if (c == initCharset.execToDesc('-')) return 0; if (c == initCharset.execToDesc('.')) return 0; UnivChar univ; if (!initCharset.descToUniv(c, univ)) return 1; if (UnivCharsetDesc::a <= univ && univ < UnivCharsetDesc::a + 26) return 0; if (UnivCharsetDesc::A <= univ && univ < UnivCharsetDesc::A + 26) return 0; if (UnivCharsetDesc::zero <= univ && univ < UnivCharsetDesc::zero + 10) return 0; return 1;} void Parser::findMissingMinimum(const CharsetInfo &charset, ISet<WideChar> &missing){ Char to; size_t i; for (i = 0; i < 26; i++) { if (!univToDescCheck(charset, UnivCharsetDesc::A + i, to)) missing += UnivCharsetDesc::A + i; if (!univToDescCheck(charset, UnivCharsetDesc::a + i, to)) missing += UnivCharsetDesc::a + i; } for (i = 0; i < 10; i++) { Char to; if (!univToDescCheck(charset, UnivCharsetDesc::zero + i, to)) missing += UnivCharsetDesc::zero + i; } static const UnivChar special[] = { 39, 40, 41, 43, 44, 45, 46, 47, 58, 61, 63 }; for (i = 0; i < SIZEOF(special); i++) if (!univToDescCheck(charset, special[i], to)) missing += special[i];}Boolean Parser::parseSgmlDecl(){ SdParam parm; SdBuilder sdBuilder; if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral, SdParam::name), parm)) return 0; if (parm.type == SdParam::name) { sdBuilder.external = 1; Location loc(currentLocation()); StringC name; parm.token.swap(name); ExternalId externalId; if (!sdParseSgmlDeclRef(sdBuilder, parm, externalId)) return 0; ExternalEntity *entity = new ExternalTextEntity(name, EntityDecl::sgml, loc, externalId); ConstPtr<Entity> entityPtr(entity); entity->generateSystemId(*this); if (entity->externalId().effectiveSystemId().size() == 0) { message(ParserMessages::cannotGenerateSystemIdSgml); return 0; } Ptr<EntityOrigin> origin(EntityOrigin::make(internalAllocator(), entityPtr, loc)); if (currentMarkup()) currentMarkup()->addEntityStart(origin); pushInput(entityManager().open(entity->externalId().effectiveSystemId(), sd().docCharset(), origin.pointer(), 0, messenger())); if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm)) return 0; } StringC version(sd().execToInternal("ISO 8879:1986")); StringC enrVersion(sd().execToInternal("ISO 8879:1986 (ENR)")); StringC wwwVersion(sd().execToInternal("ISO 8879:1986 (WWW)")); if (parm.literalText.string() == enrVersion) sdBuilder.enr = 1; else if (parm.literalText.string() == wwwVersion) { sdBuilder.enr = 1; sdBuilder.www = 1; } else if (parm.literalText.string() != version) message(ParserMessages::standardVersion, StringMessageArg(parm.literalText.string())); if (sdBuilder.external && !sdBuilder.www) message(ParserMessages::sgmlDeclRefRequiresWww); sdBuilder.sd = new Sd(entityManagerPtr()); if (sdBuilder.www) sdBuilder.sd->setWww(1); typedef Boolean (Parser::*SdParser)(SdBuilder &, SdParam &); static SdParser parsers[] = { &Parser::sdParseDocumentCharset, &Parser::sdParseCapacity, &Parser::sdParseScope, &Parser::sdParseSyntax, &Parser::sdParseFeatures, &Parser::sdParseAppinfo, &Parser::sdParseSeealso, }; for (size_t i = 0; i < SIZEOF(parsers); i++) { if (!(this->*(parsers[i]))(sdBuilder, parm)) return 0; if (!sdBuilder.valid) return 0; } setSdOverrides(*sdBuilder.sd); if (sdBuilder.sd->formal()) { while (!sdBuilder.formalErrorList.empty()) { SdFormalError *p = sdBuilder.formalErrorList.get(); ParserState *state = this; // work around lcc 3.0 bug p->send(*state); delete p; } } setSd(sdBuilder.sd.pointer()); currentInput()->setDocCharset(sd().docCharset(), entityManager().charset()); if (sdBuilder.sd->scopeInstance()) { Syntax *proSyntax = new Syntax(sd()); CharSwitcher switcher; setStandardSyntax(*proSyntax, refSyntax, sd().internalCharset(), switcher, sdBuilder.www); proSyntax->setSgmlChar(*sdBuilder.syntax->charSet(Syntax::sgmlChar)); ISet<WideChar> invalidSgmlChar; proSyntax->checkSgmlChar(*sdBuilder.sd, sdBuilder.syntax.pointer(), 1, // get results in document character set invalidSgmlChar); sdBuilder.syntax->checkSgmlChar(*sdBuilder.sd, proSyntax, 1, // get results in document character set invalidSgmlChar); if (!invalidSgmlChar.isEmpty()) message(ParserMessages::invalidSgmlChar, CharsetMessageArg(invalidSgmlChar)); setSyntaxes(proSyntax, sdBuilder.syntax.pointer()); } else setSyntax(sdBuilder.syntax.pointer()); if (syntax().multicode()) currentInput()->setMarkupScanTable(syntax().markupScanTable()); return 1;}Boolean Parser::sdParseSgmlDeclRef(SdBuilder &sdBuilder, SdParam &parm, ExternalId &id){ id.setLocation(currentLocation()); if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSYSTEM, SdParam::reservedName + Sd::rPUBLIC, SdParam::mdc), parm)) return 0; if (parm.type == SdParam::mdc) return 1; if (parm.type == SdParam::reservedName + Sd::rPUBLIC) { if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm)) return 0; const MessageType1 *err; PublicId::TextClass textClass; if (!id.setPublic(parm.literalText, sd().internalCharset(), syntax().space(), err)) sdBuilder.addFormalError(currentLocation(), *err, id.publicId()->string()); else if (id.publicId()->getTextClass(textClass) && textClass != PublicId::SD) sdBuilder.addFormalError(currentLocation(), ParserMessages::sdTextClass, id.publicId()->string()); } if (!parseSdParam(AllowedSdParams(SdParam::systemIdentifier, SdParam::mdc), parm)) return 0; if (parm.type == SdParam::mdc) return 1; id.setSystem(parm.literalText); return parseSdParam(AllowedSdParams(SdParam::mdc), parm);}Boolean Parser::sdParseDocumentCharset(SdBuilder &sdBuilder, SdParam &parm){ if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rCHARSET), parm)) return 0; if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET), parm)) return 0; CharsetDecl decl; UnivCharsetDesc desc; if (!sdParseCharset(sdBuilder, parm, 1, decl, desc)) return 0; ISet<WideChar> missing; findMissingMinimum(desc, missing); if (!missing.isEmpty()) { message(ParserMessages::missingMinimumChars, CharsetMessageArg(missing)); return 0; } ISet<Char> sgmlChar; decl.usedSet(sgmlChar); sdBuilder.sd->setDocCharsetDesc(desc); sdBuilder.sd->setDocCharsetDecl(decl); sdBuilder.syntax = new Syntax(*sdBuilder.sd); if (sd().internalCharsetIsDocCharset()) sdBuilder.syntax->setSgmlChar(sgmlChar); else { ISet<Char> internalSgmlChar; translateDocSet(sdBuilder.sd->docCharset(), sdBuilder.sd->internalCharset(), sgmlChar, internalSgmlChar); sdBuilder.syntax->setSgmlChar(internalSgmlChar); } return 1;}void Parser::translateDocSet(const CharsetInfo &fromCharset, const CharsetInfo &toCharset, const ISet<Char> &fromSet, ISet<Char> &toSet){ ISetIter<Char> iter(fromSet); Char min, max; while (iter.next(min, max)) { do { UnivChar univChar; Char internalChar; WideChar count2, alsoMax; if (!fromCharset.descToUniv(min, univChar, alsoMax)) { if (alsoMax >= max) break; min = alsoMax; } else { // FIXME better not to use univToDescCheck here // Maybe OK if multiple internal chars corresponding to doc char int nMap = univToDescCheck(toCharset, univChar, internalChar, count2); if (alsoMax > max) alsoMax = max; if (alsoMax - min > count2 - 1) alsoMax = min + (count2 - 1); if (nMap) toSet.addRange(internalChar, internalChar + (alsoMax - min)); min = alsoMax; } } while (min++ != max); }}Boolean Parser::sdParseCharset(SdBuilder &sdBuilder, SdParam &parm, Boolean isDocument, CharsetDecl &decl, UnivCharsetDesc &desc){ decl.clear(); ISet<WideChar> multiplyDeclared; // This is for checking whether the syntax reference character set // is ISO 646 when SCOPE is INSTANCE. Boolean maybeISO646 = 1; do { if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm)) return 0; UnivCharsetDesc baseDesc; PublicId id; Boolean found; PublicId::TextClass textClass; const MessageType1 *err; if (!id.init(parm.literalText, sd().internalCharset(), syntax().space(), err)) sdBuilder.addFormalError(currentLocation(), *err, id.string()); else if (id.getTextClass(textClass) && textClass != PublicId::CHARSET) sdBuilder.addFormalError(currentLocation(), ParserMessages::basesetTextClass, id.string());
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -