📄 parsesd.cxx
字号:
// Copyright (c) 1994, 1995, 1997 James Clark// See the file COPYING for copying permission.#include "splib.h"#include "Parser.h"#include "macros.h"#include "SdFormalError.h"#include "MessageBuilder.h"#include "ParserMessages.h"#include "MessageArg.h"#include "CharsetRegistry.h"#include "ISetIter.h"#include "token.h"#include "TokenMessageArg.h"#include "constant.h"#include "SdText.h"#include "NumericCharRefOrigin.h"#ifdef SP_NAMESPACEnamespace SP_NAMESPACE {#endifclass CharSwitcher {public: CharSwitcher(); void addSwitch(WideChar from, WideChar to); SyntaxChar subst(WideChar c); size_t nSwitches() const; Boolean switchUsed(size_t i) const; WideChar switchFrom(size_t i) const; WideChar switchTo(size_t i) const;private: Vector<PackedBoolean> switchUsed_; Vector<WideChar> switches_;};// Information about the SGML declaration being built.struct SdBuilder { SdBuilder(); void addFormalError(const Location &, const MessageType1 &, const StringC &); Ptr<Sd> sd; Ptr<Syntax> syntax; CharsetDecl syntaxCharsetDecl; CharsetInfo syntaxCharset; CharSwitcher switcher; Boolean externalSyntax; Boolean enr; Boolean www; Boolean valid; Boolean external; IList<SdFormalError> formalErrorList;};class CharsetMessageArg : public MessageArg {public: CharsetMessageArg(const ISet<WideChar> &set); MessageArg *copy() const; void append(MessageBuilder &) const;private: ISet<WideChar> set_;};struct SdParam { typedef unsigned char Type; enum { invalid, eE, minimumLiteral, mdc, minus, number, capacityName, name, paramLiteral, systemIdentifier, generalDelimiterName, referenceReservedName, quantityName, reservedName // Sd::ReservedName is added to this }; Type type; StringC token; Text literalText; String<SyntaxChar> paramLiteralText; union { Number n; Sd::Capacity capacityIndex; Syntax::Quantity quantityIndex; Syntax::ReservedName reservedNameIndex; Syntax::DelimGeneral delimGeneralIndex; };};class AllowedSdParams {public: AllowedSdParams(SdParam::Type, SdParam::Type = SdParam::invalid, SdParam::Type = SdParam::invalid, SdParam::Type = SdParam::invalid, SdParam::Type = SdParam::invalid, SdParam::Type = SdParam::invalid); Boolean param(SdParam::Type) const; SdParam::Type get(int i) const;private: enum { maxAllow = 6 }; SdParam::Type allow_[maxAllow];};class AllowedSdParamsMessageArg : public MessageArg {public: AllowedSdParamsMessageArg(const AllowedSdParams &allow, const ConstPtr<Sd> &sd); MessageArg *copy() const; void append(MessageBuilder &) const;private: AllowedSdParams allow_; ConstPtr<Sd> sd_;};struct StandardSyntaxSpec { struct AddedFunction { const char *name; Syntax::FunctionClass functionClass; SyntaxChar syntaxChar; }; const AddedFunction *addedFunction; size_t nAddedFunction; Boolean shortref;};static StandardSyntaxSpec::AddedFunction coreFunctions[] = { { "TAB", Syntax::cSEPCHAR, 9 },};static StandardSyntaxSpec coreSyntax = { coreFunctions, SIZEOF(coreFunctions), 0};static StandardSyntaxSpec refSyntax = { coreFunctions, SIZEOF(coreFunctions), 1};void Parser::doInit(){ if (cancelled()) { allDone(); return; } // When document entity doesn't exist, don't give any errors // other than the cannot open error. if (currentInput()->get(messenger()) == InputSource::eE) { if (currentInput()->accessError()) { allDone(); return; } } else currentInput()->ungetToken(); const CharsetInfo &initCharset = sd().internalCharset(); ISet<WideChar> missing; findMissingMinimum(initCharset, missing); if (!missing.isEmpty()) { message(ParserMessages::sdMissingCharacters, CharsetMessageArg(missing)); giveUp(); return; } Boolean found = 0; StringC systemId; if (scanForSgmlDecl(initCharset)) { if (options().warnExplicitSgmlDecl) message(ParserMessages::explicitSgmlDecl); found = 1; } else { currentInput()->ungetToken(); if (entityCatalog().sgmlDecl(initCharset, messenger(), systemId)) { InputSource *in = entityManager().open(systemId, sd().docCharset(), InputSourceOrigin::make(), 0, messenger()); if (in) { pushInput(in); if (scanForSgmlDecl(initCharset)) found = 1; else { message(ParserMessages::badDefaultSgmlDecl); popInputStack(); } } } } if (found) { if (startMarkup(eventsWanted().wantPrologMarkup(), currentLocation())) { size_t nS = currentInput()->currentTokenLength() - 6; for (size_t i = 0; i < nS; i++) currentMarkup()->addS(currentInput()->currentTokenStart()[i]); currentMarkup()->addDelim(Syntax::dMDO); currentMarkup()->addSdReservedName(Sd::rSGML, currentInput()->currentTokenStart() + (currentInput()->currentTokenLength() - 4), 4); } Syntax *syntaxp = new Syntax(sd()); CharSwitcher switcher; if (!setStandardSyntax(*syntaxp, refSyntax, sd().internalCharset(), switcher, 0)) { giveUp(); return; } syntaxp->implySgmlChar(sd()); setSyntax(syntaxp); compileSdModes(); ConstPtr<Sd> refSd(sdPointer()); ConstPtr<Syntax> refSyntax(syntaxPointer()); if (!parseSgmlDecl()) { giveUp(); return; } // queue an SGML declaration event eventHandler().sgmlDecl(new (eventAllocator()) SgmlDeclEvent(sdPointer(), syntaxPointer(), instanceSyntaxPointer(), refSd, refSyntax, currentInput()->nextIndex(), systemId, markupLocation(), currentMarkup())); if (inputLevel() == 2) { // FIXME perhaps check for junk after SGML declaration popInputStack(); } } else { if (!implySgmlDecl()) { giveUp(); return; } currentInput()->willNotSetDocCharset(); // queue an SGML declaration event eventHandler().sgmlDecl(new (eventAllocator()) SgmlDeclEvent(sdPointer(), syntaxPointer())); } // Now we have sd and syntax set up, prepare to parse the prolog. compilePrologModes(); setPhase(prologPhase);}Boolean Parser::implySgmlDecl(){ Syntax *syntaxp = new Syntax(sd()); const StandardSyntaxSpec *spec; if (options().shortref) spec = &refSyntax; else spec = &coreSyntax; CharSwitcher switcher; if (!setStandardSyntax(*syntaxp, *spec, sd().internalCharset(), switcher, 0)) return 0; syntaxp->implySgmlChar(sd()); for (int i = 0; i < Syntax::nQuantity; i++) syntaxp->setQuantity(i, options().quantity[i]); setSyntax(syntaxp); return 1;}Boolean Parser::setStandardSyntax(Syntax &syn, const StandardSyntaxSpec &spec, const CharsetInfo &internalCharset, CharSwitcher &switcher, Boolean www){ static UnivCharsetDesc::Range syntaxCharsetRanges[] = { { 0, 128, 0 }, }; static UnivCharsetDesc syntaxCharsetDesc(syntaxCharsetRanges, SIZEOF(syntaxCharsetRanges)); static CharsetInfo syntaxCharset(syntaxCharsetDesc); Boolean valid = 1; if (!checkSwitches(switcher, syntaxCharset)) valid = 0; size_t i; for (i = 0; i < switcher.nSwitches(); i++) if (switcher.switchTo(i) >= 128) message(ParserMessages::switchNotInCharset, NumberMessageArg(switcher.switchTo(i))); static const Char shunchar[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127, 255 }; for (i = 0; i < SIZEOF(shunchar); i++) syn.addShunchar(shunchar[i]); syn.setShuncharControls(); static Syntax::StandardFunction standardFunctions[3] = { Syntax::fRE, Syntax::fRS, Syntax::fSPACE }; static SyntaxChar functionChars[3] = { 13, 10, 32 }; for (i = 0; i < 3; i++) { Char docChar; if (translateSyntax(switcher, syntaxCharset, internalCharset, functionChars[i], docChar) && checkNotFunction(syn, docChar)) syn.setStandardFunction(standardFunctions[i], docChar); else valid = 0; } for (i = 0; i < spec.nAddedFunction; i++) { Char docChar; if (translateSyntax(switcher, syntaxCharset, internalCharset, spec.addedFunction[i].syntaxChar, docChar) && checkNotFunction(syn, docChar)) syn.addFunctionChar(internalCharset.execToDesc(spec.addedFunction[i].name), spec.addedFunction[i].functionClass, docChar); else valid = 0; } static SyntaxChar nameChars[2] = { 45, 46 }; // '-' '.' ISet<Char> nameCharSet; for (i = 0; i < 2; i++) { Char docChar; if (translateSyntax(switcher, syntaxCharset, internalCharset, nameChars[i], docChar)) nameCharSet.add(docChar); else valid = 0; } if (!checkNmchars(nameCharSet, syn)) valid = 0; else syn.addNameCharacters(nameCharSet); syn.setNamecaseGeneral(1); syn.setNamecaseEntity(0); if (!setRefDelimGeneral(syn, syntaxCharset, internalCharset, switcher)) valid = 0; setRefNames(syn, internalCharset, www); syn.enterStandardFunctionNames(); if (spec.shortref && !addRefDelimShortref(syn, syntaxCharset, internalCharset, switcher)) valid = 0; return valid;}Boolean Parser::setRefDelimGeneral(Syntax &syntax, const CharsetInfo &syntaxCharset, const CharsetInfo &internalCharset, CharSwitcher &switcher){ // Column 3 from Figure 3 static const char delims[][2] = { { 38 }, { 45, 45 }, { 38, 35 }, { 93 }, { 91 }, { 93 }, { 91 }, { 38 }, { 60, 47 }, { 41 }, { 40 }, { 0 }, // HCRO { 34 }, { 39 }, { 62 }, { 60, 33 }, { 45 }, { 93, 93 }, { 47 }, { 47 }, // NESTC { 63 }, { 124 }, { 37 }, { 62 }, { 60, 63 }, { 43 }, { 59 }, { 42 }, { 35 }, { 44 }, { 60 }, { 62 }, { 61 }, }; Boolean valid = 1; ISet<WideChar> missing; for (int i = 0; i < Syntax::nDelimGeneral; i++) if (syntax.delimGeneral(i).size() == 0) { StringC delim; size_t j; for (j = 0; j < 2 && delims[i][j] != '\0'; j++) { UnivChar univChar = translateUniv(delims[i][j], switcher, syntaxCharset); Char c; if (univToDescCheck(internalCharset, univChar, c)) delim += c; else { missing += univChar; valid = 0; } } if (delim.size() == j) { if (checkGeneralDelim(syntax, delim)) syntax.setDelimGeneral(i, delim); else valid = 0; } } if (!missing.isEmpty()) message(ParserMessages::missingSignificant646, CharsetMessageArg(missing)); return valid;}void Parser::setRefNames(Syntax &syntax, const CharsetInfo &internalCharset, Boolean www){ static const char *const referenceNames[] = { "ALL", "ANY", "ATTLIST", "CDATA", "CONREF", "CURRENT", "DATA", "DEFAULT", "DOCTYPE", "ELEMENT", "EMPTY", "ENDTAG", "ENTITIES", "ENTITY", "FIXED", "ID", "IDLINK", "IDREF", "IDREFS",
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -