parsesd.cxx

来自「SP是一个基于GNU C++编译器」· CXX 代码 · 共 2,264 行 · 第 1/5 页

CXX
2,264
字号
    "IGNORE",    "IMPLICIT",    "IMPLIED",    "INCLUDE",    "INITIAL",    "LINK",    "LINKTYPE",    "MD",    "MS",    "NAME",    "NAMES",    "NDATA",    "NMTOKEN",    "NMTOKENS",    "NOTATION",    "NUMBER",    "NUMBERS",    "NUTOKEN",    "NUTOKENS",    "O",    "PCDATA",    "PI",    "POSTLINK",    "PUBLIC",    "RCDATA",    "RE",    "REQUIRED",    "RESTORE",    "RS",    "SDATA",    "SHORTREF",    "SIMPLE",    "SPACE",    "STARTTAG",    "SUBDOC",    "SYSTEM",    "TEMP",    "USELINK",    "USEMAP"    };  for (int i = 0; i < Syntax::nNames; i++) {    switch (i) {    case Syntax::rDATA:    case Syntax::rIMPLICIT:      if (!www)	break;      // fall through    case Syntax::rALL:      if (!www && options().errorAfdr)	break;      // fall through    default:      {	StringC docName(internalCharset.execToDesc(referenceNames[i]));	Syntax::ReservedName tem;        if (syntax.lookupReservedName(docName, &tem))	  message(ParserMessages::nameReferenceReservedName,	          StringMessageArg(docName));        if (syntax.reservedName(Syntax::ReservedName(i)).size() == 0)	  syntax.setName(i, docName);        break;      }    }  }}Boolean Parser::addRefDelimShortref(Syntax &syntax,				    const CharsetInfo &syntaxCharset,				    const CharsetInfo &internalCharset,				    CharSwitcher &switcher){  // Column 2 from Figure 4  static const char delimShortref[][3] = {    { 9 },    { 13 },    { 10 },    { 10, 66 },    { 10, 13 },    { 10, 66, 13 },    { 66, 13 },    { 32 },    { 66, 66 },    { 34 },    { 35 },    { 37 },    { 39 },    { 40 },    { 41 },    { 42 },    { 43 },    { 44 },    { 45 },    { 45, 45 },    { 58 },    { 59 },    { 61 },    { 64 },    { 91 },    { 93 },    { 94 },    { 95 },    { 123 },    { 124 },    { 125 },    { 126 },  };  ISet<WideChar> missing;  for (size_t i = 0; i < SIZEOF(delimShortref); i++) {    StringC delim;        size_t j;    for (j = 0; j < 3 && delimShortref[i][j] != '\0'; j++) {      Char c;      UnivChar univChar = translateUniv(delimShortref[i][j], switcher,					syntaxCharset);      if (univToDescCheck(internalCharset, univChar, c))	delim += c;      else	missing += univChar;    }    if (delim.size() == j) {      if (switcher.nSwitches() > 0 && syntax.isValidShortref(delim))	message(ParserMessages::duplicateDelimShortref,		StringMessageArg(delim));      else	syntax.addDelimShortref(delim, internalCharset);    }  }  if (!missing.isEmpty())    message(ParserMessages::missingSignificant646, CharsetMessageArg(missing));  return 1;}// Determine whether the document starts with an SGML declaration.// There is no current syntax at this point.Boolean Parser::scanForSgmlDecl(const CharsetInfo &initCharset){  Char rs;  if (!univToDescCheck(initCharset, UnivCharsetDesc::rs, rs))    return 0;  Char re;  if (!univToDescCheck(initCharset, UnivCharsetDesc::re, re))    return 0;  Char space;  if (!univToDescCheck(initCharset, UnivCharsetDesc::space, space))    return 0;  Char tab;  if (!univToDescCheck(initCharset, UnivCharsetDesc::tab, tab))    return 0;  InputSource *in = currentInput();  Xchar c = in->get(messenger());  while (c == rs || c == space || c == re || c == tab)    c = in->tokenChar(messenger());  if (c != initCharset.execToDesc('<'))    return 0;  if (in->tokenChar(messenger()) != initCharset.execToDesc('!'))    return 0;  c = in->tokenChar(messenger());  if (c != initCharset.execToDesc('S')      && c != initCharset.execToDesc('s'))    return 0;  c = in->tokenChar(messenger());  if (c != initCharset.execToDesc('G')      && c != initCharset.execToDesc('g'))    return 0;  c = in->tokenChar(messenger());  if (c != initCharset.execToDesc('M')      && c != initCharset.execToDesc('m'))    return 0;  c = in->tokenChar(messenger());  if (c != initCharset.execToDesc('L')      && c != initCharset.execToDesc('l'))    return 0;  c = in->tokenChar(messenger());  // Don't recognize this if SGML is followed by a name character.  if (c == InputSource::eE)    return 1;  in->endToken(in->currentTokenLength() - 1);  if (c == initCharset.execToDesc('-'))    return 0;  if (c == initCharset.execToDesc('.'))    return 0;  UnivChar univ;  if (!initCharset.descToUniv(c, univ))     return 1;  if (UnivCharsetDesc::a <= univ && univ < UnivCharsetDesc::a + 26)    return 0;  if (UnivCharsetDesc::A <= univ && univ < UnivCharsetDesc::A + 26)    return 0;  if (UnivCharsetDesc::zero <= univ && univ < UnivCharsetDesc::zero + 10)    return 0;  return 1;}	    void Parser::findMissingMinimum(const CharsetInfo &charset,				ISet<WideChar> &missing){  Char to;  size_t i;  for (i = 0; i < 26; i++) {    if (!univToDescCheck(charset, UnivCharsetDesc::A + i, to))      missing += UnivCharsetDesc::A + i;    if (!univToDescCheck(charset, UnivCharsetDesc::a + i, to))      missing += UnivCharsetDesc::a + i;  }  for (i = 0; i < 10; i++) {    Char to;    if (!univToDescCheck(charset, UnivCharsetDesc::zero + i, to))      missing += UnivCharsetDesc::zero + i;  }  static const UnivChar special[] = {    39, 40, 41, 43, 44, 45, 46, 47, 58, 61, 63    };  for (i = 0; i < SIZEOF(special); i++)    if (!univToDescCheck(charset, special[i], to))      missing += special[i];}Boolean Parser::parseSgmlDecl(){  SdParam parm;  SdBuilder sdBuilder;  if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral, SdParam::name), parm))    return 0;  if (parm.type == SdParam::name) {    sdBuilder.external = 1;    Location loc(currentLocation());    StringC name;    parm.token.swap(name);    ExternalId externalId;    if (!sdParseSgmlDeclRef(sdBuilder, parm, externalId))      return 0;    ExternalEntity *entity      = new ExternalTextEntity(name, EntityDecl::sgml, loc, externalId);    ConstPtr<Entity> entityPtr(entity);    entity->generateSystemId(*this);    if (entity->externalId().effectiveSystemId().size() == 0) {      message(ParserMessages::cannotGenerateSystemIdSgml);      return 0;    }    Ptr<EntityOrigin> origin(EntityOrigin::make(internalAllocator(), entityPtr, loc));    if (currentMarkup())      currentMarkup()->addEntityStart(origin);    pushInput(entityManager().open(entity->externalId().effectiveSystemId(),		                   sd().docCharset(),				   origin.pointer(),				   0,				   messenger()));    if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))      return 0;  }  StringC version(sd().execToInternal("ISO 8879:1986"));  StringC enrVersion(sd().execToInternal("ISO 8879:1986 (ENR)"));  StringC wwwVersion(sd().execToInternal("ISO 8879:1986 (WWW)"));  if (parm.literalText.string() == enrVersion)    sdBuilder.enr = 1;  else if (parm.literalText.string() == wwwVersion) {    sdBuilder.enr = 1;    sdBuilder.www = 1;  }  else if (parm.literalText.string() != version)    message(ParserMessages::standardVersion,	    StringMessageArg(parm.literalText.string()));  if (sdBuilder.external && !sdBuilder.www)    message(ParserMessages::sgmlDeclRefRequiresWww);  sdBuilder.sd = new Sd(entityManagerPtr());  if (sdBuilder.www)    sdBuilder.sd->setWww(1);  typedef Boolean (Parser::*SdParser)(SdBuilder &, SdParam &);  static SdParser parsers[] = {    &Parser::sdParseDocumentCharset,    &Parser::sdParseCapacity,    &Parser::sdParseScope,    &Parser::sdParseSyntax,    &Parser::sdParseFeatures,    &Parser::sdParseAppinfo,    &Parser::sdParseSeealso,  };  for (size_t i = 0; i < SIZEOF(parsers); i++) {    if (!(this->*(parsers[i]))(sdBuilder, parm))      return 0;    if (!sdBuilder.valid)      return 0;  }  setSdOverrides(*sdBuilder.sd);  if (sdBuilder.sd->formal()) {    while (!sdBuilder.formalErrorList.empty()) {      SdFormalError *p = sdBuilder.formalErrorList.get();      ParserState *state = this; // work around lcc 3.0 bug      p->send(*state);      delete p;    }  }  setSd(sdBuilder.sd.pointer());  currentInput()->setDocCharset(sd().docCharset(), entityManager().charset());  if (sdBuilder.sd->scopeInstance()) {    Syntax *proSyntax = new Syntax(sd());    CharSwitcher switcher;    setStandardSyntax(*proSyntax, refSyntax, sd().internalCharset(), switcher, sdBuilder.www);    proSyntax->setSgmlChar(*sdBuilder.syntax->charSet(Syntax::sgmlChar));    ISet<WideChar> invalidSgmlChar;    proSyntax->checkSgmlChar(*sdBuilder.sd,			     sdBuilder.syntax.pointer(),			     1,  // get results in document character set			     invalidSgmlChar);    sdBuilder.syntax->checkSgmlChar(*sdBuilder.sd,				    proSyntax,				    1, // get results in document character set				    invalidSgmlChar);    if (!invalidSgmlChar.isEmpty())      message(ParserMessages::invalidSgmlChar, CharsetMessageArg(invalidSgmlChar));    setSyntaxes(proSyntax, sdBuilder.syntax.pointer());  }  else    setSyntax(sdBuilder.syntax.pointer());  if (syntax().multicode())    currentInput()->setMarkupScanTable(syntax().markupScanTable());  return 1;}Boolean Parser::sdParseSgmlDeclRef(SdBuilder &sdBuilder, SdParam &parm,				   ExternalId &id){  id.setLocation(currentLocation());  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSYSTEM,                                    SdParam::reservedName + Sd::rPUBLIC,				    SdParam::mdc),		    parm))    return 0;  if (parm.type == SdParam::mdc)    return 1;  if (parm.type == SdParam::reservedName + Sd::rPUBLIC) {    if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))      return 0;    const MessageType1 *err;    PublicId::TextClass textClass;    if (!id.setPublic(parm.literalText, sd().internalCharset(), syntax().space(), err))      sdBuilder.addFormalError(currentLocation(), *err, id.publicId()->string());    else if (id.publicId()->getTextClass(textClass)	     && textClass != PublicId::SD)      sdBuilder.addFormalError(currentLocation(),			       ParserMessages::sdTextClass,			       id.publicId()->string());  }  if (!parseSdParam(AllowedSdParams(SdParam::systemIdentifier, SdParam::mdc), parm))    return 0;  if (parm.type == SdParam::mdc)    return 1;  id.setSystem(parm.literalText);  return parseSdParam(AllowedSdParams(SdParam::mdc), parm);}Boolean Parser::sdParseDocumentCharset(SdBuilder &sdBuilder, SdParam &parm){  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rCHARSET),		    parm))    return 0;  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET),		    parm))    return 0;  CharsetDecl decl;  UnivCharsetDesc desc;  if (!sdParseCharset(sdBuilder, parm, 1, decl, desc))    return 0;  ISet<WideChar> missing;  findMissingMinimum(desc, missing);  if (!missing.isEmpty()) {    message(ParserMessages::missingMinimumChars,	    CharsetMessageArg(missing));    return 0;  }  ISet<Char> sgmlChar;  decl.usedSet(sgmlChar);  sdBuilder.sd->setDocCharsetDesc(desc);  sdBuilder.sd->setDocCharsetDecl(decl);  sdBuilder.syntax = new Syntax(*sdBuilder.sd);  if (sd().internalCharsetIsDocCharset())    sdBuilder.syntax->setSgmlChar(sgmlChar);  else {    ISet<Char> internalSgmlChar;    translateDocSet(sdBuilder.sd->docCharset(), sdBuilder.sd->internalCharset(),                    sgmlChar, internalSgmlChar);    sdBuilder.syntax->setSgmlChar(internalSgmlChar);  }  return 1;}void Parser::translateDocSet(const CharsetInfo &fromCharset,			     const CharsetInfo &toCharset,			     const ISet<Char> &fromSet,			     ISet<Char> &toSet){  ISetIter<Char> iter(fromSet);  Char min, max;  while (iter.next(min, max)) {    do {      UnivChar univChar;      Char internalChar;      WideChar count2, alsoMax;      if (!fromCharset.descToUniv(min, univChar, alsoMax)) {	if (alsoMax >= max)	  break;	min = alsoMax;      }      else {	  // FIXME better not to use univToDescCheck here	  // Maybe OK if multiple internal chars corresponding to doc char	int nMap = univToDescCheck(toCharset, univChar, internalChar, count2);	if (alsoMax > max)	  alsoMax = max;	if (alsoMax - min > count2 - 1)	  alsoMax = min + (count2 - 1);	if (nMap)	  toSet.addRange(internalChar, internalChar + (alsoMax - min));	min = alsoMax;      }    } while (min++ != max);  }}Boolean Parser::sdParseCharset(SdBuilder &sdBuilder,			       SdParam &parm,			       Boolean isDocument,			       CharsetDecl &decl,			       UnivCharsetDesc &desc){  decl.clear();  ISet<WideChar> multiplyDeclared;  // This is for checking whether the syntax reference character set  // is ISO 646 when SCOPE is INSTANCE.  Boolean maybeISO646 = 1;  do {    if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))      return 0;    UnivCharsetDesc baseDesc;    PublicId id;    Boolean found;    PublicId::TextClass textClass;    const MessageType1 *err;    if (!id.init(parm.literalText, sd().internalCharset(), syntax().space(), err))      sdBuilder.addFormalError(currentLocation(),			       *err,			       id.string());    else if (id.getTextClass(textClass)	     && textClass != PublicId::CHARSET)      sdBuilder.addFormalError(currentLocation(),			       ParserMessages::basesetTextClass,			       id.string());

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?