⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 parsemode.cxx

📁 SP是一个基于GNU C++编译器
💻 CXX
📖 第 1 页 / 共 2 页
字号:
  }  const String<EquivCode> emptyString;  Boolean multicode = syntax().multicode();  for (i = 0; i < n; i++) {    TrieBuilder tb(partition.maxCode() + 1);    TrieBuilder::TokenVector ambiguities;    Vector<Token> suppressTokens;    if (multicode) {      suppressTokens.assign(partition.maxCode() + 1, 0);      suppressTokens[partition.eECode()] = tokenEe;    }    tb.recognizeEE(partition.eECode(), tokenEe);    ModeInfo iter(modes[i], sd());    TokenInfo ti;    // We try to handle the possibility that some delimiters may be empty;    // this might happen when compiling recognizers for the SGML declaration.    while (iter.nextToken(&ti)) {      switch (ti.type) {      case TokenInfo::delimType:	if (delimCodes[ti.delim1].size() > 0)	  tb.recognize(delimCodes[ti.delim1], ti.token,		       ti.priority, ambiguities);	break;      case TokenInfo::delimDelimType:	{	  String<EquivCode> str(delimCodes[ti.delim1]);	  if (str.size() > 0 && delimCodes[ti.delim2].size() > 0) {	    str += delimCodes[ti.delim2];	    tb.recognize(str, ti.token, ti.priority, ambiguities);	  }	}	break;      case TokenInfo::delimSetType:	if (delimCodes[ti.delim1].size() > 0)	  tb.recognize(delimCodes[ti.delim1], setCodes[ti.set],		       ti.token, ti.priority, ambiguities);	break;      case TokenInfo::setType:	tb.recognize(emptyString, setCodes[ti.set], ti.token,		     ti.priority, ambiguities);	if (multicode) {	  const String<EquivCode> &equivCodes = setCodes[ti.set];	  for (size_t j = 0; j < equivCodes.size(); j++)	    suppressTokens[equivCodes[j]] = ti.token;	}	break;      case TokenInfo::functionType:	tb.recognize(functionCode[ti.function], ti.token,		     ti.priority, ambiguities);	if (multicode)	  suppressTokens[functionCode[ti.function][0]] = ti.token;	break;      }    }    if (iter.includesShortref()) {      for (int j = 0; j < nShortref; j++) {	const SrInfo *p = &srInfo[j];	if (p->bSequenceLength > 0)	  tb.recognizeB(p->chars, p->bSequenceLength,			syntax().quantity(Syntax::qBSEQLEN),			setCodes[Syntax::blank],			p->chars2, tokenFirstShortref + j,			ambiguities);	else	  tb.recognize(p->chars, tokenFirstShortref + j,		       Priority::delim, ambiguities);      }    }    if (options().warnDataDelim) {      switch (modes[i]) {      default:        if (!iter.includesShortref())	  break;	// fall through      case alitMode:      case alitaMode:      case aliteMode:      case talitMode:      case talitaMode:      case taliteMode:	for (size_t j = 0; j < dataDelimCodes.size(); j++) {	  String<EquivCode> code;	  code += dataDelimCodes[j];	  tb.recognize(code, tokenCharDelim, Priority::dataDelim, ambiguities);	}	break;      case plitMode:      case plitaMode:      case pliteMode:	{	  String<EquivCode> code;	  code += partition.charCode(syntax().delimGeneral(Syntax::dPERO)[0]);	  tb.recognize(code, tokenCharDelim, Priority::dataDelim, ambiguities);	}	break;      }    }    setRecognizer(modes[i],		  (multicode		   ? new Recognizer(tb.extractTrie(), partition.map(),				    suppressTokens)		   : new Recognizer(tb.extractTrie(), partition.map())));    // FIXME give more information    for (size_t j = 0; j < ambiguities.size(); j += 2)      message(ParserMessages::lexicalAmbiguity,	      TokenMessageArg(ambiguities[j], modes[i], syntaxPointer(),			      sdPointer()),	      TokenMessageArg(ambiguities[j + 1], modes[i], syntaxPointer(),			      sdPointer()));  }}void Parser::compileNormalMap(){  XcharMap<PackedBoolean> map(0);  ISetIter<Char> sgmlCharIter(*syntax().charSet(Syntax::sgmlChar));  Char min, max;  while (sgmlCharIter.next(min, max))    map.setRange(min, max, 1);  ModeInfo iter(mconnetMode, sd());  TokenInfo ti;  while (iter.nextToken(&ti)) {    switch (ti.type) {    case TokenInfo::delimType:    case TokenInfo::delimDelimType:    case TokenInfo::delimSetType:      {	const StringC &delim = syntax().delimGeneral(ti.delim1);	if (!delim.size())	  break;	Char c = delim[0];	map.setChar(c, 0);	StringC str(syntax().generalSubstTable()->inverse(c));	for (size_t i = 0; i < str.size(); i++)	  map.setChar(str[i], 0);      }      break;    case TokenInfo::setType:      if (ti.token != tokenChar) {	ISetIter<Char> setIter(*syntax().charSet(ti.set));	Char min, max;	while (setIter.next(min, max))	  map.setRange(min, max, 0);      }      break;    case TokenInfo::functionType:      if (ti.token != tokenChar)	map.setChar(syntax().standardFunction(ti.function), 0);      break;    }  }  int nShortref = currentDtd().nShortref();  for (int i = 0; i < nShortref; i++) {    Char c = currentDtd().shortref(i)[0];    if (c == sd().execToInternal('B')) {      ISetIter<Char> setIter(*syntax().charSet(Syntax::blank));      Char min, max;      while (setIter.next(min, max))	map.setRange(min, max, 0);    }    else {      map.setChar(c, 0);      StringC str(syntax().generalSubstTable()->inverse(c));      for (size_t j = 0; j < str.size(); j++)	map.setChar(str[j], 0);    }  }  setNormalMap(map);}void Parser::addNeededShortrefs(Dtd &dtd, const Syntax &syntax){  if (!syntax.hasShortrefs())    return;  PackedBoolean delimRelevant[Syntax::nDelimGeneral];  size_t i;  for (i = 0; i < Syntax::nDelimGeneral; i++)    delimRelevant[i] = 0;  ModeInfo iter(mconnetMode, sd());  TokenInfo ti;  while (iter.nextToken(&ti)) {    switch (ti.type) {    case TokenInfo::delimType:    case TokenInfo::delimDelimType:    case TokenInfo::delimSetType:      delimRelevant[ti.delim1] = 1;      break;    default:      break;    }  }  // PIO and NET are the only delimiters that are recognized in con  // mode without context.  If a short reference delimiter is  // identical to one of these delimiters, then we'll have an  // ambiguity.   We make such a short reference delimiter needed  // to ensure that this ambiguity is reported.  if (syntax.isValidShortref(syntax.delimGeneral(Syntax::dPIO)))    dtd.addNeededShortref(syntax.delimGeneral(Syntax::dPIO));  if (syntax.isValidShortref(syntax.delimGeneral(Syntax::dNET)))    dtd.addNeededShortref(syntax.delimGeneral(Syntax::dNET));  size_t nShortrefComplex = syntax.nDelimShortrefComplex();  // A short reference delimiter is needed if it is used or if it can  // contains some other shorter delimiter that is either a relevant general  // delimiter or a shortref delimiter that is used.  for (i = 0; i < nShortrefComplex; i++) {    size_t j;    for (j = 0; j < Syntax::nDelimGeneral; j++)      if (delimRelevant[j]	  && shortrefCanPreemptDelim(syntax.delimShortrefComplex(i),				     syntax.delimGeneral(j),				     0,				     syntax)) {	dtd.addNeededShortref(syntax.delimShortrefComplex(i));	break;      }    for (j = 0; j < dtd.nShortref(); j++)      if (shortrefCanPreemptDelim(syntax.delimShortrefComplex(i),				  dtd.shortref(j),				  1,				  syntax)) {	dtd.addNeededShortref(syntax.delimShortrefComplex(i));	break;      }  }  }Boolean Parser::shortrefCanPreemptDelim(const StringC &sr,					const StringC &d,					Boolean dIsSr,					const Syntax &syntax){  Char letterB = sd().execToInternal('B');  for (size_t i = 0; i < sr.size(); i++) {    size_t j = 0;    size_t k = i;    for (;;) {      if (j == d.size())	return 1;      if (k >= sr.size())	break;      if (sr[k] == letterB) {	if (dIsSr && d[j] == letterB) {	  j++;	  k++;	}	else if (syntax.isB(d[j])) {	  j++;	  k++;	  if (k == sr.size() || sr[k] != letterB) {	    // it was the last B in the sequence	    while (j < d.size() && syntax.isB(d[j]))	      j++;	  }	}	else	  break;      }      else if (dIsSr && d[j] == letterB) {	if (syntax.isB(sr[k])) {	  ++j;	  ++k;	  if (j < d.size() && d[j] != letterB) {	    while (k < sr.size() && syntax.isB(sr[k]))	      k++;	  }	}	else	  break;      }      else if (d[j] == sr[k]) {	j++;	k++;      }      else	break;    }  }  return 0;}#ifdef SP_NAMESPACE}#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -