📄 parsemode.cxx
字号:
} const String<EquivCode> emptyString; Boolean multicode = syntax().multicode(); for (i = 0; i < n; i++) { TrieBuilder tb(partition.maxCode() + 1); TrieBuilder::TokenVector ambiguities; Vector<Token> suppressTokens; if (multicode) { suppressTokens.assign(partition.maxCode() + 1, 0); suppressTokens[partition.eECode()] = tokenEe; } tb.recognizeEE(partition.eECode(), tokenEe); ModeInfo iter(modes[i], sd()); TokenInfo ti; // We try to handle the possibility that some delimiters may be empty; // this might happen when compiling recognizers for the SGML declaration. while (iter.nextToken(&ti)) { switch (ti.type) { case TokenInfo::delimType: if (delimCodes[ti.delim1].size() > 0) tb.recognize(delimCodes[ti.delim1], ti.token, ti.priority, ambiguities); break; case TokenInfo::delimDelimType: { String<EquivCode> str(delimCodes[ti.delim1]); if (str.size() > 0 && delimCodes[ti.delim2].size() > 0) { str += delimCodes[ti.delim2]; tb.recognize(str, ti.token, ti.priority, ambiguities); } } break; case TokenInfo::delimSetType: if (delimCodes[ti.delim1].size() > 0) tb.recognize(delimCodes[ti.delim1], setCodes[ti.set], ti.token, ti.priority, ambiguities); break; case TokenInfo::setType: tb.recognize(emptyString, setCodes[ti.set], ti.token, ti.priority, ambiguities); if (multicode) { const String<EquivCode> &equivCodes = setCodes[ti.set]; for (size_t j = 0; j < equivCodes.size(); j++) suppressTokens[equivCodes[j]] = ti.token; } break; case TokenInfo::functionType: tb.recognize(functionCode[ti.function], ti.token, ti.priority, ambiguities); if (multicode) suppressTokens[functionCode[ti.function][0]] = ti.token; break; } } if (iter.includesShortref()) { for (int j = 0; j < nShortref; j++) { const SrInfo *p = &srInfo[j]; if (p->bSequenceLength > 0) tb.recognizeB(p->chars, p->bSequenceLength, syntax().quantity(Syntax::qBSEQLEN), setCodes[Syntax::blank], p->chars2, tokenFirstShortref + j, ambiguities); else tb.recognize(p->chars, tokenFirstShortref + j, Priority::delim, ambiguities); } } if (options().warnDataDelim) { switch (modes[i]) { default: if (!iter.includesShortref()) break; // fall through case alitMode: case alitaMode: case aliteMode: case talitMode: case talitaMode: case taliteMode: for (size_t j = 0; j < dataDelimCodes.size(); j++) { String<EquivCode> code; code += dataDelimCodes[j]; tb.recognize(code, tokenCharDelim, Priority::dataDelim, ambiguities); } break; case plitMode: case plitaMode: case pliteMode: { String<EquivCode> code; code += partition.charCode(syntax().delimGeneral(Syntax::dPERO)[0]); tb.recognize(code, tokenCharDelim, Priority::dataDelim, ambiguities); } break; } } setRecognizer(modes[i], (multicode ? new Recognizer(tb.extractTrie(), partition.map(), suppressTokens) : new Recognizer(tb.extractTrie(), partition.map()))); // FIXME give more information for (size_t j = 0; j < ambiguities.size(); j += 2) message(ParserMessages::lexicalAmbiguity, TokenMessageArg(ambiguities[j], modes[i], syntaxPointer(), sdPointer()), TokenMessageArg(ambiguities[j + 1], modes[i], syntaxPointer(), sdPointer())); }}void Parser::compileNormalMap(){ XcharMap<PackedBoolean> map(0); ISetIter<Char> sgmlCharIter(*syntax().charSet(Syntax::sgmlChar)); Char min, max; while (sgmlCharIter.next(min, max)) map.setRange(min, max, 1); ModeInfo iter(mconnetMode, sd()); TokenInfo ti; while (iter.nextToken(&ti)) { switch (ti.type) { case TokenInfo::delimType: case TokenInfo::delimDelimType: case TokenInfo::delimSetType: { const StringC &delim = syntax().delimGeneral(ti.delim1); if (!delim.size()) break; Char c = delim[0]; map.setChar(c, 0); StringC str(syntax().generalSubstTable()->inverse(c)); for (size_t i = 0; i < str.size(); i++) map.setChar(str[i], 0); } break; case TokenInfo::setType: if (ti.token != tokenChar) { ISetIter<Char> setIter(*syntax().charSet(ti.set)); Char min, max; while (setIter.next(min, max)) map.setRange(min, max, 0); } break; case TokenInfo::functionType: if (ti.token != tokenChar) map.setChar(syntax().standardFunction(ti.function), 0); break; } } int nShortref = currentDtd().nShortref(); for (int i = 0; i < nShortref; i++) { Char c = currentDtd().shortref(i)[0]; if (c == sd().execToInternal('B')) { ISetIter<Char> setIter(*syntax().charSet(Syntax::blank)); Char min, max; while (setIter.next(min, max)) map.setRange(min, max, 0); } else { map.setChar(c, 0); StringC str(syntax().generalSubstTable()->inverse(c)); for (size_t j = 0; j < str.size(); j++) map.setChar(str[j], 0); } } setNormalMap(map);}void Parser::addNeededShortrefs(Dtd &dtd, const Syntax &syntax){ if (!syntax.hasShortrefs()) return; PackedBoolean delimRelevant[Syntax::nDelimGeneral]; size_t i; for (i = 0; i < Syntax::nDelimGeneral; i++) delimRelevant[i] = 0; ModeInfo iter(mconnetMode, sd()); TokenInfo ti; while (iter.nextToken(&ti)) { switch (ti.type) { case TokenInfo::delimType: case TokenInfo::delimDelimType: case TokenInfo::delimSetType: delimRelevant[ti.delim1] = 1; break; default: break; } } // PIO and NET are the only delimiters that are recognized in con // mode without context. If a short reference delimiter is // identical to one of these delimiters, then we'll have an // ambiguity. We make such a short reference delimiter needed // to ensure that this ambiguity is reported. if (syntax.isValidShortref(syntax.delimGeneral(Syntax::dPIO))) dtd.addNeededShortref(syntax.delimGeneral(Syntax::dPIO)); if (syntax.isValidShortref(syntax.delimGeneral(Syntax::dNET))) dtd.addNeededShortref(syntax.delimGeneral(Syntax::dNET)); size_t nShortrefComplex = syntax.nDelimShortrefComplex(); // A short reference delimiter is needed if it is used or if it can // contains some other shorter delimiter that is either a relevant general // delimiter or a shortref delimiter that is used. for (i = 0; i < nShortrefComplex; i++) { size_t j; for (j = 0; j < Syntax::nDelimGeneral; j++) if (delimRelevant[j] && shortrefCanPreemptDelim(syntax.delimShortrefComplex(i), syntax.delimGeneral(j), 0, syntax)) { dtd.addNeededShortref(syntax.delimShortrefComplex(i)); break; } for (j = 0; j < dtd.nShortref(); j++) if (shortrefCanPreemptDelim(syntax.delimShortrefComplex(i), dtd.shortref(j), 1, syntax)) { dtd.addNeededShortref(syntax.delimShortrefComplex(i)); break; } } }Boolean Parser::shortrefCanPreemptDelim(const StringC &sr, const StringC &d, Boolean dIsSr, const Syntax &syntax){ Char letterB = sd().execToInternal('B'); for (size_t i = 0; i < sr.size(); i++) { size_t j = 0; size_t k = i; for (;;) { if (j == d.size()) return 1; if (k >= sr.size()) break; if (sr[k] == letterB) { if (dIsSr && d[j] == letterB) { j++; k++; } else if (syntax.isB(d[j])) { j++; k++; if (k == sr.size() || sr[k] != letterB) { // it was the last B in the sequence while (j < d.size() && syntax.isB(d[j])) j++; } } else break; } else if (dIsSr && d[j] == letterB) { if (syntax.isB(sr[k])) { ++j; ++k; if (j < d.size() && d[j] != letterB) { while (k < sr.size() && syntax.isB(sr[k])) k++; } } else break; } else if (d[j] == sr[k]) { j++; k++; } else break; } } return 0;}#ifdef SP_NAMESPACE}#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -