actions.cc
来自「a little DFA compiler.」· CC 代码 · 共 1,063 行 · 第 1/2 页
CC
1,063 行
s.len--; switch (c = *s.str++) { case 'n': return xlat('\n'); case 't': return xlat('\t'); case 'v': return xlat('\v'); case 'b': return xlat('\b'); case 'r': return xlat('\r'); case 'f': return xlat('\f'); case 'a': return xlat('\a'); case 'x': { if (s.len < 2) { fatal(s.ofs()+s.len, "Illegal hexadecimal character code, two hexadecimal digits are required"); return ~0; } const char *p1 = strchr(hex, tolower(s.str[0])); const char *p2 = strchr(hex, tolower(s.str[1])); if (!p1 || !p2) { fatal(s.ofs()+(p1?1:0), "Illegal hexadecimal character code"); return ~0; } else { s.len -= 2; s.str += 2; uint v = (uint)((p1 - hex) << 4) + (uint)((p2 - hex)); return v; } } case 'U': { if (s.len < 8) { fatal(s.ofs()+s.len, "Illegal unicode character, eight hexadecimal digits are required"); return ~0; } uint l = 0; if (s.str[0] == '0') { l++; if (s.str[1] == '0') { l++; if (s.str[2] == '0' || (s.str[2] == '1' && uFlag)) { l++; if (uFlag) { const char *u3 = strchr(hex, tolower(s.str[2])); const char *u4 = strchr(hex, tolower(s.str[3])); if (u3 && u4) { ucb = (uint)((u3 - hex) << 20) + (uint)((u4 - hex) << 16); l++; } } else if (s.str[3] == '0') { l++; } } } } if (l != 4) { fatal(s.ofs()+l, "Illegal unicode character, eight hexadecimal digits are required"); } s.len -= 4; s.str += 4; // no break; } case 'X': case 'u': { if (s.len < 4) { fatal(s.ofs()+s.len, c == 'X' ? "Illegal hexadecimal character code, four hexadecimal digits are required" : "Illegal unicode character, four hexadecimal digits are required"); return ~0; } const char *p1 = strchr(hex, tolower(s.str[0])); const char *p2 = strchr(hex, tolower(s.str[1])); const char *p3 = strchr(hex, tolower(s.str[2])); const char *p4 = strchr(hex, tolower(s.str[3])); if (!p1 || !p2 || !p3 || !p4) { fatal(s.ofs()+(p1?1:0)+(p2?1:0)+(p3?1:0), c == 'X' ? "Illegal hexadecimal character code, non hexxdecimal digit found" : "Illegal unicode character, non hexadecimal digit found"); return ~0; } else { s.len -= 4; s.str += 4; uint v = (uint)((p1 - hex) << 12) + (uint)((p2 - hex) << 8) + (uint)((p3 - hex) << 4) + (uint)((p4 - hex)) + ucb; if (v >= nRealChars) { fatal(s.ofs(), c == 'X' ? "Illegal hexadecimal character code, out of range" : "Illegal unicode character, out of range"); } return v; } } case '4': case '5': case '6': case '7': { fatal(s.ofs()-1, "Illegal octal character code, first digit must be 0 thru 3"); return ~0; } case '0': case '1': case '2': case '3': { if (s.len < 2) { fatal(s.ofs()+s.len, "Illegal octal character code, three octal digits are required"); return ~0; } const char *p0 = strchr(oct, c); const char *p1 = strchr(oct, s.str[0]); const char *p2 = strchr(oct, s.str[1]); if (!p0 || !p1 || !p2) { fatal(s.ofs()+(p1?1:0), "Illegal octal character code, non octal digit found"); return ~0; } else { s.len -= 2; s.str += 2; uint v = (uint)((p0 - oct) << 6) + (uint)((p1 - oct) << 3) + (uint)(p2 - oct); return v; } } default: return xlat(c); }}std::string& Scanner::unescape(SubStr& str_in, std::string& str_out) const{ str_out.clear(); while(str_in.len) { uint c = unescape(str_in); if (c > 0xFF) { fatal(str_in.ofs(), "Illegal character"); } str_out += static_cast<char>(c); } return str_out;}Range * Scanner::getRange(SubStr &s) const{ uint lb = unescape(s), ub, xlb, xub, c; if (s.len < 2 || *s.str != '-') { ub = lb; } else { s.len--; s.str++; ub = unescape(s); if (ub < lb) { uint tmp = lb; lb = ub; ub = tmp; } xlb = xlat(lb); xub = xlat(ub); for(c = lb; c <= ub; c++) { if (!(xlb <= xlat(c) && xlat(c) <= ub)) { /* range doesn't work */ Range * r = new Range(xlb, xlb + 1); for (c = lb + 1; c <= ub; c++) { r = doUnion(r, new Range(xlat(c), xlat(c) + 1)); } return r; } } lb = xlb; ub = xub; } return new Range(lb, ub + 1);}RegExp * Scanner::matchChar(uint c) const{ return new MatchOp(new Range(c, c + 1));}RegExp * Scanner::strToRE(SubStr s) const{ s.len -= 2; s.str += 1; if (s.len == 0) return new NullOp; RegExp *re = matchChar(unescape(s)); while (s.len > 0) re = new CatOp(re, matchChar(unescape(s))); return re;}RegExp * Scanner::strToCaseInsensitiveRE(SubStr s) const{ s.len -= 2; s.str += 1; if (s.len == 0) return new NullOp; uint c = unescape(s); RegExp *re, *reL, *reU; if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { reL = matchChar(xlat(tolower(c))); reU = matchChar(xlat(toupper(c))); re = mkAlt(reL, reU); } else { re = matchChar(c); } while (s.len > 0) { uint c = unescape(s); if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { reL = matchChar(xlat(tolower(c))); reU = matchChar(xlat(toupper(c))); re = new CatOp(re, mkAlt(reL, reU)); } else { re = new CatOp(re, matchChar(c)); } } return re;}RegExp * Scanner::ranToRE(SubStr s) const{ s.len -= 2; s.str += 1; if (s.len == 0) return new NullOp; Range *r = getRange(s); while (s.len > 0) r = doUnion(r, getRange(s)); return new MatchOp(r);}RegExp * Scanner::invToRE(SubStr s) const{ s.len--; s.str++; RegExp * any = ranToRE(SubStr(wFlag ? "[\\X0000-\\XFFFF]" : "[\\000-\\377]")); if (s.len <= 2) { return any; } RegExp * ran = ranToRE(s); RegExp * inv = mkDiff(any, ran); delete ran; delete any; return inv;}RegExp * Scanner::mkDot() const{ RegExp * any = ranToRE(SubStr(wFlag ? "[\\X0000-\\XFFFF]" : "[\\000-\\377]")); RegExp * ran = matchChar(xlat('\n')); RegExp * inv = mkDiff(any, ran); delete ran; delete any; return inv;}const char *RuleOp::type = "RuleOp";RuleOp::RuleOp(RegExp *e, RegExp *c, Token *t, uint a) : exp(e) , ctx(c) , ins(NULL) , accept(a) , code(t) , line(0){ ;}void RuleOp::calcSize(Char *rep){ exp->calcSize(rep); ctx->calcSize(rep); size = exp->size + (ctx->size ? ctx->size + 2 : 1);}void RuleOp::compile(Char *rep, Ins *i){ ins = i; exp->compile(rep, &i[0]); i += exp->size; if (ctx->size) { i->i.tag = CTXT; i->i.link = &i[1]; i++; ctx->compile(rep, &i[0]); i += ctx->size; } i->i.tag = TERM; i->i.link = this;}void RuleOp::split(CharSet &s){ exp->split(s); ctx->split(s);}void optimize(Ins *i){ while (!isMarked(i)) { mark(i); if (i->i.tag == CHAR) { i = (Ins*) i->i.link; } else if (i->i.tag == GOTO || i->i.tag == FORK) { Ins *target = (Ins*) i->i.link; optimize(target); if (target->i.tag == GOTO) i->i.link = target->i.link == target ? i : target; if (i->i.tag == FORK) { Ins *follow = (Ins*) & i[1]; optimize(follow); if (follow->i.tag == GOTO && follow->i.link == follow) { i->i.tag = GOTO; } else if (i->i.link == i) { i->i.tag = GOTO; i->i.link = follow; } } return ; } else { ++i; } }}void genCode(std::ostream& o, RegExp *re){ genCode(o, 0, re);}CharSet::CharSet() : fix(0) , freeHead(0) , freeTail(0) , rep(new CharPtr[nRealChars]) , ptn(new CharPtn[nRealChars]){ for (uint j = 0; j < nRealChars; ++j) { rep[j] = &ptn[0]; ptn[j].nxt = &ptn[j + 1]; /* wrong for j=nRealChars but will be corrected below */ ptn[j].card = 0; } freeHead = &ptn[1]; *(freeTail = &ptn[nRealChars - 1].nxt) = NULL; ptn[0].card = nRealChars; ptn[0].nxt = NULL;} CharSet::~CharSet(){ delete[] rep; delete[] ptn;}void genCode(std::ostream& o, uint ind, RegExp *re){ CharSet cs; uint j; re->split(cs); /* for(uint k = 0; k < nChars;){ for(j = k; ++k < nRealChars && cs.rep[k] == cs.rep[j];); printSpan(cerr, j, k); cerr << "\t" << cs.rep[j] - &cs.ptn[0] << endl; } */ Char *rep = new Char[nRealChars]; for (j = 0; j < nRealChars; ++j) { if (!cs.rep[j]->nxt) cs.rep[j]->nxt = &cs.ptn[j]; rep[j] = (Char) (cs.rep[j]->nxt - &cs.ptn[0]); } re->calcSize(rep); Ins *ins = new Ins[re->size + 1]; memset(ins, 0, (re->size + 1)*sizeof(Ins)); re->compile(rep, ins); Ins *eoi = &ins[re->size]; eoi->i.tag = GOTO; eoi->i.link = eoi; optimize(ins); for (j = 0; j < re->size;) { unmark(&ins[j]); if (ins[j].i.tag == CHAR) { j = (Ins*) ins[j].i.link - ins; } else { j++; } } DFA *dfa = new DFA(ins, re->size, 0, nRealChars, rep); dfa->emit(o, ind); delete dfa; delete [] ins; delete [] rep;}} // end namespace re2c
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?