actions.cc

来自「a little DFA compiler.」· CC 代码 · 共 1,063 行 · 第 1/2 页

CC
1,063
字号
	s.len--;	switch (c = *s.str++)	{		case 'n': return xlat('\n');		case 't': return xlat('\t');		case 'v': return xlat('\v');		case 'b': return xlat('\b');		case 'r': return xlat('\r');		case 'f': return xlat('\f');		case 'a': return xlat('\a');		case 'x':		{			if (s.len < 2)			{				fatal(s.ofs()+s.len, "Illegal hexadecimal character code, two hexadecimal digits are required");				return ~0;			}						const char *p1 = strchr(hex, tolower(s.str[0]));			const char *p2 = strchr(hex, tolower(s.str[1]));			if (!p1 || !p2)			{				fatal(s.ofs()+(p1?1:0), "Illegal hexadecimal character code");				return ~0;			}			else			{				s.len -= 2;				s.str += 2;								uint v = (uint)((p1 - hex) << 4) 				       + (uint)((p2 - hex));					return v;			}		}		case 'U':		{			if (s.len < 8)			{				fatal(s.ofs()+s.len, "Illegal unicode character, eight hexadecimal digits are required");				return ~0;			}			uint l = 0;									if (s.str[0] == '0')			{				l++;				if (s.str[1] == '0')				{					l++;					if (s.str[2] == '0' || (s.str[2] == '1' && uFlag))					{						l++;						if (uFlag) {							const char *u3 = strchr(hex, tolower(s.str[2]));							const char *u4 = strchr(hex, tolower(s.str[3]));							if (u3 && u4)							{								ucb = (uint)((u3 - hex) << 20)							        + (uint)((u4 - hex) << 16);								l++;							}						}						else if (s.str[3] == '0')						{							l++;						}					}				}			}			if (l != 4)			{				fatal(s.ofs()+l, "Illegal unicode character, eight hexadecimal digits are required");			}			s.len -= 4;			s.str += 4;						// no break;		}		case 'X':		case 'u':		{			if (s.len < 4)			{				fatal(s.ofs()+s.len, 					c == 'X'					? "Illegal hexadecimal character code, four hexadecimal digits are required"					: "Illegal unicode character, four hexadecimal digits are required");				return ~0;			}						const char *p1 = strchr(hex, tolower(s.str[0]));			const char *p2 = strchr(hex, tolower(s.str[1]));			const char *p3 = strchr(hex, tolower(s.str[2]));			const char *p4 = strchr(hex, tolower(s.str[3]));			if (!p1 || !p2 || !p3 || !p4)			{				fatal(s.ofs()+(p1?1:0)+(p2?1:0)+(p3?1:0), 					c == 'X'					? "Illegal hexadecimal character code, non hexxdecimal digit found"					: "Illegal unicode character, non hexadecimal digit found");				return ~0;			}			else			{				s.len -= 4;				s.str += 4;								uint v = (uint)((p1 - hex) << 12) 				       + (uint)((p2 - hex) <<  8)				       + (uint)((p3 - hex) <<  4)				       + (uint)((p4 - hex))				       + ucb;					if (v >= nRealChars)				{					fatal(s.ofs(),						c == 'X'						? "Illegal hexadecimal character code, out of range"						: "Illegal unicode character, out of range");				}					return v;			}		}		case '4':		case '5':		case '6':		case '7':		{			fatal(s.ofs()-1, "Illegal octal character code, first digit must be 0 thru 3");			return ~0;		}		case '0':		case '1':		case '2':		case '3':		{			if (s.len < 2)			{				fatal(s.ofs()+s.len, "Illegal octal character code, three octal digits are required");				return ~0;			}			const char *p0 = strchr(oct, c);			const char *p1 = strchr(oct, s.str[0]);			const char *p2 = strchr(oct, s.str[1]);			if (!p0 || !p1 || !p2)			{				fatal(s.ofs()+(p1?1:0), "Illegal octal character code, non octal digit found");				return ~0;			}			else			{				s.len -= 2;				s.str += 2;								uint v = (uint)((p0 - oct) << 6) + (uint)((p1 - oct) << 3) + (uint)(p2 - oct);					return v;			}		}		default:		return xlat(c);	}}std::string& Scanner::unescape(SubStr& str_in, std::string& str_out) const{	str_out.clear();	while(str_in.len)	{		uint c = unescape(str_in);				if (c > 0xFF)		{			fatal(str_in.ofs(), "Illegal character");		}		str_out += static_cast<char>(c);	}	return str_out;}Range * Scanner::getRange(SubStr &s) const{	uint lb = unescape(s), ub, xlb, xub, c;	if (s.len < 2 || *s.str != '-')	{		ub = lb;	}	else	{		s.len--;		s.str++;		ub = unescape(s);		if (ub < lb)		{			uint tmp = lb;			lb = ub;			ub = tmp;		}				xlb = xlat(lb);		xub = xlat(ub);				for(c = lb; c <= ub; c++)		{			if (!(xlb <= xlat(c) && xlat(c) <= ub))			{				/* range doesn't work */				Range * r = new Range(xlb, xlb + 1);				for (c = lb + 1; c <= ub; c++)				{					r = doUnion(r, new Range(xlat(c), xlat(c) + 1));				}				return r;			}		}				lb = xlb;		ub = xub;	}	return new Range(lb, ub + 1);}RegExp * Scanner::matchChar(uint c) const{	return new MatchOp(new Range(c, c + 1));}RegExp * Scanner::strToRE(SubStr s) const{	s.len -= 2;	s.str += 1;	if (s.len == 0)		return new NullOp;	RegExp *re = matchChar(unescape(s));	while (s.len > 0)		re = new CatOp(re, matchChar(unescape(s)));	return re;}RegExp * Scanner::strToCaseInsensitiveRE(SubStr s) const{	s.len -= 2;	s.str += 1;	if (s.len == 0)		return new NullOp;	uint c = unescape(s);	RegExp *re, *reL, *reU;	if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))	{		reL = matchChar(xlat(tolower(c)));		reU = matchChar(xlat(toupper(c)));		re = mkAlt(reL, reU);	}	else	{		re = matchChar(c);	}	while (s.len > 0)	{		uint c = unescape(s);		if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))		{			reL = matchChar(xlat(tolower(c)));			reU = matchChar(xlat(toupper(c)));			re = new CatOp(re, mkAlt(reL, reU));		}		else		{			re = new CatOp(re, matchChar(c));		}	}	return re;}RegExp * Scanner::ranToRE(SubStr s) const{	s.len -= 2;	s.str += 1;	if (s.len == 0)		return new NullOp;	Range *r = getRange(s);	while (s.len > 0)		r = doUnion(r, getRange(s));	return new MatchOp(r);}RegExp * Scanner::invToRE(SubStr s) const{	s.len--;	s.str++;		RegExp * any = ranToRE(SubStr(wFlag ? "[\\X0000-\\XFFFF]" : "[\\000-\\377]"));	if (s.len <= 2)	{		return any;	}	RegExp * ran = ranToRE(s);	RegExp * inv = mkDiff(any, ran);		delete ran;	delete any;		return inv;}RegExp * Scanner::mkDot() const{	RegExp * any = ranToRE(SubStr(wFlag ? "[\\X0000-\\XFFFF]" : "[\\000-\\377]"));	RegExp * ran = matchChar(xlat('\n'));	RegExp * inv = mkDiff(any, ran);		delete ran;	delete any;		return inv;}const char *RuleOp::type = "RuleOp";RuleOp::RuleOp(RegExp *e, RegExp *c, Token *t, uint a)	: exp(e)	, ctx(c)	, ins(NULL)	, accept(a)	, code(t)	, line(0){	;}void RuleOp::calcSize(Char *rep){	exp->calcSize(rep);	ctx->calcSize(rep);	size = exp->size + (ctx->size ? ctx->size + 2 : 1);}void RuleOp::compile(Char *rep, Ins *i){	ins = i;	exp->compile(rep, &i[0]);	i += exp->size;	if (ctx->size)	{		i->i.tag = CTXT;		i->i.link = &i[1];		i++;		ctx->compile(rep, &i[0]);		i += ctx->size;	}	i->i.tag = TERM;	i->i.link = this;}void RuleOp::split(CharSet &s){	exp->split(s);	ctx->split(s);}void optimize(Ins *i){	while (!isMarked(i))	{		mark(i);		if (i->i.tag == CHAR)		{			i = (Ins*) i->i.link;		}		else if (i->i.tag == GOTO || i->i.tag == FORK)		{			Ins *target = (Ins*) i->i.link;			optimize(target);			if (target->i.tag == GOTO)				i->i.link = target->i.link == target ? i : target;			if (i->i.tag == FORK)			{				Ins *follow = (Ins*) & i[1];				optimize(follow);				if (follow->i.tag == GOTO && follow->i.link == follow)				{					i->i.tag = GOTO;				}				else if (i->i.link == i)				{					i->i.tag = GOTO;					i->i.link = follow;				}			}			return ;		}		else		{			++i;		}	}}void genCode(std::ostream& o, RegExp *re){	genCode(o, 0, re);}CharSet::CharSet()	: fix(0)	, freeHead(0)	, freeTail(0)	, rep(new CharPtr[nRealChars])	, ptn(new CharPtn[nRealChars]){	for (uint j = 0; j < nRealChars; ++j)	{		rep[j] = &ptn[0];		ptn[j].nxt = &ptn[j + 1]; /* wrong for j=nRealChars but will be corrected below */		ptn[j].card = 0;	}	freeHead = &ptn[1];	*(freeTail = &ptn[nRealChars - 1].nxt) = NULL;	ptn[0].card = nRealChars;	ptn[0].nxt = NULL;}	CharSet::~CharSet(){	delete[] rep;	delete[] ptn;}void genCode(std::ostream& o, uint ind, RegExp *re){	CharSet cs;	uint j;	re->split(cs);	/*	    for(uint k = 0; k < nChars;){		for(j = k; ++k < nRealChars && cs.rep[k] == cs.rep[j];);		printSpan(cerr, j, k);		cerr << "\t" << cs.rep[j] - &cs.ptn[0] << endl;	    }	*/	Char *rep = new Char[nRealChars];	for (j = 0; j < nRealChars; ++j)	{		if (!cs.rep[j]->nxt)			cs.rep[j]->nxt = &cs.ptn[j];		rep[j] = (Char) (cs.rep[j]->nxt - &cs.ptn[0]);	}	re->calcSize(rep);	Ins *ins = new Ins[re->size + 1];	memset(ins, 0, (re->size + 1)*sizeof(Ins));	re->compile(rep, ins);	Ins *eoi = &ins[re->size];	eoi->i.tag = GOTO;	eoi->i.link = eoi;	optimize(ins);	for (j = 0; j < re->size;)	{		unmark(&ins[j]);		if (ins[j].i.tag == CHAR)		{			j = (Ins*) ins[j].i.link - ins;		}		else		{			j++;		}	}	DFA *dfa = new DFA(ins, re->size, 0, nRealChars, rep);	dfa->emit(o, ind);	delete dfa;	delete [] ins;	delete [] rep;}} // end namespace re2c

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?