standardlexerrules.java

来自「java 词法分析器,用于一般的C,C++,VB,PS/SQL 语句的翻译」· Java 代码 · 共 947 行 · 第 1/3 页
JAVA
947 行
package fri.patterns.interpreter.parsergenerator.lexer;import fri.patterns.interpreter.parsergenerator.Token;/**	Standard lexer rules are building blocks for lexers dealing with text input.	This class resolves nonterminals enclosed in `backquotes` within an EBNF,	e.g. `cstylecomment`.	<p>	Furthermore it provides methods to retrieve sets of rules describing certain standard	scan items like `number` or `identifier`. The resulting arrays can be built together	by <i>SyntaxUtil.catenizeRules(...)</i>.	<p>	This class provides rules for comments with an arbitrary start character or start/end sequence:	<ul>		<li>getCustomOneLineCommentRules(String startChar)</li> and		<li>getCustomMultiLineCommentRules(String startSeq, String endSeq)</li>.	</ul>	<p>	Example (CStyleCommentStrip):	<pre>	String [][] rules = {		{ Token.TOKEN, "others" },	// define what we want to receive		{ Token.TOKEN, "`stringdef`" },	// need this rule as string definitions could contain comments		{ Token.IGNORED, "`cstylecomment`" },		{ "others", "others", "other" },		{ "others", "other" },		{ "other", "`char`", Token.BUTNOT, "`cstylecomment`", Token.BUTNOT, "`stringdef`" },	};	Syntax syntax = new Syntax(rules);	SyntaxSeparation separation = new SyntaxSeparation(syntax);	LexerBuilder builder = new LexerBuilder(separation.getLexerSyntax(), separation.getIgnoredSymbols());	Lexer lexer = builder.getLexer();	</pre>	TODO: Refactor this class and make smaller units with better names.	@see fri.patterns.interpreter.parsergenerator.lexer.LexerBuilder	@author (c) 2002, Fritz Ritzberger*/public abstract class StandardLexerRules{	/**		Returns e.g. the Letter-Rules <i>getUnicodeLetterRules()</i> for id "letter".		Using this, one can write things like `identifier` in a Lexer specification text,		as LexerBuilder tries to resolve these words calling this method.		Possible values for id are:		<ul>			<li>char (all UNICODE characters)</li>			<li>newline</li>			<li>newlines</li>			<li>space</li>			<li>spaces</li>			<li>whitespace</li>			<li>whitespaces</li>			<li>letter</li>			<li>digit</li>			<li>digits</li>			<li>hexdigit</li>			<li>hexdigits (does NOT include preceeding "0x")</li>			<li>identifier</li>			<li>stringdef</li>			<li>chardef</li>			<li>bnf_chardef (differs as in BNF characters can be written as "020" instead of '\020')</li>			<li>ruleref	(`lexerrule`)</li>			<li>quantifier	(*+?)</li>			<li>cstylecomment</li>			<li>comment</li>			<li>shellstylecomment</li>			<li>octdigits</li>			<li>bindigits</li>			<li>number</li>			<li>float</li>			<li>integer</li>			<li>xmlchar</li>			<li>combiningchar</li>			<li>extenderchar</li>		</ul>	*/	public static String [][] rulesForIdentifier(String id)	{		//System.err.println("searching for syntax rules for nonterminal "+id);		if (id.equals("char"))			return getUnicodeCharRules();		if (id.equals("newline"))			return getNewlineRules();		if (id.equals("newlines"))			return getNewlinesRules();		if (id.equals("space"))			return getSpaceRules();		if (id.equals("spaces"))			return getSpacesRules();		if (id.equals("whitespace"))			return getWhitespaceRules();		if (id.equals("whitespaces"))			return getWhitespacesRules();		if (id.equals("letter"))			return getUnicodeLetterRules();		if (id.equals("digit"))			return getUnicodeDigitRules();		if (id.equals("digits"))			return getUnicodeDigitsRules();		if (id.equals("hexdigit"))			return getHexDigitRules();		if (id.equals("hexdigits"))			return getHexDigitsRules();		if (id.equals("octdigits"))			return getOctDigitsRules();		if (id.equals("bindigits"))			return getBinDigitsRules();		if (id.equals("number"))			return getNumberRules();		if (id.equals("integer"))			return getIntegerRules();		if (id.equals("float"))			return getFloatRules();		if (id.equals("identifier"))			return getUnicodeIdentifierRules();		if (id.equals("stringdef"))			return getUnicodeStringdefRules();		if (id.equals("chardef"))			return getUnicodeChardefRules();		if (id.equals("bnf_chardef"))			return getUnicodeBNFChardefRules();		if (id.equals("ruleref"))			return getRulerefRules();		if (id.equals("quantifier"))			return getQuantifierRules();		if (id.equals("comment"))			return getCommentRules();		if (id.equals("cstylecomment"))			return getCStyleCommentRules();		if (id.equals("shellstylecomment"))			return getShellStyleCommentRules();		if (id.equals("xmlchar"))			return getUnicodeXmlCharRules();		if (id.equals("combiningchar"))			return getUnicodeCombiningCharRules();		if (id.equals("extenderchar"))			return getUnicodeExtenderCharRules();		return null;	}	/**		Returns rules for a custom comment (like C-style "//", but with passed start sequence).		@param nonterminalName name of comment to be used within syntax, e.g. "basicComment".		@param startChar string (1-n characters) defining the start sequence of the comment, e.g. ";"	*/	public static final String [][] getCustomOneLineCommentRules(String nonterminalName, String startChar)	{		String [][] sarr0 = getUnicodeCharRules();		String [][] sarr1 = getNewlineRules();		String [][] sarr2 = getSomeRules(290, 296);		String [] customRule = new String[sarr2[0].length];		System.arraycopy(sarr2[0], 0, customRule, 0, customRule.length);		customRule[0] = nonterminalName;		customRule[1] = "\""+startChar+"\"";	// put custom sequence where where "//" sits		sarr2[0] = customRule;		return catenizeRules(new String [][][] { sarr0, sarr1, sarr2 });	}		/**		Returns rules for a custom comment (like C-style "/*", but with passed start and end sequence).		@param nonterminalName name of comment to be used within syntax, e.g. "pascalComment".		@param startSeq string defining the start sequence of the comment, e.g. "(*"		@param endSeq string defining the end sequence of the comment, e.g. "*)"	*/	public static final String [][] getCustomMultiLineCommentRules(String nonterminalName, String startSeq, String endSeq)	{		String [][] sarr0 = getUnicodeCharRules();		String [][] sarr1 = getNewlineRules();		String [][] customRules = new String [6][];		customRules[0] = new String [] { nonterminalName,  "\""+startSeq+"\"", "char_minus_star_slash_list_opt", "\""+endSeq+"\"" };		customRules[1] = new String [] { "char_minus_"+nonterminalName, "char", Token.BUTNOT, "\""+endSeq+"\"" };		customRules[2] = new String [] { "char_minus_"+nonterminalName+"_list", "char_minus_"+nonterminalName+"_list", "char_minus_"+nonterminalName };		customRules[3] = new String [] { "char_minus_"+nonterminalName+"_list", "char_minus_"+nonterminalName };		customRules[4] = new String [] { "char_minus_"+nonterminalName+"_list_opt", "char_minus_"+nonterminalName+"_list" };		customRules[5] = new String [] { "char_minus_"+nonterminalName+"_list_opt" /*nothing*/ };		return catenizeRules(new String [][][] { sarr0, sarr1, customRules });	}			/** Rules to scan one UNICODE character: 0x0 .. 0xFFFF. */	public static final String [][] getUnicodeCharRules()	{		return getSomeRules(21, 22);	}	/** Rules to scan one platform independent newline. */	public static final String [][] getNewlineRules()	{		return getSomeRules(16, 21);	}	/** Rules to scan one platform independent newline. */	public static final String [][] getNewlinesRules()	{		String [][] sarr0 = getNewlineRules();		String [][] sarr1 = getSomeRules(0, 2, newlinesRules);		return catenizeRules(new String [][][] { sarr0, sarr1 });	}	/** Rules to scan one space. */	public static final String [][] getSpaceRules()	{		return getSomeRules(13, 16);	}	/** Rules to scan spaces. */	public static final String [][] getSpacesRules()	{		String [][] sarr0 = getSpaceRules();		String [][] sarr1 = getSomeRules(242, 244);		return catenizeRules(new String [][][] { sarr0, sarr1 });	}	/** Rules to scan one space or newline. */	public static final String [][] getWhitespaceRules()	{		String [][] sarr0 = getSpaceRules();		String [][] sarr1 = getNewlineRules();		String [][] sarr2 = getSomeRules(0, 2, whitespaceRules);		return catenizeRules(new String [][][] { sarr0, sarr1, sarr2 });	}	/** Rules to scan spaces or newlines. */	public static final String [][] getWhitespacesRules()	{		String [][] sarr0 = getSpacesRules();		String [][] sarr1 = getNewlinesRules();		String [][] sarr2 = getSomeRules(0, 4, whitespaceRules);		return catenizeRules(new String [][][] { sarr0, sarr1, sarr2 });	}	/** Rules to scan one hexdigit. */	public static final String [][] getHexDigitRules()	{		return getSomeRules(10, 13);	}	/** Rules to scan hexdigits that form a number, starting "0x" not included. */	public static final String [][] getHexDigitsRules()	{		String [][] sarr0 = getHexDigitRules();		String [][] sarr1 = getSomeRules(246, 248);	// more hexdigits		return catenizeRules(new String [][][] { sarr0, sarr1 });	}	/** Rules to scan one letter. */	public static final String [][] getUnicodeLetterRules()	{		return getSomeRules(37, 242);	}	/** Rules to scan one digit. */	public static final String [][] getUnicodeDigitRules()	{		return getSomeRules(22, 37);	}	/** Rules to scan digits. */	public static final String [][] getUnicodeDigitsRules()	{		String [][] sarr0 = getUnicodeDigitRules();		String [][] sarr1 = getSomeRules(244, 246);	// more digits		return catenizeRules(new String [][][] { sarr0, sarr1 });	}		/** Rules to scan identifiers that start with letter and continue with letter or digit or '_'. */	public static final String [][] getUnicodeIdentifierRules()	{		String [][] sarr0 = getUnicodeDigitRules();		String [][] sarr1 = getUnicodeLetterRules();		String [][] sarr2 = getSomeRules(259, 268);		return catenizeRules(new String [][][] { sarr0, sarr1, sarr2 });	}		/** Rules to scan C/Java-like 'c'haracterdefinitions: '\377', 'A', '\n'. */	public static final String [][] getUnicodeChardefRules()	{		String [][] sarr0 = getUnicodeCharRules();		String [][] sarr1 = getSomeRules(0, 1, digitRules);	// octdigit		String [][] sarr2 = getSomeRules(0, 2, chardefRules);		String [][] sarr3 = getSomeRules(248, 249);	// part of bnf_chardef		String [][] sarr4 = getSomeRules(251, 258);	// part of bnf_chardef		return catenizeRules(new String [][][] { sarr0, sarr1, sarr2, sarr3, sarr4 });	}		/** Rules to scan BNF-like 'c'haracterdefinitions. They differ from C/Java-chardefs in that they can be written as digits: 0x20. */	public static final String [][] getUnicodeBNFChardefRules()	{		String [][] sarr0 = getUnicodeCharRules();		String [][] sarr1 = getHexDigitsRules();		String [][] sarr2 = getUnicodeDigitsRules();		String [][] sarr3 = getSomeRules(248, 259);		return catenizeRules(new String [][][] { sarr0, sarr1, sarr2, sarr3 });	}		/** Rules to scan "stringdefinitions" that can contain backslash as masking character. */	public static final String [][] getUnicodeStringdefRules()	{		String [][] sarr0 = getUnicodeCharRules();		String [][] sarr1 = getSomeRules(268, 284);		return catenizeRules(new String [][][] { sarr0, sarr1 });	}	/** Rules to read a `lexerrule` within EBNF syntax specifications. */	public static final String [][] getRulerefRules()	{		String [][] sarr0 = getUnicodeIdentifierRules();		String [][] sarr1 = getSomeRules(297, 298);		return catenizeRules(new String [][][] { sarr0, sarr1 });	}		/** Rules to read quantifiers "*+?" within EBNF syntax specifications. */	public static final String [][] getQuantifierRules()	{		return getSomeRules(7, 10);	}		/** Rules to scan C-style slash-star and slash-slash AND shell-style # comments. */	public static final String [][] getCommentRules()	{		String [][] sarr0 = getCStyleCommentRules();		String [][] sarr1 = getSomeRules(296, 297);		String [][] sarr2 = getSomeRules(299, 301);		return catenizeRules(new String [][][] { sarr0, sarr1, sarr2 });	}		/** Rules to scan C-style slash-star and slash-slash comments. */	public static final String [][] getCStyleCommentRules()	{		String [][] sarr0 = getUnicodeCharRules();		String [][] sarr1 = getNewlineRules();		String [][] sarr2 = getSomeRules(284, 296);		return catenizeRules(new String [][][] { sarr0, sarr1, sarr2 });	}
standardlexerrules.java - 源码说明

本页面展示了「java 词法分析器,用于一般的C,C++,VB,PS/SQL 语句的翻译」中的 standardlexerrules.java 源码文件，采用 Java 编程语言编写，共 947 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与java相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?