📄 standardlexerrules.java
字号:
package fri.patterns.interpreter.parsergenerator.lexer;import fri.patterns.interpreter.parsergenerator.Token;/** Standard lexer rules are building blocks for lexers dealing with text input. This class resolves nonterminals enclosed in `backquotes` within an EBNF, e.g. `cstylecomment`. <p> Furthermore it provides methods to retrieve sets of rules describing certain standard scan items like `number` or `identifier`. The resulting arrays can be built together by <i>SyntaxUtil.catenizeRules(...)</i>. <p> This class provides rules for comments with an arbitrary start character or start/end sequence: <ul> <li>getCustomOneLineCommentRules(String startChar)</li> and <li>getCustomMultiLineCommentRules(String startSeq, String endSeq)</li>. </ul> <p> Example (CStyleCommentStrip): <pre> String [][] rules = { { Token.TOKEN, "others" }, // define what we want to receive { Token.TOKEN, "`stringdef`" }, // need this rule as string definitions could contain comments { Token.IGNORED, "`cstylecomment`" }, { "others", "others", "other" }, { "others", "other" }, { "other", "`char`", Token.BUTNOT, "`cstylecomment`", Token.BUTNOT, "`stringdef`" }, }; Syntax syntax = new Syntax(rules); SyntaxSeparation separation = new SyntaxSeparation(syntax); LexerBuilder builder = new LexerBuilder(separation.getLexerSyntax(), separation.getIgnoredSymbols()); Lexer lexer = builder.getLexer(); </pre> TODO: Refactor this class and make smaller units with better names. @see fri.patterns.interpreter.parsergenerator.lexer.LexerBuilder @author (c) 2002, Fritz Ritzberger*/public abstract class StandardLexerRules{ /** Returns e.g. the Letter-Rules <i>getUnicodeLetterRules()</i> for id "letter". Using this, one can write things like `identifier` in a Lexer specification text, as LexerBuilder tries to resolve these words calling this method. Possible values for id are: <ul> <li>char (all UNICODE characters)</li> <li>newline</li> <li>newlines</li> <li>space</li> <li>spaces</li> <li>whitespace</li> <li>whitespaces</li> <li>letter</li> <li>digit</li> <li>digits</li> <li>hexdigit</li> <li>hexdigits (does NOT include preceeding "0x")</li> <li>identifier</li> <li>stringdef</li> <li>chardef</li> <li>bnf_chardef (differs as in BNF characters can be written as "020" instead of '\020')</li> <li>ruleref (`lexerrule`)</li> <li>quantifier (*+?)</li> <li>cstylecomment</li> <li>comment</li> <li>shellstylecomment</li> <li>octdigits</li> <li>bindigits</li> <li>number</li> <li>float</li> <li>integer</li> <li>xmlchar</li> <li>combiningchar</li> <li>extenderchar</li> </ul> */ public static String [][] rulesForIdentifier(String id) { //System.err.println("searching for syntax rules for nonterminal "+id); if (id.equals("char")) return getUnicodeCharRules(); if (id.equals("newline")) return getNewlineRules(); if (id.equals("newlines")) return getNewlinesRules(); if (id.equals("space")) return getSpaceRules(); if (id.equals("spaces")) return getSpacesRules(); if (id.equals("whitespace")) return getWhitespaceRules(); if (id.equals("whitespaces")) return getWhitespacesRules(); if (id.equals("letter")) return getUnicodeLetterRules(); if (id.equals("digit")) return getUnicodeDigitRules(); if (id.equals("digits")) return getUnicodeDigitsRules(); if (id.equals("hexdigit")) return getHexDigitRules(); if (id.equals("hexdigits")) return getHexDigitsRules(); if (id.equals("octdigits")) return getOctDigitsRules(); if (id.equals("bindigits")) return getBinDigitsRules(); if (id.equals("number")) return getNumberRules(); if (id.equals("integer")) return getIntegerRules(); if (id.equals("float")) return getFloatRules(); if (id.equals("identifier")) return getUnicodeIdentifierRules(); if (id.equals("stringdef")) return getUnicodeStringdefRules(); if (id.equals("chardef")) return getUnicodeChardefRules(); if (id.equals("bnf_chardef")) return getUnicodeBNFChardefRules(); if (id.equals("ruleref")) return getRulerefRules(); if (id.equals("quantifier")) return getQuantifierRules(); if (id.equals("comment")) return getCommentRules(); if (id.equals("cstylecomment")) return getCStyleCommentRules(); if (id.equals("shellstylecomment")) return getShellStyleCommentRules(); if (id.equals("xmlchar")) return getUnicodeXmlCharRules(); if (id.equals("combiningchar")) return getUnicodeCombiningCharRules(); if (id.equals("extenderchar")) return getUnicodeExtenderCharRules(); return null; } /** Returns rules for a custom comment (like C-style "//", but with passed start sequence). @param nonterminalName name of comment to be used within syntax, e.g. "basicComment". @param startChar string (1-n characters) defining the start sequence of the comment, e.g. ";" */ public static final String [][] getCustomOneLineCommentRules(String nonterminalName, String startChar) { String [][] sarr0 = getUnicodeCharRules(); String [][] sarr1 = getNewlineRules(); String [][] sarr2 = getSomeRules(290, 296); String [] customRule = new String[sarr2[0].length]; System.arraycopy(sarr2[0], 0, customRule, 0, customRule.length); customRule[0] = nonterminalName; customRule[1] = "\""+startChar+"\""; // put custom sequence where where "//" sits sarr2[0] = customRule; return catenizeRules(new String [][][] { sarr0, sarr1, sarr2 }); } /** Returns rules for a custom comment (like C-style "/*", but with passed start and end sequence). @param nonterminalName name of comment to be used within syntax, e.g. "pascalComment". @param startSeq string defining the start sequence of the comment, e.g. "(*" @param endSeq string defining the end sequence of the comment, e.g. "*)" */ public static final String [][] getCustomMultiLineCommentRules(String nonterminalName, String startSeq, String endSeq) { String [][] sarr0 = getUnicodeCharRules(); String [][] sarr1 = getNewlineRules(); String [][] customRules = new String [6][]; customRules[0] = new String [] { nonterminalName, "\""+startSeq+"\"", "char_minus_star_slash_list_opt", "\""+endSeq+"\"" }; customRules[1] = new String [] { "char_minus_"+nonterminalName, "char", Token.BUTNOT, "\""+endSeq+"\"" }; customRules[2] = new String [] { "char_minus_"+nonterminalName+"_list", "char_minus_"+nonterminalName+"_list", "char_minus_"+nonterminalName }; customRules[3] = new String [] { "char_minus_"+nonterminalName+"_list", "char_minus_"+nonterminalName }; customRules[4] = new String [] { "char_minus_"+nonterminalName+"_list_opt", "char_minus_"+nonterminalName+"_list" }; customRules[5] = new String [] { "char_minus_"+nonterminalName+"_list_opt" /*nothing*/ }; return catenizeRules(new String [][][] { sarr0, sarr1, customRules }); } /** Rules to scan one UNICODE character: 0x0 .. 0xFFFF. */ public static final String [][] getUnicodeCharRules() { return getSomeRules(21, 22); } /** Rules to scan one platform independent newline. */ public static final String [][] getNewlineRules() { return getSomeRules(16, 21); } /** Rules to scan one platform independent newline. */ public static final String [][] getNewlinesRules() { String [][] sarr0 = getNewlineRules(); String [][] sarr1 = getSomeRules(0, 2, newlinesRules); return catenizeRules(new String [][][] { sarr0, sarr1 }); } /** Rules to scan one space. */ public static final String [][] getSpaceRules() { return getSomeRules(13, 16); } /** Rules to scan spaces. */ public static final String [][] getSpacesRules() { String [][] sarr0 = getSpaceRules(); String [][] sarr1 = getSomeRules(242, 244); return catenizeRules(new String [][][] { sarr0, sarr1 }); } /** Rules to scan one space or newline. */ public static final String [][] getWhitespaceRules() { String [][] sarr0 = getSpaceRules(); String [][] sarr1 = getNewlineRules(); String [][] sarr2 = getSomeRules(0, 2, whitespaceRules); return catenizeRules(new String [][][] { sarr0, sarr1, sarr2 }); } /** Rules to scan spaces or newlines. */ public static final String [][] getWhitespacesRules() { String [][] sarr0 = getSpacesRules(); String [][] sarr1 = getNewlinesRules(); String [][] sarr2 = getSomeRules(0, 4, whitespaceRules); return catenizeRules(new String [][][] { sarr0, sarr1, sarr2 }); } /** Rules to scan one hexdigit. */ public static final String [][] getHexDigitRules() { return getSomeRules(10, 13); } /** Rules to scan hexdigits that form a number, starting "0x" not included. */ public static final String [][] getHexDigitsRules() { String [][] sarr0 = getHexDigitRules(); String [][] sarr1 = getSomeRules(246, 248); // more hexdigits return catenizeRules(new String [][][] { sarr0, sarr1 }); } /** Rules to scan one letter. */ public static final String [][] getUnicodeLetterRules() { return getSomeRules(37, 242); } /** Rules to scan one digit. */ public static final String [][] getUnicodeDigitRules() { return getSomeRules(22, 37); } /** Rules to scan digits. */ public static final String [][] getUnicodeDigitsRules() { String [][] sarr0 = getUnicodeDigitRules(); String [][] sarr1 = getSomeRules(244, 246); // more digits return catenizeRules(new String [][][] { sarr0, sarr1 }); } /** Rules to scan identifiers that start with letter and continue with letter or digit or '_'. */ public static final String [][] getUnicodeIdentifierRules() { String [][] sarr0 = getUnicodeDigitRules(); String [][] sarr1 = getUnicodeLetterRules(); String [][] sarr2 = getSomeRules(259, 268); return catenizeRules(new String [][][] { sarr0, sarr1, sarr2 }); } /** Rules to scan C/Java-like 'c'haracterdefinitions: '\377', 'A', '\n'. */ public static final String [][] getUnicodeChardefRules() { String [][] sarr0 = getUnicodeCharRules(); String [][] sarr1 = getSomeRules(0, 1, digitRules); // octdigit String [][] sarr2 = getSomeRules(0, 2, chardefRules); String [][] sarr3 = getSomeRules(248, 249); // part of bnf_chardef String [][] sarr4 = getSomeRules(251, 258); // part of bnf_chardef return catenizeRules(new String [][][] { sarr0, sarr1, sarr2, sarr3, sarr4 }); } /** Rules to scan BNF-like 'c'haracterdefinitions. They differ from C/Java-chardefs in that they can be written as digits: 0x20. */ public static final String [][] getUnicodeBNFChardefRules() { String [][] sarr0 = getUnicodeCharRules(); String [][] sarr1 = getHexDigitsRules(); String [][] sarr2 = getUnicodeDigitsRules(); String [][] sarr3 = getSomeRules(248, 259); return catenizeRules(new String [][][] { sarr0, sarr1, sarr2, sarr3 }); } /** Rules to scan "stringdefinitions" that can contain backslash as masking character. */ public static final String [][] getUnicodeStringdefRules() { String [][] sarr0 = getUnicodeCharRules(); String [][] sarr1 = getSomeRules(268, 284); return catenizeRules(new String [][][] { sarr0, sarr1 }); } /** Rules to read a `lexerrule` within EBNF syntax specifications. */ public static final String [][] getRulerefRules() { String [][] sarr0 = getUnicodeIdentifierRules(); String [][] sarr1 = getSomeRules(297, 298); return catenizeRules(new String [][][] { sarr0, sarr1 }); } /** Rules to read quantifiers "*+?" within EBNF syntax specifications. */ public static final String [][] getQuantifierRules() { return getSomeRules(7, 10); } /** Rules to scan C-style slash-star and slash-slash AND shell-style # comments. */ public static final String [][] getCommentRules() { String [][] sarr0 = getCStyleCommentRules(); String [][] sarr1 = getSomeRules(296, 297); String [][] sarr2 = getSomeRules(299, 301); return catenizeRules(new String [][][] { sarr0, sarr1, sarr2 }); } /** Rules to scan C-style slash-star and slash-slash comments. */ public static final String [][] getCStyleCommentRules() { String [][] sarr0 = getUnicodeCharRules(); String [][] sarr1 = getNewlineRules(); String [][] sarr2 = getSomeRules(284, 296); return catenizeRules(new String [][][] { sarr0, sarr1, sarr2 }); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -