📄 perltokenmaker.java
字号:
tokenMap.put("undef", function);
tokenMap.put("unlink", function);
tokenMap.put("unpack", function);
tokenMap.put("unshift", function);
tokenMap.put("untie", function);
tokenMap.put("use", function);
tokenMap.put("utime", function);
tokenMap.put("values", function);
tokenMap.put("vec", function);
tokenMap.put("wait", function);
tokenMap.put("waitpid", function);
tokenMap.put("wantarray", function);
tokenMap.put("warn", function);
tokenMap.put("write", function);
return tokenMap;
}
/*****************************************************************************/
/**
* Returns a list of tokens representing the given text.
*
* @param text The text to break into tokens.
* @param startTokenType The token type with which to start tokenizing.
* @param startOffset The offset at which the line of tokens begins.
* @return A linked list of tokens representing <code>text</code>.
*/
public Token getTokenList(Segment text, int startTokenType, final int startOffset) {
resetTokenList();
char[] array = text.array;
int offset = text.offset;
int count = text.count;
int end = offset + count;
// When we find a token, its starting position is always of the form
// 'startOffset + (currentTokenStart-offset)'. Since startOffset and
// offset are constant, we precompute their difference so that a token's
// starting position becomes 'newStartOffset+currentTokenStart', which
// needs one fewer subtraction operation.
int newStartOffset = startOffset - offset;
currentTokenStart = offset;
currentTokenType = startTokenType;
boolean backslash = false;
boolean numContainsExponent = false;
boolean numContainsEndCharacter = false;
for (int i=offset; i<end; i++) {
char c = array[i];
switch (currentTokenType) {
case Token.NULL:
currentTokenStart = i; // Starting a new token here.
switch (c) {
case ' ':
case '\t':
currentTokenType = Token.WHITESPACE;
break;
case '`':
if (backslash) { // Escaped back quote => call '`' an identifier..
addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
backslash = false;
}
else {
currentTokenType = Token.LITERAL_BACKQUOTE;
}
break;
case '"':
if (backslash) { // Escaped double quote => call '"' an identifier..
addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
backslash = false;
}
else {
currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
}
break;
case '\'':
if (backslash) { // Escaped single quote => call '\'' an identifier.
addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
backslash = false;
}
else {
currentTokenType = Token.LITERAL_CHAR;
}
break;
case '\\':
addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
currentTokenType = Token.NULL;
backslash = !backslash;
break;
case '$':
case '@':
if (backslash) { // Escaped dollar sign => call '$' an identifier..
addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
backslash = false;
}
else {
currentTokenType = Token.VARIABLE;
}
break;
case '#':
backslash = false;
currentTokenType = Token.COMMENT_EOL;
break;
default:
if (RSyntaxUtilities.isDigit(c)) {
currentTokenType = Token.LITERAL_NUMBER_DECIMAL_INT;
break;
}
else if (RSyntaxUtilities.isLetter(c) || c=='/' || c=='_') {
currentTokenType = Token.IDENTIFIER;
break;
}
int indexOf = operators.indexOf(c,0);
if (indexOf>-1) {
currentTokenType = Token.OPERATOR;
break;
}
indexOf = separators.indexOf(c,0);
if (indexOf>-1) {
addToken(text, currentTokenStart,i, Token.SEPARATOR, newStartOffset+currentTokenStart);
currentTokenType = Token.NULL;
break;
}
indexOf = separators2.indexOf(c,0);
if (indexOf>-1) {
addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
currentTokenType = Token.NULL;
break;
}
else {
currentTokenType = Token.IDENTIFIER;
break;
}
} // End of switch (c).
break;
case Token.WHITESPACE:
switch (c) {
case ' ':
case '\t':
break; // Still whitespace.
case '\\':
addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
currentTokenType = Token.NULL;
backslash = true; // Previous char whitespace => this must be first backslash.
break;
case '`': // Don't need to worry about backslashes as previous char is space.
addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
currentTokenStart = i;
currentTokenType = Token.LITERAL_BACKQUOTE;
backslash = false;
break;
case '"': // Don't need to worry about backslashes as previous char is space.
addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
currentTokenStart = i;
currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
backslash = false;
break;
case '\'': // Don't need to worry about backslashes as previous char is space.
addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
currentTokenStart = i;
currentTokenType = Token.LITERAL_CHAR;
backslash = false;
break;
case '$': // Don't need to worry about backslashes as previous char is space.
case '@':
addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
currentTokenStart = i;
currentTokenType = Token.VARIABLE;
backslash = false;
break;
case '#':
addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
currentTokenStart = i;
currentTokenType = Token.COMMENT_EOL;
break;
default: // Add the whitespace token and start anew.
addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
currentTokenStart = i;
if (RSyntaxUtilities.isDigit(c)) {
currentTokenType = Token.LITERAL_NUMBER_DECIMAL_INT;
break;
}
else if (RSyntaxUtilities.isLetter(c) || c=='/' || c=='_') {
currentTokenType = Token.IDENTIFIER;
break;
}
int indexOf = operators.indexOf(c,0);
if (indexOf>-1) {
currentTokenType = Token.OPERATOR;
break;
}
indexOf = separators.indexOf(c,0);
if (indexOf>-1) {
addToken(text, i,i, Token.SEPARATOR, newStartOffset+i);
currentTokenType = Token.NULL;
break;
}
indexOf = separators2.indexOf(c,0);
if (indexOf>-1) {
addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
currentTokenType = Token.NULL;
break;
}
else {
currentTokenType = Token.IDENTIFIER;
}
} // End of switch (c).
break;
case Token.IDENTIFIER:
switch (c) {
case ' ':
case '\t':
addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
currentTokenStart = i;
currentTokenType = Token.WHITESPACE;
break;
case '`': // Don't need to worry about backslashes as previous char is space.
addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
currentTokenStart = i;
currentTokenType = Token.LITERAL_BACKQUOTE;
backslash = false;
break;
case '"': // Don't need to worry about backslashes as previous char is non-backslash.
addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
currentTokenStart = i;
currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
backslash = false;
break;
case '\'': // Don't need to worry about backslashes as previous char is non-backslash.
addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
currentTokenStart = i;
currentTokenType = Token.LITERAL_CHAR;
backslash = false;
break;
case '\\':
addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
currentTokenType = Token.NULL;
backslash = true;
break;
case '$': // Don't need to worry about backslashes as previous char is non-backslash.
case '@':
addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
currentTokenStart = i;
currentTokenType = Token.VARIABLE;
backslash = false;
break;
default:
if (RSyntaxUtilities.isLetterOrDigit(c) || c=='/' || c=='_') {
break; // Still an identifier of some type.
}
int indexOf = operators.indexOf(c);
if (indexOf>-1) {
addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
currentTokenStart = i;
currentTokenType = Token.OPERATOR;
break;
}
indexOf = separators.indexOf(c,0);
if (indexOf>-1) {
addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
addToken(text, i,i, Token.SEPARATOR, newStartOffset+i);
currentTokenType = Token.NULL;
break;
}
indexOf = separators2.indexOf(c,0);
if (indexOf>-1) {
addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
currentTokenType = Token.NULL;
break;
}
// Otherwise, we're still an identifier (?).
} // End of switch (c).
break;
case Token.LITERAL_NUMBER_DECIMAL_INT:
// Reset our boolean states if we only have one digit char before
// the current one.
if (currentTokenStart==i-1) {
numContainsExponent = false;
numContainsEndCharacter = false;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -