📄 perltokenmaker.java

📁 具有不同语法高亮的编辑器实例
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
		tokenMap.put("undef",			function);
		tokenMap.put("unlink",			function);
		tokenMap.put("unpack",			function);
		tokenMap.put("unshift",			function);
		tokenMap.put("untie",			function);
		tokenMap.put("use",				function);
		tokenMap.put("utime",			function);
		tokenMap.put("values",			function);
		tokenMap.put("vec",				function);
		tokenMap.put("wait",				function);
		tokenMap.put("waitpid",			function);
		tokenMap.put("wantarray",			function);
		tokenMap.put("warn",				function);
		tokenMap.put("write",			function);

		return tokenMap;

	}


/*****************************************************************************/


	/**
	 * Returns a list of tokens representing the given text.
	 *
	 * @param text The text to break into tokens.
	 * @param startTokenType The token with which to start tokenizing.
	 * @param startOffset The offset at which the line of tokens begins.
	 * @return A linked list of tokens representing <code>text</code>.
	 */
	public Token getTokenList(Segment text, int startTokenType, final int startOffset) {

		resetTokenList();

		char[] array = text.array;
		int offset = text.offset;
		int count = text.count;
		int end = offset + count;

		// See, when we find a token, its starting position is always of the form:
		// 'startOffset + (currentTokenStart-offset)'; but since startOffset and
		// offset are constant, tokens' starting positions become:
		// 'newStartOffset+currentTokenStart' for one less subraction operation.
		int newStartOffset = startOffset - offset;

		currentTokenStart = offset;
		currentTokenType  = startTokenType;
		boolean backslash = false;
		boolean numContainsExponent = false;
		boolean numContainsEndCharacter = false;

		for (int i=offset; i<end; i++) {

			char c = array[i];

			switch (currentTokenType) {

				case Token.NULL:

					currentTokenStart = i;	// Starting a new token here.

					switch (c) {

						case ' ':
						case '\t':
							currentTokenType = Token.WHITESPACE;
							break;

						case '`':
							if (backslash) { // Escaped back quote => call '`' an identifier..
								addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
								backslash = false;
							}
							else {
								currentTokenType = Token.LITERAL_BACKQUOTE;
							}
							break;

						case '"':
							if (backslash) { // Escaped double quote => call '"' an identifier..
								addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
								backslash = false;
							}
							else {
								currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
							}
							break;

						case '\'':
							if (backslash) { // Escaped single quote => call '\'' an identifier.
								addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
								backslash = false;
							}
							else {
								currentTokenType = Token.LITERAL_CHAR;
							}
							break;

						case '\\':
							addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
							currentTokenType = Token.NULL;
							backslash = !backslash;
							break;

						case '$':
						case '@':
							if (backslash) { // Escaped dollar sign => call '$' an identifier..
								addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
								backslash = false;
							}
							else {
								currentTokenType = Token.VARIABLE;
							}
							break;

						case '#':
							backslash = false;
							currentTokenType = Token.COMMENT_EOL;
							break;

						default:
							if (RSyntaxUtilities.isDigit(c)) {
								currentTokenType = Token.LITERAL_NUMBER_DECIMAL_INT;
								break;
							}
							else if (RSyntaxUtilities.isLetter(c) || c=='/' || c=='_') {
								currentTokenType = Token.IDENTIFIER;
								break;
							}
							int indexOf = operators.indexOf(c,0);
							if (indexOf>-1) {
								currentTokenType = Token.OPERATOR;
								break;
							}
							indexOf = separators.indexOf(c,0);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i, Token.SEPARATOR, newStartOffset+currentTokenStart);
								currentTokenType = Token.NULL;
								break;
							}
							indexOf = separators2.indexOf(c,0);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
								currentTokenType = Token.NULL;
								break;
							}
							else {
								currentTokenType = Token.IDENTIFIER;
								break;
							}

					} // End of switch (c).

					break;

				case Token.WHITESPACE:

					switch (c) {

						case ' ':
						case '\t':
							break;	// Still whitespace.

						case '\\':
							addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
							addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
							currentTokenType = Token.NULL;
							backslash = true; // Previous char whitespace => this must be first backslash.
							break;

						case '`': // Don't need to worry about backslashes as previous char is space.
							addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.LITERAL_BACKQUOTE;
							backslash = false;
							break;

						case '"': // Don't need to worry about backslashes as previous char is space.
							addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
							backslash = false;
							break;

						case '\'': // Don't need to worry about backslashes as previous char is space.
							addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.LITERAL_CHAR;
							backslash = false;
							break;
						
						case '$': // Don't need to worry about backslashes as previous char is space.
						case '@':
							addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.VARIABLE;
							backslash = false;
							break;

						case '#':
							addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.COMMENT_EOL;
							break;

						default:	// Add the whitespace token and start anew.

							addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
							currentTokenStart = i;

							if (RSyntaxUtilities.isDigit(c)) {
								currentTokenType = Token.LITERAL_NUMBER_DECIMAL_INT;
								break;
							}
							else if (RSyntaxUtilities.isLetter(c) || c=='/' || c=='_') {
								currentTokenType = Token.IDENTIFIER;
								break;
							}
							int indexOf = operators.indexOf(c,0);
							if (indexOf>-1) {
								currentTokenType = Token.OPERATOR;
								break;
							}
							indexOf = separators.indexOf(c,0);
							if (indexOf>-1) {
								addToken(text, i,i, Token.SEPARATOR, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;
							}
							indexOf = separators2.indexOf(c,0);
							if (indexOf>-1) {
								addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;
							}
							else {
								currentTokenType = Token.IDENTIFIER;
							}

					} // End of switch (c).

					break;

				case Token.IDENTIFIER:

					switch (c) {

						case ' ':
						case '\t':
							addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.WHITESPACE;
							break;

						case '`': // Don't need to worry about backslashes as previous char is space.
							addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.LITERAL_BACKQUOTE;
							backslash = false;
							break;

						case '"': // Don't need to worry about backslashes as previous char is non-backslash.
							addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
							backslash = false;
							break;

						case '\'': // Don't need to worry about backslashes as previous char is non-backslash.
							addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.LITERAL_CHAR;
							backslash = false;
							break;

						case '\\':
							addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
							addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
							currentTokenType = Token.NULL;
							backslash = true;
							break;

						case '$': // Don't need to worry about backslashes as previous char is non-backslash.
						case '@':
							addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.VARIABLE;
							backslash = false;
							break;
						
						default:
							if (RSyntaxUtilities.isLetterOrDigit(c) || c=='/' || c=='_') {
								break;	// Still an identifier of some type.
							}
							int indexOf = operators.indexOf(c);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
								currentTokenStart = i;
								currentTokenType = Token.OPERATOR;
								break;
							}
							indexOf = separators.indexOf(c,0);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
								addToken(text, i,i, Token.SEPARATOR, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;
							}
							indexOf = separators2.indexOf(c,0);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
								addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;
							}
							// Otherwise, we're still an identifier (?).

					} // End of switch (c).

					break;

				case Token.LITERAL_NUMBER_DECIMAL_INT:

					// Reset our boolean states if we only have one digit char before
					// the current one.
					if (currentTokenStart==i-1) {
						numContainsExponent = false;
						numContainsEndCharacter = false;
💿 文件大小 4021 K
👤 上传用户 johni
📂 所属分类 Java编程
🏷️ 相关标签

#编辑器
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -