⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lexperl.cxx

📁 非常好用的可移植的多平台C/C++源代码编辑器
💻 CXX
📖 第 1 页 / 共 3 页
字号:
// Scintilla source code edit control
/** @file LexPerl.cxx
 ** Lexer for subset of Perl.
 **/
// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
// Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
// The License.txt file describes the conditions under which this software may be distributed.

#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include <stdarg.h>

#include "Platform.h"

#include "PropSet.h"
#include "Accessor.h"
#include "KeyWords.h"
#include "Scintilla.h"
#include "SciLexer.h"

#define PERLNUM_DECIMAL 1
#define PERLNUM_NON_DEC 2
#define PERLNUM_FLOAT 3
#define PERLNUM_VECTOR 4
#define PERLNUM_V_VECTOR 5

#define HERE_DELIM_MAX 256

static inline bool isEOLChar(char ch) {
	return (ch == '\r') || (ch == '\n');
}

static bool isSingleCharOp(char ch) {
	char strCharSet[2];
	strCharSet[0] = ch;
	strCharSet[1] = '\0';
	return (NULL != strstr("rwxoRWXOezsfdlpSbctugkTBMAC", strCharSet));
}

static inline bool isPerlOperator(char ch) {
	if (ch == '^' || ch == '&' || ch == '\\' ||
	        ch == '(' || ch == ')' || ch == '-' || ch == '+' ||
	        ch == '=' || ch == '|' || ch == '{' || ch == '}' ||
	        ch == '[' || ch == ']' || ch == ':' || ch == ';' ||
	        ch == '>' || ch == ',' || 
	        ch == '?' || ch == '!' || ch == '.' || ch == '~')
		return true;
	// these chars are already tested before this call
	// ch == '%' || ch == '*' || ch == '<' || ch == '/' ||
	return false;
}

static int classifyWordPerl(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
	char s[100];
	for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) {
		s[i] = styler[start + i];
		s[i + 1] = '\0';
	}
	char chAttr = SCE_PL_IDENTIFIER;
	if (keywords.InList(s))
		chAttr = SCE_PL_WORD;
	styler.ColourTo(end, chAttr);
	return chAttr;
}

static inline bool isEndVar(char ch) {
	return !isalnum(ch) && ch != '#' && ch != '$' &&
	       ch != '_' && ch != '\'';
}


static inline bool isNonQuote(char ch) {
	return isalnum(ch) || ch == '_';
}

static inline char actualNumStyle(int numberStyle) {
	switch (numberStyle) {
	case PERLNUM_VECTOR:
	case PERLNUM_V_VECTOR:
		return SCE_PL_STRING;
	case PERLNUM_DECIMAL:
	case PERLNUM_NON_DEC:
	case PERLNUM_FLOAT:
	default:
		return SCE_PL_NUMBER;
	}
}

static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
	if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
		return false;
	}
	while (*val) {
		if (*val != styler[pos++]) {
			return false;
		}
		val++;
	}
	return true;
}

static char opposite(char ch) {
	if (ch == '(')
		return ')';
	if (ch == '[')
		return ']';
	if (ch == '{')
		return '}';
	if (ch == '<')
		return '>';
	return ch;
}

static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
                             WordList *keywordlists[], Accessor &styler) {

	// Lexer for perl often has to backtrack to start of current style to determine
	// which characters are being used as quotes, how deeply nested is the
	// start position and what the termination string is for here documents

	WordList &keywords = *keywordlists[0];

	class HereDocCls {
	public:
		int State;		// 0: '<<' encountered
		// 1: collect the delimiter
		// 2: here doc text (lines after the delimiter)
		char Quote;		// the char after '<<'
		bool Quoted;		// true if Quote in ('\'','"','`')
		int DelimiterLength;	// strlen(Delimiter)
		char *Delimiter;	// the Delimiter, 256: sizeof PL_tokenbuf
		HereDocCls() {
			State = 0;
			DelimiterLength = 0;
			Delimiter = new char[HERE_DELIM_MAX];
			Delimiter[0] = '\0';
		}
		~HereDocCls() {
			delete []Delimiter;
		}
	};
	HereDocCls HereDoc;	// TODO: FIFO for stacked here-docs

	class QuoteCls {
		public:
		int  Rep;
		int  Count;
		char Up;
		char Down;
		QuoteCls() {
			this->New(1);
		}
		void New(int r) {
			Rep   = r;
			Count = 0;
			Up    = '\0';
			Down  = '\0';
		}
		void Open(char u) {
			Count++;
			Up    = u;
			Down  = opposite(Up);
		}
	};
	QuoteCls Quote;

	int state = initStyle;
	char numState = PERLNUM_DECIMAL;
	int dotCount = 0;
	unsigned int lengthDoc = startPos + length;
	//int sookedpos = 0; // these have no apparent use, see POD state
	//char sooked[100];
	//sooked[sookedpos] = '\0';

	// If in a long distance lexical state, seek to the beginning to find quote characters
	// Perl strings can be multi-line with embedded newlines, so backtrack.
	// Perl numbers have additional state during lexing, so backtrack too.
	if (state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX) {
		while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_HERE_DELIM)) {
			startPos--;
		}
		startPos = styler.LineStart(styler.GetLine(startPos));
		state = styler.StyleAt(startPos - 1);
	}
	if ( state == SCE_PL_STRING_Q
	|| state == SCE_PL_STRING_QQ
	|| state == SCE_PL_STRING_QX
	|| state == SCE_PL_STRING_QR
	|| state == SCE_PL_STRING_QW
	|| state == SCE_PL_REGEX
	|| state == SCE_PL_REGSUBST
	|| state == SCE_PL_STRING
	|| state == SCE_PL_BACKTICKS
	|| state == SCE_PL_CHARACTER
	|| state == SCE_PL_NUMBER
	|| state == SCE_PL_IDENTIFIER
	) {
		while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) {
			startPos--;
		}
		state = SCE_PL_DEFAULT;
	}

	styler.StartAt(startPos);
	char chPrev = styler.SafeGetCharAt(startPos - 1);
	if (startPos == 0)
		chPrev = '\n';
	char chNext = styler[startPos];
	styler.StartSegment(startPos);

	for (unsigned int i = startPos; i < lengthDoc; i++) {
		char ch = chNext;
		// if the current character is not consumed due to the completion of an
		// earlier style, lexing can be restarted via a simple goto
	restartLexer:
		chNext = styler.SafeGetCharAt(i + 1);
		char chNext2 = styler.SafeGetCharAt(i + 2);

		if (styler.IsLeadByte(ch)) {
			chNext = styler.SafeGetCharAt(i + 2);
			chPrev = ' ';
			i += 1;
			continue;
		}
		if ((chPrev == '\r' && ch == '\n')) {	// skip on DOS/Windows
			styler.ColourTo(i, state);
			chPrev = ch;
			continue;
		}

		if (HereDoc.State == 1 && isEOLChar(ch)) {
			// Begin of here-doc (the line after the here-doc delimiter):
			// Lexically, the here-doc starts from the next line after the >>, but the
			// first line of here-doc seem to follow the style of the last EOL sequence
			HereDoc.State = 2;
			if (HereDoc.Quoted) {
				if (state == SCE_PL_HERE_DELIM) {
					// Missing quote at end of string! We are stricter than perl.
					// Colour here-doc anyway while marking this bit as an error.
					state = SCE_PL_ERROR;
				}
				styler.ColourTo(i - 1, state);
				switch (HereDoc.Quote) {
				case '\'':
					state = SCE_PL_HERE_Q ;
					break;
				case '"':
					state = SCE_PL_HERE_QQ;
					break;
				case '`':
					state = SCE_PL_HERE_QX;
					break;
				}
			} else {
				styler.ColourTo(i - 1, state);
				switch (HereDoc.Quote) {
				case '\\':
					state = SCE_PL_HERE_Q ;
					break;
				default :
					state = SCE_PL_HERE_QQ;
				}
			}
		}

		if (state == SCE_PL_DEFAULT) {
			if (isdigit(ch) || (isdigit(chNext) &&
				(ch == '.' || ch == 'v'))) {
				state = SCE_PL_NUMBER;
				numState = PERLNUM_DECIMAL;
				dotCount = 0;
				if (ch == '0') {	// hex,bin,octal
					if (chNext == 'x' || chNext == 'b' || isdigit(chNext)) {
						numState = PERLNUM_NON_DEC;
					}
				} else if (ch == 'v') {	// vector
					numState = PERLNUM_V_VECTOR;
				}
			} else if (iswordstart(ch)) {
				if (chPrev == '>' && styler.SafeGetCharAt(i - 2) == '-') {
					state = SCE_PL_IDENTIFIER;	// part of "->" expr
					if ((!iswordchar(chNext) && chNext != '\'')
						|| (chNext == '.' && chNext2 == '.')) {
						// We need that if length of word == 1!
						styler.ColourTo(i, SCE_PL_IDENTIFIER);
						state = SCE_PL_DEFAULT;
					}
				} else if (ch == 's' && !isNonQuote(chNext)) {
					state = SCE_PL_REGSUBST;
					Quote.New(2);
				} else if (ch == 'm' && !isNonQuote(chNext)) {
					state = SCE_PL_REGEX;
					Quote.New(1);
				} else if (ch == 'q' && !isNonQuote(chNext)) {
					state = SCE_PL_STRING_Q;
					Quote.New(1);
				} else if (ch == 'y' && !isNonQuote(chNext)) {
					state = SCE_PL_REGSUBST;
					Quote.New(2);
				} else if (ch == 't' && chNext == 'r' && !isNonQuote(chNext2)) {
					state = SCE_PL_REGSUBST;
					Quote.New(2);
					i++;
					chNext = chNext2;
				} else if (ch == 'q' && (chNext == 'q' || chNext == 'r' || chNext == 'w' || chNext == 'x') && !isNonQuote(chNext2)) {
					if      (chNext == 'q') state = SCE_PL_STRING_QQ;
					else if (chNext == 'x') state = SCE_PL_STRING_QX;
					else if (chNext == 'r') state = SCE_PL_STRING_QR;
					else if (chNext == 'w') state = SCE_PL_STRING_QW;
					i++;
					chNext = chNext2;
					Quote.New(1);
				} else if (ch == 'x' && (chNext == '=' ||	// repetition
					   (chNext != '_' && !isalnum(chNext)) ||
					   (isdigit(chPrev) && isdigit(chNext)))) {
					styler.ColourTo(i, SCE_PL_OPERATOR);
				} else {
					state = SCE_PL_WORD;
					if ((!iswordchar(chNext) && chNext != '\'')
						|| (chNext == '.' && chNext2 == '.')) {
						// We need that if length of word == 1!
						// This test is copied from the SCE_PL_WORD handler.
						classifyWordPerl(styler.GetStartSegment(), i, keywords, styler);
						state = SCE_PL_DEFAULT;
					}
				}
			} else if (ch == '#') {
				state = SCE_PL_COMMENTLINE;
			} else if (ch == '\"') {
				state = SCE_PL_STRING;
				Quote.New(1);
				Quote.Open(ch);
			} else if (ch == '\'') {
				if (chPrev == '&') {
					// Archaic call
					styler.ColourTo(i, state);
				} else {
					state = SCE_PL_CHARACTER;
					Quote.New(1);
					Quote.Open(ch);
				}
			} else if (ch == '`') {
				state = SCE_PL_BACKTICKS;
				Quote.New(1);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -