lexperl.cxx

来自「robocup rcssserver 运行仿真机器人足球比赛所用的服务器端」· CXX 代码 · 共 1,233 行 · 第 1/3 页
CXX
1,233 行
// Scintilla source code edit control/** @file LexPerl.cxx ** Lexer for subset of Perl. **/// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>// Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>// The License.txt file describes the conditions under which this software may be distributed.#include <stdlib.h>#include <string.h>#include <ctype.h>#include <stdio.h>#include <stdarg.h>#include "Platform.h"#include "PropSet.h"#include "Accessor.h"#include "KeyWords.h"#include "Scintilla.h"#include "SciLexer.h"#define PERLNUM_BINARY 1    // order is significant: 1-4 cannot have a dot#define PERLNUM_HEX 2#define PERLNUM_OCTAL 3#define PERLNUM_FLOAT 4     // actually exponent part#define PERLNUM_DECIMAL 5   // 1-5 are numbers; 6-7 are strings#define PERLNUM_VECTOR 6#define PERLNUM_V_VECTOR 7#define PERLNUM_BAD 8#define BACK_NONE 0         // lookback state for bareword disambiguation:#define BACK_OPERATOR 1     // whitespace/comments are insignificant#define BACK_KEYWORD 2      // operators/keywords are needed for disambiguation#define HERE_DELIM_MAX 256static inline bool isEOLChar(char ch) {	return (ch == '\r') || (ch == '\n');}static bool isSingleCharOp(char ch) {	char strCharSet[2];	strCharSet[0] = ch;	strCharSet[1] = '\0';	return (NULL != strstr("rwxoRWXOezsfdlpSbctugkTBMAC", strCharSet));}static inline bool isPerlOperator(char ch) {	if (ch == '^' || ch == '&' || ch == '\\' ||	        ch == '(' || ch == ')' || ch == '-' || ch == '+' ||	        ch == '=' || ch == '|' || ch == '{' || ch == '}' ||	        ch == '[' || ch == ']' || ch == ':' || ch == ';' ||	        ch == '>' || ch == ',' ||	        ch == '?' || ch == '!' || ch == '.' 
|| ch == '~')		return true;	// these chars are already tested before this call	// ch == '%' || ch == '*' || ch == '<' || ch == '/' ||	return false;}static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {	char s[100];    unsigned int i, len = end - start;    if (len > 30) { len = 30; }	for (i = 0; i < len; i++, start++) s[i] = styler[start];    s[i] = '\0';	return keywords.InList(s);}static inline bool isEndVar(char ch) {	return !isalnum(ch) && ch != '#' && ch != '$' &&	       ch != '_' && ch != '\'';}static inline bool isNonQuote(char ch) {	return isalnum(ch) || ch == '_';}static inline char actualNumStyle(int numberStyle) {    if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {        return SCE_PL_STRING;    } else if (numberStyle == PERLNUM_BAD) {        return SCE_PL_ERROR;    }    return SCE_PL_NUMBER;}static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {	if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {		return false;	}	while (*val) {		if (*val != styler[pos++]) {			return false;		}		val++;	}	return true;}static char opposite(char ch) {	if (ch == '(')		return ')';	if (ch == '[')		return ']';	if (ch == '{')		return '}';	if (ch == '<')		return '>';	return ch;}static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,                             WordList *keywordlists[], Accessor &styler) {	// Lexer for perl often has to backtrack to start of current style to determine	// which characters are being used as quotes, how deeply nested is the	// start position and what the termination string is for here documents	WordList &keywords = *keywordlists[0];	class HereDocCls {	public:		int State;		// 0: '<<' encountered		// 1: collect the delimiter		// 2: here doc text (lines after the delimiter)		char Quote;		// the char after '<<'		bool Quoted;		// true if Quote in ('\'','"','`')		int DelimiterLength;	// strlen(Delimiter)		char *Delimiter;	// the 
Delimiter, 256: sizeof PL_tokenbuf		HereDocCls() {			State = 0;            Quote = 0;            Quoted = false;			DelimiterLength = 0;			Delimiter = new char[HERE_DELIM_MAX];			Delimiter[0] = '\0';		}		~HereDocCls() {			delete []Delimiter;		}	};	HereDocCls HereDoc;	// TODO: FIFO for stacked here-docs	class QuoteCls {		public:		int  Rep;		int  Count;		char Up;		char Down;		QuoteCls() {			this->New(1);		}		void New(int r) {			Rep   = r;			Count = 0;			Up    = '\0';			Down  = '\0';		}		void Open(char u) {			Count++;			Up    = u;			Down  = opposite(Up);		}	};	QuoteCls Quote;	int state = initStyle;	char numState = PERLNUM_DECIMAL;	int dotCount = 0;	unsigned int lengthDoc = startPos + length;	//int sookedpos = 0; // these have no apparent use, see POD state	//char sooked[100];	//sooked[sookedpos] = '\0';	// If in a long distance lexical state, seek to the beginning to find quote characters	// Perl strings can be multi-line with embedded newlines, so backtrack.	// Perl numbers have additional state during lexing, so backtrack too.	
if (state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX) {		while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_HERE_DELIM)) {			startPos--;		}		startPos = styler.LineStart(styler.GetLine(startPos));		state = styler.StyleAt(startPos - 1);	}	if ( state == SCE_PL_STRING_Q	|| state == SCE_PL_STRING_QQ	|| state == SCE_PL_STRING_QX	|| state == SCE_PL_STRING_QR	|| state == SCE_PL_STRING_QW	|| state == SCE_PL_REGEX	|| state == SCE_PL_REGSUBST	|| state == SCE_PL_STRING	|| state == SCE_PL_BACKTICKS	|| state == SCE_PL_CHARACTER	|| state == SCE_PL_NUMBER	|| state == SCE_PL_IDENTIFIER    || state == SCE_PL_ERROR	) {		while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) {			startPos--;		}		state = SCE_PL_DEFAULT;	}    // lookback at start of lexing to set proper state for backflag    // after this, they are updated when elements are lexed    int backflag = BACK_NONE;    unsigned int backPos = startPos;    if (backPos > 0) {        backPos--;        int sty = SCE_PL_DEFAULT;        while ((backPos > 0) && (sty = styler.StyleAt(backPos),               sty == SCE_PL_DEFAULT || sty == SCE_PL_COMMENTLINE))            backPos--;        if (sty == SCE_PL_OPERATOR)            backflag = BACK_OPERATOR;        else if (sty == SCE_PL_WORD)            backflag = BACK_KEYWORD;    }	styler.StartAt(startPos);	char chPrev = styler.SafeGetCharAt(startPos - 1);	if (startPos == 0)		chPrev = '\n';	char chNext = styler[startPos];	styler.StartSegment(startPos);	for (unsigned int i = startPos; i < lengthDoc; i++) {		char ch = chNext;		// if the current character is not consumed due to the completion of an		// earlier style, lexing can be restarted via a simple goto	restartLexer:		chNext = styler.SafeGetCharAt(i + 1);		char chNext2 = styler.SafeGetCharAt(i + 2);		if (styler.IsLeadByte(ch)) {			chNext = styler.SafeGetCharAt(i + 2);			chPrev = ' ';			i += 1;			continue;		}		if ((chPrev == '\r' && ch == '\n')) {	// skip on DOS/Windows			styler.ColourTo(i, 
state);			chPrev = ch;			continue;		}		if (HereDoc.State == 1 && isEOLChar(ch)) {			// Begin of here-doc (the line after the here-doc delimiter):			// Lexically, the here-doc starts from the next line after the >>, but the			// first line of here-doc seem to follow the style of the last EOL sequence			HereDoc.State = 2;			if (HereDoc.Quoted) {				if (state == SCE_PL_HERE_DELIM) {					// Missing quote at end of string! We are stricter than perl.					// Colour here-doc anyway while marking this bit as an error.					state = SCE_PL_ERROR;				}				styler.ColourTo(i - 1, state);				switch (HereDoc.Quote) {				case '\'':					state = SCE_PL_HERE_Q ;					break;				case '"':					state = SCE_PL_HERE_QQ;					break;				case '`':					state = SCE_PL_HERE_QX;					break;				}			} else {				styler.ColourTo(i - 1, state);				switch (HereDoc.Quote) {				case '\\':					state = SCE_PL_HERE_Q ;					break;				default :					state = SCE_PL_HERE_QQ;				}			}		}		if (state == SCE_PL_DEFAULT) {			if (isdigit(ch) || (isdigit(chNext) &&				(ch == '.' 
|| ch == 'v'))) {				state = SCE_PL_NUMBER;                backflag = BACK_NONE;				numState = PERLNUM_DECIMAL;				dotCount = 0;				if (ch == '0') {	// hex,bin,octal					if (chNext == 'x') {						numState = PERLNUM_HEX;					} else if (chNext == 'b') {                        numState = PERLNUM_BINARY;                    } else if (isdigit(chNext)) {                        numState = PERLNUM_OCTAL;                    }                    if (numState != PERLNUM_DECIMAL) {						i++;						ch = chNext;						chNext = chNext2;                    }				} else if (ch == 'v') {	// vector					numState = PERLNUM_V_VECTOR;				}			} else if (iswordstart(ch)) {                // if immediately prefixed by '::', always a bareword                state = SCE_PL_WORD;                if (chPrev == ':' && styler.SafeGetCharAt(i - 2) == ':') {                    state = SCE_PL_IDENTIFIER;                }                unsigned int kw = i + 1;                // first check for possible quote-like delimiter				if (ch == 's' && !isNonQuote(chNext)) {					state = SCE_PL_REGSUBST;					Quote.New(2);				} else if (ch == 'm' && !isNonQuote(chNext)) {					state = SCE_PL_REGEX;					Quote.New(1);				} else if (ch == 'q' && !isNonQuote(chNext)) {					state = SCE_PL_STRING_Q;					Quote.New(1);				} else if (ch == 'y' && !isNonQuote(chNext)) {					state = SCE_PL_REGSUBST;					Quote.New(2);				} else if (ch == 't' && chNext == 'r' && !isNonQuote(chNext2)) {					state = SCE_PL_REGSUBST;					Quote.New(2);                    kw++;				} else if (ch == 'q' && (chNext == 'q' || chNext == 'r' || chNext == 'w' || chNext == 'x') && !isNonQuote(chNext2)) {					if      (chNext == 'q') state = SCE_PL_STRING_QQ;					else if (chNext == 'x') state = SCE_PL_STRING_QX;					else if (chNext == 'r') state = SCE_PL_STRING_QR;					else if (chNext == 'w') state = SCE_PL_STRING_QW;					Quote.New(1);                    kw++;				} else if (ch == 'x' && (chNext == '=' ||	// repetition                           (chNext != '_' 
&& !isalnum(chNext)) ||                           (isdigit(chPrev) && isdigit(chNext)))) {                    state = SCE_PL_OPERATOR;                }                // if potentially a keyword, scan forward and grab word, then check                // if it's really one; if yes, disambiguation test is performed                // otherwise it is always a bareword and we skip a lot of scanning                // note: keywords assumed to be limited to [_a-zA-Z] only                if (state == SCE_PL_WORD) {                    while (iswordstart(styler.SafeGetCharAt(kw))) kw++;                    if (!isPerlKeyword(styler.GetStartSegment(), kw, keywords, styler)) {                        state = SCE_PL_IDENTIFIER;                    }                }                // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this                // for quote-like delimiters/keywords, attempt to disambiguate                // to select for bareword, change state -> SCE_PL_IDENTIFIER                if (state != SCE_PL_IDENTIFIER && i > 0) {                    unsigned int j = i;                    bool moreback = false;      // true if passed newline/comments                    bool brace = false;         // true if opening brace found                    char ch2;                    // first look backwards past whitespace/comments for EOLs                    // if BACK_NONE, neither operator nor keyword, so skip test                    if (backflag != BACK_NONE) {                        while (--j > backPos) {                            if (isEOLChar(styler.SafeGetCharAt(j)))                                moreback = true;                        }                        ch2 = styler.SafeGetCharAt(j);                        if (ch2 == '{' && !moreback) {                            // {bareword: possible variable spec                            brace = true;                        } else if ((ch2 == '&')                                // &bareword: subroutine call            
                    || (ch2 == '>' && styler.SafeGetCharAt(j - 1) == '-')                                // ->bareword: part of variable spec                                || (ch2 == 'b' && styler.Match(j - 2, "su"))) {                                // sub bareword: subroutine declaration                                // (implied BACK_KEYWORD, no keywords end in 'sub'!)                            state = SCE_PL_IDENTIFIER;                        }                        // if status still ambiguous, look forward after word past                        // tabs/spaces only; if ch2 isn't one of '[{(,' it can never                        // match anything, so skip the whole thing                        j = kw;                        if (state != SCE_PL_IDENTIFIER                            && (ch2 == '{' || ch2 == '(' || ch2 == '['|| ch2 == ',')                            && kw < lengthDoc) {                            while (ch2 = styler.SafeGetCharAt(j),                                   (ch2 == ' ' || ch2 == '\t') && j < lengthDoc) {                                j++;                            }                            if ((ch2 == '}' && brace)                             // {bareword}: variable spec                             || (ch2 == '=' && styler.SafeGetCharAt(j + 1) == '>')) {                             // [{(, bareword=>: hash literal                                state = SCE_PL_IDENTIFIER;                            }                        }                    }                }                backflag = BACK_NONE;                // an identifier or bareword                if (state == SCE_PL_IDENTIFIER) {                    if ((!iswordchar(chNext) && chNext != '\'')                        || (chNext == '.' && chNext2 == '.')) {                        // We need that if length of word == 1!                        // This test is copied from the SCE_PL_WORD handler.                        
styler.ColourTo(i, SCE_PL_IDENTIFIER);                        state = SCE_PL_DEFAULT;
lexperl.cxx - 源码说明

本页面展示了「robocup rcssserver 运行仿真机器人足球比赛所用的服务器端」中的 lexperl.cxx 源码文件，采用 CXX 编程语言编写，共 1,233 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与rcssserver相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?