📄 lexperl.cxx
字号:
// Scintilla source code edit control/** @file LexPerl.cxx ** Lexer for subset of Perl. **/// Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>// Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>// The License.txt file describes the conditions under which this software may be distributed.#include <stdlib.h>#include <string.h>#include <ctype.h>#include <stdio.h>#include <stdarg.h>#include "Platform.h"#include "PropSet.h"#include "Accessor.h"#include "KeyWords.h"#include "Scintilla.h"#include "SciLexer.h"#ifdef SCI_NAMESPACEusing namespace Scintilla;#endif#define PERLNUM_BINARY 1 // order is significant: 1-4 cannot have a dot#define PERLNUM_HEX 2#define PERLNUM_OCTAL 3#define PERLNUM_FLOAT 4 // actually exponent part#define PERLNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings#define PERLNUM_VECTOR 6#define PERLNUM_V_VECTOR 7#define PERLNUM_BAD 8#define BACK_NONE 0 // lookback state for bareword disambiguation:#define BACK_OPERATOR 1 // whitespace/comments are insignificant#define BACK_KEYWORD 2 // operators/keywords are needed for disambiguation#define HERE_DELIM_MAX 256static inline bool isEOLChar(char ch) { return (ch == '\r') || (ch == '\n');}static bool isSingleCharOp(char ch) { char strCharSet[2]; strCharSet[0] = ch; strCharSet[1] = '\0'; return (NULL != strstr("rwxoRWXOezsfdlpSbctugkTBMAC", strCharSet));}static inline bool isPerlOperator(char ch) { if (ch == '^' || ch == '&' || ch == '\\' || ch == '(' || ch == ')' || ch == '-' || ch == '+' || ch == '=' || ch == '|' || ch == '{' || ch == '}' || ch == '[' || ch == ']' || ch == ':' || ch == ';' || ch == '>' || ch == ',' || ch == '?' || ch == '!' || ch == '.' || ch == '~') return true; // these chars are already tested before this call // ch == '%' || ch == '*' || ch == '<' || ch == '/' || return false;}static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) { char s[100]; unsigned int i, len = end - start; if (len > 30) { len = 30; } for (i = 0; i < len; i++, start++) s[i] = styler[start]; s[i] = '\0'; return keywords.InList(s);}// Note: as lexer uses chars, UTF-8 bytes are considered as <0 values// Note: iswordchar() was used in only one place in LexPerl, it is// unnecessary as '.' is processed as the concatenation operator, so// only isWordStart() is used in LexPerlstatic inline bool isWordStart(char ch) { return !isascii(ch) || isalnum(ch) || ch == '_';}static inline bool isEndVar(char ch) { return isascii(ch) && !isalnum(ch) && ch != '#' && ch != '$' && ch != '_' && ch != '\'';}static inline bool isNonQuote(char ch) { return !isascii(ch) || isalnum(ch) || ch == '_';}static inline char actualNumStyle(int numberStyle) { if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) { return SCE_PL_STRING; } else if (numberStyle == PERLNUM_BAD) { return SCE_PL_ERROR; } return SCE_PL_NUMBER;}static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) { if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) { return false; } while (*val) { if (*val != styler[pos++]) { return false; } val++; } return true;}static char opposite(char ch) { if (ch == '(') return ')'; if (ch == '[') return ']'; if (ch == '{') return '}'; if (ch == '<') return '>'; return ch;}static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[], Accessor &styler) { // Lexer for perl often has to backtrack to start of current style to determine // which characters are being used as quotes, how deeply nested is the // start position and what the termination string is for here documents WordList &keywords = *keywordlists[0]; // keywords that forces /PATTERN/ at all times WordList reWords; reWords.Set("elsif if split while"); class HereDocCls { public: int State; // 0: '<<' encountered // 1: collect the delimiter // 2: here doc text (lines after the delimiter) char Quote; // the char after '<<' bool Quoted; // true if Quote in ('\'','"','`') int DelimiterLength; // strlen(Delimiter) char *Delimiter; // the Delimiter, 256: sizeof PL_tokenbuf HereDocCls() { State = 0; Quote = 0; Quoted = false; DelimiterLength = 0; Delimiter = new char[HERE_DELIM_MAX]; Delimiter[0] = '\0'; } ~HereDocCls() { delete []Delimiter; } }; HereDocCls HereDoc; // TODO: FIFO for stacked here-docs class QuoteCls { public: int Rep; int Count; char Up; char Down; QuoteCls() { this->New(1); } void New(int r) { Rep = r; Count = 0; Up = '\0'; Down = '\0'; } void Open(char u) { Count++; Up = u; Down = opposite(Up); } }; QuoteCls Quote; int state = initStyle; char numState = PERLNUM_DECIMAL; int dotCount = 0; unsigned int lengthDoc = startPos + length; //int sookedpos = 0; // these have no apparent use, see POD state //char sooked[100]; //sooked[sookedpos] = '\0'; styler.StartAt(startPos, static_cast<char>(STYLE_MAX)); // If in a long distance lexical state, seek to the beginning to find quote characters // Perl strings can be multi-line with embedded newlines, so backtrack. // Perl numbers have additional state during lexing, so backtrack too. if (state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX) { while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_HERE_DELIM)) { startPos--; } startPos = styler.LineStart(styler.GetLine(startPos)); state = styler.StyleAt(startPos - 1); } // Backtrack for format body. if (state == SCE_PL_FORMAT) { while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_FORMAT_IDENT)) { startPos--; } startPos = styler.LineStart(styler.GetLine(startPos)); state = styler.StyleAt(startPos - 1); } if ( state == SCE_PL_STRING_Q || state == SCE_PL_STRING_QQ || state == SCE_PL_STRING_QX || state == SCE_PL_STRING_QR || state == SCE_PL_STRING_QW || state == SCE_PL_REGEX || state == SCE_PL_REGSUBST || state == SCE_PL_STRING || state == SCE_PL_BACKTICKS || state == SCE_PL_CHARACTER || state == SCE_PL_NUMBER || state == SCE_PL_IDENTIFIER || state == SCE_PL_ERROR || state == SCE_PL_SUB_PROTOTYPE ) { while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) { startPos--; } state = SCE_PL_DEFAULT; } // lookback at start of lexing to set proper state for backflag // after this, they are updated when elements are lexed int backflag = BACK_NONE; unsigned int backPos = startPos; if (backPos > 0) { backPos--; int sty = SCE_PL_DEFAULT; while ((backPos > 0) && (sty = styler.StyleAt(backPos), sty == SCE_PL_DEFAULT || sty == SCE_PL_COMMENTLINE)) backPos--; if (sty == SCE_PL_OPERATOR) backflag = BACK_OPERATOR; else if (sty == SCE_PL_WORD) backflag = BACK_KEYWORD; } styler.StartAt(startPos, static_cast<char>(STYLE_MAX)); char chPrev = styler.SafeGetCharAt(startPos - 1); if (startPos == 0) chPrev = '\n'; char chNext = styler[startPos]; styler.StartSegment(startPos); for (unsigned int i = startPos; i < lengthDoc; i++) { char ch = chNext; // if the current character is not consumed due to the completion of an // earlier style, lexing can be restarted via a simple goto restartLexer: chNext = styler.SafeGetCharAt(i + 1); char chNext2 = styler.SafeGetCharAt(i + 2); if (styler.IsLeadByte(ch)) { chNext = styler.SafeGetCharAt(i + 2); chPrev = ' '; i += 1; continue; } if ((chPrev == '\r' && ch == '\n')) { // skip on DOS/Windows styler.ColourTo(i, state); chPrev = ch; continue; } if (HereDoc.State == 1 && isEOLChar(ch)) { // Begin of here-doc (the line after the here-doc delimiter): // Lexically, the here-doc starts from the next line after the >>, but the // first line of here-doc seem to follow the style of the last EOL sequence HereDoc.State = 2; if (HereDoc.Quoted) { if (state == SCE_PL_HERE_DELIM) { // Missing quote at end of string! We are stricter than perl. // Colour here-doc anyway while marking this bit as an error. state = SCE_PL_ERROR; } styler.ColourTo(i - 1, state); switch (HereDoc.Quote) { case '\'': state = SCE_PL_HERE_Q ; break; case '"': state = SCE_PL_HERE_QQ; break; case '`': state = SCE_PL_HERE_QX; break; } } else { styler.ColourTo(i - 1, state); switch (HereDoc.Quote) { case '\\': state = SCE_PL_HERE_Q ; break; default : state = SCE_PL_HERE_QQ; } } } if (HereDoc.State == 4 && isEOLChar(ch)) { // Start of format body. HereDoc.State = 0; styler.ColourTo(i - 1, state); state = SCE_PL_FORMAT; } if (state == SCE_PL_DEFAULT) { if ((isascii(ch) && isdigit(ch)) || (isascii(chNext) && isdigit(chNext) && (ch == '.' || ch == 'v'))) { state = SCE_PL_NUMBER; backflag = BACK_NONE; numState = PERLNUM_DECIMAL; dotCount = 0; if (ch == '0') { // hex,bin,octal if (chNext == 'x') { numState = PERLNUM_HEX; } else if (chNext == 'b') { numState = PERLNUM_BINARY; } else if (isascii(chNext) && isdigit(chNext)) { numState = PERLNUM_OCTAL; } if (numState != PERLNUM_DECIMAL) { i++; ch = chNext; chNext = chNext2; } } else if (ch == 'v') { // vector numState = PERLNUM_V_VECTOR; } } else if (isWordStart(ch)) { // if immediately prefixed by '::', always a bareword state = SCE_PL_WORD; if (chPrev == ':' && styler.SafeGetCharAt(i - 2) == ':') { state = SCE_PL_IDENTIFIER; } unsigned int kw = i + 1; // first check for possible quote-like delimiter if (ch == 's' && !isNonQuote(chNext)) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -