📄 lexhtml.cxx
字号:
// Scintilla source code edit control/** @file LexHTML.cxx ** Lexer for HTML. **/// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>// The License.txt file describes the conditions under which this software may be distributed.#include <stdlib.h>#include <string.h>#include <ctype.h>#include <stdio.h>#include <stdarg.h>#include "Platform.h"#include "PropSet.h"#include "Accessor.h"#include "StyleContext.h"#include "KeyWords.h"#include "Scintilla.h"#include "SciLexer.h"#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)#define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)enum script_type { eScriptNone = 0, eScriptJS, eScriptVBS, eScriptPython, eScriptPHP, eScriptXML, eScriptSGML, eScriptSGMLblock };enum script_mode { eHtml = 0, eNonHtmlScript, eNonHtmlPreProc, eNonHtmlScriptPreProc };static inline bool IsAWordChar(const int ch) { return (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_');}static inline bool IsAWordStart(const int ch) { return (ch < 0x80) && (isalnum(ch) || ch == '_');}static inline int MakeLowerCase(int ch) { if (ch < 'A' || ch > 'Z') return ch; else return ch - 'A' + 'a';}static void GetTextSegment(Accessor &styler, unsigned int start, unsigned int end, char *s, size_t len) { size_t i = 0; for (; (i < end - start + 1) && (i < len-1); i++) { s[i] = static_cast<char>(MakeLowerCase(styler[start + i])); } s[i] = '\0';}static script_type segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, script_type prevValue) { char s[100]; GetTextSegment(styler, start, end, s, sizeof(s)); //Platform::DebugPrintf("Scripting indicator [%s]\n", s); if (strstr(s, "src")) // External script return eScriptNone; if (strstr(s, "vbs")) return eScriptVBS; if (strstr(s, "pyth")) return eScriptPython; if (strstr(s, "javas")) return eScriptJS; if (strstr(s, "jscr")) return eScriptJS; if (strstr(s, "php")) return eScriptPHP; if (strstr(s, "xml")) return eScriptXML; return prevValue;}static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) { int iResult = 0; char s[100]; GetTextSegment(styler, start, end, s, sizeof(s)); if (0 == strncmp(s, "php", 3)) { iResult = 3; } return iResult;}static script_type ScriptOfState(int state) { if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) { return eScriptPython; } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) { return eScriptVBS; } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) { return eScriptJS; } else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) { return eScriptPHP; } else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) { return eScriptSGML; } else if (state == SCE_H_SGML_BLOCK_DEFAULT) { return eScriptSGMLblock; } else { return eScriptNone; }}static int statePrintForState(int state, script_mode inScriptType) { int StateToPrint; if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) { StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON); } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) { StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS); } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) { StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS); } else { StateToPrint = state; } return StateToPrint;}static int stateForPrintState(int StateToPrint) { int state; if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) { state = StateToPrint - SCE_HA_PYTHON; } else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) { state = StateToPrint - SCE_HA_VBS; } else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) { state = StateToPrint - SCE_HA_JS; } else { state = StateToPrint; } return state;}static inline bool IsNumber(unsigned int start, Accessor &styler) { return IsADigit(styler[start]) || (styler[start] == '.') || (styler[start] == '-') || (styler[start] == '#');}static inline bool isStringState(int state) { bool bResult; switch (state) { case SCE_HJ_DOUBLESTRING: case SCE_HJ_SINGLESTRING: case SCE_HJA_DOUBLESTRING: case SCE_HJA_SINGLESTRING: case SCE_HB_STRING: case SCE_HBA_STRING: case SCE_HP_STRING: case SCE_HP_CHARACTER: case SCE_HP_TRIPLE: case SCE_HP_TRIPLEDOUBLE: case SCE_HPA_STRING: case SCE_HPA_CHARACTER: case SCE_HPA_TRIPLE: case SCE_HPA_TRIPLEDOUBLE: case SCE_HPHP_HSTRING: case SCE_HPHP_SIMPLESTRING: case SCE_HPHP_HSTRING_VARIABLE: case SCE_HPHP_COMPLEX_VARIABLE: bResult = true; break; default : bResult = false; break; } return bResult;}static inline bool stateAllowsTermination(int state) { bool allowTermination = !isStringState(state); if (allowTermination) { switch (state) { case SCE_HPHP_COMMENT: case SCE_HP_COMMENTLINE: case SCE_HPA_COMMENTLINE: allowTermination = false; } } return allowTermination;}// not really well done, since it's only comments that should lex the %> and <%static inline bool isCommentASPState(int state) { bool bResult; switch (state) { case SCE_HJ_COMMENT: case SCE_HJ_COMMENTLINE: case SCE_HJ_COMMENTDOC: case SCE_HB_COMMENTLINE: case SCE_HP_COMMENTLINE: case SCE_HPHP_COMMENT: case SCE_HPHP_COMMENTLINE: bResult = true; break; default : bResult = false; break; } return bResult;}static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) { bool wordIsNumber = IsNumber(start, styler); char chAttr = SCE_H_ATTRIBUTEUNKNOWN; if (wordIsNumber) { chAttr = SCE_H_NUMBER; } else { char s[100]; GetTextSegment(styler, start, end, s, sizeof(s)); if (keywords.InList(s)) chAttr = SCE_H_ATTRIBUTE; } if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords) // No keywords -> all are known chAttr = SCE_H_ATTRIBUTE; styler.ColourTo(end, chAttr);}static int classifyTagHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, bool &tagDontFold, bool caseSensitive) { char s[30 + 2]; // Copy after the '<' unsigned int i = 0; for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) { char ch = styler[cPos]; if ((ch != '<') && (ch != '/')) { s[i++] = caseSensitive ? ch : static_cast<char>(MakeLowerCase(ch)); } } //The following is only a quick hack, to see if this whole thing would work //we first need the tagname with a trailing space... s[i] = ' '; s[i+1] = '\0'; //...to find it in the list of no-container-tags // (There are many more. We will need a keywordlist in the property file for this) tagDontFold = (NULL != strstr("meta link img area br hr input ",s)); //now we can remove the trailing space s[i] = '\0'; bool isScript = false; char chAttr = SCE_H_TAGUNKNOWN; if (s[0] == '!') { chAttr = SCE_H_SGML_DEFAULT; } else if (s[0] == '/') { // Closing tag if (keywords.InList(s + 1)) chAttr = SCE_H_TAG; } else { if (keywords.InList(s)) { chAttr = SCE_H_TAG; isScript = 0 == strcmp(s, "script"); } } if ((chAttr == SCE_H_TAGUNKNOWN) && !keywords) { // No keywords -> all are known chAttr = SCE_H_TAG; isScript = 0 == strcmp(s, "script"); } styler.ColourTo(end, chAttr); return isScript ? SCE_H_SCRIPT : chAttr;}static void classifyWordHTJS(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) { char chAttr = SCE_HJ_WORD; bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.'); if (wordIsNumber) chAttr = SCE_HJ_NUMBER; else { char s[30 + 1]; unsigned int i = 0; for (; i < end - start + 1 && i < 30; i++) { s[i] = styler[start + i]; } s[i] = '\0'; if (keywords.InList(s)) chAttr = SCE_HJ_KEYWORD; } styler.ColourTo(end, statePrintForState(chAttr, inScriptType));}static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) { char chAttr = SCE_HB_IDENTIFIER; bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.'); if (wordIsNumber) chAttr = SCE_HB_NUMBER; else { char s[100]; GetTextSegment(styler, start, end, s, sizeof(s)); if (keywords.InList(s)) { chAttr = SCE_HB_WORD; if (strcmp(s, "rem") == 0) chAttr = SCE_HB_COMMENTLINE; } } styler.ColourTo(end, statePrintForState(chAttr, inScriptType)); if (chAttr == SCE_HB_COMMENTLINE) return SCE_HB_COMMENTLINE; else return SCE_HB_DEFAULT;}static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType) { bool wordIsNumber = IsADigit(styler[start]); char s[30 + 1]; unsigned int i = 0; for (; i < end - start + 1 && i < 30; i++) { s[i] = styler[start + i]; } s[i] = '\0'; char chAttr = SCE_HP_IDENTIFIER; if (0 == strcmp(prevWord, "class")) chAttr = SCE_HP_CLASSNAME; else if (0 == strcmp(prevWord, "def")) chAttr = SCE_HP_DEFNAME; else if (wordIsNumber) chAttr = SCE_HP_NUMBER; else if (keywords.InList(s)) chAttr = SCE_HP_WORD; styler.ColourTo(end, statePrintForState(chAttr, inScriptType)); strcpy(prevWord, s);}// Update the word colour to default or keyword// Called when in a PHP wordstatic void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) { char chAttr = SCE_HPHP_DEFAULT; bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1])); if (wordIsNumber) chAttr = SCE_HPHP_NUMBER; else { char s[100]; GetTextSegment(styler, start, end, s, sizeof(s)); if (keywords.InList(s)) chAttr = SCE_HPHP_WORD; } styler.ColourTo(end, chAttr);}static bool isWordHSGML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) { char s[30 + 1]; unsigned int i = 0; for (; i < end - start + 1 && i < 30; i++) { s[i] = styler[start + i]; } s[i] = '\0'; return keywords.InList(s);}static bool isWordCdata(unsigned int start, unsigned int end, Accessor &styler) { char s[30 + 1]; unsigned int i = 0; for (; i < end - start + 1 && i < 30; i++) { s[i] = styler[start + i]; } s[i] = '\0'; return (0 == strcmp(s, "[CDATA["));}// Return the first state to reach when entering a scripting languagestatic int StateForScript(script_type scriptLanguage) { int Result; switch (scriptLanguage) { case eScriptVBS: Result = SCE_HB_START; break; case eScriptPython: Result = SCE_HP_START; break; case eScriptPHP: Result = SCE_HPHP_DEFAULT; break; case eScriptXML: Result = SCE_H_TAGUNKNOWN; break; case eScriptSGML: Result = SCE_H_SGML_DEFAULT; break; default : Result = SCE_HJ_START; break; } return Result;}static inline bool ishtmlwordchar(char ch) { return !isascii(ch) || (isalnum(ch) || ch == '.' || ch == '-' || ch == '_' || ch == ':' || ch == '!' || ch == '#');}static inline bool issgmlwordchar(char ch) { return !isascii(ch) || (isalnum(ch) || ch == '.' || ch == '_' || ch == ':' || ch == '!' || ch == '#' || ch == '[');}static inline bool IsPhpWordStart(const unsigned char ch) { return (isascii(ch) && (isalpha(ch) || (ch == '_'))) || (ch >= 0x7f);}static inline bool IsPhpWordChar(char ch) { return IsADigit(ch) || IsPhpWordStart(ch);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -