📄 html-lex
字号:
(* html-lex
 *
 * COPYRIGHT (c) 1995 AT&T Bell Laboratories.
 * COPYRIGHT (c) 1996 AT&T Research.
 *
 * A scanner for HTML.
 *
 * TODO:
 *   Recognize the DOCTYPE element
 *      <!DOCTYPE HTML PUBLIC "...">
 *   Clean-up the scanning of start tags (do we need Err?).
 *   Whitespace in PRE elements should be preserved, but how?
 *
 * Start states used by the rules:
 *   INITIAL  document text: end tags, comment openers, character and
 *            entity references, and PCDATA are recognized here.
 *   STAG     inside a start tag.  Every matched piece of text is appended
 *            to the string buffer; on the closing > the collected text is
 *            handed to Elems.startTag and the scanner returns to INITIAL.
 *   COM1     inside a comment opened by <!-- ; all input is discarded
 *            until a -- is seen.
 *   COM2     entered from COM1 on -- .  A > returns to INITIAL, a second
 *            -- returns to COM1, blanks and newlines are skipped, and any
 *            other character is reported through errorFn.
 *
 * The lexer takes an extra argument (errorFn, file) of type arg:
 *   errorFn  called as errorFn (message, line, line + 1) on bad input.
 *   file     passed unchanged to Elems.startTag and Elems.endTag.
 *
 * Positions attached to tokens are line numbers read from yylineno; the
 * same line number is used for both the left and the right position.
 *
 * NOTE(review): the PCDATA rule matches any run of characters other than
 * < , which includes & , blanks and newlines.  Under the longest-match
 * rule the reference and whitespace rules listed before it therefore only
 * win when their match is not extended by further non-< characters.
 * Confirm that this is the intended tokenization before changing it.
 *)

structure T = Tokens
structure Elems = HTMLElementsFn (
    structure Tokens = Tokens
    structure Err = Err
    structure HTMLAttrs = HTMLAttrs)

type pos = int
type svalue = T.svalue
type arg = (((string * int * int) -> unit) * string option)
type ('a, 'b) token = ('a, 'b) T.token
type lexresult= (svalue, pos) token

(* End of input: the lexer argument is ignored and an EOF token with both
 * positions set to 0 is returned.
 *)
fun eof _ = Tokens.EOF(0, 0)

(* a buffer for collecting a string piecewise *)
val buffer = ref ([] : string list)

(* Append a piece to the buffer.  Pieces are stored newest-first. *)
fun addStr s = (buffer := s :: !buffer)

(* Return the buffered pieces concatenated in the order they were added,
 * and reset the buffer to empty.
 *)
fun getStr () = (String.concat(List.rev(! buffer)) before (buffer := []))

%%

%s COM1 COM2 STAG;

%header (functor HTMLLexFn (
    structure Tokens : HTML_TOKENS
    structure Err : HTML_ERROR
    structure HTMLAttrs : HTML_ATTRS));

%arg (errorFn, file);

%full
%count

alpha=[A-Za-z];
digit=[0-9];
namechar=[-A-Za-z0-9.];
tag=({alpha}{namechar}*);
ws = [\ \t];

%%

<INITIAL>"<"{tag}
	=> (addStr yytext; YYBEGIN STAG; continue());
<STAG>">"
	=> (addStr yytext;
	    YYBEGIN INITIAL;
	    (* getStr also empties the buffer for the next start tag *)
	    case Elems.startTag file (getStr(), !yylineno, !yylineno)
	     of NONE => continue()
	      | (SOME tag) => tag
	    (* end case *));
<STAG>\n
	=> (addStr " "; continue());
<STAG>{ws}+
	=> (addStr yytext; continue());
<STAG>{namechar}+
	=> (addStr yytext; continue());
<STAG>"="
	=> (addStr yytext; continue());
<STAG>"\""[^\"\n]*"\""
	=> (addStr yytext; continue());
<STAG>"'"[^'\n]*"'"
	=> (addStr yytext; continue());
<STAG>.
	=> (addStr yytext; continue());

<INITIAL>"</"{tag}{ws}*">"
	=> (case Elems.endTag file (yytext, !yylineno, !yylineno)
	     of NONE => continue()
	      | (SOME tag) => tag
	    (* end case *));

<INITIAL>"<!--"
	=> (YYBEGIN COM1; continue());
<COM1>"--"
	=> (YYBEGIN COM2; continue());
<COM1>\n
	=> (continue());
<COM1>.
	=> (continue());
<COM2>"--"
	=> (YYBEGIN COM1; continue());
<COM2>">"
	=> (YYBEGIN INITIAL; continue());
<COM2>\n
	=> (continue());
<COM2>{ws}
	=> (continue());
<COM2>.
	=> (errorFn("bad comment syntax", !yylineno, !yylineno+1);
	    YYBEGIN INITIAL;
	    continue());

<INITIAL>"&#"[A-Za-z]+";"
	=> ((** At some point, we should support &#SPACE; and &#TAB; **)
	    continue());

<INITIAL>"&#"[0-9]+";"
	=> (T.CHAR_REF(yytext, !yylineno, !yylineno));

<INITIAL>"&"{tag}";"
	=> (T.ENTITY_REF(yytext, !yylineno, !yylineno));

<INITIAL>"\n"
	=> (continue());
<INITIAL>{ws}
	=> (continue());

<INITIAL>[^<]+
	=> (T.PCDATA(yytext, !yylineno, !yylineno));
<INITIAL>.
	=> (errorFn(concat[
		"bogus character #\"", Char.toString(String.sub(yytext, 0)),
		"\" in PCDATA\n"
	      ], !yylineno, !yylineno+1);
	    continue());
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -