📄 parser.l
字号:
%{
/*
 * Word parser for tsearch2: a flex scanner that splits a byte string into
 * typed tokens (words, numbers, e-mails, URLs, HTML tags/entities, ...).
 *
 * Each successful match publishes its text through the globals `token` and
 * `tokenlen` and returns the token-type code to the caller of the scanner.
 */
#include "postgres.h"

#include "deflex.h"
#include "parser.h"
#include "common.h"

/* Avoid exit() on fatal scanner errors: route the message to ts_error() */
#undef fprintf
#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)

char	   *token = NULL;		/* pointer to the text of the current token */
int			tokenlen;			/* length of the current token, in bytes */
static char *s = NULL;			/* private copy of a WHOLE hyphenated word or
								 * URL; its parts are re-scanned afterwards */

YY_BUFFER_STATE buf = NULL;		/* scan buffer; needed to parse from a string */

/* Growable buffer that accumulates the text of one tag/comment/script. */
typedef struct
{
	int			tlen;			/* allocated size of str */
	int			clen;			/* bytes currently used in str (without NUL) */
	char	   *str;			/* malloc'ed storage, reused between tags */
} TagStorage;

static TagStorage ts = {0, 0, NULL};

/*
 * Append the current match (tsearch2_yytext) to the tag buffer, keeping it
 * NUL-terminated.  The capacity grows by doubling.
 *
 * The result of realloc() is committed to `ts` only on success, so an
 * out-of-memory error neither leaks the old buffer nor leaves `ts` with a
 * NULL pointer and a stale capacity.
 */
static void
addTag(void)
{
	int			needed = ts.clen + tsearch2_yyleng + 1;

	if (needed > ts.tlen)
	{
		/* never start doubling from zero, that would not terminate */
		int			newlen = (ts.tlen > 0) ? ts.tlen : 1;
		char	   *grown;

		while (needed > newlen)
			newlen *= 2;

		grown = realloc(ts.str, newlen);
		if (!grown)
			ereport(ERROR,
					(errcode(ERRCODE_OUT_OF_MEMORY),
					 errmsg("out of memory")));
		ts.str = grown;
		ts.tlen = newlen;
	}

	memcpy(ts.str + ts.clen, tsearch2_yytext, tsearch2_yyleng);
	ts.clen += tsearch2_yyleng;
	ts.str[ts.clen] = '\0';
}

/*
 * Begin accumulating a new tag: allocate the buffer on first use, empty it,
 * and store the current match as the first piece.
 */
static void
startTag(void)
{
	if (ts.str == NULL)
	{
		ts.tlen = tsearch2_yyleng + 1;
		ts.str = malloc(ts.tlen);
		if (!ts.str)
			ereport(ERROR,
					(errcode(ERRCODE_OUT_OF_MEMORY),
					 errmsg("out of memory")));
	}
	ts.clen = 0;
	ts.str[0] = '\0';
	addTag();
}

/*
 * Replace `s` with a fresh copy of the current match.
 *
 * Must be called BEFORE yyless(0): yyless(0) pushes the whole match back and
 * leaves tsearch2_yytext empty.  Reports out-of-memory instead of letting a
 * NULL token escape to the caller.
 */
static void
saveWholeMatch(void)
{
	free(s);
	s = strdup(tsearch2_yytext);
	if (!s)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));
}

%}

%option 8bit
%option never-interactive
%option nodefault
%option nounput
%option noyywrap

/* parser's state for parsing the parts of a hyphenated word */
%x DELIM
/* parser's states for parsing a URL */
%x URL
%x SERVER

/* parser's states for parsing TAGS */
%x INTAG
%x QINTAG
%x INCOMMENT
%x INSCRIPT

/* cyrillic koi8 chars (any byte with the high bit set) */
CYRALNUM	[0-9\200-\377]
CYRALPHA	[\200-\377]
ALPHA		[a-zA-Z\200-\377]
ALNUM		[0-9a-zA-Z\200-\377]

HOSTNAME	([-_[:alnum:]]+\.)+[[:alpha:]]+
URI		[-_[:alnum:]/%,\.;=&?#]+

%%

"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; startTag(); }

<INSCRIPT>"</"[Ss][Cc][Rr][Ii][Pp][Tt]">" {
	BEGIN INITIAL;
	addTag();
	token = ts.str;
	tokenlen = ts.clen;
	return TAG;
}

"<!--" { BEGIN INCOMMENT; startTag(); }

<INCOMMENT>"-->" {
	BEGIN INITIAL;
	addTag();
	token = ts.str;
	tokenlen = ts.clen;
	return TAG;
}

"<"[\![:alpha:]] { BEGIN INTAG; startTag(); }

"</"[[:alpha:]] { BEGIN INTAG; startTag(); }

<INTAG>"\"" { BEGIN QINTAG; addTag(); }

<QINTAG>"\\\"" { addTag(); }

<QINTAG>"\"" { BEGIN INTAG; addTag(); }

<INTAG>">" {
	BEGIN INITIAL;
	addTag();
	token = ts.str;
	tokenlen = ts.clen;
	return TAG;
}

<QINTAG,INTAG,INCOMMENT,INSCRIPT>.|\n { addTag(); }

\&(quot|amp|nbsp|lt|gt)\; {
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return HTMLENTITY;
}

\&\#[0-9][0-9]?[0-9]?\; {
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return HTMLENTITY;
}

[-_\.[:alnum:]]+@{HOSTNAME} /* e-mail address */ {
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return EMAIL;
}

[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+ /* float with exponent */ {
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return SCIENTIFIC;
}

[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return VERSIONNUMBER;
}

[+-]?[0-9]+\.[0-9]+ {
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return DECIMAL;
}

[+-][0-9]+ {
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return SIGNEDINT;
}

<DELIM,INITIAL>[0-9]+ {
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return UNSIGNEDINT;
}

http"://" {
	BEGIN URL;
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return HTTP;
}

ftp"://" {
	BEGIN URL;
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return HTTP;
}

<URL,INITIAL>{HOSTNAME}[/:]{URI} /* whole URL; host and path are re-scanned */ {
	BEGIN SERVER;
	saveWholeMatch();
	tokenlen = tsearch2_yyleng;
	yyless( 0 );
	token = s;
	return FURL;
}

<SERVER,URL,INITIAL>{HOSTNAME} {
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return HOST;
}

<SERVER>[/:]{URI} {
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return URI;
}

[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return FILEPATH;
}

({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word; parts are re-scanned in DELIM */ {
	BEGIN DELIM;
	saveWholeMatch();
	tokenlen = tsearch2_yyleng;
	yyless( 0 );
	token = s;
	return CYRHYPHENWORD;
}

([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word; parts are re-scanned in DELIM */ {
	BEGIN DELIM;
	saveWholeMatch();
	tokenlen = tsearch2_yyleng;
	yyless( 0 );
	token = s;
	return LATHYPHENWORD;
}

({ALNUM}+-)+{ALNUM}+ /* composite-word; parts are re-scanned in DELIM */ {
	BEGIN DELIM;
	saveWholeMatch();
	tokenlen = tsearch2_yyleng;
	yyless( 0 );
	token = s;
	return HYPHENWORD;
}

<DELIM>[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return VERSIONNUMBER;
}

<DELIM>\+?[0-9]+\.[0-9]+ {
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return DECIMAL;
}

<DELIM>{CYRALPHA}+ /* one word in composite-word */ {
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return CYRPARTHYPHENWORD;
}

<DELIM>[[:alpha:]]+ /* one word in composite-word */ {
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return LATPARTHYPHENWORD;
}

<DELIM>{ALNUM}+ /* one word in composite-word */ {
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return PARTHYPHENWORD;
}

<DELIM>- {
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return SPACE;
}

<DELIM,SERVER,URL>.|\n /* anything else: return to the basic state and re-scan */ {
	BEGIN INITIAL;
	yyless( 0 );
}

{CYRALPHA}+ /* normal word */ {
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return CYRWORD;
}

[[:alpha:]]+ /* normal word */ {
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return LATWORD;
}

{ALNUM}+ /* normal word */ {
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return UWORD;
}

[ \r\n\t]+ {
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return SPACE;
}

. {
	token = tsearch2_yytext;
	tokenlen = tsearch2_yyleng;
	return SPACE;
}

%%

/*
 * Clean up after parsing from a string: release the saved whole-word copy
 * and the scan buffer.
 */
void
tsearch2_end_parse(void)
{
	free(s);
	s = NULL;
	tsearch2_yy_delete_buffer( buf );
	buf = NULL;
}

/*
 * Start parsing the first `limit` bytes of `str`.
 *
 * Any parse still in progress is finished first.  The scanner is put back
 * into the INITIAL start condition.
 */
void
tsearch2_start_parse_str(char *str, int limit)
{
	if (buf)
		tsearch2_end_parse();
	buf = tsearch2_yy_scan_bytes( str, limit );
	tsearch2_yy_switch_to_buffer( buf );
	BEGIN INITIAL;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -