📄 ldhtml.c
字号:
/* ldhtml.c - loads and HTML file/*/* Copyright (c) 1995-1999 Applied Information Technologies, Inc./* All Rights Reserved./* /* Distributed uder the GNU General Public License which was included in/* the file named "LICENSE" in the package that you recieved./* If not, write to:/* The Free Software Foundation, Inc.,/* 675 Mass Ave, Cambridge, MA 02139, USA. */#include <stdlib.h>#include <stdio.h>#include <ctype.h>#include <stdarg.h>#include <string.h>/*/* Local Includes */#include "sqlweb.h"/*/* TOKENS */typedef enum { OpenTag_t ,OpenEndTag_t ,OpenComment_t ,EndTag_t ,AttAssign_t ,String_t ,EndOfFile_t} eToken_t;/*/* STATES */typedef enum { Init_s ,InBetween_s ,InTag_s ,InAtt_s ,InAttAssign_s ,InValue_s ,InContents_s ,InComment_s ,InEndTag_s ,InEndTag2_s ,Err_s ,EndOfFile_s} eParseState_t;/*/* Global Variables, basically, in a STATE MACHINE everything/* is global! */static char gsStringBuf[MAX_BUFSIZ]; /* String from "TOKENIZER" */static char *gpStringBuf; /* Current Location in memory- /* mapped file */static eParseState_t gsParseState =Init_s; /* The STATE */static eToken_t gtToken; /* The TOKEN */static TAG *gpTAG; /* The TAG */static SYMBOL *gpSym; /* The Attribute */static PI *gpPI; /* The PageItem */static PAGE *gpPAGE; /* The PAGE */static LIST *galPI[MAX_TAG_LEVELS]; /* The PageItem List Array */static PI *gaPI[MAX_TAG_LEVELS]; /* The PI Array (context) */static int giLevel /* The Level */ ,giErr = 0 /* Error counter */ ,giLineNbr = 1 /* Line Counter */ ;static eBoolean_t gbFatal = eFalse /* Fatal Errors */ ;static TAG gNULLTag = {"NULL" /* pTagName */ ,"Y" /* pTagEmptyInd */ ,"" /* pTagDesc */ ,"A" /* pTagAfterInd(HIDDEN) */ ,"" /* pTagLinkDesc */ };#define EXACT_TOKENS 7#define EXACT_STATES 12/* #define MAX_PARSE_ERRORS 25 *//*/* State Transition Functions */eParseState_t f011(),f012(),f013(),f014(),f015(),f016() ,f021(),f022(),f023(),f024(),f025(),f026() ,f031(),f032(),f033(),f034(),f035(),f036() ,f041(),f042(),f043(),f044(),f045(),f046() ,f051(),f052(),f053(),f054(),f055(),f056() ,f061(),f062(),f063(),f064(),f065(),f066() ,f071(),f072(),f073(),f074(),f075(),f076() ,f081(),f082(),f083(),f084(),f085(),f086() ,f091(),f092(),f093(),f094(),f095(),f096() ,f101(),f102(),f103(),f104(),f105(),f106() ,f111(),f112(),f113(),f114(),f115(),f116() ,f_eof()/* end of file */ ;/*/* The STATE MACHINE! */typedef eParseState_t (*PFS_t)(); /* Pointer to Function returning STATE */PFS_t gaTransTab [ EXACT_STATES ] [ EXACT_TOKENS ] = {/* Open- Open- Open- Att-/* Tag EndTag Comment EndTag Assign STRING EOF/* === ====== ======= ====== ====== ====== ===/* Init_s */ f011 ,f012 ,f013 ,f014 ,f015 ,f016 ,f_eof/* InBetween_s */,f021 ,f022 ,f023 ,f024 ,f025 ,f026 ,f_eof/* InTag_s */,f031 ,f032 ,f033 ,f034 ,f035 ,f036 ,f_eof/* InAtt_s */,f041 ,f042 ,f043 ,f044 ,f045 ,f046 ,f_eof/* InAttAssign_s*/,f051 ,f052 ,f053 ,f054 ,f055 ,f056 ,f_eof/* InValue_s */,f061 ,f062 ,f063 ,f064 ,f065 ,f066 ,f_eof/* InContents_s */,f071 ,f072 ,f073 ,f074 ,f075 ,f076 ,f_eof/* InComment_s */,f081 ,f082 ,f083 ,f084 ,f085 ,f086 ,f_eof/* InEndTag_s */,f091 ,f092 ,f093 ,f094 ,f095 ,f096 ,f_eof/* InEndTag2_s */,f101 ,f102 ,f103 ,f104 ,f105 ,f106 ,f_eof/* Err_s */,f111 ,f112 ,f113 ,f114 ,f115 ,f116 ,f_eof};static eBoolean_t ParseStream();static eToken_t GetNextToken();static eBoolean_t mk_PI(char *pTagName, char *pPiContents);static void ParseErr(const char *pFmt, ...);/*/* Routines for processing input characters */static int EatSpaces(int c);static int GetC();static void unGetC(int c);/*/* Here we go..... *//* The main interface routine. This function takes the SQLweb HTML/* file and builds the in-memory PAGE / PI Tree that is represented/* by it. */eBoolean_tLoadHTML(char *pFilename ,PAGE **pout_Page){ eBoolean_t bParseRet; PI PI; /* temporary PI for loading File Text */ char sBuf[BUFSIZ]; /* for dumping ERRStack */ RETeFalse2(LoadTEXT(pFilename,"HTML",&PI) ,"LoadHTML Failed on %s" ,pFilename ); (*pout_Page) = (PAGE*)malloc(sizeof(PAGE)); if(!(*pout_Page)){ ParseErr("malloc failed in parser"); return(eFalse); } (void)memset((*pout_Page),0,sizeof(PAGE)); (*pout_Page)->lPI = galPI[0] = l_create("QUEUE"); /* Level 0:Page List*/ (*pout_Page)->pFileText = gpStringBuf = PI.pPiContents; bParseRet = ParseStream(); /* This method, pop's off ALL Parse Errors /* and if we are NOT COOKING the page it /* displays the errors as HTML Comments /* are silently ignored..... */ while( MsgPop(sBuf) ) { if(ISCOOKED) { DebugHTML(__FILE__,__LINE__,0,"%s",sBuf); } else { fprintf(stderr,"%s\n",sBuf); } } return(bParseRet);}/*/* The file parser. runs "The State Machine" */static eBoolean_tParseStream(){ giErr=0; /* Global Error Count */ gbFatal=eFalse; /* Fatal Error Flag */ giLevel=0; /* Global PageItem Level */ giLineNbr=1; /* Line Number */ gsParseState = Init_s; do { gtToken = GetNextToken(); gsParseState = (gaTransTab[gsParseState][gtToken])(); } while( gsParseState != EndOfFile_s); return( ISeTrue(gbFatal) ? eFalse : eTrue );}/*/* The Tokenizer */static eToken_tGetNextToken(){ int c; char *pBuf=gsStringBuf; eBoolean_t bQuote=eFalse; /* /* Get the first character */ c=GetC(); /* /* Initialize the global STRING BUFFER */ *pBuf=0; if(c==EOF){ return(EndOfFile_t); } if(gsParseState==InAttAssign_s && c=='=') { return( AttAssign_t ); } if(c == '<'){ c=GetC(); if(c=='!') { return(OpenComment_t); } if(c=='/') { return(OpenEndTag_t); } unGetC(c); return(OpenTag_t); } if(c == '>') { return(EndTag_t); } for( ;c!=EOF && (pBuf-gsStringBuf)<sizeof(gsStringBuf);c=GetC()){ /* /* Deal with Quotes (excluding InContents_s and InComments_s) */ if(c == '"' && ( gsParseState==InAtt_s ||gsParseState==InAttAssign_s ||gsParseState==InValue_s)) { bQuote = ISeTrue(bQuote) ? eFalse :eTrue; /* Toggle Quote Flag */ continue; /* And eat it! */ } if(c == '\\' && ISeTrue(bQuote)) { int c2 = GetC(); if(c2=='"'){ *pBuf++ = (char) c2; continue; } unGetC(c2); } /* Deal with < and > 'tings */ if((c == '<' || c == '>') && ISeFalse(bQuote)) { unGetC(c); *pBuf = 0; return(String_t); } /* /* The Attribute Assignment */ if(c == '=' && ISeFalse(bQuote) && (gsParseState==InAtt_s || gsParseState==InAttAssign_s)) { unGetC(c); *pBuf = 0; return(String_t); } /* /* White space separator within <> pairs... */ if(isspace(c) && ISeFalse(bQuote) &&( gsParseState==InTag_s || gsParseState==InAtt_s || gsParseState==InAttAssign_s || gsParseState==InValue_s)) { *pBuf=0; c=EatSpaces(c); unGetC(c); if( pBuf==gsStringBuf ) continue; return(String_t); } /* Fillin the Buffer.... */ *pBuf++ = (char) c; } if( pBuf==gsStringBuf ) return(EndOfFile_t); *pBuf=0; unGetC(c); return(String_t);}/*/* Eat Spaces between TAGS */static intEatSpaces(int c){ do{ c=GetC(); } while(isspace(c)); return(c);}eParseState_t f_eof(){ return(EndOfFile_s); }/*/* The Init_s State */eParseState_t f011(){return(InTag_s);}eParseState_t f012(){return(Init_s);}eParseState_t f013(){return(InComment_s);}eParseState_t f014(){return(Init_s);}eParseState_t f015(){return(Init_s);}eParseState_t f016(){return(Init_s);}/*/* The InBetween State */eParseState_t f021(){return(InTag_s);}eParseState_t f022(){return(InEndTag_s);}eParseState_t f023(){return(InComment_s);}eParseState_t f024(){ParseErr("Encountered unmatched 'gt'");return(Err_s); }eParseState_t f025(){ParseErr("Internal error got '=' inBTWN");return(Err_s);}eParseState_t f026(){/*/* int iWLen = strspn(gsStringBuf," \t\n\r");/* if(iWLen=iStrLen(gsStringBuf))/* return(InBetween_s); */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -