📄 htmlparser.c
字号:
} break;#if WWW_CONF_FORMS case TAG_FORM: PRINTF(("Form tag\n")); switch_majorstate(MAJORSTATE_FORM); if(strncmp(s.tagattr, html_action, sizeof(html_action)) == 0) { PRINTF(("Form action '%s'\n", s.tagattrparam)); strncpy(s.formaction, s.tagattrparam, WWW_CONF_MAX_FORMACTIONLEN - 1); } else if(strncmp(s.tagattr, html_name, sizeof(html_name)) == 0) { PRINTF(("Form name '%s'\n", s.tagattrparam)); strncpy(s.formname, s.tagattrparam, WWW_CONF_MAX_FORMNAMELEN - 1); } s.inputname[0] = s.inputvalue[0] = 0; break; case TAG_SLASHFORM: switch_majorstate(MAJORSTATE_BODY); s.formaction[0] = s.formname[0] = 0; break; case TAG_INPUT: if(s.majorstate == MAJORSTATE_FORM) { /* First check if we are called at the end of an input tag. If so, we should render the input widget. */ if(s.tagattr[0] == 0 && s.inputname[0] != 0) { PRINTF(("Render input type %d\n", s.inputtype)); switch(s.inputtype) { case HTMLPARSER_INPUTTYPE_NONE: case HTMLPARSER_INPUTTYPE_TEXT: for(i = 0; i < s.inputvaluesize; ++i) { if(s.inputvalue[i] == 0) { memset(&s.inputvalue[i], ISO_space, s.inputvaluesize - i); s.inputvalue[s.inputvaluesize] = 0; break; } } htmlparser_inputfield(s.inputvalue, s.inputname, s.formname, s.formaction); break; case HTMLPARSER_INPUTTYPE_SUBMIT: case HTMLPARSER_INPUTTYPE_IMAGE: htmlparser_submitbutton(s.inputvalue, s.inputname, s.formname, s.formaction); break; } s.inputtype = HTMLPARSER_INPUTTYPE_NONE; } else { PRINTF(("Input '%s' '%s'\n", s.tagattr, s.tagattrparam)); if(strncmp(s.tagattr, html_type, sizeof(html_type)) == 0) { if(strncmp(s.tagattrparam, html_submit, sizeof(html_submit)) == 0) { s.inputtype = HTMLPARSER_INPUTTYPE_SUBMIT; } else if(strncmp(s.tagattrparam, html_image, sizeof(html_image)) == 0) { s.inputtype = HTMLPARSER_INPUTTYPE_IMAGE; } else if(strncmp(s.tagattrparam, html_text, sizeof(html_text)) == 0) { s.inputtype = HTMLPARSER_INPUTTYPE_TEXT; } else { s.inputtype = HTMLPARSER_INPUTTYPE_OTHER; } } else if(strncmp(s.tagattr, html_name, sizeof(html_name)) == 0) { strncpy(s.inputname, s.tagattrparam, WWW_CONF_MAX_INPUTNAMELEN); } else if(strncmp(s.tagattr, html_alt, sizeof(html_alt)) == 0 && s.inputtype == HTMLPARSER_INPUTTYPE_IMAGE) { strncpy(s.inputvalue, s.tagattrparam, WWW_CONF_MAX_INPUTVALUELEN); } else if(strncmp(s.tagattr, html_value, sizeof(html_value)) == 0) { strncpy(s.inputvalue, s.tagattrparam, WWW_CONF_MAX_INPUTVALUELEN); } else if(strncmp(s.tagattr, html_size, sizeof(html_size)) == 0) { size = 0; if(s.tagattrparam[0] >= '0' && s.tagattrparam[0] <= '9') { size = s.tagattrparam[0] - '0'; if(s.tagattrparam[1] >= '0' && s.tagattrparam[1] <= '9') { size = size * 10 + (s.tagattrparam[1] - '0'); } } if(size >= WWW_CONF_MAX_INPUTVALUELEN) { size = WWW_CONF_MAX_INPUTVALUELEN - 1; } s.inputvaluesize = size; /* strncpy(s.inputvalue, s.tagattrparam, WWW_CONF_MAX_INPUTVALUELEN);*/ } } } break;#endif /* WWW_CONF_FORMS */ #if WWW_CONF_RENDERSTATE case TAG_CENTER: parse_char(ISO_nl); htmlparser_renderstate(HTMLPARSER_RENDERSTATE_BEGIN | HTMLPARSER_RENDERSTATE_CENTER); break; case TAG_SLASHCENTER: parse_char(ISO_nl); htmlparser_renderstate(HTMLPARSER_RENDERSTATE_END | HTMLPARSER_RENDERSTATE_CENTER); break;#endif /* WWW_CONF_RENDERSTATE */ }}/*-----------------------------------------------------------------------------------*/voidhtmlparser_init(void){ s.majorstate = s.lastmajorstate = MAJORSTATE_DISCARD; s.minorstate = MINORSTATE_TEXT; s.lastchar = 0;}/*-----------------------------------------------------------------------------------*/static char CC_FASTCALLlowercase(char c){ /* XXX: This is a *brute force* approach to lower-case converting and should *not* be used anywhere else! It works for our purposes, however (i.e., HTML tags). */ if(c > 0x40) { return (c & 0x1f) | 0x60; } else { return c; }}/*-----------------------------------------------------------------------------------*/static void endtagfound(void){ s.tag[s.tagptr] = 0; s.tagattr[s.tagattrptr] = 0; s.tagattrparam[s.tagattrparamptr] = 0;}/*-----------------------------------------------------------------------------------*//* htmlparser_parse(): * * This is the main function in the HTML parser module and it parses * the HTML data in the input buffer. The htmlparser_state is updated * as the buffer is parsed character by character. The functions * parse_char() and parse_tag() (defined earlier in this file) are * called to process regular characters and HTML tags, * respectively. * * Note that the input buffer does not have to contain full HTML tags; * the parser is state machine driven in order to be able to work with * buffers that have been divided in any way. */voidhtmlparser_parse(char *data, u16_t len){ static char c; while(len > 0) { c = *data; --len; ++data; switch(s.minorstate) { case MINORSTATE_NONE: break; case MINORSTATE_TEXT: /* We are currently parsing some text, so we look for signs of an HTML tag starting (i.e., a '<' character). We also compress any whitespace character to one single space character (' '). */ if(c == ISO_lt) { s.minorstate = MINORSTATE_TAG; s.tagptr = 0; endtagfound(); } else if(c == ISO_ampersand) { s.minorstate = MINORSTATE_EXTCHAR; } else { if(iswhitespace(c)) { if(s.lastchar != ISO_space) { parse_char(' '); s.lastchar = ISO_space; c = ISO_space; } } else { parse_char(c); } } break; case MINORSTATE_EXTCHAR: if(c == ISO_semicolon) { s.minorstate = MINORSTATE_TEXT; parse_char(' '); } else if(iswhitespace(c)) { s.minorstate = MINORSTATE_TEXT; parse_char('&'); parse_char(' '); } break; case MINORSTATE_TAG: /* We are currently parsing within the name of a tag. We check for the end of a tag (the '>' character) or whitespace (which indicates that we should parse a tag attr argument instead). */ if(c == ISO_gt) { /* Full tag found. We continue parsing regular text. */ s.minorstate = MINORSTATE_TEXT; s.tagattrptr = s.tagattrparamptr = 0; endtagfound(); parse_tag(); } else if(iswhitespace(c)) { /* The name of the tag found. We continue parsing the tag attr.*/ s.minorstate = MINORSTATE_TAGATTR; s.tagattrptr = 0; endtagfound(); } else { /* Keep track of the name of the tag, but convert it to lower case. */ s.tag[s.tagptr] = lowercase(c); ++s.tagptr; /* Check if the ->tag field is full. If so, we just eat up any data left in the tag. */ if(s.tagptr == sizeof(s.tag)) { s.minorstate = MINORSTATE_TAGEND; } } /* Check for HTML comment, indicated by <!-- */ if(s.tagptr == 3 && s.tag[0] == ISO_bang && s.tag[1] == ISO_dash && s.tag[2] == ISO_dash) { PRINTF(("Starting comment...\n")); s.minorstate = MINORSTATE_HTMLCOMMENT; s.tagptr = 0; endtagfound(); } break; case MINORSTATE_TAGATTR: /* We parse the "tag attr", i.e., the "href" in <a href="...">. */ if(c == ISO_gt) { /* Full tag found. */ s.minorstate = MINORSTATE_TEXT; s.tagattrparamptr = 0; s.tagattrptr = 0; endtagfound(); parse_tag(); s.tagptr = 0; endtagfound(); } else if(iswhitespace(c)) { if(s.tagattrptr == 0) { /* Discard leading spaces. */ } else { /* A non-leading space is the end of the attribute. */ s.tagattrparamptr = 0; endtagfound(); parse_tag(); s.minorstate = MINORSTATE_TAGATTRSPACE; /* s.tagattrptr = 0; endtagfound();*/ } } else if(c == ISO_eq) { s.minorstate = MINORSTATE_TAGATTRPARAMNQ; s.tagattrparamptr = 0; endtagfound(); } else { s.tagattr[s.tagattrptr] = lowercase(c); ++s.tagattrptr; /* Check if the "tagattr" field is full. If so, we just eat up any data left in the tag. */ if(s.tagattrptr == sizeof(s.tagattr)) { s.minorstate = MINORSTATE_TAGEND; } } break; case MINORSTATE_TAGATTRSPACE: if(iswhitespace(c)) { /* Discard spaces. */ } else if(c == ISO_eq) { s.minorstate = MINORSTATE_TAGATTRPARAMNQ; s.tagattrparamptr = 0; endtagfound(); parse_tag(); } else { s.tagattr[0] = lowercase(c); s.tagattrptr = 1; s.minorstate = MINORSTATE_TAGATTR; } break; case MINORSTATE_TAGATTRPARAMNQ: /* We are parsing the "tag attr parameter", i.e., the link part in <a href="link">. */ if(c == ISO_gt) { /* Full tag found. */ endtagfound(); parse_tag(); s.minorstate = MINORSTATE_TEXT; s.tagattrptr = 0; endtagfound(); parse_tag(); s.tagptr = 0; endtagfound(); } else if(iswhitespace(c) && s.tagattrparamptr == 0) { /* Discard leading spaces. */ } else if((c == ISO_citation || c == ISO_citation2) && s.tagattrparamptr == 0) { s.minorstate = MINORSTATE_TAGATTRPARAM; s.quotechar = c; PRINTF(("tag attr param q found\n")); } else if(iswhitespace(c)) { PRINTF(("Non-leading space found at %d\n", s.tagattrparamptr)); /* Stop parsing if a non-leading space was found */ endtagfound(); parse_tag(); s.minorstate = MINORSTATE_TAGATTR; s.tagattrptr = 0; endtagfound(); } else { s.tagattrparam[s.tagattrparamptr] = c; ++s.tagattrparamptr; /* Check if the "tagattr" field is full. If so, we just eat up any data left in the tag. */ if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) { s.minorstate = MINORSTATE_TAGEND; } } break; case MINORSTATE_TAGATTRPARAM: /* We are parsing the "tag attr parameter", i.e., the link part in <a href="link">. */ if(c == s.quotechar) { /* Found end of tag attr parameter. */ endtagfound(); parse_tag(); s.minorstate = MINORSTATE_TAGATTR; s.tagattrptr = 0; endtagfound(); } else { if(iswhitespace(c)) { c = ISO_space; } s.tagattrparam[s.tagattrparamptr] = c; ++s.tagattrparamptr; /* Check if the "tagattr" field is full. If so, we just eat up any data left in the tag. */ if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) { s.minorstate = MINORSTATE_TAGEND; } } break; case MINORSTATE_HTMLCOMMENT: if(c == ISO_dash) { ++s.tagptr; } else if(c == ISO_gt && s.tagptr > 0) { PRINTF(("Comment done.\n")); s.minorstate = MINORSTATE_TEXT; } else { s.tagptr = 0; } break; case MINORSTATE_TAGEND: /* Discard characters until a '>' is seen. */ if(c == ISO_gt) { s.minorstate = MINORSTATE_TEXT; s.tagattrptr = 0; endtagfound(); parse_tag(); } break; } s.lastchar = c; }}/*-----------------------------------------------------------------------------------*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -