📄 htmlparser.c

📁 一个小的RTOS具有UIP网络功能
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
    }    break;#if WWW_CONF_FORMS  case TAG_FORM:    PRINTF(("Form tag\n"));    switch_majorstate(MAJORSTATE_FORM);    if(strncmp(s.tagattr, html_action, sizeof(html_action)) == 0) {      PRINTF(("Form action '%s'\n", s.tagattrparam));      strncpy(s.formaction, s.tagattrparam, WWW_CONF_MAX_FORMACTIONLEN - 1);    } else if(strncmp(s.tagattr, html_name, sizeof(html_name)) == 0) {      PRINTF(("Form name '%s'\n", s.tagattrparam));      strncpy(s.formname, s.tagattrparam, WWW_CONF_MAX_FORMNAMELEN - 1);    }    s.inputname[0] = s.inputvalue[0] = 0;    break;  case TAG_SLASHFORM:    switch_majorstate(MAJORSTATE_BODY);    s.formaction[0] = s.formname[0] = 0;    break;  case TAG_INPUT:    if(s.majorstate == MAJORSTATE_FORM) {      /* First check if we are called at the end of an input tag. If	 so, we should render the input widget. */      if(s.tagattr[0] == 0 &&	 s.inputname[0] != 0) {	PRINTF(("Render input type %d\n", s.inputtype));	switch(s.inputtype) {	case HTMLPARSER_INPUTTYPE_NONE:	case HTMLPARSER_INPUTTYPE_TEXT:	  for(i = 0; i < s.inputvaluesize; ++i) {	    if(s.inputvalue[i] == 0) {	      memset(&s.inputvalue[i], ISO_space, s.inputvaluesize - i);	      s.inputvalue[s.inputvaluesize] = 0;	      break;	    }	  }	  	  htmlparser_inputfield(s.inputvalue, s.inputname,				s.formname, s.formaction);	  break;	case HTMLPARSER_INPUTTYPE_SUBMIT:	case HTMLPARSER_INPUTTYPE_IMAGE:	  htmlparser_submitbutton(s.inputvalue, s.inputname,				  s.formname, s.formaction);	  break;	}	s.inputtype = HTMLPARSER_INPUTTYPE_NONE;      } else {	PRINTF(("Input '%s' '%s'\n", s.tagattr, s.tagattrparam));	if(strncmp(s.tagattr, html_type, sizeof(html_type)) == 0) {	  if(strncmp(s.tagattrparam, html_submit,		     sizeof(html_submit)) == 0) {	    s.inputtype = HTMLPARSER_INPUTTYPE_SUBMIT;	  } else if(strncmp(s.tagattrparam, html_image,			    sizeof(html_image)) == 0) {	    s.inputtype = HTMLPARSER_INPUTTYPE_IMAGE;	  } else if(strncmp(s.tagattrparam, html_text,			    sizeof(html_text)) == 0) {	    s.inputtype = HTMLPARSER_INPUTTYPE_TEXT;	  } else {	    s.inputtype = HTMLPARSER_INPUTTYPE_OTHER;	  }	} else if(strncmp(s.tagattr, html_name,			  sizeof(html_name)) == 0) {	  strncpy(s.inputname, s.tagattrparam,		  WWW_CONF_MAX_INPUTNAMELEN);	} else if(strncmp(s.tagattr, html_alt,			  sizeof(html_alt)) == 0 &&		  s.inputtype == HTMLPARSER_INPUTTYPE_IMAGE) {	  	  strncpy(s.inputvalue, s.tagattrparam,		  WWW_CONF_MAX_INPUTVALUELEN);	  	} else if(strncmp(s.tagattr, html_value,			  sizeof(html_value)) == 0) {	  strncpy(s.inputvalue, s.tagattrparam,		  WWW_CONF_MAX_INPUTVALUELEN);	} else if(strncmp(s.tagattr, html_size,			  sizeof(html_size)) == 0) {	  size = 0;	  if(s.tagattrparam[0] >= '0' &&	     s.tagattrparam[0] <= '9') {	    size = s.tagattrparam[0] - '0';	    if(s.tagattrparam[1] >= '0' &&	       s.tagattrparam[1] <= '9') {	      size = size * 10 + (s.tagattrparam[1] - '0');	    }	  }	  if(size >= WWW_CONF_MAX_INPUTVALUELEN) {	    size = WWW_CONF_MAX_INPUTVALUELEN - 1;	  }	  s.inputvaluesize = size;	  /*	  strncpy(s.inputvalue, s.tagattrparam,		  WWW_CONF_MAX_INPUTVALUELEN);*/	}      }          }    break;#endif /* WWW_CONF_FORMS */    #if WWW_CONF_RENDERSTATE  case TAG_CENTER:    parse_char(ISO_nl);        htmlparser_renderstate(HTMLPARSER_RENDERSTATE_BEGIN |			   HTMLPARSER_RENDERSTATE_CENTER);    break;  case TAG_SLASHCENTER:    parse_char(ISO_nl);    htmlparser_renderstate(HTMLPARSER_RENDERSTATE_END |			   HTMLPARSER_RENDERSTATE_CENTER);    break;#endif /* WWW_CONF_RENDERSTATE */  }}/*-----------------------------------------------------------------------------------*/voidhtmlparser_init(void){  s.majorstate = s.lastmajorstate = MAJORSTATE_DISCARD;  s.minorstate = MINORSTATE_TEXT;  s.lastchar = 0;}/*-----------------------------------------------------------------------------------*/static char CC_FASTCALLlowercase(char c){  /* XXX: This is a *brute force* approach to lower-case     converting and should *not* be used anywhere else! It     works for our purposes, however (i.e., HTML tags). */  if(c > 0x40) {    return (c & 0x1f) | 0x60;  } else {    return c;  }}/*-----------------------------------------------------------------------------------*/static void endtagfound(void){  s.tag[s.tagptr] = 0;  s.tagattr[s.tagattrptr] = 0;  s.tagattrparam[s.tagattrparamptr] = 0;}/*-----------------------------------------------------------------------------------*//* htmlparser_parse(): * * This is the main function in the HTML parser module and it parses * the HTML data in the input buffer. The htmlparser_state is updated * as the buffer is parsed character by character. The functions * parse_char() and parse_tag() (defined earlier in this file) are * called to process regular characters and HTML tags, * respectively. * * Note that the input buffer does not have to contain full HTML tags; * the parser is state machine driven in order to be able to work with * buffers that have been divided in any way. */voidhtmlparser_parse(char *data, u16_t len){  static char c;    while(len > 0) {    c = *data;    --len;    ++data;        switch(s.minorstate) {    case MINORSTATE_NONE:      break;    case MINORSTATE_TEXT:      /* We are currently parsing some text, so we look for signs of	 an HTML tag starting (i.e., a '<' character). We also	 compress any whitespace character to one single space	 character (' '). */      if(c == ISO_lt) {	s.minorstate = MINORSTATE_TAG;	s.tagptr = 0;	endtagfound();      } else if(c == ISO_ampersand) {	s.minorstate = MINORSTATE_EXTCHAR;      } else {	if(iswhitespace(c)) {	  if(s.lastchar != ISO_space) {	    parse_char(' ');	    s.lastchar = ISO_space;	    c = ISO_space;	  }	} else {	  parse_char(c);	}      }      break;    case MINORSTATE_EXTCHAR:      if(c == ISO_semicolon) {		s.minorstate = MINORSTATE_TEXT;	parse_char(' ');      } else if(iswhitespace(c)) {		s.minorstate = MINORSTATE_TEXT;	parse_char('&');	parse_char(' ');      }      break;    case MINORSTATE_TAG:      /* We are currently parsing within the name of a tag. We check	 for the end of a tag (the '>' character) or whitespace (which	 indicates that we should parse a tag attr argument	 instead). */      if(c == ISO_gt) {	/* Full tag found. We continue parsing regular text. */	s.minorstate = MINORSTATE_TEXT;	s.tagattrptr = s.tagattrparamptr = 0;	endtagfound();	  	parse_tag();      } else if(iswhitespace(c)) {	/* The name of the tag found. We continue parsing the tag	   attr.*/	s.minorstate = MINORSTATE_TAGATTR;	s.tagattrptr = 0;	endtagfound();      } else {	/* Keep track of the name of the tag, but convert it to	   lower case. */	s.tag[s.tagptr] = lowercase(c);	++s.tagptr;	/* Check if the ->tag field is full. If so, we just eat up	   any data left in the tag. */	if(s.tagptr == sizeof(s.tag)) {	  s.minorstate = MINORSTATE_TAGEND;	}      }      /* Check for HTML comment, indicated by <!-- */      if(s.tagptr == 3 &&	 s.tag[0] == ISO_bang &&	 s.tag[1] == ISO_dash &&	 s.tag[2] == ISO_dash) {	PRINTF(("Starting comment...\n"));	s.minorstate = MINORSTATE_HTMLCOMMENT;	s.tagptr = 0;	endtagfound();      }	               break;    case MINORSTATE_TAGATTR:      /* We parse the "tag attr", i.e., the "href" in <a	 href="...">. */      if(c == ISO_gt) {	/* Full tag found. */	s.minorstate = MINORSTATE_TEXT;	s.tagattrparamptr = 0;	s.tagattrptr = 0;	endtagfound();	parse_tag();	s.tagptr = 0;	endtagfound();	      } else if(iswhitespace(c)) {	if(s.tagattrptr == 0) {	  /* Discard leading spaces. */	} else {	  /* A non-leading space is the end of the attribute. */	  s.tagattrparamptr = 0;	  endtagfound();	  parse_tag();	  s.minorstate = MINORSTATE_TAGATTRSPACE;	  /*	    s.tagattrptr = 0;		    endtagfound();*/	}      } else if(c == ISO_eq) {		s.minorstate = MINORSTATE_TAGATTRPARAMNQ;	s.tagattrparamptr = 0;	endtagfound();      } else {	s.tagattr[s.tagattrptr] = lowercase(c);	++s.tagattrptr;	/* Check if the "tagattr" field is full. If so, we just eat	   up any data left in the tag. */	if(s.tagattrptr == sizeof(s.tagattr)) {	  s.minorstate = MINORSTATE_TAGEND;	}      }      break;    case MINORSTATE_TAGATTRSPACE:      if(iswhitespace(c)) {	/* Discard spaces. */      } else if(c == ISO_eq) {	s.minorstate = MINORSTATE_TAGATTRPARAMNQ;	s.tagattrparamptr = 0;	endtagfound();	parse_tag();      } else {	s.tagattr[0] = lowercase(c);	s.tagattrptr = 1;	s.minorstate = MINORSTATE_TAGATTR;      }      break;    case MINORSTATE_TAGATTRPARAMNQ:      /* We are parsing the "tag attr parameter", i.e., the link part	 in <a href="link">. */      if(c == ISO_gt) {	/* Full tag found. */	endtagfound();	parse_tag();	s.minorstate = MINORSTATE_TEXT;	s.tagattrptr = 0;       	endtagfound();      	parse_tag();	s.tagptr = 0;       	endtagfound();      } else if(iswhitespace(c) &&		s.tagattrparamptr == 0) {	/* Discard leading spaces. */	        } else if((c == ISO_citation ||		 c == ISO_citation2) &&		s.tagattrparamptr == 0) {	s.minorstate = MINORSTATE_TAGATTRPARAM;	s.quotechar = c;	PRINTF(("tag attr param q found\n"));      } else if(iswhitespace(c)) {	PRINTF(("Non-leading space found at %d\n",		s.tagattrparamptr));	/* Stop parsing if a non-leading space was found */	endtagfound();	parse_tag();	  	s.minorstate = MINORSTATE_TAGATTR;	s.tagattrptr = 0;	endtagfound();      } else {	s.tagattrparam[s.tagattrparamptr] = c;	++s.tagattrparamptr;	/* Check if the "tagattr" field is full. If so, we just eat	   up any data left in the tag. */	if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) {	  s.minorstate = MINORSTATE_TAGEND;	}      }      break;    case MINORSTATE_TAGATTRPARAM:      /* We are parsing the "tag attr parameter", i.e., the link	 part in <a href="link">. */      if(c == s.quotechar) {	/* Found end of tag attr parameter. */	endtagfound();	parse_tag();		s.minorstate = MINORSTATE_TAGATTR;	s.tagattrptr = 0;	endtagfound();      } else {	if(iswhitespace(c)) {	  c = ISO_space;	}	s.tagattrparam[s.tagattrparamptr] = c;	++s.tagattrparamptr;	/* Check if the "tagattr" field is full. If so, we just eat	   up any data left in the tag. */	if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) {	  s.minorstate = MINORSTATE_TAGEND;	}      }      break;    case MINORSTATE_HTMLCOMMENT:      if(c == ISO_dash) {	++s.tagptr;      } else if(c == ISO_gt && s.tagptr > 0) {	PRINTF(("Comment done.\n"));	s.minorstate = MINORSTATE_TEXT;      } else {	s.tagptr = 0;      }      break;    case MINORSTATE_TAGEND:      /* Discard characters until a '>' is seen. */      if(c == ISO_gt) {	s.minorstate = MINORSTATE_TEXT;	s.tagattrptr = 0;	endtagfound();	parse_tag();      }      break;    }      s.lastchar = c;  }}/*-----------------------------------------------------------------------------------*/
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -