⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 form.c

📁 网络爬虫程序
💻 C
📖 第 1 页 / 共 3 页
字号:
/***************************************************************************/
/*    This code is part of WWW grabber called pavuk                        */
/*    Copyright (c) 1997 - 2001 Stefan Ondrejicka                          */
/*    Distributed under GPL 2 or later                                     */
/***************************************************************************/

#include "config.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#ifdef GTK_FACE
#include <gtk/gtk.h>
#include "icons/cancel.xpm"
#include "icons/load.xpm"
#include "icons/restart_small.xpm"
#endif

#include "ainterface.h"
#include "form.h"
#include "gui.h"
#include "html.h"
#include "tools.h"

/*
 * Headroom (in bytes) kept free at the end of the growable buffers below.
 * FIXME: Translate me (NARAZNIK)! ("naraznik" is Slovak for bumper/buffer.)
 */
#define NARAZNIK 512

/*
 * SEXPAND(sv) / TEXPAND(sv): make sure the buffer `stack` (resp. `text`),
 * whose capacity is `ssz` (`tsz`) and fill level is `sr` (`tr`), can take
 * `sv` more bytes while keeping NARAZNIK bytes of headroom.  The macros
 * operate on local variables with exactly those names.
 *
 * NOTE(review): the realloc() result is assigned straight back without a
 * NULL check, and the macros expand to a bare `if` block (not wrapped in
 * do { } while(0)); the shape is kept because uses outside this part of
 * the file cannot be checked from here.
 */
#define SEXPAND(sv) \
  if((ssz - ((sv) + sr + NARAZNIK)) < 0) \
  { \
    stack = realloc(stack, ssz + (sv) + NARAZNIK); \
    ssz += NARAZNIK + (sv); \
  }

#define TEXPAND(sv) \
  if((tsz - ((sv) + tr + NARAZNIK)) < 0) \
  { \
    text = realloc(text, tsz + (sv) + NARAZNIK); \
    tsz += NARAZNIK + (sv); \
  }

/*
 * Extract the source text of the num-th form found in an HTML document.
 *
 * num        zero-based index of the wanted form
 * html_text  HTML document text
 * html_len   number of bytes of html_text to scan
 * form_len   out: number of bytes stored in the returned buffer
 *            (terminating NUL not counted)
 *
 * Only "<FORM" followed by a whitespace character is counted as a form
 * start.  The copy begins at the '<' of that tag and stops right before
 * the matching "</FORM>" (or at the end of input when the form is never
 * closed).  HTML comments and <SCRIPT> sections are skipped, both while
 * searching and while copying.
 *
 * Returns a malloc()ed, NUL-terminated buffer, or NULL (with *form_len
 * set to 0) when the form was not found or memory could not be obtained.
 */
char *form_get_text(int num, char *html_text, int html_len, int *form_len)
{
  char *stack = NULL;
  int i, sr, ssz;
  int formstart = FALSE;
  int commentstart = FALSE;
  int scriptstart = FALSE;
  int formnum = 0;

  ssz = 0;
  sr = 0;

  for(i = 0; i < html_len; i++)
  {
    if(commentstart)
    {
      if(!strncmp(html_text + i, "-->", 3))
      {
        commentstart = FALSE;
        /* skip the rest of "-->"; the loop increment eats the last char */
        i += 2;
      }
      continue;
    }

    if(scriptstart)
    {
      if(!strncasecmp(html_text + i, "</SCRIPT>", 9))
      {
        scriptstart = FALSE;
        i += 8;
      }
      continue;
    }

    if(html_text[i] == '<')
    {
      if(!strncasecmp(html_text + i + 1, "SCRIPT", 6) &&
        (i + 7) < html_len &&
        (tl_ascii_isspace(html_text[i + 7]) || html_text[i + 7] == '>'))
      {
        scriptstart = TRUE;
        continue;
      }
      else if(!strncasecmp(html_text + i + 1, "!--", 3))
      {
        commentstart = TRUE;
        continue;
      }
    }

    if(formstart)
    {
      if(!strncasecmp(html_text + i, "</FORM>", 7))
        break;

      /* same growth rule as SEXPAND(1), but with a checked realloc() */
      if((ssz - (1 + sr + NARAZNIK)) < 0)
      {
        char *grown = realloc(stack, ssz + 1 + NARAZNIK);

        if(!grown)
        {
          free(stack);
          stack = NULL;
          sr = 0;
          break;
        }
        stack = grown;
        ssz += NARAZNIK + 1;
      }
      stack[sr] = html_text[i];
      sr++;
      continue;
    }

    if(!strncasecmp(html_text + i, "<FORM", 5) &&
      (i + 5) < html_len && tl_ascii_isspace(html_text[i + 5]))
    {
      if(formnum == num)
      {
        ssz = 2 * NARAZNIK;
        stack = malloc(ssz);
        if(!stack)
        {
          ssz = 0;
          break;
        }
        formstart = TRUE;
        sr = 0;
        stack[sr] = html_text[i];
        sr++;
      }
      formnum++;
    }
  }

  /*
   * Terminate unconditionally: previously the buffer was only terminated
   * when "</FORM>" was seen, so an unclosed form gave back an unterminated
   * string.  The NARAZNIK headroom guarantees stack[sr] is inside the
   * allocation.
   */
  if(stack)
    stack[sr] = '\0';

  *form_len = sr;
  return stack;
}

/*
 * List comparator: TRUE when both form fields have the same name and the
 * same value, FALSE otherwise.
 *
 * A missing (NULL) name never matches, in line with
 * form_field_compare_name(); two missing values are treated as equal.
 */
int form_field_compare(dllist_t dl1, dllist_t dl2)
{
  form_field * ff1 = (form_field *)dl1;
  form_field * ff2 = (form_field *)dl2;

  if(!ff1->name || !ff2->name)
    return FALSE;

  if(strcmp(ff1->name, ff2->name))
    return FALSE;

  /* at least one value missing: equal only when both are missing */
  if(!ff1->value || !ff2->value)
    return (ff1->value == ff2->value) ? TRUE : FALSE;

  if(strcmp(ff1->value, ff2->value))
    return FALSE;

  return TRUE;
}

/*
 * List comparator: TRUE when both form fields have a name and the names
 * are identical, FALSE otherwise (including when either name is NULL).
 */
int form_field_compare_name(dllist_t dl1, dllist_t dl2)
{
  form_field * ff1 = (form_field *)dl1;
  form_field * ff2 = (form_field *)dl2;

  if(!ff1->name || !ff2->name)
    return FALSE;

  if(strcmp(ff1->name, ff2->name))
    return FALSE;

  return TRUE;
}

/*
 * Allocate a new form field of type FORM_T_TEXT holding copies (made with
 * tl_strdup()) of the given name and value.
 */
form_field *form_field_new(char *name, char *value)
{
  form_field *rv;

  rv = _malloc(sizeof(form_field));
  rv->type = FORM_T_TEXT;
  rv->name = tl_strdup(name);
  rv->value = tl_strdup(value);

  return rv;
}

/*
 * Allocate a copy of a form field: same type, name and value duplicated
 * with tl_strdup().
 */
form_field *form_field_duplicate(form_field * ff)
{
  form_field *rv;

  rv = _malloc(sizeof(form_field));
  rv->type = ff->type;
  rv->name = tl_strdup(ff->name);
  rv->value = tl_strdup(ff->value);

  return rv;
}

/*
 * Map the value of an INPUT/BUTTON "type" attribute (compared without
 * regard to case) to its form_field_types id.  Returns FORM_T_UNKNOWN for
 * any string not in the table.
 */
static form_field_types form_input_type(char *str)
{
  size_t i;
  static const struct
  {
    const char *str;
    form_field_types id;
  } it[] =
  {
    {"text", FORM_T_TEXT},
    {"password", FORM_T_PASSWORD},
    {"checkbox", FORM_T_CHECKBOX},
    {"radio", FORM_T_RADIO},
    {"submit", FORM_T_SUBMIT},
    {"reset", FORM_T_RESET},
    {"file", FORM_T_FILE},
    {"hidden", FORM_T_HIDDEN},
    {"image", FORM_T_IMAGE},
    {"button", FORM_T_BUTTON}
  };

  for(i = 0; i < (sizeof(it) / sizeof(it[0])); i++)
  {
    if(!strcasecmp(str, it[i].str))
      return it[i].id;
  }

  return FORM_T_UNKNOWN;
}

/*
 * Allocate a form_field_info with every member set to its empty default:
 * type FORM_T_TEXT, all strings and the child list NULL, all sizes 0 and
 * all flags FALSE.
 *
 * NOTE(review): the malloc() result is used without a NULL check, whereas
 * form_field_new() allocates through _malloc() - consider unifying.
 * NOTE(review): this file tests GTK_FACE with #ifdef at the top but with
 * #if here - confirm how the symbol is defined.
 */
static form_field_info *form_field_info_new(void)
{
  form_field_info *retv = malloc(sizeof(form_field_info));

  retv->type = FORM_T_TEXT;
  retv->name = NULL;
  retv->default_value = NULL;
  retv->text = NULL;
  retv->width = 0;
  retv->height = 0;
  retv->maxlen = 0;
  retv->readonly = FALSE;
  retv->checked = FALSE;
  retv->disabled = FALSE;
  retv->multiple = FALSE;
  retv->selected = FALSE;
  retv->infos = NULL;
#if GTK_FACE
  retv->rg = NULL;
  retv->widget = NULL;
  retv->idx = -1;
#endif

  return retv;
}

/*
 * Build a form_field_info from the text of an INPUT tag, reading the
 * type, size, maxlength, name and value attributes and the readonly,
 * checked and disabled flags.  The type stays FORM_T_TEXT when no "type"
 * attribute is present.
 */
static form_field_info *form_parse_inputtag(char *tag)
{
  char *p;
  form_field_info *retv;

  retv = form_field_info_new();

  if((p = html_get_attrib_from_tag(tag, "type")))
  {
    retv->type = form_input_type(p);
    free(p);
  }

  if((p = html_get_attrib_from_tag(tag, "size")))
  {
    retv->width = atoi(p);
    free(p);
  }

  if((p = html_get_attrib_from_tag(tag, "maxlength")))
  {
    retv->maxlen = atoi(p);
    free(p);
  }

  retv->name = html_get_attrib_from_tag(tag, "name");
  retv->default_value = html_get_attrib_from_tag(tag, "value");

  retv->readonly = html_tag_co_elem(tag, "readonly");
  retv->checked = html_tag_co_elem(tag, "checked");
  retv->disabled = html_tag_co_elem(tag, "disabled");

  return retv;
}

/*
 * Build a form_field_info from the text of a BUTTON tag.  The type
 * defaults to FORM_T_BUTTON and is overridden by a "type" attribute when
 * one is present; name, value and the disabled flag are read as well.
 */
static form_field_info *form_parse_buttontag(char *tag)
{
  char *p;
  form_field_info *retv;

  retv = form_field_info_new();
  retv->type = FORM_T_BUTTON;

  if((p = html_get_attrib_from_tag(tag, "type")))
  {
    retv->type = form_input_type(p);
    free(p);
  }

  retv->name = html_get_attrib_from_tag(tag, "name");
  retv->default_value = html_get_attrib_from_tag(tag, "value");

  retv->disabled = html_tag_co_elem(tag, "disabled");

  return retv;
}

/*
 * Build a FORM_T_SELECT form_field_info from the text of a SELECT tag:
 * name, size (stored as height) and the disabled and multiple flags.
 */
static form_field_info *form_parse_selecttag(char *tag)
{
  char *p;
  form_field_info *retv;

  retv = form_field_info_new();
  retv->type = FORM_T_SELECT;

  retv->name = html_get_attrib_from_tag(tag, "name");

  if((p = html_get_attrib_from_tag(tag, "size")))
  {
    retv->height = atoi(p);
    free(p);
  }

  retv->disabled = html_tag_co_elem(tag, "disabled");
  retv->multiple = html_tag_co_elem(tag, "multiple");

  return retv;
}

/*
 * Build a FORM_T_OPTGROUP form_field_info from the text of an OPTGROUP
 * tag: the disabled flag and the label (stored as text).
 */
static form_field_info *form_parse_optgrouptag(char *tag)
{
  form_field_info *retv;

  retv = form_field_info_new();
  retv->type = FORM_T_OPTGROUP;

  retv->disabled = html_tag_co_elem(tag, "disabled");
  retv->text = html_get_attrib_from_tag(tag, "label");

  return retv;
}

/*
 * Build a FORM_T_OPTION form_field_info from the text of an OPTION tag:
 * the disabled and selected flags, the label (stored as text) and the
 * value (stored as default_value).
 */
static form_field_info *form_parse_optiontag(char *tag)
{
  form_field_info *retv;

  retv = form_field_info_new();
  retv->type = FORM_T_OPTION;

  retv->disabled = html_tag_co_elem(tag, "disabled");
  retv->selected = html_tag_co_elem(tag, "selected");
  retv->text = html_get_attrib_from_tag(tag, "label");
  retv->default_value = html_get_attrib_from_tag(tag, "value");

  return retv;
}

/*
 * Build a FORM_T_TEXTAREA form_field_info from the text of a TEXTAREA
 * tag: cols (width), rows (height), name and the disabled and readonly
 * flags.
 *
 * NOTE(review): numeric attributes are converted with _atoi() here while
 * the other tag parsers use atoi() - confirm whether that is intended.
 */
static form_field_info *form_parse_textareatag(char *tag)
{
  char *p;
  form_field_info *retv;

  retv = form_field_info_new();
  retv->type = FORM_T_TEXTAREA;

  if((p = html_get_attrib_from_tag(tag, "cols")))
  {
    retv->width = _atoi(p);
    free(p);
  }

  if((p = html_get_attrib_from_tag(tag, "rows")))
  {
    retv->height = _atoi(p);
    free(p);
  }

  retv->name = html_get_attrib_from_tag(tag, "name");

  retv->disabled = html_tag_co_elem(tag, "disabled");
  retv->readonly = html_tag_co_elem(tag, "readonly");

  return retv;
}

/*
 * Build a form_info from the text of a FORM tag.
 *
 * Defaults are method GET and URL-encoded encoding; a "method" or
 * "enctype" attribute with an unrecognised value yields FORM_M_UNKNOWN or
 * FORM_E_UNKNOWN respectively.  The "action" attribute is stored as
 * returned by html_get_attrib_from_tag(); the field list, text and
 * parent_url start out NULL.
 *
 * NOTE(review): the malloc() result is used without a NULL check.
 */
static form_info *form_parse_formtag(char *tag)
{
  char *p;
  form_info *retv = malloc(sizeof(form_info));

  retv->method = FORM_M_GET;
  retv->encoding = FORM_E_URLENCODED;
  retv->action = NULL;
  retv->infos = NULL;
  retv->text = NULL;
  retv->parent_url = NULL;

  if((p = html_get_attrib_from_tag(tag, "method")))
  {
    if(!strcasecmp(p, "GET"))
      retv->method = FORM_M_GET;
    else if(!strcasecmp(p, "POST"))
      retv->method = FORM_M_POST;
    else
      retv->method = FORM_M_UNKNOWN;
    free(p);
  }

  if((p = html_get_attrib_from_tag(tag, "enctype")))
  {
    if(!strcasecmp(p, "multipart/form-data"))
      retv->encoding = FORM_E_MULTIPART;
    else if(!strcasecmp(p, "application/x-www-form-urlencoded"))
      retv->encoding = FORM_E_URLENCODED;
    else
      retv->encoding = FORM_E_UNKNOWN;
    _free(p);
  }

  retv->action = html_get_attrib_from_tag(tag, "action");

  return retv;
}

/*
 * Release a form_field_info: its name, default_value and text strings,
 * recursively every child in its infos list (together with the list
 * nodes), and finally the structure itself.
 */
static void form_field_info_free(form_field_info * ffi)
{
  dllist *ptr;

  _free(ffi->name);
  _free(ffi->default_value);
  _free(ffi->text);

  ptr = ffi->infos;
  while(ptr)
  {
    form_field_info_free((form_field_info *) ptr->data);
    ptr = dllist_remove_entry(ptr, ptr);
  }

  free(ffi);
}

/*
 * Release a form_info: its action and text strings, every entry of its
 * infos list (freed as form_field_info), and the structure itself.
 *
 * NOTE(review): parent_url is not released here - presumably it is owned
 * elsewhere; confirm.
 */
void form_free(form_info * formi)
{
  dllist *ptr;

  _free(formi->action);
  _free(formi->text);

  ptr = formi->infos;
  while(ptr)
  {
    form_field_info_free((form_field_info *) ptr->data);
    ptr = dllist_remove_entry(ptr, ptr);
  }

  free(formi);
}

/*
 * Create a copy of a form_info: method and encoding are copied, action is
 * duplicated with tl_strdup(), and each entry of the infos list is
 * duplicated with form_field_duplicate().  text and parent_url are not
 * copied; they are NULL in the result.
 *
 * NOTE(review): list entries are treated as form_field here, while
 * form_parse() stores form_field_info entries and form_free() releases
 * entries as form_field_info - confirm which kind of form_info this
 * function is meant for.
 * NOTE(review): the malloc() result is used without a NULL check.
 */
form_info *form_info_dup(form_info * formi)
{
  dllist *ptr;
  form_info *retv = malloc(sizeof(form_info));

  retv->method = formi->method;
  retv->encoding = formi->encoding;
  retv->action = tl_strdup(formi->action);
  retv->text = NULL;
  retv->parent_url = NULL;
  retv->infos = NULL;

  for(ptr = formi->infos; ptr; ptr = ptr->next)
  {
    retv->infos = dllist_append(retv->infos,
      (dllist_t) form_field_duplicate((form_field *)ptr->data));
  }

  return retv;
}

/*
 * Parse the text of one form (presumably as produced by form_get_text())
 * into a form_info with a list of form_field_info entries.
 *
 * NOTE(review): this function continues beyond the part of the file that
 * was available for review; the code below is reproduced unchanged apart
 * from layout, and the comments cover only what is visible here.
 */
form_info *form_parse(char *form_text, int form_len)
{
  char *stack;
  int i, sr, ssz;
  int tagstart = FALSE;
  form_info *retv = NULL;
  form_field_info *selectgroup = NULL;
  dllist *optgroups = NULL;
  form_field_info *optgroup = NULL;
  form_field_info *lastformtag = NULL;
  form_field_info *plastformtag = NULL;
  int tsz, tr;
  char *text;
  bool_t fstag;
  char *sp;

  /* NOTE(review): neither malloc() result is checked */
  ssz = 2 * NARAZNIK;
  stack = malloc(ssz);
  sr = 0;

  tsz = 2 * NARAZNIK;
  text = malloc(tsz);
  tr = 0;

  for(i = 0; i < form_len; i++)
  {
    if(form_text[i] == '<')
    {
      tagstart = TRUE;
      sr = 0;
    }
    else if(form_text[i] == '>' && tagstart)
    {
      /* end of a tag: `stack` holds the tag text collected since '<' */
      tagstart = FALSE;
      stack[sr] = '\0';
      fstag = FALSE;

/* true when the collected tag text starts with tag name s (any case),
 * followed by whitespace or the end of the tag text */
#define IS_TAG(s) (!strncasecmp(stack, s, strlen(s)) && \
      sr >= strlen(s) && \
      (tl_ascii_isspace(stack[strlen(s)]) || \
       stack[strlen(s)] == '\0'))

      if(IS_TAG("FORM"))
      {
        retv = form_parse_formtag(stack);
      }
      else if(retv && IS_TAG("INPUT"))
      {
        fstag = TRUE;
        plastformtag = lastformtag;
        lastformtag = form_parse_inputtag(stack);
        retv->infos = dllist_append(retv->infos, (dllist_t)lastformtag);
      }
      else if(retv && lastformtag && IS_TAG("/INPUT"))
      {
        if(tr)
        {
          text[tr] = '\0';
          sp = text;
          while(*sp && tl_ascii_isspace(*sp))
            sp++;
          if(!lastformtag->text && *sp)
          {
            /* first line of the pending text becomes the field's text */
            int l = strcspn(sp, "\r\n");
            lastformtag->text = tl_strndup(sp, l);
            /* NOTE(review): length looks off by one - strlen(sp + l) + 1
             * would also move the terminator; confirm before changing */
            memmove(text, sp + l, strlen(sp + l + 1));
            tr = strlen(text);
          }
          else
            tr = 0;
        }
      }
      else if(retv && IS_TAG("BUTTON"))
      {
        fstag = TRUE;
        plastformtag = lastformtag;
        lastformtag = form_parse_buttontag(stack);
        retv->infos = dllist_append(retv->infos, (dllist_t)lastformtag);
      }
      else if(retv && lastformtag && IS_TAG("/BUTTON"))
      {
        if(tr)
        {
          text[tr] = '\0';
          sp = text;
          while(*sp && tl_ascii_isspace(*sp))
            sp++;
          if(!lastformtag->text && *sp)
          {
            int l = strcspn(sp, "\r\n");
            lastformtag->text = tl_strndup(sp, l);
            /* NOTE(review): same suspicious length as in the /INPUT case */
            memmove(text, sp + l, strlen(sp + l + 1));
            tr = strlen(text);
          }
          else
            tr = 0;
        }
      }
      else if(retv && IS_TAG("SELECT"))
      {
        fstag = TRUE;
        plastformtag = lastformtag;
        lastformtag = form_parse_selecttag(stack);
        selectgroup = lastformtag;
        retv->infos = dllist_append(retv->infos, (dllist_t)lastformtag);
      }
      else if(selectgroup && IS_TAG("/SELECT"))
      {
        selectgroup = NULL;
      }
      else if(selectgroup && IS_TAG("OPTGROUP"))
      {
        fstag = TRUE;
        plastformtag = lastformtag;
        lastformtag = form_parse_optgrouptag(stack);
        optgroup = lastformtag;
        /* the head of `optgroups` is the innermost open OPTGROUP */
        optgroups = dllist_prepend(optgroups, (dllist_t) lastformtag);
        selectgroup->infos = dllist_append(selectgroup->infos,
          (dllist_t) lastformtag);
      }
      else if(optgroups && IS_TAG("/OPTGROUP"))
      {
        optgroups = dllist_remove_entry(optgroups, optgroups);
        if(optgroups)
          optgroup = (form_field_info *) optgroups->data;
        else
          optgroup = NULL;
      }
      else if(selectgroup && IS_TAG("OPTION"))

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -