⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sgmls.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
  }}staticvoid parse_external_entity(s, sp, e)     char *s;     struct sgmls *sp;     external_entity_s *e;{  char *type;  char *notation;  e->name = strsave(scan_token(&s));  type = scan_token(&s);  if (strcmp(type, "CDATA") == 0)    e->type = SGMLS_ENTITY_CDATA;  else if (strcmp(type, "SDATA") == 0)    e->type = SGMLS_ENTITY_SDATA;  else if (strcmp(type, "NDATA") == 0)    e->type = SGMLS_ENTITY_NDATA;  else    error(E_BADEXTERNAL);  notation = scan_token(&s);  e->notation = lookup_notation(sp, notation);}staticvoid parse_subdoc_entity(s, e)     char *s;     external_entity_s *e;{  e->name = strsave(scan_token(&s));  e->type = SGMLS_ENTITY_SUBDOC;}staticattribute_s *parse_attribute(sp, s)     struct sgmls *sp;     char *s;{  attribute_s *a;  char *type;  a = (attribute_s *)xmalloc(sizeof(*a));  a->name = strsave(scan_token(&s));  type = scan_token(&s);  if (strcmp(type, "CDATA") == 0) {    unsigned long lineno = 0;    a->type = SGMLS_ATTR_CDATA;    a->value.data.n = parse_data(s, &lineno);    a->value.data.v = copy_data(datav, a->value.data.n);  }  else if (strcmp(type, "IMPLIED") == 0) {    a->type = SGMLS_ATTR_IMPLIED;  }  else if (strcmp(type, "NOTATION") == 0) {    a->type = SGMLS_ATTR_NOTATION;    a->value.notation = lookup_notation(sp, scan_token(&s));  }  else if (strcmp(type, "ENTITY") == 0) {    int n, i;    a->type = SGMLS_ATTR_ENTITY;    n = count_args(s);    if (n == 0)      error(E_MISSING);    a->value.entity.v = (entity_s **)xmalloc(n*sizeof(entity_s *));    a->value.entity.n = n;    for (i = 0; i < n; i++)      a->value.entity.v[i] = lookup_entity(sp, scan_token(&s));  }  else if (strcmp(type, "TOKEN") == 0) {    int n, i;    a->type = SGMLS_ATTR_TOKEN;    n = count_args(s);    if (n == 0)      error(E_MISSING);    a->value.token.v = (char **)xmalloc(n * sizeof(char *));    for (i = 0; i < n; i++)      a->value.token.v[i] = strsave(scan_token(&s));    a->value.token.n = n;  }  else    error(E_ATTR);  return a;}void sgmls_free_attributes(p)     
attribute_s *p;{  while (p) {    attribute_s *nextp = p->next;    switch (p->type) {    case SGMLS_ATTR_CDATA:      if (p->value.data.v) {	free(p->value.data.v[0].s);	free(p->value.data.v);      }      break;    case SGMLS_ATTR_TOKEN:      {	int i;	for (i = 0; i < p->value.token.n; i++)	  free(p->value.token.v[i]);	xfree(p->value.token.v);      }      break;    case SGMLS_ATTR_ENTITY:      xfree(p->value.entity.v);      break;    case SGMLS_ATTR_IMPLIED:    case SGMLS_ATTR_NOTATION:      break;    }    free(p->name);    free(p);    p = nextp;  }}staticdata_s *copy_data(v, n)     data_s *v;     int n;{  if (n == 0)    return 0;  else {    int i;    unsigned total;    char *p;    data_s *result;        result = (data_s *)xmalloc(n*sizeof(data_s));    total = 0;    for (i = 0; i < n; i++)      total += v[i].len;    if (!total)      total++;    p = xmalloc(total);    for (i = 0; i < n; i++) {      result[i].s = p;      memcpy(result[i].s, v[i].s, v[i].len);      result[i].len = v[i].len;      p += v[i].len;      result[i].is_sdata = v[i].is_sdata;    }    return result;  }}/* Unescape s, and return nul-terminated data.  Give an errorif the data contains 0. */staticchar *unescape(s)     char *s;{  int len = unescape1(s);  if (#ifdef __BORLANDC__      len > 0 &&#endif      memchr(s, '\0', len))    error(E_NULESCAPE);  s[len] = '\0';  return s;}/* Like unescape(), but REs are represented by 012 not 015. */staticchar *unescape_file(s)     char *s;{  char *p;  p = s = unescape(s);  while ((p = strchr(p, RECHAR)) != 0)    *p++ = '\n';  return s;}/* Unescape s, and return length of data.  The data may contain 0. 
*/staticint unescape1(s)     char *s;{  const char *p;  char *q;  q = strchr(s, '\\');  if (!q)    return strlen(s);  p = q;  while (*p) {    if (*p == '\\') {      switch (*++p) {      case '\\':	*q++ = *p++;	break;      case 'n':	*q++ = RECHAR;	p++;	break;      case '0':      case '1':      case '2':      case '3':      case '4':      case '5':      case '6':      case '7':	{	  int val = *p++ - '0';	  if (*p >= '0' && *p <= '7') {	    val = val*8 + (*p++ - '0');	    if (*p >= '0' && *p <= '7')	      val = val*8 + (*p++ - '0');	  }	  *q++ = (char)val;	}	break;      case '|':	error(E_SDATA);      default:	error(E_BADESCAPE);      }    }    else      *q++ = *p++;  }  return q - s;}staticchar *scan_token(pp)     char **pp;{  char *start = *pp;  while (**pp != '\0') {    if (**pp == ' ') {      **pp = '\0';      *pp += 1;      break;    }    *pp += 1;  }  if (!*start)    error(E_MISSING);  return start;}staticint count_args(p)     char *p;{  int n = 0;  while (*p != '\0') {    n++;    do {      ++p;      if (*p == ' ') {	p++;	break;      }    } while (*p != '\0');  }  return n;}staticint read_line(sp)     struct sgmls *sp;{  unsigned i = 0;  FILE *fp = sp->fp;  int c;  char *buf = sp->buf;  unsigned buf_size = sp->buf_size;  c = getc(fp);  if (c == EOF) {    input_lineno = sp->input_lineno;    if (ferror(fp))      error(E_SYSTEM);    return 0;  }    sp->input_lineno++;  input_lineno = sp->input_lineno;  for (;;) {    if (i >= buf_size) {      if (buf_size == 0)	buf_size = 24;      else if (buf_size > (unsigned)UINT_MAX/2) {	if (buf_size == (unsigned)UINT_MAX)	  error(E_LINELENGTH);	buf_size = (unsigned)UINT_MAX;      }      else	buf_size *= 2;      buf = xrealloc(buf, buf_size);      sp->buf = buf;      sp->buf_size = buf_size;    }    if (c == '\0')      error(E_NUL);    if (c == '\n') {      buf[i] = '\0';      break;    }    buf[i++] = c;    c = getc(fp);    if (c == EOF) {      if (ferror(fp))	error(E_SYSTEM);      else	error(E_EOF);    }  }  return 
1;}staticnotation_s *lookup_notation(sp, name)struct sgmls *sp;char *name;{  struct notation_list *p    = (struct notation_list *)list_find((struct list *)sp->notations, name,					sp->subdoc_level);  if (!p)    error(E_BADNOTATION);  return &p->notation;}staticentity_s *lookup_entity(sp, name)struct sgmls *sp;char *name;{  struct entity_list *p    = (struct entity_list *)list_find((struct list *)sp->entities, name,				      sp->subdoc_level);  if (!p)    error(E_BADENTITY);  return &p->entity;}staticexternal_entity_s *lookup_external_entity(sp, name)struct sgmls *sp;char *name;{  entity_s *p = lookup_entity(sp, name);  if (p->is_internal)    error(E_INTERNALENTITY);  return &p->u.external;}staticvoid define_external_entity(sp, e)struct sgmls *sp;external_entity_s *e;{  struct entity_list *p;  e->attributes = 0;  e->filenames = sp->files;  e->nfilenames = sp->nfiles;  sp->files = 0;  sp->nfiles = 0;  e->pubid = sp->pubid;  sp->pubid = 0;  e->sysid = sp->sysid;  sp->sysid = 0;  p = (struct entity_list *)xmalloc(sizeof(struct entity_list));  memcpy((UNIV)&p->entity.u.external, (UNIV)e, sizeof(*e));  p->entity.is_internal = 0;  p->subdoc_level = sp->subdoc_level;  p->next = sp->entities;  sp->entities = p;}staticvoid define_internal_entity(sp, e)struct sgmls *sp;internal_entity_s *e;{  struct entity_list *p;  p = (struct entity_list *)xmalloc(sizeof(struct entity_list));  memcpy((UNIV)&p->entity.u.internal, (UNIV)e, sizeof(*e));  p->entity.is_internal = 1;  p->subdoc_level = sp->subdoc_level;  p->next = sp->entities;  sp->entities = p;}staticvoid define_notation(sp, np)struct sgmls *sp;notation_s *np;{  struct notation_list *p;  np->sysid = sp->sysid;  sp->sysid = 0;  np->pubid = sp->pubid;  sp->pubid = 0;  p = (struct notation_list *)xmalloc(sizeof(struct notation_list));  memcpy((UNIV)&p->notation, (UNIV)np, sizeof(*np));  p->subdoc_level = sp->subdoc_level;  p->next = sp->notations;  sp->notations = p;}staticstruct list *list_find(p, name, level)     struct list *p; 
    char *name;     int level;{  for (; p && p->subdoc_level == level; p = p->next)    if (strcmp(p->name, name) == 0)      return p;  return 0;}/* Move all the items in the list whose subdoc level is level to theend of the list and make their subdoc_level -1. */staticvoid list_finish_level(listp, level)     struct list **listp;     int level;{  struct list **pp, *next_level, *old_level;  for (pp = listp; *pp && (*pp)->subdoc_level == level; pp = &(*pp)->next)    (*pp)->subdoc_level = -1;  next_level = *pp;  *pp = 0;  old_level = *listp;  *listp = next_level;  for (pp = listp; *pp; pp = &(*pp)->next)    ;  *pp = old_level;}staticvoid add_attribute(pp, a)     attribute_s **pp, *a;{#if 0  for (; *pp && strcmp((*pp)->name, a->name) < 0; pp = &(*pp)->next)    ;#endif  a->next = *pp;  *pp = a;}     staticchar *strsave(s)char *s;{  if (!s)    return s;  else {    char *p = xmalloc(strlen(s) + 1);    strcpy(p, s);    return p;  }}staticUNIV xmalloc(n)  unsigned n;{  UNIV p = malloc(n);  if (!p)    error(E_NOMEM);  return p;}/* ANSI C says first argument to realloc can be NULL, but not everybody   appears to support this. */staticUNIV xrealloc(p, n)     UNIV p;     unsigned n;{  p = p ? realloc(p, n) : malloc(n);  if (!p)    error(E_NOMEM);  return p;}staticvoid error(num)     enum error_code num;{  (*errhandler)((int)num, errlist[num], input_lineno);  abort();}staticvoid default_errhandler(num, msg, lineno)     int num;     char *msg;     unsigned long lineno;{  fprintf(stderr, "Line %lu: %s\n", lineno, msg);  exit(1);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -