⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sgmldecl.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
     for (i = 0; i < NQUANTITY; i++)	  quantity[i] = -1;     if (sdckname(tbuf, kquantity) == FAIL)	  return FAIL;     if (sdname(tbuf, ksgmlref) == FAIL)	  return FAIL;     while (sdparm(tbuf, 0) == NAS1 && !matches(tbuf, kfeatures)) {	  long n;	  for (i = 0; i < SIZEOF(quantity_names); i++)	       if (matches(tbuf, quantity_names[i]))		    break;	  if (i >= SIZEOF(quantity_names)) {	       sderr(E_BADQUANTITY, tbuf + 1, (UNCH *)0);	       return FAIL;	  }	  if (sdparm(tbuf, 0) != NUM1) {	       sderr(E_XNUM, (UNCH *)0, (UNCH *)0);	       return FAIL;	  }	  n = atol((char *)tbuf);	  if (n < sd.quantity[i])	       sderr(E_QUANTITY, (UNCH *)quantity_names[i],		     ltous((long)sd.quantity[i]));	  else if (n > max_quantity[i]) {	       sderr(E_QTOOBIG, (UNCH *)quantity_names[i],		     ltous((long)max_quantity[i]));	       quantity[i] = max_quantity[i];	  }	  else	       quantity[i] = (int)n;     }     for (i = 0; i < NQUANTITY; i++)	  if (quantity[i] > 0) {	       sd.quantity[i] = quantity[i];	       if (!quantity_changed)		    quantity_changed = (char *)rmalloc(NQUANTITY);	       quantity_changed[i] = 1;	  }     return SUCCESS;}/* Parse the FEATURES section.  Uses no lookahead. */static int sdfeatures(tbuf)UNCH *tbuf;{     static struct  {	  UNCH *name;	  UNCH argtype;  /* 0 = no argument, 1 = boolean, 2 = numeric */	  UNIV valp;     /* UNCH * if boolean, long * if numeric. 
*/     } features[] = {	  { kminimize, 0, 0 },	  { kdatatag, 1, 0 },	  { komittag, 1, (UNIV)&sd.omittag },	  { krank, 1, 0 },	  { kshorttag, 1, (UNIV)&sd.shorttag },	  { klink, 0, 0 },	  { ksimple, 2, 0 },	  { kimplicit, 1, 0 },	  { kexplicit, 2, 0 },	  { kother, 0, 0 },	  { kconcur, 2, 0 },	  { ksubdoc, 2, (UNIV)&sd.subdoc },	  { kformal, 1, (UNIV)&sd.formal },     };     int i;     if (sdckname(tbuf, kfeatures) == FAIL)	  return FAIL;     for (i = 0; i < SIZEOF(features); i++) {	  if (sdname(tbuf, features[i].name) == FAIL) return FAIL;	  if (features[i].argtype > 0) {	       long n;	       if (sdparm(tbuf, 0) != NAS1) {		    sderr(120, (UNCH *)0, (UNCH *)0);		    return FAIL;	       }	       if (matches(tbuf, kyes)) {		    if (features[i].argtype > 1) {			 if (sdparm(tbuf, 0) != NUM1) {			      sderr(E_XNUM, (UNCH *)0, (UNCH *)0);			      return FAIL;			 }			 n = atol((char *)tbuf);			 if (n == 0)			      sderr(E_ZEROFEATURE, features[i].name, (UNCH *)0);		    }		    else			 n = 1;	       }	       else if (matches(tbuf, kno))		    n = 0;	       else {		    sderr(E_YESNO, tbuf+1, (UNCH *)0);		    return FAIL;	       }	       if (features[i].valp == 0) {		    if (n > 0)			 sderr(E_NOTSUPPORTED, features[i].name,			      (UNCH *)0);	       }	       else if (features[i].argtype > 1)		    *(long *)features[i].valp = n;	       else		    *(UNCH *)features[i].valp = (UNCH)n;	  }     }     if (!sd.shorttag)	  noemptytag();     return SUCCESS;}/* Parse the APPINFO section.  Uses no lookahead. */static int sdappinfo(tbuf)UNCH *tbuf;{     if (sdname(tbuf, kappinfo) == FAIL) return FAIL;     switch (sdparm(tbuf, &pcblitv)) {     case LIT1:	  appinfosw = 1;	  break;     case NAS1:	  if (matches(tbuf, knone))	       break;	  sderr(118, tbuf+1, knone);	  return FAIL;     default:	  sderr(E_XNMLIT, knone, (UNCH *)0);	  return FAIL;     }     return SUCCESS;}/* Change a prefix of ISO 8879-1986 to ISO 8879:1986.  Amendment 1 tothe standard requires the latter. 
*/static VOID sdfixstandard(tbuf, silently)UNCH *tbuf;int silently;{     if (strncmp((char *)tbuf, "ISO 8879-1986", 13) == 0) {	  if (!silently)	       sderr(E_STANDARD, (UNCH *)0, (UNCH *)0);	  tbuf[8] = ':';     }}static int sdname(tbuf, key)UNCH *tbuf;UNCH *key;{     if (sdparm(tbuf, 0) != NAS1) {	  sderr(120, (UNCH *)0, (UNCH *)0);	  return FAIL;     }     if (!matches(tbuf, key)) {	  sderr(118, tbuf+1, key);	  return FAIL;     }     return SUCCESS;}static int sdckname(tbuf, key)UNCH *tbuf;UNCH *key;{     if (pcbsd.action != NAS1) {	  sderr(120, (UNCH *)0, (UNCH *)0);	  return FAIL;     }     if (!matches(tbuf, key)) {	  sderr(118, tbuf+1, key);	  return FAIL;     }     return SUCCESS;}/* Parse a SGML declaration parameter.  If lpcb is NULL, pt must beREFNAMELEN+2 characters long, otherwise at least LITLEN+2 characterslong. LPCB should be NULL if a literal is not allowed. */static int sdparm(pt, lpcb)UNCH *pt;			/* Token buffer. */struct parse *lpcb;		/* PCB for literal parse. */{     for (;;) {	  parse(&pcbsd);	  if (pcbsd.action != ISIG)	       break;	  sderr(E_SIGNIFICANT, (UNCH *)0, (UNCH *)0);     }     ++parmno;     switch (pcbsd.action) {     case LIT1:	  if (!lpcb) {	       sderr(E_BADLIT, (UNCH *)0, (UNCH *)0);	       REPEATCC;	       return pcbsd.action = INV_;	  }	  parselit(pt, lpcb, REFLITLEN, lex.d.lit);	  return pcbsd.action;     case LIT2:	  if (!lpcb) {	       sderr(E_BADLIT, (UNCH *)0, (UNCH *)0);	       REPEATCC;	       return pcbsd.action = INV_;	  }	  parselit(pt, lpcb, REFLITLEN, lex.d.lita);	  return pcbsd.action = LIT1;     case NAS1:	  parsenm(pt, 1);	  return pcbsd.action;     case NUM1:	  parsetkn(pt, NU, REFNAMELEN);	  return pcbsd.action;     }     return pcbsd.action;}VOID sdinit(){     int i;     /* Shunned character numbers in the reference concrete syntax. 
*/     static UNCH refshun[] = { 	  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,	  19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127, 255	  };     UNCH **p;     /* A character is magic if it is a non-SGML character used for     some internal purpose in the parser. */     char_flags[EOS] |= CHAR_MAGIC;     char_flags[EOBCHAR] |= CHAR_MAGIC;     char_flags[EOFCHAR] |= CHAR_MAGIC;     char_flags[GENRECHAR] |= CHAR_MAGIC;     char_flags[DELNONCH] |= CHAR_MAGIC;     char_flags[DELCDATA] |= CHAR_MAGIC;     char_flags[DELSDATA] |= CHAR_MAGIC;     /* Figure out the significant SGML characters. */     for (p = lextabs; *p; p++) {	  UNCH datclass = (*p)[CANON_DATACHAR];	  UNCH nonclass = (*p)[CANON_NONSGML];	  for (i = 0; i < 256; i++)	       if (!(char_flags[i] & CHAR_MAGIC)		   && (*p)[i] != datclass && (*p)[i] != nonclass)		    char_flags[i] |= CHAR_SIGNIFICANT;     }     for (i = 0; i < SIZEOF(refshun); i++)	  char_flags[refshun[i]] |= CHAR_SHUNNED;     for (i = 0; i < 256; i++)	  if (ISASCII(i) && iscntrl(i))	       char_flags[i] |= CHAR_SHUNNED;     bufsalloc();}staticVOID bufsalloc(){     scbs = (struct source *)rmalloc((REFENTLVL+1)*sizeof(struct source));     tbuf = (UNCH *)rmalloc(REFATTSPLEN+REFLITLEN+1);     /* entbuf is used for parsing numeric character references */     entbuf = (UNCH *)rmalloc(REFNAMELEN + 2);}staticVOID bufsrealloc(){     UNS size;          if (ENTLVL != REFENTLVL)	  scbs = (struct source *)rrealloc((UNIV)scbs,					   (ENTLVL+1)*sizeof(struct source));     /* Calculate the size for tbuf. */     size = LITLEN + ATTSPLEN;     if (PILEN > size)	  size = PILEN;     if (BSEQLEN > size)	  size = BSEQLEN;     if (size != REFATTSPLEN + REFLITLEN)	  tbuf = (UNCH *)rrealloc((UNIV)tbuf, size + 1);     if (NAMELEN != REFNAMELEN)	  entbuf = (UNCH *)rrealloc((UNIV)entbuf, NAMELEN + 2);}/* Check that the non-SGML characters are compatible with the concretesyntax and munge the lexical tables accordingly.  
If IMPLIED isnon-zero, then the SGML declaration was implied; in this case, don'tgive error messages about shunned characters not being declarednon-SGML.  Also make any changes that are required by the NAMING section.*/static VOID setlexical(){     int i;     UNCH **p;          if (nlextoke) {	  /* Handle characters that were made significant by the	     NAMING section. */	  for (i = 0; i < 256; i++)	       if (nlextoke[i] == NMC || nlextoke[i] == NMS)		    char_flags[i] |= CHAR_SIGNIFICANT;     }     for (i = 0; i < 256; i++)	  if (char_flags[i] & CHAR_SIGNIFICANT) {	       /* Significant SGML characters musn't be non-SGML. */	       if (char_flags[i] & CHAR_NONSGML) {		    UNCH buf[2];		    buf[0] = i;		    buf[1] = '\0';		    sderr(E_NONSGML, buf, (UNCH *)0);		    char_flags[i] &= ~CHAR_NONSGML;	       }	  }	  else {	       /* Shunned characters that are not significant SGML characters		  must be non-SGML. */	       if ((char_flags[i] & (CHAR_SHUNNED | CHAR_NONSGML))		   == CHAR_SHUNNED) {		   sderr(E_SHUNNED, ltous((long)i), (UNCH *)0);		   char_flags[i] |= CHAR_NONSGML;	       }	  }          /* Now munge the lexical tables. */     for (p = lextabs; *p; p++) {	  UNCH nonclass = (*p)[CANON_NONSGML];	  UNCH datclass = (*p)[CANON_DATACHAR];	  UNCH nmcclass = (*p)[CANON_NMC];	  UNCH nmsclass = (*p)[CANON_NMS];	  UNCH minclass = (*p)[CANON_MIN];	  for (i = 0; i < 256; i++) {	       if (char_flags[i] & CHAR_NONSGML) {		    /* We already know that it's not significant. */		    if (!(char_flags[i] & CHAR_MAGIC))			 (*p)[i] = nonclass;	       }	       else {		    if (char_flags[i] & CHAR_MAGIC) {			 sderr(E_MUSTBENON, ltous((long)i), (UNCH *)0);		    }		    else if (!(char_flags[i] & CHAR_SIGNIFICANT))			 (*p)[i] = datclass;		    else if (*p == lexmin) {			 /* If it used to be NONSGML, but its now significant,			    treat it like a datachar. 
*/			 if ((*p)[i] == nonclass)			      (*p)[i] = datclass;		    }		    else if (nlextoke			     /* This relies on the fact that lextoke				occurs last in lextabs. */			     && lextoke[i] != nlextoke[i]) {			 switch (nlextoke[i]) {			 case NMC:			      (*p)[i] = nmcclass;			      break;			 case NMS:			      (*p)[i] = nmsclass;			      break;			 case INV:			      /* This will happen if period is not a				 name character. */			      (*p)[i] = minclass;			      break;			 default:			      abort();			 }		    }	       }	  }     }     if (nlextran) {	  memcpy((UNIV)lextran, (UNIV)nlextran, 256);	  frem((UNIV)nlextran);     }     if (nlextoke) {	  frem((UNIV)nlextoke);	  nlextoke = 0;     }     }/* Munge parse tables so that empty start and end tags are not recognized. */static VOID noemptytag(){     static struct parse *pcbs[] = { &pcbconm, &pcbcone, &pcbconr, &pcbconc };     int i;          for (i = 0; i < SIZEOF(pcbs); i++) {	  int maxclass, maxstate;	  int j, k, act;	  UNCH *plex = pcbs[i]->plex;	  UNCH **ptab = pcbs[i]->ptab;	  /* Figure out the maximum lexical class. */	  maxclass = 0;	  for (j = 0; j < 256; j++)	       if (plex[j] > maxclass)		    maxclass = plex[j];	  /* Now figure out the maximum state number and at the same time	     change actions. */	  maxstate = 0;	  for (j = 0; j <= maxstate; j += 2) {	       for (k = 0; k <= maxclass; k++)		    if (ptab[j][k] > maxstate)			 maxstate = ptab[j][k];	       /* If the '>' class has an empty start or end tag action,		  change it to the action that the NMC class has. */	       act = ptab[j + 1][plex['>']];	       if (act == NET_ || act == NST_)		    ptab[j + 1][plex['>']] = ptab[j + 1][plex['_']];	  }     }}/* Lookup the value of the entry in pmap PTR whose key is KEY. */static UNIV pmaplookup(ptr, key)struct pmap *ptr;char *key;{     for (; ptr->name; ptr++)	  if (strcmp(key, ptr->name) == 0)	       return ptr->value;     return 0;}/* Return an ASCII representation of N. 
*/static UNCH *ltous(n)long n;{     static char buf[sizeof(long)*3 + 2];     sprintf(buf, "%ld", n);     return (UNCH *)buf;}VOID sgmlwrsd(fp)FILE *fp;{     int i;     int changed;     char *p;     char uc[256];		/* upper case characters (with different lower				   case characters) */     char lcletter[256];	/* LC letters: a-z */     fprintf(fp, "<!SGML \"%s\"\n", standard);     fprintf(fp,	     "CHARSET\nBASESET \"-//Dummy//CHARSET Dummy//%s\"\nDESCSET\n",	     SYSTEM_CHARSET_DESIGNATING_SEQUENCE);          if (!done_nonsgml) {	  done_nonsgml = 1;	  for (i = 0; i < 256; i++)	       if ((char_flags[i] & (CHAR_SIGNIFICANT | CHAR_SHUNNED))		   == CHAR_SHUNNED)	            char_flags[i] |= CHAR_NONSGML;     }     i = 0;     while (i < 256) {	  int j;	  for (j = i + 1; j < 256; j++)	       if ((char_flags[j] & CHAR_NONSGML)		   != (char_flags[i] & CHAR_NONSGML))		    break;	  if (char_flags[i] & CHAR_NONSGML)	       fprintf(fp, "%d %d UNUSED\n", i, j - i);	  else	       fprintf(fp, "%d %d %d\n", i, j - i, i);	  i = j;     }     fprintf(fp, "CAPACITY\n");     changed = 0;     for (i = 0; i < NCAPACITY; i++)	  if (refcapset[i] != sd.capacity[i]) {	       if (!changed) {		    fprintf(fp, "SGMLREF\n");		    changed = 1;	       }	       fprintf(fp, "%s %ld\n", captab[i], sd.capacity[i]);	  }     if (!changed)	  fprintf(fp, "PUBLIC \"%s\"\n", capset_map[0].name);     fprintf(fp, "SCOPE DOCUMENT\n");          fprintf(fp, "SYNTAX\nSHUNCHAR");     for (i = 0; i < 256; i++)	  if (char_flags[i] & CHAR_SHUNNED)	       break;     if (i == 256)	  fprintf(fp, " NONE\n");     else {	  for (; i < 256; i++)	       if (char_flags[i] & CHAR_SHUNNED)		    fprintf(fp, " %d", i);	  fprintf(fp, "\n");     }     fprintf(fp,	     "BASESET \"-//Dummy//CHARSET Dummy//%s\"\nDESCSET 0 256 0\n",	     SYSTEM_CHARSET_DESIGNATING_SEQUENCE);     fprintf(fp, "FUNCTION\nRE %d\nRS %d\nSPACE %d\nTAB SEPCHAR %d\n",	     RECHAR, RSCHAR, ' ', TABCHAR);     MEMZERO((UNIV)uc, 256);     for (i = 0; i < 256; i++)	 
 if (lextran[i] != i)	       uc[lextran[i]] = 1;     MEMZERO((UNIV)lcletter, 256);     for (p = "abcdefghijklmnopqrstuvwxyz"; *p; p++)	  lcletter[(unsigned char)*p]= 1;     fprintf(fp, "NAMING\n");     fputs("LCNMSTRT \"", fp);     for (i = 0; i < 256; i++)	  if (lextoke[i] == NMS && !uc[i] && !lcletter[i])	       fprintf(fp, "&#%d;", i);     fputs("\"\n", fp);     fputs("UCNMSTRT \"", fp);     for (i = 0; i < 256; i++)	  if (lextoke[i] == NMS && !uc[i] && !lcletter[i])	       fprintf(fp, "&#%d;", lextran[i]);     fputs("\"\n", fp);     fputs("LCNMCHAR \"", fp);     for (i = 0; i < 256; i++)	  if (lextoke[i] == NMC && !uc[i])	       fprintf(fp, "&#%d;", i);     fputs("\"\n", fp);     fputs("UCNMCHAR \"", fp);     for (i = 0; i < 256; i++)	  if (lextoke[i] == NMC && !uc[i])	       fprintf(fp, "&#%d;", lextran[i]);     fputs("\"\n", fp);     fprintf(fp, "NAMECASE\nGENERAL %s\nENTITY %s\n",	     sd.namecase[0] ? "YES" : "NO",	     sd.namecase[1] ? "YES" : "NO");     fprintf(fp, "DELIM\nGENERAL SGMLREF\nSHORTREF %s\n",	     sd.shortref ? "SGMLREF" : "NONE");     fprintf(fp, "NAMES SGMLREF\n");     if (newkey) {	  /* The reference key was saved in newkey. */	  for (i = 0; i < NKEYS; i++)	       if (newkey[i][0])		    fprintf(fp, "%s %s\n", newkey[i], key[i]);     }     fprintf(fp, "QUANTITY SGMLREF\n");     if (quantity_changed)	  for (i = 0; i < NQUANTITY; i++)	       if (quantity_changed[i])		    fprintf(fp, "%s %d\n", quantity_names[i], sd.quantity[i]);     fprintf(fp,	     "FEATURES\nMINIMIZE\nDATATAG NO OMITTAG %s RANK NO SHORTTAG %s\n",	     sd.omittag ? "YES" : "NO",	     sd.shorttag ? "YES" : "NO");     fprintf(fp, "LINK SIMPLE NO IMPLICIT NO EXPLICIT NO\n");     fprintf(fp, "OTHER CONCUR NO ");     if (sd.subdoc > 0)	  fprintf(fp, "SUBDOC YES %ld ", sd.subdoc);     else	  fprintf(fp, "SUBDOC NO ");     fprintf(fp, "FORMAL %s\n", sd.formal ? 
"YES" : "NO");     fprintf(fp, "APPINFO NONE");     fprintf(fp, ">\n");}/* Save an error to be printed only if FORMAL is declared as YES. */staticVOID sdsaverr(number, parm1, parm2)UNS number;UNCH *parm1;UNCH *parm2;{     saved_errs[nsaved_errs++] = savmderr(number, parm1, parm2);}/*Local Variables:c-indent-level: 5c-continued-statement-offset: 5c-brace-offset: -5c-argdecl-indent: 0c-label-offset: -5End:*/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -