⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sgmldecl.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
static int sdcapacity(tbuf)UNCH *tbuf;{     int ncap;     int i;     if (sdckname(tbuf, kcapacity) == FAIL)	  return FAIL;     if (sdparm(tbuf, 0) != NAS1) {	  sderr(120, (UNCH *)0, (UNCH *)0);	  return FAIL;     }     if (matches(tbuf, kpublic))	  return sdpubcapacity(tbuf);     if (!matches(tbuf, ksgmlref)) {	  sderr(E_CAPACITY, tbuf+1, (UNCH *)0);	  return FAIL;     }     memcpy((UNIV)sd.capacity, (UNIV)refcapset, sizeof(sd.capacity));     ncap = 0;     for (;;) {	  int capno = -1;	  int i;	  if (sdparm(tbuf, 0) != NAS1)	       break;	  for (i = 0; i < SIZEOF(captab); i++)	       if (matches(tbuf, captab[i])) {		    capno = i;		    break;	       }	  if (capno < 0)	       break;	  if (sdparm(tbuf, 0) != NUM1) {	       sderr(E_XNUM, (UNCH *)0, (UNCH *)0);	       return FAIL;	  }	  sd.capacity[capno] = atol((char *)tbuf);	  ncap++;     }     if (ncap == 0) {	  sderr(E_CAPMISSING, (UNCH *)0, (UNCH *)0);	  return FAIL;     }     for (i = 1; i < NCAPACITY; i++)	  if (sd.capacity[i] > sd.capacity[0])	       sderr(E_TOTALCAP, (UNCH *)captab[i], (UNCH *)0);     return SUCCESS;}/* Parse a CAPACITY section that started with PUBLIC.  Must do onetoken lookahead, since sdcapacity() also does. */static int sdpubcapacity(tbuf)UNCH *tbuf;{     UNIV ptr;     if (sdparm(tbuf, &pcblitv) != LIT1) {	  sderr(123, (UNCH *)0, (UNCH *)0);	  return FAIL;     }     sdfixstandard(tbuf, 1);     ptr = pmaplookup(capset_map, (char *)tbuf);     if (!ptr)	  sderr(E_CAPSET, tbuf, (UNCH *)0);     else	  memcpy((UNIV)sd.capacity, (UNIV)ptr, sizeof(sd.capacity));     (void)sdparm(tbuf, 0);     return SUCCESS;}/* Parse the SCOPE section. Uses no lookahead. 
*/static int sdscope(tbuf)UNCH *tbuf;{     if (sdckname(tbuf, kscope) == FAIL)	  return FAIL;     if (sdparm(tbuf, 0) != NAS1) {	  sderr(120, (UNCH *)0, (UNCH *)0);	  return FAIL;     }     if (matches(tbuf, kdocument))	  ;     else if (matches(tbuf, kinstance))	  sderr(E_INSTANCE, (UNCH *)0, (UNCH *)0);     else {	  sderr(E_SCOPE, tbuf+1, (UNCH *)0);	  return FAIL;     }     return SUCCESS;}/* Parse the SYNTAX section.  Uses one token lookahead. */static int sdsyntax(tbuf)UNCH *tbuf;{     if (sdname(tbuf, ksyntax) == FAIL) return FAIL;     if (sdparm(tbuf, 0) != NAS1) {	  sderr(120, (UNCH *)0, (UNCH *)0);	  return FAIL;     }     if (matches(tbuf, kpublic))	  return sdpubsyntax(tbuf);     return sdxsyntax(tbuf);}/* Parse the SYNTAX section which starts with PUBLIC.  Uses one tokenlookahead. */static int sdpubsyntax(tbuf)UNCH *tbuf;{     int nswitches;     if (sdparm(tbuf, &pcblitv) != LIT1)	  return FAIL;     sdfixstandard(tbuf, 1);     if (ustrcmp(tbuf, CORE_SYNTAX) == 0)	  sd.shortref = 0;     else if (ustrcmp(tbuf, REFERENCE_SYNTAX) == 0)	  sd.shortref = 1;     else	  sderr(E_SYNTAX, tbuf, (UNCH *)0);     if (sdparm(tbuf, 0) != NAS1)	  return SUCCESS;     if (!matches(tbuf, kswitches))	  return SUCCESS;     nswitches = 0;     for (;;) {	  int errsw = 0;	  if (sdparm(tbuf, 0) != NUM1)	       break;	  if (atol((char *)tbuf) > 255) {	       sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);	       errsw = 1;	  }	  if (sdparm(tbuf, 0) != NUM1) {	       sderr(E_XNUM, (UNCH *)0, (UNCH *)0);	       return FAIL;	  }	  if (!errsw) {	       if (atol((char *)tbuf) > 255)		    sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);	  }	  nswitches++;     }     if (nswitches == 0) {	  sderr(E_XNUM, (UNCH *)0, (UNCH *)0);	  return FAIL;     }     sderr(E_SWITCHES, (UNCH *)0, (UNCH *)0);     return SUCCESS;}/* Parse an explicit concrete syntax. Uses one token lookahead. 
*/staticint sdxsyntax(tbuf)UNCH *tbuf;{     static int (*section[]) P((UNCH *)) = {	  sdshunchar,	  sdsynref,	  sdfunction,	  sdnaming,	  sddelim,	  sdnames,	  sdquantity,     };     int i;     for (i = 0; i < SIZEOF(section); i++)	  if ((*section[i])(tbuf) == FAIL)	       return FAIL;     return SUCCESS;}/* Parse the SHUNCHAR section. Uses one token lookahead. */staticint sdshunchar(tbuf)UNCH *tbuf;{     int i;     for (i = 0; i < 256; i++)	  char_flags[i] &= ~CHAR_SHUNNED;     if (sdckname(tbuf, kshunchar) == FAIL)	  return FAIL;     if (sdparm(tbuf, 0) == NAS1) {	  if (matches(tbuf, knone)) {	       (void)sdparm(tbuf, 0);	       return SUCCESS;	  }	  if (matches(tbuf, kcontrols)) {	       for (i = 0; i < 256; i++)		    if (ISASCII(i) && iscntrl(i))			 char_flags[i] |= CHAR_SHUNNED;	       if (sdparm(tbuf, 0) != NUM1)		    return SUCCESS;	  }     }     if (pcbsd.action != NUM1) {	  sderr(E_SHUNCHAR, (UNCH *)0, (UNCH *)0);	  return FAIL;     }     do {	  long n = atol((char *)tbuf);	  if (n > 255)	       sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);	  else	       char_flags[(int)n] |= CHAR_SHUNNED;     } while (sdparm(tbuf, 0) == NUM1);     return SUCCESS;}/* Parse the syntax reference character set. Uses one token lookahead. */staticint sdsynref(tbuf)UNCH *tbuf;{     return sdcsdesc(tbuf, synrefcharset);}/* Translate a character number from the syntax reference character setto the system character set. If it can't be done, give an error messageand return -1. 
*/staticint sdtranscharnum(tbuf)UNCH *tbuf;{     long n = atol((char *)tbuf);     if (n > 255) {	  sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);	  return -1;     }     return sdtranschar((int)n);}staticint sdtranschar(n)int n;{     int ch = synrefcharset[n];     if (ch >= 0)	  return ch;     switch (ch) {     case UNUSED:	  sderr(E_SYNREFUNUSED, ltous((long)n), (UNCH *)0);	  break;     case UNDESC:	  sderr(E_SYNREFUNDESC, ltous((long)n), (UNCH *)0);	  break;     case UNKNOWN:	  sderr(E_SYNREFUNKNOWN, ltous((long)n), (UNCH *)0);	  break;     case UNKNOWN_SET:	  sderr(E_SYNREFUNKNOWNSET, ltous((long)n), (UNCH *)0);	  break;     default:	  abort();     }     return -1;}/* Parse the function section. Uses two tokens lookahead. "NAMING"could be a function name. */staticint sdfunction(tbuf)UNCH *tbuf;{     static UNCH *fun[] = { kre, krs, kspace };     static int funval[] = { RECHAR, RSCHAR, ' ' };     int i;     int had_tab = 0;     int changed = 0;		/* attempted to change reference syntax */     if (sdckname(tbuf, kfunction) == FAIL)	  return FAIL;     for (i = 0; i < SIZEOF(fun); i++) {	  int ch;	  if (sdname(tbuf, fun[i]) == FAIL)	       return FAIL;	  if (sdparm(tbuf, 0) != NUM1) {	       sderr(E_XNUM, (UNCH *)0, (UNCH *)0);	       return FAIL;	  }	  ch = sdtranscharnum(tbuf);	  if (ch >= 0 && ch != funval[i])	       changed = 1;     }     for (;;) {	  int tabsw = 0;	  int namingsw = 0;	  if (sdparm(tbuf, 0) != NAS1) {	       sderr(120, (UNCH *)0, (UNCH *)0);	       return FAIL;	  }	  if (matches(tbuf, (UNCH *)"TAB")) {	       tabsw = 1;	       if (had_tab)		    sderr(E_FUNDUP, (UNCH *)0, (UNCH *)0);	  }	  else {	       for (i = 0; i < SIZEOF(fun); i++)		    if (matches(tbuf, fun[i]))			 sderr(E_BADFUN, fun[i], (UNCH *)0);	       if (matches(tbuf, knaming))		    namingsw = 1;	       else		    changed = 1;	  }	  if (sdparm(tbuf, 0) != NAS1) {	       sderr(120, (UNCH *)0, (UNCH *)0);	       return FAIL;	  }	  if (namingsw) {	       if (matches(tbuf, klcnmstrt))		    break;	 
      changed = 1;	  }	  if (sdparm(tbuf, 0) != NUM1) {	       sderr(E_XNUM, (UNCH *)0, (UNCH *)0);	       return FAIL;	  }	  if (tabsw && !had_tab) {	       int ch = sdtranscharnum(tbuf);	       if (ch >= 0 && ch != TABCHAR)		    changed = 1;	       had_tab = 1;	  }     }     if (!had_tab)	  changed = 1;     if (changed)	  sderr(E_FUNCHAR, (UNCH *)0, (UNCH *)0);     return SUCCESS;}/* Parse the NAMING section.  Uses no lookahead. */staticint sdnaming(tbuf)UNCH *tbuf;{     int i;     int bad = 0;     static UNCH *classes[] = { klcnmstrt, kucnmstrt, klcnmchar, kucnmchar };     static UNCH *types[] = { kgeneral, kentity };#define NCLASSES SIZEOF(classes)     int bufsize = 4;		/* allocated size of buf */     UNCH *buf = (UNCH *)rmalloc(bufsize); /* holds characters					      in naming classes */     int bufi = 0;		/* next index into buf */     int start[NCLASSES];	/* index of first character for each class */     int count[NCLASSES];	/* number of characters for each class */     for (i = 0; i < NCLASSES; i++) {	  UNCH *s;	  if (sdckname(tbuf, classes[i]) == FAIL) {	       frem((UNIV)buf);	       return FAIL;	  }	  if (sdparm(tbuf, &pcblitp) != LIT1) {	       sderr(123, (UNCH *)0, (UNCH *)0);	       frem((UNIV)buf);	       return FAIL;	  }	  start[i] = bufi;	  	  for (s = tbuf; *s; s++) {	       int c = *s;	       if (c == DELNONCH) {		    c = UNSHIFTNON(*s);		    s++;	       }	       c = sdtranschar(c);	       if (c < 0)		    bad = 1;	       else if ((char_flags[c] & (CHAR_SIGNIFICANT | CHAR_MAGIC))			&& c != '.' 
&& c != '-') {		    int class = lextoke[c];		    if (class == SEP || class == SP || class == NMC			|| class == NMS || class == NU)			 sderr(E_NMBAD, ltous((long)c), (UNCH *)0);		    else			 sderr(E_NMUNSUP, ltous((long)c), (UNCH *)0);		    bad = 1;	       }	       if (bufi >= bufsize)		    buf = (UNCH *)rrealloc((UNIV)buf, bufsize *= 2);	       buf[bufi++] = c;	  }	  count[i] = bufi - start[i];	  (void)sdparm(tbuf, 0);     }     if (!bad && count[0] != count[1]) {	  sderr(E_NMSTRTCNT, (UNCH *)0, (UNCH *)0);	  bad = 1;     }     if (!bad && count[2] != count[3]) {	  sderr(E_NMCHARCNT, (UNCH *)0, (UNCH *)0);	  bad = 1;     }     if (!bad) {	  nlextoke = (UNCH *)rmalloc(256);	  memcpy((UNIV)nlextoke, lextoke, 256);	  nlextoke['.'] = nlextoke['-'] = INV;	  nlextran = (UNCH *)rmalloc(256);	  memcpy((UNIV)nlextran, lextran, 256);	  for (i = 0; i < count[0]; i++) {	       UNCH lc = buf[start[0] + i];	       UNCH uc = buf[start[1] + i];	       nlextoke[lc] = NMS;	       nlextoke[uc] = NMS;	       nlextran[lc] = uc;	  }		       	  for (i = 0; i < count[2]; i++) {	       UNCH lc = buf[start[2] + i];	       UNCH uc = buf[start[3] + i];	       if (nlextoke[lc] == NMS) {		    sderr(E_NMDUP, ltous((long)lc), (UNCH *)0);		    bad = 1;	       }	       else if (nlextoke[uc] == NMS) {		    sderr(E_NMDUP, ltous((long)uc), (UNCH *)0);		    bad = 1;	       }	       else {		    nlextoke[lc] = NMC;		    nlextoke[uc] = NMC;		    nlextran[lc] = uc;	       }	  }	  if (nlextoke['-'] != NMC) {	       sderr(E_NMMINUS, (UNCH *)0, (UNCH *)0);	       bad = 1;	  }	  if (bad) {	       if (nlextoke) {		    frem((UNIV)nlextoke);		    nlextoke = 0;	       }	       if (nlextran) {		    frem((UNIV)nlextran);		    nlextran = 0;	       }	  }     }     frem((UNIV)buf);     if (sdckname(tbuf, knamecase) == FAIL)	  return FAIL;     for (i = 0; i < SIZEOF(types); ++i) {	  if (sdname(tbuf, types[i]) == FAIL)	       return FAIL;	  if (sdparm(tbuf, 0) != NAS1) {	       sderr(120, (UNCH *)0, (UNCH *)0);	       
return FAIL;	  }	  if (matches(tbuf, kyes))	       sd.namecase[i] = 1;	  else if (matches(tbuf, kno))	       sd.namecase[i] = 0;	  else {	       sderr(E_YESNO, tbuf+1, (UNCH *)0);	       return FAIL;	  }     }     return SUCCESS;}/* Parse the DELIM section. Uses one token lookahead. */staticint sddelim(tbuf)UNCH *tbuf;{     int changed = 0;     if (sdname(tbuf, kdelim) == FAIL	 || sdname(tbuf, kgeneral) == FAIL	 || sdname(tbuf, ksgmlref) == FAIL)	  return FAIL;     for (;;) {	  if (sdparm(tbuf, 0) != NAS1) {	       sderr(120, (UNCH *)0, (UNCH *)0);	       return FAIL;	  }	  if (matches(tbuf, kshortref))	       break;	  if (sdparm(tbuf, &pcblitp) != LIT1) {	       sderr(123, (UNCH *)0, (UNCH *)0);	       return FAIL;	  }	  changed = 1;     }     if (changed) {	  sderr(E_GENDELIM, (UNCH *)0,(UNCH *)0);	  changed = 0;     }     if (sdparm(tbuf, 0) != NAS1) {	  sderr(120, (UNCH *)0, (UNCH *)0);	  return FAIL;     }     if (matches(tbuf, ksgmlref))	  sd.shortref = 1;     else if (matches(tbuf, knone))	  sd.shortref = 0;     else {	  sderr(118, tbuf+1, ksgmlref);	/* probably they forgot SGMLREF */	  return FAIL;     }     while (sdparm(tbuf, &pcblitp) == LIT1)	  changed = 1;     if (changed)	  sderr(E_SRDELIM, (UNCH *)0, (UNCH *)0);     return SUCCESS;}/* Parse the NAMES section. Uses one token lookahead. 
*/staticint sdnames(tbuf)UNCH *tbuf;{     int i;     if (sdckname(tbuf, knames) == FAIL)	  return FAIL;     if (sdname(tbuf, ksgmlref) == FAIL)	  return FAIL;     while (sdparm(tbuf, 0) == NAS1) {	  int j;	  if (matches(tbuf, kquantity))	       break;	  for (i = 0; i < NKEYS; i++)	       if (matches(tbuf, key[i]))		    break;	  if (i >= NKEYS) {	       sderr(E_BADKEY, tbuf+1, (UNCH *)0);	       return FAIL;	  }	  if (sdparm(tbuf, &pcblitp) != NAS1) {	       sderr(120, (UNCH *)0, (UNCH *)0);	       return FAIL;	  }	  if (!newkey) {	       newkey = (UNCH (*)[REFNAMELEN+1])rmalloc((REFNAMELEN+1)*NKEYS);	       MEMZERO((UNIV)newkey, (REFNAMELEN+1)*NKEYS);	  }	  for (j = 0; j < NKEYS; j++) {	       if (matches(tbuf, key[j])) {		    sderr(E_REFNAME, tbuf + 1, (UNCH *)0);		    break;	       }	       if (matches(tbuf, newkey[j])) {		    sderr(E_DUPNAME, tbuf + 1, (UNCH *)0);		    break;	       }	  }	  if (j >= NKEYS)	       ustrcpy(newkey[i], tbuf + 1);     }     /* Now install the new keys. */     if (newkey) {	  for (i = 0; i < NKEYS; i++)	       if (newkey[i][0] != '\0') {		    UNCH temp[REFNAMELEN + 1];		    		    ustrcpy(temp, key[i]);		    ustrcpy(key[i], newkey[i]);		    ustrcpy(newkey[i], temp);	       }     }     return SUCCESS;}/* Parse the QUANTITY section. Uses one token lookahead. */static int sdquantity(tbuf)UNCH *tbuf;{     int quantity[NQUANTITY];     int i;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -