⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pars1.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
     return ETG_;}/* SHORTREF: Process a short (alternative) reference to an entity.             Returns ENTUNDEF if entity is not defined, otherwise returns             the return code from stagetd or etagetd if the entity was             a tag, or zero if an error occurred somewhere.*/int shortref(srn, pcb)int srn;                      /* Short reference number. */struct parse *pcb;            /* Parse control block for this parse. */{     int rc;                  /* Return code from entopen. */     if (tags[ts].tsrm==SRMNULL || !tags[ts].tsrm[srn]) return ENTUNDEF;     rc = entopen(tags[ts].tsrm[srn]);     if (rc==ENTDATA) return DEF_;     if (rc==ENTPI) return PIS_;     return(0);}/* PARSEPRO: Parse prolog.             Note: ptpro cannot overrun tbuf (and therefore needn't be             tested), as long as the buffer exceeds the longest             lookahead sequence in the content parse tables.*/int parsepro(){     struct parse *oldpcb;     while (1) {          int rc;                  /* Return code: DAF MSS DCE */          switch (parse(propcb)) {          case LAS_:          /* Start lookahead buffer with current char. */               *(ptpro = data = tbuf+1) = *FPOS;               continue;          case LAM_:          /* Move character to lookahead buffer. */               *++ptpro = *FPOS;               continue;          case LAF_:          /* Return data in lookahead buffer. */               datalen = (UNS)(ptpro+1 - data);               REPEATCC;               rc = DAF_;               break;         /* Prolog ended; data pending. */          case DTD_:          /* Process document type declaration. */               parsenm(tbuf, NAMECASE); /* Get declaration name. */               if (!ustrcmp(tbuf+1, sgmlkey)		   && !dtdsw && !sgmlsw++) {#if 0		    parse(&pcbmdi);#endif		    /* If we got some appinfo, return. 
*/		    if (sgmldecl())			 return APP_;		    continue;	       }               if (!ustrcmp(tbuf+1, key[KDOCTYPE]) && !dtdsw++) {		    startdtd();		    mddtds(tbuf);		    continue;	       }               sgmlerr(E_MDNAME, propcb, tbuf+1, (UNCH *)0);               continue;          case DTE_:          /* DOCTYPE declaration (and prolog) ended. */               REPEATCC;      /* Put back char that followed MSC. */	       if (es != 0)		    sgmlerr(143, propcb, (UNCH *)0, (UNCH *)0);               else if (dtdrefsw) {/* Process referenced DTD before real DTE. */                    dtdrefsw = 0; /* Keep us from coming through here again. */                    REPEATCC; /* Put back MSC so it follows referenced DTD. */                    entref(indtdent);               }               else {		    if (mslevel > 0) {			 sgmlerr(230, propcb, (UNCH *)0, (UNCH *)0);			 mslevel = 0;			 msplevel = 0;		    }		    mddtde(tbuf);	       }               continue;          case MD_:	       /* Process markup declaration within DTD or LPD. */               parsenm(tbuf, NAMECASE); /* Get declaration name. */               if (!ustrcmp(tbuf+1, key[KENTITY]))		    mdentity(tbuf);               else if (!ustrcmp(tbuf+1, key[KUSEMAP]))		    mdsrmuse(tbuf);               else if (!ustrcmp(tbuf+1, key[KATTLIST]))		    mdadl(tbuf);               else if (!ustrcmp(tbuf+1, key[KSHORTREF]))		    mdsrmdef(tbuf);               else if (!ustrcmp(tbuf+1, key[KELEMENT]))		    mdelem(tbuf);               else if (!ustrcmp(tbuf+1, key[KNOTATION]))		    mdnot(tbuf);               else		    sgmlerr(E_MDNAME, propcb, tbuf+1, (UNCH *)0);               continue;          case MDC_:          /* Process markup declaration comment. */	       sgmlsw++;      /* SGML declaration not allowed after comment */               parsemd(tbuf, NAMECASE, (struct parse *)0, NAMELEN);               continue;          case MSS_:	      /* Process marked section start. 
*/	       oldpcb = propcb;               propcb = mdms(tbuf, propcb);               if (propcb==&pcbmsc || propcb==&pcbmsrc) {		    if (oldpcb == &pcbmds)			 sgmlerr(135, oldpcb, (UNCH *)0, (UNCH *)0);		    conpcb = propcb;		    rc = DCE_;		    break;	       }               continue;          case MSE_:	      /* Process marked section end. */               if (mdmse()) propcb = &pcbmds;               continue;	  case MSP_:	      /* Marked section start in prolog outside DTD */	       rc = MSS_;	       break;          case PIE_:          /* PI entity: same as PIS_. */               return(PIS_);          case EOD_:          /* Return end of primary entity. */	       if (dtdsw && propcb == &pcbpro) {		    /* We've had a DTD, so check it. */		    setdtype();		    checkdtd();	       }	       if (!sw.onlypro || propcb != &pcbpro || !dtdsw)		    sgmlerr(127, propcb, (UNCH *)0, (UNCH *)0);               return propcb->action;          case PIS_:          /* Return processing instruction (string). */	       sgmlsw++;      /* SGML declaration not allowed after PI */               return((int)propcb->action);  /* Prolog will continue later. */          case CIR_:          /* Chars ignored; trying to resume parse. */#ifdef LOG_IGNORED_CHARACTERS               synerr(E_RESTART, propcb);#endif               REPEATCC;               continue;	  case ETE_:	      /* End tag ended prolog */	       REPEATCC;	       /* fall through */	  case STE_:	      /* Start tag ended prolog */	       REPEATCC;	       REPEATCC;	       rc = STE_;	       break;          case PEP_:          /* Previous character ended prolog. */               REPEATCC;          case DCE_:          /* Data character ended prolog. */               REPEATCC;               rc = DCE_;               break;	  case EE_:	      /* Illegal entity end in ignored marked section. */	       /* An error message has already been given. 
*/	       continue;	  default:	       abort();          } /* switch */          setdtype();		   /* First pass only: set document type. */	  checkdtd();	  if (sw.onlypro)	       return EOD_;          TRACESET();              /* Set trace switches. */	  endprolog();          /* *DOC is first element; stack it at level 0. */          stack(newetd = nextetd = stagreal = etagreal = docetd);          return(rc);     } /* while */}/* Allocate buffers that are used in the DTD. */VOID startdtd(){     nmgrp = (struct etd **)rmalloc((GRPCNT+1)*sizeof(struct etd *));     nnmgrp = (PDCB *)rmalloc((GRPCNT+1)*sizeof(PDCB));     gbuf = (struct thdr *)rmalloc((GRPGTCNT+3)*sizeof(struct thdr));     /* The extra 1 is for parsing the name of a parameter entity in	mdentity(). */     nmbuf = (UNCH *)rmalloc(NAMELEN+3);     pubibuf = (UNCH *)rmalloc(LITLEN+1);     sysibuf = (UNCH *)rmalloc(LITLEN+1);     commbufs();     doincludes();}staticVOID checkdtd(){     struct dcncb *np;     struct srh *sp;     if (sw.swundef) {	  int i;	  struct etd *ep;	  for (i = 0; i < ETDHASH; i++)	       for (ep = etdtab[i]; ep; ep = ep->etdnext)		    if (!ep->etdmod)			 sgmlerr(140, (struct parse *)0, ep->etdgi + 1,				 (UNCH *)0);     }     for (sp = srhtab[0]; sp; sp = sp->enext)	  if (sp->srhsrm[0] == 0)	       sgmlerr(152, (struct parse *)0, sp->ename + 1, (UNCH *)0);	  else {	       int i;	       for (i = 1; i < lex.s.dtb[0].mapdata + 1; i++) {		    struct entity *ecb = sp->srhsrm[i];		    if (ecb && !ecb->estore) {			 sgmlerr(93, (struct parse *)0,				 ecb->ename + 1,				 sp->srhsrm[0]->ename + 1);			 sp->srhsrm[i] = 0;		    }	       }	  }     for (np = dcntab[0]; np; np = np->enext)	  if (!np->defined)	       sgmlerr(192, (struct parse *)0, np->ename + 1, (UNCH *)0);}/* Return non-zero if s is a valid parameter entity name.If so put a transformed name in entbuf. */staticint pentname(s)char *s;{     int i;     if (lextoke[(UNCH)*s] != NMS)	  return 0;     entbuf[2] = ENTCASE ? 
lextran[(UNCH)*s] : (UNCH)*s;     for (i = 1; s[i]; i++) {	  if (i > NAMELEN - 1)	       return 0;	  if (lextoke[(UNCH)s[i]] < NMC || s[i] == EOBCHAR)	       return 0;	  entbuf[i + 2] = ENTCASE ? lextran[(UNCH)s[i]] : (UNCH)s[i];     }     entbuf[1] = lex.d.pero;     entbuf[i + 2] = '\0';     entbuf[0] = (UNCH)(i + 3);	/* length byte, PERO and '\0' */     return 1;}/* Handle sw.includes. */staticVOID doincludes(){     char **p;     if (!sw.includes)	  return;     for (p = sw.includes; *p; p++) {	  if (pentname(*p)) {	       if (!entfind(entbuf)) {		    union etext etx;		    etx.c = savestr(key[KINCLUDE]);		    entdef(entbuf, ESM, &etx);		    ++ds.ecbcnt;		    ds.ecbtext += ustrlen(key[KINCLUDE]);	       }	  }	  else	       sgmlerr(138, (struct parse *)0, (UNCH *)*p, (UNCH *)0);     }}/* Allocate buffers that are use both in the DTD and the instance. */staticVOID commbufs(){     al = (struct ad *)rmalloc((ATTCNT+2)*sizeof(struct ad));     lbuf = (UNCH *)rmalloc(LITLEN + 1);}staticstruct mpos *newmpos(){     int j;     unsigned long *h;     struct mpos *p = (struct mpos *)rmalloc((GRPLVL+2)*sizeof(struct mpos));     assert(grplongs > 0);     h = (unsigned long *)rmalloc((GRPLVL+2)*grplongs*sizeof(unsigned long));     for (j = 0; j < GRPLVL+2; j++) {	  p[j].h = h;	  h += grplongs;     }     return p;}/* Perform end of prolog buffer allocation. */VOID endprolog(){     int i;          ambigfree();     if (dtdsw) {	  frem((UNIV)nmgrp);	  frem((UNIV)nnmgrp);	  frem((UNIV)gbuf);	  frem((UNIV)nmbuf);	  frem((UNIV)sysibuf);	  frem((UNIV)pubibuf);     }     else {	  commbufs();	  doincludes();     }     scbsgml = (struct restate *)rmalloc((TAGLVL+1)*sizeof(struct restate));     tags = (struct tag *)rmalloc((TAGLVL+1)*sizeof(struct tag));     grplongs = (GRPCNT + LONGBITS - 1)/LONGBITS;     for (i = 0; i < TAGLVL+1; i++)	  tags[i].tpos = newmpos();     savedpos = newmpos();}/* SETDTYPE: Establish specified or default document type.*/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -