⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sgml2.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
/* Added exiterr() for terminal errors to prevent SGML.MSG errors.            */
#include "sgmlincl.h"         /* #INCLUDE statements for SGML parser. */

/* Return code from io* functions.
   NOTE(review): nothing in this file assigns iorc directly; it is only
   tested after fileopen(), fileread() and filecont().  Presumably those are
   macros (from sgmlincl.h) that store their result here -- confirm.
*/
static int iorc;

/* NOTE(review): FPOS, FBUF, RCNT, CCO, RSCC, CC, NEXTC, COPIEDSW, ECB,
   ECBPTR, SCB, ENTITY and FILESW are not defined in this file.  From their
   use below they appear to designate fields of the current source control
   block, scbs[es] -- confirm against the header before relying on that.
*/

/* ENTDEF: Process an entity definition and return the pointer to it.
           The entity text must be in permanent storage.
           There is no checking to see if the entity already exists;
           the caller must have done that.

           The entry is obtained from hin() on the entity table (etab);
           ETEXTSZ bytes of *petx are copied into its etx member and the
           storage class is recorded in its estore member.
*/
#ifdef USE_PROTOTYPES
PECB entdef(UNCH *ename, UNCH estore, union etext *petx)
#else
PECB entdef(ename, estore, petx)
UNCH *ename;                  /* Entity name (with length and EOS). */
UNCH estore;                  /* Entity storage class. */
union etext *petx;            /* Ptr to entity text union. */
#endif
{
     PECB p;

     p = (PECB)hin((THASH)etab, ename, hash(ename, ENTHASH), ENTSZ);
     memcpy((UNIV)&p->etx, (UNIV)petx, ETEXTSZ);
     p->estore = estore;
     TRACEECB("ENTDEF", p);
     return(p);
}

/* ENTFIND: If an entity exists, return ptr to its ecb.
            Return NULL if it is not defined.
            The lookup is a plain hfind() on the entity table (etab).
*/
PECB entfind(ename)
UNCH *ename;                  /* Entity name (with length and EOS). */
{
     PECB p;

     p = (PECB)hfind((THASH)etab, ename, hash(ename, ENTHASH));
     TRACEECB("ENTFIND", p);
     return p;
}

/* ENTREF: Process a general or parameter entity reference.
           If the entity is defined it returns the return code from ENTOPEN.
           It returns ENTUNDEF for undefined parameter entity references
           and for general entity references when defaulting is not allowed.
           Otherwise, it uses the default entity text.

           An entry whose estore is 0 is treated the same as a missing one.
*/
int entref(ename)
UNCH *ename;                  /* Entity name (with length and EOS). */
{
     PECB ecb;                /* Entity control block. */

     /* Get the entity control block, if the entity has been defined. */
     if ((ecb = (PECB)hfind((THASH)etab, ename, hash(ename, ENTHASH)))==0
         || ecb->estore == 0) {
          /* No default is used when the name starts with lex.d.pero
             (the parameter entity case, per the header comment) or when
             no default entity (ecbdeflt) exists: report error 35.
          */
          if (ename[1] == lex.d.pero || ecbdeflt == 0) {
               sgmlerr(35, (struct parse *)0, ename+1, (UNCH *)0);
               return(ENTUNDEF);
          }
          else
               ecb = usedef(ename);
     }
     return(entopen(ecb));
}

/* ENTOPEN: Open a newly referenced entity.
            Increment the stack pointer (es) and initialize the new entry.
            ENTDATA if entity is CDATA or SDATA, ENTPI if it is PI,
            0 if normal and all o.k.; <0 if not.

            ENTDATA is also returned for ESN entities (entdatsw = NDECONT).
            The failure codes returned below are ENTMAX (nesting limit
            reached), ENTLOOP (entity already open) and ENTFILE (no file
            identifier, or the file could not be opened).
*/
int entopen(ecb)
struct entity *ecb;           /* Entity control block. */
{
     int i;                   /* Loop counter. */

     /* See if we have exceeded the entity nesting level. */
     if (es>=ENTLVL) {
          sgmlerr(34, (struct parse *)0, ecb->ename+1, ntoa(ENTLVL));
          return(ENTMAX);
     }
     if (docelsw) sgmlerr(234, (struct parse *)0, (UNCH *)0, (UNCH *)0);

     /* If entity is an etd, pi, or data, return it without creating an scb. */
     switch (ecb->estore) {
     case ESN:
          if (NEXTYPE(ecb->etx.n)!=ESNSUB) {
               if (!NEDCNDEFINED(ecb->etx.n))
                    sgmlerr(78, (struct parse *)0, NEDCN(ecb->etx.n)+1,
                            ecb->ename+1);
          }
          else {
#if 0
               if (!NEID(ecb->etx.n)) {
                    sgmlerr(149, (struct parse *)0, ecb->ename + 1, (UNCH *)0);
                    return ENTFILE;
               }
#endif
               if (sw.nopen >= sd.subdoc)
                    sgmlerr(188, (struct parse *)0,
                            (UNCH *)NULL, (UNCH *)NULL);
          }
          data = (UNCH *)ecb->etx.n;
          entdatsw = NDECONT;
          return(ENTDATA);
     case ESC:
     case ESX:
          datalen = ustrlen(ecb->etx.c);
          /* Ignore reference to empty CDATA entity. */
          if (datalen == 0 && ecb->estore == ESC) return(0);
          data = ecb->etx.c;
          entdatsw = (ecb->estore==ESC) ? CDECONT : SDECONT;
          return(ENTDATA);
     case ESI:
          datalen = ustrlen(ecb->etx.c);
          data = ecb->etx.c;
          entpisw = 4;
          return(ENTPI);
     }

     /* If the same entity is already open, send msg and ignore it.
        Level 0 needn't be tested, as its entity name is always *DOC.
     */
     for (i = 0; ++i<=es;) if (scbs[i].ecb.enext==ecb) {
          sgmlerr(36, (struct parse *)0, ecb->ename+1, (UNCH *)0);
          return(ENTLOOP);
     }

     /* Update SCB if entity trace is wanted in messages or entity is a file.
        (Avoid this at start when es==-1 or memory will be corrupted.)
     */
     if (es >= 0 && (sw.swenttr || FILESW)) scbset();

     /* Stack the new source control block (we know there is room). */
     ++es;                                      /* Increment scbs index. */
     RCNT = CCO = RSCC = 0;                     /* No records or chars yet. */
     COPIEDSW = 0;
     memcpy((UNIV)&ECB, (UNIV)ecb, (UNS)ENTSZ); /* Copy the ecb into the scb. */
     ECBPTR = ecb;            /* Save the ecb pointer in scb.ecb.enext. */
     TRACEECB("ENTOPEN", ECBPTR);

     /* For memory entities, the read buffer is the entity text.
        The text starts at FBUF, so FPOS should be FBUF-1
        because it is bumped before each character is read.
     */
     if (ECB.estore<ESFM) {FPOS = (FBUF = ECB.etx.c)-1; return 0;}

     /* For file entities, suspend any open file and do first read. */
     if (ECB.etx.x == 0) {
          /* No file identifier: undo the push before reporting. */
          --es;
          switch (ecb->estore) {
          case ESF:
               sgmlerr(149, (struct parse *)0, ecb->ename + 1, (UNCH *)0);
               break;
          case ESP:
               /* Name is passed from offset 2 here, not 1 as elsewhere. */
               sgmlerr(229, (struct parse *)0, ecb->ename + 2, (UNCH *)0);
               break;
          default:
               abort();
          }
          return ENTFILE;
     }
     fileopen();                             /* Open new external file. */
     if (iorc<0) {                           /* If open not successful: */
          FPOS = FBUF-1;                     /* Clean CCNT for OPEN error msg.*/
          filerr(32, ecb->ename+1);
          --es;                              /* Pop the stack. */
          return(ENTFILE);
     }
     filepend(es);                           /* Suspend any open file. */
     fileread();                             /* First read of file must be ok.*/
     return 0;
}

/* ENTGET: Get next record of entity (if there is one).
           Otherwise, close the file (if entity is a file) and
           pop the entity stack.  If nothing else is on the stack,
           return -1 to advise the caller.

           Returns 0 in every other case.  The switch below relies on
           deliberate fall-through (EOBCHAR -> EOFCHAR -> EOS) and on the
           readerr/conterr labels being entered by goto from the EOS case;
           statement order must not be changed.
*/
int entget()
{
     RSCC += (CCO = FPOS-FBUF);
                                   /* Characters-in-record (ignore EOB/EOF). */
     if (es == tages)
          tagctr += CCO;           /* Update tag length counter. */
     switch (*FPOS) {
     case EOBCHAR:                 /* End of file buffer: refill it. */
          /* Carry the last two characters over in front of rbufs. */
          rbufs[-2] = FPOS[-2];
          rbufs[-1] = FPOS[-1];
          fileread();                         /* Read the file. */
          if (iorc > 0) break;
     readerr:
          filerr(31, ENTITY+1);    /* Treat error as EOF. */
          /* Fall through. */
     case EOFCHAR:                 /* End of file: close it. */
          fileclos();              /* Call SGMLIO to close file. */
     conterr:
          if (es==0) {             /* Report if it is primary file. */
               FPOS = FBUF-1;      /* Preserve CCNT for omitted end-tags. */
               return -1;
          }
          /* Fall through. */
     case EOS:                /* End of memory entity: pop the stack. */
          TRACEECB("ENTPOP", ECBPTR);
          if (COPIEDSW) {
               frem((UNIV)(FBUF + 1));
               COPIEDSW = 0;
          }
          --es;                                   /* Pop the SCB stack. */
          if (FBUF) break;                        /* Not a PEND file. */
          filecont();                             /* Resume previous file. */
          if (iorc<0) {                           /* If CONT not successful: */
               filerr(94, ENTITY+1);
               goto conterr;
          }
          fileread();                             /* Read the file. */
          if (iorc<=0) goto readerr;              /* If READ not successful: */
          rbufs[-1] = SCB.pushback;
          FPOS += CCO;
          CCO = 0;
          if (delmscsw && es==0) {                /* End of DTD. */
               delmscsw = 0;
               *rbufs = lex.d.msc;
          }
          break;
     }
     return 0;
}

/* USEDEF: Use the default value for an entity reference.
           Returns the ECB for the defaulted entity.

           The new entity is created with entdef() under the referenced
           name, using the storage class of the default entity (ecbdeflt),
           and is flagged with dflt = 1.  The caller must ensure that
           ecbdeflt is non-null; it is dereferenced unconditionally.
*/
PECB usedef(ename)
UNCH *ename;                  /* Entity name (with length and EOS). */
{
     union etext etx;         /* Save return from entgen. */
     PECB ecb;                /* Entity control block. */
     PNE pne = 0;             /* Ptr to NDATA entity control block. */
     UNCH estore;             /* Default entity storage type. */

     if ((estore = ecbdeflt->estore)<ESFM) /* Default is an internal string. */
          etx.c = ecbdeflt->etx.c;
     else {
          /* Move entity name into fpi. */
          fpidf.fpinm = ename + 1;
          if ((etx.x = entgen(&fpidf))==0)
               sgmlerr(150, (struct parse *)0, ename + 1, (UNCH *)0);
          if (estore==ESN) {
               /* Clone the default's NDATA block, then point it at the
                  file identifier just generated.
               */
               memcpy((UNIV)(pne=(PNE)rmalloc((UNS)NESZ)),(UNIV)ecbdeflt->etx.n,(UNS)NESZ);
               NEID(pne) = etx.x;
               etx.n = pne;
          }
     }
     if (sw.swrefmsg) sgmlerr(45, (struct parse *)0, ename+1, (UNCH *)0);
     ++ds.ecbcnt;
     ecb = entdef(ename, estore, &etx);
     ecb->dflt = 1;
     /* The cloned NDATA block must name the new entity, not the default. */
     if (pne) NEENAME(pne) = ecb->ename;
     return(ecb);
}

/* SCBSET: Set source control block to current location in the current entity.
           This routine is called by SGML when it returns to the text
           processor and by ERROR when it reports an error.

           Does nothing when no entity is open (es < 0) or FBUF is null.
*/
VOID scbset()
{
     if (es >= 0 && FBUF) {
          CC = *FPOS;
          /* NEXTC is only recorded when the current character is DELNONCH. */
          if (*FPOS == DELNONCH)
               NEXTC = FPOS[1];
          else
               NEXTC = 0;
          CCO = FPOS + 1 - FBUF;
     }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -