⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pars1.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
/* SETDTYPE: Initialize the default ("undeclared") model header, build the
             three-token content model and the etd for the document as a
             whole, and install the document type etd as the model's single
             token.  If no document type name was set (dtype is null), an
             error is reported and indefetd is used instead.
             All state is kept in globals; nothing is returned.
*/
VOID setdtype()
{
     /* Initialize default model hdr for declared content. */
     undechdr.ttype = MANY+MCHARS+MGI;  /* Declared content is ANY. */
     undechdr.tu.tnum = 0;              /* No content model. */

     /* Initialize content model and etd for *DOC. */
     prcon[0].ttype = MGI;    /* Model is an element model. */
     prcon[0].tu.tnum = 2;    /* A single group with a single GI in it. */
     prcon[1].ttype = TTSEQ;  /* Non-repeatable SEQ group. */
     prcon[1].tu.tnum = 1;    /* Only one token in group. */
     prcon[2].ttype = TTETD;  /* Token is an etd. */
     docetd = etddef(indocetd);  /* etd for document as a whole. */
     etdset(docetd, ETDOCC, prcon, (PETD *)0, (PETD *)0, SRMNULL);

     /* Put specified or default document type etd in *DOC model. */
     if (!dtype) {
          sgmlerr(E_DOCTYPE, propcb, (UNCH *)0, (UNCH *)0);
          dtype = indefetd;
     }
     prcon[2].tu.thetd = etddef(dtype);
     /* If the document type element has no model yet, give it the default
        header.  Error 52 is issued only when the name was not the default
        one; the error count is bumped in either case.
     */
     if (!prcon[2].tu.thetd->etdmod) {
          if (dtype != indefetd)
               sgmlerr(52, propcb, dtype+1, (UNCH *)0);
          ++ds.etdercnt;
          etdset(prcon[2].tu.thetd, (UNCH)SMO+EMO+ETDUSED+ETDOCC, &undechdr,
                (PETD *)0, (PETD *)0, (PECB *)0);
     }
     TRACEETD(docetd);
     TRACEMOD(prcon);
     TRACEETD(prcon[2].tu.thetd);
     return;
}
/* PARSETAG: Tag end parser for SGML documents.
             For start-tags, it
             sets etisw to TAGNET if tag ended with ETI; otherwise to 0.
             It also sets tagdelsw: 1 if the tag had a closing delimiter,
             0 if the close was implied (NVS/NTV or TAGO cases).
*/
VOID parsetag(pcb)
struct parse *pcb;            /* Parse control block: pcbstag or pcbetag. */
{
     tagdelsw = 1;            /* Assume tag had an ETI or TAGC. */
     switch (parse(pcb)) {
     case ETIC:               /* Tag closed with ETI. */
          /* Error 194 is reported when sd.shorttag is off. */
          if (!sd.shorttag) synerr(194, pcb);
          etisw = TAGNET;     /* Set switch for stack entry flag. */
          return;
     case DSC:
          /* Report error 9 and push the character back; tagdelsw is left
             at 1 in this case.
          */
          synerr(9, pcb);
          REPEATCC;
          etisw = 0;
          return;
     case NVS:                /* Att name or value token found. */
     case NTV:                /* Name token value found. */
          synerr(E_POSSATT, pcb);
          pcb->newstate = 0;  /* Reset parse state. */
          REPEATCC;           /* Put it back for next read. */
          tagdelsw = 0;       /* Tag had no closing delimiter. */
          etisw = 0;          /* Don't flag stack entry. */
          return;
     case TAGO:               /* Tag closing implied by TAGO. */
          if (!sd.shorttag) synerr(193, pcb);
          REPEATCC;           /* Put it back for next read. */
          tagdelsw = 0;       /* Tag had no closing delimiter. */
          /* No return here: control falls into the TAGC/default case
             below, which clears etisw.
          */
     case TAGC:               /* Normal close. */
     default:                 /* Invalid character (msg was sent). */
          etisw = 0;          /* Don't flag stack entry. */
          return;
     }
}
/* STAG: Check whether a start-tag is valid at this point in the document
         structure, or whether other tags must precede it.
         Special case processing is done for the fake tag, #CDATA, as
         it is never stacked.

         The work is done in two steps: the first switch maps the context
         result (rc) to the action actually taken (realrc); the second
         switch performs that action.

         Returns ETG_ when the current element was ended (caller retries
         the start-tag), STG_ when an element (required or actual) was
         stacked, or dataret unchanged when data was pending.
*/
int stag(dataret)
int dataret;                  /* Data pending: DAF_ REF_ 0=not #PCDATA. */
{
     int rc, realrc;          /* Return code from context or other test. */
     int mexts = 0;           /* >0=stack level of minus grp; -1=plus; 0=none.*/

     badresw = pexsw = 0;
     /* If real element (i.e., not #PCDATA) set mexts and test if empty. */
     if (dataret==0) {
          mexts = pexmex(newetd);
          /* If element is declared empty, it is same as a conref. */
          if (GET(newetd->etdmod->ttype, MNONE)) conrefsw = TAGREF;
     }
     /* When the open element's model has MANY set, context() is skipped:
        only a minus exception (mexts>0) can reject the tag.
     */
     if (GET(tags[ts].tetd->etdmod->ttype, MANY))
          rc = mexts>0 ? RCMEX : RCHIT;
     else rc = context(newetd, tags[ts].tetd->etdmod, tags[ts].tpos,
                       &tags[ts].status, mexts);
     TRACESTG(newetd, dataret, rc, nextetd, mexts);

     switch (rc) {
     case RCEND:         /* End current element, then retry start-tag. */
          /* Nothing can be ended at stack level 0: treat as invalid. */
          if (ts<1) realrc = RCMISS;
          else      realrc = RCEND;
          break;
     case RCREQ:         /* Stack compulsory GI, then retry start-tag. */
          realrc = RCREQ;
          break;
     case RCMISS:        /* Start-tag invalid (#PCDATA or real). */
          /* Above level 0 in a MANY element, end it instead of failing. */
          if (ts>0 && GET(tags[ts].tetd->etdmod->ttype, MANY))
               realrc = RCEND;
          else realrc = RCMISS;
          break;
     case RCMEX:         /* Start-tag invalid (minus exception). */
          /* Record the distance from the current level to the level that
             holds the minus group.
          */
          etagimct = ts - mexts;
          realrc = RCEND;
          break;
     case RCHITMEX:      /* Invalid minus exclusion for required element. */
          sgmlerr(216, &pcbstag, NEWGI, tags[mexts].tetd->etdgi+1);
          /* fall through */
     case RCHIT:         /* Start-tag was valid. */
          realrc = RCHIT;
          break;
     case RCPEX:         /* Start-tag valid only because of plus exception. */
          pexsw = TAGPEX;
          realrc = RCHIT;
          break;
     default:
          /* Any other return code is treated as an internal error. */
          abort();
     }

     switch (realrc) {
     case RCEND:         /* End current element, then retry start-tag. */
          /* Error 7 is issued if a required start-tag was just supplied
             (didreq) and the element is now being ended.
          */
          if (didreq) sgmlerr(07, &pcbstag, nextetd->etdgi+1, (UNCH *)0);
          didreq = 0;                   /* No required start-tag done. */
          dostag = 1; etiswsv = etisw;  /* Save real start-tag status. */
          conrefsv = conrefsw;          /* Save real start-tag conref. */
          conrefsw = 0;                 /* Current element is not empty. */
          etagmin = MINSTAG; destack(); /* Process omitted end-tag. */
          return ETG_;
     case RCREQ:         /* Stack compulsory GI, then retry start-tag. */
          if (!BADPTR(nextetd)) {
               /* Report a required element that is in a minus group. */
               if ((mexts = pexmex(nextetd))>0)
                    sgmlerr(E_MEXERR, &pcbstag, nextetd->etdgi+1,
                            tags[mexts].tetd->etdgi+1);
               /* Required element has no model: report error 53 and give
                  it the default header.
               */
               if (!nextetd->etdmod) {
                    sgmlerr(53, &pcbstag, nextetd->etdgi+1, (UNCH *)0);
                    etdset(nextetd, (UNCH)SMO+EMO+ETDOCC, &undechdr,
                          (PETD *)0, (PETD *)0, (PECB *)0);
                    ++ds.etdercnt;
                    TRACEETD(nextetd);
               }
          }
          /* NOTE(review): from here on nextetd is dereferenced even when
             BADPTR(nextetd) was true above -- confirm that context() never
             returns RCREQ with a bad nextetd.
          */
          /* SMO bit off in etdmin: report the omitted start-tag.  The
             message depends on what triggered it: a real start-tag (21),
             ETDCDATA (49), or anything else (50).
          */
          if (BITOFF(nextetd->etdmin, SMO)) {
               if (!BADPTR(stagreal))
                    sgmlerr(21, &pcbstag, nextetd->etdgi+1, stagreal->etdgi+1);
               else if (stagreal==ETDCDATA)
                    sgmlerr(49, &pcbstag, nextetd->etdgi+1, (UNCH *)0);
               else sgmlerr(50, &pcbstag, nextetd->etdgi+1, (UNCH *)0);
          }
          didreq = 1;                   /* Required start-tag done. */
          dostag = 1; etiswsv = etisw;  /* Save real start-tag status. */
          etisw = 0; conrefsv = conrefsw;  /* Save real start-tag conref. */
          /* If element is declared empty, it is same as a conref. */
          conrefsw = (GET(nextetd->etdmod->ttype, MNONE)) ? TAGREF : 0;
          stack(nextetd);               /* Process omitted start-tag. */
          return STG_;
     case RCMISS:        /* Start-tag invalid (#PCDATA or actual). */
          dostag = 0; contersw |= 1; didreq = 0;
          /* Pending data: flag a bad reference or report E_CHARS, then
             hand the data code back without stacking anything.
          */
          if (dataret) {
               if (dataret==REF_) badresw = 1;
               else sgmlerr(E_CHARS, conpcb, tags[ts].tetd->etdgi+1, (UNCH *)0);
               return dataret;
          }
          /* Real element out of context: report it and stack it anyway. */
          sgmlerr(E_CONTEXT, &pcbstag, NEWGI, tags[ts].tetd->etdgi+1);
          /* Only the assignment is conditional; stack() is always called. */
          if (stagmin!=MINNULL) stagmin = MINNONE;
          stack(newetd);
          return STG_;
     case RCHIT:         /* Start-tag was valid. */
          dostag = 0; didreq = 0;
          if (dataret) return dataret;
          stack(newetd);
          return STG_;
     }
     return NOP_;        /* To avoid Borland C++ warning */
}
/* PEXMEX: See if a GI is in a plus or minus exception group on the stack.
           If in a minus, returns stack level of minus group; otherwise,
           returns -1 if in a plus and not a minus, and zero if in neither.
           The stack is scanned from the current level (ts) down to level 1;
           level 0 is not examined.
*/
int pexmex(curetd)
struct etd *curetd;           /* The etd for this GI. */
{
     int tsl;                 /* Temporary stack level for looping. */
     int pex = 0;             /* -1=found in plus grp; 0=not. */

     for (tsl = ts; tsl>0; --tsl) {
          /* A minus group hit ends the scan at once. */
          if (tags[tsl].tetd->etdmex && ingrp(tags[tsl].tetd->etdmex, curetd))
               return(tsl);
          /* A plus group hit is remembered, but the scan continues in case
             a lower level has a minus group containing the GI.
          */
          if (tags[tsl].tetd->etdpex && ingrp(tags[tsl].tetd->etdpex, curetd))
               pex = -1;
     }
     return(pex);
}
/* STACK: Add a new entry to the tag stack.
          If there is no room, issue a message and reuse last position.
          The entry's flags are built from the pexsw, etisw, contersw and
          conrefsw globals; contersw is cleared afterwards.
*/
VOID stack(curetd)
struct etd *curetd;           /* The etd for this entry. */
{
     /* Stack the new element type definition (error if no room). */
     /* On overflow the --ts inside the call undoes the increment, so the
        assignment below overwrites the previous top entry.
     */
     if (++ts>TAGLVL)
          sgmlerr(E_STAGMAX, conpcb, curetd->etdgi+1, tags[--ts].tetd->etdgi+1);
     tags[ts].tetd = curetd;

     /* Set flags: plus exception + tag had ETI + context error + empty. */
     tags[ts].tflags = (UNCH)pexsw + etisw + contersw + conrefsw; contersw = 0;

     /* If tag had ETI, update ETI counter and enable NET if first ETI. */
     if (etisw && ++etictr==1) lexcon[lex.d.net] = lexcnm[lex.d.net] = lex.l.net;

     /* If etd has ALT table, use it; otherwise, use last element's ALT. */
     if (curetd->etdsrm) {
          if (curetd->etdsrm != SRMNULL && curetd->etdsrm[0] == NULL) {
               /* Map hasn't been defined.  Ignore it.
                  We already gave an error. */
               curetd->etdsrm = 0;
               tags[ts].tsrm = tags[ts-1].tsrm;
          }
          else
               tags[ts].tsrm = curetd->etdsrm;
     }
     else
          tags[ts].tsrm = tags[ts-1].tsrm;

     /* Initialize rest of stack entry. */
     tags[ts].status = 0;
     tags[ts].tpos[0].g = 1;       /* M: Index in model of next token to test.*/
     tags[ts].tpos[0].t = 1;       /* P: Index in tpos of current group. */
     HITCLEAR(tags[ts].tpos[0].h);
     tags[ts].tpos[1].g = 1;       /* Index of group in model (dummy grp). */
     tags[ts].tpos[1].t = 1;       /* 1st token is next in grp to be tested. */
     HITCLEAR(tags[ts].tpos[1].h); /* No hits as yet. */
     TRACESTK(&tags[ts], ts, etictr);
     return;
}
/* ETAG: Check validity of an end-tag by seeing if it matches any tag
         on the stack.  If so, return the offset of the match from the
         current entry (0=current).  If there is no match, return an
         error code (-1); no message is issued in this function.
         If the newetd is ETDNET, a NET delimiter was found, so check for
         a tag that ended with ETI instead of a matching GI.
         NOTE(review): in the ETDNET case the test is on the whole tflags
         value being nonzero, not on the TAGNET bit alone -- confirm this
         is intended.
*/
int etag()
{
     int tsl = ts+1;          /* Temporary stack level for looping. */

     /* See if end-tag is anywhere on stack, starting at current entry. */
     /* The pre-decrement makes the loop visit levels ts down to 1; the
        loop stops before level 0 is tested.
     */
     while (--tsl) {
          if (newetd!=ETDNET ? newetd==tags[tsl].tetd : tags[tsl].tflags) {
               TRACEETG(&tags[ts], newetd, tsl, ts-tsl);
               return(ts-tsl);
          }
     }
     return (-1);             /* End-tag didn't match any start-tag. */
}
/* DESTACK:
            Call ECONTEXT to see if element can be ended at this point.
            and issue message if there are required tags left.
            Remove the current entry from the tag stack.
            Issue an error if the destacked element was not minimizable
            and its end-tag was omitted.
            Also records the popped etd in lastetd, and sets docelsw when
            the stack returns to level 0.
*/
VOID destack()
{
     register int ecode = 0;  /* Error code (0=o.k.). */
     UNCH *eparm2 = NULL;     /* Second parameter of error message. */
     register int minmsgsw;   /* 1=message if tag omitted; 0=no message. */

     /* If element has a content model (i.e., not a keyword) and there
        are required tags left, and no CONREF attribute was specified,
        issue an error message.
     */
     lastetd = tags[ts].tetd;
     if (!GET(tags[ts].tetd->etdmod->ttype, MKEYWORD)
         && !conrefsw
         && !econtext(tags[ts].tetd->etdmod, tags[ts].tpos, &tags[ts].status)) {
          if (BADPTR(nextetd))
               sgmlerr(54, conpcb, tags[ts].tetd->etdgi+1, (UNCH *)0);
          else
               sgmlerr(30, conpcb, tags[ts].tetd->etdgi+1, nextetd->etdgi+1);
     }

     /* If the current tag ended with ETI, decrement the etictr.
        If etictr is now zero, disable the NET delimiter.
     */
     /* The post-decrement of ts here is what pops the stack; from this
        point on the popped entry is tags[ts+1].
     */
     if (GET(tags[ts--].tflags, TAGNET) && --etictr==0)
          lexcon[lex.d.net] = lexcnm[lex.d.net] = lex.l.nonet;

     minmsgsw = BITOFF(tags[ts+1].tetd->etdmin, EMO);
     if (!conrefsw && minmsgsw && (etagimsw || etagmin==MINETAG)) {
          /* Minimization caused by NET delimiter. */
          if (BADPTR(etagreal)) ecode = 46;
          /* Minimization caused by a containing end-tag. */
          else {ecode = 20; eparm2 = etagreal->etdgi+1;}
     }
     else if (!conrefsw && etagmin==MINSTAG && (minmsgsw || ts<=0)) {
          /* Each case picks one code when elements remain open (ts>0) and
             another when the stack has been emptied.
          */
          /* Minimization caused by out-of-context start-tag. */
          if (!BADPTR(stagreal)) {
               ecode = ts>0 ? 39 : 89;
               eparm2 = stagreal->etdgi+1;
          }
          /* Minimization caused by out-of-context data. */
          else if (stagreal==ETDCDATA) ecode = ts>0 ? 47 : 95;
          /* Minimization caused by out-of-context short start-tag. */
          else ecode = ts>0 ? 48 : 96;
          if (ts<=0 && ecode) eodsw = 1;
     }
     if (ecode) sgmlerr((UNS)ecode, conpcb, tags[ts+1].tetd->etdgi+1, eparm2);
     /* TEMP: See if parser bug caused stack to go below zero. */
     /* This check hangs off the else of "if (ecode)", so it only runs (and
        only resets ts) when no minimization error was reported above.
     */
     else if (ts<0) {sgmlerr(64, conpcb, (UNCH *)0, (UNCH *)0); ts = 0;}
     TRACEDSK(&tags[ts], &tags[ts+1], ts, etictr);
     if (ts == 0) {
          docelsw = 1;        /* Finished document element. */
          /* Error 231 is reported if es is still positive at this point. */
          if (es > 0) sgmlerr(231, conpcb, (UNCH *)0, (UNCH *)0);
     }
}
/*
Local Variables:
c-indent-level: 5
c-continued-statement-offset: 5
c-brace-offset: -5
c-argdecl-indent: 0
c-label-offset: -5
comment-column: 30
End:
*/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -