⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 md1.c

📁 harvest是一个下载HTML网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
     /* NOTE(review): the beginning of this function is not part of this
        listing; what follows is only its tail, kept as found. */
     switch (pcbmd.action) {
     case LIT:                /* Literal. */
     case LITE:               /* Literal. */
          /* Null string (except CDATA) is error: msg and treat as IMPLIED. */
          if (*lbuf == '\0' && ADTYPE(al,attadn)!=ACHARS) {
               mderr(82, ADNAME(al,attadn), (UNCH *)0);
               novalsw = 1;
          }
          break;
     case NAS:                /* Name character string. */
     case NMT:                /* Name character string. */
     case NUM:                /* Number or number token string. */
          /* The name won't have a length byte because AVALCASE was specified. */
          break;
     case CDR:
          parsetkn(lbuf, NMC, LITLEN);
          break;
     case EMD:
          mderr(109, ADNAME(al,attadn), (UNCH *)0);
          return 1;
     default:
          mderr(110, ADNAME(al,attadn), (UNCH *)0);
          return 1;
     }
     if (errsw) {
          /* Ignore erroneous att: release its storage and shrink the list
             so that it ends just before this attribute. */
          adlfree(al, attadn);
          --AN(al);
          ADN(al) = (UNCH)attadn-1;
          return(0);
     }
     /* Nothing to validate or store when no usable value was given. */
     if (novalsw) return(0);

     /* PARAMETER 3y: Validate and store default value.
     */
     if (ADTYPE(al,attadn)==ACHARS) {
          UNS len = vallen(ACHARS, 0, lbuf);
          if (len > LITLEN) {
               /* Treat as implied. */
               sgmlerr(224, &pcbmd, ADNAME(al,attadn), (UNCH *)0);
               return 0;
          }
          /* No more checking for CDATA value. */
          ADNUM(al,attadn) = 0;             /* CDATA is 0 tokens. */
          ADVAL(al,attadn) = savestr(lbuf);/* Store default; save ptr. */
          ADLEN(al,attadn) = len;
          ds.attdef += len;
          return 0;
     }
     /* Parse value and save token count (GROUP implies 1 token). */
     advalsv = (UNCH *)rmalloc(ustrlen(lbuf)+2); /* Storage for tokenized value. */
     errcode = parseval(lbuf, (UNS)ADTYPE(al,attadn), advalsv);
     /* For a group attribute ADNUM is left untouched here; it is passed to
        amemget() below as a count. */
     if (BITOFF(ADFLAGS(al,attadn), AGROUP)) ADNUM(al,attadn) = (UNCH)tokencnt;
     /* If value was invalid, or was a group member that was not in the group,
        issue an appropriate message and set the error switch. */
     if (errcode)
          {sgmlerr((UNS)errcode, &pcbmd, ADNAME(al,attadn), lbuf); errsw = 1;}
     else if ( BITON(ADFLAGS(al,attadn), AGROUP)
          && !amemget(&al[attadn], (int)ADNUM(al,attadn), advalsv) ) {
               sgmlerr(79, &pcbmd, ADNAME(al,attadn), advalsv+1);
               errsw = 1;
     }
     /* The length is computed even after an error; an over-long value is
        itself an error (224) and resets the stored length to zero. */
     ADLEN(al,attadn) = vallen(ADTYPE(al,attadn), ADNUM(al,attadn), advalsv);
     if (ADLEN(al,attadn) > LITLEN) {
          sgmlerr(224, &pcbmd, ADNAME(al,attadn), (UNCH *)0);
          ADLEN(al,attadn) = 0;
          errsw = 1;
     }
     /* For valid tokenized value, save it and update statistics. */
     if (!errsw) {
          ADVAL(al,attadn) = advalsv;
          ds.attdef += ADLEN(al,attadn);
          return 0;
     }
     /* If value was bad, free the value's storage and treat as
        IMPLIED or REQUIRED. */
     frem((UNIV)advalsv);          /* Release storage for value. */
     ADVAL(al,attadn) = NULL;         /* And make value NULL. */
     return 0;
}

/* ANMTGRP: Parse a name or name token group, create attribute descriptors
            for its members, and add them to the attribute descriptor list.
            The parse either terminates or returns a good token, so no
            switch is needed.

            Members are written to nt[1], nt[2], ... and *adn is kept equal
            to the number of members stored so far.  Parsing stops at the
            group end (GRPE), at any action other than NAS_, NMT_, EE_ or
            PIE_, or once *adn is no longer below grplim.

            Returns the member count (*adn), or -1 if the count reached
            grplim.
*/
int anmtgrp(pcb, nt, grplim, adn, adsz)
struct parse *pcb;            /* PCB for name or name token grp. */
struct ad nt[];               /* Buffer for creating name token list. */
int grplim;                   /* Maximum size of list (plus 1). */
UNS *adn;                     /* Ptr to number of names or tokens in grp. */
int adsz;                     /* Size of att def list. */
{
     /* Descriptor type is chosen by which PCB drives the parse: pcbgrnt
        gives ANMTGRP, any other PCB gives ANOTEGRP. */
     UNCH adtype = (UNCH)(pcb==&pcbgrnt ? ANMTGRP:ANOTEGRP);/*Attribute type.*/
     int essv = es;           /* Entity stack level when grp started. */

     *adn = 0;                /* Group is empty to start. */
     while (parse(pcb)!=GRPE && *adn<grplim) {
          switch (pcb->action) {
          case NAS_:          /* Name or name token (depending on pcb). */
          case NMT_:
               parsenm(lbuf, NAMECASE);
               /* The new member goes into the next free slot, *adn+1;
                  *adn itself is only advanced further down. */
               nt[*adn+1].adname = savenm(lbuf);
               /* Error 98 is reported when antvget() returns non-zero
                  (presumably: the token already occurs in the list --
                  TODO confirm against antvget). */
               if (antvget((int)(adsz+*adn), nt[*adn+1].adname, (UNCH **)0))
                    mderr(98, ntoa((int)*adn+1), nt[*adn+1].adname+1);
               nt[++*adn].adtype = adtype;
               nt[*adn].addef    = NULL;
               continue;
          case EE_:           /* Entity ended (correctly or incorrectly). */
               /* Report a drop below the starting level once, then track
                  the new level. */
               if (es<essv) {synerr(37, pcb); essv = es;}
               continue;
          case PIE_:          /* PI entity reference (invalid). */
               entpisw = 0;   /* Reset PI entity indicator. */
               synerr(59, pcb);
               continue;
          default:
               break;
          }
          /* Only the default case reaches this point: leave the loop. */
          break;
     }
     /* The group must end at the entity level being tracked. */
     if (es!=essv) synerr(37, pcb);
     if (*adn==grplim) return -1;
     else return *adn;        /* Return number of tokens. */
}

/* MDDTDS: Process start of DOCTYPE declaration (through MSO).
           Parses the document type name and an optional external
           identifier, then switches the prolog parse (propcb) to pcbmds.
           When an external identifier yields an entity, that entity is
           defined under the name indtdent; if the declaration also ended
           without a subset, the input is adjusted so that an MSC is read
           next.  Returns early, after an error message, on a bad
           parameter 1 or 2.
*/
VOID mddtds(tbuf)
UNCH *tbuf;                   /* Work area for tokenization[LITLEN+2]. */
{
     struct fpi fpicb;        /* Formal public identifier structure. */
     union etext etx;         /* Ptr to entity text. */
     UNCH estore = ESD;       /* Entity storage class. */
     int emdsw = 0;           /* 1=end of declaration found; 0=not yet. */

     mdname = key[KDOCTYPE];  /* Identify declaration for messages. */
     subdcl = NULL;           /* No subject as yet. */
     parmno = 0;              /* No parameters as yet. */
     mdessv = es;             /* Save es for checking entity nesting. */
     dtdrefsw = 0;            /* No external DTD entity as yet. */

     /* PARAMETER 1: Document type name.
     */
     pcbmd.newstate = 0;
     parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
     TRACEMD("1: doc type name");
     if (pcbmd.action!=NAS) {mderr(120, (UNCH *)0, (UNCH *)0); return;}
     dtype = savenm(tbuf);
     subdcl = dtype+1;        /* Subject of declaration for error msgs. */

     /* PARAMETER 2: External identifier keyword or MDS.
     */
     parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
     TRACEMD("2: extid or MDS");
     switch (pcbmd.action) {
     case NAS:
          if (mdextid(tbuf, &fpicb, dtype+1, &estore, (PNE)0)==0) return;
          /* etx is only meaningful when dtdrefsw is set below. */
          if ((etx.x = entgen(&fpicb))==0)
               mderr(146, dtype+1, (UNCH *)0);
          else
               dtdrefsw = 1;  /* Signal external DTD entity. */
          break;
     case MDS:
          /* Subset starts immediately: no external identifier. */
          goto execute;
     default:
          mderr(128, (UNCH *)0, (UNCH *)0);
          return;
     }
     /* PARAMETER 3: MDS or end of declaration.
        NOTE(review): there is no parsemd() call here, so the switch tests
        the action left in pcbmd after mdextid() -- presumably mdextid()
        has already parsed the following parameter; confirm.
     */
     TRACEMD("3: MDS or EMD");
     switch (pcbmd.action) {
     default:                      /* Treat as end of declaration. */
          mderr(126, (UNCH *)0, (UNCH *)0);
          /* falls through: after the message, handle as EMD */
     case EMD:
          emdsw = 1;
          /* falls through */
     case MDS:
          break;
     }
     /* EXECUTE: Store entity definition if an external ID was specified.
     */
     execute:
     if (es!=mdessv) synerr(37, &pcbmd);
     propcb = &pcbmds;        /* Prepare to parse doc type definition (MDS). */
     if (dtdrefsw) {
          /* TO DO: If concurrent DTD's supported, free existing
             etext for all but first DTD (or reuse it). */
          entdef(indtdent, estore, &etx);
          ++ds.ecbcnt; ds.ecbtext += entlen;
          if (emdsw) {
               REPEATCC;                /* Push back the MDC. */
               *FPOS = lex.d.msc;       /* Simulate end of DTD subset. */
               REPEATCC;                /* Back up to read MSC next. */
               delmscsw = 1;            /* Insert MSC after referenced DTD. */
          }
     }
     indtdsw = 1;                       /* Allow "DTD only" parameters. */
     return;
}

/* MDDTDE: Process DOCTYPE declaration end.
           Restores the prolog parse (propcb = &pcbpro), clears indtdsw,
           and then requires the next parameter to be the end of the
           declaration; anything else is reported as error 126.
*/
VOID mddtde(tbuf)
UNCH *tbuf;                   /* Work area for tokenization. */
{
     mdessv = es;             /* Save es for checking entity nesting. */
     propcb = &pcbpro;        /* Restore normal prolog parse. */
     indtdsw = 0;             /* Prohibit "DTD only" parameters. */

     mdname = key[KDOCTYPE];  /* Identify declaration for messages. */
     subdcl = dtype+1;        /* Subject of declaration for error msgs. */
     parmno = 0;              /* No parameters as yet. */

     /* PARAMETER 4: End of declaration.
     */
     pcbmd.newstate = pcbmdtk;
     parsemd(tbuf, NAMECASE, &pcblitp, LITLEN);
     TRACEMD(emd);
     if (pcbmd.action!=EMD) mderr(126, (UNCH *)0, (UNCH *)0);
     if (es!=mdessv) synerr(37, &pcbmd);
}

/* MDELEM: Process ELEMENT declaration.
           Parameters are parsed in order: element name or name group,
           optional rank suffix, start-tag and end-tag minimization,
           declared content or content model, optional minus and plus
           exception groups, and the end of the declaration.  The result
           is then stored with etdset() for every name in nmgrp.
           Returns early, after an error message, when a required
           parameter is invalid.
*/
VOID mdelem(tbuf)
UNCH *tbuf;                   /* Work area for tokenization (tbuf). */
{
     UNCH *ranksuff = lbuf;   /* Rank suffix. */
     UNS dctype = 0;          /* Declared content type (from dctab). */
     UNCH fmin = 0;           /* Minimization bit flags. */
     int i;                   /* Loop counter. */
     UNS u;                   /* Temporary variable. */
     struct etd **mexgrp, **pexgrp; /* Ptr to model exceptions array. */
     struct thdr *cmod, *cmodsv;    /* Ptr to content model. */
     UNCH *etdgi;             /* GI of current etd (when going through group).*/
     int minomitted = 0;      /*  Tag minimization parameters omitted. */

     mdname = key[KELEMENT];  /* Identify declaration for messages. */
     subdcl = NULL;           /* No subject as yet. */
     parmno = 0;              /* No parameters as yet. */
     mdessv = es;             /* Save es level for entity nesting check. */
     ranksuff[0] = 0;         /* No rank suffix unless one is parsed. */
     mexgrp = pexgrp = 0;     /* No exception groups unless parsed. */

     /* PARAMETER 1: Element name or a group of them.
     */
     parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
     TRACEMD("1: element name or grp");
     switch (pcbmd.action) {
     case NAS:
          /* A single name is stored as a one-entry, zero-terminated group. */
          nmgrp[0] = etddef(tbuf);
          nmgrp[1] = 0;
          break;
     case GRPS:
          parsegrp(nmgrp, &pcbgrnm, tbuf);
          break;
     default:
          mderr(121, (UNCH *)0, (UNCH *)0);
          return;
     }
     /* Save first GI for trace and error messages. */
     if (nmgrp[0])
          subdcl = nmgrp[0]->etdgi+1;

     /* PARAMETER 1A: Rank suffix (optional).
     */
     parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
     TRACEMD("1A: rank suffix");
     switch (pcbmd.action) {
     case NUM:
          /* Keep the suffix and read the parameter that follows it. */
          ustrcpy(ranksuff, tbuf);
          parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
          /* falls through */
     default:
          break;
     }
     /* PARAMETER 2A: Start-tag minimization.
        No parsemd() here: the token examined is the one already read
        above (either after the rank suffix or in its place).
     */
     TRACEMD("2A: start min");
     switch (pcbmd.action) {
     case CDR:
          break;
     case NAS:
          /* A name equal to key[KO] sets SMO, but only if OMITTAG is YES. */
          if (!ustrcmp(tbuf+1, key[KO])) {
               if (OMITTAG==YES) SET(fmin, SMO);
               break;
          }
          /* fall through */
     default:
          /* With OMITTAG == NO both minimization parameters may be left
             out; the current token is then handled as parameter 3. */
          if (OMITTAG==NO) {minomitted=1; break;}
          mderr(129, tbuf+1, (UNCH *)0);
          return;
     }
     /* If the start-tag minimization parameter was absent (accepted only
        because OMITTAG == NO), the end-tag minimization parameter is not
        parsed either, and no further token is read before parameter 3. */
     if (!minomitted) {
          /* PARAMETER 2B: End-tag minimization.
           */
          parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
          TRACEMD("2B: end min");
          switch (pcbmd.action) {
          case NAS:
               if (ustrcmp(tbuf+1, key[KO])) {mderr(129, tbuf+1, (UNCH *)0); return;}
               if (OMITTAG==YES) SET(fmin, EMO);
               break;
          case MGRP:
               REPEATCC;
               /* fall through */
          case CDR:
               SET(fmin, EMM);
               break;
          default:
               mderr(129, tbuf+1, (UNCH *)0);
               return;
          }
          /* PARAMETER 3: Declared content.
           */
          parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
     }
     TRACEMD("3: declared content");
     switch (pcbmd.action) {
     case NAS:
          dctype = mapsrch(dctab, tbuf+1);
          if (!dctype) {mderr(24, tbuf+1, (UNCH *)0); return;}
          /* Eliminate incompatibilities among parameters. */
          if (GET(fmin, SMO) && GET(dctype, MNONE+MCDATA+MRCDATA)) {
               mderr(58, (UNCH *)0, (UNCH *)0);
               RESET(fmin, SMO);
          }
          if (GET(dctype, MNONE) && BITON(fmin, EMM)) {
               mderr(87, (UNCH *)0, (UNCH *)0);
               SET(fmin, EMO);
          }
          /* If valid, process like a content model. */
          /* falls through */
     case GRPS:
          /* parsemod() gets 0 for a model group, else the declared
             content type. */
          cmodsv = parsemod((int)(pcbmd.action==GRPS ? 0 : dctype));
          if (cmodsv==0) return;
          /* Copy the model into its own storage: one header for declared
             content, tnum+2 headers for a model group. */
          u = (dctype ? 1 : cmodsv->tu.tnum+2) * THSZ;
          cmod = (struct thdr *)rmalloc(u);
          memcpy((UNIV)cmod  , (UNIV)cmodsv, u );
          ds.modcnt += cmod->tu.tnum;
          TRACEMOD(cmod);
          break;
     default:
          mderr(130, (UNCH *)0, (UNCH *)0);
          return;
     }
     /* PARAMETERS 3A, 3B: Exceptions or end.
        Exception groups are only looked for when the content type has
        none of MCDATA, MRCDATA or MNONE set.
     */
     parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
     if (BITOFF(cmod->ttype, MCDATA+MRCDATA+MNONE)) {
          /* PARAMETER 3A: Minus exceptions.
          */
          TRACEMD("3A: -grp");
          switch (pcbmd.action) {
          case MGRP:
               /* We cheat and use nnmgrp for this. */
               mexgrp = copygrp((PETD *)nnmgrp,
                                u = parsegrp((PETD *)nnmgrp, &pcbgrnm, tbuf));
               ++ds.pmexgcnt; ds.pmexcnt += u-1;
               TRACEGRP(mexgrp);
               parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
               /* falls through */
          default:
               break;
          }
          /* PARAMETER 3B: Plus exceptions.
          */
          TRACEMD("3B: +grp");
          switch (pcbmd.action) {
          case PGRP:
               pexgrp = copygrp((PETD *)nnmgrp,
                                u = parsegrp((PETD *)nnmgrp, &pcbgrnm, tbuf));
               ++ds.pmexgcnt; ds.pmexcnt += u-1;
               TRACEGRP(pexgrp);
               parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
               /* falls through */
          default:
               break;
          }
     }
     /* PARAMETER 4: End of declaration.
        A missing EMD is reported but does not stop the definitions
        below from being stored.
     */
     TRACEMD(emd);
     if (pcbmd.action!=EMD) mderr(126, (UNCH *)0, (UNCH *)0);
     if (es!=mdessv) synerr(37, &pcbmd);

     /* EXECUTE: Store the definition for each element name specified.
     */
     TRACEGRP(nmgrp);
     /* nmgrp is walked up to its first null entry. */
     for (i = -1; nmgrp[++i];) {
          etdgi = nmgrp[i]->etdgi;
          if (*ranksuff) {
               /* Build "GI + rank suffix" in tbuf, with the combined
                  length in tbuf[0]; skip this name (error 131) when the
                  result is too long.  (Presumably the -2 discounts the
                  length byte and terminator -- TODO confirm.) */
               if ((tbuf[0] = *etdgi + ustrlen(ranksuff)) - 2 > NAMELEN) {
                    mderr(131, etdgi+1, ranksuff);
                    continue;
               }
               memcpy(tbuf+1, etdgi+1, *etdgi-1);
               ustrcpy(tbuf+*etdgi-1, ranksuff);
               /* Replace the entry for the bare GI with one for the
                  suffixed name. */
               etdcan(etdgi);
               nmgrp[i] = etddef(tbuf);
          }
          /* An element that already has a model is reported (56) and
             left unchanged. */
          if (nmgrp[i]->etdmod) {mderr(56, etdgi+1, (UNCH *)0); continue;}
          etdset(nmgrp[i], fmin+ETDDCL, cmod, mexgrp, pexgrp, nmgrp[i]->etdsrm);
          ++ds.etdcnt;
          if (nmgrp[i]->adl) etdadl(nmgrp[i]); /* Check ETD conflicts. */
          TRACEETD(nmgrp[i]);
     }
}

/* ADLFREE: Release the storage held by entries aln through ADN(al) of
            the attribute definition list al.  For each entry the name is
            freed, and the default value too when it is non-null.  For an
            entry flagged AGROUP, the names of the ADNUM entries that
            follow it are freed as well and those entries are skipped.
*/
VOID adlfree(al, aln)
struct ad *al;
int aln;
{
     for (; aln <= ADN(al); aln++) {
          frem((UNIV)al[aln].adname);
          if (ADVAL(al, aln))
               frem((UNIV)ADVAL(al, aln));
          if (BITON(ADFLAGS(al, aln), AGROUP)) {
               int i;
               /* Group members occupy the slots right after the group's
                  own entry; only their names are released here. */
               for (i = 0; i < ADNUM(al, aln); i++)
                    frem((UNIV)al[aln + i + 1].adname);
               /* Step over the member slots; the loop increment then
                  moves to the next attribute. */
               aln += ADNUM(al, aln);
          }
     }
}

/*
Local Variables:
c-indent-level: 5
c-continued-statement-offset: 5
c-brace-offset: -5
c-argdecl-indent: 0
c-label-offset: -5
comment-column: 30
End:
*/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -