⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 context.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
#include "sgmlincl.h"         /* #INCLUDE statements for SGML parser. */
#include "context.h"

/* Names such as P, T, M, G, H, TTYPE, GTYPE, TOCC, TOKEN, GNUM, GHDR,
   HITCLEAR, BITON, Tstart, savedpos, nextetd and grplongs are not defined in
   this part of the file; presumably they come from the headers above.
*/

#define GI (tags[ts].tetd->etdgi+1)              /* GI of current element. */
#define NEWGI (newetd->etdgi+1)                  /* GI of new tag. */
#define STATUS (*statuspt)    /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
#define PEX (-1)              /* GI is a plus exception and not a minus. */

/* Hit-bit helpers.  h is an array of words and token number n maps to
   bit (n-1): word (n-1)>>LONGPOW, bit (n-1)&(LONGBITS-1).  LONGPOW and
   LONGBITS are defined elsewhere (presumably LONGBITS == 1<<LONGPOW).

   The mask is built from an unsigned long rather than from 1L so that
   selecting the top bit of a word never shifts into the sign bit of a
   signed long, and every argument and expansion is parenthesized so that
   expression arguments and surrounding operators cannot regroup the result.
   A cast is used instead of a UL suffix to stay friendly to old compilers.
*/
#define ANYHIT(h) (grplongs == 1 ? ((h)[0] != 0) : anyhit(h))
#define HITSET(h, n) ((h)[(unsigned)((n)-1)>>LONGPOW] \
                      |= ((unsigned long)1<<(((n)-1)&(LONGBITS-1))))
#define HITON(h, n) ((h)[(unsigned)((n)-1)>>LONGPOW] \
                     & ((unsigned long)1<<(((n)-1)&(LONGBITS-1))))
#define HITOFF(h, n) (!(HITON(h, n)))
#define TOKENHIT HITON(H,T)   /* Non-zero if the current token was hit. */

/* COPYPOS: Copy a model position array.
            Entries 0 through from[0].t (inclusive) are copied: the g and t
            members by assignment, and the hit bits by copying grplongs
            unsigned longs from from[i].h into to[i].h.  Only the contents
            of h are copied, so to[i].h must already refer to storage of at
            least that size.
*/
static VOID copypos(to, from)
struct mpos *to, *from;
{
     int i;

     for (i = 0; i <= (int)from[0].t; i++) {
          to[i].g = from[i].g;
          to[i].t = from[i].t;
          memcpy(to[i].h, from[i].h, grplongs*sizeof(unsigned long));
     }
}
/* CONTEXT: Determine whether a GI is valid in the present structural context.
            Returns RCHIT if valid, RCEND if element has ended, RCREQ if a
            different element is required, and RCMISS if it is totally invalid.
            On entry, pos points to the model token to be tested against the GI.

            When mexts is non-zero the position is first saved in savedpos,
            and these additional codes can be returned:
              RCPEX    mexts == -1 and either STATUS was already RCEND on
                       entry, or the GI did not hit a model token; in the
                       latter case pos is restored from savedpos and STATUS
                       is set to RCPEX as well.
              RCMEX    mexts > 0 and either the GI hit a token that is in an
                       OR group or is optional (pos is restored from
                       savedpos), or the tested token was neither a hit nor
                       required.
              RCHITMEX mexts > 0 and the GI hit any other token.

            On an RCREQ return, nextetd is set to the ETD of the required
            token.

            TO DO: Save allowed GIs for an error message on an RCMISS.
                   Support a "query" mode (what is allowed now?) by working
                   with a copy of pos.
*/
int context(gi, mod, pos, statuspt, mexts)
struct etd *gi;               /* ETD of new GI. */
struct thdr mod[];            /* Model of current open element. */
struct mpos pos[];            /* Position in open element's model. */
UNCH *statuspt;               /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
int mexts;                    /* >0=stack level of minus grp; -1=plus; 0=none.*/
{
     UNCH toccsv, gtypesv;    /* Save token's TOCC and GTYPE in case grp ends.*/

     if (mexts != 0) {
          /* A plus exception is reported at once if the model has ended. */
          if (mexts == -1 && STATUS == RCEND)
               return RCPEX;
          /* Remember where we started so the position can be restored. */
          copypos(savedpos, pos);
     }
     Tstart = T;              /* Save starting token for AND group testing. */
     while (STATUS!=RCMISS && STATUS!=RCEND) {
          TRACEGI("CONTEXT", gi, mod, pos);
          /* While the current token is a group header, open the group:
             record it one level down, start at its first token, and clear
             that level's hit bits.
          */
          while (TTYPE==TTOR || TTYPE==TTSEQ || TTYPE==TTAND) {
               pos[P+1].g = M++; pos[++P].t = 1; HITCLEAR(H);
               Tstart = T;    /* Save starting token for AND group testing. */
               TRACEGI("OPENGRP", gi, mod, pos);
          }
          STATUS = (UNCH)tokenreq(gi, mod, pos);
          TRACEGI("STATUS", gi, mod, pos);
          if (gi==TOKEN.tu.thetd) {     /* Hit in model. */
               STATUS = (UNCH)RCHIT;
               /* newtoken() may move the position, so capture the hit
                  token's group type and occurrence flags first.
               */
               gtypesv = GTYPE; toccsv = TOCC;
               newtoken(mod, pos, statuspt);
               if (mexts <= 0)
                    return RCHIT;
               else if (gtypesv==TTOR || BITON(toccsv, TOPT)) {
                    /* restore position */
                    copypos(pos, savedpos);
                    return RCMEX;
               }
               else
                    return RCHITMEX;
          }
          if (STATUS==RCREQ) {
               /* For a plus exception, leave the loop; the code after it
                  restores the position and returns RCPEX.
               */
               if (mexts == -1)
                    break;
               STATUS = RCHIT;
               nextetd = TOKEN.tu.thetd;
               newtoken(mod, pos, statuspt);
               return(RCREQ);
          }
          /* else if (STATUS==RCNREQ) */
          if (mexts>0) return(RCMEX);
          newtoken(mod, pos, statuspt);
     }
     if (mexts == -1) {
          copypos(pos, savedpos);
          return STATUS = RCPEX;
     }
     return((int)STATUS);
}
/* ECONTEXT: Determine whether the current element can be ended, or whether
             non-optional tokens remain at the current level or higher.
             Returns 1 if element can be ended, or 0 if tokens remain.
             On entry, STATUS==RCEND if there are no tokens left; if not,
             pos points to the next model token to be tested.

             nextetd is updated as a side effect: it is set to the ETD of
             the current token when that token is of type TTETD, and to 0
             otherwise (including whenever P<=1, and on an RCMISS for an
             optional token).

             TO DO: Support a "query" mode (what is required now?) by working
                    with a copy of pos.
*/
int econtext(mod, pos, statuspt)
struct thdr mod[];            /* Model of current open element. */
struct mpos pos[];            /* Position in open element's model. */
UNCH *statuspt;               /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
{
     unsigned next;           /* Position in AND group of next testable token.*/

     Tstart = T;
     TRACEEND("ECONT", mod, pos, 0, 0);
     /* With P<=1 the answer depends only on the current token: it must
        have been hit or be optional.
     */
     if (P<=1) {nextetd = 0; return(TOKENHIT || BITON(TOCC, TOPT));}
     nextetd = TTYPE == TTETD ? TOKEN.tu.thetd : 0;
     while (STATUS!=RCMISS && STATUS!=RCEND) {
          STATUS = (UNCH)testend(mod, pos, 0, 0);
          TRACEEND("ECONTEND", mod, pos, 0, 0);
          nextetd = P<=1 || TTYPE != TTETD ? 0 : TOKEN.tu.thetd;
          if (STATUS==RCEND)       return(1);
          if (P<=1)                return(TOKENHIT || BITON(TOCC, TOPT));
          if (STATUS==RCMISS) {
               if (BITON(TOCC, TOPT)) nextetd = 0;
               return(0);
          }
          if (!tokenopt(mod, pos)) return(0);
          /* The current token can be skipped; advance to the next one. */
          STATUS = RCNREQ;
          if (GTYPE!=TTAND) ++T;   /* T!=GNUM or group would have ended. */
          else T = (UNCH)(((next = (UNS)offbit(H, (int)T, GNUM))!=0) ?
               next : offbit(H, 0, GNUM));
          M = G + grpsz(&GHDR, (int)T-1) + 1;
          TRACEEND("ECONTNEW", mod, pos, 0, 0);
     }
     if (STATUS==RCMISS) {
          if (BITON(TOCC, TOPT)) nextetd = 0;
          return(0);
     }
     return(1);               /* STATUS==RCEND */
}
/* NEWTOKEN: Find the next token to test.  Set STATUS to indicate results:
                  RCEND  if element has ended (no more tokens to test);
                  RCREQ  if required new token was found;
                  RCNREQ if non-required new token was found;
                  RCHIT  if a hit token was repeated (now non-required);
              and RCMISS if a new token can't be found because current token
              (which was not hit) was neither unconditionally required nor
              optional.
              Nothing is returned; the result is delivered through
              *statuspt and the updated position in pos.
*/
VOID newtoken(mod, pos, statuspt)
struct thdr mod[];            /* Model of current open element. */
struct mpos pos[];            /* Position in open element's model. */
UNCH *statuspt;               /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
{
     unsigned nextand = 0;    /* Position in AND group of next testable token.*/
     int currhit = (STATUS==RCHIT); /* 1=current GI hit; 0=not. */

     /* If the GI was a hit, turn on the hit bit and set the status to
        assume that the token to be tested against the next GI will
        be non-required.  If the current token is repeatable, exit so
        it will stand as the next token to test.
     */
     if (STATUS==RCHIT) {
          HITSET(H, T);
          STATUS = RCNREQ;
          if (BITON(TOCC, TREP)) return;
     }
     /* At this point, we must determine the next token to test:
        either against the next GI, if this one was a hit, or
        against the same GI if conditions permit a retry.
        To find the next token, we must first end the current group,
        if possible, and any we can that contain it.
        If the outermost group was a hit and is repeatable, or
        if the element has ended, we exit now.
        If it hasn't ended, or was optional and ended with a miss,
        we can retry the GI against the next token.
     */
     if ((STATUS = (UNCH)testend(mod, pos, 1, 1))!=RCNREQ) return;
     /* At this point, the "current token" is either the original one,
        or the token for the highest level unhit group that it ended.
        We will retry a missed GI, by testing it against the next
        token, if the current token:
        1. Is optional;
        2. Was hit (i.e., because it is repeatable and was hit by a
           previous GI or because it is a hit group that just ended);
        3. Is in an AND or OR group and is not the last testable token.
        It will be the next sequential one (unhit one, in an AND group);
        if there are none left, use the first unhit token in the group.
        In either case, set M to correspond to the new T.
     */
     retest:
     TRACEEND("RETEST", mod, pos, (int)nextand, 1);
     /* NOTE(review): nextand is only recomputed when the current group is
        an AND group, so after "goto retest" lands in another kind of group
        it still holds its previous value; confirm that this is intended.
     */
     if (GTYPE==TTAND) {
          nextand = offbit(H, (int)T, GNUM);
          if (!nextand)
               nextand = offbit(H, 0, GNUM);
     }
     if ( BITON(TOCC, TOPT)
       || TOKENHIT
       || GTYPE==TTOR              /* T!=GNUM or group would have ended. */
       || nextand ) {
          if (GTYPE!=TTAND) ++T;   /* T!=GNUM or group would have ended. */
          else T = nextand;
          M = G + grpsz(&GHDR, (int)T-1) + 1;
          if (GTYPE==TTAND) {
               /* If AND group wrapped, it can end if all non-optionals were
                  hit. */
               if (T==Tstart && !currhit) {
                    UNCH Psave = P;
                    int rc = testend(mod, pos, 0, 1);
                    /* A changed P means testend() ended the group; use its
                       result, retrying if it reports RCNREQ.  Otherwise
                       the GI is a miss.
                    */
                    if (Psave!=P) {if ((STATUS = (UNCH)rc)==RCNREQ) goto retest;}
                    else STATUS = RCMISS;
               }
               /* We only test unhit tokens, so we must use an unhit token
                  as Tstart (which is used to detect when the AND group has
                  wrapped). */
               else if (HITON(H,Tstart)) Tstart = T;
          }
     }
     else STATUS = RCMISS;
     TRACEEND("NEWTOKEN", mod, pos, (int)nextand, 1);
}
/* TESTEND: End the current group, if possible, and any that it is nested in.
            The current token will either be a group header, or some token
            that could not end its group.  Return 1 if the (possibly new)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -