⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 merge-d.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
/*************************************************************** * Main block operations ***************************************************************/static ISAMD_PP get_new_main_block( ISAMD_PP firstpp, ISAMD_PP pp){  /* allocates a new block for the main data, and links it in */  int newblock;  if (0 == firstpp->next)   {  /* special case, pp not yet allocated. */     /*Started as largest size, that's fine */     pp->pos = isamd_alloc_block(pp->is,pp->cat);     firstpp->next = isamd_addr(pp->pos,pp->cat);     if (pp->is->method->debug >3)         logf(LOG_LOG,"isamd_build: Alloc 1. dblock  p=%d=%d:%d",           isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos);  }  newblock=isamd_alloc_block(pp->is,pp->cat);  pp->next=isamd_addr(newblock,pp->cat);  isamd_buildlaterblock(pp);  isamd_write_block(pp->is,pp->cat,pp->pos,pp->buf);  if (pp->is->method->debug >3)      logf(LOG_LOG,"isamd_build: Alloc nxt %d=%d:%d -> %d=%d:%d",        isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos,        isamd_addr(newblock,pp->cat), pp->cat, newblock);  pp->next=0;  pp->pos=newblock;  pp->size=pp->offset=ISAMD_BLOCK_OFFSET_N;    return pp;} /* get_new_main_block */static ISAMD_PP  append_main_item(ISAMD_PP firstpp,                                   ISAMD_PP pp,                                   struct it_key *i_key){  /* appends one item in the main data block, allocates new if needed */   char *i_item= (char *) i_key;  /* same as char */   char *i_ptr=i_item;   char codebuff[128];   char *c_ptr = codebuff;   int codelen;   char hexbuff[64];   int maxsize = pp->is->method->filecat[pp->is->max_cat].bsize;    c_ptr=codebuff;   i_ptr=i_item;   (*pp->is->method->code_item)(ISAMD_ENCODE, pp->decodeClientData,                                &c_ptr, &i_ptr);   codelen = c_ptr - codebuff;   assert ( (codelen<128) && (codelen>0));   if (pp->is->method->debug >7)      logf(LOG_LOG,"isamd:build: coded %s nk=%d,ofs=%d-%d",          hexdump(codebuff, c_ptr-codebuff,hexbuff), firstpp->numKeys+1,     
     pp->offset, pp->offset+codelen);   if (pp->offset + codelen > maxsize )   { /* oops, block full - get a new one */      pp =  get_new_main_block( firstpp, pp );      /* reset encoging and code again */      (*pp->is->method->code_reset)(pp->decodeClientData);      c_ptr=codebuff;      i_ptr=i_item;      (*pp->is->method->code_item)(ISAMD_ENCODE, pp->decodeClientData,                                    &c_ptr, &i_ptr);      codelen = c_ptr - codebuff;      assert ( (codelen<128) && (codelen>0));      if (pp->is->method->debug >7)         logf(LOG_LOG,"isamd:build: recoded into %s  (nk=%d)",             hexdump(codebuff, c_ptr-codebuff,hexbuff), firstpp->numKeys+1);   } /* block full */       assert (pp->offset + codelen <= maxsize );      /* write the data into pp, now we must have room */    memcpy(&(pp->buf[pp->offset]),codebuff,codelen);   pp->offset += codelen;   pp->size += codelen;   firstpp->numKeys++;   /* clear the next 4 bytes in block, to avoid confusions with diff lens */   /* dirty, it should not be done here, but something slips somewhere, and */   /* I hope this fixes it...  
- Heikki */   codelen = pp->offset;   while ( (codelen < maxsize ) && (codelen <= pp->offset+4) )     pp->buf[codelen++] = '\0';   return pp;    } /* append_main_item *//*************************************************************** * Read with merge ***************************************************************//* Reads one item and corrects for the diffs, if any *//* return 1 for ok, 0 for eof */int isamd_read_item_merge (                     ISAMD_PP pp,                      char **dst,                     struct it_key *p_key,  /* the data item that didn't fit*/              /*       ISAMD_I data)  */    /* more input data comes here */                     FILTER filt)           /* more input data comes here */{                    /* The last two args can be null for ordinary reads */  char *keyptr;  char *codeptr;  char *codestart;  int winner=0; /* which diff holds the day */  int i; /* looping diffs */  int cmp;  int retry=1;  int oldoffs;  int rc;  if (!pp->diffinfo)    { /* first time */     getDiffInfo(pp);     for(i=1; pp->diffinfo[i].difftype!=DT_NONE; i++)       ;  /* find last diff */     if (p_key)       {  /* we have an extra item to inject into the merge */       if (pp->is->method->debug >9)  /* !!!!! 
*/          logf(LOG_LOG,"isamd_read_item: going to merge with %d.%d",               p_key->sysno, p_key->seqno);       pp->diffinfo[i].key = *p_key;  /* the key merge could not handle */       pp->diffinfo[i].mode = pp->diffinfo[i].key.seqno & 1;       pp->diffinfo[i].key.seqno >>= 1;       pp->diffinfo[i].difftype=DT_INPU;       if (pp->is->method->debug > 7)         logf(LOG_LOG,"isamd_read_item: inpu key %d sys=%d  seq=%d=2*%d+%d",            i, p_key->sysno,            pp->diffinfo[i].key.seqno*2 + pp->diffinfo[1].mode,            pp->diffinfo[i].key.seqno,            pp->diffinfo[i].mode);       p_key->sysno=p_key->seqno=0;  /* used it up */     }     if (filt)     { /* we have a whole input stream to inject */       pp->diffinfo[i].difftype=DT_INPU;     }  } /* first time */           while (retry)  {     retry=0;          winner = 0;     for (i=0; (!retry) && (pp->diffinfo[i].difftype); i++)     {        if (0==pp->diffinfo[i].key.sysno)        {/* read a new one, if possible */           if ((pp->diffinfo[i].difftype==DT_DIFF) &&             (pp->diffinfo[i].diffidx < pp->diffinfo[i].maxidx))           { /* a normal kind of diff */              oldoffs=pp->diffinfo[i].diffidx;              codeptr= codestart = &(pp->diffbuf[pp->diffinfo[i].diffidx]);              keyptr=(char *)&(pp->diffinfo[i].key);              (*pp->is->method->code_item)(ISAMD_DECODE,                    pp->diffinfo[i].decodeData, &keyptr, &codeptr);              pp->diffinfo[i].diffidx += codeptr-codestart;              pp->diffinfo[i].mode = pp->diffinfo[i].key.seqno & 1;              pp->diffinfo[i].key.seqno = pp->diffinfo[i].key.seqno >>1 ;              if (pp->is->method->debug > 9)                logf(LOG_LOG,"isamd_read_item: dif[%d] at %d-%d: %s",                  i,oldoffs, pp->diffinfo[i].diffidx,                  hexdump(pp->buf+oldoffs, pp->diffinfo[i].diffidx-oldoffs,0));              if (pp->is->method->debug > 7)                logf(LOG_LOG,"isamd_read_item: rd dif[%d] 
%d.%d (%x.%x)",                  i,                  pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,                  pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno);           }           else if ( pp->diffinfo[i].difftype==DT_MAIN)           { /* read a main item */              assert(i==0);  /* main data goes before any diffs */              oldoffs=pp->offset;              keyptr=(char*) &(pp->diffinfo[0].key);              rc= isamd_read_main_item(pp,&keyptr);              if (0==rc)               { /* eof */                 if (pp->is->method->debug > 7)                   logf(LOG_LOG,"isamd_read_item: eof (rc=%d) main ",                           rc);                pp->diffinfo[i].maxidx=-1;                pp->diffinfo[i].key.sysno=0;                pp->diffinfo[i].key.seqno=0;                pp->diffinfo[i].difftype= DT_DONE;              }               else              { /* not eof */                 pp->diffinfo[i].mode = 1;                 if (pp->is->method->debug > 7)                   logf(LOG_LOG,"isamd_read_item: rd main %d-%d %d.%d (%x.%x) m=%d",                     oldoffs,pp->offset,                     pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,                     pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,                     pp->diffinfo[i].mode);              } /* not eof */           }           else if (pp->diffinfo[i].difftype==DT_INPU)           {              keyptr = (char *) &pp->diffinfo[i].key;         /*     rc = (*data->read_item)(data->clientData, &keyptr, &pp->diffinfo[i].mode); */              rc = filter_read(filt, &pp->diffinfo[i].key,                                      &pp->diffinfo[i].mode);               if (!rc)               {  /* did not get it */                 pp->diffinfo[i].key.sysno=0;                 pp->diffinfo[i].maxidx=0; /* signal the end */                 pp->diffinfo[i].difftype=DT_DONE;              }              if (pp->is->method->debug >7)                 
logf(LOG_LOG,"merge: read inpu m=%d %d.%d (%x.%x)",                    pp->diffinfo[i].mode,                     pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,                    pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno );                   } /* read an input item */        } /* read a new one */                if (pp->is->method->debug > 8)          logf(LOG_LOG,"isamd_read_item: considering d%d %d.%d ix=%d mx=%d",               i, pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,                  pp->diffinfo[i].diffidx,   pp->diffinfo[i].maxidx);                    if ( 0!= pp->diffinfo[i].key.sysno)        { /* got a key, compare */          if (i!=winner)             cmp=key_compare(&pp->diffinfo[i].key, &pp->diffinfo[winner].key);          else             cmp=-1;          if (0==pp->diffinfo[winner].key.sysno)            cmp=-1; /* end of main sequence, take all diffs */          if (cmp<0)          {             if (pp->is->method->debug > 8)               logf(LOG_LOG,"isamd_read_item: ins [%d]%d.%d < [%d]%d.%d",                 i,                   pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,                 winner,                  pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno);             if (pp->diffinfo[i].mode)  /* insert diff, should always be */               winner = i;             else             {               if (pp->is->method->debug > 1)                 logf(LOG_LOG,"delete diff for nonexisting item");               assert(!"delete diff for nonexisting item");                 /* is an assert too steep here? Not really.*/             }          } /* earlier key */          else if (cmp==0)          {             if (!pp->diffinfo[i].mode) /* delete diff. 
should always be */             {                if (pp->is->method->debug > 8)                  logf(LOG_LOG,"isamd_read_item: del %d at%d %d.%d (%x.%x)",                    i, winner,                    pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,                    pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno);                pp->diffinfo[winner].key.sysno=0; /* delete it */             }             else                if (pp->is->method->debug > 2)                  logf(LOG_LOG,"isamd_read_item: duplicate ins %d at%d %d.%d (%x.%x)",                    i, winner,                    pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,                    pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno);                /* skip the insert, since we already have it in the base */                /* Should we fail an assertion here??? */             pp->diffinfo[i].key.sysno=0; /* done with the delete */             retry=1; /* start all over again */          } /* matching key */          /* else it is a later key, its turn will come */        } /* got a key */     } /* for each diff */  } /* not retry */  if ( pp->diffinfo[winner].key.sysno)  {    if (pp->is->method->debug > 7)      logf(LOG_LOG,"isamd_read_item: got %d  %d.%d (%x.%x)",        winner,        pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno,        pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno);    memcpy(*dst, &pp->diffinfo[winner].key, sizeof(struct it_key) );    *dst += sizeof(struct it_key);    pp->diffinfo[winner].key.sysno=0; /* used that one up */    cmp= 1;  }   else   {    if (pp->is->method->debug > 7)      logf(LOG_LOG,"isamd_read_item: eof w=%d  %d.%d (%x.%x)",        winner,        pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno,        pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno);    assert(winner==0); /* if nothing found, nothing comes from a diff */    cmp= 0; /* eof */  }  if (cmp)    
++(pp->is->no_read_keys);  else    ++(pp->is->no_read_eof);  return cmp;   } /* isamd_read_item */int isamd_read_item (ISAMD_PP pp, char **dst){  return isamd_read_item_merge(pp,dst,0,0);}/*************************************************************** * Merge ***************************************************************/static int merge ( ISAMD_PP firstpp,      /* first pp (with diffs) */                   struct it_key *p_key,  /* the data item that didn't fit*/                   FILTER filt,           /* more input data arriving here */                   char *dictentry,       /* the thin in the dictionary */                   int dictlen)           /* and its size */{  int diffidx;    int killblk=0;  struct it_key r_key;  char * r_ptr;  int r_more = 1;  ISAMD_PP pp;  ISAMD_PP readpp=firstpp;  int retpos=0;  int diffcat = firstpp->cat;  /* keep the category of the diffblock even */                               /* if it is going to be empty now. */                               /* Alternative: Make it the minimal, and */                               /* resize later. Saves disk, but will lead */                               /* into bad seeks. */    ++(readpp->is->no_merges);       /* set up diffs as they should be for reading */  diffidx = ISAMD_BLOCK_OFFSET_1;     if (readpp->is->method->debug >4)       logf(LOG_LOG,"isamd_merge: f=%d=%d:%d n=%d=%d:%d",        isamd_addr(firstpp->pos,firstpp->cat), firstpp->cat, firstpp->pos,        firstpp->next, isamd_type(firstpp->next), isamd_block(firstpp->next));    /* release our data block. Do before reading, when pos is stable ! 
*/  killblk=firstpp->pos;  if (killblk)  {      isamd_release_block(firstpp->is, firstpp->cat, killblk);      if (readpp->is->method->debug >3)             logf(LOG_LOG,"isamd_merge: released old firstblock %d (%d:%d)",              isamd_addr(killblk,firstpp->cat), firstpp->cat, killblk );  }    r_ptr= (char *) &r_key;  r_more = isamd_read_item_merge( readpp, &r_ptr, p_key, filt);  if (!r_more)    { /* oops, all data has been deleted! what to do??? */    /* never mind, we have at least one more delta to add to the block */    /* pray that is not a delete as well... */    r_key.sysno = 0;    r_key.seqno = 0;     if (readpp->is->method->debug >5)          logf(LOG_LOG,"isamd_merge:all data has been deleted (nk=%d) ",            readpp->numKeys);  }  /* set up the new blocks for simple writing */  /* firstpp=isamd_pp_open(readpp->is,isamd_addr(0, diffcat)); */  firstpp=isamd_pp_create(readpp->is, diffcat);  firstpp->pos=isamd_alloc_block(firstpp->is,diffcat);  if (readpp->is->method->debug >3)         logf(LOG_LOG,"isamd_merge: allocated new firstpp %d=%d:%d",          isamd_addr(firstpp->pos,firstpp->cat), firstpp->cat, firstpp->pos );    pp=isamd_pp_create(readpp->is,readpp->is->max_cat );  pp->offset=pp->size=ISAMD_BLOCK_OFFSET_N;    while (r_more)  {     if (readpp->is->method->debug >6)          logf(LOG_LOG,"isamd_merge: got key %d.%d",           r_key.sysno, r_key.seqno );     pp= append_main_item(firstpp, pp, &r_key);     if ( (readpp->pos != killblk ) && (0!=readpp->pos) )     {  /* pos can get to 0 at end of main seq, if still diffs left...*/        if (readpp->is->method->debug >3)              logf(LOG_LOG,"isamd_merge: released block %d (%d:%d) now %d=%d:%d",                isamd_addr(killblk,readpp->cat), readpp->cat, killblk,

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -