⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 merge-d.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
                isamd_addr(readpp->pos,readpp->cat),readpp->cat, readpp->pos );        isamd_release_block(readpp->is, readpp->cat, readpp->pos);        killblk=readpp->pos;     }     /* (try to) read next item */     r_ptr= (char *) &r_key;     r_more = isamd_read_item_merge( readpp, &r_ptr,0,filt);  } /* while read */      isamd_reduceblock(pp);  /* reduce size if possible */  if (0==firstpp->next)    firstpp->next = isamd_addr(pp->pos,pp->cat);  save_last_pp(pp);  if (readpp->is->method->debug >4)       logf(LOG_LOG,"isamd_merge: saved last block %d=%d:%d",            isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos);  isamd_pp_close(pp);  if (readpp->is->method->debug >5)         logf(LOG_LOG,"isamd_merge: closing readpp %d=%d:%d di=%p",              isamd_addr(readpp->pos,readpp->cat), readpp->cat, readpp->pos,              readpp->diffinfo);  isamd_pp_close(readpp); /* pos is 0 by now, at eof. close works anyway */  if (readpp->is->method->debug >2)        logf(LOG_LOG,"isamd_merge: merge ret f=%d=%d:%d pp=%d=%d:%d",            isamd_addr(firstpp->pos,pp->cat), firstpp->cat, firstpp->pos,            isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos);  firstpp->size = firstpp->offset = ISAMD_BLOCK_OFFSET_1;  /* nothing there */  memset(firstpp->buf,'\0',firstpp->is->method->filecat[firstpp->cat].bsize);  save_first_pp(firstpp);  retpos = isamd_addr(firstpp->pos, firstpp->cat);  isamd_pp_close(firstpp);  /* Create the dict entry */  /*!*/ /* it could be this could go in the dict as well, if there's */        /* been really many deletes. Somehow I suspect that is not the */        /* case. 
FIXME: Collect statistics and see if needed */  dictentry[0]=0; /* mark as a real isam */  memcpy(dictentry+1, &retpos, sizeof(ISAMD_P));  dictlen=sizeof(ISAMD_P)+1;  return dictlen;  } /* merge *//*************************************************************** * Appending diffs  ***************************************************************/static int append_diffs(      ISAMD is,       char *dictentry, int dictlen,      FILTER filt){   ISAMD_P ipos;   struct it_key i_key;    /* one input item */   char *i_item = (char *) &i_key;  /* same as chars */   char *i_ptr=i_item;   int i_more =1;   int i_mode;     /* 0 for delete, 1 for insert */    ISAMD_PP firstpp;   char hexbuff[64];   int diffidx=0;   int maxsize=0;   int difflenidx;   char codebuff[128];   char *c_ptr = codebuff;   int codelen;   int merge_rc;   ISAMD_P retpos;   int dsize;   if (0==dictlen)   {       firstpp=isamd_pp_create(is, 0 );       firstpp->size=firstpp->offset=ISAMD_BLOCK_OFFSET_1;         /* create in smallest category, will expand later */       ++(is->no_fbuilds);   }    else   {       firstpp=isamd_pp_open(is, dictentry, dictlen);       if (dictentry[0] )          ipos=0;       else            memcpy(&ipos,dictentry+1,sizeof(ISAMD_P));       ++(is->no_appds);   }   if (is->method->debug >2)       logf(LOG_LOG,"isamd_appd: Start ipos=%d=%d:%d n=%d=%d:%d nk=%d sz=%d",        ipos, isamd_type(ipos), isamd_block(ipos),        firstpp->next, isamd_type(firstpp->next), isamd_block(firstpp->next),        firstpp->numKeys, firstpp->size);   maxsize = is->method->filecat[firstpp->cat].bsize;       difflenidx = diffidx = firstpp->size;      diffidx+=sizeof(int);  /* difflen will be stored here */      /* read first input */   i_more = filter_read(filt, &i_key, &i_mode);    /* i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); */   if (is->method->debug >6)      logf(LOG_LOG,"isamd_appd: start m=%d %d.%d=%x.%x: %d",         i_mode,          i_key.sysno, i_key.seqno,          
i_key.sysno, i_key.seqno,         i_key.sysno*2+i_mode);   while (i_more)   {           /* store the mode bit inside key */      assert( ((i_key.seqno<<1)>>1) == i_key.seqno); /* can spare the bit */      i_key.seqno = i_key.seqno * 2 + i_mode;        c_ptr=codebuff;      i_ptr=i_item;       (*is->method->code_item)(ISAMD_ENCODE, firstpp->decodeClientData,                                &c_ptr, &i_ptr);      codelen = c_ptr - codebuff;      assert ( (codelen<128) && (codelen>0));      if (is->method->debug >7)         logf(LOG_LOG,"isamd_appd: coded %d: %s (nk=%d) (ix=%d)",             codelen, hexdump(codebuff, codelen,hexbuff),              firstpp->numKeys,diffidx);      if (diffidx + codelen > maxsize )      { /* block full */         while ( (firstpp->cat < firstpp->is->max_cat) &&                 (diffidx + codelen > maxsize) )         { /* try to increase the block size */             if (firstpp->pos > 0)  /* free the old block if allocated */                 isamd_release_block(is, firstpp->cat, firstpp->pos);             ++firstpp->cat;             maxsize = is->method->filecat[firstpp->cat].bsize;              firstpp->pos=0; /* need to allocate it when saving */                          if (is->method->debug >3)                logf(LOG_LOG,"isamd_appd: increased diff block sz to %d (%d)",                   firstpp->cat, maxsize);         }         if  ((firstpp->cat >= firstpp->is->max_cat) &&                 (diffidx + codelen > maxsize) )         { /* max size - can't help, need to merge it */             if (is->method->debug >7)                logf(LOG_LOG,"isamd_appd: need to merge");             if (is->method->debug >9)  /* !!!!! */                logf(LOG_LOG,"isamd_appd: going to merge with m=%d %d.%d",                     i_mode, i_key.sysno, i_key.seqno);             merge_rc = merge (firstpp, &i_key, filt, dictentry, dictlen);             if (0!=merge_rc)               return merge_rc;  /* merge handled them all ! 
*/             assert(!"merge returned zero ??");         } /* need to merge */      } /* block full */      if (!( diffidx+codelen <= maxsize ))       { /* bug hunting */         logf(LOG_LOG,"OOPS, diffidx problem: d=%d c=%d s=%d > m=%d",           diffidx, codelen, diffidx+codelen, maxsize);         logf(LOG_LOG,"ipos=%d f=%d=%d:%d",           ipos,            isamd_addr(firstpp->pos, firstpp->cat),           firstpp->cat, firstpp->pos );      }      assert ( diffidx+codelen <= maxsize );            /* save the diff */       memcpy(&(firstpp->buf[diffidx]),codebuff,codelen);      diffidx += codelen;      firstpp->size = firstpp->offset = diffidx;            if (i_mode)        firstpp->numKeys++; /* insert diff */      else        firstpp->numKeys--; /* delete diff */       /* update length of this diff run */      memcpy(&(firstpp->buf[difflenidx]),&diffidx,sizeof(diffidx));            /* (try to) read the next input */      i_ptr = i_item;      i_more = filter_read(filt, &i_key, &i_mode);     /*  i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); */      if ( (i_more) && (is->method->debug >6) )         logf(LOG_LOG,"isamd_appd: got m=%d %d.%d=%x.%x: %d",            i_mode,             i_key.sysno, i_key.seqno,             i_key.sysno, i_key.seqno,            i_key.sysno*2+i_mode);   } /* more loop */   /* clear the next difflen, if room for such */   difflenidx = diffidx;   while ( (difflenidx-diffidx<=sizeof(int)+1) && (difflenidx<maxsize))     firstpp->buf[difflenidx++]='\0';   if (firstpp->numKeys==0)   {        /* FIXME: Release blocks that may be allocated !!! */       return 0; /* don't bother storing this! */   }   dsize=diffidx-ISAMD_BLOCK_OFFSET_1;   /* logf(LOG_LOG,"!! 
nxt=%d diffidx=%d ds=%d",            firstpp->next, diffidx, dsize);  */   if ( (0==firstpp->next) && (dsize <ISAMD_MAX_DICT_LEN))   {        /* logf(LOG_LOG,"building a dict entry!!"); */        assert(firstpp->numKeys < 128);        assert(firstpp->numKeys >0);        /* actually, 255 is good enough, but sign mismatches... */        /* in real life, 4-5 is as much as we can hope for, as long */        /* as ISAMD_MAX_DICT_LEN is reasonably small (8) */        dictentry[0]=firstpp->numKeys;        memcpy(dictentry+1, firstpp->buf+ISAMD_BLOCK_OFFSET_1, dsize);        dictlen=dsize+1;   }   else    {       if (0==firstpp->pos)  /* need to (re)alloc the block */           firstpp->pos = isamd_alloc_block(is, firstpp->cat);       retpos = save_first_pp( firstpp );       isamd_pp_close(firstpp);       dictentry[0]=0; /* mark as a real isam */       memcpy(dictentry+1, &retpos, sizeof(ISAMD_P));       dictlen=sizeof(ISAMD_P)+1;   }       return dictlen;} /* append_diffs *//************************************************************* * isamd_append itself *************************************************************/int isamd_append (ISAMD is, char *dictentry, int dictlen, ISAMD_I data)/*ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) */{   FILTER F = filter_open(is,data);   int newlen=0;   if ( filter_isempty(F) ) /* can be, if del-ins of the same */   {      if (is->method->debug >3)          logf(LOG_LOG,"isamd_appd: nothing to do ");      filter_close(F);      ++(is->no_non);      return dictlen; /* without doing anything at all */   }#ifdef SKIPTHIS    /* The old way to handle singletons */   if ( ( 0==ipos) && filter_only_one(F) )   {      struct it_key k;      int mode;      filter_read(F,&k,&mode);           assert(mode);       rc = singleton_encode(&k);      if (!rc)       {      if (is->method->debug >9)          logf(LOG_LOG,"isamd_appd: singleton didn't fit, backfilling");         filter_backfill(F,&k, mode);      }      if (is->method->debug >9) 
         logf(LOG_LOG,"isamd_appd: singleton %d (%x)",           rc,rc);      if (rc)        is->no_singles++;      assert ( (rc==0) || is_singleton(rc) );   }   newlen = append_diffs(is,ipos,F); #endif   newlen = append_diffs(is,dictentry,dictlen,F);    filter_close(F);   if (is->method->debug >2)       logf(LOG_LOG,"isamd_appd: ret len=%d ", newlen);   return newlen;} /*  isamd_append *//* * $Log: merge-d.c,v $ * Revision 1.30  2003/03/05 16:41:10  adam * Fix GCC warnings * * Revision 1.29  2002/11/26 22:18:34  adam * Remove // comments * * Revision 1.28  2002/08/02 19:26:56  adam * Towards GPL * * Revision 1.27  2002/07/12 18:12:21  heikki * Isam-D now stores small entries directly in the dictionary. * Needs more tuning and cleaning... * * Revision 1.26  2002/07/11 16:16:00  heikki * Fixed a bug in isamd, failed to store a single key when its bits * did not fit into a singleton. * * Revision 1.25  1999/11/30 13:48:04  adam * Improved installation. Updated for inclusion of YAZ header files. * * Revision 1.24  1999/10/05 09:57:40  heikki * Tuning the isam-d (and fixed a small "detail") * * Revision 1.23  1999/09/27 14:36:36  heikki * singletons * * Revision 1.22  1999/09/23 18:01:18  heikki * singleton optimising * * Revision 1.21  1999/09/21 17:36:43  heikki * Added filter function. Not much of effect on the small test set... 
* * Revision 1.20  1999/09/20 15:48:06  heikki * Small changes * * Revision 1.19  1999/09/13 13:28:28  heikki * isam-d optimizing: merging input data in the same go * * Revision 1.18  1999/08/25 18:09:24  heikki * Starting to optimize * * Revision 1.17  1999/08/24 13:17:42  heikki * Block sizes, comments * * Revision 1.16  1999/08/24 10:12:02  heikki * Comments about optimising * * Revision 1.15  1999/08/22 08:26:34  heikki * COmments * * Revision 1.14  1999/08/20 12:25:58  heikki * Statistics in isamd * * Revision 1.13  1999/08/18 13:59:19  heikki * Fixed another unlikely difflen bug * * Revision 1.12  1999/08/18 13:28:17  heikki * Set log levels to decent values * * Revision 1.11  1999/08/18 10:37:11  heikki * Fixed (another) difflen bug * * Revision 1.10  1999/08/18 09:13:31  heikki * Fixed a detail * * Revision 1.9  1999/08/17 19:46:53  heikki * Fixed a memory leak * * Revision 1.8  1999/08/07 11:30:59  heikki * Bug fixing (still a mem leak somewhere) * * Revision 1.7  1999/08/04 14:21:18  heikki * isam-d seems to be working. * * Revision 1.6  1999/07/23 15:43:05  heikki * Hunted a few bugs in isam-d. Still crashes on the long test run * * Revision 1.5  1999/07/23 13:58:52  heikki * merged closer to working, still fails on filling a separate, large block * * Revision 1.4  1999/07/21 14:53:55  heikki * isamd read and write functions work, except when block full * Merge missing still. Need to split some functions * * Revision 1.1  1999/07/14 13:14:47  heikki * Created empty * * */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -