⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 merge-d.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
/* $Id: merge-d.c,v 1.30 2003/03/05 16:41:10 adam Exp $   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002   Index Data ApsThis file is part of the Zebra server.Zebra is free software; you can redistribute it and/or modify it underthe terms of the GNU General Public License as published by the FreeSoftware Foundation; either version 2, or (at your option) any laterversion.Zebra is distributed in the hope that it will be useful, but WITHOUT ANYWARRANTY; without even the implied warranty of MERCHANTABILITY orFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public Licensefor more details.You should have received a copy of the GNU General Public Licensealong with Zebra; see the file LICENSE.zebra.  If not, write to theFree Software Foundation, 59 Temple Place - Suite 330, Boston, MA02111-1307, USA.*/#define NEW_ISAM_D 1  /* not yet ready to delete the old one! */#include <stdlib.h>#include <assert.h>#include <string.h>#include <stdio.h>#include <yaz/log.h>#include "../index/index.h"#include "isamd-p.h"struct ISAMD_DIFF_s {  int diffidx;  int maxidx;  struct it_key key;  void *decodeData;  int mode;  int difftype;  };#define DT_NONE 0 /* no diff, marks end of sequence */#define DT_DIFF 1 /* ordinarry diff */#define DT_MAIN 2 /* main data */#define DT_INPU 3 /* input data to be merged */#define DT_DONE 4 /* done with all input here *//*************************************************************** * Input preprocess filter ***************************************************************/#define FILTER_NOTYET -1  /* no data read in yet, to be done */struct ISAMD_FILTER_s {  ISAMD_I data;          /* where the data comes from */  ISAMD is;              /* for debug flags */  struct it_key k1;      /* the next item to be returned */  int           m1;      /* mode for k1 */  int           r1;      /* result for read of k1, or NOTYET */  struct it_key k2;      /* the one after that */  int           m2;  int           r2;};typedef struct ISAMD_FILTER_s *FILTER;void 
filter_fill(FILTER F){  while ( (F->r1 == FILTER_NOTYET) || (F->r2 == FILTER_NOTYET) )  {     if (F->r1==FILTER_NOTYET)      { /* move data forward in the filter */        F->k1 = F->k2;        F->m1 = F->m2;        F->r1 = F->r2;        if ( 0 != F->r1 ) /* not eof */          F->r2 = FILTER_NOTYET; /* say we want more */        if (F->is->method->debug > 9)            logf(LOG_LOG,"filt_fill: shift %d.%d m=%d r=%d",             F->k1.sysno,              F->k1.seqno,              F->m1, F->r1);     }     if (F->r2==FILTER_NOTYET)     { /* read new bottom value */        char *k_ptr = (char*) &F->k2;        F->r2 = (F->data->read_item)(F->data->clientData, &k_ptr, &F->m2);         if (F->is->method->debug > 9)          logf(LOG_LOG,"filt_fill: read %d.%d m=%d r=%d",             F->k2.sysno, F->k2.seqno, F->m2, F->r2);     }       if ( (F->k1.sysno == F->k2.sysno) &&           (F->k1.seqno == F->k2.seqno) &&          (F->m1 != F->m2) &&          (F->r1 >0 ) && (F->r2 >0) )     { /* del-ins pair of same key (not eof) , ignore both */       if (F->is->method->debug > 9)         logf(LOG_LOG,"filt_fill: skipped %d.%d m=%d/%d r=%d/%d",            F->k1.sysno, F->k1.seqno,             F->m1,F->m2, F->r1,F->r2);       F->r1 = FILTER_NOTYET;       F->r2 = FILTER_NOTYET;     }  } /* while */} /* filter_fill */FILTER filter_open( ISAMD is, ISAMD_I data ){  FILTER F = (FILTER) xmalloc(sizeof(struct ISAMD_FILTER_s));  F->is = is;  F->data = data;  F->k1.sysno=0;  F->k1.seqno=0;  F->k2=F->k1;   F->m1 = F->m2 = 0;  F->r1 = F->r2 = FILTER_NOTYET;  filter_fill(F);  return F;}static void filter_close (FILTER F){  xfree(F);}static int filter_read( FILTER F,                         struct it_key *k,                        int *mode){  int res;  filter_fill(F);  if (F->is->method->debug > 9)    logf(LOG_LOG,"filt_read: reading %d.%d m=%d r=%d",       F->k1.sysno, F->k1.seqno, F->m1, F->r1);  res  = F->r1;  if(res)   {    *k = F->k1;    *mode= F->m1;  }  F->r1 = FILTER_NOTYET;  return 
res;}static int filter_isempty(FILTER F){  return ( (0 == F->r1) && (0 == F->r2)) ;}#if 0static int filter_only_one(FILTER F){  return ( (0 != F->r1) && (0 == F->r2));}#endif/* We may need backfilling, if we read a lonely key to make *//* a singleton, but its bitw will not fit in. Then we need to *//* process it normally, which means reading it again. So we  *//* need to unread it first. Luckily the filter is empty at that *//* point */#if 0static void filter_backfill(FILTER F, struct it_key *k, int mode){  assert(F->r1 == FILTER_NOTYET ); /* not overwriting data! */  F->k1=*k;  F->m1=mode;  F->r1=1; /* ok read */}#endif/*************************************************************** * Singleton encoding ***************************************************************//* When there is only a single item, we don't allocate a block * for it, but code it in the directory entry directly, if it * fits. */#define DEC_SYSBITS 15#define DEC_SEQBITS 15#define DEC_MASK(n) ((1<<(n))-1)#define SINGLETON_BIT (1<<(DEC_SYSBITS+DEC_SEQBITS+1))int is_singleton(ISAMD_P ipos){  return 0; /* no singletons any more */  return ( ipos != 0 ) && ( ipos & SINGLETON_BIT );}int singleton_encode(struct it_key *k)/* encodes the key into one int. 
If it does not fit, returns 0 */{  return 0; /* no more singletons */  if ( (k->sysno & DEC_MASK(DEC_SYSBITS) ) != k->sysno )    return 0;  /* no room dor sysno */  if ( (k->seqno & DEC_MASK(DEC_SYSBITS) ) != k->seqno )    return 0;  /* no room dor sysno */  return (k->sysno | (k->seqno << DEC_SYSBITS) ) | SINGLETON_BIT;} void singleton_decode (int code, struct it_key *k){  assert (code & SINGLETON_BIT);  k->sysno = code & DEC_MASK(DEC_SYSBITS);  code = code >> DEC_SYSBITS;   k->seqno = code & DEC_MASK(DEC_SEQBITS);}   /*************************************************************** * General support routines ***************************************************************/static char *hexdump(unsigned char *p, int len, char *buff) {  static char localbuff[128];  char bytebuff[8];  if (!buff) buff=localbuff;  *buff='\0';  while (len--) {    sprintf(bytebuff,"%02x",*p);    p++;    strcat(buff,bytebuff);    if (len) strcat(buff,",");  }  return buff;}static void isamd_reduceblock(ISAMD_PP pp)/* takes a large block, and reduces its category if possible *//* Presumably the first block in an isam-list */{   if (pp->pos)      return; /* existing block, do not touch */      /* TODO: Probably we may touch anyway? 
*/   if (pp->is->method->debug > 5)       logf(LOG_LOG,"isamd_reduce: start p=%d c=%d sz=%d",       pp->pos, pp->cat, pp->size);    while ( ( pp->cat > 0 ) && (!pp->next) &&            (pp->offset < pp->is->method->filecat[pp->cat-1].bsize ) )      pp->cat--;   pp->pos = isamd_alloc_block(pp->is, pp->cat);   if (pp->is->method->debug > 5)      logf(LOG_LOG,"isamd_reduce:  got  p=%d c=%d sz=%d",       pp->pos, pp->cat, pp->size);    } /* reduceblock */static int save_first_pp ( ISAMD_PP firstpp){   isamd_buildfirstblock(firstpp);   isamd_write_block(firstpp->is,firstpp->cat,firstpp->pos,firstpp->buf);   return isamd_addr(firstpp->pos,firstpp->cat);} static void save_last_pp (ISAMD_PP pp){   pp->next = 0;/* just to be sure */   isamd_buildlaterblock(pp);   isamd_write_block(pp->is,pp->cat,pp->pos,pp->buf);}#ifdef UNUSEDstatic int save_both_pps (ISAMD_PP firstpp, ISAMD_PP pp){   /* order of things: Better to save firstpp first, if there are just two */   /* blocks, but last if there are blocks in between, as these have already */   /* been saved... 
optimise later (that's why this is in its own func...*/   int retval = save_first_pp(firstpp);   if (firstpp!=pp){       save_last_pp(pp);      isamd_pp_close(pp);   }   isamd_pp_close(firstpp);   return retval;} /* save_both_pps */#endif/*************************************************************** * Diffblock handling ***************************************************************/void isamd_free_diffs(ISAMD_PP pp){  int i;  if (pp->is->method->debug > 5)     logf(LOG_LOG,"isamd_free_diffs: pp=%p di=%p", pp, pp->diffinfo);  if (!pp->diffinfo)     return;  for (i=0;pp->diffinfo[i].difftype!=DT_NONE;i++)       if(pp->diffinfo[i].decodeData)      {          if (pp->is->method->debug > 8)             logf(LOG_LOG,"isamd_free_diffs [%d]=%p",i,                            pp->diffinfo[i].decodeData);          (*pp->is->method->code_stop)(ISAMD_DECODE,pp->diffinfo[i].decodeData);      }   xfree(pp->diffinfo);  if (pp->diffbuf != pp->buf)    xfree (pp->diffbuf);    pp->diffbuf=0;  pp->diffinfo=0;} /* isamd_free_diffs */static void getDiffInfo(ISAMD_PP pp ){ /* builds the diff info structures from a diffblock */   int maxinfos = pp->is->method->filecat[pp->cat].bsize / 5 +2;    /* Each diff takes at least 5 bytes. 
Probably more, but this is safe */   int i=1;  /* [0] is used for the main data, [n+1] for merge inputs */   int diffsz= maxinfos * sizeof(struct ISAMD_DIFF_s);   int maxsz = pp->is->method->filecat[pp->is->max_cat].bsize;   int diffidx = ISAMD_BLOCK_OFFSET_1;   pp->diffinfo = xmalloc( diffsz );    pp->offset = pp->size+1; /* used this block up */   memset(pp->diffinfo,'\0',diffsz);   if (pp->is->method->debug > 5)         logf(LOG_LOG,"isamd_getDiffInfo: %d=%d:%d->%d, ix=%d mx=%d",         isamd_addr(pp->pos, pp->cat), pp->cat, pp->pos, pp->next,         diffidx,maxinfos);      /* duplicate the buffer for diffs */   /* (so that we can read the next real buffer(s) */   assert(0==pp->diffbuf);   pp->diffbuf=xmalloc(maxsz);   memcpy(pp->diffbuf, pp->buf, maxsz);      pp->diffinfo[0].maxidx=-1; /* mark as special */   pp->diffinfo[0].difftype=DT_MAIN;   while (i<maxinfos)    {        if ( diffidx+sizeof(int) > pp->is->method->filecat[pp->cat].bsize )      {         if (pp->is->method->debug > 5)           logf(LOG_LOG,"isamd_getDiffInfo:Near end (no room for len) at ix=%d n=%d",               diffidx, i);         return; /* whole block done */      }      memcpy( &pp->diffinfo[i].maxidx, &pp->diffbuf[diffidx], sizeof(int) );      pp->diffinfo[i].difftype=DT_DIFF;      if (pp->is->method->debug > 5)        logf(LOG_LOG,"isamd_getDiffInfo: max=%d ix=%d dbuf=%p",          pp->diffinfo[i].maxidx, diffidx, pp->diffbuf);      if ( (pp->is->method->debug > 0) &&         (pp->diffinfo[i].maxidx > pp->is->method->filecat[pp->cat].bsize) )      {          logf(LOG_LOG,"Bad MaxIx!!! 
%s:%d: diffidx=%d",                        __FILE__,__LINE__, diffidx);         logf(LOG_LOG,"i=%d maxix=%d bsz=%d", i, pp->diffinfo[i].maxidx,                       pp->is->method->filecat[pp->cat].bsize);         logf(LOG_LOG,"pp=%d=%d:%d  pp->nx=%d=%d:%d",                       isamd_addr(pp->pos,pp->cat), pp->pos, pp->cat,                       pp->next, isamd_type(pp->next), isamd_block(pp->next) );                            }      assert(pp->diffinfo[i].maxidx <= pp->is->method->filecat[pp->cat].bsize+1);      if (0==pp->diffinfo[i].maxidx)      {         if (pp->is->method->debug > 5)  /* !!! 4 */           logf(LOG_LOG,"isamd_getDiffInfo:End mark at ix=%d n=%d",               diffidx, i);         return; /* end marker */      }      diffidx += sizeof(int);      pp->diffinfo[i].decodeData = (*pp->is->method->code_start)(ISAMD_DECODE);      pp->diffinfo[i].diffidx = diffidx;      if (pp->is->method->debug > 5)        logf(LOG_LOG,"isamd_getDiff[%d]:%d-%d %s",          i,diffidx-sizeof(int),pp->diffinfo[i].maxidx,          hexdump((char *)&pp->diffbuf[diffidx-4],8,0) );      diffidx=pp->diffinfo[i].maxidx;      if ( diffidx > pp->is->method->filecat[pp->cat].bsize )        return; /* whole block done */      ++i;   }   assert (!"too many diff sequences in the block");}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -