📄 merge-d.c
字号:
/* $Id: merge-d.c,v 1.30 2003/03/05 16:41:10 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data ApsThis file is part of the Zebra server.Zebra is free software; you can redistribute it and/or modify it underthe terms of the GNU General Public License as published by the FreeSoftware Foundation; either version 2, or (at your option) any laterversion.Zebra is distributed in the hope that it will be useful, but WITHOUT ANYWARRANTY; without even the implied warranty of MERCHANTABILITY orFITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public Licensefor more details.You should have received a copy of the GNU General Public Licensealong with Zebra; see the file LICENSE.zebra. If not, write to theFree Software Foundation, 59 Temple Place - Suite 330, Boston, MA02111-1307, USA.*/#define NEW_ISAM_D 1 /* not yet ready to delete the old one! */#include <stdlib.h>#include <assert.h>#include <string.h>#include <stdio.h>#include <yaz/log.h>#include "../index/index.h"#include "isamd-p.h"struct ISAMD_DIFF_s { int diffidx; int maxidx; struct it_key key; void *decodeData; int mode; int difftype; };#define DT_NONE 0 /* no diff, marks end of sequence */#define DT_DIFF 1 /* ordinarry diff */#define DT_MAIN 2 /* main data */#define DT_INPU 3 /* input data to be merged */#define DT_DONE 4 /* done with all input here *//*************************************************************** * Input preprocess filter ***************************************************************/#define FILTER_NOTYET -1 /* no data read in yet, to be done */struct ISAMD_FILTER_s { ISAMD_I data; /* where the data comes from */ ISAMD is; /* for debug flags */ struct it_key k1; /* the next item to be returned */ int m1; /* mode for k1 */ int r1; /* result for read of k1, or NOTYET */ struct it_key k2; /* the one after that */ int m2; int r2;};typedef struct ISAMD_FILTER_s *FILTER;void filter_fill(FILTER F){ while ( (F->r1 == FILTER_NOTYET) || (F->r2 == FILTER_NOTYET) ) { if (F->r1==FILTER_NOTYET) { /* move data forward in the filter */ F->k1 = F->k2; F->m1 = F->m2; F->r1 = F->r2; if ( 0 != F->r1 ) /* not eof */ F->r2 = FILTER_NOTYET; /* say we want more */ if (F->is->method->debug > 9) logf(LOG_LOG,"filt_fill: shift %d.%d m=%d r=%d", F->k1.sysno, F->k1.seqno, F->m1, F->r1); } if (F->r2==FILTER_NOTYET) { /* read new bottom value */ char *k_ptr = (char*) &F->k2; F->r2 = (F->data->read_item)(F->data->clientData, &k_ptr, &F->m2); if (F->is->method->debug > 9) logf(LOG_LOG,"filt_fill: read %d.%d m=%d r=%d", F->k2.sysno, F->k2.seqno, F->m2, F->r2); } if ( (F->k1.sysno == F->k2.sysno) && (F->k1.seqno == F->k2.seqno) && (F->m1 != F->m2) && (F->r1 >0 ) && (F->r2 >0) ) { /* del-ins pair of same key (not eof) , ignore both */ if (F->is->method->debug > 9) logf(LOG_LOG,"filt_fill: skipped %d.%d m=%d/%d r=%d/%d", F->k1.sysno, F->k1.seqno, F->m1,F->m2, F->r1,F->r2); F->r1 = FILTER_NOTYET; F->r2 = FILTER_NOTYET; } } /* while */} /* filter_fill */FILTER filter_open( ISAMD is, ISAMD_I data ){ FILTER F = (FILTER) xmalloc(sizeof(struct ISAMD_FILTER_s)); F->is = is; F->data = data; F->k1.sysno=0; F->k1.seqno=0; F->k2=F->k1; F->m1 = F->m2 = 0; F->r1 = F->r2 = FILTER_NOTYET; filter_fill(F); return F;}static void filter_close (FILTER F){ xfree(F);}static int filter_read( FILTER F, struct it_key *k, int *mode){ int res; filter_fill(F); if (F->is->method->debug > 9) logf(LOG_LOG,"filt_read: reading %d.%d m=%d r=%d", F->k1.sysno, F->k1.seqno, F->m1, F->r1); res = F->r1; if(res) { *k = F->k1; *mode= F->m1; } F->r1 = FILTER_NOTYET; return res;}static int filter_isempty(FILTER F){ return ( (0 == F->r1) && (0 == F->r2)) ;}#if 0static int filter_only_one(FILTER F){ return ( (0 != F->r1) && (0 == F->r2));}#endif/* We may need backfilling, if we read a lonely key to make *//* a singleton, but its bitw will not fit in. Then we need to *//* process it normally, which means reading it again. So we *//* need to unread it first. Luckily the filter is empty at that *//* point */#if 0static void filter_backfill(FILTER F, struct it_key *k, int mode){ assert(F->r1 == FILTER_NOTYET ); /* not overwriting data! */ F->k1=*k; F->m1=mode; F->r1=1; /* ok read */}#endif/*************************************************************** * Singleton encoding ***************************************************************//* When there is only a single item, we don't allocate a block * for it, but code it in the directory entry directly, if it * fits. */#define DEC_SYSBITS 15#define DEC_SEQBITS 15#define DEC_MASK(n) ((1<<(n))-1)#define SINGLETON_BIT (1<<(DEC_SYSBITS+DEC_SEQBITS+1))int is_singleton(ISAMD_P ipos){ return 0; /* no singletons any more */ return ( ipos != 0 ) && ( ipos & SINGLETON_BIT );}int singleton_encode(struct it_key *k)/* encodes the key into one int. If it does not fit, returns 0 */{ return 0; /* no more singletons */ if ( (k->sysno & DEC_MASK(DEC_SYSBITS) ) != k->sysno ) return 0; /* no room dor sysno */ if ( (k->seqno & DEC_MASK(DEC_SYSBITS) ) != k->seqno ) return 0; /* no room dor sysno */ return (k->sysno | (k->seqno << DEC_SYSBITS) ) | SINGLETON_BIT;} void singleton_decode (int code, struct it_key *k){ assert (code & SINGLETON_BIT); k->sysno = code & DEC_MASK(DEC_SYSBITS); code = code >> DEC_SYSBITS; k->seqno = code & DEC_MASK(DEC_SEQBITS);} /*************************************************************** * General support routines ***************************************************************/static char *hexdump(unsigned char *p, int len, char *buff) { static char localbuff[128]; char bytebuff[8]; if (!buff) buff=localbuff; *buff='\0'; while (len--) { sprintf(bytebuff,"%02x",*p); p++; strcat(buff,bytebuff); if (len) strcat(buff,","); } return buff;}static void isamd_reduceblock(ISAMD_PP pp)/* takes a large block, and reduces its category if possible *//* Presumably the first block in an isam-list */{ if (pp->pos) return; /* existing block, do not touch */ /* TODO: Probably we may touch anyway? */ if (pp->is->method->debug > 5) logf(LOG_LOG,"isamd_reduce: start p=%d c=%d sz=%d", pp->pos, pp->cat, pp->size); while ( ( pp->cat > 0 ) && (!pp->next) && (pp->offset < pp->is->method->filecat[pp->cat-1].bsize ) ) pp->cat--; pp->pos = isamd_alloc_block(pp->is, pp->cat); if (pp->is->method->debug > 5) logf(LOG_LOG,"isamd_reduce: got p=%d c=%d sz=%d", pp->pos, pp->cat, pp->size); } /* reduceblock */static int save_first_pp ( ISAMD_PP firstpp){ isamd_buildfirstblock(firstpp); isamd_write_block(firstpp->is,firstpp->cat,firstpp->pos,firstpp->buf); return isamd_addr(firstpp->pos,firstpp->cat);} static void save_last_pp (ISAMD_PP pp){ pp->next = 0;/* just to be sure */ isamd_buildlaterblock(pp); isamd_write_block(pp->is,pp->cat,pp->pos,pp->buf);}#ifdef UNUSEDstatic int save_both_pps (ISAMD_PP firstpp, ISAMD_PP pp){ /* order of things: Better to save firstpp first, if there are just two */ /* blocks, but last if there are blocks in between, as these have already */ /* been saved... optimise later (that's why this is in its own func...*/ int retval = save_first_pp(firstpp); if (firstpp!=pp){ save_last_pp(pp); isamd_pp_close(pp); } isamd_pp_close(firstpp); return retval;} /* save_both_pps */#endif/*************************************************************** * Diffblock handling ***************************************************************/void isamd_free_diffs(ISAMD_PP pp){ int i; if (pp->is->method->debug > 5) logf(LOG_LOG,"isamd_free_diffs: pp=%p di=%p", pp, pp->diffinfo); if (!pp->diffinfo) return; for (i=0;pp->diffinfo[i].difftype!=DT_NONE;i++) if(pp->diffinfo[i].decodeData) { if (pp->is->method->debug > 8) logf(LOG_LOG,"isamd_free_diffs [%d]=%p",i, pp->diffinfo[i].decodeData); (*pp->is->method->code_stop)(ISAMD_DECODE,pp->diffinfo[i].decodeData); } xfree(pp->diffinfo); if (pp->diffbuf != pp->buf) xfree (pp->diffbuf); pp->diffbuf=0; pp->diffinfo=0;} /* isamd_free_diffs */static void getDiffInfo(ISAMD_PP pp ){ /* builds the diff info structures from a diffblock */ int maxinfos = pp->is->method->filecat[pp->cat].bsize / 5 +2; /* Each diff takes at least 5 bytes. Probably more, but this is safe */ int i=1; /* [0] is used for the main data, [n+1] for merge inputs */ int diffsz= maxinfos * sizeof(struct ISAMD_DIFF_s); int maxsz = pp->is->method->filecat[pp->is->max_cat].bsize; int diffidx = ISAMD_BLOCK_OFFSET_1; pp->diffinfo = xmalloc( diffsz ); pp->offset = pp->size+1; /* used this block up */ memset(pp->diffinfo,'\0',diffsz); if (pp->is->method->debug > 5) logf(LOG_LOG,"isamd_getDiffInfo: %d=%d:%d->%d, ix=%d mx=%d", isamd_addr(pp->pos, pp->cat), pp->cat, pp->pos, pp->next, diffidx,maxinfos); /* duplicate the buffer for diffs */ /* (so that we can read the next real buffer(s) */ assert(0==pp->diffbuf); pp->diffbuf=xmalloc(maxsz); memcpy(pp->diffbuf, pp->buf, maxsz); pp->diffinfo[0].maxidx=-1; /* mark as special */ pp->diffinfo[0].difftype=DT_MAIN; while (i<maxinfos) { if ( diffidx+sizeof(int) > pp->is->method->filecat[pp->cat].bsize ) { if (pp->is->method->debug > 5) logf(LOG_LOG,"isamd_getDiffInfo:Near end (no room for len) at ix=%d n=%d", diffidx, i); return; /* whole block done */ } memcpy( &pp->diffinfo[i].maxidx, &pp->diffbuf[diffidx], sizeof(int) ); pp->diffinfo[i].difftype=DT_DIFF; if (pp->is->method->debug > 5) logf(LOG_LOG,"isamd_getDiffInfo: max=%d ix=%d dbuf=%p", pp->diffinfo[i].maxidx, diffidx, pp->diffbuf); if ( (pp->is->method->debug > 0) && (pp->diffinfo[i].maxidx > pp->is->method->filecat[pp->cat].bsize) ) { logf(LOG_LOG,"Bad MaxIx!!! %s:%d: diffidx=%d", __FILE__,__LINE__, diffidx); logf(LOG_LOG,"i=%d maxix=%d bsz=%d", i, pp->diffinfo[i].maxidx, pp->is->method->filecat[pp->cat].bsize); logf(LOG_LOG,"pp=%d=%d:%d pp->nx=%d=%d:%d", isamd_addr(pp->pos,pp->cat), pp->pos, pp->cat, pp->next, isamd_type(pp->next), isamd_block(pp->next) ); } assert(pp->diffinfo[i].maxidx <= pp->is->method->filecat[pp->cat].bsize+1); if (0==pp->diffinfo[i].maxidx) { if (pp->is->method->debug > 5) /* !!! 4 */ logf(LOG_LOG,"isamd_getDiffInfo:End mark at ix=%d n=%d", diffidx, i); return; /* end marker */ } diffidx += sizeof(int); pp->diffinfo[i].decodeData = (*pp->is->method->code_start)(ISAMD_DECODE); pp->diffinfo[i].diffidx = diffidx; if (pp->is->method->debug > 5) logf(LOG_LOG,"isamd_getDiff[%d]:%d-%d %s", i,diffidx-sizeof(int),pp->diffinfo[i].maxidx, hexdump((char *)&pp->diffbuf[diffidx-4],8,0) ); diffidx=pp->diffinfo[i].maxidx; if ( diffidx > pp->is->method->filecat[pp->cat].bsize ) return; /* whole block done */ ++i; } assert (!"too many diff sequences in the block");}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -