📄 extract.c
字号:
/* $Id: extract.c,v 1.146 2003/10/07 09:18:43 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 Index Data ApsThis file is part of the Zebra server.Zebra is free software; you can redistribute it and/or modify it underthe terms of the GNU General Public License as published by the FreeSoftware Foundation; either version 2, or (at your option) any laterversion.Zebra is distributed in the hope that it will be useful, but WITHOUT ANYWARRANTY; without even the implied warranty of MERCHANTABILITY orFITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public Licensefor more details.You should have received a copy of the GNU General Public Licensealong with Zebra; see the file LICENSE.zebra. If not, write to theFree Software Foundation, 59 Temple Place - Suite 330, Boston, MA02111-1307, USA.*/#include <stdio.h>#include <assert.h>#ifdef WIN32#include <io.h>#else#include <unistd.h>#endif#include <fcntl.h>#include "index.h"#include <direntz.h>#include <charmap.h>#if _FILE_OFFSET_BITS == 64#define PRINTF_OFF_T "%Ld"#else#define PRINTF_OFF_T "%ld"#endif#define USE_SHELLSORT 0#if USE_SHELLSORTstatic void shellsort(void *ar, int r, size_t s, int (*cmp)(const void *a, const void *b)){ char *a = ar; char v[100]; int h, i, j, k; static const int incs[16] = { 1391376, 463792, 198768, 86961, 33936, 13776, 4592, 1968, 861, 336, 112, 48, 21, 7, 3, 1 }; for ( k = 0; k < 16; k++) for (h = incs[k], i = h; i < r; i++) { memcpy (v, a+s*i, s); j = i; while (j > h && (*cmp)(a + s*(j-h), v) > 0) { memcpy (a + s*j, a + s*(j-h), s); j -= h; } memcpy (a+s*j, v, s); } }#endifstatic void logRecord (ZebraHandle zh){ ++zh->records_processed; if (!(zh->records_processed % 1000)) { logf (LOG_LOG, "Records: %7d i/u/d %d/%d/%d", zh->records_processed, zh->records_inserted, zh->records_updated, zh->records_deleted); }}static void extract_init (struct recExtractCtrl *p, RecWord *w){ w->zebra_maps = p->zebra_maps; w->seqno = 1; w->attrSet = VAL_BIB1; w->attrUse = 1016; w->reg_type = 'w'; w->extractCtrl = p;}static const char **searchRecordKey (ZebraHandle zh, struct recKeys *reckeys, int attrSetS, int attrUseS){ static const char *ws[32]; int off = 0; int startSeq = -1; int i; int seqno = 0;#if SU_SCHEME int chS, ch;#else short attrUse; char attrSet;#endif for (i = 0; i<32; i++) ws[i] = NULL;#if SU_SCHEME chS = zebraExplain_lookupSU (zh->reg->zei, attrSetS, attrUseS); if (chS < 0) return ws;#endif while (off < reckeys->buf_used) { const char *src = reckeys->buf + off; const char *wstart; int lead; lead = *src++;#if SU_SCHEME if ((lead & 3)<3) { memcpy (&ch, src, sizeof(ch)); src += sizeof(ch); }#else if (!(lead & 1)) { memcpy (&attrSet, src, sizeof(attrSet)); src += sizeof(attrSet); } if (!(lead & 2)) { memcpy (&attrUse, src, sizeof(attrUse)); src += sizeof(attrUse); }#endif wstart = src; while (*src++) ; if (lead & 60) seqno += ((lead>>2) & 15)-1; else { memcpy (&seqno, src, sizeof(seqno)); src += sizeof(seqno); } if (#if SU_SCHEME ch == chS#else attrUseS == attrUse && attrSetS == attrSet#endif ) { int woff; if (startSeq == -1) startSeq = seqno; woff = seqno - startSeq; if (woff >= 0 && woff < 31) ws[woff] = wstart; } off = src - reckeys->buf; } assert (off == reckeys->buf_used); return ws;}struct file_read_info { off_t file_max; /* maximum offset so far */ off_t file_offset; /* current offset */ off_t file_moffset; /* offset of rec/rec boundary */ int file_more; int fd; char *sdrbuf; int sdrmax;};static struct file_read_info *file_read_start (int fd){ struct file_read_info *fi = (struct file_read_info *) xmalloc (sizeof(*fi)); fi->fd = fd; fi->file_max = 0; fi->file_moffset = 0; fi->sdrbuf = 0; fi->sdrmax = 0; return fi;}static void file_read_stop (struct file_read_info *fi){ xfree (fi);}static off_t file_seek (void *handle, off_t offset){ struct file_read_info *p = (struct file_read_info *) handle; p->file_offset = offset; if (p->sdrbuf) return offset; return lseek (p->fd, offset, SEEK_SET);}static off_t file_tell (void *handle){ struct file_read_info *p = (struct file_read_info *) handle; return p->file_offset;}static int file_read (void *handle, char *buf, size_t count){ struct file_read_info *p = (struct file_read_info *) handle; int fd = p->fd; int r; if (p->sdrbuf) { r = count; if (r > p->sdrmax - p->file_offset) r = p->sdrmax - p->file_offset; if (r) memcpy (buf, p->sdrbuf + p->file_offset, r); } else r = read (fd, buf, count); if (r > 0) { p->file_offset += r; if (p->file_offset > p->file_max) p->file_max = p->file_offset; } return r;}static void file_begin (void *handle){ struct file_read_info *p = (struct file_read_info *) handle; p->file_offset = p->file_moffset; if (!p->sdrbuf && p->file_moffset) lseek (p->fd, p->file_moffset, SEEK_SET); p->file_more = 0;}static void file_end (void *handle, off_t offset){ struct file_read_info *p = (struct file_read_info *) handle; assert (p->file_more == 0); p->file_more = 1; p->file_moffset = offset;}static char *fileMatchStr (ZebraHandle zh, struct recKeys *reckeys, struct recordGroup *rGroup, const char *fname, const char *spec){ static char dstBuf[2048]; /* static here ??? */ char *dst = dstBuf; const char *s = spec; static const char **w; while (1) { while (*s == ' ' || *s == '\t') s++; if (!*s) break; if (*s == '(') { char attset_str[64], attname_str[64]; data1_attset *attset; int i; char matchFlag[32]; int attSet = 1, attUse = 1; int first = 1; s++; for (i = 0; *s && *s != ',' && *s != ')'; s++) if (i < 63) attset_str[i++] = *s; attset_str[i] = '\0'; if (*s == ',') { s++; for (i = 0; *s && *s != ')'; s++) if (i < 63) attname_str[i++] = *s; attname_str[i] = '\0'; } if ((attset = data1_get_attset (zh->reg->dh, attset_str))) { data1_att *att; attSet = attset->reference; att = data1_getattbyname(zh->reg->dh, attset, attname_str); if (att) attUse = att->value; else attUse = atoi (attname_str); } w = searchRecordKey (zh, reckeys, attSet, attUse); assert (w); if (*s == ')') { for (i = 0; i<32; i++) matchFlag[i] = 1; } else { logf (LOG_WARN, "Missing ) in match criteria %s in group %s", spec, rGroup->groupName ? rGroup->groupName : "none"); return NULL; } s++; for (i = 0; i<32; i++) if (matchFlag[i] && w[i]) { if (first) { *dst++ = ' '; first = 0; } strcpy (dst, w[i]); dst += strlen(w[i]); } if (first) { logf (LOG_WARN, "Record didn't contain match" " fields in (%s,%s)", attset_str, attname_str); return NULL; } } else if (*s == '$') { int spec_len; char special[64]; const char *spec_src = NULL; const char *s1 = ++s; while (*s1 && *s1 != ' ' && *s1 != '\t') s1++; spec_len = s1 - s; if (spec_len > 63) spec_len = 63; memcpy (special, s, spec_len); special[spec_len] = '\0'; s = s1; if (!strcmp (special, "group")) spec_src = rGroup->groupName; else if (!strcmp (special, "database")) spec_src = rGroup->databaseName; else if (!strcmp (special, "filename")) { spec_src = fname; } else if (!strcmp (special, "type")) spec_src = rGroup->recordType; else spec_src = NULL; if (spec_src) { strcpy (dst, spec_src); dst += strlen (spec_src); } } else if (*s == '\"' || *s == '\'') { int stopMarker = *s++; char tmpString[64]; int i = 0; while (*s && *s != stopMarker) { if (i < 63) tmpString[i++] = *s++; } if (*s) s++; tmpString[i] = '\0'; strcpy (dst, tmpString); dst += strlen (tmpString); } else { logf (LOG_WARN, "Syntax error in match criteria %s in group %s", spec, rGroup->groupName ? rGroup->groupName : "none"); return NULL; } *dst++ = 1; } if (dst == dstBuf) { logf (LOG_WARN, "No match criteria for record %s in group %s", fname, rGroup->groupName ? rGroup->groupName : "none"); return NULL; } *dst = '\0'; return dstBuf;}struct recordLogInfo { const char *fname; int recordOffset; struct recordGroup *rGroup;}; static int recordExtract (ZebraHandle zh, SYSNO *sysno, const char *fname, struct recordGroup *rGroup, int deleteFlag, struct file_read_info *fi, RecType recType, char *subType, void *clientData, int force_update){ RecordAttr *recordAttr; int r; char *matchStr; SYSNO sysnotmp; Record rec; off_t recordOffset = 0; if (fi->fd != -1) { struct recExtractCtrl extractCtrl; /* we are going to read from a file, so prepare the extraction */ int i; zh->reg->keys.buf_used = 0; zh->reg->keys.prevAttrUse = -1; zh->reg->keys.prevAttrSet = -1; zh->reg->keys.prevSeqNo = 0; zh->reg->sortKeys.buf_used = 0; recordOffset = fi->file_moffset; extractCtrl.offset = fi->file_moffset; extractCtrl.readf = file_read; extractCtrl.seekf = file_seek; extractCtrl.tellf = file_tell; extractCtrl.endf = file_end; extractCtrl.fh = fi; extractCtrl.subType = subType; extractCtrl.init = extract_init; extractCtrl.tokenAdd = extract_token_add; extractCtrl.schemaAdd = extract_schema_add; extractCtrl.dh = zh->reg->dh; extractCtrl.handle = zh; for (i = 0; i<256; i++) { if (zebra_maps_is_positioned(zh->reg->zebra_maps, i)) extractCtrl.seqno[i] = 1; else extractCtrl.seqno[i] = 0; } extractCtrl.zebra_maps = zh->reg->zebra_maps; extractCtrl.flagShowRecords = !rGroup->flagRw; if (!rGroup->flagRw) printf ("File: %s " PRINTF_OFF_T "\n", fname, recordOffset); if (rGroup->flagRw) { char msg[512]; sprintf (msg, "%s:" PRINTF_OFF_T , fname, recordOffset); yaz_log_init_prefix2 (msg); } r = (*recType->extract)(clientData, &extractCtrl); yaz_log_init_prefix2 (0); if (r == RECCTRL_EXTRACT_EOF) return 0; else if (r == RECCTRL_EXTRACT_ERROR_GENERIC) { /* error occured during extraction ... */ if (rGroup->flagRw && zh->records_processed < rGroup->fileVerboseLimit) { logf (LOG_WARN, "fail %s %s " PRINTF_OFF_T, rGroup->recordType, fname, recordOffset); } return 0; } else if (r == RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -