📄 extract.c
字号:
recType = recType_byName (zh->reg->recTypes, recordType, subType, &clientData); } else { if (!(rGroup->recordType)) { logf (LOG_WARN, "No such record type defined"); return 0; } logf (LOG_DEBUG, "Get record type from rgroup: %s",rGroup->recordType); recType = recType_byName (zh->reg->recTypes, rGroup->recordType, subType, &clientData); recordType = rGroup->recordType; } if (!recType) { logf (LOG_WARN, "No such record type: %s", rGroup->recordType); return 0; } extractCtrl.subType = subType; extractCtrl.init = extract_init; extractCtrl.tokenAdd = extract_token_add; extractCtrl.schemaAdd = extract_schema_add; extractCtrl.dh = zh->reg->dh; extractCtrl.handle = zh; extractCtrl.zebra_maps = zh->reg->zebra_maps; extractCtrl.flagShowRecords = 0; for (i = 0; i<256; i++) { if (zebra_maps_is_positioned(zh->reg->zebra_maps, i)) extractCtrl.seqno[i] = 1; else extractCtrl.seqno[i] = 0; } r = (*recType->extract)(clientData, &extractCtrl); if (r == RECCTRL_EXTRACT_EOF) return 0; else if (r == RECCTRL_EXTRACT_ERROR_GENERIC) { /* error occured during extraction ... */ yaz_log (LOG_WARN, "extract error: generic"); return 0; } else if (r == RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER) { /* error occured during extraction ... */ yaz_log (LOG_WARN, "extract error: no such filter"); return 0; } if (zh->reg->keys.buf_used == 0) { /* the extraction process returned no information - the record is probably empty - unless flagShowRecords is in use */ if (test_mode) return 1; logf (LOG_WARN, "No keys generated for record"); logf (LOG_WARN, " The file is probably empty"); return 1; } /* match criteria */ matchStr = NULL; if (! *sysno && match_criteria) { char *rinfo; if (*match_criteria) { matchStr = (char *)match_criteria; } else { if (rGroup->recordId && *rGroup->recordId) { matchStr = fileMatchStr (zh, &zh->reg->keys, rGroup, fname, rGroup->recordId); } } if (matchStr) { rinfo = dict_lookup (zh->reg->matchDict, matchStr); if (rinfo) memcpy (sysno, rinfo+1, sizeof(*sysno)); } else { logf (LOG_WARN, "Bad match criteria (recordID)"); return 0; } } if (! *sysno) { /* new record */ if (delete_flag) { logf (LOG_LOG, "delete %s %s %ld", recordType, fname, (long) recordOffset); logf (LOG_WARN, "cannot delete record above (seems new)"); return 1; } logf (LOG_LOG, "add %s %s %ld", recordType, fname, (long) recordOffset); rec = rec_new (zh->reg->records); *sysno = rec->sysno; recordAttr = rec_init_attr (zh->reg->zei, rec); if (matchStr) { dict_insert (zh->reg->matchDict, matchStr, sizeof(*sysno), sysno); } extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys); zh->records_inserted++; } else { /* record already exists */ struct recKeys delkeys; struct sortKeys sortKeys; if (!allow_update) { logf (LOG_LOG, "skipped %s %s %ld", recordType, fname, (long) recordOffset); logRecord(zh); return -1; } rec = rec_get (zh->reg->records, *sysno); assert (rec); recordAttr = rec_init_attr (zh->reg->zei, rec); if (!force_update) { if (recordAttr->runNumber == zebraExplain_runNumberIncrement (zh->reg->zei, 0)) { logf (LOG_LOG, "skipped %s %s %ld", recordType, fname, (long) recordOffset); extract_flushSortKeys (zh, *sysno, -1, &zh->reg->sortKeys); rec_rm (&rec); logRecord(zh); return 1; } } delkeys.buf_used = rec->size[recInfo_delKeys]; delkeys.buf = rec->info[recInfo_delKeys]; sortKeys.buf_used = rec->size[recInfo_sortKeys]; sortKeys.buf = rec->info[recInfo_sortKeys]; extract_flushSortKeys (zh, *sysno, 0, &sortKeys); extract_flushRecordKeys (zh, *sysno, 0, &delkeys); if (delete_flag) { /* record going to be deleted */ if (!delkeys.buf_used) { logf (LOG_LOG, "delete %s %s %ld", recordType, fname, (long) recordOffset); logf (LOG_WARN, "cannot delete file above, storeKeys false"); } else { logf (LOG_LOG, "delete %s %s %ld", recordType, fname, (long) recordOffset); zh->records_deleted++; if (matchStr) dict_delete (zh->reg->matchDict, matchStr); rec_del (zh->reg->records, &rec); } rec_rm (&rec); logRecord(zh); return 1; } else { /* record going to be updated */ if (!delkeys.buf_used) { logf (LOG_LOG, "update %s %s %ld", recordType, fname, (long) recordOffset); logf (LOG_WARN, "cannot update file above, storeKeys false"); } else { logf (LOG_LOG, "update %s %s %ld", recordType, fname, (long) recordOffset); extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys); zh->records_updated++; } } } /* update file type */ xfree (rec->info[recInfo_fileType]); rec->info[recInfo_fileType] = rec_strdup (recordType, &rec->size[recInfo_fileType]); /* update filename */ xfree (rec->info[recInfo_filename]); rec->info[recInfo_filename] = rec_strdup (fname, &rec->size[recInfo_filename]); /* update delete keys */ xfree (rec->info[recInfo_delKeys]); if (zh->reg->keys.buf_used > 0 && rGroup->flagStoreKeys == 1) { rec->size[recInfo_delKeys] = zh->reg->keys.buf_used; rec->info[recInfo_delKeys] = zh->reg->keys.buf; zh->reg->keys.buf = NULL; zh->reg->keys.buf_max = 0; } else { rec->info[recInfo_delKeys] = NULL; rec->size[recInfo_delKeys] = 0; } /* update sort keys */ xfree (rec->info[recInfo_sortKeys]); rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used; rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf; zh->reg->sortKeys.buf = NULL; zh->reg->sortKeys.buf_max = 0; /* save file size of original record */ zebraExplain_recordBytesIncrement (zh->reg->zei, - recordAttr->recordSize);#if 0 recordAttr->recordSize = fi->file_moffset - recordOffset; if (!recordAttr->recordSize) recordAttr->recordSize = fi->file_max - recordOffset;#else recordAttr->recordSize = buf_size;#endif zebraExplain_recordBytesIncrement (zh->reg->zei, recordAttr->recordSize); /* set run-number for this record */ recordAttr->runNumber = zebraExplain_runNumberIncrement (zh->reg->zei, 0); /* update store data */ xfree (rec->info[recInfo_storeData]); if (rGroup->flagStoreData == 1) { rec->size[recInfo_storeData] = recordAttr->recordSize; rec->info[recInfo_storeData] = (char *) xmalloc (recordAttr->recordSize);#if 1 memcpy (rec->info[recInfo_storeData], buf, recordAttr->recordSize);#else if (lseek (fi->fd, recordOffset, SEEK_SET) < 0) { logf (LOG_ERRNO|LOG_FATAL, "seek to %ld in %s", (long) recordOffset, fname); exit (1); } if (read (fi->fd, rec->info[recInfo_storeData], recordAttr->recordSize) < recordAttr->recordSize) { logf (LOG_ERRNO|LOG_FATAL, "read %d bytes of %s", recordAttr->recordSize, fname); exit (1); }#endif } else { rec->info[recInfo_storeData] = NULL; rec->size[recInfo_storeData] = 0; } /* update database name */ xfree (rec->info[recInfo_databaseName]); rec->info[recInfo_databaseName] = rec_strdup (rGroup->databaseName, &rec->size[recInfo_databaseName]); /* update offset */ recordAttr->recordOffset = recordOffset; /* commit this record */ rec_put (zh->reg->records, &rec); logRecord(zh); return 0;}int explain_extract (void *handle, Record rec, data1_node *n){ ZebraHandle zh = (ZebraHandle) handle; struct recExtractCtrl extractCtrl; int i; if (zebraExplain_curDatabase (zh->reg->zei, rec->info[recInfo_databaseName])) { abort(); if (zebraExplain_newDatabase (zh->reg->zei, rec->info[recInfo_databaseName], 0)) abort (); } zh->reg->keys.buf_used = 0; zh->reg->keys.prevAttrUse = -1; zh->reg->keys.prevAttrSet = -1; zh->reg->keys.prevSeqNo = 0; zh->reg->sortKeys.buf_used = 0; extractCtrl.init = extract_init; extractCtrl.tokenAdd = extract_token_add; extractCtrl.schemaAdd = extract_schema_add; extractCtrl.dh = zh->reg->dh; for (i = 0; i<256; i++) extractCtrl.seqno[i] = 0; extractCtrl.zebra_maps = zh->reg->zebra_maps; extractCtrl.flagShowRecords = 0; extractCtrl.handle = handle; if (n) grs_extract_tree(&extractCtrl, n); if (rec->size[recInfo_delKeys]) { struct recKeys delkeys; struct sortKeys sortkeys; delkeys.buf_used = rec->size[recInfo_delKeys]; delkeys.buf = rec->info[recInfo_delKeys]; sortkeys.buf_used = rec->size[recInfo_sortKeys]; sortkeys.buf = rec->info[recInfo_sortKeys]; extract_flushSortKeys (zh, rec->sysno, 0, &sortkeys); extract_flushRecordKeys (zh, rec->sysno, 0, &delkeys); } extract_flushRecordKeys (zh, rec->sysno, 1, &zh->reg->keys); extract_flushSortKeys (zh, rec->sysno, 1, &zh->reg->sortKeys); xfree (rec->info[recInfo_delKeys]); rec->size[recInfo_delKeys] = zh->reg->keys.buf_used; rec->info[recInfo_delKeys] = zh->reg->keys.buf; zh->reg->keys.buf = NULL; zh->reg->keys.buf_max = 0; xfree (rec->info[recInfo_sortKeys]); rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used; rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf; zh->reg->sortKeys.buf = NULL; zh->reg->sortKeys.buf_max = 0; return 0;}void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, int cmd, struct recKeys *reckeys){#if SU_SCHEME#else unsigned char attrSet = (unsigned char) -1; unsigned short attrUse = (unsigned short) -1;#endif int seqno = 0; int off = 0; int ch = 0; ZebraExplainInfo zei = zh->reg->zei; if (!zh->reg->key_buf) { int mem= 1024*1024* atoi( res_get_def( zh->res, "memmax", "8")); if (mem <= 0) { logf(LOG_WARN, "Invalid memory setting, using default 8 MB"); mem= 1024*1024*8; } /* FIXME: That "8" should be in a default settings include */ /* not hard-coded here! -H */ zh->reg->key_buf = (char**) xmalloc (mem); zh->reg->ptr_top = mem/sizeof(char*); zh->reg->ptr_i = 0; zh->reg->key_buf_used = 0; zh->reg->key_file_no = 0; } zebraExplain_recordCountIncrement (zei, cmd ? 1 : -1); while (off < reckeys->buf_used) { const char *src = reckeys->buf + off; struct it_key key; int lead; lead = *src++;#if SU_SCHEME if ((lead & 3) < 3) { memcpy (&ch, src, sizeof(ch)); src += sizeof(ch); }#else if (!(lead & 1)) { memcpy (&attrSet, src, sizeof(attrSet)); src += sizeof(attrSet); } if (!(lead & 2)) { memcpy (&attrUse, src, sizeof(attrUse)); src += sizeof(attrUse); }#endif if (zh->reg->key_buf_used + 1024 > (zh->reg->ptr_top -zh->reg->ptr_i)*sizeof(char*)) extract_flushWriteKeys (zh); ++(zh->reg->ptr_i); (zh->reg->key_buf)[zh->reg->ptr_top - zh->reg->ptr_i] = (char*)zh->reg->key_buf + zh->reg->key_buf_used;#if SU_SCHEME#else ch = zebraExplain_lookupSU (zei, attrSet, attrUse); if (ch < 0) ch = zebraExplain_addSU (zei, attrSet, attrUse);#endif assert (ch > 0); zh->reg->key_buf_used += key_SU_encode (ch,((char*)zh->reg->key_buf) + zh->reg->key_buf_used); while (*src) ((char*)zh->reg->key_buf) [(zh->reg->key_buf_used)++] = *src++; src++; ((char*)(zh->reg->key_buf))[(zh->reg->key_buf_used)++] = '\0'; ((char*)(zh->reg->key_buf))[(zh->reg->key_buf_used)++] = cmd; if (lead & 60) seqno += ((lead>>2) & 15)-1; else { memcpy (&seqno, src, sizeof(seqno)); src += sizeof(seqno); } key.seqno = seqno; key.sysno = sysno; memcpy ((char*)zh->reg->key_buf + zh->reg->key_buf_used, &key, sizeof(key)); (zh->reg->key_buf_used) += sizeof(key); off = src - reckeys->buf; } assert (off == reckeys->buf_used);}void extract_flushWriteKeys (ZebraHandle zh){ FILE *outf; char out_fname[200]; char *prevcp, *cp; struct encode_info encode_info; int ptr_i = zh->reg->ptr_i;#if SORT_EXTRA int i;#endif if (!zh->reg->key_buf || ptr_i <= 0) return; (zh->reg->key_file_no)++; logf (LOG_LOG, "sorting section %d", (zh->reg->key_file_no));#if !SORT_EXTRA qsort (zh->reg->key_buf + zh->reg->ptr_top - ptr_i, ptr_i, sizeof(char*), key_qsort_compare); extract_get_fname_tmp (zh, out_fname, zh->reg->key_file_no); if (!(outf = fopen (out_fname, "wb"))) { logf (LOG_FATAL|LOG_ERRNO, "fopen %s", out_fname); exit (1); } logf (LOG_LOG, "writing section %d", zh->reg->key_file_no); prevcp = cp = (zh->reg->key_buf)[zh->reg->ptr_top - ptr_i]; encode_key_init (&encode_info); encode_key_write (cp, &encode_info, outf); while (--ptr_i > 0) { cp = (zh->reg->key_buf)[zh->reg->ptr_top - ptr_i]; if (strcmp (cp, prevcp)) { encode_key_flush ( &encode_info, outf); encode_key_init (&encode_info); encode_key_write (cp, &encode_info, outf); prevcp = cp; } else encode_key_write (cp + strlen(cp), &encode_info, outf); } encode_key_flush ( &encode_info, outf);#else
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -