📄 extract.c
字号:
/* error occured during extraction ... */ if (rGroup->flagRw && zh->records_processed < rGroup->fileVerboseLimit) { logf (LOG_WARN, "no filter for %s %s " PRINTF_OFF_T, rGroup->recordType, fname, recordOffset); } return 0; } if (zh->reg->keys.buf_used == 0) { /* the extraction process returned no information - the record is probably empty - unless flagShowRecords is in use */ if (!rGroup->flagRw) return 1; logf (LOG_WARN, "empty %s %s " PRINTF_OFF_T, rGroup->recordType, fname, recordOffset); return 1; } } /* perform match if sysno not known and if match criteria is specified */ matchStr = NULL; if (!sysno) { sysnotmp = 0; sysno = &sysnotmp; if (rGroup->recordId && *rGroup->recordId) { char *rinfo; matchStr = fileMatchStr (zh, &zh->reg->keys, rGroup, fname, rGroup->recordId); if (matchStr) { rinfo = dict_lookup (zh->reg->matchDict, matchStr); if (rinfo) memcpy (sysno, rinfo+1, sizeof(*sysno)); } else { logf (LOG_WARN, "Bad match criteria"); return 0; } } } if (! *sysno) { /* new record */ if (deleteFlag) { logf (LOG_LOG, "delete %s %s " PRINTF_OFF_T, rGroup->recordType, fname, recordOffset); logf (LOG_WARN, "cannot delete record above (seems new)"); return 1; } if (zh->records_processed < rGroup->fileVerboseLimit) logf (LOG_LOG, "add %s %s " PRINTF_OFF_T, rGroup->recordType, fname, recordOffset); rec = rec_new (zh->reg->records); *sysno = rec->sysno; recordAttr = rec_init_attr (zh->reg->zei, rec); if (matchStr) { dict_insert (zh->reg->matchDict, matchStr, sizeof(*sysno), sysno); } extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys); zh->records_inserted++; } else { /* record already exists */ struct recKeys delkeys; struct sortKeys sortKeys; rec = rec_get (zh->reg->records, *sysno); assert (rec); recordAttr = rec_init_attr (zh->reg->zei, rec); if (!force_update && recordAttr->runNumber == zebraExplain_runNumberIncrement (zh->reg->zei, 0)) { yaz_log (LOG_LOG, "run number = %d", recordAttr->runNumber); yaz_log (LOG_LOG, "skipped %s %s " PRINTF_OFF_T, rGroup->recordType, fname, recordOffset); extract_flushSortKeys (zh, *sysno, -1, &zh->reg->sortKeys); rec_rm (&rec); logRecord (zh); return 1; } delkeys.buf_used = rec->size[recInfo_delKeys]; delkeys.buf = rec->info[recInfo_delKeys]; sortKeys.buf_used = rec->size[recInfo_sortKeys]; sortKeys.buf = rec->info[recInfo_sortKeys]; extract_flushSortKeys (zh, *sysno, 0, &sortKeys); extract_flushRecordKeys (zh, *sysno, 0, &delkeys); if (deleteFlag) { /* record going to be deleted */ if (!delkeys.buf_used) { logf (LOG_LOG, "delete %s %s " PRINTF_OFF_T, rGroup->recordType, fname, recordOffset); logf (LOG_WARN, "cannot delete file above, storeKeys false"); } else { if (zh->records_processed < rGroup->fileVerboseLimit) logf (LOG_LOG, "delete %s %s " PRINTF_OFF_T, rGroup->recordType, fname, recordOffset); zh->records_deleted++; if (matchStr) dict_delete (zh->reg->matchDict, matchStr); rec_del (zh->reg->records, &rec); } rec_rm (&rec); logRecord (zh); return 1; } else { /* record going to be updated */ if (!delkeys.buf_used) { logf (LOG_LOG, "update %s %s " PRINTF_OFF_T, rGroup->recordType, fname, recordOffset); logf (LOG_WARN, "cannot update file above, storeKeys false"); } else { if (zh->records_processed < rGroup->fileVerboseLimit) logf (LOG_LOG, "update %s %s " PRINTF_OFF_T, rGroup->recordType, fname, recordOffset); extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys); zh->records_updated++; } } } /* update file type */ xfree (rec->info[recInfo_fileType]); rec->info[recInfo_fileType] = rec_strdup (rGroup->recordType, &rec->size[recInfo_fileType]); /* update filename */ xfree (rec->info[recInfo_filename]); rec->info[recInfo_filename] = rec_strdup (fname, &rec->size[recInfo_filename]); /* update delete keys */ xfree (rec->info[recInfo_delKeys]); if (zh->reg->keys.buf_used > 0 && rGroup->flagStoreKeys == 1) { rec->size[recInfo_delKeys] = zh->reg->keys.buf_used; rec->info[recInfo_delKeys] = zh->reg->keys.buf; zh->reg->keys.buf = NULL; zh->reg->keys.buf_max = 0; } else { rec->info[recInfo_delKeys] = NULL; rec->size[recInfo_delKeys] = 0; } /* update sort keys */ xfree (rec->info[recInfo_sortKeys]); rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used; rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf; zh->reg->sortKeys.buf = NULL; zh->reg->sortKeys.buf_max = 0; /* save file size of original record */ zebraExplain_recordBytesIncrement (zh->reg->zei, - recordAttr->recordSize); recordAttr->recordSize = fi->file_moffset - recordOffset; if (!recordAttr->recordSize) recordAttr->recordSize = fi->file_max - recordOffset; zebraExplain_recordBytesIncrement (zh->reg->zei, recordAttr->recordSize); /* set run-number for this record */ recordAttr->runNumber = zebraExplain_runNumberIncrement (zh->reg->zei, 0); /* update store data */ xfree (rec->info[recInfo_storeData]); if (rGroup->flagStoreData == 1) { rec->size[recInfo_storeData] = recordAttr->recordSize; rec->info[recInfo_storeData] = (char *) xmalloc (recordAttr->recordSize); if (lseek (fi->fd, recordOffset, SEEK_SET) < 0) { logf (LOG_ERRNO|LOG_FATAL, "seek to " PRINTF_OFF_T " in %s", recordOffset, fname); exit (1); } if (read (fi->fd, rec->info[recInfo_storeData], recordAttr->recordSize) < recordAttr->recordSize) { logf (LOG_ERRNO|LOG_FATAL, "read %d bytes of %s", recordAttr->recordSize, fname); exit (1); } } else { rec->info[recInfo_storeData] = NULL; rec->size[recInfo_storeData] = 0; } /* update database name */ xfree (rec->info[recInfo_databaseName]); rec->info[recInfo_databaseName] = rec_strdup (rGroup->databaseName, &rec->size[recInfo_databaseName]); /* update offset */ recordAttr->recordOffset = recordOffset; /* commit this record */ rec_put (zh->reg->records, &rec); logRecord (zh); return 1;}int fileExtract (ZebraHandle zh, SYSNO *sysno, const char *fname, const struct recordGroup *rGroupP, int deleteFlag){ int r, i, fd; char gprefix[128]; char ext[128]; char ext_res[128]; char subType[128]; RecType recType; struct recordGroup rGroupM; struct recordGroup *rGroup = &rGroupM; struct file_read_info *fi; void *clientData; memcpy (rGroup, rGroupP, sizeof(*rGroupP)); if (!rGroup->groupName || !*rGroup->groupName) *gprefix = '\0'; else sprintf (gprefix, "%s.", rGroup->groupName); logf (LOG_DEBUG, "fileExtract %s", fname); /* determine file extension */ *ext = '\0'; for (i = strlen(fname); --i >= 0; ) if (fname[i] == '/') break; else if (fname[i] == '.') { strcpy (ext, fname+i+1); break; } /* determine file type - depending on extension */ if (!rGroup->recordType) { sprintf (ext_res, "%srecordType.%s", gprefix, ext); if (!(rGroup->recordType = res_get (zh->res, ext_res))) { sprintf (ext_res, "%srecordType", gprefix); rGroup->recordType = res_get (zh->res, ext_res); } } if (!rGroup->recordType) { if (zh->records_processed < rGroup->fileVerboseLimit) logf (LOG_LOG, "? %s", fname); return 0; } if (!*rGroup->recordType) return 0; if (!(recType = recType_byName (zh->reg->recTypes, rGroup->recordType, subType, &clientData))) { logf (LOG_WARN, "No such record type: %s", rGroup->recordType); return 0; } /* determine match criteria */ if (!rGroup->recordId) { sprintf (ext_res, "%srecordId.%s", gprefix, ext); rGroup->recordId = res_get (zh->res, ext_res); } /* determine database name */ if (!rGroup->databaseName) { sprintf (ext_res, "%sdatabase.%s", gprefix, ext); if (!(rGroup->databaseName = res_get (zh->res, ext_res))) { sprintf (ext_res, "%sdatabase", gprefix); rGroup->databaseName = res_get (zh->res, ext_res); } } if (!rGroup->databaseName) rGroup->databaseName = "Default"; /* determine if explain database */ sprintf (ext_res, "%sexplainDatabase", gprefix); rGroup->explainDatabase = atoi (res_get_def (zh->res, ext_res, "0")); /* announce database */ if (zebraExplain_curDatabase (zh->reg->zei, rGroup->databaseName)) { if (zebraExplain_newDatabase (zh->reg->zei, rGroup->databaseName, rGroup->explainDatabase)) return 0; } if (rGroup->flagStoreData == -1) { const char *sval; sprintf (ext_res, "%sstoreData.%s", gprefix, ext); if (!(sval = res_get (zh->res, ext_res))) { sprintf (ext_res, "%sstoreData", gprefix); sval = res_get (zh->res, ext_res); } if (sval) rGroup->flagStoreData = atoi (sval); } if (rGroup->flagStoreData == -1) rGroup->flagStoreData = 0; if (rGroup->flagStoreKeys == -1) { const char *sval; sprintf (ext_res, "%sstoreKeys.%s", gprefix, ext); sval = res_get (zh->res, ext_res); if (!sval) { sprintf (ext_res, "%sstoreKeys", gprefix); sval = res_get (zh->res, ext_res); } if (!sval) sval = res_get (zh->res, "storeKeys"); if (sval) rGroup->flagStoreKeys = atoi (sval); } if (rGroup->flagStoreKeys == -1) rGroup->flagStoreKeys = 0; if (sysno && deleteFlag) fd = -1; else { char full_rep[1024]; if (zh->path_reg && !yaz_is_abspath (fname)) { strcpy (full_rep, zh->path_reg); strcat (full_rep, "/"); strcat (full_rep, fname); } else strcpy (full_rep, fname); if ((fd = open (full_rep, O_BINARY|O_RDONLY)) == -1) { logf (LOG_WARN|LOG_ERRNO, "open %s", full_rep); return 0; } } fi = file_read_start (fd); do { file_begin (fi); r = recordExtract (zh, sysno, fname, rGroup, deleteFlag, fi, recType, subType, clientData, 1); } while (r && !sysno && fi->file_more); file_read_stop (fi); if (fd != -1) close (fd); return r;}int extract_rec_in_mem (ZebraHandle zh, const char *recordType, const char *buf, size_t buf_size, const char *databaseName, int delete_flag, int test_mode, int *sysno, int store_keys, int store_data, const char *match_criteria){ struct recordGroup rGroup; rGroup.groupName = NULL; rGroup.databaseName = (char *)databaseName; rGroup.path = NULL; rGroup.recordId = NULL; rGroup.recordType = (char *)recordType; rGroup.flagStoreData = store_data; rGroup.flagStoreKeys = store_keys; rGroup.flagRw = 1; rGroup.databaseNamePath = 0; rGroup.explainDatabase = 0; rGroup.fileVerboseLimit = 100000; rGroup.followLinks = -1; return (bufferExtractRecord (zh, buf, buf_size, &rGroup, delete_flag, test_mode, recordType, sysno, match_criteria, "<no file>", 0,1));}/* If sysno is provided, then it's used to identify the reocord. If not, and match_criteria is provided, then sysno is guessed If not, and a record is provided, then sysno is got from there */int bufferExtractRecord (ZebraHandle zh, const char *buf, size_t buf_size, struct recordGroup *rGroup, int delete_flag, int test_mode, const char *recordType, int *sysno, const char *match_criteria, const char *fname, int force_update, int allow_update){ RecordAttr *recordAttr; struct recExtractCtrl extractCtrl; int i, r; char *matchStr = 0; RecType recType = NULL; char subType[1024]; void *clientData; Record rec; long recordOffset = 0; struct zebra_fetch_control fc; fc.fd = -1; fc.record_int_buf = buf; fc.record_int_len = buf_size; fc.record_int_pos = 0; fc.offset_end = 0; fc.record_offset = 0; extractCtrl.offset = 0; extractCtrl.readf = zebra_record_int_read; extractCtrl.seekf = zebra_record_int_seek; extractCtrl.tellf = zebra_record_int_tell; extractCtrl.endf = zebra_record_int_end; extractCtrl.fh = &fc; zh->reg->keys.buf_used = 0; zh->reg->keys.prevAttrUse = -1; zh->reg->keys.prevAttrSet = -1; zh->reg->keys.prevSeqNo = 0; zh->reg->sortKeys.buf_used = 0; /* announce database */ if (!(rGroup->databaseName)) { logf (LOG_WARN, "Invalid record group, no database name given"); return 0; } if (zebraExplain_curDatabase (zh->reg->zei, rGroup->databaseName)) { if (zebraExplain_newDatabase (zh->reg->zei, rGroup->databaseName, 0)) return 0; } if (*recordType) { logf (LOG_DEBUG, "Record type explicitly specified: %s", recordType);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -