📄 cqdb.c
字号:
/* * Constant Quark Database (CQDB). * * Copyright (c) 2007, Naoaki Okazaki * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the Northwestern University, University of Tokyo, * nor the names of its contributors may be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *//* $Id: cqdb.c 153 2009-03-11 03:24:30Z naoaki $ */#include <stdio.h>#include <stdlib.h>#include <stdint.h>#include <string.h>#include <cqdb.h>#define CHUNKID "CQDB"#define BYTEORDER_CHECK (0x62445371)#define NUM_TABLES (256)#define OFFSET_REFS (0 + sizeof(header_t))#define OFFSET_DATA (OFFSET_REFS + sizeof(tableref_t) * NUM_TABLES)/** * An element of a hash table. */typedef struct { uint32_t hash; /**< Hash value of the record. */ uint32_t offset; /**< Offset address to the actual record. */} bucket_t;/** * A hash table. */typedef struct { uint32_t num; /**< Number of elements in the table. */ uint32_t size; /**< Maximum number of elements. */ bucket_t* bucket; /**< Bucket (array of bucket_t). */} table_t;/** * CQDB chunk header. */typedef struct { int8_t chunkid[4]; /**< Chunk identifier, "CQDB". */ uint32_t size; /**< Chunk size including this header. */ uint32_t flag; /**< Global flags. */ uint32_t byteorder; /**< Byte-order indicator. */ uint32_t bwd_size; /**< Number of elements in the backward array. */ uint32_t bwd_offset; /**< Offset to the backward array. */} header_t;/** * Reference to a hash table. */typedef struct { uint32_t offset; /**< Offset to a hash table. */ uint32_t num; /**< Number of elements in the hash table. */} tableref_t;/** * Writer for a constant quark database. */struct tag_cqdb_writer { uint32_t flag; /**< Operation flag. */ FILE* fp; /**< File pointer. */ uint32_t begin; /**< Offset address to the head of this database. */ uint32_t cur; /**< Offset address to a new key/data pair. */ table_t ht[NUM_TABLES]; /**< Hash tables (string -> id). */ uint32_t* bwd; /**< Backlink array. */ uint32_t bwd_num; /**< */ uint32_t bwd_size; /**< Number of elements in the backlink array. */};/** * Constant quark database (CQDB). */struct tag_cqdb { uint8_t* buffer; /**< Pointer to the memory block. */ size_t size; /**< Size of the memory block. */ header_t header; /**< Chunk header. */ table_t ht[NUM_TABLES]; /**< Hash tables (string -> id). */ uint32_t* bwd; /**< Array for backward look-up (id -> string). */ int num; /**< Number of key/data pairs. */};uint32_t hashlittle(const void *key, size_t length, uint32_t initval);static size_t write_uint32(cqdb_writer_t* wt, uint32_t value){ uint8_t buffer[4]; buffer[0] = (uint8_t)(value & 0xFF); buffer[1] = (uint8_t)(value >> 8); buffer[2] = (uint8_t)(value >> 16); buffer[3] = (uint8_t)(value >> 24); return fwrite(buffer, sizeof(uint8_t), 4, wt->fp) / sizeof(value);}static size_t write_data(cqdb_writer_t* wt, const void *data, size_t size){ return fwrite(data, size, 1, wt->fp);}cqdb_writer_t* cqdb_writer(FILE *fp, int flag){ int i; cqdb_writer_t* dbw = (cqdb_writer_t*)calloc(1, sizeof(cqdb_writer_t)); if (dbw != NULL) { /* Initialize cqdb_writer_t members. */ memset(dbw, 0, sizeof(*dbw)); dbw->flag = flag; dbw->fp = fp; dbw->begin = ftell(dbw->fp); dbw->cur = OFFSET_DATA; /* Initialize the hash tables.*/ for (i = 0;i < NUM_TABLES;++i) { dbw->ht[i].bucket = NULL; } dbw->bwd = NULL; dbw->bwd_num = 0; dbw->bwd_size = 0; /* Move the file pointer to the offset to the first key/data pair. */ if (fseek(dbw->fp, dbw->begin + dbw->cur, SEEK_SET) != 0) { goto error_exit; /* Seek error. */ } } return dbw;error_exit: free(dbw); return NULL;}static int cqdb_writer_delete(cqdb_writer_t* dbw){ int i; /* Free allocated memory blocks. */ for (i = 0;i < NUM_TABLES;++i) { free(dbw->ht[i].bucket); } free(dbw->bwd); free(dbw); return 0;}int cqdb_writer_put(cqdb_writer_t* dbw, const char *str, int id){ int ret = 0; const void *key = str; uint32_t ksize = (uint32_t)(strlen(str) + 1); /* Compute the hash value and choose a hash table. */ uint32_t hv = hashlittle(key, ksize, 0); table_t* ht = &dbw->ht[hv % 256]; /* Check for non-negative identifier. */ if (id < 0) { ret = CQDB_ERROR_INVALIDID; goto error_exit; } /* Write out the current data. */ write_uint32(dbw, (uint32_t)id); write_uint32(dbw, (uint32_t)ksize); write_data(dbw, key, ksize); if (ferror(dbw->fp)) { ret = CQDB_ERROR_FILEWRITE; goto error_exit; } /* Expand the bucket if necessary. */ if (ht->size <= ht->num) { ht->size = (ht->size+1) * 2; ht->bucket = (bucket_t*)realloc(ht->bucket, sizeof(bucket_t) * ht->size); if (ht->bucket == NULL) { ret = CQDB_ERROR_OUTOFMEMORY; goto error_exit; } } /* Set the hash value and current offset position. */ ht->bucket[ht->num].hash = hv; ht->bucket[ht->num].offset = dbw->cur; ++ht->num; /* Store the backlink if specified. */ if (!(dbw->flag & CQDB_ONEWAY)) { /* Expand the backlink array if necessary. */ if (dbw->bwd_size <= (uint32_t)id) { uint32_t size = dbw->bwd_size; while (size <= (uint32_t)id) size = (size + 1) * 2; dbw->bwd = (uint32_t*)realloc(dbw->bwd, sizeof(uint32_t) * size); if (dbw->bwd == NULL) { ret = CQDB_ERROR_OUTOFMEMORY; goto error_exit; } while (dbw->bwd_size < size) { dbw->bwd[dbw->bwd_size++] = 0; } } if (dbw->bwd_num <= (uint32_t)id) { dbw->bwd_num = (uint32_t)id+1; } dbw->bwd[id] = dbw->cur; } /* Increment the current position. */ dbw->cur += sizeof(uint32_t) + sizeof(uint32_t) + ksize; return 0;error_exit: dbw->flag |= CQDB_ERROR_OCCURRED; return ret;}int cqdb_writer_close(cqdb_writer_t* dbw){ uint32_t i, j; int k, ret = 0; long offset = 0; header_t header; /* If an error have occurred, just free the memory blocks. */ if (dbw->flag & CQDB_ERROR_OCCURRED) { cqdb_writer_delete(dbw); return 0; } /* Initialize the file header. */ strncpy((char*)header.chunkid, CHUNKID, 4); header.byteorder = BYTEORDER_CHECK; header.bwd_offset = 0; header.bwd_size = dbw->bwd_num; /* Store the hash tables. At this moment, the file pointer refers to the offset succeeding the last key/data pair. */ for (i = 0;i < NUM_TABLES;++i) { table_t* ht = &dbw->ht[i]; /* Do not write empty hash tables. */ if (ht->bucket != NULL) { /* Actual bucket will have the double size; half elements in the bucket are kept empty. */ int n = ht->num * 2; /* Allocate the bucket. */ bucket_t* dst = (bucket_t*)calloc(n, sizeof(bucket_t)); if (dst == NULL) { ret = CQDB_ERROR_OUTOFMEMORY; goto error_exit;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -