📄 ssi.c
字号:
/* yeah, this looks stupid, but look: we have to give a valid looking, non-NULL d_off of some sort, or writes will fail. It's going to be unused anyway. */ g->pkeys[g->nprimary].d_off = *r_off; g->pkeys[g->nprimary].len = 0; } g->nprimary++; if (g->nprimary % SSI_KEY_BLOCK == 0) { g->pkeys = realloc(g->pkeys, sizeof(struct ssipkey_s) * (g->nprimary+SSI_KEY_BLOCK)); if (g->pkeys == NULL) return SSI_ERR_MALLOC; } return 0;}/* Function: SSIAddSecondaryKeyToIndex() * Date: SRE, Tue Jan 2 12:44:40 2001 [St. Louis] * * Purpose: Puts secondary key {key} in the index {g}, associating * it with primary key {pkey} that was previously * registered by SSIAddPrimaryKeyToIndex(). * * Args: g - active index * key - secondary key to add * pkey - primary key to associate this key with * * Returns: 0 on success, nonzero on failure. */intSSIAddSecondaryKeyToIndex(SSIINDEX *g, char *key, char *pkey){ int n; /* a string length */ if (g->nsecondary >= SSI_MAXKEYS) return SSI_ERR_TOOMANY_KEYS; /* Before adding the key: check how big our index is. * If it's getting too large, switch to external mode. */ if (!g->external && current_index_size(g) >= g->max_ram) if (activate_external_sort(g) != 0) return SSI_ERR_NOFILE; /* Update maximum secondary key length, if necessary. */ n = strlen(key); if ((n+1) > g->slen) g->slen = n+1; /* if external mode: write info to disk. */ if (g->external) { fprintf(g->stmp, "%s\t%s\n", key, pkey); g->nsecondary++; return 0; } /* else, internal mode... store info in memory. */ if ((g->skeys[g->nsecondary].key = sre_strdup(key, n)) == NULL) return SSI_ERR_MALLOC; if ((g->skeys[g->nsecondary].pkey = sre_strdup(pkey, -1)) == NULL) return SSI_ERR_MALLOC; g->nsecondary++; if (g->nsecondary % SSI_KEY_BLOCK == 0) { g->skeys = realloc(g->skeys, sizeof(struct ssiskey_s) * (g->nsecondary+SSI_KEY_BLOCK)); if (g->skeys == NULL) return SSI_ERR_MALLOC; } return 0;}/* Function: SSIWriteIndex() * Date: SRE, Tue Jan 2 13:55:56 2001 [St. Louis] * * Purpose: Writes complete index {g} in SSI format to a * binary file {file}. Does all * the overhead of sorting the primary and secondary keys, * and maintaining the association of secondary keys * with primary keys during and after the sort. * * Args: file - file to write to * g - index to sort & write out. * * Returns: 0 on success, nonzero on error. *//* needed for qsort() */static int pkeysort(const void *k1, const void *k2){ struct ssipkey_s *key1; struct ssipkey_s *key2; key1 = (struct ssipkey_s *) k1; key2 = (struct ssipkey_s *) k2; return strcmp(key1->key, key2->key);}static int skeysort(const void *k1, const void *k2){ struct ssiskey_s *key1; struct ssiskey_s *key2; key1 = (struct ssiskey_s *) k1; key2 = (struct ssiskey_s *) k2; return strcmp(key1->key, key2->key);}intSSIWriteIndex(char *file, SSIINDEX *g){ FILE *fp; int status; int i; sqd_uint32 header_flags, file_flags; sqd_uint32 frecsize, precsize, srecsize; sqd_uint64 foffset, poffset, soffset; char *s, *s2; if ((fp = fopen(file,"wb")) == NULL) return SSI_ERR_NOFILE; status = 0; /* How big is the index? If it's going to be > 2GB, we need * to flip to 64-bit index mode. 2047 (instead of 2048) gives us * some slop room. * die'ing here is pretty brutal - if we flip to 64-bit index * mode, we hve 100's of millions of keys, so we've processed * a long time before reaching this point. Ah well. */ if (current_index_size(g) >= 2047) { g->imode = SSI_OFFSET_I64;#ifndef HAS_64BIT_FILE_OFFSETS Die("\Can't switch to 64-bit SSI index mode on this system, sorry;\n\I don't have 64-bit file offset functions available.\n");#endif } /* Magic-looking numbers come from adding up sizes * of things in bytes */ frecsize = 16 + g->flen; precsize = (g->smode == SSI_OFFSET_I64) ? 22+g->plen : 14+g->plen; srecsize = g->slen + g->plen; header_flags = 0; if (g->smode == SSI_OFFSET_I64) header_flags |= SSI_USE64; if (g->imode == SSI_OFFSET_I64) header_flags |= SSI_USE64_INDEX; /* Magic-looking numbers again come from adding up sizes * of things in bytes */ foffset = (header_flags & SSI_USE64_INDEX) ? 66 : 54; poffset = foffset + frecsize*g->nfiles; soffset = poffset + precsize*g->nprimary; /* Sort the keys * If external mode, make system calls to UNIX/POSIX "sort" in place, then * open new sorted files for reading thru ptmp and stmp handles. * If internal mode, call qsort. * * Note that you'd better force a POSIX locale for the sort; else, * some silly distro (e.g. Mandrake Linux >=8.1) may have specified * LC_COLLATE=en_US, and this'll give a sort "bug" in which it doesn't * sort by byte order. */ if (g->external) { char cmd[1024]; fclose(g->ptmp); g->ptmp = NULL; sprintf(cmd, "env LC_ALL=POSIX sort -o %s %s\n", g->ptmpfile, g->ptmpfile); if ((status = system(cmd)) != 0) return SSI_ERR_EXTERNAL_SORT; if ((g->ptmp = fopen(g->ptmpfile, "r")) == NULL) return SSI_ERR_EXTERNAL_SORT; fclose(g->stmp); g->stmp = NULL; sprintf(cmd, "env LC_ALL=POSIX sort -o %s %s\n", g->stmpfile, g->stmpfile); if ((status = system(cmd)) != 0) return SSI_ERR_EXTERNAL_SORT; if ((g->stmp = fopen(g->stmpfile, "r")) == NULL) return SSI_ERR_EXTERNAL_SORT; } else { qsort((void *) g->pkeys, g->nprimary, sizeof(struct ssipkey_s), pkeysort); qsort((void *) g->skeys, g->nsecondary, sizeof(struct ssiskey_s), skeysort); } /* Write the header */ if (! write_i32(fp, v20magic)) return SSI_ERR_FWRITE; if (! write_i32(fp, header_flags)) return SSI_ERR_FWRITE; if (! write_i16(fp, g->nfiles)) return SSI_ERR_FWRITE; if (! write_i32(fp, g->nprimary)) return SSI_ERR_FWRITE; if (! write_i32(fp, g->nsecondary)) return SSI_ERR_FWRITE; if (! write_i32(fp, g->flen)) return SSI_ERR_FWRITE; if (! write_i32(fp, g->plen)) return SSI_ERR_FWRITE; if (! write_i32(fp, g->slen)) return SSI_ERR_FWRITE; if (! write_i32(fp, frecsize)) return SSI_ERR_FWRITE; if (! write_i32(fp, precsize)) return SSI_ERR_FWRITE; if (! write_i32(fp, srecsize)) return SSI_ERR_FWRITE; if (g->imode == SSI_OFFSET_I32) { if (! write_i32(fp, foffset)) return SSI_ERR_FWRITE; if (! write_i32(fp, poffset)) return SSI_ERR_FWRITE; if (! write_i32(fp, soffset)) return SSI_ERR_FWRITE; } else { if (! write_i64(fp, foffset)) return SSI_ERR_FWRITE; if (! write_i64(fp, poffset)) return SSI_ERR_FWRITE; if (! write_i64(fp, soffset)) return SSI_ERR_FWRITE; } /* The file section */ if ((s = malloc(sizeof(char) * g->flen)) == NULL) return SSI_ERR_MALLOC; for (i = 0; i < g->nfiles; i++) { file_flags = 0; if (g->bpl[i] > 0 && g->rpl[i] > 0) file_flags |= SSI_FAST_SUBSEQ; strcpy(s, g->filenames[i]); if (fwrite(s, sizeof(char), g->flen, fp) != g->flen) return SSI_ERR_FWRITE; if (! write_i32(fp, g->fileformat[i])) return SSI_ERR_FWRITE; if (! write_i32(fp, file_flags)) return SSI_ERR_FWRITE; if (! write_i32(fp, g->bpl[i])) return SSI_ERR_FWRITE; if (! write_i32(fp, g->rpl[i])) return SSI_ERR_FWRITE; } free(s); /* The primary key section */ if ((s = malloc(sizeof(char) * g->plen)) == NULL) return SSI_ERR_MALLOC; if (g->external) { char *buf = NULL; int buflen = 0; struct ssipkey_s pkey; for (i = 0; i < g->nprimary; i++) { if (sre_fgets(&buf, &buflen, g->ptmp) == NULL) return SSI_ERR_NODATA; if (parse_pkey_info(buf, g->smode, &pkey) != 0) return SSI_ERR_BADFORMAT; strcpy(s, pkey.key); if (fwrite(s, sizeof(char), g->plen, fp) != g->plen) return SSI_ERR_FWRITE; if (! write_i16( fp, pkey.fnum)) return SSI_ERR_FWRITE; if (! write_offset(fp, &(pkey.r_off))) return SSI_ERR_FWRITE; if (! write_offset(fp, &(pkey.d_off))) return SSI_ERR_FWRITE; if (! write_i32( fp, pkey.len)) return SSI_ERR_FWRITE; } free(buf); } else { for (i = 0; i < g->nprimary; i++) { strcpy(s, g->pkeys[i].key); if (fwrite(s, sizeof(char), g->plen, fp) != g->plen) return SSI_ERR_FWRITE; if (! write_i16( fp, g->pkeys[i].fnum)) return SSI_ERR_FWRITE; if (! write_offset(fp, &(g->pkeys[i].r_off))) return SSI_ERR_FWRITE; if (! write_offset(fp, &(g->pkeys[i].d_off))) return SSI_ERR_FWRITE; if (! write_i32( fp, g->pkeys[i].len)) return SSI_ERR_FWRITE; } } /* The secondary key section */ if (g->nsecondary > 0) { if ((s2 = malloc(sizeof(char) * g->slen)) == NULL) return SSI_ERR_MALLOC; if (g->external) { struct ssiskey_s skey; char *buf = NULL; int n = 0; for (i = 0; i < g->nsecondary; i++) { if (sre_fgets(&buf, &n, g->stmp) == NULL) return SSI_ERR_NODATA; if (parse_skey_info(buf, &skey) != 0) return SSI_ERR_BADFORMAT; strcpy(s2, skey.key); strcpy(s, skey.pkey); if (fwrite(s2, sizeof(char), g->slen, fp) != g->slen) return SSI_ERR_FWRITE; if (fwrite(s, sizeof(char), g->plen, fp) != g->plen) return SSI_ERR_FWRITE; } free(buf); } else { for (i = 0; i < g->nsecondary; i++) { strcpy(s2, g->skeys[i].key); strcpy(s, g->skeys[i].pkey); if (fwrite(s2, sizeof(char), g->slen, fp) != g->slen) return SSI_ERR_FWRITE; if (fwrite(s, sizeof(char), g->plen, fp) != g->plen) return SSI_ERR_FWRITE; } } free(s2); } free(s); fclose(fp); return status;}/* Function: SSIFreeIndex() * Date: SRE, Tue Jan 2 11:44:08 2001 [St. Louis] * * Purpose: Free an index structure {g}. * * Args: g - ptr to an open index. * * Returns: (void) */voidSSIFreeIndex(SSIINDEX *g) { int i; if (g != NULL) { if (g->external == FALSE) { for (i = 0; i < g->nprimary; i++) free(g->pkeys[i].key); for (i = 0; i < g->nsecondary; i++) free(g->skeys[i].key); for (i = 0; i < g->nsecondary; i++) free(g->skeys[i].pkey); if (g->pkeys != NULL) free(g->pkeys); if (g->skeys != NULL) free(g->skeys); } else { if (g->ptmp != NULL) fclose(g->ptmp); if (g->stmp != NULL) fclose(g->stmp); #if DEBUGLEVEL == 0 remove(g->ptmpfile); remove(g->stmpfile);#endif } for (i = 0; i < g->nfiles; i++) free(g->filenames[i]); if (g->filenames != NULL) free(g->filenames); if (g->fileformat != NULL) free(g->fileformat); if (g->bpl != NULL) free(g->bpl); if (g->rpl != NULL) free(g->rpl); free(g); }}/* Function: SSIErrorString() * Date: SRE, Tue Jan 2 10:38:10 2001 [St. Louis] * * Purpose: Returns a ptr to an internal string corresponding * to error {n}, a code returned from any of the * functions in the API that return non-zero on error. * * Args: n - error code * * Returns: ptr to an internal string. */char *SSIErrorString(int n){ switch (n) { case SSI_ERR_OK: return "ok (no error)"; case SSI_ERR_NODATA: return "no data, fread() failed"; case SSI_ERR_NO_SUCH_KEY: return "no such key"; case SSI_ERR_MALLOC: return "out of memory, malloc() failed"; case SSI_ERR_NOFILE: return "file not found, fopen() failed"; case SSI_ERR_BADMAGIC: return "not a SSI file? (bad magic)"; case SSI_ERR_BADFORMAT: return "corrupt format? unexpected data"; case SSI_ERR_NO64BIT: return "no large file support for this system"; case SSI_ERR_SEEK_FAILED: return "failed to reposition on disk"; case SSI_ERR_TELL_FAILED: return "failed to get file position on disk"; case SSI_ERR_NO_SUBSEQS: return "no fast subseq support for this seqfile"; case SSI_ERR_RANGE: return "subseq start is out of range"; case SSI_ERR_BADARG: return "an argument is out of range"; case SSI_ERR_TOOMANY_FILES: return "number of files exceeds limit"; case SSI_ERR_TOOMANY_KEYS: return "number of keys exceeds limit"; case SSI_ERR_FWRITE: return "an fwrite() failed"; case SSI_ERR_EXTERNAL_SORT: return "some problem with external sorting"; default: return "unrecognized code"; } /*NOTREACHED*/}static intread_i16(FILE *fp, sqd_uint16 *ret_result){ sqd_uint16 result; if (fread(&result, sizeof(sqd_uint16), 1, fp) != 1) return 0; *ret_result = sre_ntoh16(result); return 1;}static intwrite_i16(FILE *fp, sqd_uint16 n){ n = sre_hton16(n); if (fwrite(&n, sizeof(sqd_uint16), 1, fp) != 1) return 0; return 1;}static intread_i32(FILE *fp, sqd_uint32 *ret_result)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -