📄 ssi.c
字号:
{ sqd_uint32 result; if (fread(&result, sizeof(sqd_uint32), 1, fp) != 1) return 0; *ret_result = sre_ntoh32(result); return 1;}static intwrite_i32(FILE *fp, sqd_uint32 n){ n = sre_hton32(n); if (fwrite(&n, sizeof(sqd_uint32), 1, fp) != 1) return 0; return 1;}static intread_i64(FILE *fp, sqd_uint64 *ret_result){ sqd_uint64 result; if (fread(&result, sizeof(sqd_uint64), 1, fp) != 1) return 0; *ret_result = sre_ntoh64(result); return 1;}static intwrite_i64(FILE *fp, sqd_uint64 n){ n = sre_hton64(n); if (fwrite(&n, sizeof(sqd_uint64), 1, fp) != 1) return 0; return 1;}static int read_offset(FILE *fp, char mode, SSIOFFSET *ret_offset){ if (mode == SSI_OFFSET_I32) { ret_offset->mode = SSI_OFFSET_I32; if (! read_i32(fp, &(ret_offset->off.i32))) return 0; } else if (mode == SSI_OFFSET_I64) { ret_offset->mode = SSI_OFFSET_I64; if (! read_i64(fp, &(ret_offset->off.i64))) return 0; } else return 0; return 1;}static intwrite_offset(FILE *fp, SSIOFFSET *offset){ if (offset->mode == SSI_OFFSET_I32) return write_i32(fp, offset->off.i32); else if (offset->mode == SSI_OFFSET_I64) return write_i64(fp, offset->off.i64); else abort(); /*UNREACHED*/ return 1; /* silence bitchy compilers */} static intparse_pkey_info(char *buf, char mode, struct ssipkey_s *pkey){ char *s, *tok; int n; s = buf; if ((tok = sre_strtok(&s, "\t\n", &n)) == NULL) return SSI_ERR_BADFORMAT; pkey->key = tok; if ((tok = sre_strtok(&s, "\t\n", &n)) == NULL) return SSI_ERR_BADFORMAT; pkey->fnum = (sqd_uint16) atoi(tok); if (mode == SSI_OFFSET_I32) { if ((tok = sre_strtok(&s, "\t\n", &n)) == NULL) return SSI_ERR_BADFORMAT; pkey->r_off.mode = mode; pkey->r_off.off.i32 = (sqd_uint32) strtoul(tok, NULL, 10); if ((tok = sre_strtok(&s, "\t\n", &n)) == NULL) return SSI_ERR_BADFORMAT; pkey->d_off.mode = mode; pkey->d_off.off.i32 = (sqd_uint32) strtoul(tok, NULL, 10); }#ifdef HAS_64BIT_FILE_OFFSETS else { if ((tok = sre_strtok(&s, "\t\n", &n)) == NULL) return SSI_ERR_BADFORMAT; pkey->r_off.mode = mode; pkey->r_off.off.i64 = (sqd_uint64) strtoull(tok, NULL, 10); if ((tok = sre_strtok(&s, "\t\n", &n)) == NULL) return SSI_ERR_BADFORMAT; pkey->d_off.mode = mode; pkey->d_off.off.i64 = (sqd_uint64) strtoull(tok, NULL, 10); }#else else { return SSI_ERR_NO64BIT; }#endif if ((tok = sre_strtok(&s, "\t\n", &n)) == NULL) return SSI_ERR_BADFORMAT; pkey->len = (sqd_uint32) strtoul(tok, NULL, 10); return 0;}static intparse_skey_info(char *buf, struct ssiskey_s *skey){ char *s, *tok; int n; s = buf; if ((tok = sre_strtok(&s, "\t\n", &n)) == NULL) return SSI_ERR_BADFORMAT; skey->key = tok; if ((tok = sre_strtok(&s, "\t\n", &n)) == NULL) return SSI_ERR_BADFORMAT; skey->pkey = tok; return 0;}/* Function: binary_search() * Date: SRE, Sun Dec 31 16:05:03 2000 [St. Louis] * * Purpose: Find a key in a SSI index, by a binary search * in an alphabetically sorted list of keys. If successful, * return 0, and the index file is positioned to read * the rest of the data for that key. Else returns nonzero. * * Args: sfp - an open SSIFILE * key - key to find * klen - key length to allocate (plen or slen from sfp) * base - base offset (poffset or soffset) * recsize - size of each key record in bytes (precsize or srecsize) * maxidx - # of keys (nprimary or nsecondary) * * Returns: 0 on success, and leaves file positioned for reading remaining * data for the key. * Nonzero on failure: * SSI_ERR_NO_SUCH_KEY - that key's not in the index * SSI_ERR_MALLOC - a memory allocation failure * SSI_ERR_NODATA - an fread() failed */static intbinary_search(SSIFILE *sfp, char *key, int klen, SSIOFFSET *base, sqd_uint32 recsize, sqd_uint32 maxidx){ char *name; sqd_uint32 left, right, mid; int cmp; int status; if (maxidx == 0) return SSI_ERR_NO_SUCH_KEY; /* special case: empty index */ if ((name = malloc (sizeof(char)*klen)) == NULL) return SSI_ERR_MALLOC; left = 0; right = maxidx-1; while (1) { /* A binary search: */ mid = (left+right) / 2; /* careful here. only works because we limit unsigned vars to signed ranges. */ if ((status = indexfile_position(sfp, base, recsize, mid)) != 0) { free(name); return status; } if (fread(name, sizeof(char), klen, sfp->fp) != klen) { free(name); return SSI_ERR_NODATA; } cmp = strcmp(name, key); if (cmp == 0) break; /* found it! */ else if (left >= right) /* oops, missed it; fail */ { free(name); return SSI_ERR_NO_SUCH_KEY; } else if (cmp < 0) left = mid+1; /* it's right of mid */ else if (cmp > 0) { if (mid == 0) { free(name); return SSI_ERR_NO_SUCH_KEY; } /* special case, beware */ else right = mid-1; /* it's left of mid */ } } free(name); return 0; /* and sfp->fp is positioned... */}/* Function: indexfile_position() * Date: SRE, Mon Jan 1 19:32:49 2001 [St. Louis] * * Purpose: Position the open index file {sfp} at the start * of record {n} in a list of records that starts at * base offset {base}, where each record takes up {l} * bytes. (e.g. the position is byte (base + n*l)). * * Args: sfp - open SSIFILE * base - offset of record 0 (e.g. sfp->foffset) * len - size of each record in bytes (e.g. sfp->frecsize) * n - which record to get (e.g. 0..sfp->nfiles) * * Returns: 0 on success, non-zero on failure. */static intindexfile_position(SSIFILE *sfp, SSIOFFSET *base, sqd_uint32 len, sqd_uint32 n){ SSIOFFSET pos; int status; if (base->mode == SSI_OFFSET_I32) { pos.mode = SSI_OFFSET_I32; pos.off.i32 = base->off.i32 + n*len; } else if (base->mode == SSI_OFFSET_I64) { pos.mode = SSI_OFFSET_I64; pos.off.i64 = base->off.i64 + n*len; } else return 0; if ((status = SSISetFilePosition(sfp->fp, &pos)) != 0) return status; return 0;}/* Function: current_index_size() * Date: SRE, Tue Feb 20 18:23:30 2001 [St. Louis] * * Purpose: Calculates the size of the current index, * in megabytes. */static sqd_uint64 current_index_size(SSIINDEX *g) { sqd_uint64 frecsize, precsize, srecsize; sqd_uint64 total; /* Magic-looking numbers come from adding up sizes * of things in bytes */ frecsize = 16 + g->flen; precsize = (g->smode == SSI_OFFSET_I64) ? 22+g->plen : 14+g->plen; srecsize = g->plen+g->slen; total = (66L + /* header size, if 64bit index offsets */ frecsize * g->nfiles + /* file section size */ precsize * g->nprimary + /* primary key section size */ srecsize * g->nsecondary) / /* secondary key section size */ 1048576L; return total;}/* Function: activate_external_sort() * Date: SRE, Mon Feb 4 09:08:08 2002 [St. Louis] * * Purpose: Switch to external sort mode. * Open file handles for external index files (ptmp, stmp). * Flush current index information to these files. * Free current memory, turn over control to the tmpfiles. * * Return: 0 on success; non-zero on failure. */static intactivate_external_sort(SSIINDEX *g){ int i; /* it's a bit late to be checking this, but... */ if (g->external) return 0; /* we already are external, fool */ if (FileExists(g->ptmpfile)) return 1; if (FileExists(g->stmpfile)) return 1; if ((g->ptmp = fopen(g->ptmpfile, "w")) == NULL) return 1; if ((g->stmp = fopen(g->stmpfile, "w")) == NULL) return 1; /* Flush the current indices. */ SQD_DPRINTF1(("Switching to external sort - flushing ssiindex to disk...\n")); for (i = 0; i < g->nprimary; i++) { if (g->smode == SSI_OFFSET_I32) { fprintf(g->ptmp, "%s\t%u\t%lu\t%lu\t%lu\n", g->pkeys[i].key, g->pkeys[i].fnum, (unsigned long) g->pkeys[i].r_off.off.i32, (unsigned long) g->pkeys[i].d_off.off.i32, (unsigned long) g->pkeys[i].len); } else { fprintf(g->ptmp, "%s\t%u\t%llu\t%llu\t%lu\n", g->pkeys[i].key, g->pkeys[i].fnum, (unsigned long long) g->pkeys[i].r_off.off.i64, (unsigned long long) g->pkeys[i].d_off.off.i64, (unsigned long) g->pkeys[i].len); } } for (i = 0; i < g->nsecondary; i++) fprintf(g->stmp, "%s\t%s\n", g->skeys[i].key, g->skeys[i].pkey); /* Free the memory now that we've flushed our lists to disk */ for (i = 0; i < g->nprimary; i++) free(g->pkeys[i].key); for (i = 0; i < g->nsecondary; i++) free(g->skeys[i].key); for (i = 0; i < g->nsecondary; i++) free(g->skeys[i].pkey); if (g->pkeys != NULL) free(g->pkeys); if (g->skeys != NULL) free(g->skeys); g->pkeys = NULL; g->skeys = NULL; /* Turn control over to external accumulation mode. */ g->external = TRUE; return 0;}/***************************************************************** * Debugging API *****************************************************************/voidSSIForceExternalSort(SSIINDEX *g){ if (activate_external_sort(g) != 0) Die("failed to turn external sorting on.");}/***************************************************************** * Test driving mode *****************************************************************/#ifdef MUGGINS_LETS_ME_SLEEP /* Minimally: cc -g -Wall -o shiva -DDEBUGLEVEL=1 -DMUGGINS_LETS_ME_SLEEP ssi.c sqerror.c sre_string.c types.c sre_ctype.c sre_math.c file.c -lm */intmain(int argc, char **argv){ char name[32], accession[32]; SSIINDEX *ssi; int mode; SSIOFFSET r_off, d_off; FILE *ofp; int i; int fh; /* a file handle */ int status; /* return status from a SSI call */ mode = SSI_OFFSET_I32; if ((ssi = SSICreateIndex(mode)) == NULL) Die("Failed to allocate SSI index"); /* Generate two FASTA files, tmp.0 and tmp.1, and index them. */ if ((ofp = fopen("tmp.0", "w")) == NULL) Die("failed to open tmp.0"); if ((status = SSIAddFileToIndex(ssi, "tmp.0", SQFILE_FASTA, &fh)) != 0) Die("SSIAddFileToIndex() failed: %s", SSIErrorString(status)); for (i = 0; i < 10; i++) { if ((status = SSIGetFilePosition(ofp, mode, &r_off)) != 0) Die("SSIGetFilePosition() failed: %s", SSIErrorString(status)); sprintf(name, "seq%d", i); sprintf(accession, "ac%d", i); fprintf(ofp, ">%s [%s] Description? we don't need no steenking description.\n", name, accession); if ((status = SSIGetFilePosition(ofp, mode, &d_off)) != 0) Die("SSIGetFilePosition() failed: %s", SSIErrorString(status)); fprintf(ofp, "AAAAAAAAAA\n"); fprintf(ofp, "CCCCCCCCCC\n"); fprintf(ofp, "GGGGGGGGGG\n"); fprintf(ofp, "TTTTTTTTTT\n"); if ((status = SSIAddPrimaryKeyToIndex(ssi, name, fh, &r_off, &d_off, 40)) != 0) Die("SSIAddPrimaryKeyToIndex() failed: %s", SSIErrorString(status)); if ((status = SSIAddSecondaryKeyToIndex(ssi, accession, name)) != 0) Die("SSIAddSecondaryKeyToIndex() failed: %s", SSIErrorString(status)); } SSISetFileForSubseq(ssi, fh, 11, 10); fclose(ofp); if ((ofp = fopen("tmp.1", "w")) == NULL) Die("failed to open tmp.1"); if ((status = SSIAddFileToIndex(ssi, "tmp.1", SQFILE_FASTA, &fh)) != 0) Die("SSIAddFileToIndex() failed: %s", SSIErrorString(status)); for (i = 10; i < 20; i++) { if ((status = SSIGetFilePosition(ofp, mode, &r_off)) != 0) Die("SSIGetFilePosition() failed: %s", SSIErrorString(status)); sprintf(name, "seq%d", i); sprintf(accession, "ac%d", i); fprintf(ofp, ">%s [%s] i/o, i/o, it's off to disk we go.\n", name, accession); if ((status = SSIGetFilePosition(ofp, mode, &d_off)) != 0) Die("SSIGetFilePosition() failed: %s", SSIErrorString(status)); fprintf(ofp, "AAAAAAAAAA 10\n"); fprintf(ofp, "CCCCCCCCCC 20\n"); fprintf(ofp, "GGGGGGGGGG 30\n"); fprintf(ofp, "TTTTTTTTTT 40\n"); if ((status = SSIAddPrimaryKeyToIndex(ssi, name, fh, &r_off, &d_off, 40)) != 0) Die("SSIAddPrimaryKeyToIndex() failed: %s", SSIErrorString(status)); if ((status = SSIAddSecondaryKeyToIndex(ssi, accession, name)) != 0) Die("SSIAddSecondaryKeyToIndex() failed: %s", SSIErrorString(status)); } SSISetFileForSubseq(ssi, fh, 14, 10); fclose(ofp); /* Write the index to tmp.ssi */ if ((status = SSIWriteIndex("tmp.ssi", ssi)) != 0) Die("SSIWriteIndex() failed: %s", SSIErrorString(status)); SSIFreeIndex(ssi); /* Now reopen the index and run some tests. */ exit(0);}#endif /* test driving code */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -