📄 ssi.c
字号:
static intwrite_i64(FILE *fp, sqd_uint64 n){ n = sre_hton64(n); if (fwrite(&n, sizeof(sqd_uint64), 1, fp) != 1) return 0; return 1;}static int read_offset(FILE *fp, char mode, SSIOFFSET *ret_offset){ if (mode == SSI_OFFSET_I32) { ret_offset->mode = SSI_OFFSET_I32; if (! read_i32(fp, &(ret_offset->off.i32))) return 0; } else if (mode == SSI_OFFSET_I64) { ret_offset->mode = SSI_OFFSET_I64; if (! read_i64(fp, &(ret_offset->off.i64))) return 0; } else return 0; return 1;}static intwrite_offset(FILE *fp, SSIOFFSET *offset){ if (offset->mode == SSI_OFFSET_I32) return write_i32(fp, offset->off.i32); else if (offset->mode == SSI_OFFSET_I64) return write_i64(fp, offset->off.i64); else abort(); /*UNREACHED*/ return 1; /* silence bitchy compilers */} /* Function: binary_search() * Date: SRE, Sun Dec 31 16:05:03 2000 [St. Louis] * * Purpose: Find a key in a SSI index, by a binary search * in an alphabetically sorted list of keys. If successful, * return 0, and the index file is positioned to read * the rest of the data for that key. Else returns nonzero. * * Args: sfp - an open SSIFILE * key - key to find * klen - key length to allocate (plen or slen from sfp) * base - base offset (poffset or soffset) * recsize - size of each key record in bytes (precsize or srecsize) * maxidx - # of keys (nprimary or nsecondary) * * Returns: 0 on success, and leaves file positioned for reading remaining * data for the key. * Nonzero on failure: * SSI_ERR_NO_SUCH_KEY - that key's not in the index * SSI_ERR_MALLOC - a memory allocation failure * SSI_ERR_NODATA - an fread() failed */static intbinary_search(SSIFILE *sfp, char *key, int klen, SSIOFFSET *base, sqd_uint32 recsize, sqd_uint32 maxidx){ char *name; sqd_uint32 left, right, mid; int cmp; int status; if ((name = malloc (sizeof(char)*klen)) == NULL) return SSI_ERR_MALLOC; left = 0; right = maxidx; while (1) { /* A binary search: */ mid = (left+right) / 2; /* careful here. only works because we limit unsigned vars to signed ranges. */ if ((status = indexfile_position(sfp, base, recsize, mid)) != 0) { free(name); return status; } if (fread(name, sizeof(char), klen, sfp->fp) != klen) { free(name); return SSI_ERR_NODATA; } cmp = strcmp(name, key); if (cmp == 0) break; /* found it! */ else if (left >= right) /* oops, missed it; fail */ { free(name); return SSI_ERR_NO_SUCH_KEY; } else if (cmp < 0) left = mid+1; /* it's right of mid */ else if (cmp > 0) right = mid-1; /* it's left of mid */ } free(name); return 0; /* and sfp->fp is positioned... */}/* Function: indexfile_position() * Date: SRE, Mon Jan 1 19:32:49 2001 [St. Louis] * * Purpose: Position the open index file {sfp} at the start * of record {n} in a list of records that starts at * base offset {base}, where each record takes up {l} * bytes. (e.g. the position is byte (base + n*l)). * * Args: sfp - open SSIFILE * base - offset of record 0 (e.g. sfp->foffset) * len - size of each record in bytes (e.g. sfp->frecsize) * n - which record to get (e.g. 0..sfp->nfiles) * * Returns: 0 on success, non-zero on failure. */static intindexfile_position(SSIFILE *sfp, SSIOFFSET *base, sqd_uint32 len, sqd_uint32 n){ SSIOFFSET pos; int status; if (base->mode == SSI_OFFSET_I32) { pos.mode = SSI_OFFSET_I32; pos.off.i32 = base->off.i32 + n*len; } else if (base->mode == SSI_OFFSET_I64) { pos.mode = SSI_OFFSET_I64; pos.off.i64 = base->off.i64 + n*len; } else return 0; if ((status = SSISetFilePosition(sfp->fp, &pos)) != 0) return status; return 0;}/* Function: current_chunk_size() * Date: SRE, Tue Feb 20 18:23:30 2001 [St. Louis] * * Purpose: Calculates the size of the current indexfile chunk, * in megabytes. */static sqd_uint64 current_chunk_size(SSIINDEX *g) { sqd_uint64 frecsize, precsize, srecsize; sqd_uint64 total; /* Magic-looking numbers come from adding up sizes * of things in bytes */ frecsize = 16 + g->flen; precsize = (g->smode == SSI_OFFSET_I64) ? 22+g->plen : 14+g->plen; srecsize = g->plen+g->slen; total = (66L + /* header size, if 64bit index offsets */ frecsize * g->nfiles + /* file section size */ precsize * g->nprimary + /* primary key section size */ srecsize * g->nsecondary) / /* secondary key section size */ 1048576L; return total;}#if 0static intmergesort(SSIINDEX *g){ char *infile; /* reading "tape" 1: source. */ char *outfile; /* writing "tape" 2: destination. */ SSIFILE *in1; /* on read, a chunk of the SSI file goes in an SSIFILE. */ SSIFILE *in2; /* and chunk 2 goes in here. */ FILE *outfp; /* where we're writing the merged data */ int b; /* b, b+1 are current chunks we're merging from infile */ char *k1, *k2; /* buffers full of keys to be merged from ch1, ch2 */ sqd_uint32 base1, pos1, buflen1; /* buffered key input for ch1 */ sqd_uint32 base2, pos2, buflen2; /* buffered key input for ch2 */ sqd_uint32 maxbuf; int status; /* Initializations. */ /* create the tmp file names */ if ((infile = sre_strdup(g->tmpbase, -1)) == NULL) return SSI_ERR_MALLOC; if (sre_strcat(&infile, -1, ".t1", 3) < 0) return SSI_ERR_MALLOC; if ((outfile = sre_strdup(g->tmpbase, -1)) == NULL) return SSI_ERR_MALLOC; if (sre_strcat(&outfile, -1, ".t2", 3) < 0) return SSI_ERR_MALLOC; /* allocate the SSIFILEs for reading chunks */ if ((in1 = malloc(sizeof(SSIFILE))) == NULL) return SSI_ERR_MALLOC; if ((in2 = malloc(sizeof(SSIFILE))) == NULL) return SSI_ERR_MALLOC; /* Open infile for read; both chunks (in1 and in2) are read from this file, * from different file offsets kept in g->chunkoffset[] */ if ((in1->fp = fopen(infile, "rb")) == NULL) return SSI_ERR_NOFILE; in2->fp = in1->fp; if ((outfp = fopen(outfile, "wb")) == NULL) return SSI_ERR_NOFILE; for (b = 0; b+1 < g->nchunks; b+=2) { if (fsetpos(in1->fp, &(g->chunkoffset[b])) > 0) return SSI_ERR_SEEK_FAILED; if (fsetpos(in2->fp, &(g->chunkoffset[b+1])) > 0) return SSI_ERR_SEEK_FAILED; if (status = load_indexfile(in1) > 0) return status; if (status = load_indexfile(in2) > 0) return status; merge_headers(g, in1, in2); write_index_header(outfp, g); /* Merge the primary key section; * do a buffered read of the pkeys from ch1 and ch2. */ maxbuf = 100000; if ((k1 = malloc(sizeof(char) * (maxbuf*in1->precsize))) == NULL) return SSI_ERR_MALLOC; if ((k2 = malloc(sizeof(char) * (maxbuf*in2->precsize))) == NULL) return SSI_ERR_MALLOC; base1 = pos1 = buflen1 = 0; base2 = pos2 = buflen2 = 0; while (base1+pos1 < ch1->nprimary || base2+pos2 < ch2->nprimary) { /* refill buffer for ch1? */ if (pos1 == buflen1) { base1 += buflen1; pos1 = 0; buflen1 = MIN(in1->nprimary - base1, maxbuf); if (buflen1 > 0) { if (fread(k1, sizeof(char), (buflen1*in1->precsize), in1->fp) < buflen1*in1->precsize) return SSI_ERR_NODATA; } } /* refill buffer for ch2? */ if (pos2 == buflen2) { base2 += buflen2; pos2 = 0; buflen2 = MIN(in2->nprimary - base2, maxbuf); if (buflen2 > 0) { if (fread(k2, sizeof(char), (buflen1*in2->precsize), in2->fp) < buflen2*in2->precsize) return SSI_ERR_NODATA; } } /* mergesort on keys; be careful of case where we're out of keys in either ch1 or ch2 */ if (base2+pos2 == ch2->nprimary || strcmp(k1+(pos1*in1->precsize), k2+(pos2*in2->precsize))) write_pkey(t3, &(pk1[pos1]), s); pos1++; } else { write_pkey(t3, &(pk2[pos2]), s); pos2++; } } free(s); free(pk1); free(pk2); /* Merge the secondary keys; much like the primary key code above. */ maxbuf = 100000; if ((sk1 = malloc(sizeof(struct ssiskey_s) * maxbuf)) == NULL) return SSI_ERR_MALLOC; if ((sk2 = malloc(sizeof(struct ssiskey_s) * maxbuf)) == NULL) return SSI_ERR_MALLOC; if ((s = malloc(sizeof(char) * newch->slen)) == NULL) return SSI_ERR_MALLOC; base1 = pos1 = buflen1 = 0; base2 = pos2 = buflen2 = 0; while (base1+pos1 < ch1->nsecondary || base2+pos2 < ch2->nsecondary) { /* refill buffer for ch1? */ if (pos1 == buflen1) { base1 += buflen1; pos1 = 0; buflen1 = MIN(ch1->nsecondary - base1, maxbuf); if (buflen1 > 0) read_skeys(ch1->fp, sk1, buflen1); } /* refill buffer for ch2? */ if (pos2 == buflen2) { base2 += buflen2; pos2 = 0; buflen2 = MIN(ch2->nsecondary - base2, maxbuf); if (buflen2 > 0) read_skeys(ch2->fp, sk2, buflen2); } /* mergesort on keys; be careful of case where we're out of keys in either ch1 or ch2 */ if (base2+pos2 == ch2->nsecondary || pkeysort(&(sk1[pos1]), &(sk2[pos2])) < 0) { write_skey(t3, &(pk1[pos1]), s); pos1++; } else { write_skey(t3, &(pk2[pos2]), s); pos2++; } } free(s); free(pk1); free(pk2); /* clear ch1, ch2, in prep for loading new chunks */ clear_ssifile(ch1); clear_ssifile(ch2); } /* end loop over chunks */ }#endif#ifdef MUGGINS_LETS_ME_SLEEP /* test driving code. *//* Minimally: cc -g -Wall -o shiva -D MUGGINS_LETS_ME_SLEEP ssi.c sqerror.c sre_string.c types.c sre_ctype.c sre_math.c -lm */intmain(int argc, char **argv){ char name[32], accession[32]; SSIINDEX *ssi; int mode; SSIOFFSET r_off, d_off; FILE *ofp; int i; int fh; /* a file handle */ int status; /* return status from a SSI call */ mode = SSI_OFFSET_I32; if ((ssi = SSICreateIndex(mode)) == NULL) Die("Failed to allocate SSI index"); /* Generate two FASTA files, tmp.0 and tmp.1, and index them. */ if ((ofp = fopen("tmp.0", "w")) == NULL) Die("failed to open tmp.0"); if ((status = SSIAddFileToIndex(ssi, "tmp.0", SQFILE_FASTA, &fh)) != 0) Die("SSIAddFileToIndex() failed: %s", SSIErrorString(status)); for (i = 0; i < 10; i++) { if ((status = SSIGetFilePosition(ofp, mode, &r_off)) != 0) Die("SSIGetFilePosition() failed: %s", SSIErrorString(status)); sprintf(name, "seq%d", i); sprintf(accession, "ac%d", i); fprintf(ofp, ">%s [%s] Description? we don't need no steenking description.\n", name, accession); if ((status = SSIGetFilePosition(ofp, mode, &d_off)) != 0) Die("SSIGetFilePosition() failed: %s", SSIErrorString(status)); fprintf(ofp, "AAAAAAAAAA\n"); fprintf(ofp, "CCCCCCCCCC\n"); fprintf(ofp, "GGGGGGGGGG\n"); fprintf(ofp, "TTTTTTTTTT\n"); if ((status = SSIAddPrimaryKeyToIndex(ssi, name, fh, &r_off, &d_off, 40)) != 0) Die("SSIAddPrimaryKeyToIndex() failed: %s", SSIErrorString(status)); if ((status = SSIAddSecondaryKeyToIndex(ssi, accession, name)) != 0) Die("SSIAddSecondaryKeyToIndex() failed: %s", SSIErrorString(status)); } SSISetFileForSubseq(ssi, fh, 11, 10); fclose(ofp); if ((ofp = fopen("tmp.1", "w")) == NULL) Die("failed to open tmp.1"); if ((status = SSIAddFileToIndex(ssi, "tmp.1", SQFILE_FASTA, &fh)) != 0) Die("SSIAddFileToIndex() failed: %s", SSIErrorString(status)); for (i = 10; i < 20; i++) { if ((status = SSIGetFilePosition(ofp, mode, &r_off)) != 0) Die("SSIGetFilePosition() failed: %s", SSIErrorString(status)); sprintf(name, "seq%d", i); sprintf(accession, "ac%d", i); fprintf(ofp, ">%s [%s] i/o, i/o, it's off to disk we go.\n", name, accession); if ((status = SSIGetFilePosition(ofp, mode, &d_off)) != 0) Die("SSIGetFilePosition() failed: %s", SSIErrorString(status)); fprintf(ofp, "AAAAAAAAAA 10\n"); fprintf(ofp, "CCCCCCCCCC 20\n"); fprintf(ofp, "GGGGGGGGGG 30\n"); fprintf(ofp, "TTTTTTTTTT 40\n"); if ((status = SSIAddPrimaryKeyToIndex(ssi, name, fh, &r_off, &d_off, 40)) != 0) Die("SSIAddPrimaryKeyToIndex() failed: %s", SSIErrorString(status)); if ((status = SSIAddSecondaryKeyToIndex(ssi, accession, name)) != 0) Die("SSIAddSecondaryKeyToIndex() failed: %s", SSIErrorString(status)); } SSISetFileForSubseq(ssi, fh, 14, 10); fclose(ofp); /* Write the index to tmp.ssi */ if ((status = SSIWriteIndex("tmp.ssi", ssi)) != 0) Die("SSIWriteIndex() failed: %s", SSIErrorString(status)); SSIFreeIndex(ssi); /* Now reopen the index and run some tests. */ exit(0);}#endif /* test driving code */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -