📄 ssi.c
字号:
#elif defined HAVE_FSEEKO && SIZEOF_OFF_T == 8 else if (fseeko(fp, offset->off.i64, SEEK_SET) != 0) return SSI_ERR_SEEK_FAILED;#elif defined HAVE_FSEEKO64 && SIZEOF_OFF64_T == 8 else if (fseeko64(fp, offset->off.i64, SEEK_SET) != 0) return SSI_ERR_SEEK_FAILED;#elif defined HAVE_FSEEK64 else if (fseek64(fp, offset->off.i64, SEEK_SET) != 0) return SSI_ERR_SEEK_FAILED;#elif defined ARITHMETIC_FPOS_T && SIZEOF_FPOS_T == 8 else if (fsetpos(fp, &(offset->off.i64)) != 0) return SSI_ERR_SEEK_FAILED;#endif return 0;}/* Function: SSIFileInfo() * Date: SRE, Tue Jan 2 10:31:01 2001 [St. Louis] * * Purpose: Given a file number {fh} in an open index file * {sfp}, retrieve file name {ret_filename} and * the file format {ret_format}. * * {ret_filename} is a pointer to a string maintained * internally by {sfp}. It should not be free'd; * SSIClose(sfp) takes care of it. * * Args: sfp - open index file * fh - handle on file to look up * ret_filename - RETURN: name of file n * ret_format - RETURN: format of file n * * Returns: 0 on success, nonzero on failure. */intSSIFileInfo(SSIFILE *sfp, int fh, char **ret_filename, int *ret_format){ if (fh < 0 || fh >= sfp->nfiles) return SSI_ERR_BADARG; *ret_filename = sfp->filename[fh]; *ret_format = sfp->fileformat[fh]; return 0;}/* Function: SSIClose() * Date: SRE, Sun Dec 31 14:56:37 2000 [St. Louis] * * Purpose: Close an open {SSIFILE *}. * * Args: sfp - index file to close. * * Returns: (void) */voidSSIClose(SSIFILE *sfp) { if (sfp != NULL) { clear_ssifile(sfp); if (sfp->fp != NULL) fclose(sfp->fp); free(sfp); }} /* clear_ssifile(): free the innards of SSIFILE, without * destroying the structure or closing the stream. 
*/static voidclear_ssifile(SSIFILE *sfp){ int i; if (sfp->filename != NULL) { for (i = 0; i < sfp->nfiles; i++) if (sfp->filename[i] != NULL) free(sfp->filename[i]); free(sfp->filename); } if (sfp->fileformat != NULL) free(sfp->fileformat); if (sfp->fileflags != NULL) free(sfp->fileflags); if (sfp->bpl != NULL) free(sfp->bpl); if (sfp->rpl != NULL) free(sfp->rpl);} /* Function: SSIRecommendMode() * Date: SRE, Fri Feb 16 08:23:47 2001 [St. Louis] * * Purpose: Examines the file and determines whether it should be * indexed with large file support or not; returns * SSI_OFFSET_I32 for most files, SSI_OFFSET_I64 for large * files, or -1 on failure. * * Args: file - name of file to check for size * * Returns: -1 on failure (including case where file is too big) * SSI_OFFSET_I32 for most files (<= 2^31-1 bytes) * SSI_OFFSET_I64 for large files (> 2^31-1 bytes) */intSSIRecommendMode(char *file){#if HAVE_STAT64 struct stat64 s1; if (stat64(file, &s1) == 0) { if (s1.st_size <= 2146483647L) return SSI_OFFSET_I32; else return SSI_OFFSET_I64; }#else struct stat s2; if (stat(file, &s2) == 0) { if (s2.st_size <= 2146483647L) return SSI_OFFSET_I32; else return SSI_OFFSET_I64; }#endif return -1;} /* Function: SSICreateIndex() * Date: SRE, Tue Jan 2 11:23:25 2001 [St. Louis] * * Purpose: Creates and initializes a SSI index structure. * Sequence file offset type is specified by {mode}. * * Args: mode - SSI_OFFSET_I32 or SSI_OFFSET_I64, sequence file index mode. * * Returns: ptr to new index structure, or NULL on failure. * Caller is responsible for free'ing the returned * structure with SSIFreeIndex(). 
*/
SSIINDEX *
SSICreateIndex(int mode)
{
  SSIINDEX *g;

  g = NULL;
  if ((g = malloc(sizeof *g)) == NULL) goto FAILURE;
  g->smode    = mode;
  g->imode    = SSI_OFFSET_I32;	/* index always starts as 32-bit; may get upgraded later */
  g->external = FALSE;
  g->max_ram  = SSI_MAXRAM;

#ifndef HAS_64BIT_FILE_OFFSETS
  if (mode == SSI_OFFSET_I64)
    Die("Can't create a 64-bit SSI index on this system, sorry;\n"
	"I don't have 64-bit file offset functions available.\n");
#endif

  g->filenames  = NULL;
  g->fileformat = NULL;
  g->bpl        = NULL;
  g->rpl        = NULL;
  g->flen       = 0;
  g->nfiles     = 0;

  g->pkeys      = NULL;
  g->plen       = 0;
  g->nprimary   = 0;
  g->ptmpfile   = "tmp.ssi.1";	/* hardcoded, for now. */
  g->ptmp       = NULL;

  g->skeys      = NULL;
  g->slen       = 0;
  g->nsecondary = 0;
  g->stmpfile   = "tmp.ssi.2";	/* hardcoded, for now. */
  g->stmp       = NULL;

  /* All mallocs must go after NULL initializations, because of the cleanup strategy;
   * we'll try to free anything non-NULL if a malloc fails.
   *
   * Each array is sized from its own element type (sizeof *ptr), so the
   * allocation cannot drift from the field's declaration; skeys used to be
   * sized with the primary-key struct.
   */
  if ((g->filenames  = malloc(sizeof *g->filenames  * SSI_FILE_BLOCK)) == NULL) goto FAILURE;
  if ((g->fileformat = malloc(sizeof *g->fileformat * SSI_FILE_BLOCK)) == NULL) goto FAILURE;
  if ((g->bpl        = malloc(sizeof *g->bpl        * SSI_FILE_BLOCK)) == NULL) goto FAILURE;
  if ((g->rpl        = malloc(sizeof *g->rpl        * SSI_FILE_BLOCK)) == NULL) goto FAILURE;
  if ((g->pkeys      = malloc(sizeof *g->pkeys      * SSI_KEY_BLOCK))  == NULL) goto FAILURE;
  if ((g->skeys      = malloc(sizeof *g->skeys      * SSI_KEY_BLOCK))  == NULL) goto FAILURE;

  return g;

 FAILURE:
  /* g is still NULL if the very first malloc failed; only hand a real
   * (partially built) structure to SSIFreeIndex().
   */
  if (g != NULL) SSIFreeIndex(g);
  return NULL;
}

/* Function: SSIGetFilePosition()
 * Date:     SRE, Tue Jan  2 09:59:26 2001 [St. Louis]
 *
 * Purpose:  Fills {ret_offset} with the current disk
 *           offset of {fp}, relative to the start of the file.
 *           {mode} is set to either SSI_OFFSET_I32 or
 *           SSI_OFFSET_I64. If {mode} is _I32 (32 bit), just wraps
 *           a call to ftell(); otherwise deals with system-dependent
 *           details of 64-bit file offsets.
* * Args: fp - open stream * mode - SSI_OFFSET_I32 or SSI_OFFSET_I64 * ret_offset - RETURN: file position * * Returns: 0 on success. nonzero on error. */int SSIGetFilePosition(FILE *fp, int mode, SSIOFFSET *ret_offset){ if (mode == SSI_OFFSET_I32) { ret_offset->mode = SSI_OFFSET_I32; ret_offset->off.i32 = ftell(fp); if (ret_offset->off.i32 == -1) return SSI_ERR_TELL_FAILED; } else if (mode != SSI_OFFSET_I64) abort(); /* only happens on a coding error */ else { ret_offset->mode = SSI_OFFSET_I64;#ifndef HAS_64BIT_FILE_OFFSETS return SSI_ERR_NO64BIT;#elif defined HAVE_FTELLO && SIZEOF_OFF_T == 8 if ((ret_offset->off.i64 = ftello(fp)) == -1) return SSI_ERR_TELL_FAILED;#elif defined HAVE_FTELLO64 && SIZEOF_OFF64_T == 8 if ((ret_offset->off.i64 = ftello64(fp)) == -1) return SSI_ERR_TELL_FAILED;#elif defined HAVE_FTELL64 if ((ret_offset->off.i64 = ftell64(fp)) == -1) return SSI_ERR_TELL_FAILED;#elif defined ARITHMETIC_FPOS_T && SIZEOF_FPOS_T == 8 if (fgetpos(fp, &(ret_offset->off.i64)) != 0) return SSI_ERR_TELL_FAILED;#endif } return 0;}/* Function: SSIAddFileToIndex() * Date: SRE, Tue Jan 2 12:54:36 2001 [St. Louis] * * Purpose: Adds the sequence file {filename}, which is known to * be in format {fmt}, to the index {g}. Creates and returns * a unique filehandle {fh} for then associating primary keys * with this file using SSIAddPrimaryKeyToIndex(). * * Args: g - active index * filename - file to add * fmt - format code for this file (e.g. SQFILE_FASTA) * ret_fh - RETURN: unique handle for this file * * Returns: 0 on success; nonzero on error. 
*/
int
SSIAddFileToIndex(SSIINDEX *g, char *filename, int fmt, int *ret_fh)
{
  int n;			/* length of the full filename */

  if (g->nfiles >= SSI_MAXFILES) return SSI_ERR_TOOMANY_FILES;

  /* Track the longest name seen (including its terminator). The length
   * is measured on the full {filename}; the stored name is whatever
   * FileTail() returns for it.
   */
  n = strlen(filename);
  if ((n+1) > g->flen) g->flen = n+1;

  g->filenames[g->nfiles]  = FileTail(filename, FALSE);
  g->fileformat[g->nfiles] = fmt;
  g->bpl[g->nfiles]        = 0;
  g->rpl[g->nfiles]        = 0;
  *ret_fh                  = g->nfiles;   /* handle is simply = file number */
  g->nfiles++;

  /* The arrays are kept with room for at least one more file: when the
   * count reaches a multiple of SSI_FILE_BLOCK, grow each by one block.
   *
   * realloc() results go through a temporary so that a failure leaves the
   * old array pointer in {g}, instead of overwriting it with NULL and
   * leaking the array. On SSI_ERR_MALLOC the file has already been
   * recorded and *ret_fh set, but the arrays may have no room for a
   * further file.
   */
  if (g->nfiles % SSI_FILE_BLOCK == 0) {
    size_t  nalloc = (size_t) g->nfiles + SSI_FILE_BLOCK;
    void   *tmp;

    if ((tmp = realloc(g->filenames, sizeof *g->filenames * nalloc)) == NULL)
      return SSI_ERR_MALLOC;
    g->filenames = tmp;

    if ((tmp = realloc(g->fileformat, sizeof *g->fileformat * nalloc)) == NULL)
      return SSI_ERR_MALLOC;
    g->fileformat = tmp;

    if ((tmp = realloc(g->bpl, sizeof *g->bpl * nalloc)) == NULL)
      return SSI_ERR_MALLOC;
    g->bpl = tmp;

    if ((tmp = realloc(g->rpl, sizeof *g->rpl * nalloc)) == NULL)
      return SSI_ERR_MALLOC;
    g->rpl = tmp;
  }
  return 0;
}

/* Function: SSISetFileForSubseq()
 * Date:     SRE, Tue Jan  9 10:02:05 2001 [St. Louis]
 *
 * Purpose:  Set SSI_FAST_SUBSEQ for the file indicated by
 *           filehandle {fh} in the index {g}, setting
 *           parameters {bpl} and {rpl} to the values given.
 *           {bpl} is the number of bytes per sequence data line.
 *           {rpl} is the number of residues per sequence data line.
 *           Caller must be sure that {bpl} and {rpl} do not change
 *           on any line of any sequence record in the file
 *           (except for the last data line of each record). If
 *           this is not the case in this file, SSI_FAST_SUBSEQ
 *           will not work, and this routine should not be
 *           called.
 *
 * Args:     g    - the active index
 *           fh   - handle for file to set SSI_FAST_SUBSEQ on
 *           bpl  - bytes per data line
 *           rpl  - residues per data line
 *
 * Returns:  0 on success; SSI_ERR_BADARG if {fh} is not a valid
 *           handle in {g}, or if {bpl} or {rpl} is not positive.
*/intSSISetFileForSubseq(SSIINDEX *g, int fh, int bpl, int rpl){ if (fh < 0 || fh >= g->nfiles) return SSI_ERR_BADARG; if (bpl <= 0 || rpl <= 0) return SSI_ERR_BADARG; g->bpl[fh] = bpl; g->rpl[fh] = rpl; return 0;}/* Function: SSIAddPrimaryKeyToIndex() * Date: SRE, Tue Jan 2 11:50:54 2001 [St. Louis] * * Purpose: Put primary key {key} in the index {g}, while telling * the index this primary key is in the file associated * with filehandle {fh} (returned by a previous call * to SSIAddFileToIndex()), and its record starts at * position {r_off} in the file. * * {d_off} and {L} are optional; they may be left unset * by passing NULL and 0, respectively. (If one is * provided, both must be provided.) If they are provided, * {d_off} gives the position of the first line of sequence * data in the record, and {L} gives the length of * the sequence in residues. They are used when * SSI_FAST_SUBSEQ is set for this file. If SSI_FAST_SUBSEQ * is not set for the file, {d_off} and {L} will be * ignored by the index reading API even if they are stored * by the index writing API, so it doesn't hurt for the * indexing program to provide them; typically they * won't know whether it's safe to set SSI_FAST_SUBSEQ * for the whole file until the whole file has been * read and every key has already been added to the index. * * Args: g - active index * key - primary key to add * fh - handle on file that this key's in * r_off - offset to start of record * d_off - offset to start of sequence data * L - length of sequence, or 0 * * Returns: 0 on success, nonzero on error. */intSSIAddPrimaryKeyToIndex(SSIINDEX *g, char *key, int fh, SSIOFFSET *r_off, SSIOFFSET *d_off, int L){ int n; /* a string length */ if (fh >= SSI_MAXFILES) return SSI_ERR_TOOMANY_FILES; if (g->nprimary >= SSI_MAXKEYS) return SSI_ERR_TOOMANY_KEYS; if (L > 0 && d_off == NULL) abort(); /* need both. */ /* Before adding the key: check how big our index is. * If it's getting too large, switch to external mode. 
*/ if (!g->external && current_index_size(g) >= g->max_ram) if (activate_external_sort(g) != 0) return SSI_ERR_NOFILE; /* Update maximum pkey length, if needed. */ n = strlen(key); if ((n+1) > g->plen) g->plen = n+1; /* External mode? Simply append to disk... */ if (g->external) { if (g->smode == SSI_OFFSET_I32) { fprintf(g->ptmp, "%s\t%d\t%lu\t%lu\t%lu\n", key, fh, (unsigned long) r_off->off.i32, (unsigned long) (d_off == NULL? 0 : d_off->off.i32), (unsigned long) L); } else { fprintf(g->ptmp, "%s\t%d\t%llu\t%llu\t%lu\n", key, fh, (unsigned long long) r_off->off.i64, (unsigned long long) (d_off == NULL? 0 : d_off->off.i64), (unsigned long) L); } g->nprimary++; return 0; } /* Else: internal mode, keep keys in memory... */ if ((g->pkeys[g->nprimary].key = sre_strdup(key, n)) == NULL) return SSI_ERR_MALLOC; g->pkeys[g->nprimary].fnum = (sqd_uint16) fh; g->pkeys[g->nprimary].r_off = *r_off; if (d_off != NULL && L > 0) { g->pkeys[g->nprimary].d_off = *d_off; g->pkeys[g->nprimary].len = L; } else {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -