📄 cast.c
字号:
if (outfp != NULL) putc(END_VERBATIM, outfp);\ if (outbuf != NULL) outbuf[outlen] = END_VERBATIM;\ outlen ++;\ }\}#define EASY_PRE_VERBATIM(v) \{\ if (easysearch) {\ if ((maxoutlen >= 0) && (outlen + 1 >= maxoutlen)) return outlen;\ if (outfp != NULL) putc(ONE_VERBATIM, outfp);\ if (outbuf != NULL) outbuf[outlen] = ONE_VERBATIM;\ outlen ++;\ }\ else {\ PRE_VERBATIM(v)\ }\}#define EASY_POST_VERBATIM(v) \{\ if (easysearch) {\ POST_VERBATIM(v)\ }\ /* else ignore */\}intget_special_word_index(word, len) char word[MAX_NAME_LEN]; int len;{ register int comp; hash_entry *e; if ((len > MAX_WORD_LEN) || (SPECIAL_WORDS <= 0)) return -1; e = freq_words_table[len]; while((e != NULL) && (e->val.offset != -1)) { comp = strcmp(word, e->word); if (comp == 0) return e->val.offset; if (comp < 0) return -1; /* can't find it anyway */ e = e->next; } return -1;}/* Compresses input from indata and outputs it into outdata: returns number of chars in output */inttcompress(indata, maxinlen, outdata, maxoutlen, flags) void *indata, *outdata; int maxinlen, maxoutlen; int flags;{ unsigned char curword[MAX_NAME_LEN]; int curlen; int hashindex; hash_entry *e; unsigned int c; unsigned short encodedindex; int skiplen; int ret; int verbatim_state = 0; char *sig = comp_signature; FILE *infp = NULL, *outfp = NULL; unsigned char *inbuf = NULL, *outbuf = NULL; int outlen = 0, inlen = 0; int easysearch = flags&TC_EASYSEARCH; int untilnewline = flags&TC_UNTILNEWLINE; if (flags & TC_SILENT) return 0; if (easysearch) { ONE_VERBATIM = EASY_ONE_VERBATIM; NUM_SPECIAL_DELIMITERS = EASY_NUM_SPECIAL_DELIMITERS; END_SPECIAL_DELIMITERS = EASY_END_SPECIAL_DELIMITERS; } else { ONE_VERBATIM = HARD_ONE_VERBATIM; NUM_SPECIAL_DELIMITERS = HARD_NUM_SPECIAL_DELIMITERS; END_SPECIAL_DELIMITERS = HARD_END_SPECIAL_DELIMITERS; } if (maxinlen < 0) { infp = (FILE *)indata; } else { inbuf = (unsigned char *)indata; } if (maxoutlen < 0) { outfp = (FILE *)outdata; } else { outbuf = (unsigned char *)outdata; } /* Write signature and information about whether compression was context-free or not: first 16 bytes */ if (outfp != NULL) { if ((maxoutlen >= 0) && (outlen + SIGNATURE_LEN >= maxoutlen)) return outlen; if (0 == fwrite(sig, 1, SIGNATURE_LEN - 1, outfp)) return 0; if (easysearch) putc(1, outfp); else putc(0, outfp); outlen += SIGNATURE_LEN; } /* No need to put a signature OR easysearch when doing it in memory: caller must manipulate */ /* * The algorithm for compression is as follows: * * For each input word, we search and see if it is in the dictionary. * If it IS there, we just look at its word-index and output it. * Then, if the character immediately after the word is NOT a blank, * we output a second character indicating what it was. * * If it is not in the dictionary then we output it verbatim: for * verbatim o/p, we take care to merge consecutive verbatim outputs * by NOT putting delimiters between them (one start and one end * delimiter). * * If the input is not a word but a single character, then it can be: * 1. A special character, in which case we output its code. * 2. A blank character in which case we keep getting more characters * to see howmany blanks we get. At the first non blank character, * we output a sequence of special characters which encode multiple * blanks (note: blanks can be spaces, tabs or newlines). * * Please refer to the state diagram for explanations. * I've used gotos since the termination condition is too complex. */real_tgetword: curlen = 0; curword[0] = '\0';concocted_tgetword: c = tgetword(infp, inbuf, maxinlen, &inlen, curword, &curlen);bypass_tgetword: if (curlen == 0) { /* only one character read and that is in c. */ switch(c) { case ' ': case '\t': case '\n': POST_VERBATIM(verbatim_state); /* need post-verbatim since there might be a LOT of blanks, etc. */ ret = skip(infp, inbuf, maxinlen, &inlen, c, &skiplen); process_spaces(c, skiplen); if ((c == '\n') && untilnewline) return outlen; if (isalnum((unsigned char)ret)) { curword[0] = (unsigned char)ret; curword[1] = '\0'; curlen = 1; goto concocted_tgetword; } else if (ret != MYEOF) { c = (unsigned int)ret; goto bypass_tgetword; } /* else fall thru */ case MYEOF: return outlen; default: if ((ret = get_special_text_index(c)) != -1) { if ((maxoutlen >= 0) && (outlen + 1 >= maxoutlen)) return outlen; if (verbatim_state) { /* no need to do post-verbatim since only one character: optimization */ if (outfp != NULL) putc(c, outfp); if (outbuf != NULL) outbuf[outlen] = c; outlen ++; } else { if (outfp != NULL) putc(ret + BEGIN_SPECIAL_TEXTS, outfp); if (outbuf != NULL) outbuf[outlen] = ret + BEGIN_SPECIAL_TEXTS; outlen ++; } } else { /* * Has to be verbatim character: they have a ONE_VERBATIM before each * irrespective of verbatim_state. Otherwise there is no way to differentiate * one of our special characters from the same characters appearing in the * source. Hence binary files blow-up to twice their original size. * * Also, if it is a verbatim character that cannot be confused with one of OUR * special characters, then just put it in w/o changing verbatim state. Else * put a begin-verbatim before it and THEN output that character=saves 1 char. */ if ((c != BEGIN_VERBATIM) && (c != END_VERBATIM)) { /* reduces to below if easysearch */ EASY_PRE_VERBATIM(verbatim_state) if ((maxoutlen >= 0) && (outlen + 1 >= maxoutlen)) return outlen; if (outfp != NULL) putc(c, outfp); if (outbuf != NULL) outbuf[outlen] = c; outlen ++; } else { /* like \ escape in C: \ is \\ */ if ((maxoutlen >= 0) && (outlen + 2 >= maxoutlen)) return outlen; if (outfp != NULL) putc(ONE_VERBATIM, outfp); if (outbuf != NULL) outbuf[outlen] = ONE_VERBATIM; outlen ++; if (outfp != NULL) putc(c, outfp); if (outbuf != NULL) outbuf[outlen] = c; outlen ++; } } goto real_tgetword; } } else /* curlen >= 1 */ { if (!easysearch && verbatim_state && (curlen <= 2)) { fprintf(outfp, "%s", curword); /* don't bother to close the verbatim state and put a 2byte index=saves 1 char */ curword[0] = '\0'; curlen = 0; goto bypass_tgetword; } else { if ((ret = get_special_word_index(curword, curlen)) != -1) { POST_VERBATIM(verbatim_state); /* printf("ret=%d word=%s\n", ret, curword); */ if ((maxoutlen >= 0) && (outlen + 1 >= maxoutlen)) return outlen; if (outfp != NULL) putc(ret + BEGIN_SPECIAL_WORDS, outfp); if (outbuf != NULL) outbuf[outlen] = ret + BEGIN_SPECIAL_WORDS; outlen ++; } else if ((e = get_hash(compress_hash_table, curword, curlen, &hashindex)) != NULL) {#if 0 fprintf(stderr, "%x ", e->val.attribute.index);#endif /*0*/ encodedindex = encode_index(e->val.attribute.index); POST_VERBATIM(verbatim_state); if ((maxoutlen >= 0) && (outlen + sizeof(short) >= maxoutlen)) return outlen; if (outfp != NULL) { putc(((encodedindex & 0xff00)>>8), outfp); putc((encodedindex & 0x00ff), outfp); } if (outbuf != NULL) { outbuf[outlen] = ((encodedindex & 0xff00)>>8); outbuf[outlen + 1] = encodedindex & 0x00ff; } outlen += sizeof(short); } else goto NOT_IN_DICTIONARY; /* process_char_after_word: */ switch(c) { case ' ': goto real_tgetword; /* blank is a part of the word */ case MYEOF: if (easysearch) return outlen; if (outfp != NULL) putc(NOTBLANK, outfp); if (outbuf != NULL) outbuf[outlen] = NOTBLANK; outlen ++; return outlen; default: if ((maxoutlen >= 0) && (outlen + 1 >= maxoutlen)) return outlen; if ((ret = get_special_delimiter_index(c)) != -1) { if (outfp != NULL) putc((ret+BEGIN_SPECIAL_DELIMITERS), outfp); if (outbuf != NULL) outbuf[outlen] = ret + BEGIN_SPECIAL_DELIMITERS; outlen ++; goto real_tgetword; } else { if (outfp != NULL) putc(NOTBLANK, outfp); if (outbuf != NULL) outbuf[outlen] = NOTBLANK; outlen ++; if (!isalnum(c)) { curword[0] = '\0'; curlen = 0; goto bypass_tgetword; } else { /* might be a number which ended with an alphabet: ".. born in 1992AD" */ curword[0] = c; curword[1] = '\0'; curlen = 1; goto concocted_tgetword; } } } } NOT_IN_DICTIONARY: /* word not in dictionary */ PRE_VERBATIM(verbatim_state); if ((maxoutlen >= 0) && (outlen + curlen >= maxoutlen)) return outlen; if ((outfp != NULL) && (0 == fwrite(curword, sizeof(char), curlen, outfp))) return 0; if (outbuf != NULL) memcpy(outbuf+outlen, curword, curlen); outlen += curlen; EASY_POST_VERBATIM(verbatim_state); switch(c) { case MYEOF: /* Prefix searches still work since our scheme is context free */ return outlen; default: if (!isalnum(c)) { curword[0] = '\0'; curlen = 0; goto bypass_tgetword; } else { /* might be a number which ended with an alphabet: ".. born in 1992AD" */ curword[0] = c; curword[1] = '\0'; curlen = 1; goto concocted_tgetword; } } }}#define FUNCTION tcompress_file#define DIRECTORY tcompress_directory#include "trecursive.c"/* returns #bytes (>=0) in the compressed file, -1 if major error (not able to compress) */tcompress_file(name, outname, flags) char *name, *outname; int flags;{ FILE *fp; FILE *outfp; int inlen, ret; struct stat statbuf; /* struct timeval tvp[2]; */ struct utimbuf tvp; char tempname[MAX_LINE_LEN]; if (name == NULL) return -1; special_get_name(name, -1, tempname); inlen = strlen(tempname); if (-1 == stat(tempname, &statbuf)) { if (flags & TC_ERRORMSGS) fprintf(stderr, "permission denied or non-existent: %s\n", tempname); return -1; } if (S_ISDIR(statbuf.st_mode)) { if (flags & TC_RECURSIVE) return tcompress_directory(tempname, outname, flags); if (flags & TC_ERRORMSGS) fprintf(stderr, "skipping directory: %s\n", tempname); return -1; } if (!S_ISREG(statbuf.st_mode)) { if (flags & TC_ERRORMSGS) fprintf(stderr, "not a regular file, skipping: %s\n", tempname); return -1; } if ((fp = fopen(tempname, "r")) == NULL) { if (flags & TC_ERRORMSGS) fprintf(stderr, "permission denied or non-existent: %s\n", tempname); return -1; } if (!tcompressible_fp(fp, flags)) { if (flags & TC_ERRORMSGS) fprintf(stderr, " skipping: %s\n", tempname); fclose(fp); return -1; } rewind(fp); if (flags & TC_SILENT) { printf("%s\n", tempname); fclose(fp); return 0; } /* Create and open output file */ strncpy(outname, tempname, MAX_LINE_LEN); if (inlen + strlen(COMP_SUFFIX) + 1 >= MAX_LINE_LEN) { outname[MAX_LINE_LEN - strlen(COMP_SUFFIX)] = '\0'; fprintf(stderr, "very long file name %s: truncating to: %s", tempname, outname); } strcat(outname, COMP_SUFFIX); if (!access(outname, R_OK)) { /* output file exists */ if (!(flags & TC_OVERWRITE)) { fclose(fp); return 0; } else if (!(flags & TC_NOPROMPT)) { char s[8]; printf("overwrite %s? (y/n): ", outname); scanf("%c", s); if (s[0] != 'y') { fclose(fp); return 0; } } } if ((outfp = fopen(outname, "w")) == NULL) { fprintf(stderr, "cannot open for writing: %s\n", outname); fclose(fp); return -1; } ret = tcompress(fp, -1, outfp, -1, flags); if ((statbuf.st_size * (100 - COMP_ATLEAST))/100 < ret) { fprintf(stderr, "less than %d%% compression, skipping: %s\n", COMP_ATLEAST, tempname); fclose(fp); rewind(outfp); fclose(outfp); unlink(outname); return ret; } if ((ret > 0) && (flags & TC_REMOVE)) unlink(tempname); fclose(fp); fflush(outfp); fclose(outfp); /* tvp[0].tv_sec = statbuf.st_atime; tvp[0].tv_usec = 0; tvp[1].tv_sec = statbuf.st_mtime; tvp[1].tv_usec = 0; utimes(outname, tvp); */ tvp.actime = statbuf.st_atime; tvp.modtime = statbuf.st_mtime; utime(outname, &tvp); return ret;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -