/*
 * word.c
*
*/
void
*do_malloc(size_t size)
{
    void *block = NULL;

    /*
     * the request is charged to total_memory before the limit is tested,
     * so a refused request still counts towards the total
     */
    total_memory += size;

    /* hand the request to malloc only while the count is below the limit */
    if (!((total_memory / MEGABYTE) >= mbytes))
        block = malloc(size);

    return block;
}
/*
 *
 * charge size bytes to the running memory count (total_memory)
 * returns NOMEMLEFT once total_memory / MEGABYTE has reached mbytes,
 * and 0 otherwise; no memory is actually allocated here
 */
int
get_memory(size_t size)
{
    int status = 0;

    total_memory += size;
    if ((total_memory / MEGABYTE) >= mbytes)
        status = NOMEMLEFT;

    return status;
}
/*
 *
 * initialize the word/non-word contexts and hash tables
 *
 * tables and words are caller-supplied arrays indexed by WORD and
 * NON_WORD; both slots of each array are overwritten with freshly
 * created objects.
 *
 */
void
init_word_model(hash_table *tables[], context *words[])
{
tables[WORD] = create_table();
tables[NON_WORD] = create_table();
words[WORD] = create_context(INIT_CONTEXT, DYNAMIC);
words[NON_WORD] = create_context(INIT_CONTEXT, DYNAMIC);
/* add end of message symbol to both word contexts */
install_symbol(words[WORD], END_OF_MESSAGE);
install_symbol(words[NON_WORD], END_OF_MESSAGE);
/*
 * charge the two END_OF_MESSAGE symbols to the memory count; the
 * NOMEMLEFT/0 result of get_memory is deliberately not examined here
 */
get_memory(2 * MEM_PER_SYMBOL);
}
/*
 *
 * discard the contents of the word and non-word models, then rebuild
 * them empty (hash tables re-created, END_OF_MESSAGE re-installed).
 *
 * increments purge_counter, and leaves total_memory equal to
 * base_memory on return.
 *
 */
void
purge_word_model(hash_table *tables[], context *words[])
{
/*
 * release the memory used by the word models
 * NOTE(review): words[] is reused below without a new create_context
 * call, so purge_context presumably empties a context rather than
 * freeing it - confirm against its definition
 */
purge_context(words[WORD]);
purge_context(words[NON_WORD]);
purge_table(tables[WORD]);
purge_table(tables[NON_WORD]);
purge_counter++;
/* rebuild the hash tables with no entries */
/*
 * the count is zeroed before the tables are re-created
 * NOTE(review): presumably so that allocations made inside
 * create_table are not refused by the memory limit - confirm
 */
total_memory = 0;
tables[WORD] = create_table();
tables[NON_WORD] = create_table();
/* add end of message symbol to word contexts */
install_symbol(words[WORD], END_OF_MESSAGE);
install_symbol(words[NON_WORD], END_OF_MESSAGE);
/*
 * whatever was counted during the rebuild is replaced by the base
 * level that encode_file/decode_file recorded after initialisation
 */
total_memory = base_memory;
}
/*
 *
 * create the static character and length contexts for the WORD and
 * NON_WORD models, install their symbols, and charge the memory used
 *
 * characters and lengths are caller-supplied arrays indexed by WORD
 * and NON_WORD; both slots of each array are overwritten.
 *
 */
void
init_char_model(context *characters[], context *lengths[])
{
    int sym;

    /* one character context and one length context per token type */
    characters[WORD] = create_context(CHAR_CONTEXT, STATIC);
    characters[NON_WORD] = create_context(CHAR_CONTEXT, STATIC);
    lengths[WORD] = create_context(MAX_WORD_LEN+1, STATIC);
    lengths[NON_WORD] = create_context(MAX_WORD_LEN+1, STATIC);

    /*
     * every character code 0..CHAR_CONTEXT-1 is installed in exactly
     * one of the two character contexts, selected by ISWORD
     */
    sym = 0;
    while (sym < CHAR_CONTEXT)
    {
        install_symbol(characters[(ISWORD(sym)) ? WORD : NON_WORD], sym);
        sym++;
    }

    /* every length 0..MAX_WORD_LEN is installed in both length contexts */
    sym = 0;
    while (sym <= MAX_WORD_LEN)
    {
        install_symbol(lengths[WORD], sym);
        install_symbol(lengths[NON_WORD], sym);
        sym++;
    }

    /*
     * charge the length contexts, then the character contexts
     * NOTE(review): each length context holds MAX_WORD_LEN+1 symbols but
     * only MAX_WORD_LEN are charged; left as is because the count
     * presumably decides when the model is purged - confirm before
     * changing
     */
    get_memory(2 * MAX_WORD_LEN * MEM_PER_SYMBOL);
    get_memory(2 * CHAR_CONTEXT * MEM_PER_SYMBOL);
}
/*
 *
 * compress the input as an alternating sequence of words and
 * non-words (per the original header: writing to stdout)
 *
 * tempstring/templength hold the bytes already taken from the input
 * while checking the magic number; they are encoded first, and the
 * rest is read from stdin by read_word.
 *
 */
void
encode_file(unsigned char tempstring[], int templength)
{
    char inbuf[BUFFER_SIZE];
    int inbuf_len, inbuf_pos = 0, word_no, k, type;
    string token;
    context *words[2], *characters[2], *lengths[2];
    hash_table *tables[2];

    /* character/length contexts first, then the word models */
    init_char_model(characters, lengths);
    init_word_model(tables, words);
    base_memory = total_memory;     /* level restored by purge_word_model */

    /* seed the input buffer with the bytes that were already read */
    memcpy(inbuf, tempstring, templength);
    inbuf_len = templength;

    start_encode();
    startoutputtingbits();

    /* tokens alternate between the two types, beginning with a WORD */
    type = WORD;
    while (1)
    {
        read_word(inbuf, &inbuf_len, &inbuf_pos, &token, type);

        /* nothing buffered and nothing read: the input is exhausted */
        if ((inbuf_len == 0) && (token.length == 0))
            break;

        nWords[type]++;
        word_no = lookup_word(&token, tables[type]);
        if (encode(words[type], word_no) == NOT_KNOWN)
        {
            int must_purge;

            /* new token: send its length, then each of its characters */
            encode(lengths[type], token.length);
            k = 0;
            while (k < token.length)
            {
                encode(characters[type], token.text[k]);
                k++;
            }

            /*
             * record the token in the hash table and the word context;
             * install_symbol is attempted only when add_word did not
             * report NOMEMLEFT, and either failure purges the model
             */
            must_purge = (add_word(&token, tables[type]) == NOMEMLEFT);
            if (!must_purge)
                must_purge = (install_symbol(words[type], word_no) != 0);
            if (must_purge)
                purge_word_model(tables, words);

            nDistinctWords[type]++;
        }

        type = !type;               /* switch to the other token type */
    }

    /* the end marker goes out in the context of the type that was due */
    encode(words[type], END_OF_MESSAGE);
    finish_encode();
    doneoutputtingbits();
}
/*
 *
 * decode a compressed stream, writing each recovered word and
 * non-word to stdout until END_OF_MESSAGE is decoded
 *
 */
void
decode_file()
{
    int k, symbol, type, out_len;
    hash_table *tables[2];
    context *words[2], *characters[2], *lengths[2];
    string spelled;
    unsigned char *out_text;

    /* character/length contexts first, then the word models */
    init_char_model(characters, lengths);
    init_word_model(tables, words);
    base_memory = total_memory;     /* level restored by purge_word_model */

    start_decode();
    startinputtingbits();

    /* tokens alternate between the two types, beginning with a WORD */
    type = WORD;
    while ((symbol = decode(words[type])) != END_OF_MESSAGE)
    {
        if (symbol != NOT_KNOWN)
        {
            /* known token: fetch its text and length from the table */
            get_word(tables[type], symbol, &out_text, &out_len);
        }
        else
        {
            int must_purge;

            /* new token: its length arrives first, then its characters */
            spelled.length = decode(lengths[type]);
            for (k = 0; k < spelled.length; k++)
                spelled.text[k] = decode(characters[type]);

            /* output comes from the local copy, which survives a purge */
            out_text = spelled.text;
            out_len = spelled.length;

            /*
             * record the token in the hash table and the word context;
             * install_symbol is attempted only when add_word did not
             * report NOMEMLEFT, and either failure purges the model
             */
            symbol = add_word(&spelled, tables[type]);
            must_purge = (symbol == NOMEMLEFT);
            if (!must_purge)
                must_purge = (install_symbol(words[type], symbol) != 0);
            if (must_purge)
                purge_word_model(tables, words);
        }

        /* written as a single item of out_len bytes; result not checked */
        fwrite(out_text, out_len, 1, stdout);
        bytes_output += out_len;

        type = !type;               /* switch to the other token type */
    }

    finish_decode();
    doneinputtingbits();
}
/*
 *
 * read the next word or non-word from stdin into *pWord, refilling
 * buffer with fread whenever it runs empty
 *
 * buffer         input buffer, refilled with up to BUFFER_SIZE bytes
 * buffer_length  in/out: number of unread bytes remaining in buffer
 * curr_pos       in/out: index in buffer of the next unread byte
 * pWord          out: the token read; its length is 0 if none was read
 * type           WORD (0) to collect word characters, NON_WORD (1) to
 *                collect non-word characters
 *
 * the token ends at the first character of the other class, once it
 * holds MAX_WORD_LEN characters, or when fread delivers no more data.
 *
 */
void
read_word(char buffer[], int *buffer_length, int *curr_pos, string *pWord,
          int type)
{
    unsigned char ch;

    pWord->length = 0;
    while (pWord->length < MAX_WORD_LEN)
    {
        if (*buffer_length == 0)
        {
            /*
             * buffer is empty: refill it using fread.  If no data is
             * delivered, return whatever has been collected so far
             */
            if ((*buffer_length = (int) fread(buffer, 1, BUFFER_SIZE, stdin)) == 0)
                return;
            bytes_input += *buffer_length;
            *curr_pos = 0;
        }

        /*
         * classify the byte by its unsigned value: plain char may be
         * signed, and init_char_model assigns symbols to the WORD and
         * NON_WORD character contexts using ISWORD on 0..CHAR_CONTEXT-1,
         * so the same value range has to be used here
         */
        ch = (unsigned char) buffer[*curr_pos];

        /*
         * terminate on non-word character if type = WORD (0)
         * or word character if type = NON_WORD (1)
         */
        if ((!ISWORD(ch)) ^ type)
            return;

        /* the byte belongs to this token: append it and consume it */
        pWord->text[pWord->length] = buffer[*curr_pos];
        pWord->length += 1;
        *curr_pos += 1;
        *buffer_length -= 1;
    }
}
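/*
 * Code-viewer page residue (not part of word.c) - keyboard shortcuts:
 *   copy code            Ctrl + C
 *   search code          Ctrl + F
 *   fullscreen mode      F11
 *   toggle theme         Ctrl + Shift + D
 *   show shortcuts       ?
 *   increase font size   Ctrl + =
 *   decrease font size   Ctrl + -
 */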