📄 word.c

📁 本程序可以对文件进行算术编码处理解码处理等
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
 *
 */
void *
do_malloc(size_t size)
{
    /*
     * The request is charged to the running total before the limit is
     * tested, so a refused request still counts towards total_memory.
     */
    total_memory += size;

    /* only hand out memory while the total is below mbytes MEGABYTE units */
    if ((total_memory / MEGABYTE) < mbytes)
        return malloc(size);

    return NULL;
}


/*
 *
 * charge `size` to the global memory counter without allocating anything
 *
 * size:    amount to add to total_memory
 * returns: NOMEMLEFT once total_memory / MEGABYTE has reached mbytes,
 *          0 otherwise (the amount is added in both cases)
 */
int 
get_memory(size_t size)
{
    total_memory += size;

    return ((total_memory / MEGABYTE) >= mbytes) ? NOMEMLEFT : 0;
}


/*
 *
 * set up the word and non-word models
 *
 * tables: receives a fresh hash table in slots WORD and NON_WORD
 * words:  receives a fresh DYNAMIC context of INIT_CONTEXT size in
 *         slots WORD and NON_WORD, each seeded with END_OF_MESSAGE
 *
 * The two seeded symbols are charged to the memory counter.
 */
void 
init_word_model(hash_table *tables[], context *words[])
{
    const int kinds[2] = { WORD, NON_WORD };
    int k;

    /* hash tables first, then contexts, in the same order for both kinds */
    for (k = 0; k < 2; k++)
        tables[kinds[k]] = create_table();

    for (k = 0; k < 2; k++)
        words[kinds[k]] = create_context(INIT_CONTEXT, DYNAMIC);

    /* every word context must be able to code the end of message symbol */
    for (k = 0; k < 2; k++)
        install_symbol(words[kinds[k]], END_OF_MESSAGE);

    /* account for the two symbols just installed */
    get_memory(2 * MEM_PER_SYMBOL);
}


/*
 *
 * throw away the word and non-word models and start again from empty ones
 *
 * tables: both hash tables are purged and replaced by new ones
 * words:  both contexts are purged and re-seeded with END_OF_MESSAGE
 *         (the context pointers themselves are not reassigned here)
 *
 * Increments purge_counter and leaves total_memory equal to base_memory.
 */
void 
purge_word_model(hash_table *tables[], context *words[])
{
    const int kinds[2] = { WORD, NON_WORD };
    int k;

    /* release what the current models hold: contexts first, then tables */
    for (k = 0; k < 2; k++)
        purge_context(words[kinds[k]]);
    for (k = 0; k < 2; k++)
        purge_table(tables[kinds[k]]);
    purge_counter++;

    /*
     * the counter is zeroed before the tables are rebuilt -- presumably so
     * that their creation is not refused by the memory limit (TODO confirm
     * against create_table)
     */
    total_memory = 0;
    for (k = 0; k < 2; k++)
        tables[kinds[k]] = create_table();

    /* the emptied contexts still need the end of message symbol */
    for (k = 0; k < 2; k++)
        install_symbol(words[kinds[k]], END_OF_MESSAGE);

    /* resume accounting from the recorded base level */
    total_memory = base_memory;
}


/*
 *
 * build the STATIC contexts used to spell out words that are not yet
 * in the word models
 *
 * characters: receives one CHAR_CONTEXT-sized context per kind; every
 *             code 0..CHAR_CONTEXT-1 is installed in exactly one of
 *             them, chosen by ISWORD()
 * lengths:    receives one context per kind holding every length
 *             0..MAX_WORD_LEN
 */
void 
init_char_model(context *characters[], context *lengths[])
{
    int code, len;

    characters[WORD] = create_context(CHAR_CONTEXT, STATIC);
    characters[NON_WORD] = create_context(CHAR_CONTEXT, STATIC);
    lengths[WORD] = create_context(MAX_WORD_LEN+1, STATIC);
    lengths[NON_WORD] = create_context(MAX_WORD_LEN+1, STATIC);

    /* each character code goes into the context matching its class */
    for (code = 0; code < CHAR_CONTEXT; code++)
        install_symbol(characters[ISWORD(code) ? WORD : NON_WORD], code);

    /* every possible length is known to both length contexts */
    len = 0;
    while (len <= MAX_WORD_LEN)
    {
        install_symbol(lengths[WORD], len);
        install_symbol(lengths[NON_WORD], len);
        len++;
    }

    /*
     * charge the contexts to the memory counter
     * NOTE(review): MAX_WORD_LEN+1 lengths are installed per context but
     * only MAX_WORD_LEN are charged, and the character charge covers
     * 2 * CHAR_CONTEXT symbols although each code is installed once;
     * the amounts are kept unchanged here
     */
    get_memory(2 * MAX_WORD_LEN * MEM_PER_SYMBOL);
    get_memory(2 * CHAR_CONTEXT * MEM_PER_SYMBOL);
}



/*
 *
 * compress the input as an alternating sequence of words and non-words
 * (input is pulled from stdin by read_word(); the header comment of the
 * original says the coded output goes to stdout)
 *
 * tempstring: bytes already consumed from the input by the caller
 * templength: number of bytes in tempstring; they are copied into the
 *             local buffer, so the caller must keep this no larger than
 *             BUFFER_SIZE
 */
void 
encode_file(unsigned char tempstring[], int templength)
{
    char	inbuf[BUFFER_SIZE];
    int		inbuf_len, inbuf_pos = 0, kind;
    string	token;
    context	*words[2], *characters[2], *lengths[2];
    hash_table	*tables[2];

    /* character and length contexts first, then the word models */
    init_char_model(characters, lengths);
    init_word_model(tables, words);

    /* remember the memory level reached with empty word models */
    base_memory = total_memory;

    /* the bytes the caller already read are encoded like any other input */
    memcpy(inbuf, tempstring, templength);
    inbuf_len = templength;

    start_encode();
    startoutputtingbits();

    /* the stream always begins with a (possibly empty) word */
    kind = WORD;
    for (;;)
    {
        int word_no, pos, must_purge;

        read_word(inbuf, &inbuf_len, &inbuf_pos, &token, kind);

        /* nothing read and nothing buffered: the input is exhausted */
        if ((token.length == 0) && (inbuf_len == 0))
            break;

        nWords[kind]++;
        word_no = lookup_word(&token, tables[kind]);
        if (encode(words[kind], word_no) == NOT_KNOWN)
        {
            /* unknown token: send its length, then its characters */
            encode(lengths[kind], token.length);
            for (pos = 0; pos < token.length; pos++)
                encode(characters[kind], token.text[pos]);

            /*
             * record the token; install_symbol() is only attempted when
             * add_word() did not report NOMEMLEFT, and a failure of
             * either one triggers a purge of the word models
             */
            must_purge = (add_word(&token, tables[kind]) == NOMEMLEFT);
            if (!must_purge)
                must_purge = (install_symbol(words[kind], word_no) != 0);
            if (must_purge)
                purge_word_model(tables, words);

            nDistinctWords[kind]++;
        }

        /* words and non-words strictly alternate */
        kind = !kind;
    }

    /* the end marker is coded in the context of the kind that was due next */
    encode(words[kind], END_OF_MESSAGE);
    finish_encode();
    doneoutputtingbits();
}


/*
 *
 * expand a compressed stream, writing the recovered text to stdout
 *
 * Rebuilds the same models as the encoder and keeps them in step:
 * every token decoded as NOT_KNOWN is spelled out, added to the hash
 * table and installed as a new symbol.  bytes_output is advanced by
 * the length of every token written.
 */
void 
decode_file()
{
    int kind, symbol, pos, out_len, must_purge;
    hash_table *tables[2];
    context *words[2], *characters[2], *lengths[2];
    string spelled;
    unsigned char *out_text;

    /* character and length contexts first, then the word models */
    init_char_model(characters, lengths);
    init_word_model(tables, words);

    /* remember the memory level reached with empty word models */
    base_memory = total_memory;

    start_decode();
    startinputtingbits();

    /* the stream always begins with a word */
    kind = WORD;
    while ((symbol = decode(words[kind])) != END_OF_MESSAGE)
    {
        if (symbol != NOT_KNOWN)
        {
            /* known token: fetch its text from the hash table */
            get_word(tables[kind], symbol, &out_text, &out_len);
        }
        else
        {
            /* new token: its length comes first, then its characters */
            spelled.length = decode(lengths[kind]);
            for (pos = 0; pos < spelled.length; pos++)
                spelled.text[pos] = decode(characters[kind]);
            out_text = spelled.text;
            out_len = spelled.length;

            /*
             * record the token; install_symbol() is only attempted when
             * add_word() did not report NOMEMLEFT, and a failure of
             * either one triggers a purge of the word models
             */
            symbol = add_word(&spelled, tables[kind]);
            must_purge = (symbol == NOMEMLEFT);
            if (!must_purge)
                must_purge = (install_symbol(words[kind], symbol) != 0);
            if (must_purge)
                purge_word_model(tables, words);
        }

        /* emit the token and account for it */
        fwrite(out_text, out_len, 1, stdout);
        bytes_output += out_len;

        /* words and non-words strictly alternate */
        kind = !kind;
    } 
    finish_decode();
    doneinputtingbits();
}


/*
 *
 * read the next word or non-word from stdin
 *
 * buffer:        input buffer of BUFFER_SIZE chars, refilled with fread()
 *                whenever it runs empty
 * buffer_length: in/out, number of unread chars left in buffer
 * curr_pos:      in/out, index of the next unread char in buffer
 * pWord:         receives the token; its length is reset to 0 on entry
 *                and never exceeds MAX_WORD_LEN
 * type:          WORD (0) to collect word characters, NON_WORD (1) to
 *                collect non-word characters
 *
 * Returns when a character of the other class is met (it is left in the
 * buffer), when MAX_WORD_LEN characters have been collected, or when
 * fread() delivers nothing more.  bytes_input is advanced by every
 * refill.  A zero-length token is a valid result.
 */
void
read_word(char buffer[], int *buffer_length, int *curr_pos, string *pWord,
	  int type)
{
    unsigned char ch;

    pWord->length = 0;
    while (pWord->length < MAX_WORD_LEN)
    {
	if (*buffer_length == 0)
	{
	    /* 
	     * if buffer is empty then fill it, using fread. If nothing
	     * more can be read then return the current word as it stands
	     */ 
	    if ((*buffer_length = fread(buffer, 1, BUFFER_SIZE, stdin)) == 0)
		return;
	    bytes_input += *buffer_length;
	    *curr_pos = 0;
	}

	/*
	 * classify the character by its unsigned code: plain char may be
	 * signed, and a negative value must not reach ISWORD (the character
	 * contexts are built from the codes 0..CHAR_CONTEXT-1, and a
	 * <ctype.h>-style or table-based ISWORD is undefined for negative
	 * arguments)
	 */
	ch = (unsigned char) buffer[*curr_pos];

	/* 
	 * terminate on non-word character if type = WORD (0)
	 * or word character if type = NON_WORD (1)
	 */
	if ((!ISWORD(ch)) ^ type)
	    return;

	/* the character belongs to this token: consume it */
	pWord->text[pWord->length] = buffer[*curr_pos];
	pWord->length += 1;
	*curr_pos += 1;
	*buffer_length -= 1;
    }
}
上一页 12
💿 文件大小 1372 K
👤 上传用户 dsjacky
📂 所属分类数值算法/人工智能
🏷️ 相关标签

#程序 #算术编码 #解码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -