📄 megahal.c
字号:
*/ dictionary->index[position]=dictionary->size-1;succeed: return(dictionary->index[position]);fail: return(0);}/*---------------------------------------------------------------------------*//* * Function: Search_Dictionary * * Purpose: Search the dictionary for the specified word, returning its * position in the index if found, or the position where it * should be inserted otherwise. */int search_dictionary(DICTIONARY *dictionary, STRING word, bool *find){ int position; int min; int max; int middle; int compar; /* * If the dictionary is empty, then obviously the word won't be found */ if(dictionary->size==0) { position=0; goto notfound; } /* * Initialize the lower and upper bounds of the search */ min=0; max=dictionary->size-1; /* * Search repeatedly, halving the search space each time, until either * the entry is found, or the search space becomes empty */ while(TRUE) { /* * See whether the middle element of the search space is greater * than, equal to, or less than the element being searched for. */ middle=(min+max)/2; compar=wordcmp(word, dictionary->entry[dictionary->index[middle]]); /* * If it is equal then we have found the element. Otherwise we * can halve the search space accordingly. */ if(compar==0) { position=middle; goto found; } else if(compar>0) { if(max==middle) { position=middle+1; goto notfound; } min=middle+1; } else { if(min==middle) { position=middle; goto notfound; } max=middle-1; } }found: *find=TRUE; return(position);notfound: *find=FALSE; return(position);}/*---------------------------------------------------------------------------*//* * Function: Find_Word * * Purpose: Return the symbol corresponding to the word specified. * We assume that the word with index zero is equal to a * NULL word, indicating an error condition. */BYTE2 find_word(DICTIONARY *dictionary, STRING word){ int position; bool found; position=search_dictionary(dictionary, word, &found); if(found==TRUE) return(dictionary->index[position]); else return(0);}/*---------------------------------------------------------------------------*//* * Function: Wordcmp * * Purpose: Compare two words, and return an integer indicating whether * the first word is less than, equal to or greater than the * second word. */int wordcmp(STRING word1, STRING word2){ register int i; int bound; bound=MIN(word1.length,word2.length); for(i=0; i<bound; ++i) if(toupper(word1.word[i])!=toupper(word2.word[i])) return((int)(toupper(word1.word[i])-toupper(word2.word[i]))); if(word1.length<word2.length) return(-1); if(word1.length>word2.length) return(1); return(0);}/*---------------------------------------------------------------------------*//* * Function: Free_Dictionary * * Purpose: Release the memory consumed by the dictionary. */void free_dictionary(DICTIONARY *dictionary){ if(dictionary==NULL) return; if(dictionary->entry!=NULL) { free(dictionary->entry); dictionary->entry=NULL; } if(dictionary->index!=NULL) { free(dictionary->index); dictionary->index=NULL; } dictionary->size=0;}/*---------------------------------------------------------------------------*/void free_model(MODEL *model){ if(model==NULL) return; if(model->forward!=NULL) { free_tree(model->forward); } if(model->backward!=NULL) { free_tree(model->backward); } if(model->context!=NULL) { free(model->context); } if(model->dictionary!=NULL) { free_dictionary(model->dictionary); free(model->dictionary); } free(model);}/*---------------------------------------------------------------------------*/void free_tree(TREE *tree){ static int level=0; register unsigned int i; if(tree==NULL) return; if(tree->tree!=NULL) { if(level==0) progress("Freeing tree", 0, 1); for(i=0; i<tree->branch; ++i) { ++level; free_tree(tree->tree[i]); --level; if(level==0) progress(NULL, i, tree->branch); } if(level==0) progress(NULL, 1, 1); free(tree->tree); } free(tree);}/*---------------------------------------------------------------------------*//* * Function: Initialize_Dictionary * * Purpose: Add dummy words to the dictionary. */void initialize_dictionary(DICTIONARY *dictionary){ STRING word={ 7, "<ERROR>" }; STRING end={ 5, "<FIN>" }; (void)add_word(dictionary, word); (void)add_word(dictionary, end);}/*---------------------------------------------------------------------------*//* * Function: New_Dictionary * * Purpose: Allocate room for a new dictionary. */DICTIONARY *new_dictionary(void){ DICTIONARY *dictionary=NULL; dictionary=(DICTIONARY *)malloc(sizeof(DICTIONARY)); if(dictionary==NULL) { error("new_dictionary", "Unable to allocate dictionary."); return(NULL); } dictionary->size=0; dictionary->index=NULL; dictionary->entry=NULL; return(dictionary);}/*---------------------------------------------------------------------------*//* * Function: Save_Dictionary * * Purpose: Save a dictionary to the specified file. */void save_dictionary(FILE *file, DICTIONARY *dictionary){ register unsigned int i; fwrite(&(dictionary->size), sizeof(BYTE4), 1, file); progress("Saving dictionary", 0, 1); for(i=0; i<dictionary->size; ++i) { save_word(file, dictionary->entry[i]); progress(NULL, i, dictionary->size); } progress(NULL, 1, 1);}/*---------------------------------------------------------------------------*//* * Function: Load_Dictionary * * Purpose: Load a dictionary from the specified file. */void load_dictionary(FILE *file, DICTIONARY *dictionary){ register int i; int size; fread(&size, sizeof(BYTE4), 1, file); progress("Loading dictionary", 0, 1); for(i=0; i<size; ++i) { load_word(file, dictionary); progress(NULL, i, size); } progress(NULL, 1, 1);}/*---------------------------------------------------------------------------*//* * Function: Save_Word * * Purpose: Save a dictionary word to a file. */void save_word(FILE *file, STRING word){ register unsigned int i; fwrite(&(word.length), sizeof(BYTE1), 1, file); for(i=0; i<word.length; ++i) fwrite(&(word.word[i]), sizeof(char), 1, file);}/*---------------------------------------------------------------------------*//* * Function: Load_Word * * Purpose: Load a dictionary word from a file. */void load_word(FILE *file, DICTIONARY *dictionary){ register unsigned int i; STRING word; fread(&(word.length), sizeof(BYTE1), 1, file); word.word=(char *)malloc(sizeof(char)*word.length); if(word.word==NULL) { error("load_word", "Unable to allocate word"); return; } for(i=0; i<word.length; ++i) fread(&(word.word[i]), sizeof(char), 1, file); add_word(dictionary, word); free(word.word);}/*---------------------------------------------------------------------------*//* * Function: New_Node * * Purpose: Allocate a new node for the n-gram tree, and initialise * its contents to sensible values. */TREE *new_node(void){ TREE *node=NULL; /* * Allocate memory for the new node */ node=(TREE *)malloc(sizeof(TREE)); if(node==NULL) { error("new_node", "Unable to allocate the node."); goto fail; } /* * Initialise the contents of the node */ node->symbol=0; node->usage=0; node->count=0; node->branch=0; node->tree=NULL; return(node);fail: if(node!=NULL) free(node); return(NULL);}/*---------------------------------------------------------------------------*//* * Function: New_Model * * Purpose: Create and initialise a new ngram model. */MODEL *new_model(int order){ MODEL *model=NULL; model=(MODEL *)malloc(sizeof(MODEL)); if(model==NULL) { error("new_model", "Unable to allocate model."); goto fail; } model->order=order; model->forward=new_node(); model->backward=new_node(); model->context=(TREE **)malloc(sizeof(TREE *)*(order+2)); if(model->context==NULL) { error("new_model", "Unable to allocate context array."); goto fail; } initialize_context(model); model->dictionary=new_dictionary(); initialize_dictionary(model->dictionary); return(model);fail: return(NULL);}/*---------------------------------------------------------------------------*//* * Function: Update_Model * * Purpose: Update the model with the specified symbol. */void update_model(MODEL *model, int symbol){ register unsigned int i; /* * Update all of the models in the current context with the specified * symbol. */ for(i=(model->order+1); i>0; --i) if(model->context[i-1]!=NULL) model->context[i]=add_symbol(model->context[i-1], (BYTE2)symbol); return;}/*---------------------------------------------------------------------------*//* * Function: Update_Context * * Purpose: Update the context of the model without adding the symbol. */void update_context(MODEL *model, int symbol){ register unsigned int i; for(i=(model->order+1); i>0; --i) if(model->context[i-1]!=NULL) model->context[i]=find_symbol(model->context[i-1], symbol);}/*---------------------------------------------------------------------------*//* * Function: Add_Symbol * * Purpose: Update the statistics of the specified tree with the * specified symbol, which may mean growing the tree if the * symbol hasn't been seen in this context before. */TREE *add_symbol(TREE *tree, BYTE2 symbol){ TREE *node=NULL; /* * Search for the symbol in the subtree of the tree node. */ node=find_symbol_add(tree, symbol); /* * Increment the symbol counts */ if((node->count<65535)) { node->count+=1; tree->usage+=1; } return(node);}/*---------------------------------------------------------------------------*//* * Function: Find_Symbol * * Purpose: Return a pointer to the child node, if one exists, which * contains the specified symbol. */TREE *find_symbol(TREE *node, int symbol){ register unsigned int i; TREE *found=NULL; bool found_symbol=FALSE; /* * Perform a binary search for the symbol. */ i=search_node(node, symbol, &found_symbol); if(found_symbol==TRUE) found=node->tree[i]; return(found);}/*---------------------------------------------------------------------------*//* * Function: Find_Symbol_Add * * Purpose: This function is conceptually similar to find_symbol, * apart from the fact that if the symbol is not found, * a new node is automatically allocated and added to the * tree. */TREE *find_symbol_add(TREE *node, int symbol){ register unsigned int i; TREE *found=NULL; bool found_symbol=FALSE; /* * Perform a binary search for the symbol. If the symbol isn't found, * attach a new sub-node to the tree node so that it remains sorted. */ i=search_node(node, symbol, &found_symbol); if(found_symbol==TRUE) { found=node->tree[i]; } else { found=new_node(); found->symbol=symbol; add_node(node, found, i); } return(found);}/*---------------------------------------------------------------------------*//* * Function: Add_Node * * Purpose: Attach a new child node to the sub-tree of the tree * specified. */void add_node(TREE *tree, TREE *node, int position){ register int i; /* * Allocate room for one more child node, which may mean allocating * the sub-tree from scratch. */ if(tree->tree==NULL) { tree->tree=(TREE **)malloc(sizeof(TREE *)*(tree->branch+1)); } else { tree->tree=(TREE **)realloc((TREE **)(tree->tree),sizeof(TREE *)* (tree->branch+1)); } if(tree->tree==NULL) { error("add_node", "Unable to reallocate subtree."); return; } /* * Shuffle the nodes down so that we can insert the new node at the * subtree index given by position. */ for(i=tree->branch; i>position; --i) tree->tree[i]=tree->tree[i-1]; /* * Add the new node to the sub-tree. */ tree->tree[position]=node; tree->branch+=1;}/*---------------------------------------------------------------------------*//* * Function: Search_Node * * Purpose: Perform a binary search for the specified symbol on the
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -