⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 trie.cpp

📁 一OCR的相关资料。.希望对研究OCR的朋友有所帮助.
💻 CPP
字号:
/* -*-C-*- ******************************************************************************** * * File:        trie.c  (Formerly trie.c) * Description:  Functions to build a trie data structure. * Author:       Mark Seaman, OCR Technology * Created:      Fri Oct 16 14:37:00 1987 * Modified:     Fri Jul 26 12:18:10 1991 (Mark Seaman) marks@hpgrlt * Language:     C * Package:      N/A * Status:       Reusable Software Component * * (c) Copyright 1987, Hewlett-Packard Company. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** http://www.apache.org/licenses/LICENSE-2.0 ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License. * *********************************************************************************//*----------------------------------------------------------------------              I n c l u d e s----------------------------------------------------------------------*/#include "trie.h"#include "callcpp.h"#ifdef __UNIX__#include <assert.h>#endif#include <stdio.h>/*----------------------------------------------------------------------              V a r i a b l e s----------------------------------------------------------------------*/static INT32 move_counter = 0;static INT32 new_counter  = 0;static INT32 edge_counter = 0;/*----------------------------------------------------------------------              F u n c t i o n s----------------------------------------------------------------------*//********************************************************************** * add_edge_linkage * * Add a single edge linkage to between the two nodes.  This function * can be used to add either forward or backward links. **********************************************************************/void add_edge_linkage(EDGE_ARRAY dawg,                      NODE_REF node1,                      NODE_REF node2,                      INT32 direction,                      char character,                      INT32 word_end) {  EDGE_REF edge1 = node1;  EDGE_REF edge2;  INT32      num_edges = edges_in_node (dawg, node1);  INT32      last_one;  word_end  = (word_end ? WERD_END_FLAG : 0);  if (num_edges == 0) {          /* No edges yet */    direction = ((direction == FORWARD_EDGE) ? DIRECTION_FLAG : 0);    link_edge  (dawg, edge1, node2,  character,      LAST_FLAG | direction | word_end);  }  else {                                 /* Forward links */    if (direction == FORWARD_EDGE) {      last_one = (forward_edge (dawg, edge1) ? 0 : LAST_FLAG);      if (debug)        cprintf ("moving edges (nodes = %ld, %dl, num = %ld)\n",          edge1, edge1+1, num_edges);      copy_edges (dawg, edge1, edge1+1, num_edges);      link_edge  (dawg, edge1, node2,  character,        last_one | DIRECTION_FLAG | word_end);    }    else {                       /* Backward links */      if (forward_edge (dawg, edge1))        edge_loop(dawg, edge1);                                  /* Existing back edges */      if (backward_edge (dawg,edge1)) {        num_edges = 0;        edge2 = edge1;        do { num_edges++; }        edge_loop(dawg, edge2);         if (debug)          cprintf ("moving edges (nodes = %ld, %ld, num = %ld)\n",            edge1, edge1+1, num_edges);        copy_edges (dawg, edge1, edge1+1, num_edges);        link_edge(dawg, edge1, node2, character, word_end);       }      else {                     /* First back edge */        link_edge  (dawg, edge1, node2,  character,          LAST_FLAG | word_end);      }    }  }}/********************************************************************** * add_new_edge * * Add an edge between two nodes in the DAWG.  Link the nodes both ways. **********************************************************************/void add_new_edge(EDGE_ARRAY dawg,                  NODE_REF *node1,                  NODE_REF *node2,                  char character,                  INT32 word_end,                  INT32 max_num_edges,                  INT32 reserved_edges) {  int direction;  if (debug)    cprintf ("add_new_edge (nodes = %ld, %ld, ch = '%c', end = %d)\n",      *node1, *node2,  character, word_end);  edge_counter++;  move_node_if_needed(dawg, *node1, max_num_edges, reserved_edges);   move_node_if_needed(dawg, *node2, max_num_edges, reserved_edges);   direction = (int) FORWARD_EDGE;  add_edge_linkage(dawg, *node1, *node2, direction, character, word_end);   direction = (int) BACKWARD_EDGE;  add_edge_linkage(dawg, *node2, *node1, direction, character, word_end); }/********************************************************************** * add_word_to_dawg * * Add in a word by creating the necessary nodes and edges. **********************************************************************/void add_word_to_dawg(EDGE_ARRAY dawg,                      char *string,                      INT32 max_num_edges,                      INT32 reserved_edges) {  EDGE_REF    edge;  NODE_REF    last_node = 0;  NODE_REF    the_next_node;  INT32         i;  INT32         still_finding_chars = TRUE;  INT32         word_end = FALSE;  for (i=0; i<strlen(string)-1; i++) {    if (still_finding_chars) {      edge = edge_char_of (dawg, last_node, string[i], word_end);      if (debug) cprintf ("exploring edge = %d\n", edge);      if (edge == NO_EDGE)        still_finding_chars = FALSE;      else      if (next_node (dawg, edge) == 0) {        word_end = TRUE;        still_finding_chars = FALSE;        if (! case_sensative) string[i] = tolower (string[i]);        remove_edge (dawg, last_node, 0, string[i], word_end);      }      else {        last_node = next_node (dawg, edge);      }    }    if (! still_finding_chars) {      the_next_node = new_dawg_node (dawg, DEFAULT_NODE_SIZE, max_num_edges, reserved_edges);      if (edges_in_node (dawg, last_node) + last_node == the_next_node) {        cprintf ("Node collision at %d\n", the_next_node);        the_next_node = new_dawg_node (dawg, DEFAULT_NODE_SIZE, max_num_edges, reserved_edges);      }      if (! case_sensative) string[i] = tolower (string[i]);      add_new_edge (dawg, &last_node, &the_next_node,        string[i], word_end, max_num_edges, reserved_edges);      word_end = FALSE;      if (debug)        cprintf ("adding node = %ld\n", the_next_node);      last_node = the_next_node;    }  }  the_next_node = 0;  if (! case_sensative) string[i] = tolower (string[i]);  add_new_edge (dawg, &last_node, &the_next_node,    string[i], TRUE, max_num_edges, reserved_edges);  if (edges_in_node (dawg, 0) > reserved_edges) {    cprintf ("error: Not enough room in root node, %d\n",      edges_in_node (dawg, 0));    exit (1);  }}/********************************************************************** * initialize_dawg * * Initialize the DAWG data structure for further used.  Reset each of * the edge cells to NO_EDGE. **********************************************************************/void initialize_dawg(EDGE_ARRAY dawg, INT32 max_num_edges) {   INT32 x;  clear_all_edges(dawg, x, max_num_edges); }/********************************************************************** * move_node * * Move the location in the edge array of this node in the DAWG. **********************************************************************/NODE_REF move_node(EDGE_ARRAY dawg,                   NODE_REF node,                   INT32 max_num_edges,                   INT32 reserved_edges) {  NODE_REF   this_new_node;  EDGE_REF   edge;  INT32        num_edges = edges_in_node (dawg, node);  if (debug)    print_dawg_node(dawg, node);   this_new_node = new_dawg_node (dawg, num_edges + EDGE_NUM_MARGIN, max_num_edges, reserved_edges);  if (debug)    cprintf ("move_node  (from = %ld, to = %ld, num = %ld)\n",      node, this_new_node, num_edges);  move_edges(dawg, node, this_new_node, num_edges);   if (debug)    print_dawg_node(dawg, this_new_node);   edge = this_new_node;  if (forward_edge (dawg, edge)) {    do {      relocate_edge (dawg, next_node (dawg, edge), node, this_new_node);    } edge_loop (dawg, edge);  }  if (backward_edge (dawg, edge)) {    do {      relocate_edge (dawg, next_node (dawg, edge), node, this_new_node);    } edge_loop (dawg, edge);  }  move_counter++;  return (this_new_node);}/********************************************************************** * new_dawg_node * * Create a space within the DAWG data structure to house a node that * consists of the requested number of edges. **********************************************************************/NODE_REF new_dawg_node(EDGE_ARRAY dawg,                       INT32 num_edges,                       INT32 max_num_edges,                       INT32 reserved_edges) {  INT32        i;  INT32        n;  INT32        edge_index;  INT32        edge_collision;  /* Try several times */  for (i=0; i<NUM_PLACEMENT_ATTEMPTS; i++) {    edge_index = reserved_edges +      long_rand (max_num_edges - MAX_WERD_LENGTH - reserved_edges);    edge_collision = FALSE;    /* Find N adjacent slots */    for  (n=0; n<num_edges && !edge_collision; n++) {      if (edge_occupied (dawg, edge_index + n))        edge_collision = TRUE;    }    if (! edge_collision) {      new_counter++;      //?			max_new_attempts = max (max_new_attempts, i);      return (edge_index);    }  }  cprintf ("DAWG Table is too full, nodes = %ld, edges = %ld, moves %ld\n",    new_counter, edge_counter, move_counter);  exit (1);  return 0;}/********************************************************************** * read_word_list * * Read the requested file (containing a list of words) and add all * the words to the DAWG. **********************************************************************/void read_word_list(char *filename,                    EDGE_ARRAY dawg,                    INT32 max_num_edges,                    INT32 reserved_edges) {  FILE       *word_file;  char       string [CHARS_PER_LINE];  word_file = open_file (filename, "r");  initialize_dawg(dawg, max_num_edges);   while (fgets (string, CHARS_PER_LINE, word_file) != NULL) {    string [strlen (string) - 1] = (char) 0;    if (strlen (string)) {      add_word_to_dawg(dawg, string, max_num_edges, reserved_edges);       if (! word_in_dawg (dawg, string)) {        cprintf ("error: word not in DAWG after adding it '%s'\n", string);        return;      }    }  }}/********************************************************************** * relocate_edge * * The location of a node has moved, so an edge entry in another node * must be changed.  Change the value of this edge entry to match the * new location of the node. **********************************************************************/void relocate_edge(EDGE_ARRAY dawg,                   NODE_REF node,                   NODE_REF old_node,                   NODE_REF new_node) {  EDGE_REF   edge;  if (debug) cprintf ("relocate (%ld, %ld ==> %ld)\n", node, old_node, new_node);  edge = node;  if (forward_edge (dawg, edge)) {    do {      if (next_node (dawg, edge) == old_node) {        if (debug)          cprintf ("forward assign (%ld, %ld ==> %ld)\n", edge, old_node, new_node);        set_next_edge(dawg, edge, new_node);       }    } edge_loop (dawg, edge);  }  if (backward_edge (dawg, edge)) {    do {      if (next_node (dawg, edge) == old_node) {        if (debug)          cprintf ("backward assign (%ld, %ld ==> %ld)\n", edge, old_node, new_node);        set_next_edge(dawg, edge, new_node);       }    }    edge_loop(dawg, edge);   }}/********************************************************************** * remove_edge * * Add a single edge linkage to between the two nodes.  This function * can be used to add either forward or backward links. **********************************************************************/void remove_edge(EDGE_ARRAY dawg,                 NODE_REF node1,                 NODE_REF node2,                 char character,                 INT32 word_end) {  remove_edge_linkage(dawg, node1, node2, FORWARD_EDGE, character, word_end);   remove_edge_linkage(dawg, node2, node1, BACKWARD_EDGE, character, word_end); }/********************************************************************** * remove_edge_linkage * * Remove this edge reference from this node.  Move the edge entries in * this node to fill the gap. **********************************************************************/void remove_edge_linkage(EDGE_ARRAY dawg,                         NODE_REF node,                         NODE_REF next,                         INT32 direction,                         char character,                         INT32 word_end) {  INT32      forward_edges;  INT32      num_edges;  INT32      e = node;  INT32      last_flag;  forward_edges = num_forward_edges (dawg, node);  num_edges = edges_in_node (dawg, node);  for (e=node; e<node+num_edges; e++) {    /* Is this the edge*/    if ((edge_letter (dawg, e) == character) &&      ((direction == FORWARD_EDGE) ?      forward_edge(dawg,e) : backward_edge(dawg,e)) &&      (next_node (dawg, e) == next) &&    (word_end ? end_of_word(dawg,e) : (! end_of_word(dawg,e)))) {      if (debug)        cprintf ("remove (edge = %ld, character is '%c')\n",          e, edge_letter (dawg, e));      /* Delete the slot */      last_flag = last_edge (dawg, e);      set_empty_edge(dawg, e);       move_edges (dawg, e+1, e, num_edges+node-e-1);      /* Restore 'last' flag */      if (direction == FORWARD_EDGE) {        if ((forward_edges - 1) &&        (forward_edges - 1 == e - node)) {          set_last_flag (dawg, e - 1);        }      }      else {        if ((num_edges - forward_edges - 1) &&        (num_edges - 1 == e - node)) {          set_last_flag (dawg, e - 1);        }      }      if (debug)        print_dawg_node(dawg, node);       return;    }  }  cprintf ("error: Could not find the edge to remove, %d\n", next);  print_dawg_node(dawg, node);   exit (1);}/********************************************************************** * room_in_node * * Check to see if there is enough room left in this node for one more * edge link.  This may be a forward or backward link. **********************************************************************/INT32 room_in_node(EDGE_ARRAY dawg, NODE_REF node) {   EDGE_REF   edge = node;  if (edge_occupied (dawg, edge + edges_in_node (dawg, node))) {    return (FALSE);  }  else {    return (TRUE);  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -