trie.cpp

来自「一个google的OCR源码」· C++ 代码 · 共 649 行 · 第 1/2 页

CPP
649
字号
    /* Find N adjacent slots */    for  (n=0; n<num_edges && !edge_collision; n++) {      if (edge_occupied (dawg, edge_index + n))        edge_collision = TRUE;    }    if (! edge_collision) {      new_counter++;      //?			max_new_attempts = max (max_new_attempts, i);      return (edge_index);    }  }  cprintf ("DAWG Table is too full, nodes = %d, edges = %d, moves %d\n",           new_counter, edge_counter, move_counter);  return 0;}/*********************************************************************** print_dawg_map** Print the contents of one of the nodes in the DAWG.**********************************************************************/void print_dawg_map (EDGE_ARRAY dawg, inT32 max_num_edges) {   EDGE_REF edge = 0;   inT32 counter = 0;   do {      if (edge_occupied (dawg, edge))        printf (".");      else        printf (" ");      if (counter % 100 == 0) fflush (stdout);   } while (edge++ < max_num_edges);}/*********************************************************************** read_full_dawg** Read this DAWG in from a file.  The nodes in this DAWG contain both* forward and backward edges.  There are gaps in between the nodes.**********************************************************************/void read_full_dawg (const char *filename,                     EDGE_ARRAY dawg,                     inT32 max_num_edges) {   FILE       *file;   EDGE_REF   node_index;   inT32      num_edges;   inT32      node_count;   inT32      error_occured = FALSE;   if (debug) print_string ("read_dawg");   clear_all_edges (dawg, node_index, max_num_edges);   file = open_file (filename, "rb");   fread (&node_count, sizeof (inT32), 1, file);   while (node_count-- > 0) {      fread (&node_index, sizeof (EDGE_REF), 1, file);      fread (&num_edges,  sizeof (inT32),    1, file);      assert (node_index + num_edges < max_num_edges);      fread (&dawg[node_index], sizeof (EDGE_RECORD), num_edges, file);      if (debug > 2) {         print_dawg_node (dawg, node_index);         new_line ();      }   }   fclose (file);   if (error_occured) exit (1);}/********************************************************************** * read_word_list * * Read the requested file (containing a list of words) and add all * the words to the DAWG. **********************************************************************/void read_word_list(const char *filename,                    EDGE_ARRAY dawg,                    inT32 max_num_edges,                    inT32 reserved_edges) {  FILE *word_file;  char string [CHARS_PER_LINE];  int  word_count = 0;  int old_debug = debug;  if (debug > 0 && debug < 3)    debug = 0;  word_file = open_file (filename, "r");  initialize_dawg(dawg, max_num_edges);  while (fgets (string, CHARS_PER_LINE, word_file) != NULL) {    string [strlen (string) - 1] = (char) 0;    ++word_count;    if (debug && word_count % 10000 == 0)      cprintf("Read %d words so far\n", word_count);    if (string[0] != '\0' /* strlen (string) */) {      add_word_to_dawg(dawg, string, max_num_edges, reserved_edges);      if (! word_in_dawg (dawg, string)) {        cprintf ("error: word not in DAWG after adding it '%s'\n", string);      return;      }    }  }  debug = old_debug;  if (debug)    cprintf("Read %d words total.\n", word_count);  fclose(word_file);}/********************************************************************** * relocate_edge * * The location of a node has moved, so an edge entry in another node * must be changed.  Change the value of this edge entry to match the * new location of the node. **********************************************************************/void relocate_edge(EDGE_ARRAY dawg,                   NODE_REF node,                   NODE_REF old_node,                   NODE_REF new_node) {  EDGE_REF   edge;  if (debug) cprintf ("relocate (" REFFORMAT ", " REFFORMAT " ==> " \                      REFFORMAT ")\n", node, old_node, new_node);  edge = node;  if (forward_edge (dawg, edge)) {    do {      if (next_node (dawg, edge) == old_node) {        if (debug)          cprintf ("forward assign (" REFFORMAT ", " REFFORMAT " ==> " \                   REFFORMAT ")\n", edge, old_node, new_node);        set_next_edge(dawg, edge, new_node);      }    } edge_loop (dawg, edge);  }  if (backward_edge (dawg, edge)) {    do {      if (next_node (dawg, edge) == old_node) {        if (debug)          cprintf ("backward assign (" REFFORMAT ", " REFFORMAT " ==> " \                   REFFORMAT ")\n", edge, old_node, new_node);        set_next_edge(dawg, edge, new_node);      }    }    edge_loop(dawg, edge);  }}/********************************************************************** * remove_edge * * Add a single edge linkage to between the two nodes.  This function * can be used to add either forward or backward links. **********************************************************************/void remove_edge(EDGE_ARRAY dawg,                 NODE_REF node1,                 NODE_REF node2,                 char character,                 EDGE_RECORD word_end) {  remove_edge_linkage(dawg, node1, node2, FORWARD_EDGE, character, word_end);  remove_edge_linkage(dawg, node2, node1, BACKWARD_EDGE, character, word_end);}/********************************************************************** * remove_edge_linkage * * Remove this edge reference from this node.  Move the edge entries in * this node to fill the gap. **********************************************************************/void remove_edge_linkage(EDGE_ARRAY dawg,                         NODE_REF node,                         NODE_REF next,                         EDGE_RECORD direction,                         char character,                         EDGE_RECORD word_end) {  inT32      forward_edges;  inT32      num_edges;  NODE_REF   e = node;  inT32      last_flag;  forward_edges = num_forward_edges (dawg, node);  num_edges = edges_in_node (dawg, node);  for (e=node; e<node+num_edges; e++) {    /* Is this the edge*/    if ((edge_letter (dawg, e) == character) &&      ((direction == FORWARD_EDGE) ?      forward_edge(dawg,e) : backward_edge(dawg,e)) &&      (next_node (dawg, e) == next) &&    (word_end ? end_of_word(dawg,e) : (! end_of_word(dawg,e)))) {      if (debug)        cprintf ("remove (edge = " REFFORMAT ", character is '%c')\n",          e, edge_letter (dawg, e));      /* Delete the slot */      last_flag = last_edge (dawg, e);      set_empty_edge(dawg, e);      move_edges (dawg, e+1, e, num_edges+node-e-1);      /* Restore 'last' flag */      if (direction == FORWARD_EDGE) {        if ((forward_edges - 1) &&        (forward_edges - 1 == e - node)) {          set_last_flag (dawg, e - 1);        }      }      else {        if ((num_edges - forward_edges - 1) &&        (num_edges - 1 == e - node)) {          set_last_flag (dawg, e - 1);        }      }      if (debug)        print_dawg_node(dawg, node);      return;    }  }  cprintf ("error: Could not find the edge to remove, %d char = '%c'\n",           next, character);  print_dawg_node(dawg, node);  exit (1);}/********************************************************************** * room_in_node * * Check to see if there is enough room left in this node for one more * edge link.  This may be a forward or backward link. **********************************************************************/inT32 room_in_node(EDGE_ARRAY dawg, NODE_REF node) {  EDGE_REF   edge = node;  if (edge_occupied (dawg, edge + edges_in_node (dawg, node))) {    return (FALSE);  }  else {    return (TRUE);  }}/*********************************************************************** write_full_dawg** Write the DAWG out to a file**********************************************************************/void write_full_dawg (const char *filename, EDGE_ARRAY dawg,                      inT32 max_num_edges) {   FILE       *file;   EDGE_REF   edge;   inT32      num_edges;   inT32      node_count = 0;   NODE_REF   node;   if (debug) print_string ("write_full_dawg");   for (edge=0; edge<max_num_edges; edge++) {                                                  /* Count nodes links */      if (forward_edge (dawg, edge)) {         node_count++;         if (forward_edge  (dawg, edge))  edge_loop (dawg, edge);         if (backward_edge (dawg, edge))  edge_loop (dawg, edge);         edge--;      }   }   file = open_file (filename, "wb");   fwrite (&node_count, sizeof (inT32), 1, file);   node_count = 0;   for (edge=0; edge<max_num_edges; edge++) {                                                  /* Write all edges */      if (forward_edge (dawg, edge)) {         node =  edge;         num_edges = edges_in_node (dawg, edge);         assert ((node + num_edges < max_num_edges) && (num_edges > 0));         fwrite (&edge,                sizeof (EDGE_REF),    1,         file);	 fwrite (&num_edges,           sizeof (inT32),       1,         file);	 fwrite (&edge_of (dawg,edge), sizeof (EDGE_RECORD), num_edges, file);         node_count++;         if (debug) {           printf ("Writing node index=" REFFORMAT                   ", node_count=%d, edges=%d\n",                   node, node_count, num_edges);           for (edge = node; edge < node + num_edges; edge++)  {              printf ("Writing index=" REFFORMAT ", old_node=" REFFORMAT \                      ", letter=%c, flags=%d\n",                      node, next_node (dawg,edge),                      (char) edge_letter(dawg,edge),                      (char) edge_flags (dawg,edge));           }         }         edge = node + num_edges - 1;      }   }   fclose (file);}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?