⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 reduce.cpp

📁 一个google的OCR源码
💻 CPP
字号:
/* -*-C-*-********************************************************************************** File:         reduce.cpp* Description:  Functions to reduce a TRIE into a DAWG* Author:       Mark Seaman, OCR Technology* Created:      Fri Oct 16 14:37:00 1987* Modified:     Wed Jun 19 16:51:29 1991 (Mark Seaman) marks@hpgrlt* Language:     C* Package:      N/A* Status:       Reusable Software Component** (c) Copyright 1987, Hewlett-Packard Company, all rights reserved.** Licensed under the Apache License, Version 2.0 (the "License");** you may not use this file except in compliance with the License.** You may obtain a copy of the License at** http://www.apache.org/licenses/LICENSE-2.0** Unless required by applicable law or agreed to in writing, software** distributed under the License is distributed on an "AS IS" BASIS,** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.** See the License for the specific language governing permissions and** limitations under the License.**********************************************************************************//*----------------------------------------------------------------------                     I n c l u d e s----------------------------------------------------------------------*/#include "reduce.h"#include "makedawg.h"#include "cutil.h"#ifdef __UNIX__#include <assert.h>#endif/*----------------------------------------------------------------------                     T y p e s----------------------------------------------------------------------*//*----------------------------------------------------------------------                     V a r i a b l e s----------------------------------------------------------------------*/static inT32 debug_1 = 0;/*----------------------------------------------------------------------                     M a c r o s----------------------------------------------------------------------*//*----------------------------------------------------------------------                     F u n c t i o n s----------------------------------------------------------------------*//*********************************************************************** collapse_source_nodes** A pair of edges has been found that can be reduced.  This function* accomplishes that reduction by collapsing the two nodes into a* single node.**********************************************************************/void collapse_source_nodes (EDGE_ARRAY dawg,                            NODE_REF   source_node_1,                            NODE_REF   source_node_2,                            NODE_REF   dest_node,                            inT32        max_num_edges,                            inT32        reserved_edges) {  inT32      num_links;  EDGE_REF   edge;  /*    NODE_REF   new_source_1; */  num_links = num_forward_edges (dawg, source_node_2);  /*   if (debug_1)       printf ("Node = %d, Input 1 = %d, Input 2 = %6d, num_links = %d\n",       dest_node, source_node_1, source_node_2, num_links);       if (debug) {       printf ("Node = %d, Input 1 = %d, Input 2 = %6d, num_links = %d\n",       dest_node, source_node_1, source_node_2, num_links);       print_dawg_node (dawg, source_node_1);       print_dawg_node (dawg, source_node_2);       new_line ();       }  */  /* Remove forward links in   */  edge = source_node_2;                       /* source_node_2 - dest_node */  if (forward_edge (dawg, edge)) {    do {      remove_edge_linkage (dawg, dest_node, source_node_2,                           BACKWARD_EDGE,                           edge_letter (dawg, edge),                           end_of_word (dawg, edge));    } edge_loop (dawg, edge);  }  /* Fix backward links */  edge = source_node_2;                          /* in source_node_2 */  edge += num_forward_edges (dawg, source_node_2);  if (backward_edge (dawg, edge)) {    do {      move_node_if_needed (dawg, &source_node_1,                           max_num_edges, reserved_edges);      add_edge_linkage (dawg, source_node_1, next_node (dawg, edge),                        BACKWARD_EDGE,                        edge_letter (dawg, edge),                        end_of_word (dawg, edge));      /* Node moved */      relocate_edge (dawg, next_node (dawg, edge),                     source_node_2, source_node_1);    } edge_loop (dawg, edge);  }  delete_node (dawg, source_node_2);  /*   if (debug) {       printf ("Number of edges  source = %d, dest = %d\n",       edges_in_node (dawg, new_source_1),       edges_in_node (dawg, dest_node));       print_int ("Number of edges = ", edges_in_node (dawg, dest_node));       print_dawg_node (dawg, new_source_1);       print_string ("_________________________________________");       new_line ();       }  */}/*********************************************************************** eliminate_redundant_edges** Compare these two edges in this node to see if they point to two* nodes that could be collapsed.  If they do, then perform the* reduction and return TRUE.  If not, return FALSE.**********************************************************************/inT32 eliminate_redundant_edges (EDGE_ARRAY dawg,                                 NODE_REF   node,                                 EDGE_REF   edge_1,                                 EDGE_REF   edge_2,                                 inT32        max_num_edges,                                 inT32        reserved_edges) {  static inT32 elim_count = 0;  static inT32 keep_count = 0;  if (same_output (dawg,                   next_node (dawg, edge_1),                   next_node (dawg, edge_2))) {    elim_count++;    collapse_source_nodes (dawg,                           next_node (dawg, edge_1),                           next_node (dawg, edge_2),                           node,                           max_num_edges, reserved_edges);    /*      if (debug_1) {            printf ("Collapsing node %d\n", node);            print_dawg_node (dawg, node);            printf ("Candidate edges = %d, %d\n", edge_1, edge_2);            printf ("Candidate nodes = %d, %d\n\n",            next_node (dawg, edge_1), next_node (dawg, edge_2));            new_line ();            }    */    return (TRUE);  }  else {    keep_count++;  }  return (FALSE);}/*********************************************************************** letter_order** Compare two edges to see which one of the letters is larger.**********************************************************************/inT32 letter_order (const void* edge1_ptr,                    const void* edge2_ptr) {  if (letter_of_edge(*((EDGE_RECORD*) edge1_ptr)) <      letter_of_edge(*((EDGE_RECORD*) edge2_ptr)))    return (-1);  if (letter_of_edge(*((EDGE_RECORD*) edge1_ptr)) >      letter_of_edge(*((EDGE_RECORD*) edge2_ptr)))    return (1);  return (0);}/*  printf ("%c%c   %c%c  ",  edge1.letter, (edge1.flags & WORD_END_FLAG ? '*' : ' '),  edge2.letter, (edge2.flags & WORD_END_FLAG ? '*' : ' '));  printf ("\n");  printf ("+\n");  printf ("-\n");*/void print_n_edges (EDGE_RECORD *edge1,                    inT32         n) {  EDGE_RECORD *edge;  edge = edge1;  while (n-- > 0) {    printf ("%c ", letter_of_edge(edge[0]));    edge++;  }  new_line ();}/*********************************************************************** reduce_lettered_edges** The edge parameter is pointing to the first edge in a group of edges* in this node with a particular letter value.  Look through these edges* to see if any of them can be collapsed.  If so do it.  When all edges* with this letter have been reduced then return to the caller.* If further reduction is possible with this same letter then the* edge parameter is not incremented.  When no further reduction is* possible then FALSE is returned.**********************************************************************/inT32 reduce_lettered_edges (EDGE_ARRAY  dawg,                             EDGE_REF    *edge,                             NODE_REF    node,                             NODE_MARKER reduced_nodes,                             inT32        max_num_edges,                             inT32        reserved_edges) {  EDGE_REF    edge_1;  EDGE_REF    edge_2;  inT32         fixed_one;  inT32         did_something = FALSE;  if (debug_1)    printf ("reduce_lettered_edges (edge=" REFFORMAT ")\n", *edge);  /* Loop for each back edge */  edge_1 = *edge;  while ((! last_edge (dawg, edge_1)) &&         edge_letter (dawg, edge_1) == edge_letter (dawg, *edge)) {    edge_2 = edge_1 + 1;                        /* Compare all back edges */    do {      if (edge_letter (dawg, edge_1) <  edge_letter (dawg, edge_2))        break;      if (debug_1) {        printf (REFFORMAT " (%c), " REFFORMAT " (%c) ",                edge_1, edge_letter (dawg, edge_1),                edge_2, edge_letter (dawg, edge_2));      }      if (edge_2 != edge_1 &&          edge_letter (dawg, edge_2) == edge_letter (dawg, edge_1) &&          end_of_word (dawg, edge_2) == end_of_word (dawg, edge_1) &&          eliminate_redundant_edges (dawg, node, edge_1, edge_2,                                     max_num_edges, reserved_edges)) {        reduced_nodes [next_node (dawg, edge_1)] = 0;        fixed_one     = TRUE;        did_something = TRUE;      }      else {        if (debug_1) printf ("   .");        fixed_one = FALSE;      }      if (debug_1) printf ("\n");    } while (fixed_one ?             edge_occupied (dawg, edge_2) :             (! last_edge (dawg, edge_2++)));    edge_1++;  }  if (! did_something) {    if (last_edge (dawg, edge_1))      return (FALSE);    else      *edge = edge_1;  }  return (TRUE);}/*********************************************************************** reduce_node_input** Eliminate any redundant edges from this node in the DAWG.**********************************************************************/void reduce_node_input (EDGE_ARRAY dawg,                        NODE_REF   node,                        NODE_MARKER reduced_nodes,                        inT32        max_num_edges,                        inT32        reserved_edges) {  EDGE_REF   edge_1;  inT32        forward_edges  = num_forward_edges (dawg, node);  inT32        backward_edges = edges_in_node (dawg, node) - forward_edges;  static inT32 num_nodes_reduced = 0;  if (debug_1) {    printf ("reduce_node_input (node=" REFFORMAT ")\n", node);    print_dawg_node (dawg, node);  }  if (++num_nodes_reduced % 100 == 0) {    printf ("%d nodes reduced\n", num_nodes_reduced);    if (debug_1 && num_nodes_reduced % 1000 == 0) {      write_full_dawg ("temp-save", dawg, max_num_edges);    }  }  qsort ((void *) &edge_of (dawg, node + forward_edges),         backward_edges,         sizeof (EDGE_RECORD),         letter_order);  /*   if (debug_1) {       printf ("__________________________\n");       print_dawg_node (dawg, node);       }  */  edge_1 = node + forward_edges;  while (reduce_lettered_edges (dawg, &edge_1, node, reduced_nodes,                                max_num_edges, reserved_edges));  reduced_nodes [node] = 1;                      /* Mark as reduced */  if (debug_1) {    printf ("After reduction:\n");    print_dawg_node (dawg, node);  }  edge_1 = node + num_forward_edges (dawg, node); /* Reduce next level */  if (backward_edge (dawg, edge_1)) {    do {      if (next_node (dawg, edge_1) &&          reduced_nodes [next_node (dawg, edge_1)] == 0)        reduce_node_input (dawg, next_node (dawg, edge_1), reduced_nodes,                           max_num_edges, reserved_edges);    } edge_loop (dawg, edge_1);  }}/*********************************************************************** same_output** Check to see if these two nodes have identical output.  If so then* they can be collapsed into a single node.**********************************************************************/inT32 same_output (EDGE_ARRAY dawg,                   NODE_REF   node1,                   NODE_REF   node2) {  if (debug_1) printf ("Edge nodes = " REFFORMAT " , " \                     REFFORMAT " \n", node1, node2);  if (num_forward_edges (dawg, node1) == 1  &&      num_forward_edges (dawg, node2) == 1) {    if (debug_1) printf ("   * ");    return (TRUE);  }  else {    if (debug_1) {      printf ("   %d,%d \n",              num_forward_edges (dawg, node1),              num_forward_edges (dawg, node2));      print_dawg_node (dawg, node1);      print_dawg_node (dawg, node2);    }    return (FALSE);  }}/*********************************************************************** trie_to_dawg** Change a Trie data structure into a DAWG by eliminating the redund**********************************************************************/void trie_to_dawg (EDGE_ARRAY dawg,                   inT32        max_num_edges,                   inT32        reserved_edges) {  NODE_MARKER reduced_nodes;  inT32         x;  max_new_attempts = 100000;  compact_dawg (dawg, max_num_edges, reserved_edges);  reduced_nodes = (NODE_MARKER) malloc (max_num_edges);  for (x=0; x<max_num_edges; x++) reduced_nodes [x] = 0;  reduce_node_input (dawg, 0, reduced_nodes, max_num_edges, reserved_edges);  free (reduced_nodes);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -