trie.cpp
来自「一个google的OCR源码」· C++ 代码 · 共 649 行 · 第 1/2 页
CPP
649 行
/* Find N adjacent slots */ for (n=0; n<num_edges && !edge_collision; n++) { if (edge_occupied (dawg, edge_index + n)) edge_collision = TRUE; } if (! edge_collision) { new_counter++; //? max_new_attempts = max (max_new_attempts, i); return (edge_index); } } cprintf ("DAWG Table is too full, nodes = %d, edges = %d, moves %d\n", new_counter, edge_counter, move_counter); return 0;}/*********************************************************************** print_dawg_map** Print the contents of one of the nodes in the DAWG.**********************************************************************/void print_dawg_map (EDGE_ARRAY dawg, inT32 max_num_edges) { EDGE_REF edge = 0; inT32 counter = 0; do { if (edge_occupied (dawg, edge)) printf ("."); else printf (" "); if (counter % 100 == 0) fflush (stdout); } while (edge++ < max_num_edges);}/*********************************************************************** read_full_dawg** Read this DAWG in from a file. The nodes in this DAWG contain both* forward and backward edges. There are gaps in between the nodes.**********************************************************************/void read_full_dawg (const char *filename, EDGE_ARRAY dawg, inT32 max_num_edges) { FILE *file; EDGE_REF node_index; inT32 num_edges; inT32 node_count; inT32 error_occured = FALSE; if (debug) print_string ("read_dawg"); clear_all_edges (dawg, node_index, max_num_edges); file = open_file (filename, "rb"); fread (&node_count, sizeof (inT32), 1, file); while (node_count-- > 0) { fread (&node_index, sizeof (EDGE_REF), 1, file); fread (&num_edges, sizeof (inT32), 1, file); assert (node_index + num_edges < max_num_edges); fread (&dawg[node_index], sizeof (EDGE_RECORD), num_edges, file); if (debug > 2) { print_dawg_node (dawg, node_index); new_line (); } } fclose (file); if (error_occured) exit (1);}/********************************************************************** * read_word_list * * Read the requested file (containing a list of words) and add all * the words to the DAWG. **********************************************************************/void read_word_list(const char *filename, EDGE_ARRAY dawg, inT32 max_num_edges, inT32 reserved_edges) { FILE *word_file; char string [CHARS_PER_LINE]; int word_count = 0; int old_debug = debug; if (debug > 0 && debug < 3) debug = 0; word_file = open_file (filename, "r"); initialize_dawg(dawg, max_num_edges); while (fgets (string, CHARS_PER_LINE, word_file) != NULL) { string [strlen (string) - 1] = (char) 0; ++word_count; if (debug && word_count % 10000 == 0) cprintf("Read %d words so far\n", word_count); if (string[0] != '\0' /* strlen (string) */) { add_word_to_dawg(dawg, string, max_num_edges, reserved_edges); if (! word_in_dawg (dawg, string)) { cprintf ("error: word not in DAWG after adding it '%s'\n", string); return; } } } debug = old_debug; if (debug) cprintf("Read %d words total.\n", word_count); fclose(word_file);}/********************************************************************** * relocate_edge * * The location of a node has moved, so an edge entry in another node * must be changed. Change the value of this edge entry to match the * new location of the node. **********************************************************************/void relocate_edge(EDGE_ARRAY dawg, NODE_REF node, NODE_REF old_node, NODE_REF new_node) { EDGE_REF edge; if (debug) cprintf ("relocate (" REFFORMAT ", " REFFORMAT " ==> " \ REFFORMAT ")\n", node, old_node, new_node); edge = node; if (forward_edge (dawg, edge)) { do { if (next_node (dawg, edge) == old_node) { if (debug) cprintf ("forward assign (" REFFORMAT ", " REFFORMAT " ==> " \ REFFORMAT ")\n", edge, old_node, new_node); set_next_edge(dawg, edge, new_node); } } edge_loop (dawg, edge); } if (backward_edge (dawg, edge)) { do { if (next_node (dawg, edge) == old_node) { if (debug) cprintf ("backward assign (" REFFORMAT ", " REFFORMAT " ==> " \ REFFORMAT ")\n", edge, old_node, new_node); set_next_edge(dawg, edge, new_node); } } edge_loop(dawg, edge); }}/********************************************************************** * remove_edge * * Add a single edge linkage to between the two nodes. This function * can be used to add either forward or backward links. **********************************************************************/void remove_edge(EDGE_ARRAY dawg, NODE_REF node1, NODE_REF node2, char character, EDGE_RECORD word_end) { remove_edge_linkage(dawg, node1, node2, FORWARD_EDGE, character, word_end); remove_edge_linkage(dawg, node2, node1, BACKWARD_EDGE, character, word_end);}/********************************************************************** * remove_edge_linkage * * Remove this edge reference from this node. Move the edge entries in * this node to fill the gap. **********************************************************************/void remove_edge_linkage(EDGE_ARRAY dawg, NODE_REF node, NODE_REF next, EDGE_RECORD direction, char character, EDGE_RECORD word_end) { inT32 forward_edges; inT32 num_edges; NODE_REF e = node; inT32 last_flag; forward_edges = num_forward_edges (dawg, node); num_edges = edges_in_node (dawg, node); for (e=node; e<node+num_edges; e++) { /* Is this the edge*/ if ((edge_letter (dawg, e) == character) && ((direction == FORWARD_EDGE) ? forward_edge(dawg,e) : backward_edge(dawg,e)) && (next_node (dawg, e) == next) && (word_end ? end_of_word(dawg,e) : (! end_of_word(dawg,e)))) { if (debug) cprintf ("remove (edge = " REFFORMAT ", character is '%c')\n", e, edge_letter (dawg, e)); /* Delete the slot */ last_flag = last_edge (dawg, e); set_empty_edge(dawg, e); move_edges (dawg, e+1, e, num_edges+node-e-1); /* Restore 'last' flag */ if (direction == FORWARD_EDGE) { if ((forward_edges - 1) && (forward_edges - 1 == e - node)) { set_last_flag (dawg, e - 1); } } else { if ((num_edges - forward_edges - 1) && (num_edges - 1 == e - node)) { set_last_flag (dawg, e - 1); } } if (debug) print_dawg_node(dawg, node); return; } } cprintf ("error: Could not find the edge to remove, %d char = '%c'\n", next, character); print_dawg_node(dawg, node); exit (1);}/********************************************************************** * room_in_node * * Check to see if there is enough room left in this node for one more * edge link. This may be a forward or backward link. **********************************************************************/inT32 room_in_node(EDGE_ARRAY dawg, NODE_REF node) { EDGE_REF edge = node; if (edge_occupied (dawg, edge + edges_in_node (dawg, node))) { return (FALSE); } else { return (TRUE); }}/*********************************************************************** write_full_dawg** Write the DAWG out to a file**********************************************************************/void write_full_dawg (const char *filename, EDGE_ARRAY dawg, inT32 max_num_edges) { FILE *file; EDGE_REF edge; inT32 num_edges; inT32 node_count = 0; NODE_REF node; if (debug) print_string ("write_full_dawg"); for (edge=0; edge<max_num_edges; edge++) { /* Count nodes links */ if (forward_edge (dawg, edge)) { node_count++; if (forward_edge (dawg, edge)) edge_loop (dawg, edge); if (backward_edge (dawg, edge)) edge_loop (dawg, edge); edge--; } } file = open_file (filename, "wb"); fwrite (&node_count, sizeof (inT32), 1, file); node_count = 0; for (edge=0; edge<max_num_edges; edge++) { /* Write all edges */ if (forward_edge (dawg, edge)) { node = edge; num_edges = edges_in_node (dawg, edge); assert ((node + num_edges < max_num_edges) && (num_edges > 0)); fwrite (&edge, sizeof (EDGE_REF), 1, file); fwrite (&num_edges, sizeof (inT32), 1, file); fwrite (&edge_of (dawg,edge), sizeof (EDGE_RECORD), num_edges, file); node_count++; if (debug) { printf ("Writing node index=" REFFORMAT ", node_count=%d, edges=%d\n", node, node_count, num_edges); for (edge = node; edge < node + num_edges; edge++) { printf ("Writing index=" REFFORMAT ", old_node=" REFFORMAT \ ", letter=%c, flags=%d\n", node, next_node (dawg,edge), (char) edge_letter(dawg,edge), (char) edge_flags (dawg,edge)); } } edge = node + num_edges - 1; } } fclose (file);}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?