lookdawg.cpp
来自「一个google的OCR源码」· C++ 代码 · 共 219 行
CPP
219 行
/* -*-C-*-
 ********************************************************************************
 *
 * File:        lookdawg.cpp
 * Description: Look up words in a Directed Acyclic Word Graph
 * Author:      Mark Seaman, OCR Technology
 * Created:     Fri Oct 16 14:37:00 1987
 * Modified:    Thu Jul 25 17:09:55 1991 (Mark Seaman) marks@hpgrlt
 * Language:    C
 * Package:     N/A
 * Status:      Reusable Software Component
 *
 * (c) Copyright 1987, Hewlett-Packard Company, all rights reserved.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 ********************************************************************************/

/*----------------------------------------------------------------------
              I n c l u d e s
----------------------------------------------------------------------*/
#include "lookdawg.h"
#include "cutil.h"
#include "trie.h"

#ifdef __UNIX__
#include <assert.h>
#endif

/*----------------------------------------------------------------------
              V a r i a b l e s
----------------------------------------------------------------------*/

/*----------------------------------------------------------------------
              F u n c t i o n s
----------------------------------------------------------------------*/

/**********************************************************************
 * chop_trailing_newline
 *
 * Remove the line terminator that fgets() leaves at the end of a line.
 * The last character is removed only when it really is a '\n'; a final
 * line without a newline, a line truncated by the buffer size, or an
 * empty string is left untouched.  (Blindly overwriting the last
 * character would drop the last letter of such a word, and would index
 * before the buffer for an empty string.)
 **********************************************************************/
static void chop_trailing_newline (char *line) {
  size_t length = strlen (line);

  if (length > 0 && line[length - 1] == '\n') {
    line[length - 1] = (char) 0;
  }
}


/**********************************************************************
 * check_for_words
 *
 * Check the DAWG for the words that are listed in the requested file.
 * A file name of NULL will cause the words to be read from stdin; in
 * that case a "? " prompt is printed before each line is read.
 *
 * Each non-empty line is handled according to the global 'debug' flag:
 *   - debug set:   the word is looked up with word_in_dawg() while
 *                  debug is temporarily cleared; a missing word is
 *                  printed, and when reading interactively the lookup
 *                  is repeated with debug on.  debug is left at 1.
 *   - debug clear: the line is passed to match_words(), so '*'
 *                  characters act as wildcards.
 **********************************************************************/
void check_for_words (EDGE_ARRAY dawg, char *filename) {
  FILE *word_file;
  char string [CHARS_PER_LINE];

  word_file = open_file (filename, "r");
  /* open_file is defined elsewhere; guard in case it can return NULL. */
  if (word_file == NULL) {
    return;
  }

  if (filename == NULL) {
    printf ("? ");
    fflush (stdout);
  }

  while (fgets (string, CHARS_PER_LINE, word_file) != NULL) {
    chop_trailing_newline (string);

    if (strlen (string)) {
      if (debug) {
        /* Silence the lookup for the first pass. */
        debug = 0;
        if (! word_in_dawg (dawg, string)) {
          puts (string);
          if (filename == NULL) {
            /* Interactive use: repeat the failed lookup with debug on. */
            debug = 1;
            word_in_dawg (dawg, string);
          }
        }
        debug = 1;
      }
      else {
        match_words (dawg, string, 0, 0);
      }
    }

    if (filename == NULL) {
      printf ("? ");
      fflush (stdout);
    }
  }

  fclose (word_file);
}


#if 0
/**********************************************************************
 * main
 *
 * Test the DAWG functions.  (Disabled: compiled out by the #if 0.)
 **********************************************************************/
int main (argc, argv)
  int argc;
  char **argv;
{
  inT32 max_num_edges = 700000;
  EDGE_ARRAY dawg;
  int argnum = 1;
  int show_nodes = FALSE;

  dawg = (EDGE_ARRAY) malloc (sizeof (EDGE_RECORD) * max_num_edges);
  if (dawg == NULL) {
    printf ("error: Could not allocate enough memory for DAWG ");
    printf ("(%ld,%03ld bytes needed)\n",
            sizeof (EDGE_RECORD) * max_num_edges / 1000,
            sizeof (EDGE_RECORD) * max_num_edges % 1000);
    exit (1);
  }

  if (! strcmp (argv[argnum], "-v")) {
    show_nodes = TRUE;
    argnum++;
  }

  if (strcmp (argv[argnum], "-f")) {
    read_squished_dawg (argv[argnum++], dawg, max_num_edges);
  }
  else {
    argnum++;
    read_full_dawg (argv[argnum++], dawg, max_num_edges);
  }

  printf ("argc = %d\n", argc);
  print_int ("argnum", argnum);
  print_string (argv[argnum]);

  if (argc < argnum + 1) {
    printf ("Type in words to search for: (use * for wildcard)\n");
    debug = show_nodes;
    check_for_words (dawg, NULL);
    new_line ();
  }
  else {
    print_lost_words (dawg, argv[argnum]);
  }
}
#endif


/**********************************************************************
 * match_words
 *
 * Match all of the words that are specified with this string.  The
 * *'s in this string are wildcards.  Every complete match is printed
 * on its own line.
 *
 *   dawg    the word graph to search
 *   string  the pattern; modified in place while wildcards are being
 *           expanded and restored before returning
 *   index   position in 'string' currently being matched
 *   node    node of the DAWG at which matching of string[index] starts
 **********************************************************************/
void match_words (EDGE_ARRAY dawg,
                  char *string,
                  inT32 index,
                  NODE_REF node) {
  EDGE_REF edge;
  inT32 word_end;

  if (string[index] == '*') {
    /* Wildcard: substitute the letter of each edge visited by the
       edge_loop macro (defined elsewhere), starting at 'node', and
       retry the same position with that concrete letter. */
    edge = node;
    do {
      string[index] = edge_letter (dawg, edge);
      match_words (dawg, string, index, node);
    } edge_loop (dawg, edge);
    /* Put the wildcard back so the caller sees its pattern unchanged. */
    string[index] = '*';
  }
  else {
    /* The word ends here when the next character is the terminator. */
    word_end = (string[index+1] == (char) 0);
    edge = edge_char_of (dawg, node, string[index], word_end);
    if (edge != NO_EDGE) {                 /* Normal edge in DAWG */
      node = next_node (dawg, edge);
      if (word_end) {
        printf ("%s\n", string);
      }
      else if (node != 0) {
        /* Continue with the next character from the following node. */
        match_words (dawg, string, index+1, node);
      }
    }
  }
}


/**********************************************************************
 * print_lost_words
 *
 * Check the DAWG for the words that are listed in the requested file.
 * A file name of NULL will cause the words to be read from stdin.  Print
 * each of the words that can not be found in the DAWG.  Empty lines are
 * skipped.
 **********************************************************************/
void print_lost_words (EDGE_ARRAY dawg, char *filename) {
  FILE *word_file;
  char string [CHARS_PER_LINE];

  word_file = open_file (filename, "r");
  /* open_file is defined elsewhere; guard in case it can return NULL. */
  if (word_file == NULL) {
    return;
  }

  while (fgets (string, CHARS_PER_LINE, word_file) != NULL) {
    chop_trailing_newline (string);

    if (strlen (string)) {
      if (! word_in_dawg (dawg, string)) {
        puts (string);
      }
    }
  }

  fclose (word_file);
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?