lookdawg.cpp

来自「一个google的OCR源码」· C++ 代码 · 共 219 行

CPP
219
字号
/* -*-C-*-********************************************************************************** File:         lookdawg.cpp* Description:  Look up words in a Directed Accyclic Word Graph* Author:       Mark Seaman, OCR Technology* Created:      Fri Oct 16 14:37:00 1987* Modified:     Thu Jul 25 17:09:55 1991 (Mark Seaman) marks@hpgrlt* Language:     C* Package:      N/A* Status:       Reusable Software Component** (c) Copyright 1987, Hewlett-Packard Company, all rights reserved.** Licensed under the Apache License, Version 2.0 (the "License");** you may not use this file except in compliance with the License.** You may obtain a copy of the License at** http://www.apache.org/licenses/LICENSE-2.0** Unless required by applicable law or agreed to in writing, software** distributed under the License is distributed on an "AS IS" BASIS,** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.** See the License for the specific language governing permissions and** limitations under the License.**********************************************************************************//*----------------------------------------------------------------------                     I n c l u d e s----------------------------------------------------------------------*/#include "lookdawg.h"#include "cutil.h"#include "trie.h"#ifdef __UNIX__#include <assert.h>#endif/*----------------------------------------------------------------------                     V a r i a b l e s----------------------------------------------------------------------*//*----------------------------------------------------------------------                     F u n c t i o n s----------------------------------------------------------------------*//*********************************************************************** check_for_words** Check the DAWG for the words that are listed in the requested file.* A file name of NULL will cause the words to be read from stdin.**********************************************************************/void check_for_words (EDGE_ARRAY dawg,                      char       *filename) {  FILE       *word_file;  char       string [CHARS_PER_LINE];  word_file = open_file (filename, "r");  if (filename == NULL) {    printf ("? ");    fflush (stdout);  }  while (fgets (string, CHARS_PER_LINE, word_file) != NULL) {    string [strlen (string) - 1] = (char) 0;    if (strlen (string)) {      if (debug) {        debug=0;        if (! word_in_dawg (dawg, string)) {          puts (string);          if (filename == NULL) {            debug = 1;            word_in_dawg (dawg, string);          }        }        debug = 1;      }      else {        match_words (dawg, string, 0, 0);      }    }    if (filename == NULL) {      printf ("? ");      fflush (stdout);    }  }  fclose (word_file);}#if 0/*********************************************************************** main** Test the DAWG functions.**********************************************************************/int main (argc, argv)   int  argc;   char **argv;{   inT32       max_num_edges  = 700000;   EDGE_ARRAY  dawg;   int         argnum = 1;   int         show_nodes = FALSE;   dawg = (EDGE_ARRAY) malloc (sizeof (EDGE_RECORD) * max_num_edges);   if (dawg == NULL) {      printf ("error: Could not allocate enough memory for DAWG  ");      printf ("(%ld,%03ld bytes needed)\n",	      sizeof (EDGE_RECORD) * max_num_edges / 1000,	      sizeof (EDGE_RECORD) * max_num_edges % 1000);      exit (1);   }   if (! strcmp (argv[argnum], "-v")) {      show_nodes = TRUE;      argnum++;   }   if (strcmp  (argv[argnum], "-f")) {      read_squished_dawg  (argv[argnum++], dawg, max_num_edges);   }   else {      argnum++;      read_full_dawg  (argv[argnum++], dawg, max_num_edges);   }   printf ("argc = %d\n", argc);   print_int ("argnum", argnum);   print_string (argv[argnum]);   if (argc < argnum + 1) {      printf ("Type in words to search for:          (use * for wildcard)\n");      debug = show_nodes;      check_for_words (dawg, NULL);      new_line ();   }   else {      print_lost_words (dawg, argv[argnum]);   }}#endif/*********************************************************************** match_words** Match all of the words that are specified with this string.  The *'s* in this string are wildcards.**********************************************************************/void match_words (EDGE_ARRAY  dawg,                  char        *string,                  inT32         index,                  NODE_REF    node) {  EDGE_REF   edge;  inT32        word_end;  if (string[index] == '*') {    edge = node;    do {      string[index] = edge_letter (dawg, edge);      match_words (dawg, string, index, node);    } edge_loop (dawg, edge);    string[index] = '*';  }  else {    word_end = (string[index+1] == (char) 0);    edge = edge_char_of (dawg, node, string[index], word_end);    if (edge != NO_EDGE) {                         /* Normal edge in DAWG */      node = next_node (dawg, edge);      if (word_end) {        printf ("%s\n", string);      }      else if (node != 0) {        match_words (dawg, string, index+1, node);      }    }  }}/*********************************************************************** print_lost_words** Check the DAWG for the words that are listed in the requested file.* A file name of NULL will cause the words to be read from stdin. Print* each of the words that can not be found in the DAWG.**********************************************************************/void print_lost_words (EDGE_ARRAY dawg,                       char       *filename) {  FILE       *word_file;  char       string [CHARS_PER_LINE];  word_file = open_file (filename, "r");  while (fgets (string, CHARS_PER_LINE, word_file) != NULL) {    string [strlen (string) - 1] = (char) 0;    if (strlen (string)) {      if (! word_in_dawg (dawg, string)) {        puts (string);      }    }  }  fclose (word_file);}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?