📄 dictionary.h
字号:
/****************************************************************************** * 光学字符识别程序 * 文件名:dictionary.h * 功能 :字典文件,后处理用 * modified by PRTsinghua@hotmail.com******************************************************************************/#ifndef DICTIONARY_H#define DICTIONARY_H#include "common.h"#include "character_database.h"#include <string>#pragma pack(1)struct node{ enum { transcode_char_NULL = 127, max_branches = character_database::dictionary_lookup_chars }; typedef enum {super_node_type, small_node_type} node_type; bool upper_case_start:1; bool lower_case_start:1; node_type type :1; node(node_type t) : upper_case_start(false), lower_case_start(false), type(t) {}; ~node(); bool Test(const char* matchstr); bool Test(list<recognized_char_group>::const_iterator start, list<recognized_char_group>::const_iterator end, string &result, string &lower_case_variant); bool Insert(const char* matchstr); node **walk(node* &last); bool tree_match(); void insert(node* &last); void tree_statistics(); static string prepare(const char *match, bool &first_is_capital); static void build_table(); static int secure_char_name_to_char_code(char c); static char null() { return static_cast<char>(transcode_char_NULL); } static char transcode[256]; static char lowercode[256]; static bool first_is_capital; static string match; static unsigned int pos; static char push(char c); static void pop() { match.resize(match.length()-1); } static list<recognized_char_group>::const_iterator now_char; static list<recognized_char_group>::const_iterator end_char; static string far; // 不同的案例 static int num_super_nodes; static int num_small_nodes; static int refs_in_super_nodes; static int refs_in_small_nodes;};struct super_node : public node{ node * branch[max_branches]; // 所有可能字符的分支 super_node(); void free(); node **walk(node* &last); bool tree_match(); void insert(node* &last); void tree_statistics();};struct small_node : public node{ enum {max_small_branches=2}; char match_char[max_small_branches]; node* branch[max_small_branches]; // 分支 small_node(); void free(); node **walk(node* &last); bool tree_match(); void insert(node* &last); void tree_statistics();};#pragma pack()class dictionary{ public: dictionary(); ~dictionary(); void read(const char *file); // 特定字符: // '['xyz']' 匹配 x, y or z (没有子组!) // '?' 匹配任何字符 // ' ' 不匹配任何字符 string match(const char *pattern); string match(list<recognized_char_group>::const_iterator start, list<recognized_char_group>::const_iterator end); void code_string_to_string(string &cs); void print_statistics(); private: super_node prefix; super_node words; super_node suffix;};// 内联函数// 跟踪边界inline node **node::walk(node* &last){ if( pos>=match.length() ) return &last; else if( type==super_node_type ) return static_cast<super_node *>(this)->walk(last); else return static_cast<small_node *>(this)->walk(last);}// 数匹配inline bool node::tree_match(){ if( now_char==end_char ) { if( !upper_case_start && !lower_case_start ) return false; if( (first_is_capital && upper_case_start) || (!first_is_capital && lower_case_start) ) return true; if( far.empty() ) far=match; return false; } else if( type==super_node_type ) return static_cast<super_node *>(this)->tree_match(); else return static_cast<small_node *>(this)->tree_match();}// 插入节点inline void node::insert(node* &last){ if( pos>=match.length() ) { if( first_is_capital ) upper_case_start=true; else lower_case_start=true; } else if( type==super_node_type ) static_cast<super_node *>(this)->insert(last); else static_cast<small_node *>(this)->insert(last);}// 树统计inline void node::tree_statistics(){ if( type==super_node_type ) static_cast<super_node *>(this)->tree_statistics(); else static_cast<small_node *>(this)->tree_statistics();}#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -