⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 profile.cpp

📁 Ocr source code. provides an Ocr engine that works in C++ language.
💻 CPP
字号:
/* profile.cpp : code for storing info about the trained profile and
 *               related functions;
 * Author: Maxie D. Schmidt (created June 2006)
 */

#include "profile.h"

namespace {

// Grid/cell sizes and the space / line-height intervals stored in the first
// eight lines of a profile file (one integer per line, in this order).
struct loaded_profile_header {
     int gsx, gsy, gcx, gcy;
     int space_lower, space_upper;
     int lineh_lower, lineh_upper;
};

// Reads the next line of fp (at most 255 characters per read) and stores it,
// whitespace-trimmed, in out. Returns false when nothing could be read
// (end of file or read error); out is left untouched in that case.
bool read_trimmed_line(FILE *fp, string &out) {
     char buffer[256];
     if(fgets(buffer, sizeof(buffer), fp) == NULL)
          return false;
     out = trim_whitespace(string(buffer));
     return true;
}

// Reads the next line of fp and converts it with atoi (so non-numeric text
// yields 0). Returns false when no line could be read.
bool read_int_line(FILE *fp, int &value) {
     string text;
     if(!read_trimmed_line(fp, text))
          return false;
     value = atoi(text.c_str());
     return true;
}

// Reads and validates the eight header values. Logs and returns false when
// a value is missing, a grid size is not positive, or an interval is not
// positive / has lower > upper.
bool read_profile_header(FILE *fp, loaded_profile_header &hdr) {
     int *fields[8] = { &hdr.gsx, &hdr.gsy, &hdr.gcx, &hdr.gcy,
                        &hdr.space_lower, &hdr.space_upper,
                        &hdr.lineh_lower, &hdr.lineh_upper };
     for(int i = 0; i < 8; i++) {
          if(!read_int_line(fp, *fields[i])) {
               log_error(ERROR_PROFILE, "in load: profile missing data", NULL);
               return false;
          }
     }

     if((hdr.gsx <= 0) || (hdr.gsy <= 0) || (hdr.gcx <= 0) || (hdr.gcy <= 0)) {
          log_error(ERROR_PROFILE, "in load: grid sizes incorrect", NULL);
          return false;
     }
     if((hdr.space_lower <= 0) || (hdr.space_upper <= 0) ||
        (hdr.lineh_lower <= 0) || (hdr.lineh_upper <= 0) ||
        (hdr.space_lower > hdr.space_upper) ||
        (hdr.lineh_lower > hdr.lineh_upper)) {
          log_error(ERROR_PROFILE, "in load: intervals incorrect.", NULL);
          return false;
     }
     return true;
}

// Fills an already-constructed profile from the remainder of the file:
// total_instances, the grid cell totals (row by row), then for every
// character node its character, num_instances and gc_count grid (row by
// row). Logs and returns false on a missing line or a negative count.
bool read_profile_body(FILE *fp, profile &target) {
     if(!read_int_line(fp, target.total_instances)) {
          log_error(ERROR_PROFILE, "in load: missing data", NULL);
          return false;
     }
     if(target.total_instances < 0) {
          log_error(ERROR_PROFILE, "in load: total_insts < 0", NULL);
          return false;
     }

     // fill in grid cell totals:
     for(size_t row = 0; row < target.gridcell_totals.size(); row++) {
          for(size_t col = 0; col < target.gridcell_totals[row].size(); col++) {
               if(!read_int_line(fp, target.gridcell_totals[row][col])) {
                    log_error(ERROR_PROFILE, "in load: file missing data",NULL);
                    return false;
               }
               if(target.gridcell_totals[row][col] < 0) {
                    log_error(ERROR_PROFILE, "in load: gridcell totals < 0",
                              NULL);
                    return false;
               }
          }
     }

     // per-character data:
     for(size_t i = 0; i < target.character_nodes.size(); i++) {
          string character_line;
          if(!read_trimmed_line(fp, character_line)) {
               log_error(ERROR_PROFILE, "in load: missing data", NULL);
               return false;
          }
          target.character_nodes[i].character = character_line;

          if(!read_int_line(fp, target.character_nodes[i].num_instances)) {
               log_error(ERROR_PROFILE, "in load: missing data", NULL);
               return false;
          }
          if(target.character_nodes[i].num_instances < 0) {
               log_error(ERROR_PROFILE, "in load: num_instances < 0", NULL);
               return false;
          }

          // fill gc_count:
          for(size_t j = 0; j < target.character_nodes[i].gc_count.size(); j++) {
               for(size_t k = 0;
                   k < target.character_nodes[i].gc_count[j].size(); k++) {
                    if(!read_int_line(fp,
                                      target.character_nodes[i].gc_count[j][k])) {
                         log_error(ERROR_PROFILE, "in load: missing data",NULL);
                         return false;
                    }
                    if(target.character_nodes[i].gc_count[j][k] < 0) {
                         log_error(ERROR_PROFILE, "in load: gc_count < 0",
                                   NULL);
                         return false;
                    }
               } // for k
          } // for j
     } // for i

     return true;
}

// Writes text followed by a newline; returns false when fputs reports an
// error.
bool write_line(FILE *fp, const string &text) {
     const string output_line = text + '\n';
     return fputs(output_line.c_str(), fp) != EOF;
}

// Writes one integer on its own line.
bool write_int_line(FILE *fp, int value) {
     return write_line(fp, int_to_string(value));
}

// Adds src to dst cell by cell. Only the cells present in both grids are
// touched, so differently shaped (or empty) grids never index out of range.
template <typename Grid>
void add_grid_into(Grid &dst, const Grid &src) {
     for(size_t row = 0; (row < dst.size()) && (row < src.size()); row++) {
          for(size_t col = 0;
              (col < dst[row].size()) && (col < src[row].size()); col++)
               dst[row][col] += src[row][col];
     }
}

} // namespace

// generates a blank profile:
//   gsx, gsy : grid size in x / y;  gcx, gcy : grid cell size in x / y.
// Every count grid gets (gsy / gcy) x (gsx / gcx) zeroed cells, and one
// node with zero instances is created for each of A-Z followed by a-z.
// Precondition: gcx and gcy must be non-zero (they are used as divisors).
profile::profile(int gsx, int gsy, int gcx, int gcy) {
     gs_x = gsx;
     gs_y = gsy;
     gc_x = gcx;
     gc_y = gcy;
     total_instances = 0;
     gridcell_totals = zero_grid(gs_y / gc_y, gs_x / gc_x);

     profile_node node;
     node.num_instances = 0;
     node.gc_count = zero_grid(gs_y / gc_y, gs_x / gc_x);

     // A-Z:
     for(char c = 'A'; c <= 'Z'; c++) {
          node.character = c;
          character_nodes.push_back(node);
     }
     // a-z:
     for(char c = 'a'; c <= 'z'; c++) {
          node.character = c;
          character_nodes.push_back(node);
     }
}

// Recomputes total_instances and gridcell_totals from the per-character
// nodes: both are zeroed and then rebuilt as plain sums over all nodes.
void profile::update() {
     // zero out totals:
     total_instances = 0;
     for(size_t row = 0; row < gridcell_totals.size(); row++) {
          for(size_t col = 0; col < gridcell_totals[row].size(); col++)
               gridcell_totals[row][col] = 0;
     }

     // update:
     // (The original source carries a disabled variant that normalized each
     // character to 100 instances before summing; raw counts are summed.)
     for(size_t i = 0; i < character_nodes.size(); i++) {
          const profile_node &node = character_nodes[i];
          for(size_t j = 0; j < node.gc_count.size(); j++) {
               for(size_t k = 0; k < node.gc_count[j].size(); k++)
                    gridcell_totals[j][k] += node.gc_count[j][k];
          }
          total_instances += node.num_instances;
     }
}

// related functions:

// Loads a profile from a text file holding one value per line: the eight
// header integers (grid size x/y, grid cell size x/y, space interval
// lower/upper, line height interval lower/upper), total_instances, the grid
// cell totals, and then the data of every character node.
//
// On success the global prof is replaced by the loaded profile, the trcfg_*
// grid sizes and intervals are set from the header, and true is returned.
// On any failure an error is logged, false is returned, and neither prof
// nor the trcfg_* values are modified. The file is closed on every path.
bool load_profile_from_file(string filename) {
     FILE *fp = fopen(filename.c_str(), "r");
     if(fp == NULL) {
          log_error(ERROR_OPENING_FILE, "in load_profile_from_file",
                    strerror(errno));
          return false;
     }

     loaded_profile_header hdr;
     profile *loaded = NULL;
     bool ok = read_profile_header(fp, hdr);
     if(ok) {
          loaded = new profile(hdr.gsx, hdr.gsy, hdr.gcx, hdr.gcy);
          ok = read_profile_body(fp, *loaded);
     }
     fclose(fp);

     if(!ok) {
          delete loaded; // no-op when the header was already rejected
          return false;
     }

     // commit: swap in the fully validated profile
     delete prof;
     prof = loaded;

     // setup grid(cell) sizes and intervals to match profile:
     trcfg_grid_size_x = hdr.gsx;
     trcfg_grid_size_y = hdr.gsy;
     trcfg_gridcell_size_x = hdr.gcx;
     trcfg_gridcell_size_y = hdr.gcy;
     trcfg_space_int.lower = hdr.space_lower;
     trcfg_space_int.upper = hdr.space_upper;
     trcfg_line_height_int.lower = hdr.lineh_lower;
     trcfg_line_height_int.upper = hdr.lineh_upper;

     return true;
}

// Writes prf to filename in the layout read by load_profile_from_file. The
// grid sizes come from prf; the space and line-height intervals come from
// the current trcfg_* settings. Returns false (after logging) when prf is
// NULL, the file cannot be opened, or a write / close fails.
bool write_profile_to_file(string filename, profile *prf) {
     if(prf == NULL) {
          log_error(ERROR_PROFILE, "in write_profile_to_file: NULL profile",
                    NULL);
          return false;
     }

     FILE *fp = fopen(filename.c_str(), "w");
     if(fp == NULL) {
          log_error(ERROR_OPENING_FILE, "in write_profile_to_file",
                    strerror(errno));
          return false;
     }

     // overall data (vs. indiv char data):
     bool ok = write_int_line(fp, prf->gs_x);
     ok = ok && write_int_line(fp, prf->gs_y);
     ok = ok && write_int_line(fp, prf->gc_x);
     ok = ok && write_int_line(fp, prf->gc_y);

     ok = ok && write_int_line(fp, trcfg_space_int.lower);
     ok = ok && write_int_line(fp, trcfg_space_int.upper);
     ok = ok && write_int_line(fp, trcfg_line_height_int.lower);
     ok = ok && write_int_line(fp, trcfg_line_height_int.upper);

     ok = ok && write_int_line(fp, prf->total_instances);
     for(size_t i = 0; ok && (i < prf->gridcell_totals.size()); i++) {
          for(size_t j = 0; ok && (j < prf->gridcell_totals[i].size()); j++)
               ok = write_int_line(fp, prf->gridcell_totals[i][j]);
     }

     // character data:
     for(size_t i = 0; ok && (i < prf->character_nodes.size()); i++) {
          ok = write_line(fp, prf->character_nodes[i].character);
          ok = ok && write_int_line(fp, prf->character_nodes[i].num_instances);

          // gc_count grid:
          for(size_t j = 0;
              ok && (j < prf->character_nodes[i].gc_count.size()); j++) {
               for(size_t k = 0;
                   ok && (k < prf->character_nodes[i].gc_count[j].size()); k++)
                    ok = write_int_line(fp,
                                        prf->character_nodes[i].gc_count[j][k]);
          }
     }

     // buffered output may only fail when the stream is flushed on close
     if(fclose(fp) != 0)
          ok = false;

     if(!ok) {
          log_error(ERROR_PROFILE, "in write_profile_to_file: write failed",
                    NULL);
          return false;
     }
     return true;
}

// Adds the counts of prof1 into the global prof. A NULL prof1 is a no-op
// that returns true. When prof is NULL a blank profile with prof1's sizes
// is created first. Returns false, without merging, when the grid or grid
// cell sizes of the two profiles differ.
bool merge_profiles(profile *prof1) {
     if(prof1 == NULL)
          return true;
     else if(prof == NULL)
          prof = new profile(prof1->gs_x, prof1->gs_y, prof1->gc_x,
                             prof1->gc_y);

     if((prof->gs_x != prof1->gs_x) || (prof->gs_y != prof1->gs_y) ||
        (prof->gc_x != prof1->gc_x) || (prof->gc_y != prof1->gc_y))
          return false;

     // only nodes present in both profiles can be merged
     size_t node_count = prof->character_nodes.size();
     if(prof1->character_nodes.size() < node_count)
          node_count = prof1->character_nodes.size();

     for(size_t i = 0; i < node_count; i++) {
          prof->character_nodes[i].num_instances +=
                                   prof1->character_nodes[i].num_instances;
          add_grid_into(prof->character_nodes[i].gc_count,
                        prof1->character_nodes[i].gc_count);
     } // for i

     prof->total_instances += prof1->total_instances;
     add_grid_into(prof->gridcell_totals, prof1->gridcell_totals);

     return true;
}

// Maps the first character of the given string to its index in
// profile::character_nodes as built by the constructor: 'A'-'Z' -> 0-25,
// 'a'-'z' -> 26-51. Returns -1 for an empty string or any other character.
// The prof parameter is not used.
int find_character_index(string character, profile *prof) {
     (void) prof;

     if(character.empty())
          return -1;

     const char letter = character[0];
     if((letter >= 'A') && (letter <= 'Z'))
          return (letter - 'A');
     if((letter >= 'a') && (letter <= 'z'))
          return (letter - 'a' + 26);

     return -1;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -