⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 parseutils.cpp

📁 Ocr source code. provides an Ocr engine that works in C++ language.
💻 CPP
📖 第 1 页 / 共 2 页
字号:
          log_error(ERROR_INVALID_INTERVAL, " lower > upper", NULL);          return false;     }     else if(line_int.upper > pfile->height) {          log_error(ERROR_INVALID_INTERVAL, "upper > scan height", NULL);          return false;     }          int cur_entry_y = scan_ignore_edges_top - 1, cur_line_num = 1;     box_t line_box;     vector<box_t> chars;     line_node lnode;     char_node cnode;     while(true) {                    line_box = find_line_box(cur_entry_y, pfile);          if(equals_NULL_BOX(line_box))               return true;                    else if((line_box.vert_len < line_int.lower) ||                   (line_box.vert_len > line_int.upper)) {               //printf("***********************************************\n");               log_error(ERROR_INVALID_INTERVAL, "line_box out of int", NULL);                              //printf("line_box top corner: (x, y) = (%d, %d)\n",                //        line_box.xy.x, line_box.xy.y);               //printf("line_box vert_len = %d\n", line_box.vert_len);               //printf("***********************************************\n");                              cur_entry_y = line_box.xy.y + line_box.vert_len;               continue;          }          // add line to list:          lnode.self_box = line_box;          lnode.line_num = cur_line_num;          if(plist->pl_begin == NULL) {               plist->pl_begin = new parsed_line(lnode);               plist->pl_end = plist->pl_begin;               }          else {                    plist->pl_end->next = new parsed_line(lnode);               plist->pl_end->next->prev = plist->pl_end;               plist->pl_end = plist->pl_end->next;          }                    // add chars to line:          chars = find_chars(line_box, pfile);          cnode.is_space = false;          cnode.character = ""; // have yet to interpret          for(int i = 0; i < chars.size(); i++) {               cnode.self_box = chars[i];                              // plist->pl_end is the current parsed_line *:               if(plist->pl_end->pc_begin == NULL) {                                        plist->pl_end->pc_begin = new parsed_char(cnode);                    plist->pl_end->pc_end = plist->pl_end->pc_begin;               }               else {                    plist->pl_end->pc_end->next = new parsed_char(cnode);                    plist->pl_end->pc_end->next->prev =                                              plist->pl_end->pc_end;                    plist->pl_end->pc_end = plist->pl_end->pc_end->next;               }          }               cur_entry_y = line_box.xy.y + line_box.vert_len;          cur_line_num++;     } // while}// gridding general utility:box_t find_biggest_char_box_hlen(parse_list *plist) {     box_t r_val = NULL_BOX();     int hlen = 0;     if(plist->pl_begin == NULL)          return r_val;     parsed_line *cur_pl = plist->pl_begin;     while(cur_pl != NULL) {          parsed_char *cur_pc = cur_pl->pc_begin;          while(cur_pc != NULL) {               if(cur_pc->self_node.is_space) {                    cur_pc = cur_pc->next;                    continue;               }                              if(cur_pc->self_node.self_box.horiz_len > hlen) {                    r_val = cur_pc->self_node.self_box;                    hlen = cur_pc->self_node.self_box.horiz_len;               }               cur_pc = cur_pc->next;          }          cur_pl = cur_pl->next;     }     return r_val;}box_t find_biggest_char_box_vlen(parse_list *plist) {     box_t r_val = NULL_BOX();     int vlen = 0;     if(plist->pl_begin == NULL)          return r_val;     parsed_line *cur_pl = plist->pl_begin;     while(cur_pl != NULL) {          parsed_char *cur_pc = cur_pl->pc_begin;          while(cur_pc != NULL) {               if(cur_pc->self_node.is_space) {                    cur_pc = cur_pc->next;                    continue;               }                              if(cur_pc->self_node.self_box.vert_len > vlen) {                    r_val = cur_pc->self_node.self_box;                    vlen = cur_pc->self_node.self_box.vert_len;               }               cur_pc = cur_pc->next;          }          cur_pl = cur_pl->next;     }     return r_val;}grid_t grid_char(unsigned char *buf, int buf_rows, int buf_cols,                  int pos_offset_x, int pos_offset_y, int gsx, int gsy,                  int gcx, int gcy) {     grid_t r_grid = zero_matrix(gsy / gcy, gsx / gcx);     int grid_y_pos = -1, grid_x_pos = -1;          for(int i = 0; i < buf_rows; i++) {          for(int j = 0; j < buf_cols; j++) {               if(grid(buf, buf_rows, buf_cols, i, j)) {                    // find grid cell the pixel belongs to:                    grid_y_pos = (pos_offset_y + i) / gcy;                    grid_x_pos = (pos_offset_x + j) / gcx;                                        if((grid_y_pos >= (gsy / gcy)) ||                        (grid_x_pos >= (gsx / gcx))) {                         log_error(ERROR_GRID_ERROR,                               "in grid char: the grid size is too small", NULL);                         return r_grid;                    }                                        r_grid[grid_y_pos][grid_x_pos] = true;               }          }     }         return r_grid;     }grid_t grid_char(box_t char_box) {     int gsx = trcfg_grid_size_x, gsy = trcfg_grid_size_y,          gcx = trcfg_gridcell_size_x, gcy = trcfg_gridcell_size_y;     grid_t r_grid;     // compute offsets:     if((gsx < char_box.horiz_len) || (gsy < char_box.vert_len)) {                    log_error(ERROR_GRID_ERROR,                     "in grid_char: grid is smaller than char", NULL);          return r_grid;     }     r_grid = zero_matrix(gsy / gcy, gsx / gcx);     int grid_offset_x = (gsx - char_box.horiz_len) / 2;     int grid_offset_y = (gsy - char_box.vert_len) / 2;     int grid_y_pos, grid_x_pos;               for(int i = 0; i < char_box.vert_len; i++) {          for(int j = 0; j < char_box.horiz_len; j++) {               if(pf->grid(char_box.xy.y + i, char_box.xy.x + j)) {                    grid_y_pos = (grid_offset_y + i) / gcy;                    grid_x_pos = (grid_offset_x + j) / gcx;                    if((grid_y_pos >= (gsy / gcy)) ||                        (grid_x_pos >= (gsx / gcx))) {                         log_error(ERROR_GRID_ERROR,                                    "in grid_char: grid size too small", NULL);                         return r_grid;                    }                    r_grid[grid_y_pos][grid_x_pos] = true;               }          } // for j     } // for i                        return r_grid;}vector<char_probability> identify_character(grid_t char_grid) {     vector<char_probability> r_val;     if(prof == NULL)          return r_val;          vector<char_probability>::iterator iter;     char_probability chpr;     double total_pr, num, denom;     double gc_l_hits, notgc_l_hits, l_insts, total_gc_hits, total_insts,             total_notgc_hits;     double cur_pr, p_gc_given_l, p_notgc_given_l, p_letter, p_gc, p_notgc;      bool pr_done;          for(int i = 0; i < (prof->character_nodes.size()); i++) {          num = 1.0; denom = 1.0;          pr_done = false;          for(int j = 0; j < char_grid.size(); j++) {               for(int k = 0; k < char_grid[j].size(); k++) {                    if((prof->character_nodes[i].num_instances) == 0) {                         num = 0.0;                         pr_done = true;                         break;                    }                                        gc_l_hits = (double) prof->character_nodes[i].gc_count[j][k];                    l_insts = (double) prof->character_nodes[i].num_instances;                    notgc_l_hits = l_insts - gc_l_hits;                    total_insts = (double) prof->total_instances;                    total_gc_hits = (double) prof->gridcell_totals[j][k];                    total_notgc_hits = total_insts - total_gc_hits;                    p_letter = 1.0 / NUM_TRAINED_CHARS;                                        if(char_grid[j][k]) {                         if((prof->character_nodes[i].gc_count[j][k]) > 0)                              p_gc_given_l = gc_l_hits / l_insts;                         else                              p_gc_given_l = ZERO_INST_PR1_PGIVEN;                         if((prof->gridcell_totals[j][k]) > 0)                              p_gc = total_gc_hits / total_insts;                         else                              p_gc = ZERO_INST_PR1_PGC;                         cur_pr = p_gc_given_l * p_letter / p_gc;                                                                                               }                    else { // grid cells NOT hit                         if((prof->character_nodes[i].num_instances -                             prof->character_nodes[i].gc_count[j][k]) > 0)                              p_notgc_given_l = notgc_l_hits / l_insts;                         else                              p_notgc_given_l = ZERO_INST_PR0_PGIVEN;                         if((prof->total_instances -                              prof->gridcell_totals[j][k]) > 0)                              p_notgc = total_notgc_hits / total_insts;                         else                              p_notgc = ZERO_INST_PR0_PGC;                         cur_pr = p_notgc_given_l * p_letter / p_notgc;                    }                    num *= cur_pr * PR_SCALING_FACTOR;                    denom *= (1.0 - cur_pr) * PR_SCALING_FACTOR;               } // for k               if(pr_done)                    break;          } // for j          // p0 = p(letter | gridcell 0) [or NOT gridcell 0];          // p(letter | gridcell 0) = p(gridcell 0 | letter) * p(letter)          //                          __________________________________          //                                     p(gridcell 0)          //          // naive bayes (see paulgraham.com/naivebayes.html):          //                        p0 * ... * pn          // total_pr = _________________________________________          //            p0 * ... * pn + (1 - p0) * ... * (1 - pn)          total_pr = num / (num + denom);                    iter = r_val.begin();          chpr.character = prof->character_nodes[i].character;          chpr.probability = total_pr;                    // add to r_val (keep sorted):          if(r_val.size() == 0)               r_val.push_back(chpr);          else {               for(int l = 0; l < r_val.size(); l++) {                    if((l == 0) && (total_pr >= r_val[0].probability)) {                                                  r_val.insert(iter, chpr);                         break;                    }                    else if((l != 0) && (total_pr <= r_val[l - 1].probability) &&                            (total_pr >= r_val[l].probability)) {                                                  r_val.insert(iter, chpr);                         break;                    }                    else if(l == (r_val.size() - 1)) {                                                  r_val.push_back(chpr);                         break;                    }                    iter++;               } // for l          } // else     } // for i}// other utility:plist_parsed_char point_to_char(parse_list *plist, int x, int y,                                 bool search_alternate) {     plist_parsed_char r_val;     if((plist == NULL) || (plist->pl_begin == NULL) ||         (plist->pl_begin->pc_begin == NULL)) {          r_val.pc = NULL;          r_val.pl = NULL;          return r_val;     }               // find line with y point (if any):     parsed_line *pl = plist->pl_begin;     bool line_found = false;     while(pl != NULL) {          if((pl->self_node.self_box.xy.y <= y) && ((pl->self_node.self_box.xy.y              + pl->self_node.self_box.vert_len) >= y) &&              (pl->self_node.self_box.xy.x <= x) && ((pl->self_node.self_box.xy.x              + pl->self_node.self_box.horiz_len) >= x)) {               line_found = true;               break;          }          pl = pl->next;     }     if(line_found)          r_val.pl = pl;     else {          r_val.pl = NULL;          r_val.pc = NULL;          return r_val;     }     // find the char box (if any):     parsed_char *pc = pl->pc_begin;     bool char_found = false, checking_next = false;     while(pc != NULL) {          if((pc->self_node.self_box.xy.x <= x) && ((pc->self_node.self_box.xy.x              + pc->self_node.self_box.horiz_len) >= x) &&             (pc->self_node.self_box.xy.y <= y) && ((pc->self_node.self_box.xy.y              + pc->self_node.self_box.vert_len) >= y)) {               if(checking_next) {                    char_found = true;                    break;               }               if(search_alternate && (pc->next != NULL))                    checking_next = true;               else {                                        char_found = true;                    break;               }          }          else if(checking_next) {               char_found = true;               pc = pc->prev;               break;          }          pc = pc->next;     }     if(char_found)          r_val.pc = pc;     else {          r_val.pl = NULL;          r_val.pc = NULL;          return r_val;     }     return r_val;}    

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -