📄 permnum.cpp
字号:
/* -*-C-*-
 ********************************************************************************
 *
 * File:        permnum.c  (Formerly permnum.c)
 * Description:
 * Author:      Mark Seaman, OCR Technology
 * Created:     Fri Oct 16 14:37:00 1987
 * Modified:    Tue Jul  2 14:12:43 1991 (Mark Seaman) marks@hpgrlt
 * Language:    C
 * Package:     N/A
 * Status:      Reusable Software Component
 *
 * (c) Copyright 1987, Hewlett-Packard Company.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 *********************************************************************************/
/*----------------------------------------------------------------------
              I n c l u d e s
----------------------------------------------------------------------*/
#include "const.h"
#include "permnum.h"
#include "debug.h"
#include "permute.h"
#include "dawg.h"
#include "tordvars.h"
#include "stopper.h"

#include <math.h>
#include <ctype.h>

/*----------------------------------------------------------------------
              V a r i a b l e s
----------------------------------------------------------------------*/
/* Lower-case three-letter month abbreviations; the list is NULL-terminated. */
static const char *allowed_alpha_strs[] = {
  "jan", "feb", "mar", "apr", "may", "jun",
  "jul", "aug", "sep", "oct", "nov", "dec", NULL
};

/* Entry i holds the distinct letters that occur at position i (0-based)
 * of the month abbreviations in allowed_alpha_strs.
 * NOTE(review): presumably these feed columns "1 2 3" of
 * number_state_table -- confirm against the character classifier. */
static const char *allowed_char_strs[] = {
  "adfjmnos", "aceopu", "bcglnrptvy"
};

const int kNumStates = 7;

/* Transition table for the numeric grammar: one row per state, one column
 * per character class.
 * NOTE(review): the column legend "l d o a t 1 2 3" presumably stands for
 * leading / digit / operator / alpha / trailing / 1st, 2nd, 3rd allowed
 * char, and the negative entries presumably encode rejection or special
 * transitions -- confirm against number_state_change. */
static int number_state_table[kNumStates][8] = {
  {                              /* 0. Beginning of string */
    /*  l    d    o    a    t    1    2    3  */
    0, 1, 1, -99, -99, 4, -99, -99
  },
  {                              /* 1. After a digit or operator */
    -99, 1, 1, 3, 2, 4, 3, 3
  },
  {                              /* 2. After trailing punctuation */
    -99, -99, 1, -99, 2, -99, -99, -99
  },
  {                              /* 3. After an alpha character */
    -99, -99, 3, 3, 2, 3, 3, 3
  },
  {                              /* 4. After 1st char */
    -99, -1, -1, -99, -2, -99, 5, -99
  },
  {                              /* 5. After 2nd char */
    -99, -1, -1, -99, -2, -99, -99, 6
  },
  {                              /* 6. After 3rd char */
    -99, -1, -1, -99, -2, -99, -99, -99
  }
};

// The state is coded with its true state shifted left by kStateShift.
// A repeat count (starting with 0) is stored in the lower bits
// No state is allowed to occur more than kMaxRepeats times.
const int kStateShift = 4;
const int kRepeatMask = (1 << kStateShift) - 1;
const int kMaxRepeats[kNumStates] = {
  3, 10, 3, 3, 3, 3, 3
};

/* Tunable variables declared through the project's make_*_var macros.
 * NOTE(review): ok_number carries the description "Bad number adjustment";
 * the string is runtime text and is left unchanged -- confirm it is
 * intended. */
make_float_var (good_number, GOOD_NUMBER, make_good_number,
8, 15, set_good_number, "Good number adjustment");

make_float_var (ok_number, OK_NUMBER, make_ok_number,
8, 16, set_ok_number, "Bad number adjustment");

make_toggle_var (number_debug, 0, make_number_debug,
8, 23, set_number_debug, "Number debug");

make_int_var (number_depth, 3, make_number_depth,
8, 24, set_number_depth, "Number depth");

/*----------------------------------------------------------------------
              M a c r o s
----------------------------------------------------------------------*/
/**********************************************************************
 * isleading
 *
 * Return non-zero if this is a leading type punctuation mark for the
 * numeric grammar:  { [ ( # @ $
 *
 * The argument is parenthesized at every use so that expression
 * arguments expand with the intended precedence.  It is still evaluated
 * more than once, so do not pass an expression with side effects.
 **********************************************************************/
#define isleading(ch)     \
(((ch) == '{') ||         \
 ((ch) == '[') ||         \
 ((ch) == '(') ||         \
 ((ch) == '#') ||         \
 ((ch) == '@') ||         \
 ((ch) == '$'))

/**********************************************************************
 * istrailing
 *
 * Return non-zero if this is a trailing type punctuation mark for the
 * numeric grammar:  } ] ) ; : , . %
 *
 * The argument is parenthesized at every use so that expression
 * arguments expand with the intended precedence.  It is still evaluated
 * more than once, so do not pass an expression with side effects.
 **********************************************************************/
#define istrailing(ch)    \
(((ch) == '}') ||         \
 ((ch) == ']') ||         \
 ((ch) == ')') ||         \
 ((ch) == ';') ||         \
 ((ch) == ':') ||         \
 ((ch) == ',') ||         \
 ((ch) == '.') ||         \
 ((ch) == '%'))

/**********************************************************************
 * isoperator
 *
 * Return non-zero if this is an operator type punctuation mark for the
 * numeric grammar:  * + - / . : ,
 *
 * The argument is parenthesized at every use so that expression
 * arguments expand with the intended precedence.  It is still evaluated
 * more than once, so do not pass an expression with side effects.
 **********************************************************************/
#define isoperator(ch)    \
(((ch) == '*') ||         \
 ((ch) == '+') ||         \
 ((ch) == '-') ||         \
 ((ch) == '/') ||         \
 ((ch) == '.') ||         \
 ((ch) == ':') ||         \
 ((ch) == ','))

/*----------------------------------------------------------------------
              F u n c t i o n s
----------------------------------------------------------------------*/
/**********************************************************************
 * adjust_number
 *
 * Assign an adjusted value to a choice that was produced by the number
 * permuter.  The rating of best_choice is padded by RATING_PAD, scaled
 * by good_number when pure_number() accepts the choice's string and by
 * ok_number otherwise, and then has the pad removed again.  The scale
 * factor that was applied is passed on, together with certainty_array,
 * to LogNewWordChoice().  Progress is printed with cprintf when
 * adjust_debug is set.
 *
 * best_choice      choice whose rating is adjusted in place
 * certainty_array  forwarded unchanged to LogNewWordChoice()
 **********************************************************************/
void adjust_number(A_CHOICE *best_choice, float *certainty_array) {
  float adjust_factor;

  if (adjust_debug) {
    cprintf ("Number: %s %4.2f ",
             class_string (best_choice), class_probability (best_choice));
  }

  /* Scale the padded rating, then take the pad back off afterwards. */
  class_probability (best_choice) += RATING_PAD;
  if (pure_number (class_string (best_choice))) {
    class_probability (best_choice) *= good_number;
    adjust_factor = good_number;
    if (adjust_debug) {
      cprintf (", %4.2f ", good_number);
    }
  }
  else {
    class_probability (best_choice) *= ok_number;
    adjust_factor = ok_number;
    if (adjust_debug) {
      cprintf (", N, %4.2f ", ok_number);
    }
  }
  class_probability (best_choice) -= RATING_PAD;

  LogNewWordChoice(best_choice, adjust_factor, certainty_array);

  if (adjust_debug) {
    cprintf (" --> %4.2f\n", class_probability (best_choice));
  }
}

/**********************************************************************
 * append_number_choices
 *
 * Check to see whether or not the next choice is worth appending to
 * the string being generated.  If so then keep going deeper into the
 * word.
**********************************************************************/void append_number_choices(int state, char *word, CHOICES_LIST choices, int char_index, A_CHOICE *this_choice, float *limit, float rating, float certainty, float *certainty_array, CHOICES *result) { int word_ending = FALSE; int x; if (char_index == (array_count (choices) - 1)) word_ending = TRUE; word[char_index] = class_string (this_choice)[0]; word[char_index + 1] = '\0'; if (word[char_index] == '\0') word[char_index] = ' '; certainty_array[char_index] = class_certainty (this_choice); rating += class_probability (this_choice); certainty = min (class_certainty (this_choice), certainty); if (rating < *limit) { state = number_state_change (state, word + char_index); if (number_debug) cprintf ("%-20s prob=%4.2f state=%d\n", word, rating, state); if (state != -1) { if ((state >> kStateShift) == 3 && char_index + 3 < array_count (choices)) { return; } if (word_ending) { for (x = 0; x <= char_index; x++) { if (isdigit (word[x])) { if (number_debug) cprintf ("new choice = %s\n", word); push_on (*result, new_choice (word, rating, certainty, -1, NUMBER_PERM)); adjust_number ((A_CHOICE *) first (*result), certainty_array); if (best_probability (*result) > *limit) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -