📄 gc_read_1.cc
字号:
// file: gc_read_1.cc// // isip include files//#include "grammar_compiler.h"#include "grammar_compiler_constants.h"// system include files//#include <string.h>// method: preprocessing_grammar_cc// // arguments:// char_1** symbols: (input/output) the array of symbols// int_4& size_a: (input/output) number of symbols in array//// return: a logical_1 indicating status//// this method preprocess the grammar from array of symbols// distribute the probability value from sub-expression to terminals//logical_1 preprocessing_grammar_cc(char_1** symbols_a, int_4& size_a) { int_4 i_pointer = size_a-1; // loop variable begining at the end of grammar int_4 i_end=0, i_start=0; //start and end position of matched "()" int_4 i_brace_indicate=0; // matched brace indicate =0 mean matched brace // store the probability value for each terminal or sub-expression // float_8 probability_a[GC_GRAM_MAX_SYMBOL_SIZE]; // store the boolean for if probability value has been given // logical_1 prob_bool_a[GC_GRAM_MAX_SYMBOL_SIZE]; logical_1 alternative_bool=ISIP_FALSE; // default no alternative operation // sub total probability for all given probability in a sub-expression // float_8 sub_total_prob=0.0; // sub total number for all no given probability in a sub-expression // int_4 sub_total_num_no_prob=0; int_4 offset; //offset for add ~ and probability value int_4 sub_total=0; //total terminals or sub-expressions in float_8 distribution_prob=0.0; float_8 target_prob=0.0; int_4 i_loop=size_a-1; //loop variable to control whether finished // check if the grammar has matched brace // while(i_pointer > 0) { if(strcmp((char *)symbols_a[i_pointer-1], (char *)BRACE_START)==0) i_brace_indicate++; else if(strcmp((char *)symbols_a[i_pointer-1], (char *)BRACE_CLOSE)==0) i_brace_indicate--; i_pointer--; } if(i_brace_indicate==0) {} else { printf("Unmatched brace found!!!\n"); exit(ISIP_PROTO_ERROR); } i_pointer=size_a-1; i_brace_indicate=0; // Check whether the format in the grammar is correct // look for ]~ >~ }~ // if found // print error message, then exit // else // continue // for (int_4 j = size_a-1; j >0 ; j--) { if((strcmp((char *)symbols_a[j], (char *)PROBABILITY_SYMBOL)==0 && strcmp((char *)symbols_a[j-1], (char *)ONE_OR_MORE_REPEAT_CLOSE)==0) ||(strcmp((char *)symbols_a[j], (char *)PROBABILITY_SYMBOL)==0 && strcmp((char *)symbols_a[j-1], (char *)ZERO_OR_MORE_REPEAT_CLOSE)==0) ||(strcmp((char *)symbols_a[j], (char *)PROBABILITY_SYMBOL)==0 && strcmp((char *)symbols_a[j-1], (char *)OPTIONAL_CLOSE)==0) ) { printf("Illegal probability assignment after expansion of subexpression: %s%s\n", symbols_a[j-1], symbols_a[j]); exit(ISIP_PROTO_ERROR); } } // find the ")~" , search from the end of grammar // while(i_pointer > 0) { if((strcmp((char *)symbols_a[i_pointer], (char *)PROBABILITY_SYMBOL)==0 && strcmp((char *)symbols_a[i_pointer-1], (char *)BRACE_CLOSE)==0) ) { target_prob=(float_8) atof((char*)symbols_a[i_pointer+1]); i_end=i_pointer-1; i_brace_indicate++; break; } else i_pointer--; } i_loop=i_end; i_brace_indicate=0; // i_loop=0 mean no more ")~" combination in grammar // while(i_loop>0) { // Check whether the format in the grammar is correct // look for ]~ >~ }~ // if found // print error message, then exit // else // continue // for (int_4 j = size_a-1; j >0 ; j--) { if((strcmp((char *)symbols_a[j], (char *)PROBABILITY_SYMBOL)==0 && strcmp((char *)symbols_a[j-1], (char *)ONE_OR_MORE_REPEAT_CLOSE)==0) ||(strcmp((char *)symbols_a[j], (char *)PROBABILITY_SYMBOL)==0 && strcmp((char *)symbols_a[j-1], (char *)ZERO_OR_MORE_REPEAT_CLOSE)==0) ||(strcmp((char *)symbols_a[j], (char *)PROBABILITY_SYMBOL)==0 && strcmp((char *)symbols_a[j-1], (char *)OPTIONAL_CLOSE)==0)) { printf("Illegal probability assignment after expansion of subexpression: %s%s\n", symbols_a[j-1], symbols_a[j]); exit(ISIP_PROTO_ERROR); } } // Apply the "()" sub-expression probability distribution rules // to grammar. // Inside the sub-expression, apply alternative probability // distribution rules to grammar if applicable. // i_pointer=size_a-1; // get the position of the first appear string ")~" // beginning from the end of grammar // while(i_pointer > 0) { if((strcmp((char *)symbols_a[i_pointer], (char *)PROBABILITY_SYMBOL)==0 && strcmp((char *)symbols_a[i_pointer-1], (char *)BRACE_CLOSE)==0) ) { target_prob=(float_8) atof((char*)symbols_a[i_pointer+1]); i_end=i_pointer-1; i_brace_indicate++; break; } else i_pointer--; } i_pointer=i_end; i_loop=i_pointer; // initialization probability value and indicate // for (int_4 j = 0; j < GC_GRAM_MAX_SYMBOL_SIZE; j++) { probability_a[j] = 0.0; prob_bool_a[j]=ISIP_FALSE; } // get the position of the string "(" which matched the ')~' // at the position i_end of the grammar // while(i_loop > 0) { if(strcmp((char *)symbols_a[i_loop], (char *)BRACE_START)==0) { i_brace_indicate--; if(i_brace_indicate==0) { i_start=i_loop; break; } } else if(strcmp((char *)symbols_a[i_loop-1], (char *)BRACE_CLOSE)==0) i_brace_indicate++; i_loop--; } // end of while i_loop>0 i_pointer=i_start+1; i_brace_indicate=0; sub_total=0; sub_total_prob=0.0; sub_total_num_no_prob=0; // find all terminals or sub-expressions in the sub-expression // between i_start and i_end // while(i_pointer < i_end) { // if it is sub-expression, move all the way // till the end of sub-expression // if(strcmp((char *)symbols_a[i_pointer], (char *)BRACE_START)==0) { i_brace_indicate++; i_pointer++; //move to next symbol while (i_brace_indicate!=0) { if(strcmp((char *)symbols_a[i_pointer], (char *)BRACE_START)==0) i_brace_indicate++; else if(strcmp((char *)symbols_a[i_pointer], (char *)BRACE_CLOSE)==0) i_brace_indicate--; i_pointer++; //move to next symbol } //end of while i_brace_indicate // if it is "~", ignore the probability symbol // if(strcmp((char *)symbols_a[i_pointer], (char *)PROBABILITY_SYMBOL)==0) { prob_bool_a[sub_total]=ISIP_TRUE; i_pointer++; probability_a[sub_total]= (float_8) atof((char*)symbols_a[i_pointer]); } else i_pointer--; //push back the symbol sub_total++; } //end of if BRACE_START // if it is alternative symbol, ignore it // else if(strcmp((char *)symbols_a[i_pointer], (char *)ALTERNATIVE_SYMBOL)==0) { i_pointer++; alternative_bool=ISIP_TRUE; continue; } // if move to here this is terminal // else { i_pointer++; if(strcmp((char *)symbols_a[i_pointer], (char *)PROBABILITY_SYMBOL)==0) { prob_bool_a[sub_total]=ISIP_TRUE; i_pointer++; probability_a[sub_total]= (float_8) atof((char*)symbols_a[i_pointer]); } else i_pointer--; sub_total++; } i_pointer++; //move to next symbol } // calculate the total given probability in this sub-expression // for (int_4 j = 0; j < sub_total; j++) { sub_total_prob+=probability_a[j]; if(prob_bool_a[j]==ISIP_FALSE) sub_total_num_no_prob++; } // offset to add ~ and probability value // probability value need to distribute to each no-given terminals // offset = 2*sub_total_num_no_prob; distribution_prob=(1-sub_total_prob)/sub_total_num_no_prob; if(alternative_bool==ISIP_TRUE) { for (int_4 j = 0; j < sub_total; j++) { if(prob_bool_a[j]==ISIP_FALSE) probability_a[j]=distribution_prob; probability_a[j] *=target_prob; } } else { if(prob_bool_a[0]==ISIP_FALSE) //no probability value probability_a[0]=target_prob; else //has probability value
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -