📄 lex_06.cc
字号:
// model ( word graph / lattice ) which is represented as a// DiGraph<LexicalNode>//booleanLexicalTree::buildLexicalTree(DiGraph<LexicalNode>& word_graph_a, const Vector<DiGraph<LexicalNode> >& pron_vec_a, SingleLinkedList<LexicalTree>& tree_list_a){ // algorithm for this method: // 1 LOOP: // iterate through all the vertexs on this word graph // 1.1 build the lexical tree for that node // 1.2 insert the lexical tree in a list // 2 Exit // lexical tree for the start vertex of the DiGraph // LexicalTree* lex_tree = new LexicalTree(); GVLexicalNode* root_vert = word_graph_a.getStart(); if ( NULL == lex_tree->buildLexicalTree(root_vert, pron_vec_a) ){ delete lex_tree; } else{ tree_list_a.insert(lex_tree); // make the search node point to this lexical tree // LexicalNode* curr_node = (LexicalNode*)root_vert->getItem(); curr_node->setLexicalTree(lex_tree); } // iterate through all the vertexs on this word graph // for (boolean more_vertexes = word_graph_a.gotoFirst(); more_vertexes; more_vertexes = word_graph_a.gotoNext()) { // get the successor // root_vert = word_graph_a.getCurr(); lex_tree = new LexicalTree(); // we need set the pointer to this lexical tree on root_vert later // if ( NULL == lex_tree->buildLexicalTree(root_vert, pron_vec_a) ){ delete lex_tree; } else{ tree_list_a.insert(lex_tree); // make the search node point to this lexical tree // LexicalNode* curr_node = (LexicalNode*)root_vert->getItem(); curr_node->setLexicalTree(lex_tree); } } // exit gracefully // return true;} // method: buildLexicalTree//// arguments:// LexicalNode* root_vert_a: (input) the node from which we expend the// the Lexical tree. Currently, it is defined as:// ==> typedef LexicalNode SearchNode;// ==> GraphVertex<LexicalNode> GVLexicalNode;// Vector<DiGraph<LexicalNode>>& pron_vec_a : (input) the pronunciation for// each word in the symbol_list_a; It is a 1:1// mapping. Currently, it is defined as:// ==> typedef DiGraph<LexicalNode> Pronunciation;//// return: a boolean indicating status//// note: the symbol id on the vertex should be the index of the digraph in// the(input) pron_vec_a//// this method build the lexical tree for a lexical node in the language// model ( word graph / lattice ) which is represented as a// DiGraph<LexicalNode>//LexicalTree* LexicalTree::buildLexicalTree(GVLexicalNode*& root_vert_a, const Vector<DiGraph<LexicalNode> >& pron_vec_a){ // algorithm for this method: // 1 LOOP: // iterate through all the arcs emitted from root_vert_a // 1.1 find the subgraph of each successor of root_vert_a // 1.2 insert the pronunciation of the successor // 2 Exit // test the pointer // if (root_vert_a == NULL){ Error::handle(name(), L"buildLexicalTree", Error::NULL_ARG, __FILE__, __LINE__); return (LexicalTree*)NULL; } root_vert_d = root_vert_a; // no lexical tree for this vertex // if (root_vert_a->isTerm()) return (LexicalTree*)NULL; // iterate through all the arcs emitted from this vertex, we // need to remove arcs already exist // for (boolean more_arcs = root_vert_a->gotoFirst(); more_arcs; more_arcs = root_vert_a->gotoNext()) { // get the successor // GVLexicalNode* successor = root_vert_a->getCurr()->getVertex(); // no lexical tree for this vertex // //if (successor->isTerm()) // return (LexicalTree*)NULL; float weight = 0; // get the transition probability // if ( root_vert_a->getParentGraph()->isWeighted() && (! root_vert_a->getCurr()->getEpsilon()) ){ weight = root_vert_a->getCurr()->getWeight(); } // has a path to term // if (successor->isTerm()){ // add an arc directly to the term // continue; } LexicalNode* curr_node = (LexicalNode*)successor->getItem(); long symbol_id = curr_node->getSymbolId(); // error handling // if ( symbol_id < 0 || symbol_id >= pron_vec_a.length()){ Long error(symbol_id); error.debug(L"invalid symbol id: "); Error::handle(name(), L"buildLexicalTree", Error::NULL_ARG, __FILE__, __LINE__); return (LexicalTree*)NULL; } // insert the pronunciation to the lexical tree // if( !insertPron(successor, pron_vec_a(symbol_id), weight) ) return (LexicalTree*)NULL; } // exit gracefully // return this;} // method: insertPron//// arguments:// const GVLexicalNode*& word_vert_a: the pronunciation vertex need to// be inserted// ==> typedef GraphVertex<LexicalNode> GVLexicalNode;// const Pronunciation& pron_a: the pronunciation need to be inserted// ==> typedef DiGraph<LexicalNode> Pronunciation;//// return: a boolean indicating status//// this method insert a new pronunciation into the lexical tree//boolean LexicalTree::insertPron(GVLexicalNode*& word_vert_a, const DiGraph<LexicalNode>& pron_a, const float & weight_a ) { // algorithm for this method // 1 initialize ( set current lexical tree and prun_a to the start pointer) // curr_lex_vert <- start , curr_pron_vert <- start // 2 insertSubgraph( curr_lex_vert, curr_pron_vert, false); // 3 exit GVLexicalNode* curr_lex_vert = getStart(); GVLexicalNode* curr_pron_vert = pron_a.getStart(); // we insert subgraph with checking , that is why we set // insert_mode_a = false // return insertSubgraph( word_vert_a, curr_lex_vert, curr_pron_vert, weight_a, false);}// method: insertSubgraph//// arguments:// const GVLexicalNode*& word_vert_a: the subgraph is from the Pronunciation// of this// const GVLexicalNode*& curr_lex_vert_a: current vertex in the lexical// const GVLexicalNode*& curr_pron_vert_a: current vertex in the pronunciation// graph// boolean insert_mode_a = true: insert the subgraph without checking//// note: ==> typedef DiGraph<LexicalNode> Pronunciation;//// return: a boolean indicating status//// this method insert a subgraph of a pronunciation into the lexical tree// the pronunciation is weighted in this case//boolean LexicalTree::insertSubgraph(GVLexicalNode*& word_vert_a, GVLexicalNode*& curr_lex_vert_a, GVLexicalNode*& curr_pron_vert_a, const float & pron_prob_a, boolean insert_mode_a) { // algorithm for this method // 1 Error checking // 2 IF: insert_mode == true // insert the subgraph of curr_pron_vert to the lexical tree // ( append to curr_lex_vert), no checking for any successors // ELSE: insert_mode == false // check every vertex(succ_pron_vert) following curr_pron_vert. // IF: this successor is a successor(succ_lex_vert) of curr_lex_vert // call recursively: // insertSubgraph(succ_lex_vert, succ_pron_vert, false); // ELSE: // call recursively: // insertSubgraph(succ_lex_vert, succ_pron_vert, true); // 3 Exit // long symbol_id = -1; boolean more_arcs = false; float pron_prob = pron_prob_a; // error checking // if ( curr_lex_vert_a == NULL || curr_pron_vert_a == NULL ){ return Error::handle(name(), L"insertSubgraph", Error::NULL_ARG, __FILE__, __LINE__); } // shouldn't insert a subgraph with only term vertex // if ( curr_pron_vert_a->isTerm()){ return Error::handle(name(), L"insertSubgraph", Error::NULL_ARG, __FILE__, __LINE__); } // insert anyway // if ( insert_mode_a ){ // iterate through all the arcs emitted from this vertex // for (more_arcs = curr_pron_vert_a->gotoFirst(); more_arcs; more_arcs = curr_pron_vert_a->gotoNext()) { // get the successor // GVLexicalNode* succ_pron_vert = curr_pron_vert_a->getCurr()->getVertex(); // accumulate the probability if it is not an epsilon transition // if ( curr_pron_vert_a->getParentGraph()->isWeighted() ){ if (! curr_pron_vert_a->getCurr()->getEpsilon()){ pron_prob = pron_prob_a + curr_pron_vert_a->getCurr()->getWeight(); } } LexicalNode* curr_node = (LexicalNode*)NULL; // at the end of pronunciation // we link it to the up level symbol // if ( succ_pron_vert->isTerm() ){ // insert the arc to the word with probability // insertArc(curr_lex_vert_a, word_vert_a, false, pron_prob); if (debug_level_d > Integral::BRIEF) { curr_node = (LexicalNode*)word_vert_a->getItem(); curr_node->debug(L"link to up level symbol"); } continue; } // get the lexical // curr_node = (LexicalNode*)succ_pron_vert->getItem(); // insert this pronunciation vertex into the lex tree // GVLexicalNode* succ_lex_vert = insertVertex(curr_node); if (debug_level_d > Integral::BRIEF) { curr_node->debug(L"add to lexical tree"); } // insert an epsilon arc, the pronunciation probability will // only on the arc to the word // insertArc(curr_lex_vert_a, succ_lex_vert, true); // call recursively // insertSubgraph(word_vert_a, succ_lex_vert, succ_pron_vert, pron_prob, true); } } // end if insert_mode_a == true // don't insert those with the same symbol_id // else { // iterate through all the arcs emitted from this vertex // for (more_arcs = curr_pron_vert_a->gotoFirst(); more_arcs; more_arcs = curr_pron_vert_a->gotoNext()) { // get the successor // GVLexicalNode* succ_pron_vert = curr_pron_vert_a->getCurr()->getVertex(); // accumulate the probability if it is not an epsilon transition // if ( curr_pron_vert_a->getParentGraph()->isWeighted() ){ if (! curr_pron_vert_a->getCurr()->getEpsilon()){ pron_prob = pron_prob_a + curr_pron_vert_a->getCurr()->getWeight(); } } LexicalNode* curr_node = (LexicalNode*)NULL; // at the end of pronunciation // we link it to the up level symbol // if ( succ_pron_vert->isTerm() ){ // insert the arc to the word with probability // insertArc(curr_lex_vert_a, word_vert_a, false, pron_prob); if (debug_level_d > Integral::BRIEF) { curr_node = (LexicalNode*)word_vert_a->getItem(); curr_node->debug(L"link to up level symbol"); } continue; } // get the lexical node // curr_node = (LexicalNode*)succ_pron_vert->getItem(); // get the symbol id of this node // symbol_id = curr_node->getSymbolId(); // find the vertex with the same symbol_id in the lexical tree // GVLexicalNode* succ_lex_vert = findSuccVert(curr_lex_vert_a, symbol_id); // we can find a successor with the same id // insert the subgraph ( with checking ) // if ( succ_lex_vert != NULL ){ insertSubgraph(word_vert_a, succ_lex_vert, succ_pron_vert, pron_prob, false); } // we can't find a successor with the same id // insert all subgraph ( without checking ) // else { if (debug_level_d > Integral::BRIEF) { curr_node->debug(L"add to lexical tree"); } // insert this pronunciation vertex into the lex tree // succ_lex_vert= insertVertex(curr_node); insertArc(curr_lex_vert_a, succ_lex_vert, true); // insert this node and all following nodes // insertSubgraph(word_vert_a, succ_lex_vert, succ_pron_vert, pron_prob, true); } } // end of for loop } // end if insert_mode_a == false ( insert with checking ) // exit gracefully // return true;}// method: findSuccVert//// arguments:// const GVLexicalNode*& lex_vert_a: the vertex in the lexical// long symbol_id_a: the symbol id of the lexical vertex we try to find//// return: a pointer to the lexical node vertex//// this method find the successor with given symbol ID.// If it is not exsited, return a NULL.//LexicalTree::GVLexicalNode*LexicalTree::findSuccVert(GVLexicalNode*& lex_vert_a, long symbol_id_a ) { // algorithm for this method: // self-explain // GVLexicalNode* succ_vert = (GVLexicalNode*)NULL; boolean more_arcs = false; for ( more_arcs = lex_vert_a->gotoFirst(); more_arcs; more_arcs = lex_vert_a->gotoNext() ) { // get the successor // succ_vert = lex_vert_a->getCurr()->getVertex(); LexicalNode* curr_node = (LexicalNode*)succ_vert->getItem(); // find a lexical node vertex with the same id // if (symbol_id_a == curr_node->getSymbolId()){ return succ_vert; } } succ_vert = (GVLexicalNode*)NULL; // exit gracefully // return (GVLexicalNode*) succ_vert; }// method: insertArc//// arguments:// GVLexicalNode* start_vertex: (input) the start vertex// GVLexicalNode* end_vertex: (input) the ending vertex// boolean is_epsilon: (input) is the arc an epsilon transition// float weight: (input) the weight on the arc (if any)// // return: a boolean value indicating status//// this method inserts an arc between two vertices in the graph provided// that the two vertices are already present in the graph.// note: the arc inserted will be stored in the SingleLinkedList ( contain// all arcs) of the start vertex//boolean LexicalTree::insertArc(GVLexicalNode* start_vertex_a, GVLexicalNode* end_vertex_a, boolean is_epsilon_a, float weight_a) { // make sure neither vertex is NULL // if ((start_vertex_a == (GVLexicalNode*)NULL) || (end_vertex_a == (GVLexicalNode*)NULL)) { return Error::handle(name(), L"insertArc", Error::NULL_ARG, __FILE__, __LINE__); } // add an arc from the start to the end // boolean return_val = start_vertex_a->insertArc(end_vertex_a, weight_a, is_epsilon_a); // exit gracefully // return return_val;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -