📄 lex_06.cc
字号:
//// file: $isip/class/search/LexicalTree/lex_06.cc// version: $Id: lex_06.cc,v 1.8 2002/11/19 22:32:57 parihar Exp $////isip include files//#include "LexicalTree.h"// method: expandLexicalTree//// arguments:// Digraph<LexicalNode>*& word_graph_a: (input/output) word level graph// ==> typedef LexicalNode SearchNode;// ==> GraphVertex<LexicalNode> GVLexicalNode;//// Vector<DiGraph<LexicalNode>>& pron_vec_a : (input) the pronunciation for// each word in the symbol_list_a; It is a 1:1// mapping. Currently, it is defined as:// ==> typedef DiGraph<LexicalNode> Pronunciation;//// long level_a: (input) from which level to expand// //// return: a boolean indicating status//// note: the symbol id on the vertex should be the index of the digraph in// the(input) pron_vec_a//// this method expand the word graph to a lexical tree structure//booleanLexicalTree::expandLexicalTree(DiGraph<LexicalNode>& word_graph_a, const Vector<DiGraph<LexicalNode> >& pron_vec_a, long level_a){ // algorithm for this method: // 1 LOOP: // iterate through all the vertexs on this word graph // 1.1 build the lexical tree for that node // 1.2 insert the lexical tree in a list // 2 Exit // local variable // boolean end_loop = false; // get the start // GVLexicalNode* root_vert = word_graph_a.getStart(); if ( !expandLexicalTree(root_vert, pron_vec_a, level_a) ){ Error::handle(name(), L"expandLexicalTree", Error::NULL_ARG, __FILE__, __LINE__); return false; } // loop over all current vertex // end_loop = (!word_graph_a.gotoFirst()); // ISIP_BUG, why not have empty for DiGraph // //boolean end_loop = word_graph_a.isEmpty(); // iterate through all the vertexs on this word graph // while ( !end_loop){ // loop until we reached the marked element // end_loop = word_graph_a.isLast(); // get the current vertex // root_vert = word_graph_a.getCurr(); word_graph_a.gotoNext(); word_graph_a.setMark(); word_graph_a.gotoPrev(); // we don't expand those vertex at lower level // if ( root_vert->getItem()->getSearchLevel()->getLevelIndex() > level_a ){ word_graph_a.gotoMark(); continue; } // we need set the pointer to this lexical tree on root_vert later // if ( !expandLexicalTree(root_vert, pron_vec_a,level_a) ){ Error::handle(name(), L"expandLexicalTree", Error::NULL_ARG, __FILE__, __LINE__); return false; } // go to the next one // word_graph_a.gotoMark(); } // exit gracefully // return true;} // method: expandLexicalTree//// arguments:// LexicalNode* root_vert_a: (input) the node from which we expend the// the Lexical tree. Currently, it is defined as:// ==> typedef LexicalNode SearchNode;// ==> GraphVertex<LexicalNode> GVLexicalNode;//// Vector<DiGraph<LexicalNode>>& pron_vec_a : (input) the pronunciation for// each word in the symbol_list_a; It is a 1:1// mapping. Currently, it is defined as:// ==> typedef DiGraph<LexicalNode> Pronunciation;//// long level_a: (input) from which level to expand//// return: a boolean indicating status//// note: the symbol id on the vertex should be the index of the digraph in// the(input) pron_vec_a//// this method expand the lexical tree from a lexical node in the language// model ( word graph / lattice ) which is represented as a// DiGraph<LexicalNode>//boolean LexicalTree::expandLexicalTree(GVLexicalNode*& root_vert_a, const Vector<DiGraph<LexicalNode> >& pron_vec_a, long level_a ){ // algorithm for this method: // 1 LOOP: // iterate through all the arcs emitted from root_vert_a // 1.1 find the subgraph of each successor of root_vert_a // 1.2 insert the pronunciation of the successor // 2 Exit // local variable // GALexicalNode* curr_arc = (GALexicalNode*)NULL; // test the pointer // if (root_vert_a == NULL){ Error::handle(name(), L"expandLexicalTree", Error::NULL_ARG, __FILE__, __LINE__); return false; } // no lexical tree for this vertex // if (root_vert_a->isTerm()){ Error::handle(name(), L"expandLexicalTree", Error::NULL_ARG, __FILE__, __LINE__); return false; } // iterate through all the arcs emitted from this vertex // root_vert_a->gotoLast(); root_vert_a->setMark(); boolean end_loop = root_vert_a->isEmpty(); // iterate through all the vertexs on this word graph // while ( !end_loop ) { // get the current one // root_vert_a->gotoFirst(); end_loop = root_vert_a->isMarkedElement(); // remove current arc // root_vert_a->removeFirst(curr_arc); // get the successor // GVLexicalNode* successor = curr_arc->getVertex(); // has a path to term // if (successor->isTerm()){ root_vert_a->insertLast(curr_arc); continue; } // we don't expand those vertex with dummy node // if ( successor->getItem()->isDummyNode() ){ root_vert_a->insertLast(curr_arc); continue; } // we don't expand those vertex at different level // if ( successor->getItem()->getSearchLevel()->getLevelIndex() != level_a ){ root_vert_a->insertLast(curr_arc); continue; } float weight = 0; // get the transition probability // if ( root_vert_a->getParentGraph()->isWeighted() && (! curr_arc->getEpsilon()) ){ weight = curr_arc->getWeight(); } LexicalNode* curr_node = (LexicalNode*)successor->getItem(); long symbol_id = curr_node->getSymbolId(); // error handling // if ( symbol_id < 0 || symbol_id >= pron_vec_a.length()){ Long error(symbol_id); error.debug(L"invalid symbol id: "); Error::handle(name(), L"expandLexicalTree", Error::NULL_ARG, __FILE__, __LINE__); return (LexicalTree*)NULL; } // remove current arc and insert new arcs at the last // //root_vert_a->removeArc(); root_vert_a->gotoLast(); // set the start vert of the of the subgraph // GVLexicalNode* start_vert = pron_vec_a(symbol_id).getStart(); // insert the pronunciation to the lexical tree // if( !expandSubgraph(successor, root_vert_a, start_vert, weight, false)){ delete curr_arc; Error::handle(name(), L"expandLexicalTree", Error::NULL_ARG, __FILE__, __LINE__); return false; } delete curr_arc; } // exit gracefully // return true;}// method: expandSubgraph//// arguments:// const GVLexicalNode*& word_vert_a: the subgraph is from the Pronunciation// of this// const GVLexilex_vert_a: current vertex in the lexical// const GVLexicalNode*& curr_pron_vert_a: current vertex in the pronunciation// graph// boolean insert_mode_a = true: insert the subgraph without checking//// note: ==> typedef DiGraph<LexicalNode> Pronunciation;//// return: a boolean indicating status//// this method insert a subgraph of a pronunciation into the lexical tree// the pronunciation is weighted in this case//boolean LexicalTree::expandSubgraph(GVLexicalNode*& word_vert_a, GVLexicalNode*& curr_lex_vert_a, GVLexicalNode*& curr_pron_vert_a, const float & pron_prob_a, boolean insert_mode_a) { // algorithm for this method // 1 Error checking // 2 IF: insert_mode == true // insert the subgraph of curr_pron_vert to the lexical tree // ( append to curr_lex_vert), no checking for any successors // ELSE: insert_mode == false // check every vertex(succ_pron_vert) following curr_pron_vert. // IF: this successor is a successor(succ_lex_vert) of curr_lex_vert // call recursively: // expandSubgraph(succ_lex_vert, succ_pron_vert, false); // ELSE: // call recursively: // expandSubgraph(succ_lex_vert, succ_pron_vert, true); // 3 Exit // long symbol_id = -1; boolean more_arcs = false; float pron_prob = pron_prob_a; // error checking // if ( curr_lex_vert_a == NULL || curr_pron_vert_a == NULL ){ return Error::handle(name(), L"expandSubgraph", Error::NULL_ARG, __FILE__, __LINE__); } // shouldn't insert a subgraph with only term vertex // if ( curr_pron_vert_a->isTerm()){ return Error::handle(name(), L"expandSubgraph", Error::NULL_ARG, __FILE__, __LINE__); } // insert anyway // if ( insert_mode_a ){ // iterate through all the arcs emitted from this vertex // for (more_arcs = curr_pron_vert_a->gotoFirst(); more_arcs; more_arcs = curr_pron_vert_a->gotoNext()) { // get the successor // GVLexicalNode* succ_pron_vert = curr_pron_vert_a->getCurr()->getVertex(); // accumulate the probability if it is not an epsilon transition // if ( curr_pron_vert_a->getParentGraph()->isWeighted() ){ if (! curr_pron_vert_a->getCurr()->getEpsilon()){ pron_prob = pron_prob_a + curr_pron_vert_a->getCurr()->getWeight(); } } LexicalNode* curr_node = (LexicalNode*)NULL; // at the end of pronunciation // we link it to the up level symbol // if ( succ_pron_vert->isTerm() ){ // insert the arc to the word with probability // //insertArc(curr_lex_vert_a, word_vert_a, false, pron_prob); curr_lex_vert_a->insertArc(word_vert_a,pron_prob,false); if (debug_level_d > Integral::BRIEF) { curr_node = (LexicalNode*)word_vert_a->getItem(); curr_node->debug(L"link to up level symbol"); } continue; } // get the lexical node, used for pruning // curr_node = (LexicalNode*)succ_pron_vert->getItem(); curr_node = new LexicalNode(*curr_node); // insert this pronunciation vertex into the lex tree // GVLexicalNode* succ_lex_vert = curr_lex_vert_a-> getParentGraph()->insertVertex(curr_node); if (debug_level_d > Integral::BRIEF) { curr_node->debug(L"add to lexical tree"); } // insert an epsilon arc, the pronunciation probability will // only on the arc to the word // //insertArc(curr_lex_vert_a, succ_lex_vert, true); curr_lex_vert_a->insertArc(succ_lex_vert,true); // call recursively // expandSubgraph(word_vert_a, succ_lex_vert, succ_pron_vert, pron_prob, true); } } // end if insert_mode_a == true // don't insert those with the same symbol_id // else { // iterate through all the arcs emitted from this vertex // for (more_arcs = curr_pron_vert_a->gotoFirst(); more_arcs; more_arcs = curr_pron_vert_a->gotoNext()) { // get the successor // GVLexicalNode* succ_pron_vert = curr_pron_vert_a->getCurr()->getVertex(); // accumulate the probability if it is not an epsilon transition // if ( curr_pron_vert_a->getParentGraph()->isWeighted() ){ if (! curr_pron_vert_a->getCurr()->getEpsilon()){ pron_prob = pron_prob_a + curr_pron_vert_a->getCurr()->getWeight(); } } LexicalNode* curr_node = (LexicalNode*)NULL; // at the end of pronunciation // we link it to the up level symbol // if ( succ_pron_vert->isTerm() ){ // insert the arc to the word with probability // //insertArc(curr_lex_vert_a, word_vert_a, false, pron_prob); curr_lex_vert_a->insertArc(word_vert_a,pron_prob,false); if (debug_level_d > Integral::BRIEF) { curr_node = (LexicalNode*)word_vert_a->getItem(); curr_node->debug(L"link to up level symbol"); } continue; } // get the lexical node, generate new nodes, will be used for pruning // curr_node = (LexicalNode*)succ_pron_vert->getItem(); // get the symbol id of this node // symbol_id = curr_node->getSymbolId(); // find the vertex with the same symbol_id in the lexical tree // GVLexicalNode* succ_lex_vert = findSuccVert(curr_lex_vert_a, symbol_id); // we can find a successor with the same id at the same level // insert the subgraph ( with checking ) // if ( succ_lex_vert != NULL && (succ_lex_vert->getItem()->getSearchLevel()->getLevelIndex() == word_vert_a->getItem()->getSearchLevel()->getLevelIndex()+1)){ expandSubgraph(word_vert_a, succ_lex_vert, succ_pron_vert, pron_prob, false); } // we can't find a successor with the same id at the same level // insert all subgraph ( without checking ) // else { if (debug_level_d > Integral::BRIEF) { curr_node->debug(L"add to lexical tree"); } // insert this pronunciation vertex into the lex tree // curr_node = new LexicalNode(*curr_node); succ_lex_vert = curr_lex_vert_a-> getParentGraph()->insertVertex(curr_node); //insertArc(curr_lex_vert_a, succ_lex_vert, true); curr_lex_vert_a->insertArc(succ_lex_vert,false); // insert this node and all following nodes // expandSubgraph(word_vert_a, succ_lex_vert, succ_pron_vert, pron_prob, true); } } // end of for loop } // end if insert_mode_a == false ( insert with checking ) // exit gracefully // return true;}// method: buildLexicalTree//// arguments:// Digraph<LexicalNode>*& word_graph_a: (input) word level graph// ==> typedef LexicalNode SearchNode;// ==> GraphVertex<LexicalNode> GVLexicalNode;// Vector<DiGraph<LexicalNode>>& pron_vec_a : (input) the pronunciation for// each word in the symbol_list_a; It is a 1:1// mapping. Currently, it is defined as:// ==> typedef DiGraph<LexicalNode> Pronunciation;// Vector<LexicalTree> tree_vec_a: (output) a vector to hold all the// lexical trees//// return: a boolean indicating status//// note: the symbol id on the vertex should be the index of the digraph in// the(input) pron_vec_a//// this method build the lexical tree for a lexical node in the language
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -