📄 triebuilder.cxx
字号:
// Copyright (c) 1994 James Clark// See the file COPYING for copying permission.#ifdef __GNUG__#pragma implementation#endif#include "splib.h"#include "types.h"#include "macros.h"#include "StringOf.h"#include "Trie.h"#include "TrieBuilder.h"#include "Priority.h"#include <stdlib.h>#ifdef SP_NAMESPACEnamespace SP_NAMESPACE {#endifTrie::~Trie(){ if (next_) delete [] next_;}Trie::Trie(const Trie &t): nCodes_(t.nCodes_), token_(t.token_), tokenLength_(t.tokenLength_), priority_(t.priority_), blank_(t.blank_){ if (t.next_) { next_ = new Trie[nCodes_]; for (int i = 0; i < nCodes_; i++) next_[i] = t.next_[i]; } else next_ = 0;}Trie &Trie::operator=(const Trie &t){ if (next_) delete [] next_; nCodes_ = t.nCodes_; token_ = t.token_; tokenLength_ = t.tokenLength_; priority_ = t.priority_; blank_ = t.blank_; if (t.next_) { next_ = new Trie[nCodes_]; for (int i = 0; i < nCodes_; i++) next_[i] = t.next_[i]; } else next_ = 0; return *this;}TrieBuilder::TrieBuilder(int nCodes): nCodes_(nCodes), root_(new Trie){ root_->token_ = 0; root_->tokenLength_ = 0; root_->priority_ = Priority::data; root_->nCodes_ = nCodes;}void TrieBuilder::recognize(const String<EquivCode> &chars, Token t, Priority::Type priority, TokenVector &ambiguities){ setToken(extendTrie(root_.pointer(), chars), chars.size(), t, priority, ambiguities);}void TrieBuilder::recognize(const String<EquivCode> &chars, const String<EquivCode> &set, Token t, Priority::Type priority, TokenVector &ambiguities){ Trie *trie = extendTrie(root_.pointer(), chars); for (size_t i = 0; i < set.size(); i++) setToken(forceNext(trie, set[i]), chars.size() + 1, t, priority, ambiguities);}void TrieBuilder::recognizeB(const String<EquivCode> &chars, int bSequenceLength, size_t maxBlankSequence, const String<EquivCode> &blankCodes, const String<EquivCode> &chars2, Token token, TokenVector &ambiguities){ doB(extendTrie(root_.pointer(), chars), chars.size(), bSequenceLength, maxBlankSequence, blankCodes, chars2, token, Priority::blank(bSequenceLength), ambiguities);}void TrieBuilder::recognizeEE(EquivCode code, Token t){ Trie *trie = forceNext(root_.pointer(), code); trie->tokenLength_ = 0; // it has length 0 in the buffer trie->token_ = t; trie->priority_ = Priority::data;}void TrieBuilder::doB(Trie *trie, int tokenLength, int minBLength, size_t maxLength, const String<EquivCode> &blankCodes, const String<EquivCode> &chars2, Token token, Priority::Type pri, TokenVector &ambiguities){ if (minBLength == 0 && trie->next_ == 0) { if (!trie->blank_) { BlankTrie *b = new BlankTrie; trie->blank_ = b; b->maxBlanksToScan_ = maxLength; b->additionalLength_ = tokenLength; b->codeIsBlank_.assign(nCodes_, 0); for (size_t i = 0; i < blankCodes.size(); i++) b->codeIsBlank_[blankCodes[i]] = 1; b->tokenLength_ = 0; b->token_ = 0; b->priority_ = Priority::data; b->nCodes_ = nCodes_; } else { // A B sequence is not allowed to be adjacent to a character // that can occur in a blank sequence, so maxLength will be // the same at a node, no matter how we got there. ASSERT(trie->blank_->maxBlanksToScan_ == maxLength); ASSERT(trie->blank_->additionalLength_ == tokenLength); } if (chars2.size() == 0) setToken(trie, tokenLength, token, pri, ambiguities); else setToken(extendTrie(trie->blank_.pointer(), chars2), chars2.size(), token, pri, ambiguities); } else { if (minBLength == 0) setToken(extendTrie(trie, chars2), tokenLength + chars2.size(), token, pri, ambiguities); for (size_t i = 0; i < blankCodes.size(); i++) doB(forceNext(trie, blankCodes[i]), tokenLength + 1, minBLength == 0 ? 0 : minBLength - 1, maxLength - 1, blankCodes, chars2, token, pri, ambiguities); }}Trie *TrieBuilder::extendTrie(Trie *trie, const String<EquivCode> &s){ for (size_t i = 0; i < s.size(); i++) trie = forceNext(trie, s[i]); return trie;}void TrieBuilder::setToken(Trie *trie, int tokenLength, Token token, Priority::Type pri, TokenVector &ambiguities){ if (tokenLength > trie->tokenLength_ || (tokenLength == trie->tokenLength_ && pri > trie->priority_)) { trie->tokenLength_ = tokenLength; trie->token_ = token; trie->priority_ = pri; } else if (trie->tokenLength_ == tokenLength && trie->priority_ == pri && trie->token_ != token && trie->token_ != 0) { ambiguities.push_back(Token(trie->token_)); ambiguities.push_back(token); } if (trie->hasNext()) { for (int i = 0; i < nCodes_; i++) setToken(&trie->next_[i], tokenLength, token, pri, ambiguities); }}void TrieBuilder::copyInto(Trie *into, const Trie *from, int additionalLength){ if (from->token_ != 0) { TokenVector ambiguities; setToken(into, from->tokenLength_ + additionalLength, from->token_, from->priority_, ambiguities); ASSERT(ambiguities.size() == 0); } if (from->hasNext()) for (int i = 0; i < nCodes_; i++) copyInto(forceNext(into, i), &from->next_[i], additionalLength);}Trie *TrieBuilder::forceNext(Trie *trie, EquivCode c){ if (!trie->hasNext()) { trie->next_ = new Trie[nCodes_]; if (trie->blank_) { trie->blank_->additionalLength_ += 1; trie->blank_->maxBlanksToScan_ -= 1; } Owner<BlankTrie> blankOwner(trie->blank_.extract()); const BlankTrie *b = blankOwner.pointer(); for (int i = 0; i < nCodes_; i++) { Trie *p = &trie->next_[i]; if (b && b->codeIsBlank(i)) trie->next_[i].blank_ = (blankOwner ? blankOwner.extract() : new BlankTrie(*b)); p->token_ = trie->token_; p->tokenLength_ = trie->tokenLength_; p->priority_ = trie->priority_; p->nCodes_ = nCodes_; } if (b) // -1 because 1 was added above copyInto(trie, b, b->additionalLength_ - 1); } return &trie->next_[c];}#ifdef SP_NAMESPACE}#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -