📄 qdawg.cpp
字号:
delete club; } } bool write(QIODevice* dev) { QDataStream ds(dev); ds.writeRawBytes(dawg_sig64,8); // #### endianness problem ignored. //always write new style ds.writeBytes((char*)node,sizeof(QDawg::Node)*nodes); /*char * a = (char*)node; for (int i = 0; i < sizeof(QDawg::Node)*nodes; i++) { if ((i % 8) == 0) printf("\n"); printf(" %x ", (unsigned char)*a); a++; }*/ return dev->state() == IO_Ok; } void dumpWords(int nid=0, int index=0) { static char word[256]; // ick latin1 int i=0; do { QDawg::Node& n = node[nid+i]; word[index] = n.let; if ( n.isword ) fprintf(stderr,"%.*s\n",index+1,word); if ( n.offset ) dumpWords(n.offset+nid+i,index+1); } while (!node[nid+i++].islast); } void dump(int nid=0, int indent=0) { int i=0; do { QDawg::Node& n = node[nid+i]; fprintf(stderr,"%d: ",nid+i); for (int in=0; in<indent; in++) fputc(' ',stderr); fprintf(stderr," %c %d %d %d\n",n.let, n.isword,n.islast,n.offset); if ( n.offset ) dump(n.offset+nid+i,indent+2); } while (!node[nid+i++].islast); } int countWords(int nid=0) { int t=0; int i=0; do { QDawg::Node& n = node[nid+i]; if ( n.isword ) t++; if ( n.offset ) t+=countWords(n.offset+nid+i); } while (!node[nid+i++].islast); return t; } bool contains(const QString& s, int nid=0, int index=0) const { int i=0; do { QDawg::Node& n = node[nid+i]; if ( s[index] == QChar((ushort)n.let) ) { if ( n.isword && index == (int)s.length()-1 ) return TRUE; if ( n.offset ) return contains(s,n.offset+nid+i,index+1); } } while (!node[nid+i++].islast); return FALSE; } void appendAllWords(QStringList& list, int nid=0, QString s="") const { int i=0; int next = s.length(); do { QDawg::Node& n = node[nid+i]; s[next] = QChar((ushort)n.let); if ( n.isword ) { list.append(s); } if ( n.offset ) appendAllWords(list, n.offset+nid+i, s); } while (!node[nid+i++].islast); } const QDawg::Node* root() { return node; }private: void generateArray(QTrie* t) { nodes = 0; int n = t->collectKeys(); node = new QDawg::Node[n]; appendToArray(t); ASSERT(n == nodes); } int appendToArray(QTrie* t) { if ( !t->key ) { if ( !t->children.count() ) return 0; t->key = nodes; nodes += t->children.count(); QDawg::Node* n = &node[t->key-1]; int here = t->key; for (TrieList::Iterator it=t->children.begin(); it!=t->children.end(); ++it) { QTrie* s = (*it).p; ++n; n->let = (*it).letter.unicode(); n->isword = s->isword; n->islast = 0; n->offset = appendToArray(s); if ( n->offset ) { int t = n->offset-here; n->offset=t; if ( n->offset != t ) qWarning("Overflow: too many words"); } here++; } n->islast = 1; } return t->key; }private: int nodes; QDawg::Node *node;};/*! \class QDawg qdawg.h \brief The QDawg class provides an implementation of a Directed Acyclic Word Graph. A DAWG provides very fast look-up of words in a word list. The word list is created using readFile(), read() or createFromWords(). A list of all the DAWG's words is returned by allWords(), and the total number of words is returned by countWords(). Use contains() to see if a particular word is in the DAWG. The root \link qdawg-node.html node\endlink is returned by root(). A global DAWG is maintained for the current locale. See the \l Global class for details. The structure of a DAWG is a graph of \link qdawg-node.html Nodes\endlink. There are no cycles in the graph (since there are no inifinitely repeating words). Each \link qdawg-node.html Node\endlink is a member of a list of \link qdawg-node.html Nodes\endlink called a child list. Each \link qdawg-node.html Node\endlink in the child list has a \e letter, an \e isWord flag, at most one \e jump arc, and at most one arc to the next child in the list. If you traverse the \link qdawg-node.html Nodes\endlink in a DAWG, starting from the root(), and you concatenate all the letters from the single child in each child list that you visit, at every \link qdawg-node.html Node\endlink which has the isWord flag set your concatenation will be a word in the list represented by the DAWG. For example, the DAWG below represents the word list: ban, band, can, cane, cans, pan, pane, pans. This structuring not only provides O(1) lookup of words in the word list, but also produces a smaller storage file than a plain text file word list. \img qdawg.png*//*! Constructs a new empty DAWG.*/QDawg::QDawg(){ d = 0;}/*! Deletes the DAWG.*/QDawg::~QDawg(){ delete d;}/*! \overload Replaces all the DAWG's words with words read from \a dev.*/bool QDawg::createFromWords(QIODevice* dev){ delete d; QTextStream i(dev); QTrie* trie = new QTrie; int n=0; while (!i.atEnd()) { trie->insertWord(i.readLine()); n++; } if ( n ) d = new QDawgPrivate(trie); else d = 0; return TRUE;}/*! Replaces all the DAWG's words with the words in the \a list.*/void QDawg::createFromWords(const QStringList& list){ delete d; if ( list.count() ) { QTrie* trie = new QTrie; for (QStringList::ConstIterator it = list.begin(); it != list.end(); ++it) { QString word = (*it).utf8(); //trie->insertWord(word); trie->insertWord(*it); } d = new QDawgPrivate(trie); } else { d = 0; }}/*! Returns a list of all the words in the DAWG.*/QStringList QDawg::allWords() const{ QStringList result; if ( d ) d->appendAllWords(result); return result;}/*! Replaces the DAWG with the DAWG in \a filename. The file is memory-mapped. \sa write()*/bool QDawg::readFile(const QString& filename){ delete d; d = new QDawgPrivate(filename); if (!d->ok()) { // failed to load file into memory delete d; d = 0; } return d;}/*! Replaces the DAWG with the DAWG in \a dev. The file is memory-mapped. \sa write()*/bool QDawg::read(QIODevice* dev){ delete d; d = new QDawgPrivate(dev); if ( d->ok() ) return TRUE; delete d; d = 0; return FALSE;}/*! Writes the DAWG to \a dev, in a custom QDAWG format. \warning QDawg memory maps DAWG files. The safe method for writing to DAWG files is to write the data to a new file and move the new file to the old file name. QDawgs using the old file will continue using that file.*/bool QDawg::write(QIODevice* dev) const{ return d ? d->write(dev) : TRUE;}/*! Returns the number of words in the DAWG.*/int QDawg::countWords() const{ return d ? d->countWords() : 0;}/*! Returns the root \link qdawg-node.html Node\endlink of the DAWG.*/const QDawg::Node* QDawg::root() const{ return d ? d->root() : 0;}/*! Returns TRUE if the DAWG contains the word \a s; otherwise returns FALSE.*/bool QDawg::contains(const QString& s) const{ return d ? d->contains(s) : FALSE;}/*! \internal For debugging: prints out the DAWG contents.*/void QDawg::dump() const{ if ( d ) d->dump();}/*! \class QDawg::Node qdawg.h \brief The QDawg::Node class represents one node of a QDawg.*//*! \fn QChar QDawg::Node::letter() const Returns this Node's letter.*//*! \fn bool QDawg::Node::isWord() const Returns TRUE if this Node is the end of a word; otherwise returns FALSE.*//*! \fn bool QDawg::Node::isLast() const Returns TRUE if this Node is the last in the child list; otherwise returns FALSE.*//*! \fn const Node* QDawg::Node::next() const Returns the next child Node in the child list or 0 if the current Node isLast().*//*! \fn const Node* QDawg::Node::jump() const Returns the node connected to this Node.*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -