index.cpp
来自「奇趣公司比较新的qt/emd版本」· C++ 代码 · 共 573 行 · 第 1/2 页
CPP
573 行
dict.insert( key, new Entry( docs ) ); } f.close(); readDocumentList();}void Index::readDocumentList(){ QFile f( docListFile ); if ( !f.open(QFile::ReadOnly ) ) return; QDataStream s( &f ); s >> docList;}QStringList Index::query( const QStringList &terms, const QStringList &termSeq, const QStringList &seqWords ){ QList<Term> termList; for (QStringList::ConstIterator it = terms.begin(); it != terms.end(); ++it ) { Entry *e = 0; if ( (*it).contains(QLatin1Char('*')) ) { QVector<Document> wcts = setupDummyTerm( getWildcardTerms( *it ) ); termList.append( Term(QLatin1String("dummy"), wcts.count(), wcts ) ); } else if ( dict[ *it ] ) { e = dict[ *it ]; termList.append( Term( *it, e->documents.count(), e->documents ) ); } else { return QStringList(); } } if ( !termList.count() ) return QStringList(); qSort(termList); QVector<Document> minDocs = termList.takeFirst().documents; for(QList<Term>::Iterator it = termList.begin(); it != termList.end(); ++it) { Term *t = &(*it); QVector<Document> docs = t->documents; for(QVector<Document>::Iterator minDoc_it = minDocs.begin(); minDoc_it != minDocs.end(); ) { bool found = false; for (QVector<Document>::ConstIterator doc_it = docs.constBegin(); doc_it != docs.constEnd(); ++doc_it ) { if ( (*minDoc_it).docNumber == (*doc_it).docNumber ) { (*minDoc_it).frequency += (*doc_it).frequency; found = true; break; } } if ( !found ) minDoc_it = minDocs.erase( minDoc_it ); else ++minDoc_it; } } QStringList results; qSort( minDocs ); if ( termSeq.isEmpty() ) { for(QVector<Document>::Iterator it = minDocs.begin(); it != minDocs.end(); ++it) results << docList.at((int)(*it).docNumber); return results; } QString fileName; for(QVector<Document>::Iterator it = minDocs.begin(); it != minDocs.end(); ++it) { fileName = docList[ (int)(*it).docNumber ]; if ( searchForPattern( termSeq, seqWords, fileName ) ) results << fileName; } return results;}QString Index::getDocumentTitle( const QString &fullFileName ){ QUrl url(fullFileName); QString fileName = url.toLocalFile(); if (documentTitleCache.contains(fileName)) return documentTitleCache.value(fileName); QFile file( fileName ); if ( !file.open( QFile::ReadOnly ) ) { qWarning( "cannot open file %s", qPrintable(fileName) ); return fileName; } QTextStream s( &file ); QString text = s.readAll(); int start = text.indexOf(QLatin1String("<title>"), 0, Qt::CaseInsensitive) + 7; int end = text.indexOf(QLatin1String("</title>"), 0, Qt::CaseInsensitive); QString title = tr("Untitled"); if (end - start > 0) { title = text.mid(start, end - start); if (Qt::mightBeRichText(title)) { QTextDocument doc; doc.setHtml(title); title = doc.toPlainText(); } } documentTitleCache.insert(fileName, title); return title;}QStringList Index::getWildcardTerms( const QString &term ){ QStringList lst; QStringList terms = split( term ); QStringList::Iterator iter; for(QHash<QString, Entry*>::Iterator it = dict.begin(); it != dict.end(); ++it) { int index = 0; bool found = false; QString text( it.key() ); for ( iter = terms.begin(); iter != terms.end(); ++iter ) { if ( *iter == QLatin1String("*") ) { found = true; continue; } if ( iter == terms.begin() && (*iter)[0] != text[0] ) { found = false; break; } index = text.indexOf( *iter, index ); if ( *iter == terms.last() && index != (int)text.length()-1 ) { index = text.lastIndexOf( *iter ); if ( index != (int)text.length() - (int)(*iter).length() ) { found = false; break; } } if ( index != -1 ) { found = true; index += (*iter).length(); continue; } else { found = false; break; } } if ( found ) lst << text; } return lst;}QStringList Index::split( const QString &str ){ QStringList lst; int j = 0; int i = str.indexOf(QLatin1Char('*'), j ); if (str.startsWith(QLatin1String("*"))) lst << QLatin1String("*"); while ( i != -1 ) { if ( i > j && i <= (int)str.length() ) { lst << str.mid( j, i - j ); lst << QLatin1String("*"); } j = i + 1; i = str.indexOf(QLatin1Char('*'), j ); } int l = str.length() - 1; if ( str.mid( j, l - j + 1 ).length() > 0 ) lst << str.mid( j, l - j + 1 ); return lst;}QVector<Document> Index::setupDummyTerm( const QStringList &terms ){ QList<Term> termList; for (QStringList::ConstIterator it = terms.begin(); it != terms.end(); ++it) { Entry *e = 0; if ( dict[ *it ] ) { e = dict[ *it ]; termList.append( Term( *it, e->documents.count(), e->documents ) ); } } QVector<Document> maxList(0); if ( !termList.count() ) return maxList; qSort(termList); maxList = termList.takeLast().documents; for(QList<Term>::Iterator it = termList.begin(); it != termList.end(); ++it) { Term *t = &(*it); QVector<Document> docs = t->documents; for (QVector<Document>::iterator docIt = docs.begin(); docIt != docs.end(); ++docIt ) { if ( maxList.indexOf( *docIt ) == -1 ) maxList.append( *docIt ); } } return maxList;}void Index::buildMiniDict( const QString &str ){ if ( miniDict[ str ] ) miniDict[ str ]->positions.append( wordNum ); ++wordNum;}bool Index::searchForPattern( const QStringList &patterns, const QStringList &words, const QString &fileName ){ QUrl url(fileName); QString fName = url.toLocalFile(); QFile file( fName ); if ( !file.open( QFile::ReadOnly ) ) { qWarning( "cannot open file %s", qPrintable(fName) ); return false; } wordNum = 3; miniDict.clear(); QStringList::ConstIterator cIt = words.begin(); for ( ; cIt != words.end(); ++cIt ) miniDict.insert( *cIt, new PosEntry( 0 ) ); QTextStream s( &file ); QString text = s.readAll(); bool valid = true; const QChar *buf = text.unicode(); QChar str[64]; QChar c = buf[0]; int j = 0; int i = 0; while ( j < text.length() ) { if ( c == QLatin1Char('<') || c == QLatin1Char('&') ) { valid = false; if ( i > 1 ) buildMiniDict( QString(str,i) ); i = 0; c = buf[++j]; continue; } if ( ( c == QLatin1Char('>') || c == QLatin1Char(';') ) && !valid ) { valid = true; c = buf[++j]; continue; } if ( !valid ) { c = buf[++j]; continue; } if ( ( c.isLetterOrNumber() || c == QLatin1Char('_') ) && i < 63 ) { str[i] = c.toLower(); ++i; } else { if ( i > 1 ) buildMiniDict( QString(str,i) ); i = 0; } c = buf[++j]; } if ( i > 1 ) buildMiniDict( QString(str,i) ); file.close(); QStringList::ConstIterator patIt = patterns.begin(); QStringList wordLst; QList<uint> a, b; QList<uint>::iterator aIt; for ( ; patIt != patterns.end(); ++patIt ) { wordLst = (*patIt).split(QLatin1Char(' ')); a = miniDict[ wordLst[0] ]->positions; for ( int j = 1; j < (int)wordLst.count(); ++j ) { b = miniDict[ wordLst[j] ]->positions; aIt = a.begin(); while ( aIt != a.end() ) { if ( b.contains( *aIt + 1 )) { (*aIt)++; ++aIt; } else { aIt = a.erase( aIt ); } } } } if ( a.count() ) return true; return false;}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?