index.cpp

来自「奇趣公司比较新的qt/emd版本」· C++ 代码 · 共 573 行 · 第 1/2 页

CPP
573
字号
        dict.insert( key, new Entry( docs ) );    }    f.close();    readDocumentList();}void Index::readDocumentList(){    QFile f( docListFile );    if ( !f.open(QFile::ReadOnly ) )        return;    QDataStream s( &f );    s >> docList;}QStringList Index::query( const QStringList &terms, const QStringList &termSeq, const QStringList &seqWords ){    QList<Term> termList;    for (QStringList::ConstIterator it = terms.begin(); it != terms.end(); ++it ) {        Entry *e = 0;        if ( (*it).contains(QLatin1Char('*')) ) {            QVector<Document> wcts = setupDummyTerm( getWildcardTerms( *it ) );            termList.append( Term(QLatin1String("dummy"), wcts.count(), wcts ) );        } else if ( dict[ *it ] ) {            e = dict[ *it ];            termList.append( Term( *it, e->documents.count(), e->documents ) );        } else {            return QStringList();        }    }    if ( !termList.count() )        return QStringList();    qSort(termList);    QVector<Document> minDocs = termList.takeFirst().documents;    for(QList<Term>::Iterator it = termList.begin(); it != termList.end(); ++it) {        Term *t = &(*it);        QVector<Document> docs = t->documents;        for(QVector<Document>::Iterator minDoc_it = minDocs.begin(); minDoc_it != minDocs.end(); ) {            bool found = false;            for (QVector<Document>::ConstIterator doc_it = docs.constBegin(); doc_it != docs.constEnd(); ++doc_it ) {                if ( (*minDoc_it).docNumber == (*doc_it).docNumber ) {                    (*minDoc_it).frequency += (*doc_it).frequency;                    found = true;                    break;                }            }            if ( !found )                minDoc_it = minDocs.erase( minDoc_it );            else                ++minDoc_it;        }    }    QStringList results;    qSort( minDocs );    if ( termSeq.isEmpty() ) {        for(QVector<Document>::Iterator it = minDocs.begin(); it != minDocs.end(); ++it)            results << docList.at((int)(*it).docNumber);        return results;    }    QString fileName;    for(QVector<Document>::Iterator it = minDocs.begin(); it != minDocs.end(); ++it) {        fileName =  docList[ (int)(*it).docNumber ];        if ( searchForPattern( termSeq, seqWords, fileName ) )            results << fileName;    }    return results;}QString Index::getDocumentTitle( const QString &fullFileName ){    QUrl url(fullFileName);    QString fileName = url.toLocalFile();    if (documentTitleCache.contains(fileName))        return documentTitleCache.value(fileName);    QFile file( fileName );    if ( !file.open( QFile::ReadOnly ) ) {        qWarning( "cannot open file %s", qPrintable(fileName) );        return fileName;    }    QTextStream s( &file );    QString text = s.readAll();    int start = text.indexOf(QLatin1String("<title>"), 0, Qt::CaseInsensitive) + 7;    int end = text.indexOf(QLatin1String("</title>"), 0, Qt::CaseInsensitive);    QString title = tr("Untitled");    if (end - start > 0) {        title = text.mid(start, end - start);        if (Qt::mightBeRichText(title)) {            QTextDocument doc;            doc.setHtml(title);            title = doc.toPlainText();        }    }    documentTitleCache.insert(fileName, title);    return title;}QStringList Index::getWildcardTerms( const QString &term ){    QStringList lst;    QStringList terms = split( term );    QStringList::Iterator iter;    for(QHash<QString, Entry*>::Iterator it = dict.begin(); it != dict.end(); ++it) {        int index = 0;        bool found = false;        QString text( it.key() );        for ( iter = terms.begin(); iter != terms.end(); ++iter ) {            if ( *iter == QLatin1String("*") ) {                found = true;                continue;            }            if ( iter == terms.begin() && (*iter)[0] != text[0] ) {                found = false;                break;            }            index = text.indexOf( *iter, index );            if ( *iter == terms.last() && index != (int)text.length()-1 ) {                index = text.lastIndexOf( *iter );                if ( index != (int)text.length() - (int)(*iter).length() ) {                    found = false;                    break;                }            }            if ( index != -1 ) {                found = true;                index += (*iter).length();                continue;            } else {                found = false;                break;            }        }        if ( found )            lst << text;    }    return lst;}QStringList Index::split( const QString &str ){    QStringList lst;    int j = 0;    int i = str.indexOf(QLatin1Char('*'), j );    if (str.startsWith(QLatin1String("*")))        lst << QLatin1String("*");    while ( i != -1 ) {        if ( i > j && i <= (int)str.length() ) {            lst << str.mid( j, i - j );            lst << QLatin1String("*");        }        j = i + 1;        i = str.indexOf(QLatin1Char('*'), j );    }    int l = str.length() - 1;    if ( str.mid( j, l - j + 1 ).length() > 0 )        lst << str.mid( j, l - j + 1 );    return lst;}QVector<Document> Index::setupDummyTerm( const QStringList &terms ){    QList<Term> termList;    for (QStringList::ConstIterator it = terms.begin(); it != terms.end(); ++it) {        Entry *e = 0;        if ( dict[ *it ] ) {            e = dict[ *it ];            termList.append( Term( *it, e->documents.count(), e->documents ) );        }    }    QVector<Document> maxList(0);    if ( !termList.count() )        return maxList;    qSort(termList);    maxList = termList.takeLast().documents;    for(QList<Term>::Iterator it = termList.begin(); it != termList.end(); ++it) {        Term *t = &(*it);        QVector<Document> docs = t->documents;        for (QVector<Document>::iterator docIt = docs.begin(); docIt != docs.end(); ++docIt ) {            if ( maxList.indexOf( *docIt ) == -1 )                maxList.append( *docIt );        }    }    return maxList;}void Index::buildMiniDict( const QString &str ){    if ( miniDict[ str ] )        miniDict[ str ]->positions.append( wordNum );    ++wordNum;}bool Index::searchForPattern( const QStringList &patterns, const QStringList &words, const QString &fileName ){    QUrl url(fileName);    QString fName = url.toLocalFile();    QFile file( fName );    if ( !file.open( QFile::ReadOnly ) ) {        qWarning( "cannot open file %s", qPrintable(fName) );        return false;    }    wordNum = 3;    miniDict.clear();    QStringList::ConstIterator cIt = words.begin();    for ( ; cIt != words.end(); ++cIt )        miniDict.insert( *cIt, new PosEntry( 0 ) );    QTextStream s( &file );    QString text = s.readAll();    bool valid = true;    const QChar *buf = text.unicode();    QChar str[64];    QChar c = buf[0];    int j = 0;    int i = 0;    while ( j < text.length() ) {        if ( c == QLatin1Char('<') || c == QLatin1Char('&') ) {            valid = false;            if ( i > 1 )                buildMiniDict( QString(str,i) );            i = 0;            c = buf[++j];            continue;        }        if ( ( c == QLatin1Char('>') || c == QLatin1Char(';') ) && !valid ) {            valid = true;            c = buf[++j];            continue;        }        if ( !valid ) {            c = buf[++j];            continue;        }        if ( ( c.isLetterOrNumber() || c == QLatin1Char('_') ) && i < 63 ) {            str[i] = c.toLower();            ++i;        } else {            if ( i > 1 )                buildMiniDict( QString(str,i) );            i = 0;        }        c = buf[++j];    }    if ( i > 1 )        buildMiniDict( QString(str,i) );    file.close();    QStringList::ConstIterator patIt = patterns.begin();    QStringList wordLst;    QList<uint> a, b;    QList<uint>::iterator aIt;    for ( ; patIt != patterns.end(); ++patIt ) {        wordLst = (*patIt).split(QLatin1Char(' '));        a = miniDict[ wordLst[0] ]->positions;        for ( int j = 1; j < (int)wordLst.count(); ++j ) {            b = miniDict[ wordLst[j] ]->positions;            aIt = a.begin();            while ( aIt != a.end() ) {                if ( b.contains( *aIt + 1 )) {                    (*aIt)++;                    ++aIt;                } else {                    aIt = a.erase( aIt );                }            }        }    }    if ( a.count() )        return true;    return false;}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?