📄 textiterator.cpp

📁 linux下开源浏览器WebKit的源码,市面上的很多商用浏览器都是移植自WebKit
💻 CPP
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 3 4
    }    size_t usableLength = min(m_buffer.capacity() - m_buffer.size(), length);    ASSERT(usableLength);    m_buffer.append(characters, usableLength);    return usableLength;}inline bool SearchBuffer::atBreak() const{    return m_atBreak;}inline void SearchBuffer::reachedBreak(){    m_atBreak = true;}inline size_t SearchBuffer::search(size_t& start){    size_t size = m_buffer.size();    if (m_atBreak) {        if (!size)            return 0;    } else {        if (size != m_buffer.capacity())            return 0;    }    UStringSearch* searcher = WebCore::searcher();    UErrorCode status = U_ZERO_ERROR;    usearch_setText(searcher, m_buffer.data(), size, &status);    ASSERT(status == U_ZERO_ERROR);    int matchStart = usearch_first(searcher, &status);    ASSERT(status == U_ZERO_ERROR);    if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) {        ASSERT(matchStart == USEARCH_DONE);        return 0;    }    // Matches that start in the overlap area are only tentative.    // The same match may appear later, matching more characters,    // possibly including a combining character that's not yet in the buffer.    if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) {        memcpy(m_buffer.data(), m_buffer.data() + size - m_overlap, m_overlap * sizeof(UChar));        m_buffer.shrink(m_overlap);        return 0;    }    size_t newSize = size - (matchStart + 1);    memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar));    m_buffer.shrink(newSize);    start = size - matchStart;    return usearch_getMatchedLength(searcher);}#else // !ICU_UNICODEinline SearchBuffer::SearchBuffer(const String& target, bool isCaseSensitive)    : m_target(isCaseSensitive ? 
target : target.foldCase())    , m_isCaseSensitive(isCaseSensitive)    , m_buffer(m_target.length())    , m_isCharacterStartBuffer(m_target.length())    , m_isBufferFull(false)    , m_cursor(0){    ASSERT(!m_target.isEmpty());    m_target.replace(noBreakSpace, ' ');}inline SearchBuffer::~SearchBuffer(){}inline void SearchBuffer::reachedBreak(){    m_cursor = 0;    m_isBufferFull = false;}inline bool SearchBuffer::atBreak() const{    return !m_cursor && !m_isBufferFull;}inline void SearchBuffer::append(UChar c, bool isStart){    m_buffer[m_cursor] = c == noBreakSpace ? ' ' : c;    m_isCharacterStartBuffer[m_cursor] = isStart;    if (++m_cursor == m_target.length()) {        m_cursor = 0;        m_isBufferFull = true;    }}inline size_t SearchBuffer::append(const UChar* characters, size_t length){    ASSERT(length);    if (m_isCaseSensitive) {        append(characters[0], true);        return 1;    }    const int maxFoldedCharacters = 16; // sensible maximum is 3, this should be more than enough    UChar foldedCharacters[maxFoldedCharacters];    bool error;    int numFoldedCharacters = foldCase(foldedCharacters, maxFoldedCharacters, characters, 1, &error);    ASSERT(!error);    ASSERT(numFoldedCharacters);    ASSERT(numFoldedCharacters <= maxFoldedCharacters);    if (!error && numFoldedCharacters) {        numFoldedCharacters = min(numFoldedCharacters, maxFoldedCharacters);        append(foldedCharacters[0], true);        for (int i = 1; i < numFoldedCharacters; ++i)            append(foldedCharacters[i], false);    }    return 1;}inline size_t SearchBuffer::search(size_t& start){    if (!m_isBufferFull)        return 0;    if (!m_isCharacterStartBuffer[m_cursor])        return 0;    size_t tailSpace = m_target.length() - m_cursor;    if (memcmp(&m_buffer[m_cursor], m_target.characters(), tailSpace * sizeof(UChar)) != 0)        return 0;    if (memcmp(&m_buffer[0], m_target.characters() + tailSpace, m_cursor * sizeof(UChar)) != 0)        return 0;    start = 
length();    // Now that we've found a match once, we don't want to find it again, because those    // are the SearchBuffer semantics, allowing for a buffer where you append more than one    // character at a time. To do this we take advantage of m_isCharacterStartBuffer, but if    // we want to get rid of that in the future we could track this with a separate boolean    // or even move the characters to the start of the buffer and set m_isBufferFull to false.    m_isCharacterStartBuffer[m_cursor] = false;    return start;}// Returns the number of characters that were appended to the buffer (what we are searching in).// That's not necessarily the same length as the passed-in target string, because case folding// can make two strings match even though they're not the same length.size_t SearchBuffer::length() const{    size_t bufferSize = m_target.length();    size_t length = 0;    for (size_t i = 0; i < bufferSize; ++i)        length += m_isCharacterStartBuffer[i];    return length;}#endif // !ICU_UNICODE// --------int TextIterator::rangeLength(const Range *r, bool forSelectionPreservation){    int length = 0;    for (TextIterator it(r, forSelectionPreservation); !it.atEnd(); it.advance())        length += it.length();        return length;}PassRefPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount){    CharacterIterator entireRangeIterator(entireRange);    return characterSubrange(entireRangeIterator, characterOffset, characterCount);}PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(Element *scope, int rangeLocation, int rangeLength, bool forSelectionPreservation){    RefPtr<Range> resultRange = scope->document()->createRange();    int docTextPosition = 0;    int rangeEnd = rangeLocation + rangeLength;    bool startRangeFound = false;    RefPtr<Range> textRunRange;    TextIterator it(rangeOfContents(scope).get(), forSelectionPreservation);        // FIXME: the atEnd() check shouldn't be necessary, workaround for 
<http://bugs.webkit.org/show_bug.cgi?id=6289>.    if (rangeLocation == 0 && rangeLength == 0 && it.atEnd()) {        textRunRange = it.range();                ExceptionCode ec = 0;        resultRange->setStart(textRunRange->startContainer(), 0, ec);        ASSERT(!ec);        resultRange->setEnd(textRunRange->startContainer(), 0, ec);        ASSERT(!ec);                return resultRange.release();    }    for (; !it.atEnd(); it.advance()) {        int len = it.length();        textRunRange = it.range();                bool foundStart = rangeLocation >= docTextPosition && rangeLocation <= docTextPosition + len;        bool foundEnd = rangeEnd >= docTextPosition && rangeEnd <= docTextPosition + len;                // Fix textRunRange->endPosition(), but only if foundStart || foundEnd, because it is only        // in those cases that textRunRange is used.        if (foundStart || foundEnd) {            // FIXME: This is a workaround for the fact that the end of a run is often at the wrong            // position for emitted '\n's.            
if (len == 1 && it.characters()[0] == '\n') {                Position runStart = textRunRange->startPosition();                Position runEnd = VisiblePosition(runStart).next().deepEquivalent();                if (runEnd.isNotNull()) {                    ExceptionCode ec = 0;                    textRunRange->setEnd(runEnd.node(), runEnd.offset(), ec);                    ASSERT(!ec);                }            }        }        if (foundStart) {            startRangeFound = true;            int exception = 0;            if (textRunRange->startContainer()->isTextNode()) {                int offset = rangeLocation - docTextPosition;                resultRange->setStart(textRunRange->startContainer(), offset + textRunRange->startOffset(), exception);            } else {                if (rangeLocation == docTextPosition)                    resultRange->setStart(textRunRange->startContainer(), textRunRange->startOffset(), exception);                else                    resultRange->setStart(textRunRange->endContainer(), textRunRange->endOffset(), exception);            }        }        if (foundEnd) {            int exception = 0;            if (textRunRange->startContainer()->isTextNode()) {                int offset = rangeEnd - docTextPosition;                resultRange->setEnd(textRunRange->startContainer(), offset + textRunRange->startOffset(), exception);            } else {                if (rangeEnd == docTextPosition)                    resultRange->setEnd(textRunRange->startContainer(), textRunRange->startOffset(), exception);                else                    resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), exception);            }            docTextPosition += len;            break;        }        docTextPosition += len;    }        if (!startRangeFound)        return 0;        if (rangeLength != 0 && rangeEnd > docTextPosition) { // rangeEnd is out of bounds        int exception = 0;        
resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), exception);    }        return resultRange.release();}// --------    UChar* plainTextToMallocAllocatedBuffer(const Range* r, unsigned& bufferLength, bool isDisplayString) {    UChar* result = 0;    // Do this in pieces to avoid massive reallocations if there is a large amount of text.    // Use system malloc for buffers since they can consume lots of memory and current TCMalloc is unable return it back to OS.    static const unsigned cMaxSegmentSize = 1 << 16;    bufferLength = 0;    typedef pair<UChar*, unsigned> TextSegment;    Vector<TextSegment>* textSegments = 0;    Vector<UChar> textBuffer;    textBuffer.reserveInitialCapacity(cMaxSegmentSize);    for (TextIterator it(r); !it.atEnd(); it.advance()) {        if (textBuffer.size() && textBuffer.size() + it.length() > cMaxSegmentSize) {            UChar* newSegmentBuffer = static_cast<UChar*>(malloc(textBuffer.size() * sizeof(UChar)));            if (!newSegmentBuffer)                goto exit;            memcpy(newSegmentBuffer, textBuffer.data(), textBuffer.size() * sizeof(UChar));            if (!textSegments)                textSegments = new Vector<TextSegment>;            textSegments->append(make_pair(newSegmentBuffer, (unsigned)textBuffer.size()));            textBuffer.clear();        }        textBuffer.append(it.characters(), it.length());        bufferLength += it.length();    }    if (!bufferLength)        return 0;    // Since we know the size now, we can make a single buffer out of the pieces with one big alloc    result = static_cast<UChar*>(malloc(bufferLength * sizeof(UChar)));    if (!result)        goto exit;    {        UChar* resultPos = result;        if (textSegments) {            unsigned size = textSegments->size();            for (unsigned i = 0; i < size; ++i) {                const TextSegment& segment = textSegments->at(i);                memcpy(resultPos, segment.first, segment.second * sizeof(UChar));     
           resultPos += segment.second;            }        }        memcpy(resultPos, textBuffer.data(), textBuffer.size() * sizeof(UChar));    }exit:    if (textSegments) {        unsigned size = textSegments->size();        for (unsigned i = 0; i < size; ++i)            free(textSegments->at(i).first);        delete textSegments;    }        if (isDisplayString && r->ownerDocument())        r->ownerDocument()->displayBufferModifiedByEncoding(result, bufferLength);    return result;}String plainText(const Range* r){    unsigned length;    UChar* buf = plainTextToMallocAllocatedBuffer(r, length, false);    if (!buf)        return "";    String result(buf, length);    free(buf);    return result;}static inline bool isAllCollapsibleWhitespace(const String& string){    const UChar* characters = string.characters();    unsigned length = string.length();    for (unsigned i = 0; i < length; ++i) {        if (!isCollapsibleWhitespace(characters[i]))            return false;    }    return true;}static PassRefPtr<Range> collapsedToBoundary(const Range* range, bool forward){    ExceptionCode ec = 0;    RefPtr<Range> result = range->cloneRange(ec);    ASSERT(!ec);    result->collapse(!forward, ec);    ASSERT(!ec);    return result.release();}static size_t findPlainText(CharacterIterator& it, const String& target, bool forward, bool caseSensitive, size_t& matchStart){    matchStart = 0;    size_t matchLength = 0;    SearchBuffer buffer(target, caseSensitive);    while (!it.atEnd()) {        it.advance(buffer.append(it.characters(), it.length()));tryAgain:        size_t matchStartOffset;        if (size_t newMatchLength = buffer.search(matchStartOffset)) {            // Note that we found a match, and where we found it.            
size_t lastCharacterInBufferOffset = it.characterOffset();            ASSERT(lastCharacterInBufferOffset >= matchStartOffset);            matchStart = lastCharacterInBufferOffset - matchStartOffset;            matchLength = newMatchLength;            // If searching forward, stop on the first match.            // If searching backward, don't stop, so we end up with the last match.            if (forward)                break;            goto tryAgain;        }        if (it.atBreak() && !buffer.atBreak()) {            buffer.reachedBreak();            goto tryAgain;        }    }    return matchLength;}PassRefPtr<Range> findPlainText(const Range* range, const String& target, bool forward, bool caseSensitive){    // We can't search effectively for a string that's entirely made of collapsible    // whitespace, so we won't even try. This also takes care of the empty string case.    if (isAllCollapsibleWhitespace(target))        return collapsedToBoundary(range, forward);    // First, find the text.    size_t matchStart;    size_t matchLength;    {        CharacterIterator findIterator(range, false, true);        matchLength = findPlainText(findIterator, target, forward, caseSensitive, matchStart);        if (!matchLength)            return collapsedToBoundary(range, forward);    }    // Then, find the document position of the start and the end of the text.    CharacterIterator computeRangeIterator(range, false, true);    return characterSubrange(computeRangeIterator, matchStart, matchLength);}}
上一页 1 2 3 4
💿 文件大小 15751 K
👤 上传用户 Jane
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#WebKit #linux #浏览器 #开源
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -