📄 textiterator.cpp
字号:
} size_t usableLength = min(m_buffer.capacity() - m_buffer.size(), length); ASSERT(usableLength); m_buffer.append(characters, usableLength); return usableLength;}inline bool SearchBuffer::atBreak() const{ return m_atBreak;}inline void SearchBuffer::reachedBreak(){ m_atBreak = true;}inline size_t SearchBuffer::search(size_t& start){ size_t size = m_buffer.size(); if (m_atBreak) { if (!size) return 0; } else { if (size != m_buffer.capacity()) return 0; } UStringSearch* searcher = WebCore::searcher(); UErrorCode status = U_ZERO_ERROR; usearch_setText(searcher, m_buffer.data(), size, &status); ASSERT(status == U_ZERO_ERROR); int matchStart = usearch_first(searcher, &status); ASSERT(status == U_ZERO_ERROR); if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) { ASSERT(matchStart == USEARCH_DONE); return 0; } // Matches that start in the overlap area are only tentative. // The same match may appear later, matching more characters, // possibly including a combining character that's not yet in the buffer. if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) { memcpy(m_buffer.data(), m_buffer.data() + size - m_overlap, m_overlap * sizeof(UChar)); m_buffer.shrink(m_overlap); return 0; } size_t newSize = size - (matchStart + 1); memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar)); m_buffer.shrink(newSize); start = size - matchStart; return usearch_getMatchedLength(searcher);}#else // !ICU_UNICODEinline SearchBuffer::SearchBuffer(const String& target, bool isCaseSensitive) : m_target(isCaseSensitive ? target : target.foldCase()) , m_isCaseSensitive(isCaseSensitive) , m_buffer(m_target.length()) , m_isCharacterStartBuffer(m_target.length()) , m_isBufferFull(false) , m_cursor(0){ ASSERT(!m_target.isEmpty()); m_target.replace(noBreakSpace, ' ');}inline SearchBuffer::~SearchBuffer(){}inline void SearchBuffer::reachedBreak(){ m_cursor = 0; m_isBufferFull = false;}inline bool SearchBuffer::atBreak() const{ return !m_cursor && !m_isBufferFull;}inline void SearchBuffer::append(UChar c, bool isStart){ m_buffer[m_cursor] = c == noBreakSpace ? ' ' : c; m_isCharacterStartBuffer[m_cursor] = isStart; if (++m_cursor == m_target.length()) { m_cursor = 0; m_isBufferFull = true; }}inline size_t SearchBuffer::append(const UChar* characters, size_t length){ ASSERT(length); if (m_isCaseSensitive) { append(characters[0], true); return 1; } const int maxFoldedCharacters = 16; // sensible maximum is 3, this should be more than enough UChar foldedCharacters[maxFoldedCharacters]; bool error; int numFoldedCharacters = foldCase(foldedCharacters, maxFoldedCharacters, characters, 1, &error); ASSERT(!error); ASSERT(numFoldedCharacters); ASSERT(numFoldedCharacters <= maxFoldedCharacters); if (!error && numFoldedCharacters) { numFoldedCharacters = min(numFoldedCharacters, maxFoldedCharacters); append(foldedCharacters[0], true); for (int i = 1; i < numFoldedCharacters; ++i) append(foldedCharacters[i], false); } return 1;}inline size_t SearchBuffer::search(size_t& start){ if (!m_isBufferFull) return 0; if (!m_isCharacterStartBuffer[m_cursor]) return 0; size_t tailSpace = m_target.length() - m_cursor; if (memcmp(&m_buffer[m_cursor], m_target.characters(), tailSpace * sizeof(UChar)) != 0) return 0; if (memcmp(&m_buffer[0], m_target.characters() + tailSpace, m_cursor * sizeof(UChar)) != 0) return 0; start = length(); // Now that we've found a match once, we don't want to find it again, because those // are the SearchBuffer semantics, allowing for a buffer where you append more than one // character at a time. To do this we take advantage of m_isCharacterStartBuffer, but if // we want to get rid of that in the future we could track this with a separate boolean // or even move the characters to the start of the buffer and set m_isBufferFull to false. m_isCharacterStartBuffer[m_cursor] = false; return start;}// Returns the number of characters that were appended to the buffer (what we are searching in).// That's not necessarily the same length as the passed-in target string, because case folding// can make two strings match even though they're not the same length.size_t SearchBuffer::length() const{ size_t bufferSize = m_target.length(); size_t length = 0; for (size_t i = 0; i < bufferSize; ++i) length += m_isCharacterStartBuffer[i]; return length;}#endif // !ICU_UNICODE// --------int TextIterator::rangeLength(const Range *r, bool forSelectionPreservation){ int length = 0; for (TextIterator it(r, forSelectionPreservation); !it.atEnd(); it.advance()) length += it.length(); return length;}PassRefPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount){ CharacterIterator entireRangeIterator(entireRange); return characterSubrange(entireRangeIterator, characterOffset, characterCount);}PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(Element *scope, int rangeLocation, int rangeLength, bool forSelectionPreservation){ RefPtr<Range> resultRange = scope->document()->createRange(); int docTextPosition = 0; int rangeEnd = rangeLocation + rangeLength; bool startRangeFound = false; RefPtr<Range> textRunRange; TextIterator it(rangeOfContents(scope).get(), forSelectionPreservation); // FIXME: the atEnd() check shouldn't be necessary, workaround for <http://bugs.webkit.org/show_bug.cgi?id=6289>. if (rangeLocation == 0 && rangeLength == 0 && it.atEnd()) { textRunRange = it.range(); ExceptionCode ec = 0; resultRange->setStart(textRunRange->startContainer(), 0, ec); ASSERT(!ec); resultRange->setEnd(textRunRange->startContainer(), 0, ec); ASSERT(!ec); return resultRange.release(); } for (; !it.atEnd(); it.advance()) { int len = it.length(); textRunRange = it.range(); bool foundStart = rangeLocation >= docTextPosition && rangeLocation <= docTextPosition + len; bool foundEnd = rangeEnd >= docTextPosition && rangeEnd <= docTextPosition + len; // Fix textRunRange->endPosition(), but only if foundStart || foundEnd, because it is only // in those cases that textRunRange is used. if (foundStart || foundEnd) { // FIXME: This is a workaround for the fact that the end of a run is often at the wrong // position for emitted '\n's. if (len == 1 && it.characters()[0] == '\n') { Position runStart = textRunRange->startPosition(); Position runEnd = VisiblePosition(runStart).next().deepEquivalent(); if (runEnd.isNotNull()) { ExceptionCode ec = 0; textRunRange->setEnd(runEnd.node(), runEnd.offset(), ec); ASSERT(!ec); } } } if (foundStart) { startRangeFound = true; int exception = 0; if (textRunRange->startContainer()->isTextNode()) { int offset = rangeLocation - docTextPosition; resultRange->setStart(textRunRange->startContainer(), offset + textRunRange->startOffset(), exception); } else { if (rangeLocation == docTextPosition) resultRange->setStart(textRunRange->startContainer(), textRunRange->startOffset(), exception); else resultRange->setStart(textRunRange->endContainer(), textRunRange->endOffset(), exception); } } if (foundEnd) { int exception = 0; if (textRunRange->startContainer()->isTextNode()) { int offset = rangeEnd - docTextPosition; resultRange->setEnd(textRunRange->startContainer(), offset + textRunRange->startOffset(), exception); } else { if (rangeEnd == docTextPosition) resultRange->setEnd(textRunRange->startContainer(), textRunRange->startOffset(), exception); else resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), exception); } docTextPosition += len; break; } docTextPosition += len; } if (!startRangeFound) return 0; if (rangeLength != 0 && rangeEnd > docTextPosition) { // rangeEnd is out of bounds int exception = 0; resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), exception); } return resultRange.release();}// -------- UChar* plainTextToMallocAllocatedBuffer(const Range* r, unsigned& bufferLength, bool isDisplayString) { UChar* result = 0; // Do this in pieces to avoid massive reallocations if there is a large amount of text. // Use system malloc for buffers since they can consume lots of memory and current TCMalloc is unable return it back to OS. static const unsigned cMaxSegmentSize = 1 << 16; bufferLength = 0; typedef pair<UChar*, unsigned> TextSegment; Vector<TextSegment>* textSegments = 0; Vector<UChar> textBuffer; textBuffer.reserveInitialCapacity(cMaxSegmentSize); for (TextIterator it(r); !it.atEnd(); it.advance()) { if (textBuffer.size() && textBuffer.size() + it.length() > cMaxSegmentSize) { UChar* newSegmentBuffer = static_cast<UChar*>(malloc(textBuffer.size() * sizeof(UChar))); if (!newSegmentBuffer) goto exit; memcpy(newSegmentBuffer, textBuffer.data(), textBuffer.size() * sizeof(UChar)); if (!textSegments) textSegments = new Vector<TextSegment>; textSegments->append(make_pair(newSegmentBuffer, (unsigned)textBuffer.size())); textBuffer.clear(); } textBuffer.append(it.characters(), it.length()); bufferLength += it.length(); } if (!bufferLength) return 0; // Since we know the size now, we can make a single buffer out of the pieces with one big alloc result = static_cast<UChar*>(malloc(bufferLength * sizeof(UChar))); if (!result) goto exit; { UChar* resultPos = result; if (textSegments) { unsigned size = textSegments->size(); for (unsigned i = 0; i < size; ++i) { const TextSegment& segment = textSegments->at(i); memcpy(resultPos, segment.first, segment.second * sizeof(UChar)); resultPos += segment.second; } } memcpy(resultPos, textBuffer.data(), textBuffer.size() * sizeof(UChar)); }exit: if (textSegments) { unsigned size = textSegments->size(); for (unsigned i = 0; i < size; ++i) free(textSegments->at(i).first); delete textSegments; } if (isDisplayString && r->ownerDocument()) r->ownerDocument()->displayBufferModifiedByEncoding(result, bufferLength); return result;}String plainText(const Range* r){ unsigned length; UChar* buf = plainTextToMallocAllocatedBuffer(r, length, false); if (!buf) return ""; String result(buf, length); free(buf); return result;}static inline bool isAllCollapsibleWhitespace(const String& string){ const UChar* characters = string.characters(); unsigned length = string.length(); for (unsigned i = 0; i < length; ++i) { if (!isCollapsibleWhitespace(characters[i])) return false; } return true;}static PassRefPtr<Range> collapsedToBoundary(const Range* range, bool forward){ ExceptionCode ec = 0; RefPtr<Range> result = range->cloneRange(ec); ASSERT(!ec); result->collapse(!forward, ec); ASSERT(!ec); return result.release();}static size_t findPlainText(CharacterIterator& it, const String& target, bool forward, bool caseSensitive, size_t& matchStart){ matchStart = 0; size_t matchLength = 0; SearchBuffer buffer(target, caseSensitive); while (!it.atEnd()) { it.advance(buffer.append(it.characters(), it.length()));tryAgain: size_t matchStartOffset; if (size_t newMatchLength = buffer.search(matchStartOffset)) { // Note that we found a match, and where we found it. size_t lastCharacterInBufferOffset = it.characterOffset(); ASSERT(lastCharacterInBufferOffset >= matchStartOffset); matchStart = lastCharacterInBufferOffset - matchStartOffset; matchLength = newMatchLength; // If searching forward, stop on the first match. // If searching backward, don't stop, so we end up with the last match. if (forward) break; goto tryAgain; } if (it.atBreak() && !buffer.atBreak()) { buffer.reachedBreak(); goto tryAgain; } } return matchLength;}PassRefPtr<Range> findPlainText(const Range* range, const String& target, bool forward, bool caseSensitive){ // We can't search effectively for a string that's entirely made of collapsible // whitespace, so we won't even try. This also takes care of the empty string case. if (isAllCollapsibleWhitespace(target)) return collapsedToBoundary(range, forward); // First, find the text. size_t matchStart; size_t matchLength; { CharacterIterator findIterator(range, false, true); matchLength = findPlainText(findIterator, target, forward, caseSensitive, matchStart); if (!matchLength) return collapsedToBoundary(range, forward); } // Then, find the document position of the start and the end of the text. CharacterIterator computeRangeIterator(range, false, true); return characterSubrange(computeRangeIterator, matchStart, matchLength);}}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -