📄 stringimpl.cpp
字号:
/*
 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
 *           (C) 1999 Antti Koivisto (koivisto@kde.org)
 *           (C) 2001 Dirk Mueller ( mueller@kde.org )
 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */

#include "config.h"
#include "StringImpl.h"

#include "AtomicString.h"
#include "CString.h"
#include "CharacterNames.h"
#include "FloatConversion.h"
#include "StringBuffer.h"
#include "StringHash.h"
#include "TextBreakIterator.h"
#include "TextEncoding.h"
#include "ThreadGlobalData.h"
#include <wtf/dtoa.h>
#include <wtf/Assertions.h>
#include <wtf/Threading.h>
#include <wtf/unicode/Unicode.h>

using namespace WTF;
using namespace Unicode;

namespace WebCore {

// Allocates uninitialized storage for n UChars with fastMalloc().
// The matching release function is deleteUCharVector().
static inline UChar* newUCharVector(unsigned n)
{
    // Where size_t is no wider than unsigned, sizeof(UChar) * n can wrap around and
    // produce a buffer smaller than the n characters callers go on to write.
    // Crash instead of handing back an undersized allocation.
    if (n > static_cast<size_t>(-1) / sizeof(UChar))
        CRASH();
    return static_cast<UChar*>(fastMalloc(sizeof(UChar) * n));
}

// Releases storage obtained from newUCharVector() (or any fastMalloc'ed UChar buffer).
static inline void deleteUCharVector(const UChar* p)
{
    fastFree(const_cast<UChar*>(p));
}

// This constructor is used only to create the empty string.
StringImpl::StringImpl()
    : m_length(0)
    , m_data(0)
    , m_hash(0)
    , m_inTable(false)
    , m_hasTerminatingNullCharacter(false)
{
    // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
    // with impunity. The empty string is special because it is never entered into
    // AtomicString's HashKey, but still needs to compare correctly.
    hash();
}

// This is one of the most common constructors, but it's also used for the copy()
// operation. Because of that, it's the one constructor that doesn't assert the
// length is non-zero, since we support copying the empty string.
inline StringImpl::StringImpl(const UChar* characters, unsigned length)
    : m_length(length)
    , m_hash(0)
    , m_inTable(false)
    , m_hasTerminatingNullCharacter(false)
{
    UChar* data = newUCharVector(length);
    memcpy(data, characters, length * sizeof(UChar));
    m_data = data;
}

// Copies str into a buffer that is one UChar longer than str and stores a 0 in
// the extra slot. m_length stays at str's length, so the terminator is not
// counted; the source's hash value is carried over unchanged.
inline StringImpl::StringImpl(const StringImpl& str, WithTerminatingNullCharacter)
    : m_length(str.m_length)
    , m_hash(str.m_hash)
    , m_inTable(false)
    , m_hasTerminatingNullCharacter(true)
{
    UChar* data = newUCharVector(str.m_length + 1);
    memcpy(data, str.m_data, str.m_length * sizeof(UChar));
    data[str.m_length] = 0;
    m_data = data;
}

// Builds a string from `length` 8-bit characters. Each byte is read through
// unsigned char before being widened, so values >= 0x80 are not sign-extended.
inline StringImpl::StringImpl(const char* characters, unsigned length)
    : m_length(length)
    , m_hash(0)
    , m_inTable(false)
    , m_hasTerminatingNullCharacter(false)
{
    ASSERT(characters);
    ASSERT(length);

    UChar* data = newUCharVector(length);
    for (unsigned i = 0; i != length; ++i) {
        unsigned char c = characters[i];
        data[i] = c;
    }
    m_data = data;
}

// Takes the given buffer as m_data without copying it. The destructor later
// passes m_data to deleteUCharVector(), i.e. fastFree().
inline StringImpl::StringImpl(UChar* characters, unsigned length, AdoptBuffer)
    : m_length(length)
    , m_data(characters)
    , m_hash(0)
    , m_inTable(false)
    , m_hasTerminatingNullCharacter(false)
{
    ASSERT(characters);
    ASSERT(length);
}

// This constructor is only for use by AtomicString.
// It copies the characters, stores the caller-supplied hash and marks the
// string as being in the table (m_inTable), which makes the destructor call
// AtomicString::remove().
StringImpl::StringImpl(const UChar* characters, unsigned length, unsigned hash)
    : m_length(length)
    , m_hash(hash)
    , m_inTable(true)
    , m_hasTerminatingNullCharacter(false)
{
    ASSERT(hash);
    ASSERT(characters);
    ASSERT(length);

    UChar* data = newUCharVector(length);
    memcpy(data, characters, length * sizeof(UChar));
    m_data = data;
}

// This constructor is only for use by AtomicString.
// 8-bit variant of the constructor above; bytes are widened through unsigned char.
StringImpl::StringImpl(const char* characters, unsigned length, unsigned hash)
    : m_length(length)
    , m_hash(hash)
    , m_inTable(true)
    , m_hasTerminatingNullCharacter(false)
{
    ASSERT(hash);
    ASSERT(characters);
    ASSERT(length);

    UChar* data = newUCharVector(length);
    for (unsigned i = 0; i != length; ++i) {
        unsigned char c = characters[i];
        data[i] = c;
    }
    m_data = data;
}

// Removes the string from AtomicString's table if it was marked as being in it,
// then frees the character buffer.
StringImpl::~StringImpl()
{
    if (m_inTable)
        AtomicString::remove(this);
    deleteUCharVector(m_data);
}

// Returns the empty string instance held by threadGlobalData().
StringImpl* StringImpl::empty()
{
    return threadGlobalData().emptyString();
}

// Returns true when every character satisfies isASCIISpace(); this is
// trivially true for a zero-length string.
bool StringImpl::containsOnlyWhitespace()
{
    // FIXME: The definition of whitespace here includes a number of characters
    // that are not whitespace from the point of view of RenderText; I wonder if
    // that's a problem in practice.
    for (unsigned i = 0; i < m_length; i++)
        if (!isASCIISpace(m_data[i]))
            return false;
    return true;
}

// Returns a new string made of up to `len` characters starting at `pos`.
// A `pos` at or past the end yields empty(); `len` is clamped to what is available.
PassRefPtr<StringImpl> StringImpl::substring(unsigned pos, unsigned len)
{
    if (pos >= m_length)
        return empty();
    if (len > m_length - pos)
        len = m_length - pos;
    return create(m_data + pos, len);
}

// Like substring(), except that a zero-length result is a freshly constructed
// empty StringImpl rather than the instance returned by empty().
PassRefPtr<StringImpl> StringImpl::substringCopy(unsigned pos, unsigned len)
{
    if (pos >= m_length)
        pos = m_length;
    if (len > m_length - pos)
        len = m_length - pos;
    if (!len)
        return adoptRef(new StringImpl);
    return substring(pos, len);
}

// Returns the code point that starts at index i: the code unit itself when it
// is not a surrogate, the combined supplementary code point when a lead
// surrogate is followed by a trail surrogate, and 0 otherwise.
// The index i itself is not range-checked here.
UChar32 StringImpl::characterStartingAt(unsigned i)
{
    if (U16_IS_SINGLE(m_data[i]))
        return m_data[i];
    if (i + 1 < m_length && U16_IS_LEAD(m_data[i]) && U16_IS_TRAIL(m_data[i + 1]))
        return U16_GET_SUPPLEMENTARY(m_data[i], m_data[i + 1]);
    return 0;
}

// Returns true when every character in the string is lowercase.
bool StringImpl::isLower()
{
    // Do a faster loop for the case where all the characters are ASCII.
    bool allLower = true;
    UChar ored = 0;
    for (unsigned i = 0; i < m_length; i++) {
        UChar c = m_data[i];
        allLower = allLower && isASCIILower(c);
        ored |= c;
    }
    if (!(ored & ~0x7F))
        return allLower;

    // Do a slower check for cases that include non-ASCII characters.
    allLower = true;
    unsigned i = 0;
    while (i < m_length) {
        UChar32 character;
        // The trailing semicolon is required by ICU releases in which U16_NEXT
        // expands to a do/while block; it is a harmless empty statement otherwise.
        U16_NEXT(m_data, i, m_length, character);
        allLower = allLower && Unicode::isLower(character);
    }
    return allLower;
}

// Returns a lowercased copy of the string. If the Unicode conversion reports an
// error, the string itself is returned unchanged.
PassRefPtr<StringImpl> StringImpl::lower()
{
    // The lengths below are handled as int32_t; a longer string would turn into a
    // negative length, skipping the ASCII loop and confusing the conversion call.
    if (m_length > static_cast<unsigned>(0x7FFFFFFF))
        CRASH();

    StringBuffer data(m_length);
    int32_t length = m_length;

    // Do a faster loop for the case where all the characters are ASCII.
    UChar ored = 0;
    for (int i = 0; i < length; i++) {
        UChar c = m_data[i];
        ored |= c;
        data[i] = toASCIILower(c);
    }
    if (!(ored & ~0x7F))
        return adopt(data);

    // Do a slower implementation for cases that include non-ASCII characters.
    bool error;
    int32_t realLength = Unicode::toLower(data.characters(), length, m_data, m_length, &error);
    if (!error && realLength == length)
        return adopt(data);
    // The first attempt did not fit or failed: retry with a buffer of the reported length.
    data.resize(realLength);
    Unicode::toLower(data.characters(), realLength, m_data, m_length, &error);
    if (error)
        return this;
    return adopt(data);
}

// Returns an uppercased copy of the string. If the Unicode conversion reports an
// error, the string itself is returned unchanged.
PassRefPtr<StringImpl> StringImpl::upper()
{
    // See lower(): lengths are handled as int32_t below.
    if (m_length > static_cast<unsigned>(0x7FFFFFFF))
        CRASH();

    StringBuffer data(m_length);
    int32_t length = m_length;

    // Do a faster loop for the case where all the characters are ASCII.
    UChar ored = 0;
    for (int i = 0; i < length; i++) {
        UChar c = m_data[i];
        ored |= c;
        data[i] = toASCIIUpper(c);
    }
    if (!(ored & ~0x7F))
        return adopt(data);

    // Do a slower implementation for cases that include non-ASCII characters.
    bool error;
    int32_t realLength = Unicode::toUpper(data.characters(), length, m_data, m_length, &error);
    if (!error && realLength == length)
        return adopt(data);
    // The first attempt did not fit or failed: retry with a buffer of the reported length.
    data.resize(realLength);
    Unicode::toUpper(data.characters(), realLength, m_data, m_length, &error);
    if (error)
        return this;
    return adopt(data);
}

// Returns a string of the same length in which every character is aChar.
PassRefPtr<StringImpl> StringImpl::secure(UChar aChar)
{
    StringBuffer data(m_length);
    // Index with unsigned so that lengths above INT_MAX are still filled completely.
    for (unsigned i = 0; i < m_length; ++i)
        data[i] = aChar;
    return adopt(data);
}

// Returns a case-folded copy of the string. If the Unicode conversion reports an
// error, the string itself is returned unchanged.
PassRefPtr<StringImpl> StringImpl::foldCase()
{
    // See lower(): lengths are handled as int32_t below.
    if (m_length > static_cast<unsigned>(0x7FFFFFFF))
        CRASH();

    StringBuffer data(m_length);
    int32_t length = m_length;

    // Do a faster loop for the case where all the characters are ASCII.
    UChar ored = 0;
    for (int i = 0; i < length; i++) {
        UChar c = m_data[i];
        ored |= c;
        data[i] = toASCIILower(c);
    }
    if (!(ored & ~0x7F))
        return adopt(data);

    // Do a slower implementation for cases that include non-ASCII characters.
    bool error;
    int32_t realLength = Unicode::foldCase(data.characters(), length, m_data, m_length, &error);
    if (!error && realLength == length)
        return adopt(data);
    // The first attempt did not fit or failed: retry with a buffer of the reported length.
    data.resize(realLength);
    Unicode::foldCase(data.characters(), realLength, m_data, m_length, &error);
    if (error)
        return this;
    return adopt(data);
}

// Returns a copy with leading and trailing characters matching isSpaceOrNewline()
// removed; returns empty() when nothing remains.
PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
{
    if (!m_length)
        return empty();

    unsigned start = 0;
    unsigned end = m_length - 1;

    // skip white space from start
    while (start <= end && isSpaceOrNewline(m_data[start]))
        start++;

    // only white space
    if (start > end)
        return empty();

    // skip white space from end
    while (end && isSpaceOrNewline(m_data[end]))
        end--;

    return create(m_data + start, end + 1 - start);
}

// Returns a string without the characters for which findMatch() returns true.
// When no character matches, the string itself is returned.
PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch)
{
    const UChar* from = m_data;
    const UChar* fromend = from + m_length;

    // Assume the common case will not remove any characters
    while (from != fromend && !findMatch(*from))
        from++;
    if (from == fromend)
        return this;

    StringBuffer data(m_length);
    UChar* to = data.characters();
    // Characters before the first match are kept as-is and copied in one go.
    unsigned outc = from - m_data;

    if (outc)
        memcpy(to, m_data, outc * sizeof(UChar));

    // Alternate between skipping a run of matching characters and copying a run
    // of non-matching ones until the input is exhausted.
    while (true) {
        while (from != fromend && findMatch(*from))
            from++;
        while (from != fromend && !findMatch(*from))
            to[outc++] = *from++;
        if (from == fromend)
            break;
    }

    data.shrink(outc);

    return adopt(data);
}

// Returns a copy in which each run of isSpaceOrNewline() characters between
// words is replaced by a single ' ' and leading/trailing runs are dropped.
PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace()
{
    StringBuffer data(m_length);

    const UChar* from = m_data;
    const UChar* fromend = from + m_length;
    // Unsigned, matching removeCharacters() and the unsigned string length.
    unsigned outc = 0;

    UChar* to = data.characters();

    while (true) {
        while (from != fromend && isSpaceOrNewline(*from))
            from++;
        while (from != fromend && !isSpaceOrNewline(*from))
            to[outc++] = *from++;
        if (from != fromend)
            to[outc++] = ' ';
        else
            break;
    }

    // A separator written after the last word is followed only by white space; drop it.
    if (outc > 0 && to[outc - 1] == ' ')
        outc--;

    data.shrink(outc);

    return adopt(data);
}

// Returns a copy in which the first character of each word is title-cased.
// `previous` is placed in front of the text handed to the word break iterator,
// so it takes part in locating word boundaries but is not part of the result.
// If no break iterator is available, the string itself is returned.
PassRefPtr<StringImpl> StringImpl::capitalize(UChar previous)
{
    StringBuffer stringWithPrevious(m_length + 1);
    stringWithPrevious[0] = previous == noBreakSpace ? ' ' : previous;
    for (unsigned i = 1; i < m_length + 1; i++) {
        // Replace &nbsp; with a real space since ICU no longer treats &nbsp; as a word separator.
        if (m_data[i - 1] == noBreakSpace)
            stringWithPrevious[i] = ' ';
        else
            stringWithPrevious[i] = m_data[i - 1];
    }

    TextBreakIterator* boundary = wordBreakIterator(stringWithPrevious.characters(), m_length + 1);
    if (!boundary)
        return this;

    StringBuffer data(m_length);

    // Indices from the iterator refer to stringWithPrevious, which is shifted by
    // one relative to m_data and data; hence the "- 1" on every access below.
    int32_t endOfWord;
    int32_t startOfWord = textBreakFirst(boundary);
    for (endOfWord = textBreakNext(boundary); endOfWord != TextBreakDone; startOfWord = endOfWord, endOfWord = textBreakNext(boundary)) {
        if (startOfWord != 0) // Ignore first char of previous string
            data[startOfWord - 1] = m_data[startOfWord - 1] == noBreakSpace ? noBreakSpace : toTitleCase(stringWithPrevious[startOfWord]);
        for (int i = startOfWord + 1; i < endOfWord; i++)
            data[i - 1] = m_data[i - 1];
    }

    return adopt(data);
}

// The conversions below forward the character buffer and its length to the
// correspondingly named charactersTo*() function, passing `ok` (and `base`
// for the strict variants) through unchanged.

int StringImpl::toIntStrict(bool* ok, int base)
{
    return charactersToIntStrict(m_data, m_length, ok, base);
}

unsigned StringImpl::toUIntStrict(bool* ok, int base)
{
    return charactersToUIntStrict(m_data, m_length, ok, base);
}

int64_t StringImpl::toInt64Strict(bool* ok, int base)
{
    return charactersToInt64Strict(m_data, m_length, ok, base);
}

uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
{
    return charactersToUInt64Strict(m_data, m_length, ok, base);
}

int StringImpl::toInt(bool* ok)
{
    return charactersToInt(m_data, m_length, ok);
}

unsigned StringImpl::toUInt(bool* ok)
{
    return charactersToUInt(m_data, m_length, ok);
}

int64_t StringImpl::toInt64(bool* ok)
{
    return charactersToInt64(m_data, m_length, ok);
}

uint64_t StringImpl::toUInt64(bool* ok)
{
    return charactersToUInt64(m_data, m_length, ok);
}

double StringImpl::toDouble(bool* ok)
{
    return charactersToDouble(m_data, m_length, ok);
}

float StringImpl::toFloat(bool* ok)
{
    return charactersToFloat(m_data, m_length, ok);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -