📄 kurlgoogle.cpp
字号:
/*
 * Copyright (C) 2008, 2009 Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/#include "config.h"#if USE(GOOGLEURL)#include "KURL.h"#include "CString.h"#include "NotImplemented.h"#include "TextEncoding.h"#include <wtf/Vector.h>#include <googleurl/src/url_canon_internal.h>#include <googleurl/src/url_util.h>using WTF::isASCIILower;using WTF::toASCIILower;namespace WebCore {// Wraps WebCore's text encoding in a character set converter for the// canonicalizer.class KURLCharsetConverter : public url_canon::CharsetConverter {public: // The encoding parameter may be NULL, but in this case the object must not // be called. KURLCharsetConverter(const TextEncoding* encoding) : m_encoding(encoding) { } virtual void ConvertFromUTF16(const url_parse::UTF16Char* input, int inputLength, url_canon::CanonOutput* output) { CString encoded = m_encoding->encode(input, inputLength, URLEncodedEntitiesForUnencodables); output->Append(encoded.data(), static_cast<int>(encoded.length())); }private: const TextEncoding* m_encoding;};// Note that this function must be named differently than the one in KURL.cpp// since our unit tests evilly include both files, and their local definition// will be ambiguous.static inline void assertProtocolIsGood(const char* protocol){#ifndef NDEBUG const char* p = protocol; while (*p) { ASSERT(*p > ' ' && *p < 0x7F && !(*p >= 'A' && *p <= 'Z')); ++p; }#endif}// Returns the characters for the given string, or a pointer to a static empty// string if the input string is NULL. This will always ensure we have a non-// NULL character pointer since ReplaceComponents has special meaning for NULL.static inline const url_parse::UTF16Char* CharactersOrEmpty(const String& str){ static const url_parse::UTF16Char zero = 0; return str.characters() ? 
reinterpret_cast<const url_parse::UTF16Char*>(str.characters()) : &zero;}static inline bool isUnicodeEncoding(const TextEncoding* encoding){ return encoding->encodingForFormSubmission() == UTF8Encoding();}static bool lowerCaseEqualsASCII(const char* begin, const char* end, const char* str){ while (begin != end && *str) { ASSERT(isASCIILower(*str)); if (toASCIILower(*begin++) != *str++) return false; } // Both strings are equal (ignoring case) if and only if all of the characters were equal, // and the end of both has been reached. return begin == end && !*str;}// KURLGooglePrivate -----------------------------------------------------------KURLGooglePrivate::KURLGooglePrivate() : m_isValid(false) , m_protocolInHTTPFamily(false) , m_utf8IsASCII(true) , m_stringIsValid(false){}KURLGooglePrivate::KURLGooglePrivate(const url_parse::Parsed& parsed, bool isValid) : m_isValid(isValid) , m_protocolInHTTPFamily(false) , m_parsed(parsed) , m_utf8IsASCII(true) , m_stringIsValid(false){}// Setters for the data. Using the ASCII version when you know the// data is ASCII will be slightly more efficient. The UTF-8 version// will always be correct if the caller is unsure.void KURLGooglePrivate::setUtf8(const CString& str){ const char* data = str.data(); unsigned dataLength = str.length(); // The m_utf8IsASCII must always be correct since the DeprecatedString // getter must create it with the proper constructor. This test can be // removed when DeprecatedString is gone, but it still might be a // performance win. 
m_utf8IsASCII = true; for (unsigned i = 0; i < dataLength; i++) { if (static_cast<unsigned char>(data[i]) >= 0x80) { m_utf8IsASCII = false; break; } } m_utf8 = str; m_stringIsValid = false; initProtocolInHTTPFamily();}void KURLGooglePrivate::setAscii(const CString& str){ m_utf8 = str; m_utf8IsASCII = true; m_stringIsValid = false; initProtocolInHTTPFamily();}void KURLGooglePrivate::init(const KURL& base, const String& relative, const TextEncoding* queryEncoding){ init(base, relative.characters(), relative.length(), queryEncoding);}// Note: code mostly duplicated below.void KURLGooglePrivate::init(const KURL& base, const char* rel, int relLength, const TextEncoding* queryEncoding){ // As a performance optimization, we do not use the charset converter if // encoding is UTF-8 or other Unicode encodings. Note that this is // per HTML5 2.5.3 (resolving URL). The URL canonicalizer will be // more efficient with no charset converter object because it // can do UTF-8 internally with no extra copies. // We feel free to make the charset converter object every time since it's // just a wrapper around a reference. KURLCharsetConverter charsetConverterObject(queryEncoding); KURLCharsetConverter* charsetConverter = (!queryEncoding || isUnicodeEncoding(queryEncoding)) ? 0 : &charsetConverterObject; url_canon::RawCanonOutputT<char> output; const CString& baseStr = base.m_url.utf8String(); m_isValid = url_util::ResolveRelative(baseStr.data(), baseStr.length(), base.m_url.m_parsed, rel, relLength, charsetConverter, &output, &m_parsed); // See FIXME in KURLGooglePrivate in the header. If canonicalization has not // changed the string, we can avoid an extra allocation by using assignment. // // When KURL encounters an error such that the URL is invalid and empty // (for example, resolving a relative URL on a non-hierarchical base), it // will produce an isNull URL, and calling setUtf8 will produce an empty // non-null URL. 
This is unlikely to affect anything, but we preserve this // just in case. if (m_isValid || output.length()) { // Without ref, the whole url is guaranteed to be ASCII-only. if (m_parsed.ref.is_nonempty()) setUtf8(CString(output.data(), output.length())); else setAscii(CString(output.data(), output.length())); } else { // WebCore expects resolved URLs to be empty rather than NULL. setUtf8(CString("", 0)); }}// Note: code mostly duplicated above. See FIXMEs and comments there.void KURLGooglePrivate::init(const KURL& base, const UChar* rel, int relLength, const TextEncoding* queryEncoding){ KURLCharsetConverter charsetConverterObject(queryEncoding); KURLCharsetConverter* charsetConverter = (!queryEncoding || isUnicodeEncoding(queryEncoding)) ? 0 : &charsetConverterObject; url_canon::RawCanonOutputT<char> output; const CString& baseStr = base.m_url.utf8String(); m_isValid = url_util::ResolveRelative(baseStr.data(), baseStr.length(), base.m_url.m_parsed, rel, relLength, charsetConverter, &output, &m_parsed); if (m_isValid || output.length()) { if (m_parsed.ref.is_nonempty()) setUtf8(CString(output.data(), output.length())); else setAscii(CString(output.data(), output.length())); } else setUtf8(CString("", 0));}void KURLGooglePrivate::initProtocolInHTTPFamily(){ if (!m_isValid) { m_protocolInHTTPFamily = false; return; } const char* scheme = m_utf8.data() + m_parsed.scheme.begin; if (m_parsed.scheme.len == 4) m_protocolInHTTPFamily = lowerCaseEqualsASCII(scheme, scheme + 4, "http"); else if (m_parsed.scheme.len == 5) m_protocolInHTTPFamily = lowerCaseEqualsASCII(scheme, scheme + 5, "https"); else m_protocolInHTTPFamily = false;}void KURLGooglePrivate::copyTo(KURLGooglePrivate* dest) const{ dest->m_isValid = m_isValid; dest->m_protocolInHTTPFamily = m_protocolInHTTPFamily; dest->m_parsed = m_parsed; // Don't copy the 16-bit string since that will be regenerated as needed. 
dest->m_utf8 = CString(m_utf8.data(), m_utf8.length()); dest->m_utf8IsASCII = m_utf8IsASCII; dest->m_stringIsValid = false;}String KURLGooglePrivate::componentString(const url_parse::Component& comp) const{ if (!m_isValid || comp.len <= 0) { // KURL returns a NULL string if the URL is itself a NULL string, and an // empty string for other nonexistant entities. if (utf8String().isNull()) return String(); return String("", 0); } // begin and len are in terms of bytes which do not match // if string() is UTF-16 and input contains non-ASCII characters. // However, the only part in urlString that can contain non-ASCII // characters is 'ref' at the end of the string. In that case, // begin will always match the actual value and len (in terms of // byte) will be longer than what's needed by 'mid'. However, mid // truncates len to avoid go past the end of a string so that we can // get away withtout doing anything here. return string().substring(comp.begin, comp.len);}void KURLGooglePrivate::replaceComponents(const Replacements& replacements){ url_canon::RawCanonOutputT<char> output; url_parse::Parsed newParsed; m_isValid = url_util::ReplaceComponents(utf8String().data(), utf8String().length(), m_parsed, replacements, 0, &output, &newParsed); m_parsed = newParsed; if (m_parsed.ref.is_nonempty()) setUtf8(CString(output.data(), output.length())); else setAscii(CString(output.data(), output.length()));}const String& KURLGooglePrivate::string() const{ if (!m_stringIsValid) { // Must special case the NULL case, since constructing the // string like we do below will generate an empty rather than // a NULL string. if (m_utf8.isNull()) m_string = String(); else if (m_utf8IsASCII) m_string = String(m_utf8.data(), m_utf8.length()); else
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -