kurlgoogle.cpp

来自「linux下开源浏览器WebKit的源码,市面上的很多商用浏览器都是移植自Web」· C++ 代码 · 共 948 行 · 第 1/3 页
CPP
948 行
/*
 * Copyright (C) 2008, 2009 Google Inc. All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 * 
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"

#if USE(GOOGLEURL)
#include "KURL.h"

#include "CString.h"
#include "NotImplemented.h"
#include "TextEncoding.h"
#include <wtf/Vector.h>

#include <googleurl/src/url_canon_internal.h>
#include <googleurl/src/url_util.h>

using WTF::isASCIILower;
using WTF::toASCIILower;

namespace WebCore {

// Wraps WebCore's text encoding in a character set converter for the
// canonicalizer.
class KURLCharsetConverter : public url_canon::CharsetConverter {
public:
    // The encoding parameter may be NULL, but in this case the object must not
    // be called.
    KURLCharsetConverter(const TextEncoding* encoding)
        : m_encoding(encoding)
    {
    }

    // Encodes inputLength UTF-16 code units starting at input with m_encoding
    // (passing the URLEncodedEntitiesForUnencodables flag to encode()) and
    // appends the resulting bytes to output. m_encoding is dereferenced
    // unconditionally, which is why a NULL-constructed converter must never
    // be called.
    virtual void ConvertFromUTF16(const url_parse::UTF16Char* input, int inputLength,
                                  url_canon::CanonOutput* output)
    {
        CString encoded = m_encoding->encode(input, inputLength, URLEncodedEntitiesForUnencodables);
        output->Append(encoded.data(), static_cast<int>(encoded.length()));
    }

private:
    // Not owned by this object; only the pointer is stored.
    const TextEncoding* m_encoding;
};

// Debug-only sanity check of a NUL-terminated protocol string: every character
// must be greater than ' ', less than 0x7F, and must not be an uppercase ASCII
// letter. The body is compiled out entirely when NDEBUG is defined.
//
// Note that this function must be named differently than the one in KURL.cpp
// since our unit tests evilly include both files, and their local definition
// will be ambiguous.
static inline void assertProtocolIsGood(const char* protocol)
{
#ifndef NDEBUG
    const char* p = protocol;
    while (*p) {
        ASSERT(*p > ' ' && *p < 0x7F && !(*p >= 'A' && *p <= 'Z'));
        ++p;
    }
#endif
}

// Returns the characters for the given string, or a pointer to a static empty
// string if the input string is NULL. This will always ensure we have a non-
// NULL character pointer since ReplaceComponents has special meaning for NULL.
static inline const url_parse::UTF16Char* CharactersOrEmpty(const String& str)
{
    static const url_parse::UTF16Char zero = 0;
    return str.characters() ?
           reinterpret_cast<const url_parse::UTF16Char*>(str.characters()) :
           &zero;
}

// Returns true when the encoding's form-submission encoding compares equal to
// UTF8Encoding(). The pointer is dereferenced without a check, so callers must
// pass a non-NULL encoding.
static inline bool isUnicodeEncoding(const TextEncoding* encoding)
{
    return encoding->encodingForFormSubmission() == UTF8Encoding();
}

// Compares the character range [begin, end) against the NUL-terminated string
// str, lower-casing each character of the range before comparing. str itself
// is asserted to consist of lowercase ASCII characters and is compared as-is.
static bool lowerCaseEqualsASCII(const char* begin, const char* end, const char* str)
{
    while (begin != end && *str) {
        ASSERT(isASCIILower(*str));
        if (toASCIILower(*begin++) != *str++)
            return false;
    }

    // Both strings are equal (ignoring case) if and only if all of the characters were equal,
    // and the end of both has been reached.
    return begin == end && !*str;
}

// KURLGooglePrivate -----------------------------------------------------------

// Creates an invalid URL: all flags are cleared except m_utf8IsASCII, which
// starts out true.
KURLGooglePrivate::KURLGooglePrivate()
    : m_isValid(false)
    , m_protocolInHTTPFamily(false)
    , m_utf8IsASCII(true)
    , m_stringIsValid(false)
{
}

// Creates a URL from already-parsed components and a caller-supplied validity
// flag. m_protocolInHTTPFamily is initialized to false here; it is only
// recomputed by initProtocolInHTTPFamily(), which the setters call.
KURLGooglePrivate::KURLGooglePrivate(const url_parse::Parsed& parsed, bool isValid)
    : m_isValid(isValid)
    , m_protocolInHTTPFamily(false)
    , m_parsed(parsed)
    , m_utf8IsASCII(true)
    , m_stringIsValid(false)
{
}

// Setters for the data. Using the ASCII version when you know the
// data is ASCII will be slightly more efficient. The UTF-8 version
// will always be correct if the caller is unsure.
//
// setUtf8 scans the bytes to compute m_utf8IsASCII, stores the string,
// invalidates the cached string flag and refreshes m_protocolInHTTPFamily.
void KURLGooglePrivate::setUtf8(const CString& str)
{
    const char* data = str.data();
    unsigned dataLength = str.length();

    // The m_utf8IsASCII must always be correct since the DeprecatedString
    // getter must create it with the proper constructor. This test can be
    // removed when DeprecatedString is gone, but it still might be a
    // performance win.
    m_utf8IsASCII = true;
    for (unsigned i = 0; i < dataLength; i++) {
        // Any byte with the high bit set means the data is not pure ASCII.
        if (static_cast<unsigned char>(data[i]) >= 0x80) {
            m_utf8IsASCII = false;
            break;
        }
    }

    m_utf8 = str;
    m_stringIsValid = false;
    initProtocolInHTTPFamily();
}

// Same as setUtf8, but skips the scan and marks the data as ASCII
// unconditionally. The input is not verified here.
void KURLGooglePrivate::setAscii(const CString& str)
{
    m_utf8 = str;
    m_utf8IsASCII = true;
    m_stringIsValid = false;
    initProtocolInHTTPFamily();
}

// Convenience overload: forwards the String's character buffer and length to
// the pointer-based init() overload below that matches the type returned by
// relative.characters().
void KURLGooglePrivate::init(const KURL& base,
                             const String& relative,
                             const TextEncoding* queryEncoding)
{
    init(base, relative.characters(), relative.length(), queryEncoding);
}

// Resolves the 8-bit relative URL rel (relLength characters) against base and
// stores the canonicalized result, the parsed components and the validity
// flag in this object. queryEncoding may be NULL.
//
// Note: code mostly duplicated below.
void KURLGooglePrivate::init(const KURL& base, const char* rel, int relLength,
                             const TextEncoding* queryEncoding)
{
    // As a performance optimization, we do not use the charset converter if
    // encoding is UTF-8 or other Unicode encodings. Note that this is
    // per HTML5 2.5.3 (resolving URL). The URL canonicalizer will be
    // more efficient with no charset converter object because it
    // can do UTF-8 internally with no extra copies.

    // We feel free to make the charset converter object every time since it's
    // just a wrapper around a reference.
    KURLCharsetConverter charsetConverterObject(queryEncoding);
    KURLCharsetConverter* charsetConverter =
        (!queryEncoding || isUnicodeEncoding(queryEncoding)) ? 0 :
        &charsetConverterObject;

    url_canon::RawCanonOutputT<char> output;
    const CString& baseStr = base.m_url.utf8String();
    m_isValid = url_util::ResolveRelative(baseStr.data(), baseStr.length(),
                                          base.m_url.m_parsed, rel, relLength,
                                          charsetConverter,
                                          &output, &m_parsed);

    // See FIXME in KURLGooglePrivate in the header. If canonicalization has not
    // changed the string, we can avoid an extra allocation by using assignment.
    //
    // When KURL encounters an error such that the URL is invalid and empty
    // (for example, resolving a relative URL on a non-hierarchical base), it
    // will produce an isNull URL, and calling setUtf8 will produce an empty
    // non-null URL. This is unlikely to affect anything, but we preserve this
    // just in case.
    if (m_isValid || output.length()) {
        // Without ref, the whole url is guaranteed to be ASCII-only.
        if (m_parsed.ref.is_nonempty())
            setUtf8(CString(output.data(), output.length()));
        else
            setAscii(CString(output.data(), output.length()));
    } else {
        // WebCore expects resolved URLs to be empty rather than NULL.
        setUtf8(CString("", 0));
    }
}

// UChar variant of the overload above: resolves rel (relLength characters)
// against base and stores the result in this object.
//
// Note: code mostly duplicated above. See FIXMEs and comments there.
void KURLGooglePrivate::init(const KURL& base, const UChar* rel, int relLength,
                             const TextEncoding* queryEncoding)
{
    KURLCharsetConverter charsetConverterObject(queryEncoding);
    KURLCharsetConverter* charsetConverter =
        (!queryEncoding || isUnicodeEncoding(queryEncoding)) ? 0 :
        &charsetConverterObject;

    url_canon::RawCanonOutputT<char> output;
    const CString& baseStr = base.m_url.utf8String();
    m_isValid = url_util::ResolveRelative(baseStr.data(), baseStr.length(),
                                          base.m_url.m_parsed, rel, relLength,
                                          charsetConverter,
                                          &output, &m_parsed);

    if (m_isValid || output.length()) {
        if (m_parsed.ref.is_nonempty())
            setUtf8(CString(output.data(), output.length()));
        else
            setAscii(CString(output.data(), output.length()));
    } else
        setUtf8(CString("", 0));
}

// Recomputes m_protocolInHTTPFamily from the current m_utf8 and m_parsed:
// true only for a valid URL whose scheme is "http" or "https", compared
// case-insensitively.
void KURLGooglePrivate::initProtocolInHTTPFamily()
{
    if (!m_isValid) {
        m_protocolInHTTPFamily = false;
        return;
    }

    // Only schemes of length 4 or 5 can match, so the length check avoids
    // comparing anything else.
    const char* scheme = m_utf8.data() + m_parsed.scheme.begin;
    if (m_parsed.scheme.len == 4)
        m_protocolInHTTPFamily = lowerCaseEqualsASCII(scheme, scheme + 4, "http");
    else if (m_parsed.scheme.len == 5)
        m_protocolInHTTPFamily = lowerCaseEqualsASCII(scheme, scheme + 5, "https");
    else
        m_protocolInHTTPFamily = false;
}

// Copies this object's state into dest. The UTF-8 data is copied into a newly
// constructed CString, and dest's cached string is marked invalid instead of
// being copied.
void KURLGooglePrivate::copyTo(KURLGooglePrivate* dest) const
{
    dest->m_isValid = m_isValid;
    dest->m_protocolInHTTPFamily = m_protocolInHTTPFamily;
    dest->m_parsed = m_parsed;

    // Don't copy the 16-bit string since that will be regenerated as needed.
    dest->m_utf8 = CString(m_utf8.data(), m_utf8.length());
    dest->m_utf8IsASCII = m_utf8IsASCII;
    dest->m_stringIsValid = false;
}

// Returns the text of the given component as a String. For an invalid URL or
// a component with len <= 0, returns a NULL String when the UTF-8 string is
// itself NULL and an empty String otherwise.
String KURLGooglePrivate::componentString(const url_parse::Component& comp) const
{
    if (!m_isValid || comp.len <= 0) {
        // KURL returns a NULL string if the URL is itself a NULL string, and an
        // empty string for other nonexistent entities.
        if (utf8String().isNull())
            return String();
        return String("", 0);
    }
    // begin and len are in terms of bytes which do not match
    // if string() is UTF-16 and input contains non-ASCII characters.
    // However, the only part in urlString that can contain non-ASCII
    // characters is 'ref' at the end of the string. In that case,
    // begin will always match the actual value and len (in terms of
    // byte) will be longer than what's needed by 'substring'. However,
    // substring truncates len to avoid going past the end of a string so
    // that we can get away without doing anything here.
    return string().substring(comp.begin, comp.len);
}

// Applies replacements to the current URL via url_util::ReplaceComponents
// (with 0 passed as the charset converter argument), then stores the new
// validity flag, the new parsed components and the re-canonicalized string.
void KURLGooglePrivate::replaceComponents(const Replacements& replacements)
{
    url_canon::RawCanonOutputT<char> output;
    url_parse::Parsed newParsed;

    m_isValid = url_util::ReplaceComponents(utf8String().data(),
                                            utf8String().length(), m_parsed, replacements, 0, &output, &newParsed);

    m_parsed = newParsed;
    // Same setter choice as in init(): setUtf8 when the ref is non-empty,
    // setAscii otherwise.
    if (m_parsed.ref.is_nonempty())
        setUtf8(CString(output.data(), output.length()));
    else
        setAscii(CString(output.data(), output.length()));
}

const String& KURLGooglePrivate::string() const
{
    if (!m_stringIsValid) {
        // Must special case the NULL case, since constructing the
        // string like we do below will generate an empty rather than
        // a NULL string.
        if (m_utf8.isNull())
            m_string = String();
        else if (m_utf8IsASCII)
            m_string = String(m_utf8.data(), m_utf8.length());
        else
kurlgoogle.cpp - 源码说明

本页面展示了「linux下开源浏览器WebKit的源码,市面上的很多商用浏览器都是移植自WebKit」中的 kurlgoogle.cpp 源码文件，采用 C++ 编程语言编写，共 948 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与WebKit相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?