📄 kurlgoogle.cpp

📁 linux下开源浏览器WebKit的源码,市面上的很多商用浏览器都是移植自WebKit
💻 CPP
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
    if (ref.isNull())        replacements.ClearRef();    else        replacements.SetRef(CharactersOrEmpty(ref), url_parse::Component(0, ref.length()));    m_url.replaceComponents(replacements);}void KURL::removeRef(){    KURLGooglePrivate::Replacements replacements;    replacements.ClearRef();    m_url.replaceComponents(replacements);}void KURL::setQuery(const String& query){    KURLGooglePrivate::Replacements replacements;    if (query.isNull()) {        // KURL.cpp sets to NULL to clear any query.        replacements.ClearQuery();    } else if (query.length() > 0 && query[0] == '?') {        // WebCore expects the query string to begin with a question mark, but        // GoogleURL doesn't. So we trim off the question mark when setting.        replacements.SetQuery(CharactersOrEmpty(query),                              url_parse::Component(1, query.length() - 1));    } else {        // When set with the empty string or something that doesn't begin with        // a question mark, KURL.cpp will add a question mark for you. The only        // way this isn't compatible is if you call this function with an empty        // string. KURL.cpp will leave a '?' with nothing following it in the        // URL, whereas we'll clear it.        // FIXME We should eliminate this difference.        replacements.SetQuery(CharactersOrEmpty(query),                              url_parse::Component(0, query.length()));    }    m_url.replaceComponents(replacements);}void KURL::setPath(const String& path){    // Empty paths will be canonicalized to "/", so we don't have to worry    // about calling ClearPath().    KURLGooglePrivate::Replacements replacements;    replacements.SetPath(CharactersOrEmpty(path),                         url_parse::Component(0, path.length()));    m_url.replaceComponents(replacements);}// On Mac, this just seems to return the same URL, but with "/foo/bar" for// file: URLs instead of file:///foo/bar. We don't bother with any of this,// at least for now.String KURL::prettyURL() const{    if (!m_url.m_isValid)        return String();    return m_url.string();}// We copied the KURL version here on Sept 12, 2008 while doing a WebKit// merge.// // FIXME Somehow share this with KURL? Like we'd theoretically merge with// decodeURLEscapeSequences below?String mimeTypeFromDataURL(const String& url){    ASSERT(protocolIs(url, "data"));    int index = url.find(';');    if (index == -1)        index = url.find(',');    if (index != -1) {        int len = index - 5;        if (len > 0)            return url.substring(5, len);        return "text/plain"; // Data URLs with no MIME type are considered text/plain.    }    return "";}String decodeURLEscapeSequences(const String& str){    return decodeURLEscapeSequences(str, UTF8Encoding());}// In KURL.cpp's implementation, this is called by every component getter.// It will unescape every character, including NULL. This is scary, and may// cause security holes. We never call this function for components, and// just return the ASCII versions instead.//// However, this static function is called directly in some cases. It appears// that this only happens for javascript: URLs, so this is essentially the// JavaScript URL decoder. It assumes UTF-8 encoding.//// IE doesn't unescape %00, forcing you to use \x00 in JS strings, so we do// the same. This also eliminates NULL-related problems should a consumer// incorrectly call this function for non-JavaScript.//// FIXME These should be merged to the KURL.cpp implementation.String decodeURLEscapeSequences(const String& str, const TextEncoding& encoding){    // FIXME We can probably use KURL.cpp's version of this function    // without modification. However, I'm concerned about    // https://bugs.webkit.org/show_bug.cgi?id=20559 so am keeping this old    // custom code for now. Using their version will also fix the bug that    // we ignore the encoding.    //    // FIXME b/1350291: This does not get called very often. We just convert    // first to 8-bit UTF-8, then unescape, then back to 16-bit. This kind of    // sucks, and we don't use the encoding properly, which will make some    // obscure anchor navigations fail.    CString cstr = str.utf8();    const char* input = cstr.data();    int inputLength = cstr.length();    url_canon::RawCanonOutputT<char> unescaped;    for (int i = 0; i < inputLength; i++) {        if (input[i] == '%') {            unsigned char ch;            if (url_canon::DecodeEscaped(input, &i, inputLength, &ch)) {                if (!ch) {                    // Never unescape NULLs.                    unescaped.push_back('%');                    unescaped.push_back('0');                    unescaped.push_back('0');                } else                    unescaped.push_back(ch);            } else {                // Invalid escape sequence, copy the percent literal.                unescaped.push_back('%');            }        } else {            // Regular non-escaped 8-bit character.            unescaped.push_back(input[i]);        }    }    // Convert that 8-bit to UTF-16. It's not clear IE does this at all to    // JavaScript URLs, but Firefox and Safari do.    url_canon::RawCanonOutputT<url_parse::UTF16Char> utf16;    for (int i = 0; i < unescaped.length(); i++) {        unsigned char uch = static_cast<unsigned char>(unescaped.at(i));        if (uch < 0x80) {            // Non-UTF-8, just append directly            utf16.push_back(uch);        } else {            // next_ch will point to the last character of the decoded            // character.            int nextCharacter = i;            unsigned codePoint;            if (url_canon::ReadUTFChar(unescaped.data(), &nextCharacter,                                       unescaped.length(), &codePoint)) {                // Valid UTF-8 character, convert to UTF-16.                url_canon::AppendUTF16Value(codePoint, &utf16);                i = nextCharacter;            } else {                // KURL.cpp strips any sequences that are not valid UTF-8. This                // sounds scary. Instead, we just keep those invalid code                // points and promote to UTF-16. We copy all characters from                // the current position to the end of the identified sqeuqnce.                while (i < nextCharacter) {                    utf16.push_back(static_cast<unsigned char>(unescaped.at(i)));                    i++;                }                utf16.push_back(static_cast<unsigned char>(unescaped.at(i)));            }        }    }    return String(reinterpret_cast<UChar*>(utf16.data()), utf16.length());}bool KURL::protocolIs(const char* protocol) const{    assertProtocolIsGood(protocol);    if (m_url.m_parsed.scheme.len <= 0)        return !protocol;    return lowerCaseEqualsASCII(        m_url.utf8String().data() + m_url.m_parsed.scheme.begin,        m_url.utf8String().data() + m_url.m_parsed.scheme.end(),        protocol);}bool KURL::isLocalFile() const{    return protocolIs("file");}// This is called to escape a URL string. It is only used externally when// constructing mailto: links to set the query section. Since our query setter// will automatically do the correct escaping, this function does not have to// do any work.//// There is a possibility that a future called may use this function in other// ways, and may expect to get a valid URL string. The dangerous thing we want// to protect against here is accidentally getting NULLs in a string that is// not supposed to have NULLs. Therefore, we escape NULLs here to prevent this.String encodeWithURLEscapeSequences(const String& notEncodedString){    CString utf8 = UTF8Encoding().encode(        reinterpret_cast<const UChar*>(notEncodedString.characters()),        notEncodedString.length(),        URLEncodedEntitiesForUnencodables);    const char* input = utf8.data();    int inputLength = utf8.length();    Vector<char, 2048> buffer;    for (int i = 0; i < inputLength; i++) {        if (!input[i])            buffer.append("%00", 3);        else            buffer.append(input[i]);    }    return String(buffer.data(), buffer.size());}bool KURL::isHierarchical() const{    if (!m_url.m_parsed.scheme.is_nonempty())        return false;    return url_util::IsStandard(        &m_url.utf8String().data()[m_url.m_parsed.scheme.begin],        m_url.utf8String().length(),        m_url.m_parsed.scheme);}#ifndef NDEBUGvoid KURL::print() const{    printf("%s\n", m_url.utf8String().data());}#endifvoid KURL::invalidate(){    // This is only called from the constructor so resetting the (automatically    // initialized) string and parsed structure would be a waste of time.    m_url.m_isValid = false;    m_url.m_protocolInHTTPFamily = false;}// Equal up to reference fragments, if any.bool equalIgnoringRef(const KURL& a, const KURL& b){    // Compute the length of each URL without its ref. Note that the reference    // begin (if it exists) points to the character *after* the '#', so we need    // to subtract one.    int aLength = a.m_url.utf8String().length();    if (a.m_url.m_parsed.ref.len >= 0)        aLength = a.m_url.m_parsed.ref.begin - 1;    int bLength = b.m_url.utf8String().length();    if (b.m_url.m_parsed.ref.len >= 0)        bLength = b.m_url.m_parsed.ref.begin - 1;    return aLength == bLength        && !strncmp(a.m_url.utf8String().data(), b.m_url.utf8String().data(), aLength);}unsigned KURL::hostStart() const{    return m_url.m_parsed.CountCharactersBefore(url_parse::Parsed::HOST, false);}unsigned KURL::hostEnd() const{    return m_url.m_parsed.CountCharactersBefore(url_parse::Parsed::PORT, true);}unsigned KURL::pathStart() const{    return m_url.m_parsed.CountCharactersBefore(url_parse::Parsed::PATH, false);}unsigned KURL::pathEnd() const{    return m_url.m_parsed.CountCharactersBefore(url_parse::Parsed::QUERY, true);}unsigned KURL::pathAfterLastSlash() const{    // When there's no path, ask for what would be the beginning of it.    if (!m_url.m_parsed.path.is_valid())        return m_url.m_parsed.CountCharactersBefore(url_parse::Parsed::PATH, false);    url_parse::Component filename;    url_parse::ExtractFileName(m_url.utf8String().data(), m_url.m_parsed.path,                               &filename);    return filename.begin;}const KURL& blankURL(){    static KURL staticBlankURL("about:blank");    return staticBlankURL;}bool protocolIs(const String& url, const char* protocol){    // Do the comparison without making a new string object.    assertProtocolIsGood(protocol);    for (int i = 0; ; ++i) {        if (!protocol[i])            return url[i] == ':';        if (toASCIILower(url[i]) != protocol[i])            return false;    }}inline bool KURL::protocolIs(const String& string, const char* protocol){    return WebCore::protocolIs(string, protocol);}} // namespace WebCore#endif // USE(GOOGLEURL)
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -