📄 kurl.cpp

📁 linux下开源浏览器WebKit的源码,市面上的很多商用浏览器都是移植自WebKit
💻 CPP
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 3 4
        }        m_passwordEnd = p - buffer.data();        // If we had any user info, add "@"        if (p - buffer.data() != m_userStart)            *p++ = '@';        // copy in the host, except in the case of a file URL with authority="localhost"        if (!(isFile && hostIsLocalHost && !haveNonHostAuthorityPart)) {            strPtr = url + hostStart;            const char* hostEndPtr = url + hostEnd;            while (strPtr < hostEndPtr)                *p++ = *strPtr++;        }        m_hostEnd = p - buffer.data();        // copy in the port        if (hostEnd != portStart) {            *p++ = ':';            strPtr = url + portStart;            const char *portEndPtr = url + portEnd;            while (strPtr < portEndPtr)                *p++ = *strPtr++;        }        m_portEnd = p - buffer.data();    } else        m_userStart = m_userEnd = m_passwordEnd = m_hostEnd = m_portEnd = p - buffer.data();    // For canonicalization, ensure we have a '/' for no path.    // Only do this for http and https.    if (m_protocolInHTTPFamily && pathEnd - pathStart == 0)        *p++ = '/';    // add path, escaping bad characters    if (!hierarchical || !hasSlashDotOrDotDot(url))        appendEscapingBadChars(p, url + pathStart, pathEnd - pathStart);    else {        CharBuffer pathBuffer(pathEnd - pathStart + 1);        size_t length = copyPathRemovingDots(pathBuffer.data(), url, pathStart, pathEnd);        appendEscapingBadChars(p, pathBuffer.data(), length);    }    m_pathEnd = p - buffer.data();    // Find the position after the last slash in the path, or    // the position before the path if there are no slashes in it.    
int i;    for (i = m_pathEnd; i > m_portEnd; --i) {        if (buffer[i - 1] == '/')            break;    }    m_pathAfterLastSlash = i;    // add query, escaping bad characters    appendEscapingBadChars(p, url + queryStart, queryEnd - queryStart);    m_queryEnd = p - buffer.data();    // add fragment, escaping bad characters    if (fragmentEnd != queryEnd) {        *p++ = '#';        appendEscapingBadChars(p, url + fragmentStart, fragmentEnd - fragmentStart);    }    m_fragmentEnd = p - buffer.data();    ASSERT(p - buffer.data() <= static_cast<int>(buffer.size()));    // If we didn't end up actually changing the original string and    // it was already in a String, reuse it to avoid extra allocation.    if (originalString && originalString->length() == static_cast<unsigned>(m_fragmentEnd) && strncmp(buffer.data(), url, m_fragmentEnd) == 0)        m_string = *originalString;    else        m_string = String(buffer.data(), m_fragmentEnd);    m_isValid = true;}bool equalIgnoringRef(const KURL& a, const KURL& b){    if (a.m_queryEnd != b.m_queryEnd)        return false;    unsigned queryLength = a.m_queryEnd;    for (unsigned i = 0; i < queryLength; ++i)        if (a.string()[i] != b.string()[i])            return false;    return true;}bool protocolHostAndPortAreEqual(const KURL& a, const KURL& b){    if (a.m_schemeEnd != b.m_schemeEnd)        return false;    int hostStartA = a.hostStart();    int hostStartB = b.hostStart();    if (a.m_hostEnd - hostStartA != b.m_hostEnd - hostStartB)        return false;    // Check the scheme    for (int i = 0; i < a.m_schemeEnd; ++i)        if (a.string()[i] != b.string()[i])            return false;        // And the host    for (int i = hostStartA; i < a.m_hostEnd; ++i)        if (a.string()[i] != b.string()[i])            return false;        if (a.port() != b.port())        return false;    return true;}    String encodeWithURLEscapeSequences(const String& notEncodedString){    CString asUTF8 = notEncodedString.utf8();    
CharBuffer buffer(asUTF8.length() * 3 + 1);    char* p = buffer.data();    const char* str = asUTF8.data();    const char* strEnd = str + asUTF8.length();    while (str < strEnd) {        unsigned char c = *str++;        if (isBadChar(c)) {            *p++ = '%';            *p++ = hexDigits[c >> 4];            *p++ = hexDigits[c & 0xF];        } else            *p++ = c;    }    ASSERT(p - buffer.data() <= static_cast<int>(buffer.size()));    return String(buffer.data(), p - buffer.data());}// Appends the punycoded hostname identified by the given string and length to// the output buffer. The result will not be null terminated.static void appendEncodedHostname(UCharBuffer& buffer, const UChar* str, unsigned strLen){    // Needs to be big enough to hold an IDN-encoded name.    // For host names bigger than this, we won't do IDN encoding, which is almost certainly OK.    const unsigned hostnameBufferLength = 2048;    if (strLen > hostnameBufferLength || charactersAreAllASCII(str, strLen)) {        buffer.append(str, strLen);        return;    }#if USE(ICU_UNICODE)    UChar hostnameBuffer[hostnameBufferLength];    UErrorCode error = U_ZERO_ERROR;    int32_t numCharactersConverted = uidna_IDNToASCII(str, strLen, hostnameBuffer,        hostnameBufferLength, UIDNA_ALLOW_UNASSIGNED, 0, &error);    if (error == U_ZERO_ERROR)        buffer.append(hostnameBuffer, numCharactersConverted);#elif USE(QT4_UNICODE)    QByteArray result = QUrl::toAce(String(str, strLen));    buffer.append(result.constData(), result.length());#endif}static void findHostnamesInMailToURL(const UChar* str, int strLen, Vector<pair<int, int> >& nameRanges){    // In a mailto: URL, host names come after a '@' character and end with a '>' or ',' or '?' or end of string character.    // Skip quoted strings so that characters in them don't confuse us.    // When we find a '?' character, we are past the part of the URL that contains host names.    
nameRanges.clear();    int p = 0;    while (1) {        // Find start of host name or of quoted string.        int hostnameOrStringStart = findFirstOf(str, strLen, p, "\"@?");        if (hostnameOrStringStart == -1)            return;        UChar c = str[hostnameOrStringStart];        p = hostnameOrStringStart + 1;        if (c == '?')            return;        if (c == '@') {            // Find end of host name.            int hostnameStart = p;            int hostnameEnd = findFirstOf(str, strLen, p, ">,?");            bool done;            if (hostnameEnd == -1) {                hostnameEnd = strLen;                done = true;            } else {                p = hostnameEnd;                done = false;            }            nameRanges.append(make_pair(hostnameStart, hostnameEnd));            if (done)                return;        } else {            // Skip quoted string.            ASSERT(c == '"');            while (1) {                int escapedCharacterOrStringEnd = findFirstOf(str, strLen, p, "\"\\");                if (escapedCharacterOrStringEnd == -1)                    return;                c = str[escapedCharacterOrStringEnd];                p = escapedCharacterOrStringEnd + 1;                // If we are the end of the string, then break from the string loop back to the host name loop.                if (c == '"')                    break;                // Skip escaped character.                ASSERT(c == '\\');                if (p == strLen)                    return;                ++p;            }        }    }}static bool findHostnameInHierarchicalURL(const UChar* str, int strLen, int& startOffset, int& endOffset){    // Find the host name in a hierarchical URL.    // It comes after a "://" sequence, with scheme characters preceding, and    // this should be the first colon in the string.    // It ends with the end of the string or a ":" or a path segment ending character.    
// If there is a "@" character, the host part is just the part after the "@".    int separator = findFirstOf(str, strLen, 0, ":");    if (separator == -1 || separator + 2 >= strLen ||        str[separator + 1] != '/' || str[separator + 2] != '/')        return false;    // Check that all characters before the :// are valid scheme characters.    if (!isSchemeFirstChar(str[0]))        return false;    for (int i = 1; i < separator; ++i) {        if (!isSchemeChar(str[i]))            return false;    }    // Start after the separator.    int authorityStart = separator + 3;    // Find terminating character.    int hostnameEnd = strLen;    for (int i = authorityStart; i < strLen; ++i) {        UChar c = str[i];        if (c == ':' || (isPathSegmentEndChar(c) && c != 0)) {            hostnameEnd = i;            break;        }    }    // Find "@" for the start of the host name.    int userInfoTerminator = findFirstOf(str, strLen, authorityStart, "@");    int hostnameStart;    if (userInfoTerminator == -1 || userInfoTerminator > hostnameEnd)        hostnameStart = authorityStart;    else        hostnameStart = userInfoTerminator + 1;    startOffset = hostnameStart;    endOffset = hostnameEnd;    return true;}// Converts all hostnames found in the given input to punycode, preserving the// rest of the URL unchanged. 
The output will NOT be null-terminated.static void encodeHostnames(const String& str, UCharBuffer& output){    output.clear();    if (protocolIs(str, "mailto")) {        Vector<pair<int, int> > hostnameRanges;        findHostnamesInMailToURL(str.characters(), str.length(), hostnameRanges);        int n = hostnameRanges.size();        int p = 0;        for (int i = 0; i < n; ++i) {            const pair<int, int>& r = hostnameRanges[i];            output.append(&str.characters()[p], r.first - p);            appendEncodedHostname(output, &str.characters()[r.first], r.second - r.first);            p = r.second;        }        // This will copy either everything after the last hostname, or the        // whole thing if there is no hostname.        output.append(&str.characters()[p], str.length() - p);    } else {        int hostStart, hostEnd;        if (findHostnameInHierarchicalURL(str.characters(), str.length(), hostStart, hostEnd)) {            output.append(str.characters(), hostStart); // Before hostname.            appendEncodedHostname(output, &str.characters()[hostStart], hostEnd - hostStart);            output.append(&str.characters()[hostEnd], str.length() - hostEnd); // After hostname.        } else {            // No hostname to encode, return the input.            output.append(str.characters(), str.length());        }    }}static void encodeRelativeString(const String& rel, const TextEncoding& encoding, CharBuffer& output){    UCharBuffer s;    encodeHostnames(rel, s);    TextEncoding pathEncoding(UTF8Encoding()); // Path is always encoded as UTF-8; other parts may depend on the scheme.    int pathEnd = -1;    if (encoding != pathEncoding && encoding.isValid() && !protocolIs(rel, "mailto") && !protocolIs(rel, "data") && !protocolIs(rel, "javascript")) {        // Find the first instance of either # or ?, keep pathEnd at -1 otherwise.        
pathEnd = findFirstOf(s.data(), s.size(), 0, "#?");    }    if (pathEnd == -1) {        CString decoded = pathEncoding.encode(s.data(), s.size(), URLEncodedEntitiesForUnencodables);        output.resize(decoded.length());        memcpy(output.data(), decoded.data(), decoded.length());    } else {        CString pathDecoded = pathEncoding.encode(s.data(), pathEnd, URLEncodedEntitiesForUnencodables);        // Unencodable characters in URLs are represented by converting        // them to XML entities and escaping non-alphanumeric characters.        CString otherDecoded = encoding.encode(s.data() + pathEnd, s.size() - pathEnd, URLEncodedEntitiesForUnencodables);        output.resize(pathDecoded.length() + otherDecoded.length());        memcpy(output.data(), pathDecoded.data(), pathDecoded.length());        memcpy(output.data() + pathDecoded.length(), otherDecoded.data(), otherDecoded.length());    }    output.append('\0'); // null-terminate the output.}static String substituteBackslashes(const String& string){    int questionPos = string.find('?');    int hashPos = string.find('#');    int pathEnd;    if (hashPos >= 0 && (questionPos < 0 || questionPos > hashPos))        pathEnd = hashPos;    else if (questionPos >= 0)        pathEnd = questionPos;    else        pathEnd = string.length();    return string.left(pathEnd).replace('\\','/') + string.substring(pathEnd);}bool KURL::isHierarchical() const{    if (!m_isValid)        return false;    ASSERT(m_string[m_schemeEnd] == ':');    return m_string[m_schemeEnd + 1] == '/';}void KURL::copyToBuffer(CharBuffer& buffer) const{    // FIXME: This throws away the high bytes of all the characters in the string!    // That's fine for a valid URL, which is all ASCII, but not for invalid URLs.    buffer.resize(m_string.length());    copyASCII(m_string.characters(), m_string.length(), buffer.data());}bool protocolIs(const String& url, const char* protocol){    // Do the comparison without making a new string object.    
assertProtocolIsGood(protocol);    for (int i = 0; ; ++i) {        if (!protocol[i])            return url[i] == ':';        if (toASCIILower(url[i]) != protocol[i])            return false;    }}String mimeTypeFromDataURL(const String& url){    ASSERT(protocolIs(url, "data"));    int index = url.find(';');    if (index == -1)        index = url.find(',');    if (index != -1) {        int len = index - 5;        if (len > 0)            return url.substring(5, len);        return "text/plain"; // Data URLs with no MIME type are considered text/plain.    }    return "";}const KURL& blankURL(){    DEFINE_STATIC_LOCAL(KURL, staticBlankURL, ("about:blank"));    return staticBlankURL;}#ifndef NDEBUGvoid KURL::print() const{    printf("%s\n", m_string.utf8().data());}#endif}#endif  // !USE(GOOGLEURL)
上一页 1 2 3 4
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -