📄 kurl.cpp
字号:
} m_passwordEnd = p - buffer.data(); // If we had any user info, add "@" if (p - buffer.data() != m_userStart) *p++ = '@'; // copy in the host, except in the case of a file URL with authority="localhost" if (!(isFile && hostIsLocalHost && !haveNonHostAuthorityPart)) { strPtr = url + hostStart; const char* hostEndPtr = url + hostEnd; while (strPtr < hostEndPtr) *p++ = *strPtr++; } m_hostEnd = p - buffer.data(); // copy in the port if (hostEnd != portStart) { *p++ = ':'; strPtr = url + portStart; const char *portEndPtr = url + portEnd; while (strPtr < portEndPtr) *p++ = *strPtr++; } m_portEnd = p - buffer.data(); } else m_userStart = m_userEnd = m_passwordEnd = m_hostEnd = m_portEnd = p - buffer.data(); // For canonicalization, ensure we have a '/' for no path. // Only do this for http and https. if (m_protocolInHTTPFamily && pathEnd - pathStart == 0) *p++ = '/'; // add path, escaping bad characters if (!hierarchical || !hasSlashDotOrDotDot(url)) appendEscapingBadChars(p, url + pathStart, pathEnd - pathStart); else { CharBuffer pathBuffer(pathEnd - pathStart + 1); size_t length = copyPathRemovingDots(pathBuffer.data(), url, pathStart, pathEnd); appendEscapingBadChars(p, pathBuffer.data(), length); } m_pathEnd = p - buffer.data(); // Find the position after the last slash in the path, or // the position before the path if there are no slashes in it. int i; for (i = m_pathEnd; i > m_portEnd; --i) { if (buffer[i - 1] == '/') break; } m_pathAfterLastSlash = i; // add query, escaping bad characters appendEscapingBadChars(p, url + queryStart, queryEnd - queryStart); m_queryEnd = p - buffer.data(); // add fragment, escaping bad characters if (fragmentEnd != queryEnd) { *p++ = '#'; appendEscapingBadChars(p, url + fragmentStart, fragmentEnd - fragmentStart); } m_fragmentEnd = p - buffer.data(); ASSERT(p - buffer.data() <= static_cast<int>(buffer.size())); // If we didn't end up actually changing the original string and // it was already in a String, reuse it to avoid extra allocation. if (originalString && originalString->length() == static_cast<unsigned>(m_fragmentEnd) && strncmp(buffer.data(), url, m_fragmentEnd) == 0) m_string = *originalString; else m_string = String(buffer.data(), m_fragmentEnd); m_isValid = true;}bool equalIgnoringRef(const KURL& a, const KURL& b){ if (a.m_queryEnd != b.m_queryEnd) return false; unsigned queryLength = a.m_queryEnd; for (unsigned i = 0; i < queryLength; ++i) if (a.string()[i] != b.string()[i]) return false; return true;}bool protocolHostAndPortAreEqual(const KURL& a, const KURL& b){ if (a.m_schemeEnd != b.m_schemeEnd) return false; int hostStartA = a.hostStart(); int hostStartB = b.hostStart(); if (a.m_hostEnd - hostStartA != b.m_hostEnd - hostStartB) return false; // Check the scheme for (int i = 0; i < a.m_schemeEnd; ++i) if (a.string()[i] != b.string()[i]) return false; // And the host for (int i = hostStartA; i < a.m_hostEnd; ++i) if (a.string()[i] != b.string()[i]) return false; if (a.port() != b.port()) return false; return true;} String encodeWithURLEscapeSequences(const String& notEncodedString){ CString asUTF8 = notEncodedString.utf8(); CharBuffer buffer(asUTF8.length() * 3 + 1); char* p = buffer.data(); const char* str = asUTF8.data(); const char* strEnd = str + asUTF8.length(); while (str < strEnd) { unsigned char c = *str++; if (isBadChar(c)) { *p++ = '%'; *p++ = hexDigits[c >> 4]; *p++ = hexDigits[c & 0xF]; } else *p++ = c; } ASSERT(p - buffer.data() <= static_cast<int>(buffer.size())); return String(buffer.data(), p - buffer.data());}// Appends the punycoded hostname identified by the given string and length to// the output buffer. The result will not be null terminated.static void appendEncodedHostname(UCharBuffer& buffer, const UChar* str, unsigned strLen){ // Needs to be big enough to hold an IDN-encoded name. // For host names bigger than this, we won't do IDN encoding, which is almost certainly OK. const unsigned hostnameBufferLength = 2048; if (strLen > hostnameBufferLength || charactersAreAllASCII(str, strLen)) { buffer.append(str, strLen); return; }#if USE(ICU_UNICODE) UChar hostnameBuffer[hostnameBufferLength]; UErrorCode error = U_ZERO_ERROR; int32_t numCharactersConverted = uidna_IDNToASCII(str, strLen, hostnameBuffer, hostnameBufferLength, UIDNA_ALLOW_UNASSIGNED, 0, &error); if (error == U_ZERO_ERROR) buffer.append(hostnameBuffer, numCharactersConverted);#elif USE(QT4_UNICODE) QByteArray result = QUrl::toAce(String(str, strLen)); buffer.append(result.constData(), result.length());#endif}static void findHostnamesInMailToURL(const UChar* str, int strLen, Vector<pair<int, int> >& nameRanges){ // In a mailto: URL, host names come after a '@' character and end with a '>' or ',' or '?' or end of string character. // Skip quoted strings so that characters in them don't confuse us. // When we find a '?' character, we are past the part of the URL that contains host names. nameRanges.clear(); int p = 0; while (1) { // Find start of host name or of quoted string. int hostnameOrStringStart = findFirstOf(str, strLen, p, "\"@?"); if (hostnameOrStringStart == -1) return; UChar c = str[hostnameOrStringStart]; p = hostnameOrStringStart + 1; if (c == '?') return; if (c == '@') { // Find end of host name. int hostnameStart = p; int hostnameEnd = findFirstOf(str, strLen, p, ">,?"); bool done; if (hostnameEnd == -1) { hostnameEnd = strLen; done = true; } else { p = hostnameEnd; done = false; } nameRanges.append(make_pair(hostnameStart, hostnameEnd)); if (done) return; } else { // Skip quoted string. ASSERT(c == '"'); while (1) { int escapedCharacterOrStringEnd = findFirstOf(str, strLen, p, "\"\\"); if (escapedCharacterOrStringEnd == -1) return; c = str[escapedCharacterOrStringEnd]; p = escapedCharacterOrStringEnd + 1; // If we are the end of the string, then break from the string loop back to the host name loop. if (c == '"') break; // Skip escaped character. ASSERT(c == '\\'); if (p == strLen) return; ++p; } } }}static bool findHostnameInHierarchicalURL(const UChar* str, int strLen, int& startOffset, int& endOffset){ // Find the host name in a hierarchical URL. // It comes after a "://" sequence, with scheme characters preceding, and // this should be the first colon in the string. // It ends with the end of the string or a ":" or a path segment ending character. // If there is a "@" character, the host part is just the part after the "@". int separator = findFirstOf(str, strLen, 0, ":"); if (separator == -1 || separator + 2 >= strLen || str[separator + 1] != '/' || str[separator + 2] != '/') return false; // Check that all characters before the :// are valid scheme characters. if (!isSchemeFirstChar(str[0])) return false; for (int i = 1; i < separator; ++i) { if (!isSchemeChar(str[i])) return false; } // Start after the separator. int authorityStart = separator + 3; // Find terminating character. int hostnameEnd = strLen; for (int i = authorityStart; i < strLen; ++i) { UChar c = str[i]; if (c == ':' || (isPathSegmentEndChar(c) && c != 0)) { hostnameEnd = i; break; } } // Find "@" for the start of the host name. int userInfoTerminator = findFirstOf(str, strLen, authorityStart, "@"); int hostnameStart; if (userInfoTerminator == -1 || userInfoTerminator > hostnameEnd) hostnameStart = authorityStart; else hostnameStart = userInfoTerminator + 1; startOffset = hostnameStart; endOffset = hostnameEnd; return true;}// Converts all hostnames found in the given input to punycode, preserving the// rest of the URL unchanged. The output will NOT be null-terminated.static void encodeHostnames(const String& str, UCharBuffer& output){ output.clear(); if (protocolIs(str, "mailto")) { Vector<pair<int, int> > hostnameRanges; findHostnamesInMailToURL(str.characters(), str.length(), hostnameRanges); int n = hostnameRanges.size(); int p = 0; for (int i = 0; i < n; ++i) { const pair<int, int>& r = hostnameRanges[i]; output.append(&str.characters()[p], r.first - p); appendEncodedHostname(output, &str.characters()[r.first], r.second - r.first); p = r.second; } // This will copy either everything after the last hostname, or the // whole thing if there is no hostname. output.append(&str.characters()[p], str.length() - p); } else { int hostStart, hostEnd; if (findHostnameInHierarchicalURL(str.characters(), str.length(), hostStart, hostEnd)) { output.append(str.characters(), hostStart); // Before hostname. appendEncodedHostname(output, &str.characters()[hostStart], hostEnd - hostStart); output.append(&str.characters()[hostEnd], str.length() - hostEnd); // After hostname. } else { // No hostname to encode, return the input. output.append(str.characters(), str.length()); } }}static void encodeRelativeString(const String& rel, const TextEncoding& encoding, CharBuffer& output){ UCharBuffer s; encodeHostnames(rel, s); TextEncoding pathEncoding(UTF8Encoding()); // Path is always encoded as UTF-8; other parts may depend on the scheme. int pathEnd = -1; if (encoding != pathEncoding && encoding.isValid() && !protocolIs(rel, "mailto") && !protocolIs(rel, "data") && !protocolIs(rel, "javascript")) { // Find the first instance of either # or ?, keep pathEnd at -1 otherwise. pathEnd = findFirstOf(s.data(), s.size(), 0, "#?"); } if (pathEnd == -1) { CString decoded = pathEncoding.encode(s.data(), s.size(), URLEncodedEntitiesForUnencodables); output.resize(decoded.length()); memcpy(output.data(), decoded.data(), decoded.length()); } else { CString pathDecoded = pathEncoding.encode(s.data(), pathEnd, URLEncodedEntitiesForUnencodables); // Unencodable characters in URLs are represented by converting // them to XML entities and escaping non-alphanumeric characters. CString otherDecoded = encoding.encode(s.data() + pathEnd, s.size() - pathEnd, URLEncodedEntitiesForUnencodables); output.resize(pathDecoded.length() + otherDecoded.length()); memcpy(output.data(), pathDecoded.data(), pathDecoded.length()); memcpy(output.data() + pathDecoded.length(), otherDecoded.data(), otherDecoded.length()); } output.append('\0'); // null-terminate the output.}static String substituteBackslashes(const String& string){ int questionPos = string.find('?'); int hashPos = string.find('#'); int pathEnd; if (hashPos >= 0 && (questionPos < 0 || questionPos > hashPos)) pathEnd = hashPos; else if (questionPos >= 0) pathEnd = questionPos; else pathEnd = string.length(); return string.left(pathEnd).replace('\\','/') + string.substring(pathEnd);}bool KURL::isHierarchical() const{ if (!m_isValid) return false; ASSERT(m_string[m_schemeEnd] == ':'); return m_string[m_schemeEnd + 1] == '/';}void KURL::copyToBuffer(CharBuffer& buffer) const{ // FIXME: This throws away the high bytes of all the characters in the string! // That's fine for a valid URL, which is all ASCII, but not for invalid URLs. buffer.resize(m_string.length()); copyASCII(m_string.characters(), m_string.length(), buffer.data());}bool protocolIs(const String& url, const char* protocol){ // Do the comparison without making a new string object. assertProtocolIsGood(protocol); for (int i = 0; ; ++i) { if (!protocol[i]) return url[i] == ':'; if (toASCIILower(url[i]) != protocol[i]) return false; }}String mimeTypeFromDataURL(const String& url){ ASSERT(protocolIs(url, "data")); int index = url.find(';'); if (index == -1) index = url.find(','); if (index != -1) { int len = index - 5; if (len > 0) return url.substring(5, len); return "text/plain"; // Data URLs with no MIME type are considered text/plain. } return "";}const KURL& blankURL(){ DEFINE_STATIC_LOCAL(KURL, staticBlankURL, ("about:blank")); return staticBlankURL;}#ifndef NDEBUGvoid KURL::print() const{ printf("%s\n", m_string.utf8().data());}#endif}#endif // !USE(GOOGLEURL)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -