📄 kwqkurl.cpp
字号:
}
queryStart = pathEnd;
queryEnd = queryStart;
if (url[queryStart] == '?') {
while (url[queryEnd] != '\0' && url[queryEnd] != '#') {
queryEnd++;
}
}
fragmentStart = queryEnd;
fragmentEnd = fragmentStart;
if (url[fragmentStart] == '#') {
fragmentStart++;
fragmentEnd = fragmentStart;
while(url[fragmentEnd] != '\0') {
fragmentEnd++;
}
}
}
// assemble it all, remembering the real ranges
char staticBuffer[256];
char *buffer;
uint bufferLength = fragmentEnd * 3 + 1;
if (bufferLength <= sizeof(staticBuffer)) {
buffer = staticBuffer;
} else {
#ifndef __OOM__
buffer = (char *)malloc(bufferLength);
#else
buffer = (char *)MemoryManager::Alloc(bufferLength);
#endif
}
char *p = buffer;
const char *strPtr = url;
// copy in the scheme
const char *schemeEndPtr = url + schemeEnd;
while (strPtr < schemeEndPtr) {
*p++ = *strPtr++;
}
schemeEndPos = p - buffer;
// Check if we're http or https.
bool isHTTPorHTTPS = matchLetter(url[0], 'h')
&& matchLetter(url[1], 't')
&& matchLetter(url[2], 't')
&& matchLetter(url[3], 'p')
&& (url[4] == ':'
|| (matchLetter(url[4], 's') && url[5] == ':'));
bool hostIsLocalHost = portEnd - userStart == 9
&& matchLetter(url[userStart], 'l')
&& matchLetter(url[userStart+1], 'o')
&& matchLetter(url[userStart+2], 'c')
&& matchLetter(url[userStart+3], 'a')
&& matchLetter(url[userStart+4], 'l')
&& matchLetter(url[userStart+5], 'h')
&& matchLetter(url[userStart+6], 'o')
&& matchLetter(url[userStart+7], 's')
&& matchLetter(url[userStart+8], 't');
bool isFile = matchLetter(url[0], 'f')
&& matchLetter(url[1], 'i')
&& matchLetter(url[2], 'l')
&& matchLetter(url[3], 'e')
&& url[4] == ':';
// File URLs need a host part unless it is just file:// or file://localhost
bool degenFilePath = pathStart == pathEnd
&& (hostStart == hostEnd
|| hostIsLocalHost);
bool haveNonHostAuthorityPart = userStart != userEnd || passwordStart != passwordEnd || portStart != portEnd;
// add ":" after scheme
*p++ = ':';
// if we have at least one authority part or a file URL - add "//" and authority
if (isFile ? !degenFilePath
: (haveNonHostAuthorityPart || hostStart != hostEnd)) {
//if ((isFile && !degenFilePath) || haveNonHostAuthorityPart || hostStart != hostEnd) {
// still adds // for file://localhost, file://
//if (!(isFile && degenFilePath) && (haveNonHostAuthorityPart || hostStart != hostEnd)) {
//doesn't add // for things like file:///foo
*p++ = '/';
*p++ = '/';
userStartPos = p - buffer;
// copy in the user
strPtr = url + userStart;
const char *userEndPtr = url + userEnd;
while (strPtr < userEndPtr) {
*p++ = *strPtr++;
}
userEndPos = p - buffer;
// copy in the password
if (passwordEnd != passwordStart) {
*p++ = ':';
strPtr = url + passwordStart;
const char *passwordEndPtr = url + passwordEnd;
while (strPtr < passwordEndPtr) {
*p++ = *strPtr++;
}
}
passwordEndPos = p - buffer;
// If we had any user info, add "@"
if (p - buffer != userStartPos) {
*p++ = '@';
}
// copy in the host, except in the case of a file URL with authority="localhost"
if (!(isFile && hostIsLocalHost && !haveNonHostAuthorityPart)) {
strPtr = url + hostStart;
const char *hostEndPtr = url + hostEnd;
while (strPtr < hostEndPtr) {
*p++ = *strPtr++;
}
}
hostEndPos = p - buffer;
// copy in the port
if (portEnd != portStart) {
*p++ = ':';
strPtr = url + portStart;
const char *portEndPtr = url + portEnd;
while (strPtr < portEndPtr) {
*p++ = *strPtr++;
}
}
portEndPos = p - buffer;
} else {
userStartPos = userEndPos = passwordEndPos = hostEndPos = portEndPos = p - buffer;
}
// For canonicalization, ensure we have a '/' for no path.
// Only do this for http and https.
if (isHTTPorHTTPS && pathEnd - pathStart == 0) {
*p++ = '/';
}
// add path, escaping bad characters
if (hierarchical && hasSlashDotOrDotDot(url)) {
char static_path_buffer[256];
char *path_buffer;
uint pathBufferLength = pathEnd - pathStart + 1;
if (pathBufferLength <= sizeof(static_path_buffer)) {
path_buffer = static_path_buffer;
} else {
#ifndef __OOM__
path_buffer = (char *)malloc(pathBufferLength);
#else
path_buffer = (char *)MemoryManager::Alloc(pathBufferLength);
#endif
}
copyPathRemovingDots(path_buffer, url, pathStart, pathEnd);
appendEscapingBadChars(p, path_buffer, strlen(path_buffer));
if (path_buffer != static_path_buffer) {
#ifndef __OOM__
free(path_buffer);
#else
MemoryManager::Free( path_buffer );
#endif
}
}
else {
appendEscapingBadChars(p, url + pathStart, pathEnd - pathStart);
}
pathEndPos = p - buffer;
// add query, escaping bad characters
appendEscapingBadChars(p, url + queryStart, queryEnd - queryStart);
queryEndPos = p - buffer;
// add fragment, escaping bad characters
if (fragmentEnd != queryEnd) {
*p++ = '#';
appendEscapingBadChars(p, url + fragmentStart, fragmentEnd - fragmentStart);
}
fragmentEndPos = p - buffer;
// If we didn't end up actually changing the original string and
// it started as a QString, just reuse it, to avoid extra
// allocation.
if (originalString != NULL && strncmp(buffer, url, fragmentEndPos) == 0) {
urlString = *originalString;
} else {
urlString = QString(buffer, fragmentEndPos);
}
ASSERT(p - buffer <= (int)bufferLength);
if (buffer != staticBuffer) {
#ifndef __OOM__
free(buffer);
#else
MemoryManager::Free( buffer );
#endif
}
}
// Equality for KURL objects: two URLs are equal exactly when their
// urlString members compare equal.
bool operator==(const KURL &a, const KURL &b)
{
    const bool sameString = (a.urlString == b.urlString);
    return sameString;
}
bool urlcmp(const QString &a, const QString &b, bool ignoreTrailingSlash, bool ignoreRef)
{
if (ignoreRef) {
KURL aURL(a);
KURL bURL(b);
if (aURL.m_isValid && bURL.m_isValid) {
return aURL.urlString.left(aURL.queryEndPos) == bURL.urlString.left(bURL.queryEndPos);
}
}
return a == b;
}
// Percent-escapes a string.
//
// The input is converted with utf8(). Every byte for which isBadChar() is
// true is emitted as '%' followed by two characters looked up in hexDigits
// (high nibble first, then low nibble); every other byte is copied unchanged.
// The escaped bytes are returned as a new QString.
QString KURL::encode_string(const QString& notEncodedString)
{
    QCString utf8Bytes = notEncodedString.utf8();

    // In the worst case every input byte expands to three output characters;
    // one extra byte is reserved on top of that. Short results use the
    // on-stack array, longer ones a heap allocation.
    char stackStorage[256];
    uint capacity = utf8Bytes.length() * 3 + 1;
    char *output = stackStorage;
    if (capacity > sizeof(stackStorage)) {
#ifndef __OOM__
        output = (char *)malloc(capacity);
#else
        output = (char *)MemoryManager::Alloc(capacity);
#endif
    }

    char *cursor = output;
    const char *src = utf8Bytes;
    const char *srcEnd = src + utf8Bytes.length();
    for (; src < srcEnd; ++src) {
        unsigned char ch = *src;
        if (!isBadChar(ch)) {
            *cursor++ = ch;
            continue;
        }
        *cursor++ = '%';
        *cursor++ = hexDigits[ch >> 4];
        *cursor++ = hexDigits[ch & 0xF];
    }

    QString result(output, cursor - output);

    ASSERT(cursor - output <= (int)capacity);

    // Release the buffer only if it did not come from the stack array.
    if (output != stackStorage) {
#ifndef __OOM__
        free(output);
#else
        MemoryManager::Free( output );
#endif
    }

    return result;
}
#if HAVE_ICU_LIBRARY
// Returns a copy of |s| in which each host name that can be located has been
// replaced by the result of encodeHostname(). If no host name is found, |s|
// is returned unchanged.
//
// - For strings that start with "mailto:" (startsWith is called with a second
//   argument of false, presumably requesting a case-insensitive match), the
//   host name ranges come from findHostnamesInMailToURL(); there may be
//   several of them.
// - For all other strings, a single host name is looked up with
//   findHostnameInHierarchicalURL().
QString KURL::encodeHostnames(const QString &s)
{
    if (s.startsWith("mailto:", false)) {
        const QMemArray<KWQIntegerPair> hostnameRanges = findHostnamesInMailToURL(s);
        uint n = hostnameRanges.size();
        if (n != 0) {
            QString result;
            // |p| is the offset of the first character of |s| that has not
            // yet been appended to |result|.
            uint p = 0;
            for (uint i = 0; i < n; ++i) {
                const KWQIntegerPair &r = hostnameRanges[i];
                // Copy the text between the previous host name and this one.
                // mid() takes a start index and a LENGTH, so the length must
                // be r.start - p; passing r.start itself is only correct while
                // p == 0 and duplicates text for every later host name.
                result += s.mid(p, r.start - p);
                result += encodeHostname(s.mid(r.start, r.end - r.start));
                p = r.end;
            }
            // Copy whatever follows the last host name.
            result += s.mid(p);
            return result;
        }
    } else {
        int hostStart, hostEnd;
        if (findHostnameInHierarchicalURL(s, hostStart, hostEnd)) {
            return s.left(hostStart) + encodeHostname(s.mid(hostStart, hostEnd - hostStart)) + s.mid(hostEnd);
        }
    }
    return s;
}
// Locates the host name inside a hierarchical URL.
//
// The host name comes after a "://" sequence that is preceded only by scheme
// characters. It ends with the end of the string, a ':' or a (non-NUL) path
// segment ending character. If there is a '@' character at or before that end
// position, the host part is just the part after the '@'.
//
// On success, returns true and stores the half-open range of the host name in
// startOffset / endOffset. Returns false (leaving both outputs untouched) when
// no "://" is found past index 0 or the text before it is not a valid scheme.
//
// NOTE: the end-of-host scan stops at the first ':' after the "://", so a '@'
// that appears only after such a ':' is not treated as ending the user info.
bool KURL::findHostnameInHierarchicalURL(const QString &s, int &startOffset, int &endOffset)
{
    const int sepPos = s.find("://");
    if (sepPos <= 0) {
        return false;
    }

    // Everything before the "://" must be made of valid scheme characters.
    if (!isSchemeFirstChar(s[0].latin1())) {
        return false;
    }
    int idx = 1;
    while (idx < sepPos) {
        if (!isSchemeChar(s[idx].latin1())) {
            return false;
        }
        ++idx;
    }

    // The authority begins right after the three-character separator.
    const int authorityBegin = sepPos + 3;

    // Advance until the terminating character or the end of the string.
    const int total = s.length();
    int hostStop = authorityBegin;
    while (hostStop < total) {
        char ch = s[hostStop].latin1();
        if (ch == ':' || (isPathSegmentEndChar(ch) && ch != '\0')) {
            break;
        }
        ++hostStop;
    }

    // A '@' located no later than the end position marks the end of the user info.
    const int atPos = s.find('@', authorityBegin);
    const bool hasUserInfo = (atPos != -1) && (atPos <= hostStop);

    startOffset = hasUserInfo ? atPos + 1 : authorityBegin;
    endOffset = hostStop;
    return true;
}
// Converts a host name to its IDN ASCII form using ICU's uidna_IDNToASCII.
//
// The input is returned unchanged when it is all ASCII, when it is longer
// than the conversion buffer, or when the conversion reports any status other
// than U_ZERO_ERROR.
QString KURL::encodeHostname(const QString &s)
{
    // Must be large enough to hold an IDN-encoded name. Host names longer
    // than this are simply not IDN-encoded, which is almost certainly OK.
    const unsigned maxHostnameLength = 2048;

    if (s.isAllASCII()) {
        return s;
    }
    if (s.length() > maxHostnameLength) {
        return s;
    }

    UChar encoded[maxHostnameLength];
    UErrorCode status = U_ZERO_ERROR;
    int32_t encodedLength = uidna_IDNToASCII(
        reinterpret_cast<const UChar *>(s.unicode()), s.length(),
        encoded, maxHostnameLength,
        UIDNA_ALLOW_UNASSIGNED, NULL, &status);

    if (status == U_ZERO_ERROR) {
        return QString(reinterpret_cast<QChar *>(encoded), encodedLength);
    }
    return s;
}
// Collects the ranges of all host names in a mailto: URL.
//
// A host name starts right after a '@' character and runs up to the next '>',
// ',' or '?' character, or to the end of the string. Double-quoted strings are
// skipped (honouring backslash escapes) so that characters inside them are not
// mistaken for delimiters. Scanning stops at the first '?' found outside a
// quoted string, since host names do not occur past that point.
//
// Each returned KWQIntegerPair is constructed from (start offset, end offset).
QMemArray<KWQIntegerPair> KURL::findHostnamesInMailToURL(const QString &s)
{
    QMemArray<KWQIntegerPair> ranges;
    int cursor = 0;

    for (;;) {
        // Look for the next '@' (host name), '"' (quoted string) or '?' (stop).
        const int markerPos = s.find(QRegExp("[\"@?]"), cursor);
        if (markerPos == -1) {
            return ranges;
        }
        QChar marker = s[markerPos];
        cursor = markerPos + 1;

        if (marker == '?') {
            return ranges;
        }

        if (marker == '@') {
            // The host name extends to the next delimiter, or to the end of
            // the string when there is none.
            const int nameStart = cursor;
            int nameEnd = s.find(QRegExp("[>,?]"), cursor);
            const bool reachedEnd = (nameEnd == -1);
            if (reachedEnd) {
                nameEnd = s.length();
            } else {
                // Resume scanning at the delimiter itself so that a '?'
                // delimiter is seen by the outer search.
                cursor = nameEnd;
            }

            const int slot = ranges.size();
            ranges.resize(slot + 1);
            ranges[slot] = KWQIntegerPair(nameStart, nameEnd);

            if (reachedEnd) {
                return ranges;
            }
        } else {
            // Skip over a quoted string.
            ASSERT(marker == '"');
            bool closed = false;
            while (!closed) {
                const int stopPos = s.find(QRegExp("[\"\\]"), cursor);
                if (stopPos == -1) {
                    return ranges;
                }
                marker = s[stopPos];
                cursor = stopPos + 1;
                if (marker == '"') {
                    // Closing quote: go back to looking for host names.
                    closed = true;
                } else {
                    // Backslash: step over the escaped character, unless the
                    // backslash was the very last character of the string.
                    ASSERT(marker == '\\');
                    if (cursor == static_cast<int>(s.length())) {
                        return ranges;
                    }
                    ++cursor;
                }
            }
        }
    }
}
#endif // HAVE_ICU_LIBRARY
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -