📄 kwqkurl.cpp

📁 khtml在gtk上的移植版本
💻 CPP
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 34
    char *buffer;    uint bufferLength = fragmentEnd * 3 + 1;    if (bufferLength <= sizeof(staticBuffer)) {	buffer = staticBuffer;    } else {	buffer = (char *)malloc(bufferLength);    }    char *p = buffer;    const char *strPtr = url;    // copy in the scheme    const char *schemeEndPtr = url + schemeEnd;    while (strPtr < schemeEndPtr) {	*p++ = *strPtr++;    }    schemeEndPos = p - buffer;    // Check if we're http or https.    bool isHTTPorHTTPS = matchLetter(url[0], 'h')        && matchLetter(url[1], 't')        && matchLetter(url[2], 't')        && matchLetter(url[3], 'p')        && (url[4] == ':'            || (matchLetter(url[4], 's') && url[5] == ':'));    bool hostIsLocalHost = portEnd - userStart == 9        && matchLetter(url[userStart], 'l')        && matchLetter(url[userStart+1], 'o')        && matchLetter(url[userStart+2], 'c')        && matchLetter(url[userStart+3], 'a')        && matchLetter(url[userStart+4], 'l')        && matchLetter(url[userStart+5], 'h')        && matchLetter(url[userStart+6], 'o')        && matchLetter(url[userStart+7], 's')        && matchLetter(url[userStart+8], 't');    bool isFile = matchLetter(url[0], 'f')        && matchLetter(url[1], 'i')        && matchLetter(url[2], 'l')        && matchLetter(url[3], 'e')        && url[4] == ':';            // File URLs need a host part unless it is just file:// or file://localhost    bool degenFilePath = pathStart == pathEnd        && (hostStart == hostEnd            || hostIsLocalHost);        bool haveNonHostAuthorityPart = userStart != userEnd || passwordStart != passwordEnd || portStart != portEnd;    // add ":" after scheme    *p++ = ':';    // if we have at least one authority part or a file URL - add "//" and authority    if (isFile ? !degenFilePath               : (haveNonHostAuthorityPart || hostStart != hostEnd)) {//if ((isFile && !degenFilePath) || haveNonHostAuthorityPart || hostStart != hostEnd) {// still adds // for file://localhost, file:////if (!(isFile && degenFilePath) && (haveNonHostAuthorityPart || hostStart != hostEnd)) {//doesn't add // for things like file:///foo        *p++ = '/';	*p++ = '/';	userStartPos = p - buffer;	// copy in the user	strPtr = url + userStart;	const char *userEndPtr = url + userEnd;	while (strPtr < userEndPtr) {	    *p++ = *strPtr++;	}	userEndPos = p - buffer;		// copy in the password	if (passwordEnd != passwordStart) {	    *p++ = ':';	    strPtr = url + passwordStart;	    const char *passwordEndPtr = url + passwordEnd;	    while (strPtr < passwordEndPtr) {		*p++ = *strPtr++;	    }	}	passwordEndPos = p - buffer;		// If we had any user info, add "@"	if (p - buffer != userStartPos) {	    *p++ = '@';	}		// copy in the host, except in the case of a file URL with authority="localhost"	if (!(isFile && hostIsLocalHost && !haveNonHostAuthorityPart)) {            strPtr = url + hostStart;            const char *hostEndPtr = url + hostEnd;            while (strPtr < hostEndPtr) {                *p++ = *strPtr++;            }        }	hostEndPos = p - buffer;		// copy in the port	if (portEnd != portStart) {	    *p++ = ':';	    strPtr = url + portStart;	    const char *portEndPtr = url + portEnd;	    while (strPtr < portEndPtr) {		*p++ = *strPtr++;	    }	}	portEndPos = p - buffer;    } else {	userStartPos = userEndPos = passwordEndPos = hostEndPos = portEndPos = p - buffer;    }    // For canonicalization, ensure we have a '/' for no path.    // Only do this for http and https.    if (isHTTPorHTTPS && pathEnd - pathStart == 0) {        *p++ = '/';    }           // add path, escaping bad characters        if (hierarchical && hasSlashDotOrDotDot(url)) {        char static_path_buffer[4096];        char *path_buffer;        uint pathBufferLength = pathEnd - pathStart + 1;        if (pathBufferLength <= sizeof(static_path_buffer)) {            path_buffer = static_path_buffer;        } else {            path_buffer = (char *)malloc(pathBufferLength);        }        copyPathRemovingDots(path_buffer, url, pathStart, pathEnd);        appendEscapingBadChars(p, path_buffer, strlen(path_buffer));        if (path_buffer != static_path_buffer) {            free(path_buffer);        }    }    else {        appendEscapingBadChars(p, url + pathStart, pathEnd - pathStart);    }    pathEndPos = p - buffer;            // add query, escaping bad characters    appendEscapingBadChars(p, url + queryStart, queryEnd - queryStart);    queryEndPos = p - buffer;        // add fragment, escaping bad characters    if (fragmentEnd != queryEnd) {	*p++ = '#';	appendEscapingBadChars(p, url + fragmentStart, fragmentEnd - fragmentStart);    }    fragmentEndPos = p - buffer;    // If we didn't end up actually changing the original string and    // it started as a QString, just reuse it, to avoid extra    // allocation.    if (originalString != NULL && strncmp(buffer, url, fragmentEndPos) == 0) {	urlString = *originalString;    } else {	urlString = QString(buffer, fragmentEndPos);    }    ASSERT(p - buffer <= (int)bufferLength);    if (buffer != staticBuffer) {	free(buffer);    }}bool operator==(const KURL &a, const KURL &b){    return a.urlString == b.urlString;}bool urlcmp(const QString &a, const QString &b, bool ignoreTrailingSlash, bool ignoreRef){    if (ignoreRef) {        KURL aURL(a);        KURL bURL(b);        if (aURL.m_isValid && bURL.m_isValid) {            return aURL.urlString.left(aURL.queryEndPos) == bURL.urlString.left(bURL.queryEndPos);        }    }    return a == b;}QString KURL::encode_string(const QString& notEncodedString){    QCString asUTF8 = notEncodedString.utf8();    char staticBuffer[4096];    char *buffer;    uint bufferLength = asUTF8.length() * 3 + 1;    if (bufferLength <= sizeof(staticBuffer)) {	buffer = staticBuffer;    } else {	buffer = (char *)malloc(bufferLength);    }    char *p = buffer;    const char *str = asUTF8;    const char *strEnd = str + asUTF8.length();    while (str < strEnd) {	unsigned char c = *str++;        if (isBadChar(c)) {            *p++ = '%';            *p++ = hexDigits[c >> 4];            *p++ = hexDigits[c & 0xF];	} else {	    *p++ = c;	}    }        QString result(buffer, p - buffer);        ASSERT(p - buffer <= (int)bufferLength);    if (buffer != staticBuffer) {	free(buffer);    }    return result;}#if !KWIQNSURL *KURL::getNSURL() const{    const UInt8 *bytes = (const UInt8 *)(urlString.latin1());    NSURL *result = nil;    if (urlString.length() > 0) {        // NOTE: We use UTF-8 here since this encoding is used when computing strings when returning URL components        // (e.g calls to NSURL -path). However, this function is not tolerant of illegal UTF-8 sequences, which        // could either be a malformed string or bytes in a different encoding, like shift-jis, so we fall back        // onto using ISO Latin 1 in those cases.        result = (NSURL *)CFURLCreateAbsoluteURLWithBytes(NULL, bytes, urlString.length(), kCFStringEncodingUTF8, NULL, TRUE);        if (!result) {            result = (NSURL *)CFURLCreateAbsoluteURLWithBytes(NULL, bytes, urlString.length(), kCFStringEncodingISOLatin1, NULL, TRUE);        }        [result autorelease];    }    else {        result = [NSURL URLWithString:@""];    }        return result;}NSData *KURL::getNSData() const{    const UInt8 *bytes = (const UInt8 *)(urlString.latin1());    return [NSData dataWithBytes:bytes length:urlString.length()];}#if HAVE_ICU_LIBRARYQString KURL::encodeHostnames(const QString &s){    if (s.startsWith("mailto:", false)) {        const QMemArray<KWQIntegerPair> hostnameRanges = findHostnamesInMailToURL(s);        uint n = hostnameRanges.size();        if (n != 0) {            QString result;            uint p = 0;            for (uint i = 0; i < n; ++i) {                const KWQIntegerPair &r = hostnameRanges[i];                result += s.mid(p, r.start);                result += encodeHostname(s.mid(r.start, r.end - r.start));                p = r.end;            }            result += s.mid(p);            return result;        }    } else {        int hostStart, hostEnd;        if (findHostnameInHierarchicalURL(s, hostStart, hostEnd)) {            return s.left(hostStart) + encodeHostname(s.mid(hostStart, hostEnd - hostStart)) + s.mid(hostEnd);         }    }    return s;}bool KURL::findHostnameInHierarchicalURL(const QString &s, int &startOffset, int &endOffset){    // Find the host name in a hierarchical URL.    // It comes after a "://" sequence, with scheme characters preceding.    // If ends with the end of the string or a ":" or a path segment ending character.    // If there is a "@" character, the host part is just the part after the "@".    int separator = s.find("://");    if (separator <= 0) {        return false;    }    // Check that all characters before the :// are valid scheme characters.    if (!isSchemeFirstChar(s[0].latin1())) {        return false;    }    for (int i = 1; i < separator; ++i) {        if (!isSchemeChar(s[i].latin1())) {            return false;        }    }    // Start after the separator.    int authorityStart = separator + 3;    // Find terminating character.    int length = s.length();    int hostnameEnd = length;    for (int i = authorityStart; i < length; ++i) {        char c = s[i].latin1();        if (c == ':' || (isPathSegmentEndChar(c) && c != '\0')) {            hostnameEnd = i;            break;        }    }    // Find "@" for the start of the host name.    int userInfoTerminator = s.find('@', authorityStart);    int hostnameStart;    if (userInfoTerminator == -1 || userInfoTerminator > hostnameEnd) {        hostnameStart = authorityStart;    } else {        hostnameStart = userInfoTerminator + 1;    }    startOffset = hostnameStart;    endOffset = hostnameEnd;    return true;}QString KURL::encodeHostname(const QString &s){    // Needs to be big enough to hold an IDN-encoded name.    // For host names bigger than this, we won't do IDN encoding, which is almost certainly OK.    const unsigned hostnameBufferLength = 2048;    if (s.isAllASCII() || s.length() > hostnameBufferLength) {        return s;    }    UChar buffer[hostnameBufferLength];        UErrorCode error = U_ZERO_ERROR;    int32_t numCharactersConverted = uidna_IDNToASCII        (reinterpret_cast<const UChar *>(s.unicode()), s.length(), buffer, hostnameBufferLength, UIDNA_ALLOW_UNASSIGNED, NULL, &error);    if (error != U_ZERO_ERROR) {        return s;    }    return QString(reinterpret_cast<QChar *>(buffer), numCharactersConverted);}QMemArray<KWQIntegerPair> KURL::findHostnamesInMailToURL(const QString &s){    // In a mailto: URL, host names come after a '@' character and end with a '>' or ',' or '?' or end of string character.    // Skip quoted strings so that characters in them don't confuse us.    // When we find a '?' character, we are past the part of the URL that contains host names.    QMemArray<KWQIntegerPair> a;    int p = 0;    while (1) {        // Find start of host name or of quoted string.        int hostnameOrStringStart = s.find(QRegExp("[\"@?]"), p);        if (hostnameOrStringStart == -1) {            return a;        }        QChar c = s[hostnameOrStringStart];        p = hostnameOrStringStart + 1;        if (c == '?') {            return a;        }                if (c == '@') {            // Find end of host name.            int hostnameStart = p;            int hostnameEnd = s.find(QRegExp("[>,?]"), p);            bool done;            if (hostnameEnd == -1) {                hostnameEnd = s.length();                done = true;            } else {                p = hostnameEnd;                done = false;            }            int i = a.size();            a.resize(i + 1);            a[i] = KWQIntegerPair(hostnameStart, hostnameEnd);            if (done) {                return a;            }        } else {            // Skip quoted string.            ASSERT(c == '"');            while (1) {                int escapedCharacterOrStringEnd = s.find(QRegExp("[\"\\]"), p);                if (escapedCharacterOrStringEnd == -1) {                    return a;                }                c = s[escapedCharacterOrStringEnd];                p = escapedCharacterOrStringEnd + 1;                                // If we are the end of the string, then break from the string loop back to the host name loop.                if (c == '"') {                    break;                }                                // Skip escaped character.                ASSERT(c == '\\');                if (p == static_cast<int>(s.length())) {                    return a;                }                                ++p;            }        }    }}#endif // HAVE_ICU_LIBRARY#endif
上一页 1 2 34
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -