📄 kwqkurl.cpp

📁 手机浏览器源码程序,功能强大
💻 CPP
📖 第 1 页 / 共 4 页
字号:

// Replaces the path portion of a valid URL with s and re-parses the
// resulting string. An invalid URL is left untouched.
void KURL::setPath(const QString &s)
{
    if (!m_isValid)
        return;

    // TODO: the new path should be run through encode_string(), but that is
    // not implemented yet because UTF-8 support is missing. Until it is, the
    // path is spliced in verbatim; restore the encoding step once UTF-8 is
    // implemented.
    const QString beforePath = urlString.left(portEndPos);  // text preceding portEndPos
    const QString afterPath = urlString.mid(pathEndPos);    // text from pathEndPos onward
    QString newURL = beforePath + /*encode_string(s)*/s + afterPath;
    parse(newURL.ascii(), &newURL);
}

// Returns the canonical string for this URL. Unless
// CONSTRUCT_CANONICAL_STRING is defined this is simply the stored urlString;
// with the macro defined the string is rebuilt from the URL's components.
QString KURL::canonicalURL() const
{
#ifndef CONSTRUCT_CANONICAL_STRING
    return urlString;
#else
    QString assembled;
    bool sawPrePathComponent = false;

    // scheme
    if (!protocol().isEmpty()) {
        assembled += protocol();
        assembled += "://";
        sawPrePathComponent = true;
    }

    // user[:password]@
    if (!_user().isEmpty()) {
        assembled += _user();
        if (!_pass().isEmpty()) {
            assembled += ":";
            assembled += _pass();
        }
        assembled += "@";
        sawPrePathComponent = true;
    }

    // host[:port] -- a port of 0 is not written out
    if (!_host().isEmpty()) {
        assembled += _host();
        const unsigned short int portNumber = port();
        if (portNumber != 0) {
            assembled += ":";
            assembled += QString::number(portNumber);
        }
        sawPrePathComponent = true;
    }

    // An http/https URL with something before the path but no path at all
    // gets a bare "/" as its path.
    const bool needsRootSlash = sawPrePathComponent
        && (strncasecmp("http", url, schemeEnd) == 0
            || strncasecmp("https", url, schemeEnd) == 0)
        && _path().isEmpty();
    if (needsRootSlash) {
        assembled += "/";
    }

    if (!_path().isEmpty()) {
        assembled += _path();
    }

    // ?query
    if (!query().isEmpty()) {
        assembled += "?";
        assembled += query();
    }

    // #fragment
    if (!ref().isEmpty()) {
        assembled += "#";
        assembled += ref();
    }

    return assembled;
#endif
}


// Builds a display form of the URL: "scheme:" followed, when an authority is
// present, by "//[user@]host[:port]", then path(), query() and, if there is a
// fragment, "#" plus ref(). The password is never written out. An invalid URL
// is returned as the stored string, unchanged.
QString KURL::prettyURL() const
{
    if (!m_isValid)
        return urlString;

    QString pretty = protocol() + ":";

    // The authority is only assembled when hostEndPos differs from
    // passwordEndPos.
    if (hostEndPos != passwordEndPos) {
        QString authority;

        // user name, only when the user range is non-empty
        if (userEndPos != userStartPos) {
            authority += user();
            authority += "@";
        }

        authority += host();

        // a port of 0 is not shown
        if (port() != 0) {
            authority += ":";
            authority += QString::number(port());
        }

        if (!authority.isEmpty()) {
            pretty += "//";
            pretty += authority;
        }
    }

    pretty += path();
    pretty += query();

    // fragment, only when the fragment range is non-empty
    if (fragmentEndPos != queryEndPos) {
        pretty += "#";
        pretty += ref();
    }

    return pretty;
}

// Returns a copy of urlString in which every run of %XX escapes has been
// replaced by the characters those bytes decode to.
//
//   urlString  text that may contain %XX escape sequences
//   codec      codec used to turn the decoded bytes into Unicode; when null,
//              the function-local UTF-8 codec is used instead
//
// Consecutive escapes are decoded together as one byte run so that multi-byte
// sequences reach the codec intact. A '%' that is not followed by two hex
// digits is copied through unchanged, as is any run the codec decodes to an
// empty string.
QString KURL::decode_string(const QString &urlString, const QTextCodec *codec)
{
    static const QTextCodec UTF8Codec(KCharacterSetIdentifierUtf8);

    QString result("");

    // Runs are staged in a stack buffer; a heap buffer replaces it only when a
    // run is too long to fit.
    char staticBuffer[256];
    char *buffer = staticBuffer;
    int bufferLength = sizeof(staticBuffer);

    int length = urlString.length();
    int decodedPosition = 0;   // start of the text not yet appended to result
    int searchPosition = 0;    // where the next search for '%' begins
    int encodedRunPosition;
    // Index 0 is a valid match: a '%' at the very start of the string must be
    // decoded too, so only a negative result ends the loop.
    // NOTE(review): assumes find() returns a negative value when nothing is
    // found (Qt convention) -- confirm against this port's QString.
    while ((encodedRunPosition = urlString.find('%', searchPosition)) >= 0) {
        // Find the sequence of %-escape codes.
        int encodedRunEnd = encodedRunPosition;
        while (length - encodedRunEnd >= 3
                && urlString[encodedRunEnd] == '%'
                && isHexDigit(urlString[encodedRunEnd + 1].latin1())
                && isHexDigit(urlString[encodedRunEnd + 2].latin1()))
            encodedRunEnd += 3;
        if (encodedRunEnd == encodedRunPosition) {
            // A '%' without two hex digits after it: leave it in place and
            // resume the search directly behind it.
            searchPosition = encodedRunPosition + 1;
            continue;
        }
        searchPosition = encodedRunEnd;

        // Copy the entire %-escape sequence into an 8-bit buffer.
        int encodedRunLength = encodedRunEnd - encodedRunPosition;
        if (encodedRunLength + 1 > bufferLength) {
            if (buffer != staticBuffer) {
#ifndef __OOM__
                free(buffer);
#else
                MemoryManager::Free( buffer );
#endif
            }
            bufferLength = malloc_good_size(encodedRunLength + 1);
#ifndef __OOM__
            buffer = static_cast<char *>(malloc(bufferLength));
#else
            buffer = static_cast<char *>(MemoryManager::Alloc(bufferLength));
#endif
            if (!buffer) {
                // Allocation failed: go back to the stack buffer and stop
                // decoding; the remaining text is appended undecoded below.
                buffer = staticBuffer;
                bufferLength = sizeof(staticBuffer);
                break;
            }
        }
        // NOTE(review): the loop below stops at a NUL byte, so copyLatin1 is
        // presumed to NUL-terminate the buffer -- confirm.
        urlString.copyLatin1(buffer, encodedRunPosition, encodedRunLength);

        // Decode the %-escapes into bytes, in place: each 3-character escape
        // read through q yields one byte written through p, so the write
        // position never catches up with the read position.
        char *p = buffer;
        const char *q = buffer;
        while (*q) {
            *p++ = (hexDigitValue(q[1]) << 4) | hexDigitValue(q[2]);
            q += 3;
        }

        // Decode the bytes into Unicode characters.
        QString decoded = (codec ? codec : &UTF8Codec)->toUnicode(buffer, p - buffer);
        if (decoded.isEmpty()) {
            // Nothing came out of the codec: keep the escapes as written.
            // decodedPosition is not advanced, so they are copied verbatim
            // with the next chunk.
            continue;
        }

        // Build up the string with what we just skipped and what we just decoded.
        result.append(urlString.mid(decodedPosition, encodedRunPosition - decodedPosition));
        result.append(decoded);
        decodedPosition = encodedRunEnd;
    }

    // Append whatever follows the last decoded run.
    result.append(urlString.mid(decodedPosition, length - decodedPosition));

    if (buffer != staticBuffer) {
#ifndef __OOM__
        free(buffer);
#else
        MemoryManager::Free( buffer );
#endif
    }

    return result;
}

// Reports whether the URL's protocol is "file".
bool KURL::isLocalFile() const
{
    // FIXME - include feed: here too?
    const bool usesFileScheme = (protocol() == "file");
    return usesFileScheme;
}

// Appends length bytes starting at strStart to the output at buffer,
// %-escaping every byte isBadChar() rejects, with two exceptions: an existing
// "%XX" escape is copied through as is, and '?' is copied unescaped.
//
// buffer is an in/out write cursor: on return it points just past the last
// byte written. No terminator is written and no bounds are checked here, so
// the caller must provide room for up to three output bytes per input byte.
static void appendEscapingBadChars(char*& buffer, const char *strStart, size_t length)
{
    char *out = buffer;
    const char *in = strStart;
    const char *const inEnd = strStart + length;

    while (in < inEnd) {
        const unsigned char ch = *in++;

        if (!isBadChar(ch)) {
            *out++ = ch;
            continue;
        }

        // 'in' already points past ch, so an intact escape needs two more
        // bytes, both hex digits.
        const bool isExistingEscape =
            ch == '%' && inEnd - in >= 2 && isHexDigit(in[0]) && isHexDigit(in[1]);

        if (isExistingEscape) {
            // keep "%XX" exactly as written
            *out++ = ch;
            *out++ = *in++;
            *out++ = *in++;
        } else if (ch == '?') {
            *out++ = ch;
        } else {
            // emit the byte as '%' followed by its two hex digits
            *out++ = '%';
            *out++ = hexDigits[ch >> 4];
            *out++ = hexDigits[ch & 0xF];
        }
    }

    buffer = out;
}

// Copies the path src[srcStart, srcEnd) into dst, resolving "." and ".."
// segments on the way, NUL-terminates dst and returns the number of
// characters written (terminator not counted).
//
//   dst       output buffer; the result is never longer than the input range,
//             so srcEnd - srcStart + 1 bytes are sufficient
//   src       string containing the path
//   srcStart  index of the first path character (expected to be '/')
//   srcEnd    index one past the last path character
//
// Only characters inside [srcStart, srcEnd) are read, so the range does not
// have to be followed by a terminator or any other readable byte.
static int copyPathRemovingDots(char *dst, const char *src, int srcStart, int srcEnd)
{
    char *bufferPathStart = dst;

    // empty path is a special case, and need not have a leading slash
    if (srcStart != srcEnd) {
        const char *baseStringEnd = src + srcEnd;
        const char *baseStringPos = src + srcStart;

        // this code is unprepared for paths that do not begin with a
        // slash and we should always have one in the source string
        ASSERT(baseStringPos[0] == '/');

        // copy the leading slash into the destination
        *dst++ = *baseStringPos++;

        while (baseStringPos < baseStringEnd) {
            // a '.' only starts a dot segment when it directly follows a slash
            if (baseStringPos[0] == '.' && dst[-1] == '/') {
                // The end-of-range tests come before the dereferences so that
                // nothing at or beyond baseStringEnd is ever read.
                const bool dotIsLast = baseStringPos + 1 == baseStringEnd;
                if (dotIsLast || baseStringPos[1] == '/') {
                    // skip over "." segment (and the slash after it, if any)
                    baseStringPos += dotIsLast ? 1 : 2;
                    continue;
                }

                // here at least two characters remain, so [1] is in range
                const bool dotDotIsLast = baseStringPos + 2 == baseStringEnd;
                if (baseStringPos[1] == '.' && (dotDotIsLast || baseStringPos[2] == '/')) {
                    // skip over ".." segment and rewind the last segment
                    // the RFC leaves it up to the app to decide what to do with excess
                    // ".." segments - we choose to drop them since some web content
                    // relies on this.
                    baseStringPos += dotDotIsLast ? 2 : 3;
                    if (dst > bufferPathStart + 1) {
                        dst--;
                    }
                    // Note that these two while blocks differ subtly.
                    // The first helps to remove multiple adjoining slashes as we rewind.
                    // The +1 to bufferPathStart in the first while block prevents eating a leading slash
                    while (dst > bufferPathStart + 1 && dst[-1] == '/') {
                        dst--;
                    }
                    // drop the previous segment, stopping just after its leading slash
                    while (dst > bufferPathStart && dst[-1] != '/') {
                        dst--;
                    }
                    continue;
                }
            }

            // ordinary character: copy it through
            *dst++ = *baseStringPos++;
        }
    }
    *dst = '\0';
    return static_cast<int>(dst - bufferPathStart);
}

// Returns true when str contains a '.' that immediately follows a '/' or
// another '.', i.e. the substring "/." or "..". The first character has no
// predecessor and therefore never matches on its own. str must be
// NUL-terminated.
static inline bool hasSlashDotOrDotDot(const char *str)
{
    if (str[0] == '\0')
        return false;

    for (const char *cur = str + 1; *cur != '\0'; ++cur) {
        if (*cur != '.')
            continue;
        const char previous = cur[-1];
        if (previous == '/' || previous == '.')
            return true;
    }
    return false;
}

// Compares c against lowercaseLetter after setting bit 0x20 in c, which maps
// an ASCII uppercase letter onto its lowercase form. lowercaseLetter is
// compared as given, so it must already be lowercase for a letter to match.
static inline bool matchLetter(char c, char lowercaseLetter)
{
    const int withLowercaseBit = c | 0x20;
    return withLowercaseBit == lowercaseLetter;
}

void KURL::parse(const char *url, const QString *originalString)
{
    m_isValid = true;

    if (url == NULL || url[0] == '\0') {
	// valid URL must be non-empty
	m_isValid = false;
	urlString = url;
	return;
    }

    if (!isSchemeFirstChar(url[0])) {
	// scheme must start with an alphabetic character
	m_isValid = false;
	urlString = url;
	return;
    }

    int schemeEnd = 0;

    while (isSchemeChar(url[schemeEnd])) {
	schemeEnd++;
    }

    if (url[schemeEnd] != ':') {
	m_isValid = false;
	urlString = url;
	return;
    }

    int userStart = schemeEnd + 1;
    int userEnd;
    int passwordStart;
    int passwordEnd;
    int hostStart;
    int hostEnd;
    int portStart;
    int portEnd;

    bool hierarchical = url[schemeEnd + 1] == '/';

    if (hierarchical && url[schemeEnd + 2] == '/') {
	// part after the scheme must be a net_path, parse the authority section

	// FIXME: authority characters may be scanned twice
	userStart += 2;
	userEnd = userStart;

	int colonPos = 0;
	while (isUserInfoChar(url[userEnd])) {
	    if (url[userEnd] == ':' && colonPos == 0) {
		colonPos = userEnd;
	    }
	    userEnd++;
	}

	if (url[userEnd] == '@') {
	    // actual end of the userinfo, start on the host
	    if (colonPos != 0) {
		passwordEnd = userEnd;
		userEnd = colonPos;
		passwordStart = colonPos + 1;

	    } else {
		passwordStart = passwordEnd = userEnd;

	    }
	    hostStart = passwordEnd + 1;
	} else if (url[userEnd] == '[' || isPathSegmentEndChar(url[userEnd])) {
	    // hit the end of the authority, must have been no user
	    // or looks like an IPv6 hostname
	    // either way, try to parse it as a hostname
	    userEnd = userStart;
	    passwordStart = passwordEnd = userEnd;
	    hostStart = userStart;

	} else {
	    // invalid character
	    m_isValid = false;
	    urlString = url;

	    return;
	}

	hostEnd = hostStart;

	// IPV6 IP address
	if (url[hostEnd] == '[') {
	    hostEnd++;
	    while (isIPv6Char(url[hostEnd])) {
		hostEnd++;
	    }

	    if (url[hostEnd] == ']') {
		hostEnd++;
	    } else {
		// invalid character
		m_isValid = false;
		urlString = url;

		return;
	    }
	} else {
	    while (isHostnameChar(url[hostEnd])) {
		hostEnd++;
	    }

	}

	if (url[hostEnd] == ':') {
	    portStart = portEnd = hostEnd + 1;

	    // possible start of port
	    portEnd = portStart;
	    while (isdigit(url[portEnd])) {
		portEnd++;
	    }

	} else {
	    portStart = portEnd = hostEnd;
	}

	if (!isPathSegmentEndChar(url[portEnd])) {
	    // invalid character
	    m_isValid = false;
	    urlString = url;
	    return;
	}
    } else {
	// the part after the scheme must be an opaque_part or an abs_path
	userEnd = userStart;
	passwordStart = passwordEnd = userEnd;
	hostStart = hostEnd = passwordEnd;
	portStart = portEnd = hostEnd;

    }

    int pathStart = portEnd;
    int pathEnd = pathStart;
    int queryStart;
    int queryEnd;
    int fragmentStart;
    int fragmentEnd;

    if (!hierarchical) {
        while (url[pathEnd] != '\0' && url[pathEnd] != '?') {
            pathEnd++;
        }

    	queryStart = queryEnd = pathEnd;

        while (url[queryEnd] != '\0') {
            queryEnd++;
        }

    	fragmentStart = fragmentEnd = queryEnd;
    }
    else {
💿 文件大小 1976 K
👤 上传用户 qingriwanxia
📂 所属分类通讯/手机编程
🏷️ 相关标签

#手机 #浏览器 #源码 #程序
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -