📄 kurl.cpp
字号:
/* This file is part of the KDE libraries
   Copyright (C) 1999 Torben Weis <weis@kde.org>

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public License
   along with this library; see the file COPYING.LIB. If not, write to
   the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.
*/

#include "kurl.h"
#include <kdebug.h>

#include <stdio.h>
#include <assert.h>
#include <ctype.h>
#include <stdlib.h>

#include <qurl.h>
#include <qdir.h>
#include <qstringlist.h>
#include <qtextcodec.h>

// When non-zero, the QTextCodec based conversions below are compiled out and
// every conversion goes through the local 8-bit encoding instead.
#define _CODE_BY_YMWEI 1

#if !_CODE_BY_YMWEI
/**
 * Looks up the text codec matching @p encoding_hint (must not be 0).
 * Returns 0 when no codec is found for the derived charset name.
 */
static QTextCodec * codecForHint( int encoding_hint /* not 0 ! */ )
{
    // Get the charset name from encoding_hint - but KCharsets doesn't
    // know about "unicode"
    QString charsetName = (encoding_hint == QFont::Unicode) ?
       "utf8" : KGlobal::charsets()->name( (QFont::CharSet) encoding_hint );
    bool ok;
    QTextCodec * textCodec = KGlobal::charsets()->codecForName( charsetName, ok );
    return ok ? textCodec : 0L;
}
#endif

/**
 * Percent-encodes @p segment.
 *
 * The segment is first converted to an 8-bit string; every byte that is
 * <= 32, >= 127, one of the characters listed in the strchr() set below, or
 * '/' when @p encode_slash is true, is written as "%XX" with upper-case hex
 * digits. All other bytes are copied through.
 *
 * @param segment        text to encode
 * @param encode_slash   whether '/' has to be escaped as well
 * @param encoding_hint  with _CODE_BY_YMWEI set this has no effect: both
 *                       branches use the local 8-bit encoding
 * @return the encoded text, or QString::null when the 8-bit form is empty
 */
static
QString encode( const QString& segment, bool encode_slash, int encoding_hint )
{
  char encode_extra = encode_slash ? '/' : 0;
  QCString local;
  if (encoding_hint==0)
    local = segment.local8Bit();
  else
  {
#if _CODE_BY_YMWEI
      local = segment.local8Bit();
#else
      QTextCodec * textCodec = codecForHint( encoding_hint );
      if (!textCodec)
          local = segment.local8Bit();
      else
          local = textCodec->fromUnicode( segment );
#endif
  }

  int old_length = local.length();

  if ( !old_length )
    return QString::null;

  // a worst case approximation: every byte may expand to "%XX"
  QChar *new_segment = new QChar[ old_length * 3 + 1 ];
  int new_length = 0;

  for ( int i = 0; i < old_length; i++ )
  {
    // 'unsafe' and 'reserved' characters
    // according to RFC 1738,
    // 2.2. URL Character Encoding Issues (pp. 3-4)
    // WABA: Added non-ascii
    unsigned char character = local[i];
    if ( (character <= 32) || (character >= 127) ||
         strchr("<>#@\"&%$:,;?={}|^~[]\'`\\", character) ||
         (character == encode_extra) )
    {
      new_segment[ new_length++ ] = '%';

      // high nibble
      unsigned int c = character / 16;
      c += (c > 9) ? ('A' - 10) : '0';
      new_segment[ new_length++ ] = c;

      // low nibble
      c = character % 16;
      c += (c > 9) ? ('A' - 10) : '0';
      new_segment[ new_length++ ] = c;
    }
    else
      new_segment[ new_length++ ] = local[i];
  }

  QString result = QString(new_segment, new_length);
  delete [] new_segment;
  return result;
}

/**
 * Converts one hexadecimal digit (0-9, a-f, A-F) to its numeric value.
 *
 * @return the value 0..15, or -1 when @p _char is not a hex digit.
 *
 * The return type is int rather than char: the signedness of plain char is
 * implementation-defined, and with an unsigned char the -1 sentinel would
 * turn into 255 and never compare equal to -1 in the callers.
 */
static int hex2int( unsigned int _char )
{
  if ( _char >= 'A' && _char <='F')
    return _char - 'A' + 10;
  if ( _char >= 'a' && _char <='f')
    return _char - 'a' + 10;
  if ( _char >= '0' && _char <='9')
    return _char - '0';
  return -1;
}

// WABA: The result of lazy_encode isn't usable for a URL which
// needs to satisfy RFC requirements. However, the following
// operation will make it usable again:
//      encode(decode(...))
//
// As a result one can see that url.prettyURL() does not result in
// a RFC compliant URL but that the following sequence does:
//      KURL(url.prettyURL()).url()

/**
 * Escapes only a small set of ambiguous characters in @p segment:
 * control characters (< 32), a '%' that starts a valid "%XX" sequence,
 * '?', '#', and a space at the very end of the segment.
 *
 * @return the partially encoded text, or QString::null for an empty segment
 */
static QString lazy_encode( const QString& segment )
{
  int old_length = segment.length();

  if ( !old_length )
    return QString::null;

  // a worst case approximation: every character may expand to "%XX"
  QChar *new_segment = new QChar[ old_length * 3 + 1 ];
  int new_length = 0;

  for ( int i = 0; i < old_length; i++ )
  {
    unsigned int character = segment[i].unicode(); // Don't use latin1()
                                                   // It returns 0 for non-latin1 values
    // Small set of really ambiguous chars
    if ((character < 32) ||  // Low ASCII
        ((character == '%') && // The escape character itself
           (i+2 < old_length) && // But only if part of a valid escape sequence!
          (hex2int(segment[i+1].unicode())!= -1) &&
          (hex2int(segment[i+2].unicode())!= -1)) ||
        (character == '?') || // Start of query delimiter
        (character == '#') || // Start of reference delimiter
        ((character == 32) && (i+1 == old_length))) // A trailing space
    {
      new_segment[ new_length++ ] = '%';

      // high nibble
      unsigned int c = character / 16;
      c += (c > 9) ? ('A' - 10) : '0';
      new_segment[ new_length++ ] = c;

      // low nibble
      c = character % 16;
      c += (c > 9) ? ('A' - 10) : '0';
      new_segment[ new_length++ ] = c;
    }
    else
      new_segment[ new_length++ ] = segment[i];
  }

  QString result = QString(new_segment, new_length);
  delete [] new_segment;
  return result;
}

/**
 * Replaces valid "%XX" escape sequences in @p segment by the character they
 * denote and converts the result to a QString.
 *
 * @param segment        text to decode
 * @param keepEncoded    optional out-parameter; only written when no
 *                       @p encoding_hint is given: set to true when the
 *                       result was built via the local 8-bit encoding and
 *                       to false when it was built from the unicode buffer
 * @param encoding_hint  when non-zero the decoded bytes are always converted
 *                       from the local 8-bit encoding (with _CODE_BY_YMWEI
 *                       set); when zero the encoding is guessed
 * @return the decoded text, or QString::null for an empty segment
 */
static QString decode( const QString& segment, bool *keepEncoded=0, int encoding_hint=0 )
{
  bool isUnicode = false; // This detects utf-16, not utf-8
  bool isLocal = false;
  bool isAscii = true;
  int old_length = segment.length();
  if ( !old_length )
    return QString::null;

  int new_length = 0;

  // make a copy of the old one; decoding never makes the text longer
  char *new_segment = new char[ old_length + 1];
  QChar *new_usegment = new QChar[ old_length + 1 ];

  int i = 0;
  while( i < old_length )
  {
    unsigned int character = segment[ i++ ].unicode();
    if ( (character == '%' ) &&
         ( i+1 < old_length) ) // Must have at least two chars left!
    {
      // int, not char: keeps the -1 "invalid digit" sentinel intact
      // regardless of the signedness of plain char.
      int a = hex2int( segment[i].latin1() );
      int b = hex2int( segment[i+1].latin1() );
      if ((a != -1) && (b != -1)) // Only replace if sequence is valid
      {
         character = a * 16 + b; // Replace with value of %dd
         i += 2; // Skip dd
         if (character > 127)
            isLocal = true;
      }
    }
    new_segment [ new_length ] = character;
    new_usegment [ new_length ] = character;
    new_length++;
    if (character > 127)
    {
       isAscii = false;
       if (character > 255)
          isUnicode = true;
    }
  }
  new_segment [ new_length ] = 0;
  QString result;
  // Encoding specified
  if ( encoding_hint )
  {
#if _CODE_BY_YMWEI
      result = QString::fromLocal8Bit(new_segment, new_length);
#else
      QTextCodec * textCodec = codecForHint( encoding_hint );
      if (textCodec) {
          QByteArray array;
          array.setRawData(new_segment, new_length);
          result = textCodec->toUnicode( array, new_length );
          array.resetRawData(new_segment, new_length);
      }
      else
          result = QString::fromLocal8Bit(new_segment, new_length);
#endif
      // No idea about keepEncoded... Hmm, it's unused anyway (!)
  }
  // Guess the encoding, if not specified
  else if ((!isAscii && !isUnicode) || isLocal)
  {
     result = QString::fromLocal8Bit(new_segment, new_length);
     if (keepEncoded) *keepEncoded = true;
  }
  else
  {
     result = QString( new_usegment, new_length);
     if (keepEncoded) *keepEncoded = false;
  }
  delete [] new_segment;
  delete [] new_usegment;
  return result;
}

/**
 * Tells whether @p _url is a relative URL.
 *
 * A URL is reported as absolute (false) only when it starts with a letter,
 * continues with letters, digits, '+' or '-' up to a ':', and that ':' is
 * either followed by '/' or terminates one of the prefixes "mailto:",
 * "news:" or "man:" (compared case-insensitively). Everything else,
 * including the empty string, is reported as relative (true).
 */
bool KURL::isRelativeURL(const QString &_url)
{
  int len = _url.length();
  if (!len) return true; // Very short relative URL.
  const QChar *str = _url.unicode();

  // The <ctype.h> classifiers require a value representable as
  // unsigned char (or EOF); a negative plain char is undefined behaviour,
  // so every character is converted before being classified.

  // Absolute URL must start with alpha-character
  if (!isalpha((unsigned char) str[0].latin1()))
     return true; // Relative URL

  for(int i = 1; i < len; i++)
  {
     char c = str[i].latin1(); // Note: non-latin1 chars return 0!
     if (c == ':')
     {
        // URL starts with "xxx:/" -> absolute URL
        i++;
        if ((i < len) && (str[i].latin1() == '/'))
           return false;
        // Make an exception for mailto:user@host
        if ((i == 7) && (_url.left(7).lower() == "mailto:"))
           return false;
        // Make another exception for news:comp.os.newsgroup
        if ((i == 5) && (_url.left(5).lower() == "news:"))
           return false;
        // Make another exception for man:(3)
        if ((i == 4) && (_url.left(4).lower() == "man:"))
           return false;
        return true; // "xxx:" or "xxx::yyy"
     }

     // Protocol part may only contain alpha, digit, + or -
     const unsigned char uc = (unsigned char) c;
     if (!isalpha(uc) && !isdigit(uc) && (c != '+') && (c != '-'))
        return true; // Relative URL
  }
  // URL did not contain ':'
  return true; // Relative URL
}

/** Builds a URL list by constructing one KURL from every string in @p list. */
KURL::List::List(const QStringList &list)
{
  for (QStringList::ConstIterator it = list.begin(); it != list.end(); ++it)
    {
      append( KURL(*it) );
    }
}

/** Returns the url() string of every entry, in list order. */
QStringList KURL::List::toStringList() const
{
  QStringList lst;
  for( KURL::List::ConstIterator it = begin(); it != end(); ++it)
    {
      lst.append( (*it).url() );
    }
  return lst;
}

/** Creates a URL in the state established by reset(). */
KURL::KURL()
{
  reset();
}

KURL::~KURL()
{
}

/** Creates a URL by parsing @p url; @p encoding_hint is forwarded to parse(). */
KURL::KURL( const QString &url, int encoding_hint )
{
  reset();
  parse( url, encoding_hint );
}

/** Same as the QString constructor; @p url is interpreted as Latin-1. */
KURL::KURL( const char * url, int encoding_hint )
{
  reset();
  parse( QString::fromLatin1(url), encoding_hint );
}

/** Copies every component of @p _u. */
KURL::KURL( const KURL& _u )
{
  m_strProtocol = _u.m_strProtocol;
  m_strUser = _u.m_strUser;
  m_strPass = _u.m_strPass;
  m_strHost = _u.m_strHost;
  m_strPath = _u.m_strPath;
  m_strPath_encoded = _u.m_strPath_encoded;
  m_strQuery_encoded = _u.m_strQuery_encoded;
  m_strRef_encoded = _u.m_strRef_encoded;
  m_bIsMalformed = _u.m_bIsMalformed;
  m_iPort = _u.m_iPort;
}

/**
 * Serializes all components of @p a to @p s. The malformed flag is written
 * as a Q_INT8 (1 or 0). The field order must match operator>>.
 */
QDataStream & operator<< (QDataStream & s, const KURL & a)
{
    s << a.m_strProtocol << a.m_strUser << a.m_strPass << a.m_strHost
      << a.m_strPath << a.m_strPath_encoded << a.m_strQuery_encoded << a.m_strRef_encoded
      << Q_INT8(a.m_bIsMalformed ? 1 : 0) << a.m_iPort;
    return s;
}

/**
 * Reads the components written by operator<< into @p a. An empty query
 * string is normalised to QString::null after reading.
 */
QDataStream & operator>> (QDataStream & s, KURL & a)
{
    Q_INT8 malf;
    s >> a.m_strProtocol >> a.m_strUser >> a.m_strPass >> a.m_strHost
      >> a.m_strPath >> a.m_strPath_encoded >> a.m_strQuery_encoded >> a.m_strRef_encoded
      >> malf >> a.m_iPort;
    a.m_bIsMalformed = (malf != 0);

    if ( a.m_strQuery_encoded.isEmpty() )
      a.m_strQuery_encoded = QString::null;

    return s;
}

/**
 * Creates a URL from the components of the QUrl @p u. The encoded path is
 * left null and the URL is marked malformed when @p u is not valid.
 */
KURL::KURL( const QUrl &u )
{
  m_strProtocol = u.protocol();
  m_strUser = u.user();
  m_strPass = u.password();
  m_strHost = u.host();
  m_strPath = u.path( FALSE );
  m_strPath_encoded = QString::null;
  m_strQuery_encoded = u.query();
  m_strRef_encoded = u.ref();
  m_bIsMalformed = !u.isValid();
  m_iPort = u.port();
}

/**
 * Creates a URL by resolving @p _rel_url against the base URL @p _u.
 *
 * - A reference-only string ("#...") yields a copy of @p _u with the decoded
 *   reference set through setHTMLRef().
 * - A relative URL replaces the base path (leading '/') or is appended to
 *   the directory part of the base path; the result is re-parsed and passed
 *   through cleanPath().
 * - Anything else is parsed on its own and the base is ignored.
 */
KURL::KURL( const KURL& _u, const QString& _rel_url, int encoding_hint )
{
  // WORKAROUND THE RFC 1606 LOOPHOLE THAT ALLOWS
  // http:/index.html AS A VALID SYNTAX FOR RELATIVE
  // URLS. ( RFC 2396 section 5.2 item # 3 )
  QString rUrl = _rel_url;
  int len = _u.m_strProtocol.length();
  if ( _u.hasHost() && rUrl.length() != 0 &&
       rUrl.left( len ).lower() == _u.m_strProtocol.lower() &&
       rUrl[len] == ':' && (rUrl[len+1] != '/' ||
       (rUrl[len+1] == '/' && rUrl[len+2] != '/')) )
  {
    // Same protocol as the base and not followed by "//":
    // strip the "protocol:" prefix and treat the rest as relative.
    rUrl.remove( 0, rUrl.find( ':' ) + 1 );
  }

  if ( rUrl[0] == '#' )
  {
    *this = _u;
    setHTMLRef( decode(rUrl.mid(1), 0, encoding_hint) );
  }
  else if ( isRelativeURL( rUrl) )
  {
    *this = _u;
    m_strQuery_encoded = QString::null;
    m_strRef_encoded = QString::null;
    if ( rUrl[0] == '/')
    {
        // Relative URL carries a full path: drop the base path entirely.
        m_strPath = QString::null;
        m_strPath_encoded = QString::null;
    }
    else
    {
       // Keep the base path up to and including its last '/'.
       int pos = m_strPath.findRev( '/' );
       if (pos >= 0)
          m_strPath.truncate(pos);
       m_strPath += '/';
       if (!m_strPath_encoded.isEmpty())
       {
          pos = m_strPath_encoded.findRev( '/' );
          if (pos >= 0)
             m_strPath_encoded.truncate(pos);
          m_strPath_encoded += '/';
       }
    }
    KURL tmp( url() + rUrl, encoding_hint);
    *this = tmp;
    cleanPath();
  }
  else
  {
    KURL tmp( rUrl, encoding_hint);
    *this = tmp;
  }
}

/**
 * Clears every component of the URL and marks it as malformed.
 * The port is set to 0.
 */
void KURL::reset()
{
  m_strProtocol = QString::null;
  m_strUser = QString::null;
  m_strPass = QString::null;
  m_strHost = QString::null;
  m_strPath = QString::null;
  m_strPath_encoded = QString::null;
  // The query has to be cleared too, otherwise a reset URL keeps the
  // query of whatever it held before.
  m_strQuery_encoded = QString::null;
  m_strRef_encoded = QString::null;
  m_bIsMalformed = true;
  m_iPort = 0;
}

/** Returns true when both the path and the protocol are empty. */
bool KURL::isEmpty() const
{
  return (m_strPath.isEmpty() && m_strProtocol.isEmpty());
}

/**
 * Resolves @p _url against @p _baseUrl.
 *
 * When @p _url does not start with '/', it is resolved with the
 * KURL(base, relative) constructor. Otherwise it replaces the encoded path
 * and query of the base URL.
 */
KURL KURL::completeURL (const DOM::DOMString & _url, const DOM::DOMString & _baseUrl)
{
    QString url = _url.string();
    QString baseUrl = _baseUrl.string();
    KURL orig(baseUrl);
    if(_url[(unsigned int)0] != '/') {
        KURL u( orig, url );
        return u;
    }
    orig.setEncodedPathAndQuery(url);
    return orig;
}

// NOTE(review): the visible source ends inside parse(); only the part shown
// here is reproduced, the remainder of the function is not available.
void KURL::parse( const QString& _url, int encoding_hint )
{
  // Return immediately whenever the given url
  // is empty or null.
  if ( _url.length() == 0 )
  {
    m_strProtocol = _url;
    return;
  }

  // This is wrong!! All URLs should be deemed invalid until
  // they have been correctly parsed. Specially now since ::url()
  // returns the given url even if it is malformed. (DA)
  // m_bIsMalformed = false;

  QString port;
  int start = 0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -