kurl.cpp

来自「konqueror3 embedded版本, KDE环境下的当家浏览器的嵌入式版」· C++ 代码 · 共 2,357 行 · 第 1/4 页

CPP
2,357
字号
/*
    Copyright (C) 1999 Torben Weis <weis@kde.org>

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB.  If not, write to
    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
    Boston, MA 02110-1301, USA.
*/

/*
 * The currently active RFC for URL/URIs is RFC3986
 * Previous (and now deprecated) RFCs are RFC1738 and RFC2396
 */

#include "kurl.h"

// KDE_QT_ONLY is first used for dcop/client (e.g. marshalling)
#ifndef KDE_QT_ONLY
#include <kdebug.h>
#include <kglobal.h>
#include <kidna.h>
#include <kprotocolinfo.h>
#endif

#include <stdio.h>
#include <assert.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <qurl.h>
#include <qdir.h>
#include <qstringlist.h>
#include <qregexp.h>
#include <qstylesheet.h>
#include <qmap.h>
#include <qtextcodec.h>
#include <qmutex.h>

#ifdef Q_WS_WIN
# define KURL_ROOTDIR_PATH "C:/"
#else
# define KURL_ROOTDIR_PATH "/"
#endif

static const QString fileProt = "file";

// Looks up the text codec for the given MIB number.
// Returns whatever QTextCodec::codecForMib() returns; callers in this file
// check the result for 0 before using it.
static QTextCodec * codecForHint( int encoding_hint /* not 0 ! */ )
{
    return QTextCodec::codecForMib( encoding_hint );
}

// Writes the three characters '%', high hex digit, low hex digit for
// 'value' into buffer starting at buffer[length], and advances 'length' by 3.
// Hex digits are upper case. Only the low 8 bits of 'value' are meaningful;
// every caller in this file passes a value below 256.
static inline void appendHexEscape( QChar *buffer, int &length, unsigned int value )
{
  static const char hexDigits[] = "0123456789ABCDEF";
  buffer[ length++ ] = '%';
  buffer[ length++ ] = hexDigits[ (value >> 4) & 0xF ];
  buffer[ length++ ] = hexDigits[ value & 0xF ];
}

// Percent-encodes 'segment'.
//
// The segment is first converted to 8 bit: with the codec for
// 'encoding_hint' when that is non-zero and a codec is found, otherwise
// with QString::local8Bit(). Every byte that is <= 32, >= 127, or listed in
// the reserved set below is replaced by %XX.
//
// encoding_offset:
// 0 encode both @ and /
// 1 encode @ but not /
// 2 encode neither @ or /
//
// isRawURI: take the length from the byte array size (minus the trailing
// terminator) instead of the C-string length.
//
// Returns a null QString for a null segment and an empty (non-null)
// QString for an empty one.
static QString encode( const QString& segment, int encoding_offset, int encoding_hint, bool isRawURI = false )
{
  // The first two entries are '/' and '@'; skipping them via the offset
  // is what exempts them from encoding.
  const char *encode_string = "/@<>#\"&?={}|^~[]\'`\\:+%";
  encode_string += encoding_offset;

  QCString local;
  if (encoding_hint==0)
    local = segment.local8Bit();
  else
  {
      QTextCodec * textCodec = codecForHint( encoding_hint );
      if (!textCodec)
          local = segment.local8Bit();
      else
          local = textCodec->fromUnicode( segment );
  }

  int old_length = isRawURI ? local.size() - 1 : local.length();

  // "<= 0" rather than "== 0": for a raw URI whose byte array has size 0,
  // size() - 1 yields -1, which must not reach the allocation below.
  if ( old_length <= 0 )
    return segment.isNull() ? QString::null : QString(""); // differentiate null and empty

  // a worst case approximation
  QChar *new_segment = new QChar[ old_length * 3 + 1 ];
  int new_length = 0;

  for ( int i = 0; i < old_length; i++ )
  {
    // 'unsave' and 'reserved' characters
    // according to RFC 1738,
    // 2.2. URL Character Encoding Issues (pp. 3-4)
    // WABA: Added non-ascii
    unsigned char character = local[i];
    if ( (character <= 32) || (character >= 127) ||
         strchr(encode_string, character) )
    {
      appendHexEscape( new_segment, new_length, character );
    }
    else
      new_segment[ new_length++ ] = local[i];
  }

  QString result = QString(new_segment, new_length);
  delete [] new_segment;
  return result;
}

// Encodes a host name.
// In a full KDE build the host is passed through KIDNA::toAscii(); if that
// yields an empty string the original segment is returned unchanged.
// In a KDE_QT_ONLY build the generic encode() is used instead, encoding '/'
// only when 'encode_slash' is set.
static QString encodeHost( const QString& segment, bool encode_slash, int encoding_hint )
{
  // Hostnames are encoded differently
  // we use the IDNA transformation instead

  // Note: when merging qt-addon, use QResolver::domainToAscii here
#ifndef KDE_QT_ONLY
  Q_UNUSED( encode_slash );
  Q_UNUSED( encoding_hint );
  QString host = KIDNA::toAscii(segment);
  if (host.isEmpty())
     return segment;
  return host;
#else
  return encode(segment, encode_slash ? 0 : 1, encoding_hint);
#endif
}

// Returns the numeric value (0..15) of a hexadecimal digit character,
// accepting both upper and lower case, or -1 if '_char' is not a hex digit.
static int hex2int( unsigned int _char )
{
  if ( _char >= 'A' && _char <='F')
    return _char - 'A' + 10;
  if ( _char >= 'a' && _char <='f')
    return _char - 'a' + 10;
  if ( _char >= '0' && _char <='9')
    return _char - '0';
  return -1;
}

// WABA: The result of lazy_encode isn't usable for a URL which
// needs to satisfies RFC requirements. However, the following
// operation will make it usable again:
//      encode(decode(...))
//
// As a result one can see that url.prettyURL() does not result in
// a RFC compliant URL but that the following sequence does:
//      KURL(url.prettyURL()).url()
//
// Only a small set of characters is escaped (see the condition in the
// loop). '@' is escaped only when 'encodeAt' is true.
// Returns a null QString for an empty segment.
static QString lazy_encode( const QString& segment, bool encodeAt=true )
{
  int old_length = segment.length();

  if ( !old_length )
    return QString::null;

  // a worst case approximation
  QChar *new_segment = new QChar[ old_length * 3 + 1 ];
  int new_length = 0;

  for ( int i = 0; i < old_length; i++ )
  {
    unsigned int character = segment[i].unicode(); // Don't use latin1()
                                                   // It returns 0 for non-latin1 values
    // Small set of really ambiguous chars
    if ((character < 32) ||  // Low ASCII
        ((character == '%') && // The escape character itself
           (i+2 < old_length) && // But only if part of a valid escape sequence!
          (hex2int(segment[i+1].unicode())!= -1) &&
          (hex2int(segment[i+2].unicode())!= -1)) ||
        (character == '?') || // Start of query delimiter
        ((character == '@') && encodeAt) || // Username delimiter
        (character == '#') || // Start of reference delimiter
        ((character == 32) && (i+1 == old_length))) // A trailing space
    {
      // Every character that reaches this branch is below 128, so a single
      // %XX escape is sufficient.
      appendHexEscape( new_segment, new_length, character );
    }
    else
      new_segment[ new_length++ ] = segment[i];
  }

  QString result = QString(new_segment, new_length);
  delete [] new_segment;
  return result;
}

// Decodes the percent-escapes in 'segment'.
//
// Outputs:
//   decoded - the unescaped text (only filled in when 'updateDecoded' is
//             true; otherwise left null).
//   encoded - a normalised escaped form of the input: valid %xx sequences
//             are kept, while stray '%' characters, bytes <= ' ' and bytes
//             > 127 are (re-)escaped.
//
// encoding_hint: MIB of the codec to use; 0 (or an unknown MIB) selects the
//                locale codec. If the chosen codec cannot round-trip the
//                segment, UTF-8 (MIB 106) is used instead.
// isRawURI:      when false, decoding stops at the first %00 (only if
//                'updateDecoded' is set); when true, embedded NUL bytes are
//                kept and carried over into 'decoded'.
//
// For an empty segment, 'decoded' is null and 'encoded' equals 'segment'.
static void decode( const QString& segment, QString &decoded, QString &encoded, int encoding_hint=0, bool updateDecoded = true, bool isRawURI = false )
{
  decoded = QString::null;
  encoded = segment;

  int old_length = segment.length();
  if ( !old_length )
    return;

  QTextCodec *textCodec = 0;
  if (encoding_hint)
      textCodec = codecForHint( encoding_hint );

  if (!textCodec)
      textCodec = QTextCodec::codecForLocale();

  QCString csegment = textCodec->fromUnicode(segment);
  // Check if everything went ok
  if (textCodec->toUnicode(csegment) != segment)
  {
      // Uh oh
      textCodec = codecForHint( 106 ); // Fall back to utf-8
      csegment = textCodec->fromUnicode(segment);
  }
  old_length = csegment.length();

  int new_length = 0;   // number of bytes written to new_segment
  int new_length2 = 0;  // number of QChars written to new_usegment

  // make a copy of the old one
  char *new_segment = new char[ old_length + 1 ];
  QChar *new_usegment = new QChar[ old_length * 3 + 1 ];

  int i = 0;
  while( i < old_length )
  {
    bool bReencode = false;
    unsigned char character = csegment[ i++ ];
    if ((character <= ' ') || (character > 127))
       bReencode = true;

    new_usegment [ new_length2++ ] = character;
    if (character == '%' )
    {
      int a = i+1 < old_length ? hex2int( csegment[i] ) : -1;
      int b = i+1 < old_length ? hex2int( csegment[i+1] ) : -1;
      if ((a == -1) || (b == -1)) // Only replace if sequence is valid
      {
         // Contains stray %, make sure to re-encode!
         bReencode = true;
      }
      else
      {
         // Valid %xx sequence
         character = a * 16 + b; // Replace with value of %dd
         if (!isRawURI && !character && updateDecoded)
            break; // Stop at %00

         new_usegment [ new_length2++ ] = (unsigned char) csegment[i++];
         new_usegment [ new_length2++ ] = (unsigned char) csegment[i++];
      }
    }
    if (bReencode)
    {
      // Drop the raw character stored above and write its escape instead.
      new_length2--;
      appendHexEscape( new_usegment, new_length2, character );
    }

    new_segment [ new_length++ ] = character;
  }
  new_segment [ new_length ] = 0;

  encoded = QString( new_usegment, new_length2);

  // Encoding specified
  if (updateDecoded)
  {
     decoded = textCodec->toUnicode( new_segment );
     if ( isRawURI ) {
        // toUnicode() stops at the first NUL; convert the remaining
        // NUL-separated pieces one at a time.
        int length = qstrlen( new_segment );
        while ( length < new_length ) {
            decoded += QChar::null;
            length += 1;
            decoded += textCodec->toUnicode( new_segment + length );
            length += qstrlen( new_segment + length );
        }
     }

     QCString validate = textCodec->fromUnicode(decoded);

     // A null QCString has a null data() pointer; treat it as an empty
     // string so that strcmp() is never handed a null pointer.
     const char *validated = validate.data();
     if (strcmp(validated ? validated : "", new_segment) != 0)
     {
        decoded = QString::fromLocal8Bit(new_segment, new_length);
     }
  }

  delete [] new_segment;
  delete [] new_usegment;
}

// Convenience overload: returns only the decoded form of 'segment'.
static QString decode(const QString &segment, int encoding_hint = 0, bool isRawURI = false)
{
  QString result;
  QString tmp;
  decode(segment, result, tmp, encoding_hint, true, isRawURI);
  return result;
}

// Normalises an absolute path by removing "." components and resolving
// ".." components.
//
// cleanDirSeparator: when true, empty components (runs of '/') are dropped
//                    as well; when false they are preserved.
// decodeDots:        when true, "%2e" / "%2E" are replaced by "." before
//                    the path is cleaned.
//
// Returns a null QString for an empty path, the path unchanged if
// QDir::isRelativePath() reports it as relative, and KURL_ROOTDIR_PATH if
// nothing is left after cleaning. A trailing '/' (or trailing "/.") in the
// input results in a trailing '/' in the output.
static QString cleanpath(const QString &_path, bool cleanDirSeparator, bool decodeDots)
{
  if (_path.isEmpty()) return QString::null;

  if (QDir::isRelativePath(_path))
     return _path; // Don't mangle mailto-style URLs

  QString path = _path;

  int len = path.length();

  if (decodeDots)
  {
#ifndef KDE_QT_ONLY
     static const QString &encodedDot = KGlobal::staticQString("%2e");
#else
     QString encodedDot("%2e");
#endif
     if (path.find(encodedDot, 0, false) != -1)
     {
#ifndef KDE_QT_ONLY
        static const QString &encodedDOT = KGlobal::staticQString("%2E"); // Uppercase!
#else
        QString encodedDOT("%2E");
#endif
        path.replace(encodedDot, ".");
        path.replace(encodedDOT, ".");
        len = path.length();
     }
  }

  bool slash = (len && path[len-1] == '/') ||
               (len > 1 && path[len-2] == '/' && path[len-1] == '.');

  // The following code cleans up directory path much like
  // QDir::cleanDirPath() except it can be made to ignore multiple
  // directory separators by setting the flag to false.  That fixes
  // bug# 15044, mail.altavista.com and other similar brain-dead server
  // implementations that do not follow what has been specified in
  // RFC 2396!! (dA)
  QString result;
  int cdUp, orig_pos, pos;

  cdUp = 0;
  pos = orig_pos = len;
  // Walk the path from the end towards the start, one '/'-delimited
  // component at a time; cdUp counts pending ".." components that still
  // have to cancel a preceding component.
  while ( pos && (pos = path.findRev('/',--pos)) != -1 )
  {
    len = orig_pos - pos - 1;
    if ( len == 2 && path[pos+1] == '.' && path[pos+2] == '.' )
      cdUp++;
    else
    {
      // Ignore any occurrences of '.'
      // This includes entries that simply do not make sense like /..../
      if ( (len || !cleanDirSeparator) &&
           (len != 1 || path[pos+1] != '.' ) )
      {
          if ( !cdUp )
              result.prepend(path.mid(pos, len+1));
          else
              cdUp--;
      }
    }
    orig_pos = pos;
  }

#ifdef Q_WS_WIN // prepend drive letter if exists (js)
  // The cast keeps the argument of isalpha() within the range required by
  // the C library when plain char is signed.
  if (orig_pos >= 2 && isalpha(static_cast<unsigned char>(path[0].latin1())) && path[1]==':') {
    result.prepend(QString(path[0])+":");
  }
#endif

  if ( result.isEmpty() )
    result = KURL_ROOTDIR_PATH;
  else if ( slash && result[result.length()-1] != '/' )
       result.append('/');

  return result;
}

// Returns false if '_url' starts with "<protocol>:", where the protocol
// begins with a letter and otherwise consists only of letters, digits,
// '+' and '-'. Returns true (relative) in every other case, including the
// empty string.
bool KURL::isRelativeURL(const QString &_url)
{
  int len = _url.length();
  if (!len) return true; // Very short relative URL.
  const QChar *str = _url.unicode();

  // Absolute URL must start with alpha-character.
  // The <ctype.h> functions require a value representable as unsigned char,
  // so the (possibly signed) char from latin1() is converted first.
  if (!isalpha(static_cast<unsigned char>(str[0].latin1())))
     return true; // Relative URL

  for(int i = 1; i < len; i++)
  {
     const unsigned char c = static_cast<unsigned char>(str[i].latin1()); // Note: non-latin1 chars return 0!
     if (c == ':')
        return false; // Absolute URL

     // Protocol part may only contain alpha, digit, + or -
     if (!isalpha(c) && !isdigit(c) && (c != '+') && (c != '-'))
        return true; // Relative URL
  }
  // URL did not contain ':'
  return true; // Relative URL
}

// Creates a list containing the single URL 'url'.
KURL::List::List(const KURL &url)
{
    append( url );
}

// Creates a list by constructing a KURL from every string in 'list'.
KURL::List::List(const QStringList &list)
{
  for (QStringList::ConstIterator it = list.begin();
       it != list.end();
       ++it)
    {
      append( KURL(*it) );
    }
}

// Returns the url() string of every entry, in list order.
QStringList KURL::List::toStringList() const
{
  QStringList lst;
   for( KURL::List::ConstIterator it = begin();
        it != end();
        ++it)
   {
      lst.append( (*it).url() );
   }
   return lst;
}


// Constructs a KURL in the state established by reset().
KURL::KURL()
{
  reset();
}

KURL::~KURL()
{
}


// Constructs a KURL by parsing 'url' with the given encoding hint.
KURL::KURL( const QString &url, int encoding_hint )
{
  reset();
  parse( url, encoding_hint );
}

// Same as above; 'url' is interpreted as Latin-1.
KURL::KURL( const char * url, int encoding_hint )
{
  reset();
  parse( QString::fromLatin1(url), encoding_hint );
}

// Same as above; 'url' is interpreted as Latin-1.
KURL::KURL( const QCString& url, int encoding_hint )
{
  reset();
  parse( QString::fromLatin1(url), encoding_hint );
}

// Copy constructor; delegates to the assignment operator.
KURL::KURL( const KURL& _u )
{
  *this = _u;
}

// Serialises 'a' to the stream. The encoded query is written with a
// leading '?' when it is non-null, so that the reader can tell a null
// query apart from an empty one.
QDataStream & operator<< (QDataStream & s, const KURL & a)
{
  QString QueryForWire=a.m_strQuery_encoded;
  if (!a.m_strQuery_encoded.isNull())
    QueryForWire.prepend("?");

    s << a.m_strProtocol << a.m_strUser << a.m_strPass << a.m_strHost
      << a.m_strPath << a.m_strPath_encoded << QueryForWire << a.m_strRef_encoded
      << Q_INT8(a.m_bIsMalformed ? 1 : 0) << a.m_iPort;
    return s;
}

// Reads a KURL written by operator<< above. The leading '?' of the query
// is stripped again, and the URI mode is recomputed from the protocol.
QDataStream & operator>> (QDataStream & s, KURL & a)
{
    Q_INT8 malf;
    QString QueryFromWire;
    s >> a.m_strProtocol >> a.m_strUser >> a.m_strPass >> a.m_strHost
      >> a.m_strPath >> a.m_strPath_encoded >> QueryFromWire >> a.m_strRef_encoded
      >> malf >> a.m_iPort;
    a.m_bIsMalformed = (malf != 0);

    if ( QueryFromWire.isNull() )
      a.m_strQuery_encoded = QString::null;
    else if ( QueryFromWire.length() == 1 ) // empty query
      a.m_strQuery_encoded = "";
    else
      a.m_strQuery_encoded = QueryFromWire.mid(1);

    a.m_iUriMode = KURL::uriModeForProtocol( a.m_strProtocol );

    return s;
}

#ifndef QT_NO_NETWORKPROTOCOL
// Constructs a KURL from a QUrl; delegates to the assignment operator.
KURL::KURL( const QUrl &u )
{
  *this = u;
}
#endif

KURL::KURL( const KURL& _u, const QString& _rel_url, int encoding_hint )
{
  if (_u.hasSubURL()) // Operate on the last suburl, not the first
  {
    KURL::List lst = split( _u );
    KURL u(lst.last(), _rel_url, encoding_hint);
    lst.remove( lst.last() );
    lst.append( u );
    *this = join( lst );
    return;
  }
  // WORKAROUND THE RFC 1606 LOOPHOLE THAT ALLOWS
  // http:/index.html AS A VALID SYNTAX FOR RELATIVE
  // URLS. ( RFC 2396 section 5.2 item # 3 )
  QString rUrl = _rel_url;
  int len = _u.m_strProtocol.length();
  if ( !_u.m_strHost.isEmpty() && !rUrl.isEmpty() &&
       rUrl.find( _u.m_strProtocol, 0, false ) == 0 &&
       rUrl[len] == ':' && (rUrl[len+1] != '/' ||
       (rUrl[len+1] == '/' && rUrl[len+2] != '/')) )
  {
    rUrl.remove( 0, rUrl.find( ':' ) + 1 );
  }

  if ( rUrl.isEmpty() )
  {
    *this = _u;
  }
  else if ( rUrl[0] == '#' )
  {
    *this = _u;
    m_strRef_encoded = rUrl.mid(1);
    if ( m_strRef_encoded.isNull() )
        m_strRef_encoded = ""; // we know there was an (empty) html ref, we saw the '#'
  }
  else if ( isRelativeURL( rUrl) )
  {
    *this = _u;
    m_strQuery_encoded = QString::null;
    m_strRef_encoded = QString::null;
    if ( rUrl[0] == '/')
    {
        if ((rUrl.length() > 1) && (rUrl[1] == '/'))
        {
           m_strHost = QString::null;
           // File protocol returns file:/// without host, strip // from rUrl
           if (_u.m_strProtocol == fileProt)
              rUrl.remove(0, 2);
        }
        m_strPath = QString::null;
        m_strPath_encoded = QString::null;
    }
    else if ( rUrl[0] != '?' )
    {
       int pos = m_strPath.findRev( '/' );
       if (pos >= 0)
          m_strPath.truncate(pos);
       m_strPath += '/';
       if (!m_strPath_encoded.isEmpty())
       {
          pos = m_strPath_encoded.findRev( '/' );
          if (pos >= 0)
             m_strPath_encoded.truncate(pos);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?