📄 kurl.cpp

📁 monqueror一个很具有参考价值的源码
💻 CPP
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/* This file is part of the KDE libraries
    Copyright (C) 1999 Torben Weis <weis@kde.org>

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB.  If not, write to
    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.
*/

#include "kurl.h"
#include <kdebug.h>
#include <stdio.h>
#include <assert.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h> // strchr(), used by encode()
#include <qurl.h>
#include <qdir.h>
#include <qstringlist.h>
#include <qtextcodec.h>

// While this is 1 the codec lookup below is compiled out and both
// encode() and decode() always go through the local 8-bit encoding.
#define _CODE_BY_YMWEI 1

#if !_CODE_BY_YMWEI
/**
 * Looks up the text codec matching @p encoding_hint (which must not be 0).
 * Returns 0 when the charset lookup reports failure.
 */
static QTextCodec * codecForHint( int encoding_hint /* not 0 ! */ )
{
    // Get the charset name from encoding_hint - but KCharsets doesn't
    // know about "unicode"
    QString charsetName =
        (encoding_hint == QFont::Unicode) ? "utf8" :
        KGlobal::charsets()->name( (QFont::CharSet) encoding_hint );
    bool ok;
    QTextCodec * textCodec = KGlobal::charsets()->codecForName( charsetName, ok );
    return ok ? textCodec : 0L;
}
#endif

/**
 * Percent-encodes @p segment for use inside a URL.
 *
 * The segment is first converted to an 8-bit string; every byte that is
 * <= 32, >= 127, or one of the characters listed in the strchr() call below
 * is written as "%XX" with upper-case hex digits.
 *
 * @param segment        text to encode
 * @param encode_slash   when true, '/' is escaped as well
 * @param encoding_hint  0 selects the local 8-bit encoding; any other value
 *                       selects a codec only when _CODE_BY_YMWEI is 0
 * @return the encoded text, or QString::null for an empty segment
 */
static
QString encode( const QString& segment, bool encode_slash, int encoding_hint )
{
  char encode_extra = encode_slash ? '/' : 0;
  QCString local;
  if (encoding_hint==0)
    local = segment.local8Bit();
  else
  {
#if _CODE_BY_YMWEI
      local = segment.local8Bit();
#else
      QTextCodec * textCodec = codecForHint( encoding_hint );
      if (!textCodec)
          local = segment.local8Bit();
      else
          local = textCodec->fromUnicode( segment );
#endif
  }

  int old_length = local.length();
  if ( !old_length )
    return QString::null;

  // a worst case approximation: every byte may expand to "%XX"
  QChar *new_segment = new QChar[ old_length * 3 + 1 ];
  int new_length = 0;

  for ( int i = 0; i < old_length; i++ )
  {
    // 'unsafe' and 'reserved' characters
    // according to RFC 1738,
    // 2.2. URL Character Encoding Issues (pp. 3-4)
    // WABA: Added non-ascii
    unsigned char character = local[i];
    // strchr() is never reached with character == 0: that value is
    // already caught by the "<= 32" test.
    if ( (character <= 32) || (character >= 127) ||
         strchr("<>#@\"&%$:,;?={}|^~[]\'`\\", character) ||
         (character == encode_extra) )
    {
      new_segment[ new_length++ ] = '%';

      // high nibble, then low nibble, as upper-case hex digits
      unsigned int c = character / 16;
      c += (c > 9) ? ('A' - 10) : '0';
      new_segment[ new_length++ ] = c;

      c = character % 16;
      c += (c > 9) ? ('A' - 10) : '0';
      new_segment[ new_length++ ] = c;
    }
    else
      new_segment[ new_length++ ] = local[i];
  }

  QString result = QString(new_segment, new_length);
  delete [] new_segment;
  return result;
}

/**
 * Converts one hexadecimal digit (either case) to its value.
 *
 * The result type is int rather than plain char: whether char is signed
 * is implementation-defined, and with an unsigned char the -1 marker
 * would turn into 255 and no longer compare equal to -1.
 *
 * @return 0..15 for a hex digit, -1 for anything else
 */
static int hex2int( unsigned int _char )
{
  if ( _char >= 'A' && _char <='F')
    return _char - 'A' + 10;
  if ( _char >= 'a' && _char <='f')
    return _char - 'a' + 10;
  if ( _char >= '0' && _char <='9')
    return _char - '0';
  return -1;
}

// WABA: The result of lazy_encode isn't usable for a URL which
// needs to satisfy RFC requirements.
However, the following// operation will make it usable again://      encode(decode(...))//// As a result one can see that url.prettyURL() does not result in// a RFC compliant URL but that the following sequence does://      KURL(url.prettyURL()).url()static QString lazy_encode( const QString& segment ){  int old_length = segment.length();  if ( !old_length )    return QString::null;  // a worst case approximation  QChar *new_segment = new QChar[ old_length * 3 + 1 ];  int new_length = 0;  for ( int i = 0; i < old_length; i++ )  {    unsigned int character = segment[i].unicode(); // Don't use latin1()                                                   // It returns 0 for non-latin1 values    // Small set of really ambiguous chars    if ((character < 32) ||  // Low ASCII        ((character == '%') && // The escape character itself           (i+2 < old_length) && // But only if part of a valid escape sequence!          (hex2int(segment[i+1].unicode())!= -1) &&          (hex2int(segment[i+2].unicode())!= -1)) ||        (character == '?') || // Start of query delimiter        (character == '#') || // Start of reference delimiter        ((character == 32) && (i+1 == old_length))) // A trailing space    {      new_segment[ new_length++ ] = '%';      unsigned int c = character / 16;      c += (c > 9) ? ('A' - 10) : '0';      new_segment[ new_length++ ] = c;      c = character % 16;      c += (c > 9) ? 
('A' - 10) : '0';      new_segment[ new_length++ ] = c;    }    else    new_segment[ new_length++ ] = segment[i];  }  QString result = QString(new_segment, new_length);  delete [] new_segment;  return result;}static QString decode( const QString& segment, bool *keepEncoded=0, int encoding_hint=0 ){  bool isUnicode = false; // This detects utf-16, not utf-8  bool isLocal = false;  bool isAscii = true;  int old_length = segment.length();  if ( !old_length )    return QString::null;  int new_length = 0;  // make a copy of the old one  char *new_segment = new char[ old_length + 1];  QChar *new_usegment = new QChar[ old_length + 1 ];  int i = 0;  while( i < old_length )  {    unsigned int character = segment[ i++ ].unicode();    if ( (character == '%' ) &&         ( i+1 < old_length) ) // Must have at least two chars left!    {      char a = hex2int( segment[i].latin1() );      char b = hex2int( segment[i+1].latin1() );      if ((a != -1) && (b != -1)) // Only replace if sequence is valid      {         character = a * 16 + b; // Replace with value of %dd         i += 2; // Skip dd         if (character > 127)            isLocal = true;      }    }    new_segment [ new_length ] = character;    new_usegment [ new_length ] = character;    new_length++;    if (character > 127)    {       isAscii = false;       if (character > 255)          isUnicode = true;    }  }  new_segment [ new_length ] = 0;  QString result;  // Encoding specified  if ( encoding_hint )  {#if _CODE_BY_YMWEI      result = QString::fromLocal8Bit(new_segment, new_length);#else      QTextCodec * textCodec = codecForHint( encoding_hint );      if (textCodec)      {          QByteArray array;          array.setRawData(new_segment, new_length);          result = textCodec->toUnicode( array, new_length );          array.resetRawData(new_segment, new_length);      }      else          result = QString::fromLocal8Bit(new_segment, new_length);#endif      // No idea about keepEncoded... 
Hmm, it's unused anyway (!)  }  // Guess the encoding, if not specified  else if ((!isAscii && !isUnicode) || isLocal)  {     result = QString::fromLocal8Bit(new_segment, new_length);     if (keepEncoded)       *keepEncoded = true;  }  else  {     result = QString( new_usegment, new_length);     if (keepEncoded)       *keepEncoded = false;  }  delete [] new_segment;  delete [] new_usegment;  return result;}bool KURL::isRelativeURL(const QString &_url){  int len = _url.length();  if (!len) return true; // Very short relative URL.  const QChar *str = _url.unicode();  // Absolute URL must start with alpha-character  if (!isalpha(str[0].latin1()))     return true; // Relative URL  for(int i = 1; i < len; i++)  {     char c = str[i].latin1(); // Note: non-latin1 chars return 0!     if (c == ':')     {        // URL starts with "xxx:/" -> absolute URL        i++;        if ((i < len) && (str[i].latin1() == '/'))           return false;        // Make an expection for mailto:user@host        if ((i == 7) && (_url.left(7).lower() == "mailto:"))           return false;        // Make another expection for news:comp.os.newsgroup        if ((i == 5) && (_url.left(5).lower() == "news:"))           return false;        // Make another expection for man:(3)        if ((i == 4) && (_url.left(4).lower() == "man:"))           return false;        return true; // "xxx:" or "xxx::yyy"     }     // Protocol part may only contain alpha, digit, + or -     if (!isalpha(c) && !isdigit(c) && (c != '+') && (c != '-'))        return true; // Relative URL  }  // URL did not contain ':'  return true; // Relative URL}KURL::List::List(const QStringList &list){  for (QStringList::ConstIterator it = list.begin();       it != list.end();       it++)    {      append( KURL(*it) );    }}QStringList KURL::List::toStringList() const{  QStringList lst;   for( KURL::List::ConstIterator it = begin();        it != end();        it++)   {      lst.append( (*it).url() );   }   return lst;}KURL::KURL(){  
reset();}KURL::~KURL(){}KURL::KURL( const QString &url, int encoding_hint ){  reset();  parse( url, encoding_hint );}KURL::KURL( const char * url, int encoding_hint ){  reset();  parse( QString::fromLatin1(url), encoding_hint );}KURL::KURL( const KURL& _u ){  m_strProtocol = _u.m_strProtocol;  m_strUser = _u.m_strUser;  m_strPass = _u.m_strPass;  m_strHost = _u.m_strHost;  m_strPath = _u.m_strPath;  m_strPath_encoded = _u.m_strPath_encoded;  m_strQuery_encoded = _u.m_strQuery_encoded;  m_strRef_encoded = _u.m_strRef_encoded;  m_bIsMalformed = _u.m_bIsMalformed;  m_iPort = _u.m_iPort;}QDataStream & operator<< (QDataStream & s, const KURL & a){    s << a.m_strProtocol << a.m_strUser << a.m_strPass << a.m_strHost      << a.m_strPath << a.m_strPath_encoded << a.m_strQuery_encoded << a.m_strRef_encoded      << Q_INT8(a.m_bIsMalformed ? 1 : 0) << a.m_iPort;    return s;}QDataStream & operator>> (QDataStream & s, KURL & a){    Q_INT8 malf;    s >> a.m_strProtocol >> a.m_strUser >> a.m_strPass >> a.m_strHost      >> a.m_strPath >> a.m_strPath_encoded >> a.m_strQuery_encoded >> a.m_strRef_encoded      >> malf >> a.m_iPort;    a.m_bIsMalformed = (malf != 0);    if ( a.m_strQuery_encoded.isEmpty() )      a.m_strQuery_encoded = QString::null;    return s;}KURL::KURL( const QUrl &u ){  m_strProtocol = u.protocol();  m_strUser = u.user();  m_strPass = u.password();  m_strHost = u.host();  m_strPath = u.path( FALSE );  m_strPath_encoded = QString::null;  m_strQuery_encoded = u.query();  m_strRef_encoded = u.ref();  m_bIsMalformed = !u.isValid();  m_iPort = u.port();}KURL::KURL( const KURL& _u, const QString& _rel_url, int encoding_hint ){  // WORKAROUND THE RFC 1606 LOOPHOLE THAT ALLOWS  // http:/index.html AS A VALID SYNTAX FOR RELATIVE  // URLS. 
( RFC 2396 section 5.2 item # 3 )  QString rUrl = _rel_url;  int len = _u.m_strProtocol.length();  if ( _u.hasHost() && rUrl.length() != 0 &&       rUrl.left( len ).lower() == _u.m_strProtocol.lower() &&       rUrl[len] == ':' && (rUrl[len+1] != '/' ||       (rUrl[len+1] == '/' && rUrl[len+2] != '/')) )  {                rUrl.remove( 0, rUrl.find( ':' ) + 1 );  }  if ( rUrl[0] == '#' )  {    *this = _u;    setHTMLRef( decode(rUrl.mid(1), 0, encoding_hint) );  }  else if ( isRelativeURL( rUrl) )  {    *this = _u;    m_strQuery_encoded = QString::null;    m_strRef_encoded = QString::null;    if ( rUrl[0] == '/')    {        m_strPath = QString::null;        m_strPath_encoded = QString::null;    }    else    {       int pos = m_strPath.findRev( '/' );       if (pos >= 0)          m_strPath.truncate(pos);       m_strPath += '/';       if (!m_strPath_encoded.isEmpty())       {          pos = m_strPath_encoded.findRev( '/' );          if (pos >= 0)             m_strPath_encoded.truncate(pos);          m_strPath_encoded += '/';       }    }    KURL tmp( url() + rUrl, encoding_hint);    *this = tmp;    cleanPath();  }  else  {    KURL tmp( rUrl, encoding_hint);    *this = tmp;  }}void KURL::reset(){  m_strProtocol = QString::null;  m_strUser = QString::null;  m_strPass = QString::null;  m_strHost = QString::null;  m_strPath = QString::null;  m_strPath_encoded = QString::null;  m_strRef_encoded = QString::null;  m_bIsMalformed = true;  m_iPort = 0;}bool KURL::isEmpty() const{  return (m_strPath.isEmpty() && m_strProtocol.isEmpty());}KURL KURL::completeURL (const DOM::DOMString & _url, const DOM::DOMString & _baseUrl){    QString url = _url.string();    QString baseUrl = _baseUrl.string();    KURL orig(baseUrl);    if(_url[(unsigned int)0] != '/')    {        KURL u( orig, url );        return u;    }    orig.setEncodedPathAndQuery(url);    return orig;}void KURL::parse( const QString& _url, int encoding_hint ){  // Return immediately whenever the given url  // is empty or 
null.  if ( _url.length() == 0  )  {    m_strProtocol = _url;    return;  }  // This is wrong!! All URLs should be deemed invalid until  // they have been correctly parsed. Specially now since ::url()  // returns the given url even if it is malformed. (DA)  // m_bIsMalformed = false;  QString port;  int start = 0;
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -