⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kurl.cpp

📁 PIXIL is a small footprint operating environment, complete with PDA PIM applications, a browser and
💻 CPP
📖 第 1 页 / 共 2 页
字号:
// -*-C++-*-/* This file is part of the KDE libraries    Copyright (C) 1997 Steffen Hansen (stefh@dit.ou.dk)    This library is free software; you can redistribute it and/or    modify it under the terms of the GNU Library General Public    License as published by the Free Software Foundation; either    version 2 of the License, or (at your option) any later version.    This library is distributed in the hope that it will be useful,    but WITHOUT ANY WARRANTY; without even the implied warranty of    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    Library General Public License for more details.    You should have received a copy of the GNU Library General Public License    along with this library; see the file COPYING.LIB.  If not, write to    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,    Boston, MA 02111-1307, USA.*/// KURL// Reference: RFC 1738 Uniform Resource Locators#include <qdir.h>#include "kurl.h"#include <qregexp.h>#include <stdlib.h>/***** This method expands the specified URL by locating any special** characters and converting them to a three-character notation.** For example, the character '=' would be represented as '%3e'.***/void KURL::encodeURL( QString& _url ) {  int old_length = _url.length();      if (!old_length)    return;     // a worst case approximation  char *new_url = new char[ old_length * 3 + 1 ];  int new_length = 0;       for (int i = 0; i < old_length; i++)   {    static char *safe = "$-._!*(),/"; /* RFC 1738 */    // '/' added by David, fix found by Michael Reiher    char t = _url[i];        if ( (( t >= 'A') && ( t <= 'Z')) ||       (( t >= 'a') && ( t <= 'z')) ||	 (( t >= '0') && ( t <= '9')) ||	 (strchr(safe, t))	 )    {      new_url[ new_length++ ] = _url[i];    }    else    {      new_url[ new_length++ ] = '%';            unsigned char c = ((unsigned char)_url[ i ]) / 16;      c += (c > 9) ? ('A' - 10) : '0';      new_url[ new_length++ ] = c;            c = ((unsigned char)_url[ i ]) % 16;      c += (c > 9) ? ('A' - 10) : '0';      new_url[ new_length++ ] = c;          }   }  new_url[new_length]=0;  _url = new_url;  delete [] new_url;}/***** The following function will convert the specified character into a** numeric hex representation in the range 0-15.***/static uchar hex2int( char _char ) {  if ( _char >= 'A' && _char <='F')    return _char - 'A' + 10;  if ( _char >= 'a' && _char <='f')    return _char - 'a' + 10;  if ( _char >= '0' && _char <='9')    return _char - '0';  return 0;}/***** This method compacts the specified URL by locating three-character** notions and coverting them to a special character. For example,** the notation '%3e' would be converted to '='.***/void KURL::decodeURL( QString& _url ) {  int old_length = _url.length();  if (!old_length)    return;      int new_length = 0;    // make a copy of the old one  char *new_url = new char[ old_length + 1];    for (int i = 0; i < old_length; i++)   {    uchar character = _url[ i ];    if ( character == '%' )     {      character = hex2int( _url[i+1] ) * 16 + hex2int( _url[i+2] );      i += 2;    }    new_url [ new_length++ ] = character;  }  new_url [ new_length ] = 0;  _url = new_url;  delete [] new_url;}/***** This method is supposed to free up all memory used by the QStrings.** In reality it does nothing, as the detach() method in QString is empty.***/voidKURL::detach(){  protocol_part.detach();  host_part.detach();  path_part.detach();  ref_part.detach();  /* temporarily removed */  // dir_part.detach();  user_part.detach();  passwd_part.detach();  path_part_decoded.detach();  search_part.detach();}/***** This method is an overloaded object creator. It sets all "parts"** to empty, and marks itself as malformed.***/KURL::KURL() {   malformed = true;  protocol_part = "";  host_part = "";   path_part = "";   ref_part = "";   bNoPath = false;}/***** This method is an overloaded object creator.** This is where all the grief is.***/KURL::KURL( KURL & _base_url, const char * _rel_url ){  QString relURL ( _rel_url );  //printf("BASE protocol:  \"%s\"\n",_base_url.protocol());  //printf("BASE host:      \"%s\"\n",_base_url.host());  //printf("BASE path:      \"%s\"\n",_base_url.path());  //printf("BASE reference: \"%s\"\n",_base_url.reference());  //printf("REL:            \"%s\"\n",_rel_url);  //getchar();  // Holds location of <scheme> & <scheme-specific> separator  int sep_loc = relURL.find (':');  // Check if relative URL has the same protocol as parent (base URL)	  bool same_protocol = ( sep_loc <= 0 ) ? false : ( relURL.mid(0, sep_loc) == _base_url.protocol_part );  // check if "_rel_url" is an absolute URL (RFC 2396).	  if(relURL.find(QRegExp("^[a-zA-Z][a-zA-Z0-9\\+\\.\\-]*://")) == 0 ||     relURL.find("man", 0, false) == 0 ||     (relURL.find(QRegExp("^[a-zA-Z][a-zA-Z0-9\\+\\.\\-]*:/?[a-zA-Z0-9%_!~'();:@&=$,\\?\\*\\+\\.\\-]")) == 0 && !same_protocol ))  {    parse( _rel_url );  }  // Relative URL - starts with a "net_path" (RFC 2396).  else if ( relURL.find ( "//" ) == 0 )    parse( (QString)(_base_url.protocol_part + ":" + _rel_url) );      // davet  else if(relURL.find("./") == 0)  {    // added by jsk .. the below changed by adding in the path_part which    // is needed to combine a base url with a relative url    parse((QString)(_base_url.protocol_part + "://" + _base_url.host_part + _base_url.path_part + "/" + (_rel_url + 2)));  }  else if((_rel_url[0] == '/') && (_rel_url[1] != '/'))  {    parse((QString)(_base_url.protocol_part + "://" + _base_url.host_part + _rel_url));  }  else  {    parse((QString)(_base_url.protocol_part + "://" + _base_url.host_part + _base_url.path_part + _rel_url));    //printf("FINAL: \"%s\"\n",(char *)url());    //printf("Default!\n"); //getchar();  }  //printf("FINAL: \"%s\"\n",(char *)url());  //getchar();#ifdef NEVER    // Relative URL - all others (RFC 2396)	  else  {    malformed = _base_url.malformed;    protocol_part = _base_url.protocol_part;    host_part = _base_url.host_part;    port_number = _base_url.port_number;    path_part = _base_url.path_part;    path_part_decoded = _base_url.path_part_decoded;    ref_part = _base_url.ref_part;    dir_part = _base_url.dir_part;    user_part = _base_url.user_part;    passwd_part = _base_url.passwd_part;    bNoPath = _base_url.bNoPath;    detach();    if ( same_protocol )      cd( _rel_url + (sep_loc + 1), false );    else      cd( _rel_url, false );  } #endif}/***** This method is an overloaded object creator. It will parse the** specified URL, split it into its "parts", and check to see if** it is malformed.***/KURL::KURL( const char* _url){  parse( _url );}/***** This method will parse the specified URL. It splits out and recovers** the following URL components or "parts":****   o protocol_part:     "http"**   o host_part:         "www.censoft.com"**   o path_part:         "/dir1/dir2/element.html" (contains %xy notation)**   o path_part_decoded: "/dir1/dir2/element.html" (contains XY characters)**   o search_part:       "MT=harry&SM=MC&DV=0"     (all characters after ?)**   o ref_part:          "LABEL1"                  (all characters after #)**   o dir_part:          "/dir1/dir2/"**   o user_part:         ?**   o passwd_part:       ?**   o port_number:       80**   o bNoPath:           bool                      (indicates empty path)**   o malformed:         bool                      (indicates bogus URL)***/void KURL::parse( const char * _url ){  QString url(_url);  // defaults  malformed = false;  path_part_decoded = 0;  search_part = 0;  ref_part = "";  bNoPath = false;  // Empty or Null string ?  if ( url.isEmpty() )  {    malformed = true;    return;  }  // Only allow valid URL's that begin with "/". (Dawit A.)  if ( url[0] == '/' && url.find (QRegExp ("^//[a-zA-Z0-9]+.*")) == -1 )  {    // Create a light weight URL with protocol    path_part_decoded = _url;    path_part = path_part_decoded.data();    cleanPath();  // clean path before doing anything else!!    KURL::encodeURL( path_part );    protocol_part = "file";    return;  }      // We need a : somewhere to determine the protocol  // ":" Cannot be the first character either! (Dawit A.)  int pos = url.find( ":" );    if ( pos <= 0 )  {    malformed = true;    return;  }  protocol_part = url.left( pos ).lower();  if ( protocol_part == "info" || protocol_part == "mailto" ||        protocol_part == "man" || protocol_part == "news" )  {    path_part = url.mid( pos + 1, url.length() );    detach();    return;  }      // Is there something behind "protocol:" ?  // The minimal valid URL is "file:/"  if ( static_cast<int>(url.length()) < pos + 2 )  {    malformed = true;    return;  }    if ( strncmp( url.data() + pos, ":/", 2 ) != 0 )  {    malformed = true;    return;  }  pos += 2;  int pos2;  // Do we have a host part ?  if ( url.data()[pos] == '/' )  {    // Find end of host string    pos2 = url.find( '/', pos + 1);    // We dont have a path ?    if ( pos2 == -1 )    {      host_part = url.mid( pos + 1, url.length() );      pos2 = url.length();    }    else    {      host_part = url.mid( pos + 1, 			   (( pos2 == -1) ? url.length() : pos2) 			   - pos - 1);    }      }  else  {    host_part = "";    // Go back to the '/'    pos2 = pos - 1;  }  if ( host_part.length() > 0 )  {        int j = host_part.find( "@" );    if ( j != -1 )    {	      int i = host_part.find( ":" );      if ( i != -1 && i < j )      {	user_part = host_part.left( i );	passwd_part = host_part.mid( i + 1, j - i - 1 );	host_part = host_part.mid( j + 1, host_part.length() );      }      else      {	user_part = host_part.left( j );	passwd_part = "";	host_part = host_part.mid( j + 1, host_part.length() );      }    }    else    {      passwd_part = "";      user_part = "";    }  }  else  {    passwd_part = "";    user_part = "";  }    // find a possible port number  int p = host_part.find(":");  if ( p != -1 )  {    port_number = host_part.right( host_part.length() - (p + 1) ).toInt();    host_part = host_part.left( p );  }  else  {    port_number = 0;  }      // Find the path  if( pos2 < static_cast<int>(url.length()) && pos2 != -1)  {    QRegExp exp( "[a-zA-Z]+:" );    int pos3 = url.findRev( '#' );    // Is there a) no reference or b) only a subprotocol like file:/tmp/arch.tgz#tar:/usr/    if ( pos3 == -1 || exp.match( url, pos3 + 1 ) != -1 )    {      path_part = url.mid( pos2, url.length() );    }    else if ( pos3 > pos2 )     {      path_part = url.mid( pos2, pos3 - pos2 );      ref_part = url.mid( pos3 + 1, url.length() );      // if (path_part.right(1) == "/")  no filename and a reference      // malformed = true;    }    else    {      malformed = true;      return;    }    bNoPath = false;  }  else  {    path_part = "/";    // indicate that we did not see a path originally    bNoPath = true;    ref_part = "";  }   if ((protocol_part == "http") || (protocol_part == "imap4") || (protocol_part == "pop3"))  {    p = path_part.find('?');    if (p != -1)    {      search_part = path_part.mid( p + 1, path_part.length() );      path_part = path_part.left( p);    }  }  else  {    cleanPath();  }      /* ip-schemepart, login, see RFC1738                   */  /* Syntax [<user>[":"<password>]"@"]<host>[":"<port>]] */  /* Note that both user and password may be encoded.    */  // login    -> [userpart@]hostpart   // hostpart -> Host[:Port]  // userpart -> User[:Pass]    detach();}/***** This method is an overloaded object creator. It sets the specified** protocol, host, path, and refernce parts. No checking is done - the** parts specified must be correct.***/KURL::KURL( const char* _protocol, const char* _host, 			const char* _path, const char* _ref){  protocol_part = _protocol;  host_part = _host;  path_part = _path;  ref_part  = _ref;  malformed = false;}     /********/bool KURL::hasSubProtocol(){  return ( !path_part.isNull() && 	   strchr( path_part, '#' ) != 0L );}/********/const char* KURL::directory( bool _trailing ){  // Calculate only on demand  if ( path_part.right( 1 )[0] == '/' )    dir_part = path_part.copy();  else  {    QString p = path_part;    if ( !_trailing )      if ( p.right( 1 )[0] == '/' )	p = p.left( p.length() - 1 );    int i = p.findRev( "/" );    if ( i == -1 )      // Should never happen      dir_part = "/";    else      dir_part = p.left( i + 1 );  }    return dir_part.data();}/***** This method will return a pointer to the host string, or NULL if** the host string is empty.***/const char* KURL::host() const {  if (host_part.isNull())     return "";  else     return host_part.data();}/***** This method is the object's destructor.***/KURL::~KURL(){}/***** This method will return the path of the URL or NULL if there is none.** Any special characters are expanded to their '%xy' notation.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -