📄 kurl.cpp
字号:
// -*-C++-*-/* This file is part of the KDE libraries Copyright (C) 1997 Steffen Hansen (stefh@dit.ou.dk) This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.*/// KURL// Reference: RFC 1738 Uniform Resource Locators#include <qdir.h>#include "kurl.h"#include <qregexp.h>#include <stdlib.h>/***** This method expands the specified URL by locating any special** characters and converting them to a three-character notation.** For example, the character '=' would be represented as '%3e'.***/void KURL::encodeURL( QString& _url ) { int old_length = _url.length(); if (!old_length) return; // a worst case approximation char *new_url = new char[ old_length * 3 + 1 ]; int new_length = 0; for (int i = 0; i < old_length; i++) { static char *safe = "$-._!*(),/"; /* RFC 1738 */ // '/' added by David, fix found by Michael Reiher char t = _url[i]; if ( (( t >= 'A') && ( t <= 'Z')) || (( t >= 'a') && ( t <= 'z')) || (( t >= '0') && ( t <= '9')) || (strchr(safe, t)) ) { new_url[ new_length++ ] = _url[i]; } else { new_url[ new_length++ ] = '%'; unsigned char c = ((unsigned char)_url[ i ]) / 16; c += (c > 9) ? ('A' - 10) : '0'; new_url[ new_length++ ] = c; c = ((unsigned char)_url[ i ]) % 16; c += (c > 9) ? ('A' - 10) : '0'; new_url[ new_length++ ] = c; } } new_url[new_length]=0; _url = new_url; delete [] new_url;}/***** The following function will convert the specified character into a** numeric hex representation in the range 0-15.***/static uchar hex2int( char _char ) { if ( _char >= 'A' && _char <='F') return _char - 'A' + 10; if ( _char >= 'a' && _char <='f') return _char - 'a' + 10; if ( _char >= '0' && _char <='9') return _char - '0'; return 0;}/***** This method compacts the specified URL by locating three-character** notions and coverting them to a special character. For example,** the notation '%3e' would be converted to '='.***/void KURL::decodeURL( QString& _url ) { int old_length = _url.length(); if (!old_length) return; int new_length = 0; // make a copy of the old one char *new_url = new char[ old_length + 1]; for (int i = 0; i < old_length; i++) { uchar character = _url[ i ]; if ( character == '%' ) { character = hex2int( _url[i+1] ) * 16 + hex2int( _url[i+2] ); i += 2; } new_url [ new_length++ ] = character; } new_url [ new_length ] = 0; _url = new_url; delete [] new_url;}/***** This method is supposed to free up all memory used by the QStrings.** In reality it does nothing, as the detach() method in QString is empty.***/voidKURL::detach(){ protocol_part.detach(); host_part.detach(); path_part.detach(); ref_part.detach(); /* temporarily removed */ // dir_part.detach(); user_part.detach(); passwd_part.detach(); path_part_decoded.detach(); search_part.detach();}/***** This method is an overloaded object creator. It sets all "parts"** to empty, and marks itself as malformed.***/KURL::KURL() { malformed = true; protocol_part = ""; host_part = ""; path_part = ""; ref_part = ""; bNoPath = false;}/***** This method is an overloaded object creator.** This is where all the grief is.***/KURL::KURL( KURL & _base_url, const char * _rel_url ){ QString relURL ( _rel_url ); //printf("BASE protocol: \"%s\"\n",_base_url.protocol()); //printf("BASE host: \"%s\"\n",_base_url.host()); //printf("BASE path: \"%s\"\n",_base_url.path()); //printf("BASE reference: \"%s\"\n",_base_url.reference()); //printf("REL: \"%s\"\n",_rel_url); //getchar(); // Holds location of <scheme> & <scheme-specific> separator int sep_loc = relURL.find (':'); // Check if relative URL has the same protocol as parent (base URL) bool same_protocol = ( sep_loc <= 0 ) ? false : ( relURL.mid(0, sep_loc) == _base_url.protocol_part ); // check if "_rel_url" is an absolute URL (RFC 2396). if(relURL.find(QRegExp("^[a-zA-Z][a-zA-Z0-9\\+\\.\\-]*://")) == 0 || relURL.find("man", 0, false) == 0 || (relURL.find(QRegExp("^[a-zA-Z][a-zA-Z0-9\\+\\.\\-]*:/?[a-zA-Z0-9%_!~'();:@&=$,\\?\\*\\+\\.\\-]")) == 0 && !same_protocol )) { parse( _rel_url ); } // Relative URL - starts with a "net_path" (RFC 2396). else if ( relURL.find ( "//" ) == 0 ) parse( (QString)(_base_url.protocol_part + ":" + _rel_url) ); // davet else if(relURL.find("./") == 0) { // added by jsk .. the below changed by adding in the path_part which // is needed to combine a base url with a relative url parse((QString)(_base_url.protocol_part + "://" + _base_url.host_part + _base_url.path_part + "/" + (_rel_url + 2))); } else if((_rel_url[0] == '/') && (_rel_url[1] != '/')) { parse((QString)(_base_url.protocol_part + "://" + _base_url.host_part + _rel_url)); } else { parse((QString)(_base_url.protocol_part + "://" + _base_url.host_part + _base_url.path_part + _rel_url)); //printf("FINAL: \"%s\"\n",(char *)url()); //printf("Default!\n"); //getchar(); } //printf("FINAL: \"%s\"\n",(char *)url()); //getchar();#ifdef NEVER // Relative URL - all others (RFC 2396) else { malformed = _base_url.malformed; protocol_part = _base_url.protocol_part; host_part = _base_url.host_part; port_number = _base_url.port_number; path_part = _base_url.path_part; path_part_decoded = _base_url.path_part_decoded; ref_part = _base_url.ref_part; dir_part = _base_url.dir_part; user_part = _base_url.user_part; passwd_part = _base_url.passwd_part; bNoPath = _base_url.bNoPath; detach(); if ( same_protocol ) cd( _rel_url + (sep_loc + 1), false ); else cd( _rel_url, false ); } #endif}/***** This method is an overloaded object creator. It will parse the** specified URL, split it into its "parts", and check to see if** it is malformed.***/KURL::KURL( const char* _url){ parse( _url );}/***** This method will parse the specified URL. It splits out and recovers** the following URL components or "parts":**** o protocol_part: "http"** o host_part: "www.censoft.com"** o path_part: "/dir1/dir2/element.html" (contains %xy notation)** o path_part_decoded: "/dir1/dir2/element.html" (contains XY characters)** o search_part: "MT=harry&SM=MC&DV=0" (all characters after ?)** o ref_part: "LABEL1" (all characters after #)** o dir_part: "/dir1/dir2/"** o user_part: ?** o passwd_part: ?** o port_number: 80** o bNoPath: bool (indicates empty path)** o malformed: bool (indicates bogus URL)***/void KURL::parse( const char * _url ){ QString url(_url); // defaults malformed = false; path_part_decoded = 0; search_part = 0; ref_part = ""; bNoPath = false; // Empty or Null string ? if ( url.isEmpty() ) { malformed = true; return; } // Only allow valid URL's that begin with "/". (Dawit A.) if ( url[0] == '/' && url.find (QRegExp ("^//[a-zA-Z0-9]+.*")) == -1 ) { // Create a light weight URL with protocol path_part_decoded = _url; path_part = path_part_decoded.data(); cleanPath(); // clean path before doing anything else!! KURL::encodeURL( path_part ); protocol_part = "file"; return; } // We need a : somewhere to determine the protocol // ":" Cannot be the first character either! (Dawit A.) int pos = url.find( ":" ); if ( pos <= 0 ) { malformed = true; return; } protocol_part = url.left( pos ).lower(); if ( protocol_part == "info" || protocol_part == "mailto" || protocol_part == "man" || protocol_part == "news" ) { path_part = url.mid( pos + 1, url.length() ); detach(); return; } // Is there something behind "protocol:" ? // The minimal valid URL is "file:/" if ( static_cast<int>(url.length()) < pos + 2 ) { malformed = true; return; } if ( strncmp( url.data() + pos, ":/", 2 ) != 0 ) { malformed = true; return; } pos += 2; int pos2; // Do we have a host part ? if ( url.data()[pos] == '/' ) { // Find end of host string pos2 = url.find( '/', pos + 1); // We dont have a path ? if ( pos2 == -1 ) { host_part = url.mid( pos + 1, url.length() ); pos2 = url.length(); } else { host_part = url.mid( pos + 1, (( pos2 == -1) ? url.length() : pos2) - pos - 1); } } else { host_part = ""; // Go back to the '/' pos2 = pos - 1; } if ( host_part.length() > 0 ) { int j = host_part.find( "@" ); if ( j != -1 ) { int i = host_part.find( ":" ); if ( i != -1 && i < j ) { user_part = host_part.left( i ); passwd_part = host_part.mid( i + 1, j - i - 1 ); host_part = host_part.mid( j + 1, host_part.length() ); } else { user_part = host_part.left( j ); passwd_part = ""; host_part = host_part.mid( j + 1, host_part.length() ); } } else { passwd_part = ""; user_part = ""; } } else { passwd_part = ""; user_part = ""; } // find a possible port number int p = host_part.find(":"); if ( p != -1 ) { port_number = host_part.right( host_part.length() - (p + 1) ).toInt(); host_part = host_part.left( p ); } else { port_number = 0; } // Find the path if( pos2 < static_cast<int>(url.length()) && pos2 != -1) { QRegExp exp( "[a-zA-Z]+:" ); int pos3 = url.findRev( '#' ); // Is there a) no reference or b) only a subprotocol like file:/tmp/arch.tgz#tar:/usr/ if ( pos3 == -1 || exp.match( url, pos3 + 1 ) != -1 ) { path_part = url.mid( pos2, url.length() ); } else if ( pos3 > pos2 ) { path_part = url.mid( pos2, pos3 - pos2 ); ref_part = url.mid( pos3 + 1, url.length() ); // if (path_part.right(1) == "/") no filename and a reference // malformed = true; } else { malformed = true; return; } bNoPath = false; } else { path_part = "/"; // indicate that we did not see a path originally bNoPath = true; ref_part = ""; } if ((protocol_part == "http") || (protocol_part == "imap4") || (protocol_part == "pop3")) { p = path_part.find('?'); if (p != -1) { search_part = path_part.mid( p + 1, path_part.length() ); path_part = path_part.left( p); } } else { cleanPath(); } /* ip-schemepart, login, see RFC1738 */ /* Syntax [<user>[":"<password>]"@"]<host>[":"<port>]] */ /* Note that both user and password may be encoded. */ // login -> [userpart@]hostpart // hostpart -> Host[:Port] // userpart -> User[:Pass] detach();}/***** This method is an overloaded object creator. It sets the specified** protocol, host, path, and refernce parts. No checking is done - the** parts specified must be correct.***/KURL::KURL( const char* _protocol, const char* _host, const char* _path, const char* _ref){ protocol_part = _protocol; host_part = _host; path_part = _path; ref_part = _ref; malformed = false;} /********/bool KURL::hasSubProtocol(){ return ( !path_part.isNull() && strchr( path_part, '#' ) != 0L );}/********/const char* KURL::directory( bool _trailing ){ // Calculate only on demand if ( path_part.right( 1 )[0] == '/' ) dir_part = path_part.copy(); else { QString p = path_part; if ( !_trailing ) if ( p.right( 1 )[0] == '/' ) p = p.left( p.length() - 1 ); int i = p.findRev( "/" ); if ( i == -1 ) // Should never happen dir_part = "/"; else dir_part = p.left( i + 1 ); } return dir_part.data();}/***** This method will return a pointer to the host string, or NULL if** the host string is empty.***/const char* KURL::host() const { if (host_part.isNull()) return ""; else return host_part.data();}/***** This method is the object's destructor.***/KURL::~KURL(){}/***** This method will return the path of the URL or NULL if there is none.** Any special characters are expanded to their '%xy' notation.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -