mgurlparser.cpp
来自「一款LINUX下的下载软件」· C++ 代码 · 共 968 行 · 第 1/2 页
CPP
968 行
/***************************************************************************
* mgurlparser.cpp
*
* Tue Sep 26 16:17:23 2006
* Copyright 2006 liubin,China
* Email multiget@gmail.com
****************************************************************************/
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "mgurlparser.h"
#include "common.h"
#include <iostream>
extern std::string gDefFtpPass;
using namespace std;
///%CF%D6%B9%DB%D7%AF%D1%CF%C2%DBCD/1-2a.mp3
#define XDIGIT_TO_XCHAR(x) (((x) < 10) ? ((x) + '0') : ((x) - 10 + 'A'))
#define ISXDIGIT(x) ( ((x) >= '0' && (x) <= '9')||\
((x) >= 'a' && (x) <= 'z')||\
((x) >= 'A' && (x) <= 'Z') )
#define XCHAR_TO_XDIGIT(x) (((x) >= '0' && (x) <= '9') ? \
((x) - '0') : (toupper(x) - 'A' + 10))
enum {
urlchr_reserved = 1, // rfc1738 reserved chars
urlchr_unsafe = 2 // rfc1738 unsafe chars
};
/* Shorthands for the table: */
#define R 1 // reserved char
#define U 2 // unsafe char
#define RU 3 // R|U
const static unsigned char urlchr_table[ 256 ] =
{
U, U, U, U, U, U, U, U, /* NUL SOH STX ETX EOT ENQ ACK BEL */
U, U, U, U, U, U, U, U, /* BS HT LF VT FF CR SO SI */
U, U, U, U, U, U, U, U, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */
U, U, U, U, U, U, U, U, /* CAN EM SUB ESC FS GS RS US */
U, 0, U, RU, R, U, R, 0, /* SP ! " # $ % & ' */
0, 0, 0, R, R, 0, 0, R, /* ( ) * + , - . / */
0, 0, 0, 0, 0, 0, 0, 0, /* 0 1 2 3 4 5 6 7 */
0, 0, RU, R, U, R, U, R, /* 8 9 : ; < = > ? */
RU, 0, 0, 0, 0, 0, 0, 0, /* @ A B C D E F G */
0, 0, 0, 0, 0, 0, 0, 0, /* H I J K L M N O */
0, 0, 0, 0, 0, 0, 0, 0, /* P Q R S T U V W */
0, 0, 0, RU, U, RU, U, 0, /* X Y Z [ \ ] ^ _ */
U, 0, 0, 0, 0, 0, 0, 0, /* ` a b c d e f g */
0, 0, 0, 0, 0, 0, 0, 0, /* h i j k l m n o */
0, 0, 0, 0, 0, 0, 0, 0, /* p q r s t u v w */
0, 0, 0, U, U, U, RU, U, /* x y z { | } ~ DEL */
U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
};
#define urlchr_test(c, mask) (urlchr_table[(unsigned char)(c)] & (mask))
#define URL_RESERVED_CHAR(c) urlchr_test(c, urlchr_reserved)
#define URL_UNSAFE_CHAR(c) urlchr_test(c, urlchr_unsafe)
CUrlParser::CUrlParser()
{
m_bValidMirrorUrl = false;
}
//返回false是不支持的协议或错误的地址格式
bool CUrlParser::SetUrl( std::string url )
{
Trim( url );
m_bValidMirrorUrl = false;
//协议检查
m_protocol = UrlType( url );
if ( m_protocol == UNKNOW_PROTOCOL )
{
return false;
}
if ( m_protocol == FTP_PROTOCOL )
{ //ftp
//提取用户和密码
if ( !GetUserAndPass( url, m_user, m_pass ) )
{
return false;
}
//服务器和端口
if ( !GetServerAndPort( url, m_server, m_port ) )
{
return false;
}
if ( !GetRawUrl( url, m_raw ) )
{
return false;
}
//文件汉路径
if ( !GetPathFile( url, m_file ) )
{
return false;
}
if ( m_file[ m_file.length() - 1 ] == '/' )
{ //ftp dir not a valid file url
m_bValidMirrorUrl = true;
return false;
}
//编码文件
//Precode(m_file,m_escfile);
m_escfile = m_file;
//DBGOUT("m_file="<<m_file);
UnEscape( m_escfile ); //m_escfile反而存放非转换串
//DBGOUT("m_escfile="<<m_escfile);
}
else if ( m_protocol == HTTP_PROTOCOL )
{ //http
//提取用户和密码
if ( !GetUserAndPass( url, m_user, m_pass ) )
{
return false;
}
//服务器和端口
if ( !GetServerAndPort( url, m_server, m_port ) )
{
return false;
}
if ( !GetRawUrl( url, m_raw ) )
{
return false;
}
//文件汉路径
if ( !GetPathFile( url, m_file ) )
{
return false;
}
//编码文件
Precode( m_file, m_escfile );
if ( !GetRefer( m_raw, m_refer ) )
{
return false;
}
}
else
{
return false;
}
return true;
}
string CUrlParser::GetUser()
{
return m_user;
}
string CUrlParser::GetPass()
{
return m_pass;
}
string CUrlParser::GetServer()
{
return m_server;
}
string CUrlParser::GetFilePathName()
{
return m_file;
}
std::string CUrlParser::GetFileName()
{
//only file name without path
string::size_type pos;
pos = m_file.find_last_of( '/' ); //for linux only
if ( pos == std::string::npos )
{
std::string uns=m_file;
UnEscape(uns);
return uns;
}
else
{
std::string uns= m_file.substr( pos + 1, m_file.length() - pos - 1 );
UnEscape(uns);
return uns;
}
/*
char fn[ 512 ];
strcpy( fn, m_file.c_str() );
int i = strlen( fn );
while ( fn[ i ] != '/' && i > 0 )
i--;
return string( fn + i + 1 );
*/
}
//only http/https need refer
std::string CUrlParser::GetRefer()
{
return m_refer;
}
int CUrlParser::GetPort()
{
return m_port;
}
//目前就支持两种协议,HTTP,FTP
_PROTYPE CUrlParser::UrlType( std::string url )
{
if ( url.length() < 7 )
return UNKNOW_PROTOCOL; //太短了!
#ifdef WIN32
if ( strnicmp( url.c_str(), "ftp://", 6 ) == 0 )
return FTP_PROTOCOL;
if ( strnicmp( url.c_str(), "http://", 7 ) == 0 )
return HTTP_PROTOCOL;
#else
if ( strncasecmp( url.c_str(), "ftp://", 6 ) == 0 )
return FTP_PROTOCOL;
if ( strncasecmp( url.c_str(), "http://", 7 ) == 0 )
return HTTP_PROTOCOL;
#endif
return UNKNOW_PROTOCOL;
}
_PROTYPE CUrlParser::GetUrlType()
{
return m_protocol;
}
std::string CUrlParser::GetEscFilePathName()
{
return m_escfile;
}
//新的提取用户和密码的函数
bool CUrlParser::GetUserAndPass( const std::string& fullurl, std::string& user, std::string& pass )
{
//从后向前寻找@,如果有则服务器从@后开始,到/结束
//如果没有,则从ftp://开始
//是否太长而无法处理?
if ( fullurl.length() > 510 )
return false;
//考到url
char url[ 512 ];
strcpy( url, fullurl.c_str() );
//p是移动的指针
char * p = url;
p += strlen( url );
//look for @
//while ( *p != '@' && *p != 0 )
// p++;
findat:
while ( *p != '@' && p != url )
p--;
//根据提交的BUG,有时后面会有这个@字符,添加检查过滤无效的@ if ( *p == '@' ) { //检查是否前方的'/'位置是否是第2个'/' char *q=p; while ( *q != '/' && q > url )
q--; if( q == url ) return false; //其实不会出现这个情况 char *m = url; //从前向后找第二个'/',应该等于q while ( *m != '/' ) m++; if( q != m + 1 ) { p--; goto findat; } }
if ( *p == '@' )
{ //get @
char *e = p - 1;
char *p = e;
while ( *p != '/' && p > url )
p--;
if ( p == url )
{
return false; //没找到前面的斜杠,一个不合法的URL
}
else
{
char *s = p + 1;
//assert( e > s );
//int len=e-s+1;
//search ':'
while ( *p != ':' && p < e )
p++;
if ( p == e )
{
return false; //无冒号分割用户名和密码,一个不合法的URL
}
if ( 30 < p - s + 1 ) //用户名超过30,有点长了
{
return false;
}
char temp[ 31 ];
memcpy( temp, s, p - s );
temp[ p - s ] = 0;
user = std::string( temp );
if ( 30 < e - p + 1 ) //密码超过30,有点长了
{
return false;
}
memcpy( temp, p + 1, e - p );
temp[ e - p ] = 0;
pass = std::string( temp );
return true;
}
}
else
{
user = std::string( "anonymous" );
pass = gDefFtpPass;
return true;
}
return false;
}
bool CUrlParser::GetServerAndPort( const std::string& fullurl, std::string& server, int& port )
{
//从后向前寻找@,如果有则服务器从@后开始,到/结束
//如果没有,则从ftp://开始
//是否太长而无法处理?
if ( fullurl.length() > 510 )
return false;
//考到url
char url[ 512 ];
strcpy( url, fullurl.c_str() );
//p是移动的指针
char * p = url;
p += strlen( url );
//look for @
//while ( *p != '@' && *p != 0 )
// p++;
findat:
while ( *p != '@' && p != url )
p--;
//根据提交的BUG,有时后面会有这个@字符,添加检查过滤无效的@ if ( *p == '@' ) { //检查是否前方的'/'位置是否是第2个'/' char *q=p; while ( *q != '/' && q > url )
q--; if( q == url ) return false; //其实不会出现这个情况 char *m = url; //从前向后找第二个'/',应该等于q while ( *m != '/' ) m++; if( q != m + 1 ) { p--; goto findat; } }
if ( *p == '@' )
{
char * s = p + 1;
while ( *p != '/' && *p != 0 )
p++;
if ( *p == '/' )
{
int len = p - s; //包含端口在内的长度
//检查是否包含了端口在内
char *ck = p - 1;
while ( *ck != ':' && ck > s )
ck--;
if ( ck == s )
{ //无端口
if ( 256 < len + 1 )
return false; //server string too long
char temp[ 256 ];
memcpy( temp, s, len );
temp[ len ] = 0;
server = std::string( temp );
//按协议类型给出缺省的端口,以后可括从
port = GetDefaultPort( m_protocol );
return true;
}
else
{
//有端口
int slen = ck - s;
int plen = len - slen - 1;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?