📄 url.cpp

📁 dget是一个基于Linux平台的多线程下载工具, 采用C++开发。主要支持FTP, HTTP, MMS, RTSP协议下载, 目前为止主要实现了FTP, HTTP, MMS,RTSP协议的多线程下载
💻 CPP
字号:
/* * by balancesli * balancesli@gmail.com * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */#include <stdarg.h>#include <stdio.h>#include <string.h>#include <stdlib.h>#include <errno.h>#include <ctype.h>#include "Utils.h"#include "Url.h"#include "dget.h"/********************* class Url Impl **********/char * ProtoString[] = { "ftp:", "http:", NULL };/* Canonicalize Path, and return a new Path.  The new Path differs from Pathin that:Multple `/'s are collapsed to a single `/'.Leading `./'s and trailing `/.'s are removed.Trailing `/'s are removed.Non-leading `../'s and trailing `..'s are handled by removingportions of the Path.E.g. "a/b/c/./../d/.." will yield "a/b".Changes by hniksic:Always use '/' as StubChr.Don't check for local things using canon_stat.Change the original string instead of strdup-ing.React correctly when beginning with `./' and `../'. */void TUrl :: SimplifyPath(char * Path){	register int i, Start, Ddot;    char StubChr;    if (!*Path) return;    /*     * StubChr = (*Path == '/') ? '/' : '.';      */    StubChr = '/';    /*     * Addition: Remove all `./'-s preceding the string.  If `../'-s     * precede, put `/' in front and remove them too.       */    i = 0;    Ddot = 0;    while (1)    {		if (Path[i] == '.' && Path[i + 1] == '/')	    	i += 2;		else if (Path[i] == '.' && Path[i + 1] == '.' && Path[i + 2] == '/')		{	    	i += 3;	    	Ddot = 1;		} 		else	    	break;    }	    if(i)		strcpy(Path, Path + i - Ddot);    /*  Replace single `.' or `..' with `/'. */    if((Path[0] == '.' && Path[1] == '\0') || 					(Path[0] == '.' && Path[1] == '.' && Path[2] == '\0'))    {		Path[0] = StubChr;		Path[1] = '\0';		return;    }        /* Walk along Path looking for things to compact. */	    i = 0;    while (1)    {		if(!Path[i])	    	break;		while(Path[i] && Path[i] != '/')	    	i++;		Start = i++;	/*	 * If we didn't find any slashes, then there is nothing left to do. 	 */		if(!Path[Start])	    	break;	/*	 * Handle multiple `/'s in a row. 	 */		while (Path[i] == '/')		    i++;		if ((Start + 1) != i)		{		    strcpy(Path + Start + 1, Path + i);		    i = Start + 1;		}	/*	 * Check for trailing `/'. 	 */		if (Start && !Path[i])		{		  zero_last:		    Path[--i] = '\0';		    break;		}	/*	 * Check for `../', `./' or trailing `.' by itself. 	 */		if (Path[i] == '.')		{		    /*		     * Handle trailing `.' by itself. 		     */		    if (!Path[i + 1])			goto zero_last;			    /*		     * Handle `./'. 		     */		    if (Path[i + 1] == '/')		    {				strcpy(Path + i, Path + i + 1);				i = (Start < 0) ? 0 : Start;				continue;		    }			    /*			 	 * Handle `../' or trailing `..' by itself. 			   	 */	    	if (Path[i + 1] == '.' && (Path[i + 2] == '/' || !Path[i + 2]))	    	{				while (--Start > -1 && Path[Start] != '/');				strcpy(Path + Start + 1, Path + i + 2);				i = (Start < 0) ? 0 : Start;				continue;	   		 }		}						/* Path == '.'*/    }				    if (!*Path)    {		*Path = StubChr;		Path[1] = '\0';    }}/* Returns 1 if the string contains unsafe characters, 0 otherwise. */bool TUrl :: IsContainUnsafe(const char * s){    for(; *s; s++)	{				if(strchr(URL_UNSAFE, *s))	    	return true;	}    return false;}/* Decodes the forms %xy in a URL to the character the hexadecimalcode of which is xy. xy are hexadecimal digits from[0123456789ABCDEF] (case-insensitive). If x or y are not hex-digitsor '%' is near '\0', the whole sequence is inserted literally. */void TUrl :: DecodeString(char * s){    char *p = s;    for (; *s; s++, p++)    {		if (*s != '%')		    *p = *s;		else		{	    /*	     * Do nothing if at the end of the string. Or if the chars	     * are not hex-digits. 	     */	    	if (!*(s + 1) || !*(s + 2) || !(isxdigit(*(s + 1)) && isxdigit(*(s + 2))))	    	{				*p = *s;				continue;	   		}		    *p = (ASC2HEXD(*(s + 1)) << 4) + ASC2HEXD(*(s + 2));	  	    s += 2;		}    }    *p = '\0';}/*  * Encodes the unsafe characters (listed in URL_UNSAFE) in a given * string, returning a malloc-ed %XX encoded string.  */char * TUrl :: EncodeString(const char * s){    const char *b;    char *p, *res;    int i;    	b = s;    for(i = 0; *s; s++, i++)	{		if(strchr(URL_UNSAFE, *s))	    	i += 2;			}	/*	 * Two more characters (hex digits) 	 */    res = new char[i + 1];	    /*     * TODO add error handling      */    s = b;    for (p = res; *s; s++)	{		if(strchr(URL_UNSAFE, *s))		{		   	*p++ = '%';		    *p++ = HEXD2ASC(*s >> 4);		    *p++ = HEXD2ASC(*s & 0xf);		} 		else		    *p++ = *s;	}    *p = '\0';    return res;}/* Extract the given URL of the form(http:|ftp:)//(user(:password)?@)?hostname(:port)?(/Path)?1. hostname (terminated with '/' or ':')2. port number (terminated with '/'), or chosen for the protocol3. dirname (everything after hostname)Most errors are handled. No allocation is done, you must supplypointers to allocated memory....and a host of other stuff :-)   - Recognizes hostname:dir/file for FTP andhostname(:portnum)?/dir/file for HTTP.- Parses the Path to yield directory and file- Parses the URL to yield the username and passwd (if present)- Decodes the strings, in case they contain "forbidden" characters- Writes the result to struct urlinfoIf the argument STRICT is set, it recognizes only the canonicalform.  *//* Build the directory and filename components of the Path. Both   components are *separately* malloc-ed strings! It does not change   the contents of Path.   If the Path ends with "." or "..", they are (correctly) counted as   directories. *//* note passwd user host dir file must be CLEANDUP *//* Return the URL as fine-formed string, with a proper protocol, port   number, directory and optional user/password. If the hide is != 0,   the password will be hidden. The forbidden characters in the URL   will be cleansed. *//* Find URL of format scheme:hostname[:port]/dir in a buffer. Thebuffer may contain anything, the routine should not bug out. *//* This function constructs and returns a malloced copy of the   relative link from two pieces of information: local name of the   referring file (s1) and local name of the referred file (s2).   So, if s1 is "jagor.srce.hr/index.html" and s2 is   "jagor.srce.hr/images/news.gif", new name should be   "images/news.gif".   Alternately, if the s1 is "fly.cc.fer.hr/ioccc/index.html", and s2   is "fly.cc.fer.hr/images/fly.gif", new name should be   "../images/fly.gif".   Caveats: s1 should not begin with '/', unless s2 begins with '/'   too.  s1 should not contain things like ".." and such --   construct_relative("fly/ioccc/../index.html", "fly/images/fly.gif")   will fail.  (workaround is to call Path_simplify on s1).  */TUrl :: TUrl(const char * s){	Addr = new char[strlen(s) + 1];	strcpy(Addr, s);}TUrl :: ~TUrl(void){	delete [] Addr;}	void TUrl :: ParseUrl(void){	char *cp, *sp, *up;		UserName = DEFAULT_FTP_USER;		if(strncmp(Addr, "http://", 7) == 0)	{		Port = 80;		HostName = Addr + 7;		Proto = ptHTTP;	}	else if(strncmp(Addr, "ftp://", 6) == 0)	{		Port = 21;		HostName = Addr + 6;		Proto = ptFTP;	}	else if(strncmp(Addr, "mms://", 6) == 0)	{		Port = 1755;		HostName = Addr + 6;		Proto = ptMMS;	}	else if(strncmp(Addr, "rtsp://", 7) == 0)	{		Port = 554;		HostName = Addr + 7;		Proto = ptRTSP;	}	else		ShowMsg("not an http or ftp url : %s", Addr);		sp = strchr(HostName, '/');	if(sp != NULL)	{		*sp++ = '\0';		HostDirName = sp;	}	else		HostDirName = NULL;	sp = strrchr(HostDirName, '/');	if(sp != NULL)	{		*sp++ = '\0';		HostFileName = sp;	}	else	{		HostFileName = HostDirName;		HostDirName = NULL;	}			up = strrchr(HostName, '@');	if(up != NULL)	{		UserName = HostName;		*up++ = '\0';		HostName = up;	}	up = strrchr(UserName, ':');	if(up != NULL)	{		*up++ = '\0';		PassWord = up; 	}	else			PassWord = DEFAULT_FTP_PASSWD;		cp = strrchr(HostName, ':');	if(cp != NULL)	{		*cp++ = '\0';		Port = atoi(cp);	}}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -