📄 url.cpp
字号:
/* * by balancesli * balancesli@gmail.com * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */#include <stdarg.h>#include <stdio.h>#include <string.h>#include <stdlib.h>#include <errno.h>#include <ctype.h>#include "Utils.h"#include "Url.h"#include "dget.h"/********************* class Url Impl **********/char * ProtoString[] = { "ftp:", "http:", NULL };/* Canonicalize Path, and return a new Path. The new Path differs from Pathin that:Multple `/'s are collapsed to a single `/'.Leading `./'s and trailing `/.'s are removed.Trailing `/'s are removed.Non-leading `../'s and trailing `..'s are handled by removingportions of the Path.E.g. "a/b/c/./../d/.." will yield "a/b".Changes by hniksic:Always use '/' as StubChr.Don't check for local things using canon_stat.Change the original string instead of strdup-ing.React correctly when beginning with `./' and `../'. */void TUrl :: SimplifyPath(char * Path){ register int i, Start, Ddot; char StubChr; if (!*Path) return; /* * StubChr = (*Path == '/') ? '/' : '.'; */ StubChr = '/'; /* * Addition: Remove all `./'-s preceding the string. If `../'-s * precede, put `/' in front and remove them too. */ i = 0; Ddot = 0; while (1) { if (Path[i] == '.' && Path[i + 1] == '/') i += 2; else if (Path[i] == '.' && Path[i + 1] == '.' && Path[i + 2] == '/') { i += 3; Ddot = 1; } else break; } if(i) strcpy(Path, Path + i - Ddot); /* Replace single `.' or `..' with `/'. */ if((Path[0] == '.' && Path[1] == '\0') || (Path[0] == '.' && Path[1] == '.' && Path[2] == '\0')) { Path[0] = StubChr; Path[1] = '\0'; return; } /* Walk along Path looking for things to compact. */ i = 0; while (1) { if(!Path[i]) break; while(Path[i] && Path[i] != '/') i++; Start = i++; /* * If we didn't find any slashes, then there is nothing left to do. */ if(!Path[Start]) break; /* * Handle multiple `/'s in a row. */ while (Path[i] == '/') i++; if ((Start + 1) != i) { strcpy(Path + Start + 1, Path + i); i = Start + 1; } /* * Check for trailing `/'. */ if (Start && !Path[i]) { zero_last: Path[--i] = '\0'; break; } /* * Check for `../', `./' or trailing `.' by itself. */ if (Path[i] == '.') { /* * Handle trailing `.' by itself. */ if (!Path[i + 1]) goto zero_last; /* * Handle `./'. */ if (Path[i + 1] == '/') { strcpy(Path + i, Path + i + 1); i = (Start < 0) ? 0 : Start; continue; } /* * Handle `../' or trailing `..' by itself. */ if (Path[i + 1] == '.' && (Path[i + 2] == '/' || !Path[i + 2])) { while (--Start > -1 && Path[Start] != '/'); strcpy(Path + Start + 1, Path + i + 2); i = (Start < 0) ? 0 : Start; continue; } } /* Path == '.'*/ } if (!*Path) { *Path = StubChr; Path[1] = '\0'; }}/* Returns 1 if the string contains unsafe characters, 0 otherwise. */bool TUrl :: IsContainUnsafe(const char * s){ for(; *s; s++) { if(strchr(URL_UNSAFE, *s)) return true; } return false;}/* Decodes the forms %xy in a URL to the character the hexadecimalcode of which is xy. xy are hexadecimal digits from[0123456789ABCDEF] (case-insensitive). If x or y are not hex-digitsor '%' is near '\0', the whole sequence is inserted literally. */void TUrl :: DecodeString(char * s){ char *p = s; for (; *s; s++, p++) { if (*s != '%') *p = *s; else { /* * Do nothing if at the end of the string. Or if the chars * are not hex-digits. */ if (!*(s + 1) || !*(s + 2) || !(isxdigit(*(s + 1)) && isxdigit(*(s + 2)))) { *p = *s; continue; } *p = (ASC2HEXD(*(s + 1)) << 4) + ASC2HEXD(*(s + 2)); s += 2; } } *p = '\0';}/* * Encodes the unsafe characters (listed in URL_UNSAFE) in a given * string, returning a malloc-ed %XX encoded string. */char * TUrl :: EncodeString(const char * s){ const char *b; char *p, *res; int i; b = s; for(i = 0; *s; s++, i++) { if(strchr(URL_UNSAFE, *s)) i += 2; } /* * Two more characters (hex digits) */ res = new char[i + 1]; /* * TODO add error handling */ s = b; for (p = res; *s; s++) { if(strchr(URL_UNSAFE, *s)) { *p++ = '%'; *p++ = HEXD2ASC(*s >> 4); *p++ = HEXD2ASC(*s & 0xf); } else *p++ = *s; } *p = '\0'; return res;}/* Extract the given URL of the form(http:|ftp:)//(user(:password)?@)?hostname(:port)?(/Path)?1. hostname (terminated with '/' or ':')2. port number (terminated with '/'), or chosen for the protocol3. dirname (everything after hostname)Most errors are handled. No allocation is done, you must supplypointers to allocated memory....and a host of other stuff :-) - Recognizes hostname:dir/file for FTP andhostname(:portnum)?/dir/file for HTTP.- Parses the Path to yield directory and file- Parses the URL to yield the username and passwd (if present)- Decodes the strings, in case they contain "forbidden" characters- Writes the result to struct urlinfoIf the argument STRICT is set, it recognizes only the canonicalform. *//* Build the directory and filename components of the Path. Both components are *separately* malloc-ed strings! It does not change the contents of Path. If the Path ends with "." or "..", they are (correctly) counted as directories. *//* note passwd user host dir file must be CLEANDUP *//* Return the URL as fine-formed string, with a proper protocol, port number, directory and optional user/password. If the hide is != 0, the password will be hidden. The forbidden characters in the URL will be cleansed. *//* Find URL of format scheme:hostname[:port]/dir in a buffer. Thebuffer may contain anything, the routine should not bug out. *//* This function constructs and returns a malloced copy of the relative link from two pieces of information: local name of the referring file (s1) and local name of the referred file (s2). So, if s1 is "jagor.srce.hr/index.html" and s2 is "jagor.srce.hr/images/news.gif", new name should be "images/news.gif". Alternately, if the s1 is "fly.cc.fer.hr/ioccc/index.html", and s2 is "fly.cc.fer.hr/images/fly.gif", new name should be "../images/fly.gif". Caveats: s1 should not begin with '/', unless s2 begins with '/' too. s1 should not contain things like ".." and such -- construct_relative("fly/ioccc/../index.html", "fly/images/fly.gif") will fail. (workaround is to call Path_simplify on s1). */TUrl :: TUrl(const char * s){ Addr = new char[strlen(s) + 1]; strcpy(Addr, s);}TUrl :: ~TUrl(void){ delete [] Addr;} void TUrl :: ParseUrl(void){ char *cp, *sp, *up; UserName = DEFAULT_FTP_USER; if(strncmp(Addr, "http://", 7) == 0) { Port = 80; HostName = Addr + 7; Proto = ptHTTP; } else if(strncmp(Addr, "ftp://", 6) == 0) { Port = 21; HostName = Addr + 6; Proto = ptFTP; } else if(strncmp(Addr, "mms://", 6) == 0) { Port = 1755; HostName = Addr + 6; Proto = ptMMS; } else if(strncmp(Addr, "rtsp://", 7) == 0) { Port = 554; HostName = Addr + 7; Proto = ptRTSP; } else ShowMsg("not an http or ftp url : %s", Addr); sp = strchr(HostName, '/'); if(sp != NULL) { *sp++ = '\0'; HostDirName = sp; } else HostDirName = NULL; sp = strrchr(HostDirName, '/'); if(sp != NULL) { *sp++ = '\0'; HostFileName = sp; } else { HostFileName = HostDirName; HostDirName = NULL; } up = strrchr(HostName, '@'); if(up != NULL) { UserName = HostName; *up++ = '\0'; HostName = up; } up = strrchr(UserName, ':'); if(up != NULL) { *up++ = '\0'; PassWord = up; } else PassWord = DEFAULT_FTP_PASSWD; cp = strrchr(HostName, ':'); if(cp != NULL) { *cp++ = '\0'; Port = atoi(cp); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -