📄 url.cc
字号:
// Larbin// Sebastien Ailleret// 15-11-99 -> 29-03-00/* This class describes an URL */#include <assert.h>#include <stdlib.h>#include <stdio.h>#include <string.h>#include <ctype.h>#include <iostream.h>#include <sys/socket.h>#include "types.h"#include "global.h"#include "xutils/url.h"#include "xutils/text.h"#include "xutils/connexion.h"#include "xutils/debug.h"/* Constructor : Parses an url */url::url (char *u, uint depth, char *base) { newUrl(); this->depth = depth; host = NULL; port = 80; file = NULL; if (startWithIgnoreCase("http://", u)) { // absolute url parse (u + 7); } else if (base != NULL) { if (startWithIgnoreCase("http:", u)) { parseWithBase(u+5, base); } else if (isProtocol(u)) { // Unknown protocol (mailto, ftp, news, file, gopher...) } else { parseWithBase(u, base); } } delete [] u;}/* Constructor : read the url from a file (cf serialize) */url::url (String *line) { newUrl(); int i=0; // Read depth depth = 0; while ((*line)[i] >= '0' && (*line)[i] <= '9') { depth = 10*depth + (*line)[i] - '0'; i++; } assert((*line)[i] == ' '); int deb = ++i; // Read host while ((*line)[i] != ':' && (*line)[i] != 0) { i++; } assert ((*line)[i] != 0); (line->getString())[i] = 0; host = newString(line->getString()+deb); i++; // Read port port = 0; while ((*line)[i] >= '0' && (*line)[i] <= '9') { port = 10*port + (*line)[i] - '0'; i++; } // Read file name assert((*line)[i] == '/'); file = newString(line->getString()+i); delete line;}/* Destructor */url::~url () { delUrl(); delete [] host; delete [] file;}/* Is it a valid url ? */int url::isValid () { return (host != NULL) && (file != NULL);}/* print an URL */void url::print () { cout << "http://" << host << ":" << port << file << "\n";}/* return the host */char *url::getHost () { return host;}/* return the port */uint url::getPort () { return port;}/* return the file */char *url::getFile () { return file;}/** Depth in the Site */uint url::getDepth () { return depth;}/** Set depth to one if we are at an entry point in the site */void url::setDepth (char *fromHost) { if (strcmp(fromHost, host)) { depth = global::depthInSite; }}/* return the base of the url */char *url::giveBase () { char *tmp; int i = strlen(file); int j; assert (file[0] == '/'); while (file[i] != '/') { i--; } j = strlen(host); tmp = new char[12+i+j]; // strlen(host) + 1 + 9 + (i+1) + 1 // (host):(port)(file) strcpy (tmp, host); tmp[j] = ':'; sprintf(tmp + j + 1, "%u", port); while (tmp[j] != 0) { j++; } tmp[i+j+1] = 0; // delete file name, just keep the path while (i >= 0) { tmp [j+i] = file[i]; i--; } return tmp;}/** return a char * representation of the url * give means that you have to delete the string yourself */char *url::giveUrl () { char *tmp; int i = strlen(file); int j = strlen(host); tmp = new char[18+i+j]; // 7 + j + 1 + 9 + i + 1 // http://(host):(port)(file) strcpy(tmp, "http://"); strcpy (tmp+7, host); tmp[j+7] = ':'; sprintf(tmp + j + 8, "%u", port); j += 8; while (tmp[j] != 0) { j++; } // Copy file name while (i >= 0) { tmp [j+i] = file[i]; i--; } return tmp;}/* return a hashcode for the host of this url */uint url::hostHashCode () { unsigned int h=port; unsigned int i; for (i=0; i < strlen(host); i++) { h = 37*h + host[i]; } return h;}/* return a hashcode for this url */uint url::hashCode () { unsigned int h=port; unsigned int i; for (i=0; i < strlen(host); i++) { h = 31*h + host[i]; } for (i=0; i < strlen(file); i++) { h = 31*h + file[i]; } return h;}/* parses a url : * at the end, arg must not have its initial state, * at least for the real part of the url * http:// has allready been suppressed */void url::parse (char *arg) { int deb = 0, fin = deb; // Find the end of host name (put it into lowerCase) while (arg[fin] != '/' && arg[fin] != ':' && arg[fin] != '\n' && arg[fin] != 0) { if (arg[fin] >= 'A' && arg[fin] <= 'Z') { arg[fin] = arg[fin] - 'A' + 'a'; } fin++; } if (arg[fin] == '/') { // no port specified, use 80 (default), get host name arg[fin] = '\0'; host = newString(arg + deb); arg[fin] = '/'; } else if (arg[fin] == ':') { // read port number arg[fin] = '\0'; host = newString(arg + deb); arg[fin] = ':'; port = 0; fin++; while (arg[fin] >= '0' && arg[fin] <= '9') { port = port*10 + arg[fin]-'0'; fin++; } if (arg[fin] != '/') { delete [] host; host = NULL; return; } } else { // invalid url return; } // normalize file name if (normalize(arg + fin)) { // get file name file = newString(arg + fin); } else { delete [] host; host = NULL; }}/** parse a file with base */void url::parseWithBase (char *u, char *base) { int i = 0; // relative url parse (base); // suppress the #qsdf at the end of the file name while (u[i] != 0 && u[i] != '#' && u[i] != '\n') { i++; } u[i] = 0; // cat filebase and file if (isValid()) { if (u[0] == '/') { delete [] file; file = newString(u); } else { char *tmp = new char[strlen(file) + strlen(u) + 1]; strcpy(tmp, file); strcpy(tmp + strlen(file), u); delete [] file; file = tmp; } if (!normalize(file)) { delete [] host; host = NULL; delete [] file; file = NULL; } } else { // problem with base tag }}/** normalize file name * @return true if it is ok, false otherwise (cgi-bin) */bool url::normalize (char *file) { int i=0; while (file[i] != 0 && file[i] != '#' && file[i] != '\n') { if (file[i] == '/') { if (file[i+1] == '.' && file[i+2] == '/') { // suppress /./ int j=i+3; while (file[j] != 0) { file[j-2] = file[j]; j++; } file[j-2] = file[j]; // write the final char (0) } else if (file[i+1] == '.' && file[i+2] == '.' && file[i+3] == '/') { // suppress /../ if (i == 0) { // the file name starts with /../ : error return false; } else { int j = i+4, dec; i--; while (file[i] != '/') { i--; } dec = i+1-j; // dec < 0 while (file[j] != 0) { file[j+dec] = file[j]; j++; } file[j+dec] = file[j]; // write the final char (0) } } else { // nothing special, go forward i++; } } else if (file[i] == '?') { // My god : this is one more of those horrible cgi return false; } else { // nothing special, go forward i++; } } file[i] = 0; return true;}/* Does this url starts with a protocol name */bool url::isProtocol (char *s) { uint i = 0; while (isalnum(s[i])) { i++; } return s[i] == ':';}/* serialize the url */char *url::serialize (int fds) { char *s = new char[30+strlen(host)+strlen(file)]; sprintf(s, "%u %s:%u%s\n", depth, host, port, file); return s;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -