📄 uri.c

📁 微型浏览器
💻 C
字号:
/******************************************************************************* * * uri.c  * * Data structure and routines for parsing and storing uri strings.  * * Cheetah Web Browser * Copyright (C) 2001 Garett Spencley *  * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  * *******************************************************************************/#include <stdlib.h>#include <string.h>#include <stdio.h>#include <assert.h>#include "uri.h"#include "debug.h"#define MAX_PLEN 6#define TOTAL_KEYWORDS 5#define MIN_WORD_LENGTH 4#define MAX_WORD_LENGTH 7#define MIN_HASH_VALUE 5#define MAX_HASH_VALUE 10__inline static unsigned int p_hash(const char *str, unsigned int len){	static unsigned char asso_values[] = {		11, 11, 11, 11, 11, 11, 11, 11, 11, 11,      	11, 11, 11, 11, 11, 11, 11, 11, 11, 11,      	11, 11, 11, 11, 11, 11, 11, 11, 11, 11,      	11, 11, 11, 11, 11, 11, 11, 11, 11, 11,      	11, 11, 11, 11, 11, 11, 11, 11, 11, 11,      	11, 11, 11, 11, 11, 11, 11, 11,  0, 11,      	11, 11, 11, 11, 11, 11, 11, 11, 11, 11,      	11, 11, 11, 11, 11, 11, 11, 11, 11, 11,      	11, 11, 11, 11, 11, 11, 11, 11, 11, 11,      	11, 11, 11, 11, 11, 11, 11, 11, 11, 11,      	11, 11,  5,  0,  0, 11, 11, 11, 11, 11,      	11, 11, 11, 11, 11, 11, 11, 11, 11, 11,      	11, 11, 11, 11, 11, 11, 11, 11, 11, 11,      	11, 11, 11, 11, 11, 11, 11, 11, 11, 11,      	11, 11, 11, 11, 11, 11, 11, 11, 11, 11,      	11, 11, 11, 11, 11, 11, 11, 11, 11, 11,      	11, 11, 11, 11, 11, 11, 11, 11, 11, 11,      	11, 11, 11, 11, 11, 11, 11, 11, 11, 11,      	11, 11, 11, 11, 11, 11, 11, 11, 11, 11,      	11, 11, 11, 11, 11, 11, 11, 11, 11, 11,      	11, 11, 11, 11, 11, 11, 11, 11, 11, 11,      	11, 11, 11, 11, 11, 11, 11, 11, 11, 11,      	11, 11, 11, 11, 11, 11, 11, 11, 11, 11,      	11, 11, 11, 11, 11, 11, 11, 11, 11, 11,      	11, 11, 11, 11, 11, 11, 11, 11, 11, 11,      	11, 11, 11, 11, 11, 11    };  	return len + asso_values[(unsigned char)str[len - 1]] + asso_values[(unsigned char)str[0]];}__inline protocol_t *p_lookup(const char *str, unsigned int len){	static protocol_t wordlist[] = {		{""}, {""}, {""}, {""}, {""},      	{"http:", HTTP},      	{"https:", HTTPS},      	{"gopher:", GOPHER},      	{""},      	{"ftp:", FTP},      	{"file:", L_FILE}	};  	if(len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) {					register int key = p_hash(str, len);      	if(key <= MAX_HASH_VALUE && key >= 0) {				        	register const char *s = wordlist[key].string;          	if(*str == *s && !strcmp(str + 1, s + 1))            	return &wordlist[key];        }	}  	return NULL;}/* * uri_new() - create an empty uri structure */uri_t *uri_new(){	uri_t *result;	result = (uri_t *)malloc(sizeof(uri_t));	if(!result)		return NULL;	return result;}/* * uri_free() - free a uri structure */void uri_free(uri_t *uri){	if(!uri)		return;	free(uri->host);	free(uri->anchor);	free(uri->abs_path);	free(uri);}/* * protocol_lookup() - return the protocol type */__inline unsigned int protocol_lookup(char *string){	protocol_t *p;	p = p_lookup(string, strlen(string));	if(p)		return p->id;	return -1;}/* * parse_uri() - parse url and store it in a uri struct */__inline uri_t *parse_file(uri_t *uri, const char *string){	const char *p, *pp;	char *tmp;	char path[256], anchor[256];	uri->protocol = L_FILE;	/* Skip over 'file:' */	p = string + 4;		/* skip over '//', if it's present */	pp = p;	if(*pp == '/' && *(++pp) == '/') 		++p; ++p;		/* Extract the path */	tmp = path;	while(*p && *p != '#')		*tmp++ = *p++;			*tmp = 0;	uri->host      = strdup("");	uri->abs_path  = strdup(path);			/* Extract the anchor */	if(*p == '#') {		tmp = anchor;		while(*p) 			*tmp++ = *p++;		*tmp = 0;		uri->anchor = strdup(anchor);	} else		uri->anchor = strdup("");	return uri;}uri_t *parse_uri(const char *uri){	uri_t *result;	int pos = 0;	const char *p;	char *tmp;	char proto[10]; 	char port[10]; 	char host[256]; 	char abs[256]; 	char anchor[256];	result = uri_new();	if(!result)		return NULL;		/* Look for first ':' if the position is less than the maximum length of	 * a protocol string (6) then the protocol is present and we copy it */	p = strchr(uri, ':');	if(p && (p - uri) <= MAX_PLEN) {		pos = p - uri + 1;		strncpy(proto, uri, pos);		proto[pos] = 0;	}	/* Special treatment for the file protocol */	if(pos && strcasecmp(proto, "file:") == 0) {		result = parse_file(result, uri);		return result;	}	/* If pos was set (meaning there was a protocol), skip over it to 	 * get to the host. */	if(pos) {		p = strchr(uri, '/');		if(!p) {			debug_print("invalid uri: %s", uri);			return NULL;		}		++p; ++p;	} else {		p = uri;		strcpy(proto, "http:");	}		result->protocol = protocol_lookup(proto);	/* Parse out host */	tmp = host;	while(*p && *p != '/' && *p != '#' && *p != ':')		*tmp++ = *p++;			*tmp = 0;		result->host = strdup(host);	/* Now the port */	if(*p == ':') {		++p;		tmp = port;		while(*p && *p != '/')			*tmp++ = *p++;		*tmp = 0;		result->port = atoi(port);	} else		result->port = 80;	/* Now the document. If one is not present use '/' */	if(*p && *p != '#') {		tmp = abs;		while(*p && *p != '#') 			*tmp++ = *p++;		*tmp = 0;	} else 		strcpy(abs, "/");		result->abs_path = strdup(abs);	/* And finally the anchor */	if(*p == '#') {		tmp = anchor;		while(*p) 			*tmp++ = *p++;		*tmp = 0;		result->anchor = strdup(anchor);	} else		result->anchor = strdup("");	return result;}int is_absolute(const char *uri){	char *p;	p = strchr(uri, ':');	if(p && (uri - p) <= MAX_PLEN)		return 1;	return 0;}char *resolve_relative_uri(const char *base, const char *uri_string){	uri_t *uri;	char *path, *result;    char *lastslash;    const char* proto;	if(is_absolute(uri_string))		return strdup(uri_string);		/* parse the base uri */	uri = parse_uri(base);	if(!uri)		return NULL;	/* Allocate a relative uri */	result = (char *)malloc((strlen(uri->host)+                             strlen(uri->abs_path)+                             strlen(uri_string)) * 2);	if(!result)		return NULL;	/* Determine protocol */	switch(uri->protocol) {	case HTTP:        proto = "http";		break;	case HTTPS:        proto = "https";		break;	case L_FILE:		proto = "file";		break;	case FTP:		proto = "ftp";		break;			case GOPHER:		proto = "gopher";		break;    default:        // FIXME: Unknown protocol, need to handle this.        assert(0);	}	/* If uri begins with '/' then we don't have to worry about path */	if(*uri_string == '/') {        /* NOTE: uri->host MUST NOT have a trailing slash. */        sprintf(result, "%s://%s%s", proto, uri->host, uri_string);        debug_print("resolve_relative_uri: ==> %s\n", result);		return result;	}	/* Grab the directory path (ie: strip off file from base) */	path = strdup(uri->abs_path);    lastslash = strrchr(path, '/');	if(lastslash)        lastslash[0] = '\0';	/* Format the string */    sprintf(result, "%s://%s%s/%s", proto, uri->host, path, uri_string);    uri_free(uri);    free(path);	return result;}
💿 文件大小 185 K
👤 上传用户 yufei66900
📂 所属分类 *行业应用
🏷️ 相关标签

#浏览器
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -