📄 uri.c
字号:
/******************************************************************************* * * uri.c * * Data structure and routines for parsing and storing uri strings. * * Cheetah Web Browser * Copyright (C) 2001 Garett Spencley * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * *******************************************************************************/#include <stdlib.h>#include <string.h>#include <stdio.h>#include <assert.h>#include "uri.h"#include "debug.h"#define MAX_PLEN 6#define TOTAL_KEYWORDS 5#define MIN_WORD_LENGTH 4#define MAX_WORD_LENGTH 7#define MIN_HASH_VALUE 5#define MAX_HASH_VALUE 10__inline static unsigned int p_hash(const char *str, unsigned int len){ static unsigned char asso_values[] = { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 0, 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 }; return len + asso_values[(unsigned char)str[len - 1]] + asso_values[(unsigned char)str[0]];}__inline protocol_t *p_lookup(const char *str, unsigned int len){ static protocol_t wordlist[] = { {""}, {""}, {""}, {""}, {""}, {"http:", HTTP}, {"https:", HTTPS}, {"gopher:", GOPHER}, {""}, {"ftp:", FTP}, {"file:", L_FILE} }; if(len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) { register int key = p_hash(str, len); if(key <= MAX_HASH_VALUE && key >= 0) { register const char *s = wordlist[key].string; if(*str == *s && !strcmp(str + 1, s + 1)) return &wordlist[key]; } } return NULL;}/* * uri_new() - create an empty uri structure */uri_t *uri_new(){ uri_t *result; result = (uri_t *)malloc(sizeof(uri_t)); if(!result) return NULL; return result;}/* * uri_free() - free a uri structure */void uri_free(uri_t *uri){ if(!uri) return; free(uri->host); free(uri->anchor); free(uri->abs_path); free(uri);}/* * protocol_lookup() - return the protocol type */__inline unsigned int protocol_lookup(char *string){ protocol_t *p; p = p_lookup(string, strlen(string)); if(p) return p->id; return -1;}/* * parse_uri() - parse url and store it in a uri struct */__inline uri_t *parse_file(uri_t *uri, const char *string){ const char *p, *pp; char *tmp; char path[256], anchor[256]; uri->protocol = L_FILE; /* Skip over 'file:' */ p = string + 4; /* skip over '//', if it's present */ pp = p; if(*pp == '/' && *(++pp) == '/') ++p; ++p; /* Extract the path */ tmp = path; while(*p && *p != '#') *tmp++ = *p++; *tmp = 0; uri->host = strdup(""); uri->abs_path = strdup(path); /* Extract the anchor */ if(*p == '#') { tmp = anchor; while(*p) *tmp++ = *p++; *tmp = 0; uri->anchor = strdup(anchor); } else uri->anchor = strdup(""); return uri;}uri_t *parse_uri(const char *uri){ uri_t *result; int pos = 0; const char *p; char *tmp; char proto[10]; char port[10]; char host[256]; char abs[256]; char anchor[256]; result = uri_new(); if(!result) return NULL; /* Look for first ':' if the position is less than the maximum length of * a protocol string (6) then the protocol is present and we copy it */ p = strchr(uri, ':'); if(p && (p - uri) <= MAX_PLEN) { pos = p - uri + 1; strncpy(proto, uri, pos); proto[pos] = 0; } /* Special treatment for the file protocol */ if(pos && strcasecmp(proto, "file:") == 0) { result = parse_file(result, uri); return result; } /* If pos was set (meaning there was a protocol), skip over it to * get to the host. */ if(pos) { p = strchr(uri, '/'); if(!p) { debug_print("invalid uri: %s", uri); return NULL; } ++p; ++p; } else { p = uri; strcpy(proto, "http:"); } result->protocol = protocol_lookup(proto); /* Parse out host */ tmp = host; while(*p && *p != '/' && *p != '#' && *p != ':') *tmp++ = *p++; *tmp = 0; result->host = strdup(host); /* Now the port */ if(*p == ':') { ++p; tmp = port; while(*p && *p != '/') *tmp++ = *p++; *tmp = 0; result->port = atoi(port); } else result->port = 80; /* Now the document. If one is not present use '/' */ if(*p && *p != '#') { tmp = abs; while(*p && *p != '#') *tmp++ = *p++; *tmp = 0; } else strcpy(abs, "/"); result->abs_path = strdup(abs); /* And finally the anchor */ if(*p == '#') { tmp = anchor; while(*p) *tmp++ = *p++; *tmp = 0; result->anchor = strdup(anchor); } else result->anchor = strdup(""); return result;}int is_absolute(const char *uri){ char *p; p = strchr(uri, ':'); if(p && (uri - p) <= MAX_PLEN) return 1; return 0;}char *resolve_relative_uri(const char *base, const char *uri_string){ uri_t *uri; char *path, *result; char *lastslash; const char* proto; if(is_absolute(uri_string)) return strdup(uri_string); /* parse the base uri */ uri = parse_uri(base); if(!uri) return NULL; /* Allocate a relative uri */ result = (char *)malloc((strlen(uri->host)+ strlen(uri->abs_path)+ strlen(uri_string)) * 2); if(!result) return NULL; /* Determine protocol */ switch(uri->protocol) { case HTTP: proto = "http"; break; case HTTPS: proto = "https"; break; case L_FILE: proto = "file"; break; case FTP: proto = "ftp"; break; case GOPHER: proto = "gopher"; break; default: // FIXME: Unknown protocol, need to handle this. assert(0); } /* If uri begins with '/' then we don't have to worry about path */ if(*uri_string == '/') { /* NOTE: uri->host MUST NOT have a trailing slash. */ sprintf(result, "%s://%s%s", proto, uri->host, uri_string); debug_print("resolve_relative_uri: ==> %s\n", result); return result; } /* Grab the directory path (ie: strip off file from base) */ path = strdup(uri->abs_path); lastslash = strrchr(path, '/'); if(lastslash) lastslash[0] = '\0'; /* Format the string */ sprintf(result, "%s://%s%s/%s", proto, uri->host, path, uri_string); uri_free(uri); free(path); return result;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -