📄 url.c
字号:
/* * File: url.c * * Copyright (C) 2001 Jorge Arellano Cid <jcid@dillo.org> * 2001 Livio Baldini Soares <livio@linux.ime.usp.br> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. *//* * Parse and normalize all URL's inside Dillo. * - <scheme> <authority> <path> <query> and <fragment> point to 'buffer'. * - 'url_string' is built upon demand (transparent to the caller). * - 'hostname' and 'port' are also being handled on demand. *//* * Regular Expression as given in RFC2396 for URL parsing. * * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? * 12 3 4 5 6 7 8 9 * * scheme = $2 * authority = $4 * path = $5 * query = $7 * fragment = $9 */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include <glib.h>#include "url.h"/*#define DEBUG_LEVEL 2 */#include "debug.h"/* * Return the url as a string. * (initializing 'url_string' camp if necessary) */gchar *a_Url_str(const DilloUrl *u){ /* Internal url handling IS transparent to the caller */ DilloUrl *url = (DilloUrl *) u; g_return_val_if_fail (url != NULL, NULL); if (!url->url_string) { url->url_string = g_string_sized_new(60); g_string_sprintf( url->url_string, "%s%s%s%s%s%s%s%s%s%s", url->scheme ? url->scheme : "", url->scheme ? ":" : "", url->authority ? "//" : "", url->authority ? url->authority : "", (url->path && url->path[0] != '/' && url->authority) ? "/" : "", url->path ? url->path : "", url->query ? "?" : "", url->query ? url->query : "", url->fragment ? "#" : "", url->fragment ? url->fragment : ""); } return url->url_string->str;}/* * Return the hostname as a string. * (initializing 'hostname' and 'port' camps if necessary) * Note: a similar approach can be taken for user:password auth. 
*/const gchar *a_Url_hostname(const DilloUrl *u){ gchar *p; /* Internal url handling IS transparent to the caller */ DilloUrl *url = (DilloUrl *) u; if (!url->hostname && url->authority) { if ((p = strchr(url->authority, ':'))) { url->port = strtol(p + 1, NULL, 10); url->hostname = g_strndup(url->authority,(guint)(p - url->authority)); } else url->hostname = url->authority; } return url->hostname;}/* * Create a DilloUrl object and initialize it. * (buffer, scheme, authority, path, query and fragment). */static DilloUrl *Url_object_new(const gchar *uri_str){ DilloUrl *url; gchar *s, *p; g_return_val_if_fail (uri_str != NULL, NULL); url = g_new0(DilloUrl, 1); /* remove leading & trailing space from buffer */ for (p = (gchar *)uri_str; isspace(*p); ++p); url->buffer = g_strchomp(g_strdup(p)); s = (gchar *) url->buffer; p = strpbrk(s, ":/?#"); if (p && p[0] == ':' && p > s) { /* scheme */ *p = 0; url->scheme = s; s = ++p; } /* p = strpbrk(s, "/"); */ if (p == s && p[0] == '/' && p[1] == '/') { /* authority */ s = p + 2; p = strpbrk(s, "/?#"); if (p) { memmove(s - 2, s, (size_t)MAX(p - s, 1)); url->authority = s - 2; p[-2] = 0; s = p; } else if (*s) { url->authority = s; return url; } } p = strpbrk(s, "?#"); if (p) { /* path */ url->path = (p > s) ? s : NULL; s = p; } else if (*s) { url->path = s; return url; } p = strpbrk(s, "?#"); if (p && p[0] == '?') { /* query */ *p = 0; s = p + 1; url->query = s; p = strpbrk(s, "#"); } if (p && p[0] == '#') { /* fragment */ *p = 0; s = p + 1; url->fragment = s; } return url;}/* * Free a DilloUrl */void a_Url_free(DilloUrl *url){ if (url) { if (url->url_string) g_string_free(url->url_string, TRUE); if (url->hostname != url->authority) g_free((gchar *)url->hostname); g_free((gchar *)url->buffer); g_free((gchar *)url->data); g_free((gchar *)url->alt); g_free(url); }}/* * Resolve the URL as RFC2396 suggests. 
*/
/*
 * Url_resolve_relative() details:
 *  RelStr     - URL reference to resolve (absolute or relative).
 *  BaseUrlPar - already parsed base URL, or NULL to have 'BaseStr' parsed
 *               here when it is needed.
 *  BaseStr    - base URL as a string; NULL is handled as an empty base.
 *
 * Returns a newly allocated GString holding the resolved URL (owned by
 * the caller), or NULL when 'RelStr' is NULL.
 */
static GString *Url_resolve_relative(const gchar *RelStr,
                                     DilloUrl *BaseUrlPar,
                                     const gchar *BaseStr)
{
   gchar *p, *s, *e;
   gint i;
   GString *SolvedUrl, *Path;
   DilloUrl *RelUrl, *BaseUrl = NULL;

   g_return_val_if_fail (RelStr != NULL, NULL);

   /* parse relative URL */
   RelUrl = Url_object_new(RelStr);

   if (BaseUrlPar) {
      BaseUrl = BaseUrlPar;
   } else if (RelUrl->scheme == NULL) {
      /* only required when there's no <scheme> in RelStr.
       * A missing base string is parsed as an empty one, so BaseUrl is
       * never NULL on the paths below that dereference it. */
      BaseUrl = Url_object_new(BaseStr ? BaseStr : "");
   }

   SolvedUrl = g_string_sized_new(64);
   Path = g_string_sized_new(64);

   /* path empty && scheme, authority and query undefined */
   if (!RelUrl->path && !RelUrl->scheme &&
       !RelUrl->authority && !RelUrl->query) {
      if (BaseStr)
         g_string_append(SolvedUrl, BaseStr);

      if (RelUrl->fragment) {                      /* fragment */
         /* Drop the base fragment. The '#' is located in the copy that
          * was just made: offsets taken from BaseUrl->buffer are off
          * when BaseStr starts with whitespace, because the parser
          * strips it. */
         if (BaseUrl->fragment && (p = strchr(SolvedUrl->str, '#')))
            g_string_truncate(SolvedUrl, p - SolvedUrl->str);
         g_string_append_c(SolvedUrl, '#');
         g_string_append(SolvedUrl, RelUrl->fragment);
      }
      goto done;

   } else if (RelUrl->scheme) {                    /* scheme */
      /* Absolute reference: taken verbatim */
      g_string_append(SolvedUrl, RelStr);
      goto done;

   } else if (RelUrl->authority) {                 /* authority */
      /* Set the Path buffer and goto "STEP 7"; */
      if (RelUrl->path)
         g_string_append(Path, RelUrl->path);

   } else if (RelUrl->path && RelUrl->path[0] == '/') {   /* path */
      g_string_append(Path, RelUrl->path);

   } else {
      /* solve relative path */
      if (BaseUrl->path) {
         /* Keep the base path up to and including its last '/' */
         g_string_append(Path, BaseUrl->path);
         for (i = Path->len; --i >= 0 && Path->str[i] != '/'; );
         /* i is -1 when the base path has no '/': nothing to cut, and
          * Path->str[-1] must not be read */
         if (i >= 0)
            g_string_truncate(Path, i + 1);
      }
      if (RelUrl->path)
         g_string_append(Path, RelUrl->path);

      /* erase "./" wherever "." is a complete segment.
       * The scan skips over a "./" that is only the tail of another
       * segment (such as "../") instead of stopping there. Like the loop
       * further down, it relies on g_string_erase() editing Path->str in
       * place. */
      p = Path->str;
      while ((p = strstr(p, "./")) != NULL) {
         if (p == Path->str || p[-1] == '/')
            g_string_erase(Path, p - Path->str, 2);
         else
            p += 2;
      }

      /* erase last "." */
      if (Path->len && Path->str[Path->len - 1] == '.' &&
          (Path->len == 1 || Path->str[Path->len - 2] == '/'))
         g_string_truncate(Path, Path->len - 1);

      /* erase "<segment>/../" and "<segment>/.." */
      s = p = Path->str;
      while ( (p = strstr(p, "/..")) != NULL ) {
         if ((p[3] == '/' || !p[3]) && (p - s)) {  /* "/../" | "/.." */
            /* Walk back to the start of the preceding segment; the bound
             * is tested first so that s[-1] is never read */
            for (e = p + 3 ; p > s && p[-1] != '/'; --p);
            if (p[0] != '.' || p[1] != '.' || p[2] != '/') {
               g_string_erase(Path, p - Path->str, e - p + (*e != 0));
               p -= (p > Path->str);
            } else
               p = e;   /* the preceding segment is ".." itself: keep it */
         } else
            p += 3;
      }
   }

   /* STEP 7
    */

   /* scheme */
   if (BaseUrl->scheme) {
      g_string_append(SolvedUrl, BaseUrl->scheme);
      g_string_append_c(SolvedUrl, ':');
   }

   /* authority */
   if (RelUrl->authority) {
      g_string_append(SolvedUrl, "//");
      g_string_append(SolvedUrl, RelUrl->authority);
   } else if (BaseUrl->authority) {
      g_string_append(SolvedUrl, "//");
      g_string_append(SolvedUrl, BaseUrl->authority);
   }

   /* path */
   if ((RelUrl->authority || BaseUrl->authority) &&
       ((Path->len == 0 && (RelUrl->query || RelUrl->fragment)) ||
        (Path->len && Path->str[0] != '/')))
      g_string_append_c(SolvedUrl, '/'); /* hack? */
   g_string_append(SolvedUrl, Path->str);

   /* query */
   if (RelUrl->query) {
      g_string_append_c(SolvedUrl, '?');
      g_string_append(SolvedUrl, RelUrl->query);
   }

   /* fragment */
   if (RelUrl->fragment) {
      g_string_append_c(SolvedUrl, '#');
      g_string_append(SolvedUrl, RelUrl->fragment);
   }

done:
   g_string_free(Path, TRUE);
   a_Url_free(RelUrl);
   /* The base object is only ours when it was parsed here */
   if (BaseUrl != BaseUrlPar)
      a_Url_free(BaseUrl);
   return SolvedUrl;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -