📄 url_relative.c
字号:
static char rcsid[] = "$Id: url_relative.c,v 2.1 1997/03/21 18:10:11 sxw Exp $";/* This code is taken from libwww-3.0 *//* HTParse.c * URI MANAGEMENT * * (c) COPYRIGHT CERN 1994. * Please first read the full copyright statement in the file COPYRIGH. * * history: * May 12 94 TAB added as legal char in HTCleanTelnetString * *//* * url_relative.c - Build a full URL from a partial URL and its relative URL * * Duane Wessels, wessels@cs.colorado.edu, June 1995 * * ---------------------------------------------------------------------- * Copyright (c) 1994, 1995. All rights reserved. * * The Harvest software was developed by the Internet Research Task * Force Research Group on Resource Discovery (IRTF-RD): * * Mic Bowman of Transarc Corporation. * Peter Danzig of the University of Southern California. * Darren R. Hardy of the University of Colorado at Boulder. * Udi Manber of the University of Arizona. * Michael F. Schwartz of the University of Colorado at Boulder. * Duane Wessels of the University of Colorado at Boulder. * * This copyright notice applies to software in the Harvest * ``src/'' directory only. Users should consult the individual * copyright notices in the ``components/'' subdirectories for * copyright information about other software bundled with the * Harvest source code distribution. * * TERMS OF USE * * The Harvest software may be used and re-distributed without * charge, provided that the software origin and research team are * cited in any use of the system. Most commonly this is * accomplished by including a link to the Harvest Home Page * (http://harvest.cs.colorado.edu/) from the query page of any * Broker you deploy, as well as in the query result pages. These * links are generated automatically by the standard Broker * software distribution. * * The Harvest software is provided ``as is'', without express or * implied warranty, and with no support nor obligation to assist * in its use, correction, modification or enhancement. 
We assume * no liability with respect to the infringement of copyrights, * trade secrets, or any patents, and are not responsible for * consequential damages. Proper use of the Harvest software is * entirely the responsibility of the user. * * DERIVATIVE WORKS * * Users may make derivative works from the Harvest software, subject * to the following constraints: * * - You must include the above copyright notice and these * accompanying paragraphs in all forms of derivative works, * and any documentation and other materials related to such * distribution and use acknowledge that the software was * developed at the above institutions. * * - You must notify IRTF-RD regarding your distribution of * the derivative work. * * - You must clearly notify users that your are distributing * a modified version and not the original Harvest software. * * - Any derivative product is also subject to these copyright * and use restrictions. * * Note that the Harvest software is NOT in the public domain. We * retain copyright, as specified above. * * HISTORY OF FREE SOFTWARE STATUS * * Originally we required sites to license the software in cases * where they were going to build commercial products/services * around Harvest. In June 1995 we changed this policy. We now * allow people to use the core Harvest software (the code found in * the Harvest ``src/'' directory) for free. We made this change * in the interest of encouraging the widest possible deployment of * the technology. The Harvest software is really a reference * implementation of a set of protocols and formats, some of which * we intend to standardize. We encourage commercial * re-implementations of code complying to this set of standards. * */#include <string.h>#include <stdlib.h>#include <stdio.h>#include "util.h"#ifndef NULL#define NULL 0#endif#define TOLOWER(c) (isupper(c) ? 
tolower(c) : (c))static char *HTSimplify();struct struct_parts { char *access; /* Now known as "scheme" */ char *host; char *absolute; char *relative;/* char * search; no - treated as part of path */ char *anchor;};/* Scan a filename for its consituents * ** ----------------------------------- * ** * ** On entry, * ** name points to a document name which may be incomplete. * ** On exit, * ** absolute or relative may be nonzero (but not both). * ** host, anchor and access may be nonzero if they were specified. * ** Any which are nonzero point to zero terminated strings. */static void scan(name, parts) char *name; struct struct_parts *parts;{ char *after_access; char *p; int length = strlen(name); parts->access = 0; parts->host = 0; parts->absolute = 0; parts->relative = 0; parts->anchor = 0; after_access = name; for (p = name; *p; p++) { if (*p == ':') { *p = 0; parts->access = after_access; /* Scheme has been specified */ after_access = p + 1; if (0 == strcasecmp("URL", parts->access)) { parts->access = NULL; /* Ignore IETF's URL: pre-prefix */ } else break; } if (*p == '/') break; /* Access has not been specified */ if (*p == '#') break; } for (p = name + length - 1; p >= name; p--) { if (*p == '#') { parts->anchor = p + 1; *p = 0; /* terminate the rest */ } } p = after_access; if (*p == '/') { if (p[1] == '/') { parts->host = p + 2; /* host has been specified */ *p = 0; /* Terminate access */ p = strchr(parts->host, '/'); /* look for end of host name if any */ if (p) { *p = 0; /* Terminate host */ parts->absolute = p + 1; /* Root has been found */ } } else { parts->absolute = p + 1; /* Root found but no host */ } } else { parts->relative = (*after_access) ? after_access : 0; /* zero for "" */ }}/* Parse a Name relative to another name * ** ------------------------------------- * ** * ** This returns those parts of a name which are given (and requested) * ** substituting bits from the related name where necessary. 
* ** * ** On entry, * ** aName A filename given * ** relatedName A name relative to which aName is to be parsed. Give * ** it an empty string if aName is absolute. * ** On exit, * ** returns A pointer to a malloc'd string which MUST BE FREED */char *url_parse_relative(aName, relatedName) char *aName; char *relatedName;{ char *result = 0; char *return_value = 0; int len; char *name = 0; char *rel = 0; char *p; char *access; struct struct_parts given, related; if (!relatedName) /* HWL 23/8/94: dont dump due to NULL */ relatedName = ""; /* Make working copies of input strings to cut up: */ len = strlen(aName) + strlen(relatedName) + 10; result = (char *) xmalloc(len); /* Lots of space: more than enough */ name = xstrdup(aName); rel = xstrdup(relatedName); scan(name, &given); scan(rel, &related); result[0] = 0; /* Clear string */ access = given.access ? given.access : related.access; if (access) { strcat(result, access); strcat(result, ":"); } if (given.access && related.access) /* If different, inherit nothing. */ if (strcmp(given.access, related.access) != 0) { related.host = 0; related.absolute = 0; related.relative = 0; related.anchor = 0; } if (given.host || related.host) { strcat(result, "//"); strcat(result, given.host ? given.host : related.host); } if (given.host && related.host) /* If different hosts, inherit no path. */ if (strcmp(given.host, related.host) != 0) { related.absolute = 0; related.relative = 0; related.anchor = 0; } if (given.absolute) { /* All is given */ strcat(result, "/"); strcat(result, given.absolute); } else if (related.absolute) { /* Adopt path not name */ strcat(result, "/"); strcat(result, related.absolute); if (given.relative) { p = strchr(result, '?'); /* Search part? 
*/ if (!p) p = result + strlen(result) - 1; for (; *p != '/'; p--); /* last / */ p[1] = 0; /* Remove filename */ strcat(result, given.relative); /* Add given one */ result = HTSimplify(result); } } else if (given.relative) { strcat(result, given.relative); /* what we've got */ } else if (related.relative) { strcat(result, related.relative); } else { /* No inheritance */ strcat(result, "/"); } if (given.anchor || related.anchor) { strcat(result, "#"); strcat(result, given.anchor ? given.anchor : related.anchor); } xfree(rel); xfree(name); return_value = xstrdup(result); xfree(result); return return_value; /* exactly the right length */}/* Simplify a URI * // -------------- * // A URI is allowed to contain the seqeunce xxx/../ which may be * // replaced by "" , and the seqeunce "/./" which may be replaced by "/". * // Simplification helps us recognize duplicate URIs. * // * // Thus, /etc/junk/../fred becomes /etc/fred * // /etc/junk/./fred becomes /etc/junk/fred * // * // but we should NOT change * // http://fred.xxx.edu/../.. * // * // or ../../albert.html * // * // In the same manner, the following prefixed are preserved: * // * // ./<etc> * // //<etc> * // * // In order to avoid empty URLs the following URLs become: * // * // /fred/.. becomes /fred/.. * // /fred/././.. becomes /fred/.. * // /fred/.././junk/.././ becomes /fred/.. * // * // If more than one set of `://' is found (several proxies in cascade) then * // only the part after the last `://' is simplified. * // * // Returns: A string which might be the old one or a new one. 
*/
static char *HTSimplify(char *filename)
{
    char *path;
    char *p;
    int segments = 0;

    if (!filename) {
        return filename;
    }

    /* Locate the part of the string that is to be simplified. */
    if ((path = strstr(filename, "://")) != NULL) {     /* Find host name */
        char *newptr;
        path += 3;
        /* Several "://" in cascade: only the part after the last counts. */
        while ((newptr = strstr(path, "://")) != NULL)
            path = newptr + 3;
    } else if ((path = strstr(filename, ":/")) != NULL) {
        path += 2;
    } else {
        path = filename;
    }

    if (*path == '/' && *(path + 1) == '/') {   /* Some URLs start //<foo> */
        path += 1;
    } else if (!strncmp(path, "news:", 5)) {
        char *ptr = strchr(path + 5, '@');
        if (!ptr)
            ptr = path + 5;
        while (*ptr) {          /* Make group or host lower case */
            *ptr = TOLOWER(*ptr);
            ptr++;
        }
        return filename;        /* Doesn't need to do any more */
    }

    /*
     * First pass: count the `real' tokens, i.e. positions at the start of
     * path or at a '/' that are NOT followed by an empty segment, a "."
     * segment or a ".." segment.
     */
    for (p = path; *p; p++) {
        if (*p == '/' || p == path) {
            if (!((p[1] == '/' || !p[1]) ||
                  (p[1] == '.' && (p[2] == '/' || !p[2])) ||
                  (p[1] == '.' && p[2] == '.' && (p[3] == '/' || !p[3]))))
                segments++;
        }
    }

    /* Second pass: simplify in place. */
    p = path;
    while (*p) {
        if (*p == '/') {
            if (p > path && p[1] == '.' && (p[2] == '/' || !p[2])) {
                char *orig = p, *dest = p + 2;
                while ((*orig++ = *dest++))
                    ;           /* Remove a slash and a dot */
                continue;       /* re-examine what slid into this position */
            } else if (segments > 1 && p[1] == '.' && p[2] == '.' &&
                       (p[3] == '/' || !p[3])) {
                char *q = p;
                while (q > path && *--q != '/')
                    ;           /* prev slash */
                if (strncmp(q, "/../", 4) && strncmp(q, "/./", 3) &&
                    strncmp(q, "./", 2)) {
                    char *orig = q, *dest = p + 3;
                    if (*q != '/')
                        dest++;
                    while ((*orig++ = *dest++))
                        ;       /* Remove /xxx/.. */
                    segments--;
                    /*
                     * Start again with the previous slash.  Resuming at q
                     * directly (rather than stepping to q - 1 and back)
                     * avoids forming a pointer in front of the buffer when
                     * q is its very first character.
                     */
                    p = q;
                    continue;
                } else {
                    p++;
                }
            } else if (p[1] == '/') {
                while (p[1] == '/') {
                    char *orig = p, *dest = p + 1;
                    while ((*orig++ = *dest++))
                        ;       /* Remove multiple /'s */
                }
            }
        }
        p++;
    }                           /* end while (*p) */

    return filename;
}

#ifdef MAKE_MAIN
/*
 * Test driver: resolve argv[1] relative to argv[2] (optional) and print
 * the resulting URL on standard output.
 */
int main(int argc, char *argv[])
{
    char *url;

    if (argc < 2) {
        fprintf(stderr, "usage: %s url [related-url]\n",
                argc > 0 ? argv[0] : "url_relative");
        return 1;
    }
    /* With a single argument argv[2] is NULL, which the callee accepts. */
    url = url_parse_relative(argv[1], argv[2]);
    printf("%s\n", url);
    xfree(url);
    return 0;
}
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -