📄 url.c
字号:
path[1] = '\0'; return; } /* Walk along PATH looking for things to compact. */ i = 0; while (1) { if (!path[i]) break; while (path[i] && path[i] != '/') i++; start = i++; /* If we didn't find any slashes, then there is nothing left to do. */ if (!path[start]) break; /* Handle multiple `/'s in a row. */ while (path[i] == '/') i++; if ((start + 1) != i) { strcpy(path + start + 1, path + i); i = start + 1; } /* Check for trailing `/'. */ if (start && !path[i]) { zero_last: path[--i] = '\0'; break; } /* Check for `../', `./' or trailing `.' by itself. */ if (path[i] == '.') { /* Handle trailing `.' by itself. */ if (!path[i + 1]) goto zero_last; /* Handle `./'. */ if (path[i + 1] == '/') { strcpy(path + i, path + i + 1); i = (start < 0) ? 0 : start; continue; } /* Handle `../' or trailing `..' by itself. */ if (path[i + 1] == '.' && (path[i + 2] == '/' || !path[i + 2])) { while (--start > -1 && path[start] != '/'); strcpy(path + start + 1, path + i + 2); i = (start < 0) ? 0 : start; continue; } } /* path == '.' */ } /* while */ if (!*path) { *path = stub_char; path[1] = '\0'; }}/* Special versions of DOTP and DDOTP for parse_dir(). They work like DOTP and DDOTP, but they also recognize `?' as end-of-string delimiter. This is needed for correct handling of query strings. */#define PD_DOTP(x) ((*(x) == '.') && (!*((x) + 1) || *((x) + 1) == '?'))#define PD_DDOTP(x) ((*(x) == '.') && (*(x) == '.') \ && (!*((x) + 2) || *((x) + 2) == '?'))/* Like strlen(), but allow the URL to be ended with '?'. */int urlpath_length(const char *url){ const char *q = strchr(url, '?'); if (q) return q - url; return strlen(url);}/* Build the directory and filename components of the path. Both components are *separately* malloc-ed strings! It does not change the contents of path. If the path ends with "." or "..", they are (correctly) counted as directories. */void parse_dir(const char *path, char **dir, char **file){ int i, l; l = urlpath_length(path); for (i = l; i && path[i] != '/'; i--); if (!i && *path != '/') /* Just filename */ { if (PD_DOTP(path) || PD_DDOTP(path)) { *dir = strdupdelim(path, path + l); *file = kstrdup(path + l); /* normally empty, but could contain ?... */ } else { *dir = kstrdup(""); /* This is required because of FTP */ *file = kstrdup(path); } } else if (!i) /* /filename */ { if (PD_DOTP(path + 1) || PD_DDOTP(path + 1)) { *dir = strdupdelim(path, path + l); *file = kstrdup(path + l); /* normally empty, but could contain ?... */ } else { *dir = kstrdup("/"); *file = kstrdup(path + 1); } } else /* Nonempty directory with or without a filename */ { if (PD_DOTP(path + i + 1) || PD_DDOTP(path + i + 1)) { *dir = strdupdelim(path, path + l); *file = kstrdup(path + l); /* normally empty, but could contain ?... */ } else { *dir = strdupdelim(path, path + i); *file = kstrdup(path + i + 1); } }}/* Skip the protocol part of the URL, e.g. `http://'. If no protocol part is found, returns 0. */int skip_proto(const char *url){ char **s; int l; for (s = protostrings; *s; s++) if (!strncasecmp(*s, url, strlen(*s))) break; if (!*s) return 0; l = strlen(*s); /* HTTP and FTP protocols are expected to yield exact host names (i.e. the `//' part must be skipped, too). */ if (!strcmp(*s, "http:") || !strcmp(*s, "ftp:")) l += 2; return l;}/* Find the optional username and password within the URL, as per RFC1738. The returned user and passwd char pointers are malloc-ed. */static uerr_t parse_uname(const char *url, char **user, char **passwd){ int l; const char *p, *q, *col; char **where; *user = NULL; *passwd = NULL; /* Look for the end of the protocol string. */ l = skip_proto(url); if (!l) return URLUNKNOWN; /* Add protocol offset. */ url += l; /* Is there an `@' character? */ for (p = url; *p && *p != '/'; p++) if (*p == '@') break; /* If not, return. */ if (*p != '@') return URLOK; /* Else find the username and password. */ for (p = q = col = url; *p && *p != '/'; p++) { if (*p == ':' && !*user) { *user = (char *) kmalloc(p - url + 1); memcpy(*user, url, p - url); (*user)[p - url] = '\0'; col = p + 1; } if (*p == '@') q = p; } /* Decide whether you have only the username or both. */ where = *user ? passwd : user; *where = (char *) kmalloc(q - col + 1); memcpy(*where, col, q - col); (*where)[q - col] = '\0'; return URLOK;}/* Return the URL as fine-formed string, with a proper protocol, optional port number, directory and optional user/password. If `hide' is non-zero (as it is when we're calling this on a URL we plan to print, but not when calling it to canonicalize a URL for use within the program), password will be hidden. The forbidden characters in the URL will be cleansed. */char *str_url(const urlinfo * u, int hide){ char *res, *host, *user, *passwd, *proto_name, *dir, *file; int i, l, ln, lu, lh, lp, lf, ld; unsigned short proto_default_port; /* Look for the protocol name. */ for (i = 0; i < ARRAY_SIZE(sup_protos); i++) if (sup_protos[i].ind == u->proto) break; if (i == ARRAY_SIZE(sup_protos)) return NULL; proto_name = sup_protos[i].name; proto_default_port = sup_protos[i].port; host = encode_string(u->host); dir = encode_string(u->dir); file = encode_string(u->file); user = passwd = NULL; if (u->user) user = encode_string(u->user); if (u->passwd) { if (hide) /* Don't output the password, or someone might see it over the user's shoulder (or in saved wget output). Don't give away the number of characters in the password, either, as we did in past versions of this code, when we replaced the password characters with 'x's. */ passwd = kstrdup("<password>"); else passwd = encode_string(u->passwd); } if (u->proto == URLFTP && *dir == '/') { char *tmp = (char *) kmalloc(strlen(dir) + 3); /*sprintf (tmp, "%%2F%s", dir + 1); */ tmp[0] = '%'; tmp[1] = '2'; tmp[2] = 'F'; strcpy(tmp + 3, dir + 1); kfree(dir); dir = tmp; } ln = strlen(proto_name); lu = user ? strlen(user) : 0; lp = passwd ? strlen(passwd) : 0; lh = strlen(host); ld = strlen(dir); lf = strlen(file); res = (char *) kmalloc(ln + lu + lp + lh + ld + lf + 20); /* safe sex */ /* sprintf (res, "%s%s%s%s%s%s:%d/%s%s%s", proto_name, (user ? user : ""), (passwd ? ":" : ""), (passwd ? passwd : ""), (user ? "@" : ""), host, u->port, dir, *dir ? "/" : "", file); */ l = 0; memcpy(res, proto_name, ln); l += ln; if (user) { memcpy(res + l, user, lu); l += lu; if (passwd) { res[l++] = ':'; memcpy(res + l, passwd, lp); l += lp; } res[l++] = '@'; } memcpy(res + l, host, lh); l += lh; if (u->port != proto_default_port) { res[l++] = ':'; sprintf(res + l, "%ld", (long) u->port); l += numdigit(u->port); } res[l++] = '/'; memcpy(res + l, dir, ld); l += ld; if (*dir) res[l++] = '/'; strcpy(res + l, file); kfree(host); kfree(dir); kfree(file); kfree(user); kfree(passwd); return res;}/* Extract the given URL of the form (http:|ftp:)// (user (:password)?@)?hostname (:port)? (/path)? 1. hostname (terminated with `/' or `:') 2. port number (terminated with `/'), or chosen for the protocol 3. dirname (everything after hostname) Most errors are handled. No allocation is done, you must supply pointers to allocated memory. ...and a host of other stuff :-) - Recognizes hostname:dir/file for FTP and hostname (:portnum)?/dir/file for HTTP. - Parses the path to yield directory and file - Parses the URL to yield the username and passwd (if present) - Decodes the strings, in case they contain "forbidden" characters - Writes the result to struct urlinfo If the argument STRICT is set, it recognizes only the canonical form. */uerr_t proz_parse_url(const char *url, urlinfo * u, int strict){ int i, l, abs_ftp; int recognizable; /* Recognizable URL is the one where the protocol name was explicitly named, i.e. it wasn't deduced from the URL format. */ uerr_t type; memset(u, 0, sizeof(urlinfo)); recognizable = has_proto(url); if (strict && !recognizable) return URLUNKNOWN; for (i = 0, l = 0; i < ARRAY_SIZE(sup_protos); i++) { l = strlen(sup_protos[i].name); if (!strncasecmp(sup_protos[i].name, url, l)) break; } /* If protocol is recognizable, but unsupported, bail out, else suppose unknown. */ if (recognizable && i == ARRAY_SIZE(sup_protos)) return URLUNKNOWN; else if (i == ARRAY_SIZE(sup_protos)) type = URLUNKNOWN; else u->proto = type = sup_protos[i].ind; if (type == URLUNKNOWN) l = 0; /* Allow a username and password to be specified (i.e. just skip them for now). */ if (recognizable) l += skip_uname(url + l); for (i = l; url[i] && url[i] != ':' && url[i] != '/'; i++); if (i == l) return URLBADHOST; /* Get the hostname. */ u->host = strdupdelim(url + l, url + i); /* Assume no port has been given. */ u->port = 0; if (url[i] == ':') { /* We have a colon delimiting the hostname. It could mean that a port number is following it, or a directory. */ if (isdigit(url[++i])) /* A port number */ { if (type == URLUNKNOWN) u->proto = type = URLHTTP; for (; url[i] && url[i] != '/'; i++) if (isdigit(url[i])) u->port = 10 * u->port + (url[i] - '0'); else return URLBADPORT; if (!u->port) return URLBADPORT;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -