📄 url.c
字号:
/* * This file is part of the Sofia-SIP package * * Copyright (C) 2005 Nokia Corporation. * * Contact: Pekka Pessi <pekka.pessi@nokia.com> * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA * 02110-1301 USA * *//**@CFILE url.c * * Implementation of basic URL parsing and handling. * * @author Pekka Pessi <Pekka.Pessi@nokia.com> * * @date Created: Thu Jun 29 22:44:37 2000 ppessi */#include "config.h"#include <sofia-sip/su_alloc.h>#include <sofia-sip/bnf.h>#include <sofia-sip/hostdomain.h>#include <sofia-sip/url.h>#include <sofia-sip/string0.h>#include <stdio.h>#include <string.h>#include <stdlib.h>#include <assert.h>#include <ctype.h>#include <limits.h>/**@def URL_PRINT_FORMAT * Format string used when printing url with printf(). * * The macro URL_PRINT_FORMAT is used in format string of printf() or * similar printing functions. A URL can be printed like this: * @code * printf("%s received URL " URL_PRINT_FORMAT "\n", * my_name, URL_PRINT_ARGS(url)); * @endcode *//** @def URL_PRINT_ARGS(u) * Argument list used when printing url with printf(). * * The macro URL_PRINT_ARGS() is used to create a stdarg list for printf() * or similar printing functions. Using it, a URL can be printed like this: * * @code * printf("%s received URL " URL_PRINT_FORMAT "\n", * my_name, URL_PRINT_ARGS(url)); * @endcode */#define RESERVED ";/?:@&=+$,"#define DELIMS "<>#%\""#define UNWISE "{}|\\^[]`"#define EXCLUDED RESERVED DELIMS UNWISE#define UNRESERVED "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \ "abcdefghijklmnopqrstuvwxyz" \ "0123456789" \ "-_.!~*'()"#define IS_EXCLUDED(u, m32, m64, m96) \ (u <= ' ' \ || u >= '\177' \ || (u < 64 ? (m32 & (1 << (63 - u))) \ : (u < 96 ? (m64 & (1 << (95 - u))) \ : /*u < 128*/ (m96 & (1 << (127 - u))))) != 0)#define MASKS_WITH_RESERVED(reserved, m32, m64, m96) \ if (reserved == NULL) { \ m32 = 0xbe19003f, m64 = 0x8000001e, m96 = 0x8000001d; \ } else do { \ m32 = 0xb400000a, m64 = 0x0000001e, m96 = 0x8000001d; \ \ for (;reserved[0]; reserved++) { \ unsigned r = reserved[0]; \ RESERVE(r, m32, m64, m96); \ } \ } while (0)#define RESERVE(reserved, m32, m64, m96) \ if (r < 32) \ ; \ else if (r < 64) \ m32 |= 1U << (63 - r); \ else if (r < 96) \ m64 |= 1U << (95 - r); \ else if (r < 128) \ m96 |= 1U << (127 - r)#define MASKS_WITH_ALLOWED(allowed, mask32, mask64, mask96) \ do { \ if (allowed) { \ for (;allowed[0]; allowed++) { \ unsigned a = allowed[0]; \ ALLOW(a, mask32, mask64, mask96); \ } \ } \ } while (0)#define ALLOW(a, mask32, mask64, mask96) \ if (a < 32) \ ; \ else if (a < 64) \ mask32 &= ~(1U << (63 - a)); \ else if (a < 96) \ mask64 &= ~(1U << (95 - a)); \ else if (a < 128) \ mask96 &= ~(1U << (127 - a))#define NUL '\0'#define NULNULNUL '\0', '\0', '\0'#define RMASK1 0xbe19003f#define RMASK2 0x8000001e#define RMASK3 0x8000001d#define RESERVED_MASK 0xbe19003f, 0x8000001e, 0x8000001d#define URIC_MASK 0xb400000a, 0x0000001e, 0x8000001d#define IS_EXCLUDED_MASK(u, m) IS_EXCLUDED(u, m)/* Internal prototypes */static char *url_canonize(char *d, char const *s, size_t n, unsigned syn33, char const allowed[]);static char *url_canonize2(char *d, char const *s, size_t n, unsigned syn33, unsigned m32, unsigned m64, unsigned m96);static int url_tel_cmp_numbers(char const *A, char const *B);/**Test if string contains excluded or url-reserved characters. * * * * @param s string to be searched * * @retval 0 if no reserved characters were found. * @retval l if a reserved character was found. */int url_reserved_p(char const *s){ if (s) while (*s) { unsigned char u = *s++; if (IS_EXCLUDED(u, RMASK1, RMASK2, RMASK3)) return 1; } return 0;}/** Calculate length of string when escaped with %-notation. * * Calculate the length of string @a s when the excluded or reserved * characters in it have been escaped. * * @param s String with reserved URL characters. [IN * @param reserved Optional array of reserved characters [IN] * * @return * The number of characters in corresponding but escaped string. * * You can handle a part of URL with reserved characters like this: *燖code * if (url_reserved_p(s)) { * n = malloc(url_esclen(s, NULL) + 1); * if (n) url_escape(n, s); * } else { * n = malloc(strlen(s) + 1); * if (n) strcpy(n, s); * } * @endcode */isize_t url_esclen(char const *s, char const reserved[]){ size_t n; unsigned mask32, mask64, mask96; MASKS_WITH_RESERVED(reserved, mask32, mask64, mask96); for (n = 0; s && *s; n++) { unsigned char u = *s++; if (IS_EXCLUDED(u, mask32, mask64, mask96)) n += 2; } return (isize_t)n;}/** Escape a string. * * The function url_escape() copies the string pointed by @a s to the array * pointed by @a d, @b excluding the terminating \\0 character. All reserved * characters in @a s are copied in hexadecimal format, for instance, @c * "$%#" is copied as @c "%24%25%23". The destination array @a d must be * large enough to receive the escaped copy. * * @param d Destination buffer [OUT] * @param s String to be copied [IN] * @param reserved Array of reserved characters [IN] * * @return Pointer to the destination array. */char *url_escape(char *d, char const *s, char const reserved[]){ char *retval = d; unsigned mask32, mask64, mask96; MASKS_WITH_RESERVED(reserved, mask32, mask64, mask96); while (s && *s) { unsigned char u = *s++; if (IS_EXCLUDED(u, mask32, mask64, mask96)) {# define URL_HEXIFY(u) ((u) + '0' + ((u) >= 10 ? 'A' - '0' - 10 : 0)) *d++ = '%'; *d++ = URL_HEXIFY(u >> 4); *d++ = URL_HEXIFY(u & 15);# undef URL_HEXIFY } else { *d++ = u; } } *d = '\0'; return retval;}/**Unescape url-escaped string fragment. * * Unescape @a n characters from string @a s to the buffer @a d, including * the terminating \\0 character. All %-escaped triplets in @a s are * unescaped, for instance, @c "%40%25%23" is copied as @c "@%#". The * destination array @a d must be large enough to receive the escaped copy * (@a n bytes is always enough). * * @param d destination buffer * @param s string to be unescaped * @param n maximum number of characters to unescape * * @return Length of unescaped string * * @NEW_1_12_4. */size_t url_unescape_to(char *d, char const *s, size_t n){ size_t i = 0, j = 0; if (s == NULL) return 0; i = j = strncspn(s, n, "%"); if (d && d != s) memmove(d, s, i); for (; i < n;) { char c = s[i++]; if (c == '\0') break; if (c == '%' && i + 1 < n && IS_HEX(s[i]) && IS_HEX(s[i + 1])) {#define UNHEX(a) (a - (a >= 'a' ? 'a' - 10 : (a >= 'A' ? 'A' - 10 : '0'))) c = (UNHEX(s[i]) << 4) | UNHEX(s[i + 1]);#undef UNHEX i += 2; } if (d) d[j] = c; j++; } return j;}/**Unescape url-escaped string. * * Unescape string @a s to the buffer @a d, including the terminating \\0 * character. All %-escaped triplets in @a s are unescaped, for instance, @c * "%40%25%23" is copied as @c "@%#". The destination array @a d must be * large enough to receive the escaped copy. * * @param d destination buffer * @param s string to be copied * * @return Pointer to the destination buffer. */char *url_unescape(char *d, char const *s){ size_t n = url_unescape_to(d, s, SIZE_MAX); if (d) d[n] = '\0'; return d;}/** Canonize a URL component */staticchar *url_canonize(char *d, char const *s, size_t n, unsigned syn33, char const allowed[]){ unsigned mask32 = 0xbe19003f, mask64 = 0x8000001e, mask96 = 0x8000001d; MASKS_WITH_ALLOWED(allowed, mask32, mask64, mask96); return url_canonize2(d, s, n, syn33, mask32, mask64, mask96);}#define SYN33(c) (1U << (c - 33))#define IS_SYN33(syn33, c) ((syn33 & (1U << (c - 33))) != 0)/** Canonize a URL component (with precomputed mask) */staticchar *url_canonize2(char *d, char const * const s, size_t n, unsigned syn33, unsigned m32, unsigned m64, unsigned m96){ size_t i = 0; if (d == s) for (;s[i] && i < n; d++, i++) if (s[i] == '%') break; for (;s[i] && i < n; d++, i++) { unsigned char c = s[i], h1, h2; if (c != '%') { if (!IS_SYN33(syn33, c) && IS_EXCLUDED(c, m32, m64, m96)) return NULL; *d = c; continue; } h1 = s[i + 1], h2 = s[i + 2]; if (!IS_HEX(h1) || !IS_HEX(h2)) { *d = '\0'; return NULL; } #define UNHEX(a) (a - (a >= 'a' ? 'a' - 10 : (a >= 'A' ? 'A' - 10 : '0'))) c = (UNHEX(h1) << 4) | UNHEX(h2); if (!IS_EXCLUDED(c, m32, m64, m96)) { /* Convert hex to normal character */ *d = c, i += 2; continue; } /* Convert hex to uppercase */ if (h1 >= 'a' /* && h1 <= 'f' */) h1 = h1 - 'a' + 'A'; if (h2 >= 'a' /* && h2 <= 'f' */) h2 = h2 - 'a' + 'A'; d[0] = '%', d[1] = h1, d[2] = h2; d +=2, i += 2;#undef UNHEX } *d = '\0'; return d;}/** Canonize a URL component (with precomputed mask). * * This version does not flag error if *s contains character that should * be escaped. */staticchar *url_canonize3(char *d, char const * const s, size_t n, unsigned m32, unsigned m64, unsigned m96){ size_t i = 0; if (d == s) for (;s[i] && i < n; d++, i++) if (s[i] == '%') break; for (;s[i] && i < n; d++, i++) { unsigned char c = s[i], h1, h2; if (c != '%') { *d = c; continue; } h1 = s[i + 1], h2 = s[i + 2]; if (!IS_HEX(h1) || !IS_HEX(h2)) { *d = '\0'; return NULL; } #define UNHEX(a) (a - (a >= 'a' ? 'a' - 10 : (a >= 'A' ? 'A' - 10 : '0'))) c = (UNHEX(h1) << 4) | UNHEX(h2); if (!IS_EXCLUDED(c, m32, m64, m96)) { *d = c, i += 2; continue; } /* Convert hex to uppercase */ if (h1 >= 'a' /* && h1 <= 'f' */) h1 = h1 - 'a' + 'A'; if (h2 >= 'a' /* && h2 <= 'f' */) h2 = h2 - 'a' + 'A'; d[0] = '%', d[1] = h1, d[2] = h2; d +=2, i += 2;#undef UNHEX } *d = '\0'; return d;}/** Get URL scheme. */char const* url_scheme(enum url_type_e url_type){ switch (url_type) { case url_any: return "*"; case url_sip: return "sip"; case url_sips: return "sips"; case url_tel: return "tel"; case url_fax: return "fax"; case url_modem: return "modem"; case url_http: return "http"; case url_https: return "https"; case url_ftp: return "ftp"; case url_file: return "file"; case url_rtsp: return "rtsp"; case url_rtspu: return "rtspu"; case url_mailto: return "mailto"; case url_im: return "im"; case url_pres: return "pres"; case url_cid: return "cid"; case url_msrp: return "msrp"; case url_msrps: return "msrps"; case url_wv: return "wv"; default: assert(url_type == url_unknown); return NULL; }}su_inlineint url_type_is_opaque(enum url_type_e url_type){ return url_type == url_invalid || url_type == url_tel || url_type == url_modem || url_type == url_fax || url_type == url_cid;}/** Init an url as given type */void url_init(url_t *url, enum url_type_e type){ memset(url, 0, sizeof(*url)); url->url_type = type; if (type > url_unknown) { char const *scheme = url_scheme(url->url_type); if (scheme) url->url_scheme = scheme; }}/** Get url type */su_inlineenum url_type_e url_get_type(char const *scheme, size_t len){#define test_scheme(s) \ if (len == strlen(#s) && !strncasecmp(scheme, #s, len)) return url_##s switch (scheme[0]) { case '*': if (strcmp(scheme, "*") == 0) return url_any; case 'c': case 'C': test_scheme(cid); break; case 'f': case 'F': test_scheme(ftp); test_scheme(file); test_scheme(fax); break; case 'h': case 'H': test_scheme(http); test_scheme(https); break; case 'i': case 'I': test_scheme(im); break; case 'm': case 'M': test_scheme(mailto); test_scheme(modem); test_scheme(msrp); test_scheme(msrps); break; case 'p': case 'P': test_scheme(pres); break; case 'r': case 'R': test_scheme(rtsp); test_scheme(rtspu); break; case 's': case 'S': test_scheme(sip); test_scheme(sips); break;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -