📄 url.c

📁 Internet Phone, Chat, Conferencing
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/* * This file is part of the Sofia-SIP package * * Copyright (C) 2005 Nokia Corporation. * * Contact: Pekka Pessi <pekka.pessi@nokia.com> * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA * 02110-1301 USA * *//**@CFILE url.c * * Implementation of basic URL parsing and handling. * * @author Pekka Pessi <Pekka.Pessi@nokia.com> * * @date Created: Thu Jun 29 22:44:37 2000 ppessi */#include "config.h"#include <string.h>#include <stdlib.h>#include <assert.h>#include <sofia-sip/su_alloc.h>#include <sofia-sip/bnf.h>#include <sofia-sip/hostdomain.h>#include <sofia-sip/url.h>/**@def URL_PRINT_FORMAT * Format string used when printing url with printf(). * * The macro URL_PRINT_FORMAT is used in format string of printf() or * similar printing functions.  A URL can be printed like this: * @code *   printf("%s received URL " URL_PRINT_FORMAT "\n",  *          my_name, URL_PRINT_ARGS(url)); * @endcode *//** @def URL_PRINT_ARGS(u) * Argument list used when printing url with printf(). * * The macro URL_PRINT_ARGS() is used to create a stdarg list for printf() * or similar printing functions.  
Using it, a URL can be printed like this: * * @code *   printf("%s received URL " URL_PRINT_FORMAT "\n",  *          my_name, URL_PRINT_ARGS(url)); * @endcode */#define RESERVED        ";/?:@&=+$,"#define DELIMS          "<>#%\""#define UNWISE		"{}|\\^[]`"#define EXCLUDED	RESERVED DELIMS UNWISE#define UNRESERVED    	"ABCDEFGHIJKLMNOPQRSTUVWXYZ" \                      	"abcdefghijklmnopqrstuvwxyz" \                      	"0123456789" \                      	"-_.!~*'()"#define IS_EXCLUDED(u, m32, m64, m96)			\  (u <= ' '						\   || u >= '\177'					\   || (u < 64 ? (m32 & (1 << (63 - u)))			\       : (u < 96 ? (m64 & (1 << (95 - u)))		\	  : /*u < 128*/ (m96 & (1 << (127 - u))))))#define MASKS_WITH_RESERVED(reserved, m32, m64, m96)		\  if (reserved == NULL) {					\    m32 = 0xbe19003f, m64 = 0x8000001e, m96 = 0x8000001d;	\  } else do {							\    m32 = 0xb400000a, m64 = 0x0000001e, m96 = 0x8000001d;	\    								\    for (;reserved[0]; reserved++) {				\      unsigned r = reserved[0];					\      RESERVE(r, m32, m64, m96);				\    }								\  } while (0)#define RESERVE(reserved, m32, m64, m96)				\  if (r < 32)								\    ;									\  else if (r < 64)							\    m32 |= 1U << (63 - r);						\  else if (r < 96)							\    m64 |= 1U << (95 - r);						\  else if (r < 128)							\    m96 |= 1U << (127 - r)#define MASKS_WITH_ALLOWED(allowed, mask32, mask64, mask96)	\  do {								\    if (allowed) {						\      for (;allowed[0]; allowed++) {				\	unsigned a = allowed[0];				\	ALLOW(a, mask32, mask64, mask96);			\      }								\    }								\  } while (0)#define ALLOW(a, mask32, mask64, mask96)	\  if (a < 32)					\    ;						\  else if (a < 64)				\    mask32 &= ~(1U << (63 - a));		\  else if (a < 96)				\    mask64 &= ~(1U << (95 - a));		\  else if (a < 128)				\    mask96 &= ~(1U << (127 - a))#define RMASK1 0xbe19003f#define RMASK2 0x8000001e#define RMASK3 0x8000001d#define RESERVED_MASK 0xbe19003f, 0x8000001e, 0x8000001d#define URIC_MASK     0xb400000a, 0x0000001e, 
0x8000001d#define IS_EXCLUDED_MASK(u, m) IS_EXCLUDED(u, m)/**Test if string contains excluded or url-reserved characters.  * * @param s  string to be searched * * @retval 0 if no reserved characters were found. * @retval 1 if a reserved character was found. */int url_reserved_p(char const *s){  if (s)     while (*s) {      unsigned char u = *s++;      if (IS_EXCLUDED(u, RMASK1, RMASK2, RMASK3))	return 1;    }  return 0;}/** Calculate length of string escaped. * * The function url_esclen() calculates the length of string @a s when * the excluded or reserved characters in it have been escaped. *  * @param s         String with reserved URL characters. [IN * @param reserved  Optional array of reserved characters [IN] * * @return  * The function url_esclen() returns the number of characters in * corresponding but escaped string. * * You can handle a part of URL with reserved characters like this: *燖code * if (url_reserved_p(s))  { *   n = malloc(url_esclen(s, NULL) + 1); *   if (n) url_escape(n, s); * } else { *   n = malloc(strlen(s) + 1); *   if (n) strcpy(n, s); * } * @endcode *  */int url_esclen(char const *s, char const reserved[]){  int n;  unsigned mask32, mask64, mask96;  MASKS_WITH_RESERVED(reserved, mask32, mask64, mask96);  for (n = 0; s && *s; n++) {    unsigned char u = *s++;    if (IS_EXCLUDED(u, mask32, mask64, mask96))      n += 2;  }  return n;}/** Escape a string. * * The function url_escape() copies the string pointed by @a s to the array * pointed by @a d, @b excluding the terminating \\0 character.  All reserved * characters in @a s are copied in hexadecimal format, for instance, @c * "$%#" is copied as @c "%24%25%23".  The destination array @a d must be * large enough to receive the escaped copy. * * @param d         Destination buffer [OUT] * @param s         String to be copied [IN] * @param reserved  Array of reserved characters [IN] * * @return Pointer to the destination array. 
*/
char *url_escape(char *d, char const *s, char const reserved[])
{
  char *retval = d;
  unsigned mask32, mask64, mask96;

  /* Build the exclusion bitmaps: the default set when reserved is NULL,
   * otherwise a smaller base set extended with the characters listed in
   * reserved.  The macro advances the local pointer reserved. */
  MASKS_WITH_RESERVED(reserved, mask32, mask64, mask96);

  /* A NULL source is treated like an empty string. */
  while (s && *s) {
    unsigned char u = *s++;

    if (IS_EXCLUDED(u, mask32, mask64, mask96)) {
      /* Map a value 0..15 to its uppercase hexadecimal digit. */
#     define URL_HEXIFY(u) ((u) + '0' + ((u) >= 10 ? 'A' - '0' - 10 : 0))

      *d++ = '%';
      *d++ = URL_HEXIFY(u >> 4);
      *d++ = URL_HEXIFY(u & 15);

#     undef URL_HEXIFY
    }
    else {
      *d++ = u;
    }
  }

  *d = '\0';

  return retval;
}

/**Unescape a string.
 *
 * The function url_unescape() copies the string pointed by @a s to the
 * array pointed by @a d, including the terminating \\0 character. All
 * %-escaped triplets in @a s are unescaped, for instance, @c "%40%25%23" is
 * copied as @c "@%#". The destination array @a d must be large enough to
 * receive the unescaped copy.
 *
 * The string can be unescaped in place (@a d == @a s); in that case the
 * buffer is left untouched when it contains no '%' character.
 *
 * @note Copying stops at the first character for which IS_NON_LWS() is
 * false - presumably NUL or linear whitespace; confirm against
 * <sofia-sip/bnf.h>.
 *
 * @param d  destination buffer
 * @param s  string to be copied
 *
 * @return Pointer to the destination array.
 */
char *url_unescape(char *d, char const *s)
{
  char *r = d;

  if (s) {
    /* In place: nothing before the first '%' changes, so start there.
     * If there is no '%' at all, both pointers become NULL and the
     * buffer is not written. */
    if (d == s)
      s = d = strchr(d, '%');

    if (d) {
      for (;IS_NON_LWS(*s); d++, s++) {
        if (*s == '%' && IS_HEX(s[1]) && IS_HEX(s[2])) {
          /* Value (0..15) of a hexadecimal digit character. */
#define   UNHEX(a) (a - (a >= 'a' ? 'a' - 10 : (a >= 'A' ? 'A' - 10 : '0')))
          *d = (UNHEX(s[1]) << 4) | UNHEX(s[2]);
#undef    UNHEX
          /* Skip the two hex digits; the loop increment skips the '%'. */
          s += 2;
        }
        else {
          /* Also reached for a '%' not followed by two hex digits
           * (as judged by IS_HEX()): it is copied verbatim. */
          *d = *s;
        }
      }
    }
  }

  /* Terminate the result; with s == NULL this yields an empty string. */
  if (d) *d = '\0';

  return r;
}

static
char *url_canonize2(char *d, char const *s, int n,
                    unsigned m32, unsigned m64, unsigned m96);

/** Canonize a URL component.
 *
 * Starts from the default reserved masks, removes the characters listed
 * in @a allowed (may be NULL) from the excluded set and hands over to
 * url_canonize2().
 *
 * @param d        destination buffer (may be the same as @a s)
 * @param s        component to canonize
 * @param n        maximum number of characters to read from @a s;
 *                 a negative value such as -1 effectively means no limit
 * @param allowed  characters that are allowed to appear unescaped
 *
 * @return Result of url_canonize2().
 */
static
char *url_canonize(char *d, char const *s, int n, char const allowed[])
{
  unsigned mask32 = 0xbe19003f, mask64 = 0x8000001e, mask96 = 0x8000001d;

  /* The macro advances the local pointer allowed. */
  MASKS_WITH_ALLOWED(allowed, mask32, mask64, mask96);

  return url_canonize2(d, s, n, mask32, mask64, mask96);
}

/** Canonize a URL component (with precomputed mask).
 *
 * Copies at most @a n characters of @a s to @a d.  An escape triplet whose
 * decoded value is not excluded by the masks is replaced by the decoded
 * character; other triplets are kept with their hex digits in uppercase.
 * The output is never longer than the input, so @a d may equal @a s.
 *
 * @return Pointer to the terminating NUL written to @a d, or NULL if an
 * excluded character appears unescaped or a '%' is not followed by two
 * hex digits.
 *
 * @note When canonizing in place (@a d == @a s) the characters before the
 * first '%' are skipped without being checked against the masks.
 * @note On the excluded-character error @a d is not NUL-terminated.
 * @note The two characters after a '%' are read even if they lie beyond
 * the @a n limit.
 */
static
char *url_canonize2(char *d, char const *s, int n,
                    unsigned m32, unsigned m64, unsigned m96)
{
  char const *s0 = s;

  /* In place: the prefix before the first '%' stays as it is.
   * (unsigned)n turns a negative n into a very large limit. */
  if (d == s)
    for (;*s && s - s0 < (unsigned)n; d++, s++)
      if (*s == '%')
        break;

  for (;*s && s - s0 < (unsigned)n; d++, s++) {
    unsigned char c = *s, h1, h2;

    if (c != '%') {
      if (IS_EXCLUDED(c, m32, m64, m96))
        return NULL;
      *d = c;
      continue;
    }

    h1 = s[1], h2 = s[2];

    if (!IS_HEX(h1) || !IS_HEX(h2)) {
      *d = '\0';
      return NULL;
    }

    /* Value (0..15) of a hexadecimal digit character. */
#define UNHEX(a) (a - (a >= 'a' ? 'a' - 10 : (a >= 'A' ? 'A' - 10 : '0')))
    c = (UNHEX(h1) << 4) | UNHEX(h2);

    /* The decoded character needs no escaping: store it as such. */
    if (!IS_EXCLUDED(c, m32, m64, m96)) {
      *d = c, s += 2;
      continue;
    }

    /* Convert hex to uppercase */
    if (h1 >= 'a' /* && h1 <= 'f' */)
      h1 = h1 - 'a' + 'A';
    if (h2 >= 'a' /* && h2 <= 'f' */)
      h2 = h2 - 'a' + 'A';

    d[0] = '%', d[1] = h1, d[2] = h2;

    /* The loop increment accounts for the third character. */
    d +=2, s += 2;
#undef    UNHEX
  }

  *d = '\0';

  return d;
}

/** Canonize a URL component (with precomputed mask).
 *
 * Like url_canonize2(), except that characters appearing unescaped in
 * @a s are copied without being checked against the masks; the masks only
 * decide which escape triplets get decoded.
 *
 * @return Pointer to the terminating NUL written to @a d, or NULL if a
 * '%' is not followed by two hex digits.
 */
static
char *url_canonize3(char *d, char const *s, int n,
                    unsigned m32, unsigned m64, unsigned m96)
{
  char const *s0 = s;

  /* In place: the prefix before the first '%' stays as it is. */
  if (d == s)
    for (;*s && s - s0 < (unsigned)n; d++, s++)
      if (*s == '%')
        break;

  for (;*s && s - s0 < (unsigned)n; d++, s++) {
    unsigned char c = *s, h1, h2;

    if (c != '%') {
      *d = c;
      continue;
    }

    h1 = s[1], h2 = s[2];

    if (!IS_HEX(h1) || !IS_HEX(h2)) {
      *d = '\0';
      return NULL;
    }

    /* Value (0..15) of a hexadecimal digit character. */
#define UNHEX(a) (a - (a >= 'a' ? 'a' - 10 : (a >= 'A' ? 'A' - 10 : '0')))
    c = (UNHEX(h1) << 4) | UNHEX(h2);

    /* The decoded character needs no escaping: store it as such. */
    if (!IS_EXCLUDED(c, m32, m64, m96)) {
      *d = c, s += 2;
      continue;
    }

    /* Convert hex to uppercase */
    if (h1 >= 'a' /* && h1 <= 'f' */)
      h1 = h1 - 'a' + 'A';
    if (h2 >= 'a' /* && h2 <= 'f' */)
      h2 = h2 - 'a' + 'A';

    d[0] = '%', d[1] = h1, d[2] = h2;

    /* The loop increment accounts for the third character. */
    d +=2, s += 2;
#undef    UNHEX
  }

  *d = '\0';

  return d;
}

/** Get URL scheme.
 *
 * @param url_type  type of the URL
 *
 * @return Scheme name for @a url_type, for instance "sip" for url_sip and
 * "*" for url_any.  NULL is returned for any other type; the code asserts
 * that such a type is url_unknown.
*/
char const* url_scheme(enum url_type_e url_type)
{
  switch (url_type) {
  case url_any:    return "*";
  case url_sip:    return "sip";
  case url_sips:   return "sips";
  case url_tel:    return "tel";
  case url_fax:    return "fax";
  case url_modem:  return "modem";
  case url_http:   return "http";
  case url_https:  return "https";
  case url_ftp:    return "ftp";
  case url_file:   return "file";
  case url_rtsp:   return "rtsp";
  case url_rtspu:  return "rtspu";
  case url_mailto: return "mailto";
  case url_im:     return "im";
  case url_pres:   return "pres";
  case url_cid:    return "cid";
  case url_msrp:   return "msrp";
  case url_wv:     return "wv";
  default:
    /* Any type without a scheme name is expected to be url_unknown. */
    assert(url_type == url_unknown);
    return NULL;
  }
}

/** Test whether the URL type is handled as opaque by the parser, that is,
 * whether _url_d() should not look for a leading "/" or "//" in it.
 *
 * @return Nonzero for url_invalid, url_tel, url_modem, url_fax and
 * url_cid; zero otherwise.
 */
static inline
int url_type_is_opaque(enum url_type_e url_type)
{
  return
    url_type == url_invalid ||
    url_type == url_tel ||
    url_type == url_modem ||
    url_type == url_fax ||
    url_type == url_cid;
}

/** Init an url as given type.
 *
 * Clears the whole structure and sets its type.  For types greater than
 * url_unknown the scheme is set to the string returned by url_scheme(),
 * if any.
 *
 * @param url   structure to initialize
 * @param type  type of the URL
 */
void url_init(url_t *url, enum url_type_e type)
{
  memset(url, 0, sizeof(*url));
  url->url_type = type;
  if (type > url_unknown) {
    char const *scheme = url_scheme(url->url_type);
    if (scheme)
      url->url_scheme = scheme;
  }
}

/** Get url type.
 *
 * Matches the first @a len characters of @a scheme case-insensitively
 * against the known scheme names.
 *
 * @param scheme  scheme name
 * @param len     length of the scheme name
 *
 * @return The matching url type; otherwise url_invalid if @a len differs
 * from span_unreserved(scheme), else url_unknown.
 */
static inline
enum url_type_e url_get_type(char const *scheme, int len)
{
  /* Return url_<s> if scheme is exactly <s>, ignoring case.
   * NOTE(review): this compares the int len with the size_t result of
   * strlen(). */
#define test_scheme(s) \
   if (len == strlen(#s) && !strncasecmp(scheme, #s, len)) return url_##s

  /* Dispatch on the first character so only a few names are compared. */
  switch (scheme[0]) {
  case '*': if (strcmp(scheme, "*") == 0) return url_any;
    /* FALLTHROUGH: not exactly "*".  test_scheme(cid) below cannot match
     * a name starting with '*', so control leaves the switch at that
     * case's break. */
  case 'c': case 'C':
    test_scheme(cid); break;
  case 'f': case 'F':
    test_scheme(ftp); test_scheme(file); test_scheme(fax); break;
  case 'h': case 'H':
    test_scheme(http); test_scheme(https); break;
  case 'i': case 'I':
    test_scheme(im); break;
  case 'm': case 'M':
    test_scheme(mailto); test_scheme(modem);
    test_scheme(msrp); break;
  case 'p': case 'P':
    test_scheme(pres); break;
  case 'r': case 'R':
    test_scheme(rtsp); test_scheme(rtspu); break;
  case 's': case 'S':
    test_scheme(sip); test_scheme(sips); break;
  case 't': case 'T':
    test_scheme(tel); break;
  case 'w': case 'W':
    test_scheme(wv); break;
  default: break;
  }

#undef test_scheme

  /* Not a known scheme.  span_unreserved() is declared outside this file;
   * presumably it returns the length of the leading run of unreserved
   * characters - confirm against its declaration. */
  if (len != span_unreserved(scheme))
    return url_invalid;
  else
    return url_unknown;
}

/**
 * Decode a URL.
 *
 * This function decodes a (SIP) URL string to a url_t structure.
 *
 * @param url structure to store the parsing result
 * @param s   NUL-terminated string to be parsed
 *
 * @note The parsed string @a s will be modified when parsing it.
 *
 * @retval 0 if successful,
 * @retval -1 otherwise.
 */
static
int _url_d(url_t *url, char *s)
{
  int n;
  char *s0, rest_c, *host;
  int net_path = 1;

  memset(url, 0, sizeof(*url));

  /* The wildcard URL "*" is a special case. */
  if (strcmp(s, "*") == 0) {
    url->url_type = url_any;
    url->url_scheme = "*";
    return 0;
  }

  s0 = s;

  /* A scheme is a non-empty prefix ending in ':' that occurs before any
   * '/', '?' or '#'. */
  n = strcspn(s, ":/?#");

  if (n && s[n] == ':') {
    char *scheme;

    /* Cut the scheme off in place and continue after the colon. */
    url->url_scheme = scheme = s; s[n] = '\0'; s = s + n + 1;

    /* Canonize the scheme in place, allowing '+' to stay unescaped. */
    if (!(scheme = url_canonize(scheme, scheme, -1, "+")))
      return -1;

    /* url_canonize() returned the end of the canonized scheme, so this
     * is its new length. */
    n = scheme - url->url_scheme;

    url->url_type = url_get_type(url->url_scheme, n);

    net_path = !url_type_is_opaque(url->url_type);
  }
  else {
    url->url_type = url_unknown;
  }

  host = s;

  /* Find the length n of the part that precedes the path, parameters,
   * query or fragment. */
  if (url->url_type == url_sip || url->url_type == url_sips) {
    /* SIP URL may have /; in user part */
    n = strcspn(s, "@#");       /* Opaque part */
    if (s[n] != '@')
      n = 0;
    n += strcspn(s + n, "/;?#");
  }
  else if (url->url_type == url_wv) {
    /* WV URL may have / in user part */
    n = strcspn(s, "@#?;");
    if (s[n] == '@')
      n += strcspn(s + n, ";?#");
  }
  else if (url->url_type == url_invalid) {
    n = strcspn(s, "#");
  } else if (net_path && host[0] == '/') {
    url->url_root = host[0];    /* Absolute path */
    if (host[1] == '/') {       /* We have host-part */
      host += 2; s += 2;
    }
    else
      host = NULL;
    n = strcspn(s, "/;?#");     /* Find path, query and/or fragment */
  } else {
    n = strcspn(s, "/;?#");     /* Find params, query and/or fragment */
  }

  /* Remember the delimiter and cut the string there; the last statement
   * below continues on the following source line. */
  rest_c = s[n]; s[n] = 0; s
= rest_c ? s + n + 1 : NULL;  if (host) {    char *atsign, *port;
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -