📄 bnf.c
字号:
/*
 * This file is part of the Sofia-SIP package
 *
 * Copyright (C) 2005,2006 Nokia Corporation.
 *
 * Contact: Pekka Pessi <pekka.pessi@nokia.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA
 *
 */

/**@CFILE bnf.c
 * @brief Character syntax table for HTTP-like protocols.
 *
 * @author Pekka Pessi <Pekka.Pessi@nokia.com>
 * @author Kai Vehmanen <Kai.Vehmanen@nokia.com>
 *
 * @date Created: Thu Jun  8 19:28:55 2000 ppessi
 */

#include "config.h"

#include "sofia-sip/bnf.h"

#include <stdio.h>
#include <assert.h>

/* Short aliases for the character-class bits, used to keep the table
 * below readable.  Composite aliases are parenthesized so that they stay
 * a single value wherever they are expanded.
 */
#define ws    bnf_ws
#define crlf  bnf_crlf
#define alpha bnf_alpha
#define digit (bnf_mark | bnf_token0 | bnf_safe)
#define sep   bnf_separator
#define msep  (bnf_mark | bnf_separator)
#define psep  (bnf_param0 | bnf_separator)
#define tok   bnf_token0
#define mtok  (bnf_mark | bnf_token0)
#define smtok (bnf_mark | bnf_token0 | bnf_safe)
#define safe  bnf_safe

/** Table for determining class of a character.
 *
 * Indexed by character code.  Only the first 128 entries are listed;
 * the remaining entries are implicitly zero (no class).
 */
unsigned char const _bnf_table[256] = {
  0,     0,     0,     0,     0,     0,     0,     0,
  0,     ws,    crlf,  0,     0,     crlf,  0,     0,
  0,     0,     0,     0,     0,     0,     0,     0,
  0,     0,     0,     0,     0,     0,     0,     0,
  ws,    mtok,  sep,   0,     safe,  mtok,  0,     mtok,   /*  !"#$%&' */
  msep,  msep,  mtok,  tok,   sep,   smtok, smtok, psep,   /* ()*+,-./ */
  digit, digit, digit, digit, digit, digit, digit, digit,  /* 01234567 */
  digit, digit, psep,  sep,   sep,   sep,   sep,   sep,    /* 89:;<=>? */
  sep,   alpha, alpha, alpha, alpha, alpha, alpha, alpha,  /* @ABCDEFG */
  alpha, alpha, alpha, alpha, alpha, alpha, alpha, alpha,  /* HIJKLMNO */
  alpha, alpha, alpha, alpha, alpha, alpha, alpha, alpha,  /* PQRSTUVW */
  alpha, alpha, alpha, psep,  sep,   psep,  0,     smtok,  /* XYZ[\]^_ */
  tok,   alpha, alpha, alpha, alpha, alpha, alpha, alpha,  /* `abcdefg */
  alpha, alpha, alpha, alpha, alpha, alpha, alpha, alpha,  /* hijklmno */
  alpha, alpha, alpha, alpha, alpha, alpha, alpha, alpha,  /* pqrstuvw */
  alpha, alpha, alpha, sep,   0,     sep,   mtok,  0,      /* xyz{|}~  */
};

#if 0                           /* This escaped lab */

#define BM(c, m00, m32, m64, m96)                          \
  ((c < 64)                                                \
   ? ((c < 32)                                             \
      ? (m00 & (1 << (31 - c)))                            \
      : (m32 & (1 << (63 - c))))                           \
   : ((c < 96)                                             \
      ? (m64 & (1 << (95 - c)))                            \
      : (m96 & (1 << (127 - c)))))

/** Span of a token */
size_t bnf_span_token(char const *s)
{
  char const *e = s;
  unsigned const m32 = 0x4536FFC0U, m64 = 0x7FFFFFE1U, m96 = 0xFFFFFFE2U;

  while (BM(*e, 0, m32, m64, m96))
    e++;

  return e - s;
}

/** Span of a token */
size_t bnf_span_token4(char const *s)
{
  char const *e = s;
  while (_bnf_table[(unsigned char)(*e)] & bnf_token)
    e++;
  return e - s;
}

char * bnf_span_token_end(char const *s)
{
  return (char *)s;
}
#endif

/** Return length of decimal-octet.
 *
 * @param host  text to examine; characters are read left to right and
 *              reading stops at the first one that is not a digit
 *
 * @return 0 if @a host does not start with a digit, otherwise the number
 *         of leading digits (1..3) that form a decimal-octet.
 */
su_inline int span_ip4_octet(char const *host)
{
  /*
      decimal-octet =       DIGIT
                          / DIGIT DIGIT
                          / (("0"/"1") 2*(DIGIT))
                          / ("2" ("0"/"1"/"2"/"3"/"4") DIGIT)
                          / ("2" "5" ("0"/"1"/"2"/"3"/"4"/"5"))
  */
  int three_digits;

  if (!IS_DIGIT(host[0]))
    return 0;

  /* DIGIT */
  if (!IS_DIGIT(host[1]))
    return 1;

  switch (host[0]) {
  case '2':
    if (host[1] == '5')
      /* ("2" "5" ("0"/"1"/"2"/"3"/"4"/"5")) */
      three_digits = host[2] >= '0' && host[2] <= '5';
    else
      /* ("2" ("0"/"1"/"2"/"3"/"4") DIGIT) */
      three_digits = host[1] >= '0' && host[1] <= '4' &&
        host[2] >= '0' && host[2] <= '9';
    break;

  case '0':
  case '1':
    /* ("1" 2*(DIGIT)) ... or "0" 2*(DIGIT) */
    three_digits = IS_DIGIT(host[2]);
    break;

  default:
    three_digits = 0;
    break;
  }

  /* Anything else is POS-DIGIT DIGIT */
  return three_digits ? 3 : 2;
}

/** Return length of valid IP4 address.
 *
 * @param host             text to examine, may be NULL
 * @param return_canonize  if not NULL, set to 1 when some octet is written
 *                         with more than one digit and starts with '0';
 *                         it is left untouched otherwise
 *
 * @return Length of the address, or 0 if @a host is NULL or does not start
 *         with a valid address.
 */
static
int span_canonic_ip4_address(char const *host, int *return_canonize)
{
  int pos = 0, zeros = 0, octet;

  if (host == NULL)
    return 0;

  /* IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet */
  for (octet = 0; octet < 4; octet++) {
    int const last = (octet == 3);
    int const width = span_ip4_octet(host + pos);

    if (width == 0)
      return 0;

    if (last) {
      /* The address must not run on into another digit or dot */
      if (IS_DIGIT(host[pos + width]) || host[pos + width] == '.')
        return 0;
    }
    else if (host[pos + width] != '.') {
      return 0;
    }

    if (width > 1 && host[pos] == '0')
      zeros = 1;

    pos += last ? width : width + 1;
  }

  if (zeros && return_canonize)
    *return_canonize = 1;

  return pos;
}

/** Return length of valid IP4 address.
 *
 * Note that we accept here up to two leading zeroes
 * which makes "dotted decimal" notation ambiguous:
 * 127.000.000.001 is interpreted same as 127.0.0.1
 *
 * Note that traditionally IP address octets starting
 * with zero have been interpreted as octal:
 * 172.055.055.001 has been same as 172.45.45.1
 *
 * @b However, we interpret them as @b decimal,
 * 172.055.055.001 is same as 172.55.55.1.
 */
int span_ip4_address(char const *host)
{
  return span_canonic_ip4_address(host, NULL);
}

/** Scan and canonize a valid IP4 address.
*/int scan_ip4_address(char **inout_host){ char *src = *inout_host, *dst = src; issize_t n; int canonize = 0; if (src == NULL) return -1; n = span_canonic_ip4_address(src, &canonize); if (n == 0) return -1; *inout_host += n; if (!canonize) return n; for (;;) { char c = *dst++ = *src++; if (IS_DIGIT(*src)) { if (canonize && c == '0') dst--; else if (c == '.') canonize = 1; else canonize = 0; } else if (*src != '.') { break; } } *dst = '\0'; return n;}/** Return length of hex4 */su_inline int span_hex4(char const *host){ if (!IS_HEX(host[0])) return 0; if (!IS_HEX(host[1])) return 1; if (!IS_HEX(host[2])) return 2; if (!IS_HEX(host[3])) return 3; return 4;}/** Return length of valid IP6 address */su_inlineint span_canonic_ip6_address(char const *host, int *return_canonize, char *hexparts[9]){ int n = 0, len, hex4, doublecolon = 0, canonize = 0; /* IPv6address = hexpart [ ":" IPv4address ] hexpart = hexseq / hexseq "::" [ hexseq ] / "::" [ hexseq ] hexseq = hex4 *( ":" hex4) hex4 = 1*4HEXDIG There is at most 8 hex4, 6 hex4 if IPv4address is included. 
*/ if (host == NULL) return 0; for (hex4 = 0; hex4 < 8; ) { len = span_hex4(host + n); if (return_canonize) { if ((len > 1 && host[n + 1] == '0') || host[n] == '0') canonize = 1; if (hexparts) hexparts[hex4 + doublecolon] = (char *)(host + n); } if (host[n + len] == ':') { if (len != 0) { hex4++; n += len + 1; if (!doublecolon && host[n] == ':') { if (return_canonize && hexparts) { hexparts[hex4] = (char *)(host + n - 1); } doublecolon++, n++; } } else if (n == 0 && host[1] == ':') { doublecolon++, n = 2; } else break; } else if (host[n + len] == '.') { len = span_canonic_ip4_address(host + n, return_canonize); if (len == 0 || hex4 > 6 || !(doublecolon || hex4 == 6)) return 0; if (canonize && return_canonize) *return_canonize = 1; return n + len; } else { if (len != 0) hex4++; n += len; break; } } if (hex4 != 8 && !doublecolon) return 0; if (IS_HEX(host[n]) || host[n] == ':') return 0; if (canonize && return_canonize) *return_canonize = canonize; return n;}/** Canonize scanned IP6 address. * * @retval Length of canonized IP6 address. */su_inlineint canonize_ip6_address(char *host, char *hexparts[9]){ char *dst, *hex, *ip4 = NULL; int i, doublecolon, j, maxparts, maxspan, span, len; char buf[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"]; /* Canonic representation has fewest chars - except for mapped/compatible IP4 addresses, like ::15.21.117.42 or ::ffff:15.21.117.42 which have non-canonic forms of ::f15:752a or ::ffff:f15:752a => we just canonize hexparts and ip4part separately and select optimal place for doublecolon (with expection of ::1 and ::, which are canonized) */ for (i = 0, doublecolon = -1; i < 9; i++) { hex = hexparts[i]; if (!hex) break; if (hex[0] == ':') doublecolon = i; while (hex[0] == '0' && IS_HEX(hex[1])) hex++; hexparts[i] = hex; } assert(i > 0); if (hexparts[i - 1][span_hex4(hexparts[i - 1])] == '.') ip4 = hexparts[--i]; maxparts = ip4 ? 
6 : 8; if (doublecolon >= 0) { /* Order at most 8 (or 6) hexparts */ assert(i <= maxparts + 1); if (i == maxparts + 1) { /* There is an extra doublecolon */ for (j = doublecolon; j + 1 < i; j++) hexparts[j] = hexparts[j + 1]; i--; } else { for (j = maxparts; i > doublecolon + 1; ) hexparts[--j] = hexparts[--i]; for (;j > doublecolon;) hexparts[--j] = "0:"; i = maxparts; } } assert(i == maxparts); /* Scan for optimal place for "::" */ for (i = 0, maxspan = 0, span = 0, doublecolon = 0; i < maxparts; i++) { if (hexparts[i][0] == '0') span++; else if (span > maxspan) doublecolon = i - span, maxspan = span, span = 0; else span = 0; } if (span > maxspan) doublecolon = i - span, maxspan = span; dst = buf; for (i = 0; i < maxparts; i++) { if (i == doublecolon) hex = i == 0 ? "::" : ":", len = 1; else if (i > doublecolon && i < doublecolon + maxspan) continue; else hex = hexparts[i], len = span_hex4(hex); if (hex[len] == ':') len++; memcpy(dst, hex, len); dst += len; } if (ip4) { hex = ip4; len = scan_ip4_address(&hex); assert(len > 0); /* Canonize :: and ::1 */ if (doublecolon == 0 && maxspan == 6) { if (len == 7 && strncmp(ip4, "0.0.0.0", len) == 0) ip4 = "", len = 0; else if (len == 7 && strncmp(ip4, "0.0.0.1", len) == 0) ip4 = "1", len = 1; } memcpy(dst, ip4, len); dst += len; } len = dst - buf; memcpy(host, buf, len); return len;}/** Return length of valid IP6 address */int span_ip6_address(char const *host){ return span_canonic_ip6_address(host, NULL, NULL);}/** Scan and canonize valid IP6 address. * * @param inout_host input pointer to string to scan * output pointer to first character after valid IP6 address *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -