📄 http_parser.c
字号:
/*
 * This file is part of the Sofia-SIP package
 *
 * Copyright (C) 2005 Nokia Corporation.
 *
 * Contact: Pekka Pessi <pekka.pessi@nokia.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA
 *
 */

/**@CFILE http_parser.c
 *
 * HTTP parser.
 *
 * @author Pekka Pessi <Pekka.Pessi@nokia.com>
 *
 * @date Created: Thu Oct 5 14:01:24 2000 ppessi
 */

#include "config.h"

#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <limits.h>
#include <stdarg.h>

#include <sofia-sip/su_tagarg.h>
#include <sofia-sip/su_alloc.h>

#include "sofia-sip/http_parser.h"
#include <sofia-sip/msg_parser.h>
#include <sofia-sip/http_header.h>
#include <sofia-sip/http_status.h>
#include <sofia-sip/msg_mclass.h>

/** HTTP version 1.1. */
char const http_version_1_1[] = "HTTP/1.1";
/** HTTP version 1.0. */
char const http_version_1_0[] = "HTTP/1.0";
/** HTTP version 0.9 is an empty string. */
char const http_version_0_9[] = "";

/** Return the default HTTP message class (the external @c http_mclass). */
msg_mclass_t *http_default_mclass(void)
{
  extern msg_mclass_t http_mclass[];

  return http_mclass;
}

static int http_extract_chunk(msg_t *msg, http_t *http, char b[], int bsiz, int eos);

/** Calculate length of line ending (0, 1 or 2).
 *
 * Evaluates to 2 for CR LF, 1 for a lone CR or a lone LF, and 0 otherwise.
 * Note that it reads (s)[1] whenever (s)[0] is CR.
 */
#define CRLF_TEST(s) \
  (((s)[0]) == '\r' ? (((s)[1]) == '\n') + 1 : ((s)[0])=='\n')

/** Extract the HTTP message body, including separator line.
 *
 * Dispatches on the state kept in @c http->http_flags: the empty line after
 * trailers, a chunked body, or a plain body whose length is derived from
 * the status code, Transfer-Encoding, Content-Length and Content-Type.
 *
 * @param msg   message object being parsed
 * @param http  HTTP structure of the message
 * @param b     buffer holding the unparsed data
 * @param bsiz  number of bytes available in @a b
 * @param eos   nonzero when no more data will follow
 *              (NOTE(review): presumably "end of stream" - confirm)
 *
 * @retval -1 error
 * @retval 0 cannot proceed
 * @retval other number of bytes extracted
 */
int http_extract_body(msg_t *msg, http_t *http, char b[], int bsiz, int eos)
{
  int m = 0;
  unsigned body_len;

  /* Local copy: HTTP_FLG_NO_BODY set below is not written back to http */
  int flags = http->http_flags;

  if (eos && bsiz == 0) {
    msg_mark_as_complete(msg, MSG_FLG_COMPLETE);
    return 0;
  }

  if (flags & MSG_FLG_TRAILERS) {
    /* The empty line after trailers */
    if (!eos && (bsiz == 0 || (bsiz == 1 && b[0] == '\r')))
      return 0;

    m = CRLF_TEST(b);

    assert(m > 0 || eos); /* We should be looking at an empty line */

    /* We have completed trailers */
    msg_mark_as_complete(msg, MSG_FLG_COMPLETE);

    return m;
  }

  if (flags & MSG_FLG_CHUNKS)
    return http_extract_chunk(msg, http, b, bsiz, eos);

  if (!(flags & MSG_FLG_BODY)) {
    /* We are looking at a potential empty line */
    m = msg_extract_separator(msg, http, b, bsiz, eos);

    if (m == 0) /* Not yet */
      return 0;

    http->http_flags |= MSG_FLG_BODY;
    b += m, bsiz -= m;
  }

  /* body_len is determined by rules in RFC2616 sections 4.3 and 4.4 */

  /* 1XX, 204, 304 do not have message-body, ever */
  if (http->http_status) {
    int status = http->http_status->st_status;

    if (status < 200 || status == 204 || status == 304)
      flags |= HTTP_FLG_NO_BODY;
  }

  if (flags & HTTP_FLG_NO_BODY) {
    msg_mark_as_complete(msg, MSG_FLG_COMPLETE);
    return m;
  }

  if (http->http_transfer_encoding) {
    if (/* NOTE - there is really no Transfer-Encoding: identity in RFC 2616
         * but it was used in drafts...
         */
        http->http_transfer_encoding->k_items &&
        http->http_transfer_encoding->k_items[0] &&
        strcasecmp(http->http_transfer_encoding->k_items[0], "identity") != 0) {
      http->http_flags |= MSG_FLG_CHUNKS;

      if (http->http_flags & MSG_FLG_STREAMING)
        msg_set_streaming(msg, msg_start_streaming);

      /* The separator was consumed on this call; chunks come next time */
      if (m)
        return m;

      return http_extract_chunk(msg, http, b, bsiz, eos);
    }
  }

  if (http->http_content_length)
    body_len = http->http_content_length->l_length;
  /* We cannot parse multipart/byteranges ... */
  else if (http->http_content_type && http->http_content_type->c_type &&
           strcasecmp(http->http_content_type->c_type,
                      "multipart/byteranges") == 0)
    return -1;
  else if (MSG_IS_MAILBOX(flags)) /* message fragments */
    body_len = 0;
  else if (http->http_request)
    body_len = 0;
  else if (eos)
    body_len = bsiz;
  else
    return 0; /* XXX */

  if (body_len == 0) {
    msg_mark_as_complete(msg, MSG_FLG_COMPLETE);
    return m;
  }

  if (http->http_flags & MSG_FLG_STREAMING)
    msg_set_streaming(msg, msg_start_streaming);

  /* The separator was consumed on this call; payload comes next time */
  if (m)
    return m;

  m = msg_extract_payload(msg, http, NULL, body_len, b, bsiz, eos);
  if (m == -1)
    return -1;

  /* We have now all message fragments in place */
  http->http_flags |= MSG_FLG_FRAGS;

  /* Same unsigned comparison as the implicit conversion would perform */
  if ((unsigned)bsiz >= body_len) {
    msg_mark_as_complete(msg, MSG_FLG_COMPLETE);
  }

  return m;
}

/** Extract a chunk.
 *
 * Skips empty lines preceding the chunk header, parses the hexadecimal
 * chunk size and then either handles the last-chunk (size 0) or hands the
 * chunk header together with the chunk data to msg_extract_payload().
 *
 * A chunk size that cannot be represented together with the length of its
 * header in an @c unsigned is rejected as an error instead of being
 * silently truncated.
 *
 * @param msg   message object being parsed
 * @param http  HTTP structure of the message
 * @param b     buffer holding the unparsed data
 * @param bsiz  number of bytes available in @a b
 * @param eos   nonzero when no more data will follow
 *
 * @retval -1 error
 * @retval 0 cannot proceed
 * @retval other number of bytes extracted
 */
static int http_extract_chunk(msg_t *msg, http_t *http, char b[], int bsiz, int eos)
{
  int crlf, n;
  unsigned long parsed_len;   /* chunk size exactly as returned by strtoul() */
  unsigned chunk_len;
  char *b0 = b, *s;
  int bsiz0 = bsiz;
  union {
    msg_header_t *header;
    msg_payload_t *chunk;
  } h = { NULL };

  if (bsiz == 0)
    return 0;

  /* We should be looking at an empty line followed by the chunk header */
  while ((crlf = CRLF_TEST(b))) {
    /* A lone CR at the end of buffer may still be followed by LF */
    if (bsiz == 1 && crlf == 1 && b[0] == '\r' && !eos)
      return 0;

    if (crlf == bsiz) {
      if (eos) {
        msg_mark_as_complete(msg, MSG_FLG_COMPLETE | MSG_FLG_FRAGS);
        return (b - b0) + crlf;
      }
      else
        return 0;
    }
    assert(crlf < bsiz);

    /* Skip crlf */
    b += crlf;
    bsiz -= crlf;
  }

  /* Now, looking at the chunk header */
  n = strcspn(b, CRLF);
  if (!eos && n == bsiz)
    return 0;
  crlf = CRLF_TEST(b + n);

  if (n == 0) {
    if (crlf == bsiz && eos) {
      msg_mark_as_complete(msg, MSG_FLG_COMPLETE | MSG_FLG_FRAGS);
      return crlf;
    }
    else
      return -1; /* XXX - should we be more liberal? */
  }

  /* The line ending of the chunk header is not completely in buffer yet */
  if (!eos && n + crlf == bsiz && (crlf == 0 || (crlf == 1 && b[n] == '\r')))
    return 0;

  parsed_len = strtoul(b, &s, 16);
  if (s == b)
    return -1;
  skip_ws(&s);
  if (s != b + n && s[0] != ';') /* Extra stuff that is not parameter */
    return -1;

  if (parsed_len == 0) { /* We found last-chunk */
    b += n + crlf, bsiz -= n + crlf;

    crlf = bsiz > 0 ? CRLF_TEST(b) : 0;

    if ((eos && bsiz == 0) || crlf == 2 ||
        (crlf == 1 && (bsiz > 1 || b[0] == '\n'))) {
      /* Shortcut - We got empty trailers */
      b += crlf;
      msg_mark_as_complete(msg, MSG_FLG_COMPLETE | MSG_FLG_FRAGS);
    } else {
      /* We have to parse trailers */
      http->http_flags |= MSG_FLG_TRAILERS;
    }

    return b - b0;
  }

  b += n + crlf, bsiz -= n + crlf;

  /* Reject sizes that would be truncated when stored in an unsigned or
   * that would wrap when the header length (b - b0) is added below.
   * strtoul() returns ULONG_MAX on overflow, which is caught here too
   * because (b - b0) is at least 1 at this point.
   */
  if (parsed_len > UINT_MAX - (unsigned long)(b - b0))
    return -1;
  chunk_len = (unsigned)parsed_len;

  /* Extract chunk; the payload initially covers the chunk header, too */
  bsiz = msg_extract_payload(msg, http, &h.header, chunk_len + (b - b0), b0, bsiz0, eos);

  if (bsiz != -1 && h.header) {
    assert(h.chunk->pl_data);
    /* Make the payload refer to the chunk data only */
    h.chunk->pl_data += (b - b0);
    h.chunk->pl_len -= (b - b0);
  }

  return bsiz;
}

/** Parse HTTP version.
 *
 * The function http_version_d() parses a HTTP version. The well-known
 * versions are returned as pointers to #http_version_1_1,
 * #http_version_1_0 or #http_version_0_9 (empty input); any other version
 * is returned as a pointer into the parsed string, which is modified in
 * place (whitespace is NUL-ed and "token / token" is compacted).
 *
 * @param ss   pointer to pointer to string to be parsed; updated to point
 *             to the first non-LWS character after the version
 * @param ver  if not NULL, receives the pointer to the version string
 *
 * @retval 0 when successful,
 * @retval -1 upon an error.
 */
int http_version_d(char **ss, char const **ver)
{
  char *s = *ss;
  char const *result;
  int const version_size = sizeof(http_version_1_1) - 1;

  if (strncasecmp(s, http_version_1_1, version_size) == 0 &&
      !IS_TOKEN(s[version_size])) {
    result = http_version_1_1;
    s += version_size;
  }
  else if (strncasecmp(s, http_version_1_0, version_size) == 0 &&
           !IS_TOKEN(s[version_size])) {
    result = http_version_1_0;
    s += version_size;
  }
  else if (s[0] == '\0') {
    result = http_version_0_9;
  } else {
    /* Version consists of one or two tokens, separated by / */
    int l1 = 0, l2 = 0, n;

    result = s;

    l1 = span_token(s);
    for (n = l1; IS_LWS(s[n]); n++)
      s[n] = '\0';
    if (s[n] == '/') {
      for (n = n + 1; IS_LWS(s[n]); n++)
        ;
      l2 = span_token(s + n);
      n += l2;
    }

    if (l1 == 0)
      return -1;

    /* If there is extra ws between tokens, compact version */
    if (l2 > 0 && n > l1 + 1 + l2) {
      s[l1] = '/';
      memmove(s + l1 + 1, s + n - l2, l2);
      s[l1 + 1 + l2] = 0;

      /* Compare again with compacted version */
      if (strcasecmp(s, http_version_1_1) == 0)
        result = http_version_1_1;
      else if (strcasecmp(s, http_version_1_0) == 0)
        result = http_version_1_0;
    }

    s += n;
  }

  while (IS_LWS(*s))
    *s++ = '\0';

  *ss = s;

  if (ver)
    *ver = result;

  return 0;
}

/** Calculate extra space required by version string.
 *
 * The well-known version constants are shared and need no extra space.
 */
int http_version_xtra(char const *version)
{
  if (version == http_version_1_1)
    return 0;
  else if (version == http_version_1_0)
    return 0;
  else
    return MSG_STRING_SIZE(version);
}

/** Duplicate a version string.
 *
 * The well-known version constants are shared by pointer; any other
 * version is copied with MSG_STRING_DUP().
 */
void http_version_dup(char **pp, char const **dd, char const *s)
{
  if (s == http_version_1_1)
    *dd = s;
  else if (s == http_version_1_0)
    *dd = s;
  else
    MSG_STRING_DUP(*pp, *dd, s);
}

/** Well-known HTTP method names, indexed by method code. */
static char const * const methods[] = {
  "<UNKNOWN>",
  http_method_name_get,
  http_method_name_post,
  http_method_name_head,
  http_method_name_options,
  http_method_name_put,
  http_method_name_delete,
  http_method_name_trace,
  http_method_name_connect,
  NULL,
  /* If you add something here, add also them to http_method_d! */
};

char const http_method_name_get[] = "GET";
char const http_method_name_post[] = "POST";
char const http_method_name_head[] = "HEAD";
char const http_method_name_options[] = "OPTIONS";
char const http_method_name_put[] = "PUT";
char const http_method_name_delete[] = "DELETE";
char const http_method_name_trace[] = "TRACE";
char const http_method_name_connect[] = "CONNECT";

/** Get the name of a HTTP method.
 *
 * @param method  method code
 * @param name    name returned when @a method is 0
 *
 * @return The entry of the well-known method table for a positive
 * @a method within the table, @a name if @a method is 0, and NULL
 * otherwise.
 */
char const *http_method_name(http_method_t method, char const *name)
{
  if (method > 0 && (size_t)method < sizeof(methods)/sizeof(methods[0]))
    return methods[method];
  else if (method == 0)
    return name;
  else
    return NULL;
}

/**Parse a HTTP method name.
 *
 * The function @c http_method_d() parses a HTTP method, and returns a code
 * corresponding to the method. It stores the address of the first non-LWS
 * character after method name in @c *ss.
 *
 * @param ss    pointer to pointer to string to be parsed
 * @param nname pointer to value-result parameter for method name
 *
 * @note
 * If there is no whitespace after method name, the value in @a *nname
 * may not be NUL-terminated. The calling function @b must NUL terminate
 * the value by setting the @a **ss to NUL after first examining its value.
 *
 * @return The function @c http_method_d returns the method code if method
 * was identified, 0 (@c http_method_unknown) if method is not known, or @c -1
 * (@c http_method_invalid) if an error occurred.
 *
 * If the value-result argument @a nname is not @c NULL, http_method_d()
 * stores a pointer to the method name to it.
 */
http_method_t http_method_d(char **ss, char const **nname)
{
  char *s = *ss, c = *s;
  char const *name;
  int code = http_method_unknown;
  int n = 0;

/* Compare s with literal m, leaving the length of m in n as a side effect */
#define MATCH(s, m) (strncasecmp(s, m, n = sizeof(m) - 1) == 0)

  if (c >= 'a' && c <= 'z')
    c += 'A' - 'a';

  switch (c) {
  case 'C': if (MATCH(s, "CONNECT")) code = http_method_connect; break;
  case 'D': if (MATCH(s, "DELETE")) code = http_method_delete; break;
  case 'G': if (MATCH(s, "GET")) code = http_method_get; break;
  case 'H': if (MATCH(s, "HEAD")) code = http_method_head; break;
  case 'O': if (MATCH(s, "OPTIONS")) code = http_method_options; break;
  case 'P': if (MATCH(s, "POST")) code = http_method_post;
            else
            if (MATCH(s, "PUT")) code = http_method_put; break;
  case 'T': if (MATCH(s, "TRACE")) code = http_method_trace; break;
  default: break;
  }

#undef MATCH

  /* A well-known name followed by more non-whitespace is another method */
  if (!code || IS_NON_WS(s[n])) {
    /* Unknown method */
    code = http_method_unknown;
    name = s;

    for (n = 0; IS_UNRESERVED(s[n]); n++)
      ;

    if (s[n]) {
      if (!IS_LWS(s[n]))
        return http_method_invalid;
      /* The string is modified only if the caller wants the name */
      if (nname)
        s[n++] = '\0';
    }
  }
  else {
    name = methods[code];
  }

  while (IS_LWS(s[n]))
    n++;

  *ss = (s + n);
  if (nname)
    *nname = name;

  return code;
}

/** Get method enum corresponding to method name */
http_method_t http_method_code(char const *name)
{
  /* Note that http_method_d() does not change string if nname is NULL */
  return http_method_d((char **)&name, NULL);
}

/**Parse HTTP query string.
 *
 * The function http_query_parse() searches for the given keys in HTTP @a
 * query. For each key, a query element (in the form name=value) is searched
 * from the query string. If a query element has a beginning matching with
 * the key, a pointer to the rest of the element is stored in the
 * corresponding return_value argument. The pointer refers to the @a query
 * buffer itself; nothing is allocated.
 *
 * The variable arguments are pairs of (char const *key, char **return_value)
 * and the list must be terminated with a NULL key.
 *
 * @note The @a query string will be modified: the elements are
 * NUL-terminated and passed through url_unescape() in place.
 *
 * @return
 * The function http_query_parse() returns the number of keys that matched
 * within the @a query string, or -1 if @a query is NULL.
 */
int http_query_parse(char *query,
                     /* char const *key, char **return_value, */
                     ...)
{
  va_list ap;
  char *q, *q_next;
  char *name, *value, **return_value;
  char const *key;
  size_t namelen, valuelen, keylen, N;
  int has_value;

  if (!query)
    return -1;

  for (q = query, N = 0; *q; q = q_next) {
    namelen = strcspn(q, "=&");
    valuelen = namelen + strcspn(q + namelen, "&");

    /* Terminate this element and step over the '&', if there is one */
    q_next = q + valuelen;
    if (*q_next)
      *q_next++ = '\0';

    value = q + namelen;
    has_value = (*value) != '\0'; /* is the part in form of name=value? */
    if (has_value)
      *value++ = '\0';

    name = url_unescape(q, q);

    if (has_value) {
      /* Rejoin the unescaped name and value as "name=value" */
      namelen = strlen(name);
      name[namelen] = '=';
      url_unescape(name + namelen + 1, value);
    }

    /* Walk the whole key list again for every query element */
    va_start(ap, query);

    while ((key = va_arg(ap, char const *))) {
      return_value = va_arg(ap, char **);
      keylen = strlen(key);

      if (strncmp(key, name, keylen) == 0) {
        *return_value = name + keylen;
        N++;
      }
    }

    va_end(ap);
  }

  return (int)N;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -