123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573 |
- /*
- * This file is part of the Sofia-SIP package
- *
- * Copyright (C) 2005 Nokia Corporation.
- *
- * Contact: Pekka Pessi <pekka.pessi@nokia.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1 of
- * the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- *
- */
- /**@CFILE http_parser.c
- *
- * HTTP parser.
- *
- * @author Pekka Pessi <Pekka.Pessi@nokia.com>
- *
- * @date Created: Thu Oct 5 14:01:24 2000 ppessi
- */
- #include "config.h"
- /* Avoid casting http_t to msg_pub_t and http_header_t to msg_header_t */
- #define MSG_PUB_T struct http_s
- #define MSG_HDR_T union http_header_u
- #include <sofia-sip/su_alloc.h>
- #include <sofia-sip/su_string.h>
- #include "sofia-sip/http_parser.h"
- #include <sofia-sip/msg_parser.h>
- #include <sofia-sip/http_header.h>
- #include <sofia-sip/http_status.h>
- #include <sofia-sip/msg_mclass.h>
- #include <sofia-sip/su_tagarg.h>
- #include <stddef.h>
- #include <stdlib.h>
- #include <stdio.h>
- #include <assert.h>
- #include <limits.h>
- #include <stdarg.h>
- /** HTTP version 1.1. */
- char const http_version_1_1[] = "HTTP/1.1";
- /** HTTP version 1.0. */
- char const http_version_1_0[] = "HTTP/1.0";
- /** HTTP version 0.9 is an empty string. */
- char const http_version_0_9[] = "";
- msg_mclass_t const *http_default_mclass(void)
- {
- extern msg_mclass_t const http_mclass[];
- return http_mclass;
- }
- static
- issize_t http_extract_chunk(msg_t *, http_t *, char b[], isize_t bsiz, int eos);
- /** Calculate length of line ending (0, 1 or 2) */
- #define CRLF_TEST(s) \
- (((s)[0]) == '\r' ? (((s)[1]) == '\n') + 1 : ((s)[0])=='\n')
- /** Extract the HTTP message body, including separator line.
- *
- * @retval -1 error
- * @retval 0 cannot proceed
- * @retval other number of bytes extracted
- */
- issize_t http_extract_body(msg_t *msg, http_t *http, char b[], isize_t bsiz, int eos)
- {
- issize_t m = 0;
- size_t body_len;
- int flags = http->http_flags;
- if (eos && bsiz == 0) {
- msg_mark_as_complete(msg, MSG_FLG_COMPLETE);
- return 0;
- }
- if (flags & MSG_FLG_TRAILERS) {
- /* The empty line after trailers */
- if (!eos && (bsiz == 0 || (bsiz == 1 && b[0] == '\r')))
- return 0;
- m = CRLF_TEST(b);
- assert(m > 0 || eos); /* We should be looking at an empty line */
- /* We have completed trailers */
- msg_mark_as_complete(msg, MSG_FLG_COMPLETE);
- return m;
- }
- if (flags & MSG_FLG_CHUNKS)
- return http_extract_chunk(msg, http, b, bsiz, eos);
- if (!(flags & MSG_FLG_BODY)) {
- /* We are looking at a potential empty line */
- m = msg_extract_separator(msg, http, b, bsiz, eos);
- if (m == 0) /* Not yet */
- return 0;
- http->http_flags |= MSG_FLG_BODY;
- b += m, bsiz -= m;
- }
- /* body_len is determined by rules in RFC2616 sections 4.3 and 4.4 */
- /* 1XX, 204, 304 do not have message-body, ever */
- if (http->http_status) {
- int status = http->http_status->st_status;
- if (status < 200 || status == 204 || status == 304)
- flags |= HTTP_FLG_NO_BODY;
- }
- if (flags & HTTP_FLG_NO_BODY) {
- msg_mark_as_complete(msg, MSG_FLG_COMPLETE);
- return m;
- }
- if (http->http_transfer_encoding) {
- if (/* NOTE - there is really no Transfer-Encoding: identity in RFC 2616
- * but it was used in drafts...
- */
- http->http_transfer_encoding->k_items &&
- http->http_transfer_encoding->k_items[0] &&
- !su_casematch(http->http_transfer_encoding->k_items[0], "identity")) {
- http->http_flags |= MSG_FLG_CHUNKS;
- if (http->http_flags & MSG_FLG_STREAMING)
- msg_set_streaming(msg, msg_start_streaming);
- if (m)
- return m;
- return http_extract_chunk(msg, http, b, bsiz, eos);
- }
- }
- if (http->http_content_length)
- body_len = http->http_content_length->l_length;
- /* We cannot parse multipart/byteranges ... */
- else if (http->http_content_type && http->http_content_type->c_type &&
- su_casematch(http->http_content_type->c_type, "multipart/byteranges"))
- return -1;
- else if (MSG_IS_MAILBOX(flags)) /* message fragments */
- body_len = 0;
- else if (http->http_request)
- body_len = 0;
- else if (eos)
- body_len = bsiz;
- else
- return 0; /* XXX */
- if (body_len == 0) {
- msg_mark_as_complete(msg, MSG_FLG_COMPLETE);
- return m;
- }
- if (http->http_flags & MSG_FLG_STREAMING)
- msg_set_streaming(msg, msg_start_streaming);
- if (m)
- return m;
- m = msg_extract_payload(msg, http, NULL, body_len, b, bsiz, eos);
- if (m == -1)
- return -1;
- /* We have now all message fragments in place */
- http->http_flags |= MSG_FLG_FRAGS;
- if (bsiz >= body_len) {
- msg_mark_as_complete(msg, MSG_FLG_COMPLETE);
- }
- return m;
- }
- /** Extract a chunk.
- *
- * @retval -1 error
- * @retval 0 cannot proceed
- * @retval other number of bytes extracted
- */
- issize_t http_extract_chunk(msg_t *msg, http_t *http, char b[], isize_t bsiz, int eos)
- {
- size_t n;
- unsigned crlf, chunk_len;
- char *b0 = b, *s;
- union {
- msg_header_t *header;
- msg_payload_t *chunk;
- } h = { NULL };
- size_t bsiz0 = bsiz;
- if (bsiz == 0)
- return 0;
- /* We should be looking at an empty line followed by the chunk header */
- while ((crlf = CRLF_TEST(b))) {
- if (bsiz == 1 && crlf == 1 && b[0] == '\r' && !eos)
- return 0;
- if (crlf == bsiz) {
- if (eos) {
- msg_mark_as_complete(msg, MSG_FLG_COMPLETE | MSG_FLG_FRAGS);
- return (b - b0) + crlf;
- }
- else
- return 0;
- }
- assert(crlf < bsiz);
- /* Skip crlf */
- b += crlf; bsiz -= crlf;
- }
- /* Now, looking at the chunk header */
- n = strcspn(b, CRLF);
- if (!eos && n == bsiz)
- return 0;
- crlf = CRLF_TEST(b + n);
- if (n == 0) {
- if (crlf == bsiz && eos) {
- msg_mark_as_complete(msg, MSG_FLG_COMPLETE | MSG_FLG_FRAGS);
- return crlf;
- }
- else
- return -1; /* XXX - should we be more liberal? */
- }
- if (!eos && n + crlf == bsiz && (crlf == 0 || (crlf == 1 && b[n] == '\r')))
- return 0;
- chunk_len = strtoul(b, &s, 16);
- if (s == b)
- return -1;
- skip_ws(&s);
- if (s != b + n && s[0] != ';') /* Extra stuff that is not parameter */
- return -1;
- if (chunk_len == 0) { /* We found last-chunk */
- b += n + crlf, bsiz -= n + crlf;
- crlf = bsiz > 0 ? CRLF_TEST(b) : 0;
- if ((eos && bsiz == 0) || crlf == 2 ||
- (crlf == 1 && (bsiz > 1 || b[0] == '\n'))) {
- /* Shortcut - We got empty trailers */
- b += crlf;
- msg_mark_as_complete(msg, MSG_FLG_COMPLETE | MSG_FLG_FRAGS);
- } else {
- /* We have to parse trailers */
- http->http_flags |= MSG_FLG_TRAILERS;
- }
- return b - b0;
- }
- else {
- issize_t chunk;
- b += n + crlf;
- /* Extract chunk */
- chunk = msg_extract_payload(msg, http,
- &h.header, chunk_len + (b - b0),
- b0, bsiz0, eos);
- if (chunk != -1 && h.header) {
- assert(h.chunk->pl_data);
- h.chunk->pl_data += (b - b0);
- h.chunk->pl_len -= (b - b0);
- }
- return chunk;
- }
- }
- /** Parse HTTP version.
- *
- * The function http_version_d() parses a HTTP method.
- *
- * @retval 0 when successful,
- * @retval -1 upon an error.
- */
- int http_version_d(char **ss, char const **ver)
- {
- char *s = *ss;
- char const *result;
- int const version_size = sizeof(http_version_1_1) - 1;
- if (su_casenmatch(s, http_version_1_1, version_size) &&
- !IS_TOKEN(s[version_size])) {
- result = http_version_1_1;
- s += version_size;
- }
- else if (su_casenmatch(s, http_version_1_0, version_size) &&
- !IS_TOKEN(s[version_size])) {
- result = http_version_1_0;
- s += version_size;
- }
- else if (s[0] == '\0') {
- result = http_version_0_9;
- } else {
- /* Version consists of one or two tokens, separated by / */
- size_t l1 = 0, l2 = 0, n;
- result = s;
- l1 = span_token(s);
- for (n = l1; IS_LWS(s[n]); n++)
- s[n] = '\0';
- if (s[n] == '/') {
- for (n = n + 1; IS_LWS(s[n]); n++)
- {}
- l2 = span_token(s + n);
- n += l2;
- }
- if (l1 == 0)
- return -1;
- /* If there is extra ws between tokens, compact version */
- if (l2 > 0 && n > l1 + 1 + l2) {
- s[l1] = '/';
- memmove(s + l1 + 1, s + n - l2, l2);
- s[l1 + 1 + l2] = 0;
- /* Compare again with compacted version */
- if (su_casematch(s, http_version_1_1))
- result = http_version_1_1;
- else if (su_casematch(s, http_version_1_0))
- result = http_version_1_0;
- }
- s += n;
- }
- while (IS_LWS(*s)) *s++ = '\0';
- *ss = s;
- if (ver)
- *ver = result;
- return 0;
- }
- /** Calculate extra space required by version string */
- isize_t http_version_xtra(char const *version)
- {
- if (version == http_version_1_1)
- return 0;
- else if (version == http_version_1_0)
- return 0;
- else
- return MSG_STRING_SIZE(version);
- }
- /** Duplicate a transport string */
- void http_version_dup(char **pp, char const **dd, char const *s)
- {
- if (s == http_version_1_1)
- *dd = s;
- else if (s == http_version_1_0)
- *dd = s;
- else
- MSG_STRING_DUP(*pp, *dd, s);
- }
- /** Well-known HTTP method names. */
- static char const * const methods[] = {
- "<UNKNOWN>",
- http_method_name_get,
- http_method_name_post,
- http_method_name_head,
- http_method_name_options,
- http_method_name_put,
- http_method_name_delete,
- http_method_name_trace,
- http_method_name_connect,
- NULL,
- /* If you add something here, add also them to http_method_d! */
- };
- char const http_method_name_get[] = "GET";
- char const http_method_name_post[] = "POST";
- char const http_method_name_head[] = "HEAD";
- char const http_method_name_options[] = "OPTIONS";
- char const http_method_name_put[] = "PUT";
- char const http_method_name_delete[] = "DELETE";
- char const http_method_name_trace[] = "TRACE";
- char const http_method_name_connect[] = "CONNECT";
- char const *http_method_name(http_method_t method, char const *name)
- {
- if (method > 0 && (size_t)method < sizeof(methods)/sizeof(methods[0]))
- return methods[method];
- else if (method == 0)
- return name;
- else
- return NULL;
- }
- /**Parse a HTTP method name.
- *
- * The function @c http_method_d() parses a HTTP method, and returns a code
- * corresponding to the method. It stores the address of the first non-LWS
- * character after method name in @c *ss.
- *
- * @param ss pointer to pointer to string to be parsed
- * @param nname pointer to value-result parameter formethod name
- *
- * @note
- * If there is no whitespace after method name, the value in @a *nname
- * may not be NUL-terminated. The calling function @b must NUL terminate
- * the value by setting the @a **ss to NUL after first examining its value.
- *
- * @return The function @c http_method_d returns the method code if method
- * was identified, 0 (@c http_method_unknown) if method is not known, or @c -1
- * (@c http_method_invalid) if an error occurred.
- *
- * If the value-result argument @a nname is not @c NULL, http_method_d()
- * stores a pointer to the method name to it.
- */
- http_method_t http_method_d(char **ss, char const **nname)
- {
- char *s = *ss, c = *s;
- char const *name;
- int code = http_method_unknown;
- size_t n = 0;
- #define MATCH(s, m) (su_casenmatch(s, m, n = sizeof(m) - 1))
- if (c >= 'a' && c <= 'z')
- c += 'A' - 'a';
- switch (c) {
- case 'C': if (MATCH(s, "CONNECT")) code = http_method_connect; break;
- case 'D': if (MATCH(s, "DELETE")) code = http_method_delete; break;
- case 'G': if (MATCH(s, "GET")) code = http_method_get; break;
- case 'H': if (MATCH(s, "HEAD")) code = http_method_head; break;
- case 'O': if (MATCH(s, "OPTIONS")) code = http_method_options; break;
- case 'P': if (MATCH(s, "POST")) code = http_method_post;
- else
- if (MATCH(s, "PUT")) code = http_method_put;
- break;
- case 'T': if (MATCH(s, "TRACE")) code = http_method_trace; break;
- }
- #undef MATCH
- if (!code || IS_NON_WS(s[n])) {
- /* Unknown method */
- code = http_method_unknown;
- name = s;
- for (n = 0; IS_UNRESERVED(s[n]); n++)
- ;
- if (s[n]) {
- if (!IS_LWS(s[n]))
- return http_method_invalid;
- if (nname)
- s[n++] = '\0';
- }
- }
- else {
- name = methods[code];
- }
- while (IS_LWS(s[n]))
- n++;
- *ss = (s + n);
- if (nname) *nname = name;
- return (http_method_t)code;
- }
- /** Get method enum corresponding to method name */
- http_method_t http_method_code(char const *name)
- {
- /* Note that http_method_d() does not change string if nname is NULL */
- return http_method_d((char **)&name, NULL);
- }
- /**Parse HTTP query string.
- *
- * The function http_query_parse() searches for the given keys in HTTP @a
- * query. For each key, a query element (in the form name=value) is searched
- * from the query string. If a query element has a beginning matching with
- * the key, a copy of the rest of the element is returned in corresponding
- * return_value argument.
- *
- * @note The @a query string will be modified.
- *
- * @return
- * The function http_query_parse() returns number keys that matched within
- * the @a query string.
- */
- issize_t http_query_parse(char *query,
- /* char const *key, char **return_value, */
- ...)
- {
- va_list ap;
- char *q, *q_next;
- char *name, *value, **return_value;
- char const *key;
- size_t namelen, valuelen, keylen;
- isize_t N;
- int has_value;
- if (!query)
- return -1;
- for (q = query, N = 0; *q; q = q_next) {
- namelen = strcspn(q, "=&");
- valuelen = namelen + strcspn(q + namelen, "&");
- q_next = q + valuelen;
- if (*q_next)
- *q_next++ = '\0';
- value = q + namelen;
- has_value = (*value) != '\0'; /* is the part in form of name=value? */
- if (has_value)
- *value++ = '\0';
- name = url_unescape(q, q);
- if (has_value) {
- namelen = strlen(name);
- name[namelen] = '=';
- url_unescape(name + namelen + 1, value);
- }
- va_start(ap, query);
- while ((key = va_arg(ap, char const *))) {
- return_value = va_arg(ap, char **);
- keylen = strlen(key);
- if (strncmp(key, name, keylen) == 0) {
- *return_value = name + keylen;
- N++;
- }
- }
- va_end(ap);
- }
- return N;
- }
|