http_parser.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573
  1. /*
  2. * This file is part of the Sofia-SIP package
  3. *
  4. * Copyright (C) 2005 Nokia Corporation.
  5. *
  6. * Contact: Pekka Pessi <pekka.pessi@nokia.com>
  7. *
  8. * This library is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public License
  10. * as published by the Free Software Foundation; either version 2.1 of
  11. * the License, or (at your option) any later version.
  12. *
  13. * This library is distributed in the hope that it will be useful, but
  14. * WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * Lesser General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with this library; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
  21. * 02110-1301 USA
  22. *
  23. */
  24. /**@CFILE http_parser.c
  25. *
  26. * HTTP parser.
  27. *
  28. * @author Pekka Pessi <Pekka.Pessi@nokia.com>
  29. *
  30. * @date Created: Thu Oct 5 14:01:24 2000 ppessi
  31. */
  32. #include "config.h"
  33. /* Avoid casting http_t to msg_pub_t and http_header_t to msg_header_t */
  34. #define MSG_PUB_T struct http_s
  35. #define MSG_HDR_T union http_header_u
  36. #include <sofia-sip/su_alloc.h>
  37. #include <sofia-sip/su_string.h>
  38. #include "sofia-sip/http_parser.h"
  39. #include <sofia-sip/msg_parser.h>
  40. #include <sofia-sip/http_header.h>
  41. #include <sofia-sip/http_status.h>
  42. #include <sofia-sip/msg_mclass.h>
  43. #include <sofia-sip/su_tagarg.h>
  44. #include <stddef.h>
  45. #include <stdlib.h>
  46. #include <stdio.h>
  47. #include <assert.h>
  48. #include <limits.h>
  49. #include <stdarg.h>
  /** Version string used for HTTP/1.1 messages. */
  const char http_version_1_1[] = "HTTP/1.1";

  /** Version string used for HTTP/1.0 messages. */
  const char http_version_1_0[] = "HTTP/1.0";

  /** HTTP version 0.9 is represented by an empty string. */
  const char http_version_0_9[] = "";
  56. msg_mclass_t const *http_default_mclass(void)
  57. {
  58. extern msg_mclass_t const http_mclass[];
  59. return http_mclass;
  60. }
  61. static
  62. issize_t http_extract_chunk(msg_t *, http_t *, char b[], isize_t bsiz, int eos);
  /** Calculate length of the line ending at the start of @a s.
   *
   * Evaluates to 2 for CR LF, 1 for a lone CR or a lone LF, and 0 otherwise.
   * The argument is evaluated more than once, and s[1] is read whenever
   * s[0] is CR.
   */
  #define CRLF_TEST(s) \
    ((s)[0] == '\r' ? 1 + ((s)[1] == '\n') : (s)[0] == '\n')
  66. /** Extract the HTTP message body, including separator line.
  67. *
  68. * @retval -1 error
  69. * @retval 0 cannot proceed
  70. * @retval other number of bytes extracted
  71. */
  72. issize_t http_extract_body(msg_t *msg, http_t *http, char b[], isize_t bsiz, int eos)
  73. {
  74. issize_t m = 0;
  75. size_t body_len;
  76. int flags = http->http_flags;
  77. if (eos && bsiz == 0) {
  78. msg_mark_as_complete(msg, MSG_FLG_COMPLETE);
  79. return 0;
  80. }
  81. if (flags & MSG_FLG_TRAILERS) {
  82. /* The empty line after trailers */
  83. if (!eos && (bsiz == 0 || (bsiz == 1 && b[0] == '\r')))
  84. return 0;
  85. m = CRLF_TEST(b);
  86. assert(m > 0 || eos); /* We should be looking at an empty line */
  87. /* We have completed trailers */
  88. msg_mark_as_complete(msg, MSG_FLG_COMPLETE);
  89. return m;
  90. }
  91. if (flags & MSG_FLG_CHUNKS)
  92. return http_extract_chunk(msg, http, b, bsiz, eos);
  93. if (!(flags & MSG_FLG_BODY)) {
  94. /* We are looking at a potential empty line */
  95. m = msg_extract_separator(msg, http, b, bsiz, eos);
  96. if (m == 0) /* Not yet */
  97. return 0;
  98. http->http_flags |= MSG_FLG_BODY;
  99. b += m, bsiz -= m;
  100. }
  101. /* body_len is determined by rules in RFC2616 sections 4.3 and 4.4 */
  102. /* 1XX, 204, 304 do not have message-body, ever */
  103. if (http->http_status) {
  104. int status = http->http_status->st_status;
  105. if (status < 200 || status == 204 || status == 304)
  106. flags |= HTTP_FLG_NO_BODY;
  107. }
  108. if (flags & HTTP_FLG_NO_BODY) {
  109. msg_mark_as_complete(msg, MSG_FLG_COMPLETE);
  110. return m;
  111. }
  112. if (http->http_transfer_encoding) {
  113. if (/* NOTE - there is really no Transfer-Encoding: identity in RFC 2616
  114. * but it was used in drafts...
  115. */
  116. http->http_transfer_encoding->k_items &&
  117. http->http_transfer_encoding->k_items[0] &&
  118. !su_casematch(http->http_transfer_encoding->k_items[0], "identity")) {
  119. http->http_flags |= MSG_FLG_CHUNKS;
  120. if (http->http_flags & MSG_FLG_STREAMING)
  121. msg_set_streaming(msg, msg_start_streaming);
  122. if (m)
  123. return m;
  124. return http_extract_chunk(msg, http, b, bsiz, eos);
  125. }
  126. }
  127. if (http->http_content_length)
  128. body_len = http->http_content_length->l_length;
  129. /* We cannot parse multipart/byteranges ... */
  130. else if (http->http_content_type && http->http_content_type->c_type &&
  131. su_casematch(http->http_content_type->c_type, "multipart/byteranges"))
  132. return -1;
  133. else if (MSG_IS_MAILBOX(flags)) /* message fragments */
  134. body_len = 0;
  135. else if (http->http_request)
  136. body_len = 0;
  137. else if (eos)
  138. body_len = bsiz;
  139. else
  140. return 0; /* XXX */
  141. if (body_len == 0) {
  142. msg_mark_as_complete(msg, MSG_FLG_COMPLETE);
  143. return m;
  144. }
  145. if (http->http_flags & MSG_FLG_STREAMING)
  146. msg_set_streaming(msg, msg_start_streaming);
  147. if (m)
  148. return m;
  149. m = msg_extract_payload(msg, http, NULL, body_len, b, bsiz, eos);
  150. if (m == -1)
  151. return -1;
  152. /* We have now all message fragments in place */
  153. http->http_flags |= MSG_FLG_FRAGS;
  154. if (bsiz >= body_len) {
  155. msg_mark_as_complete(msg, MSG_FLG_COMPLETE);
  156. }
  157. return m;
  158. }
  159. /** Extract a chunk.
  160. *
  161. * @retval -1 error
  162. * @retval 0 cannot proceed
  163. * @retval other number of bytes extracted
  164. */
  165. issize_t http_extract_chunk(msg_t *msg, http_t *http, char b[], isize_t bsiz, int eos)
  166. {
  167. size_t n;
  168. unsigned crlf, chunk_len;
  169. char *b0 = b, *s;
  170. union {
  171. msg_header_t *header;
  172. msg_payload_t *chunk;
  173. } h = { NULL };
  174. size_t bsiz0 = bsiz;
  175. if (bsiz == 0)
  176. return 0;
  177. /* We should be looking at an empty line followed by the chunk header */
  178. while ((crlf = CRLF_TEST(b))) {
  179. if (bsiz == 1 && crlf == 1 && b[0] == '\r' && !eos)
  180. return 0;
  181. if (crlf == bsiz) {
  182. if (eos) {
  183. msg_mark_as_complete(msg, MSG_FLG_COMPLETE | MSG_FLG_FRAGS);
  184. return (b - b0) + crlf;
  185. }
  186. else
  187. return 0;
  188. }
  189. assert(crlf < bsiz);
  190. /* Skip crlf */
  191. b += crlf; bsiz -= crlf;
  192. }
  193. /* Now, looking at the chunk header */
  194. n = strcspn(b, CRLF);
  195. if (!eos && n == bsiz)
  196. return 0;
  197. crlf = CRLF_TEST(b + n);
  198. if (n == 0) {
  199. if (crlf == bsiz && eos) {
  200. msg_mark_as_complete(msg, MSG_FLG_COMPLETE | MSG_FLG_FRAGS);
  201. return crlf;
  202. }
  203. else
  204. return -1; /* XXX - should we be more liberal? */
  205. }
  206. if (!eos && n + crlf == bsiz && (crlf == 0 || (crlf == 1 && b[n] == '\r')))
  207. return 0;
  208. chunk_len = strtoul(b, &s, 16);
  209. if (s == b)
  210. return -1;
  211. skip_ws(&s);
  212. if (s != b + n && s[0] != ';') /* Extra stuff that is not parameter */
  213. return -1;
  214. if (chunk_len == 0) { /* We found last-chunk */
  215. b += n + crlf, bsiz -= n + crlf;
  216. crlf = bsiz > 0 ? CRLF_TEST(b) : 0;
  217. if ((eos && bsiz == 0) || crlf == 2 ||
  218. (crlf == 1 && (bsiz > 1 || b[0] == '\n'))) {
  219. /* Shortcut - We got empty trailers */
  220. b += crlf;
  221. msg_mark_as_complete(msg, MSG_FLG_COMPLETE | MSG_FLG_FRAGS);
  222. } else {
  223. /* We have to parse trailers */
  224. http->http_flags |= MSG_FLG_TRAILERS;
  225. }
  226. return b - b0;
  227. }
  228. else {
  229. issize_t chunk;
  230. b += n + crlf;
  231. /* Extract chunk */
  232. chunk = msg_extract_payload(msg, http,
  233. &h.header, chunk_len + (b - b0),
  234. b0, bsiz0, eos);
  235. if (chunk != -1 && h.header) {
  236. assert(h.chunk->pl_data);
  237. h.chunk->pl_data += (b - b0);
  238. h.chunk->pl_len -= (b - b0);
  239. }
  240. return chunk;
  241. }
  242. }
  243. /** Parse HTTP version.
  244. *
  245. * The function http_version_d() parses a HTTP method.
  246. *
  247. * @retval 0 when successful,
  248. * @retval -1 upon an error.
  249. */
  250. int http_version_d(char **ss, char const **ver)
  251. {
  252. char *s = *ss;
  253. char const *result;
  254. int const version_size = sizeof(http_version_1_1) - 1;
  255. if (su_casenmatch(s, http_version_1_1, version_size) &&
  256. !IS_TOKEN(s[version_size])) {
  257. result = http_version_1_1;
  258. s += version_size;
  259. }
  260. else if (su_casenmatch(s, http_version_1_0, version_size) &&
  261. !IS_TOKEN(s[version_size])) {
  262. result = http_version_1_0;
  263. s += version_size;
  264. }
  265. else if (s[0] == '\0') {
  266. result = http_version_0_9;
  267. } else {
  268. /* Version consists of one or two tokens, separated by / */
  269. size_t l1 = 0, l2 = 0, n;
  270. result = s;
  271. l1 = span_token(s);
  272. for (n = l1; IS_LWS(s[n]); n++)
  273. s[n] = '\0';
  274. if (s[n] == '/') {
  275. for (n = n + 1; IS_LWS(s[n]); n++)
  276. {}
  277. l2 = span_token(s + n);
  278. n += l2;
  279. }
  280. if (l1 == 0)
  281. return -1;
  282. /* If there is extra ws between tokens, compact version */
  283. if (l2 > 0 && n > l1 + 1 + l2) {
  284. s[l1] = '/';
  285. memmove(s + l1 + 1, s + n - l2, l2);
  286. s[l1 + 1 + l2] = 0;
  287. /* Compare again with compacted version */
  288. if (su_casematch(s, http_version_1_1))
  289. result = http_version_1_1;
  290. else if (su_casematch(s, http_version_1_0))
  291. result = http_version_1_0;
  292. }
  293. s += n;
  294. }
  295. while (IS_LWS(*s)) *s++ = '\0';
  296. *ss = s;
  297. if (ver)
  298. *ver = result;
  299. return 0;
  300. }
  301. /** Calculate extra space required by version string */
  302. isize_t http_version_xtra(char const *version)
  303. {
  304. if (version == http_version_1_1)
  305. return 0;
  306. else if (version == http_version_1_0)
  307. return 0;
  308. else
  309. return MSG_STRING_SIZE(version);
  310. }
  311. /** Duplicate a transport string */
  312. void http_version_dup(char **pp, char const **dd, char const *s)
  313. {
  314. if (s == http_version_1_1)
  315. *dd = s;
  316. else if (s == http_version_1_0)
  317. *dd = s;
  318. else
  319. MSG_STRING_DUP(*pp, *dd, s);
  320. }
  321. /** Well-known HTTP method names. */
  322. static char const * const methods[] = {
  323. "<UNKNOWN>",
  324. http_method_name_get,
  325. http_method_name_post,
  326. http_method_name_head,
  327. http_method_name_options,
  328. http_method_name_put,
  329. http_method_name_delete,
  330. http_method_name_trace,
  331. http_method_name_connect,
  332. NULL,
  333. /* If you add something here, add also them to http_method_d! */
  334. };
  335. char const http_method_name_get[] = "GET";
  336. char const http_method_name_post[] = "POST";
  337. char const http_method_name_head[] = "HEAD";
  338. char const http_method_name_options[] = "OPTIONS";
  339. char const http_method_name_put[] = "PUT";
  340. char const http_method_name_delete[] = "DELETE";
  341. char const http_method_name_trace[] = "TRACE";
  342. char const http_method_name_connect[] = "CONNECT";
  343. char const *http_method_name(http_method_t method, char const *name)
  344. {
  345. if (method > 0 && (size_t)method < sizeof(methods)/sizeof(methods[0]))
  346. return methods[method];
  347. else if (method == 0)
  348. return name;
  349. else
  350. return NULL;
  351. }
  352. /**Parse a HTTP method name.
  353. *
  354. * The function @c http_method_d() parses a HTTP method, and returns a code
  355. * corresponding to the method. It stores the address of the first non-LWS
  356. * character after method name in @c *ss.
  357. *
  358. * @param ss pointer to pointer to string to be parsed
  359. * @param nname pointer to value-result parameter formethod name
  360. *
  361. * @note
  362. * If there is no whitespace after method name, the value in @a *nname
  363. * may not be NUL-terminated. The calling function @b must NUL terminate
  364. * the value by setting the @a **ss to NUL after first examining its value.
  365. *
  366. * @return The function @c http_method_d returns the method code if method
  367. * was identified, 0 (@c http_method_unknown) if method is not known, or @c -1
  368. * (@c http_method_invalid) if an error occurred.
  369. *
  370. * If the value-result argument @a nname is not @c NULL, http_method_d()
  371. * stores a pointer to the method name to it.
  372. */
  373. http_method_t http_method_d(char **ss, char const **nname)
  374. {
  375. char *s = *ss, c = *s;
  376. char const *name;
  377. int code = http_method_unknown;
  378. size_t n = 0;
  379. #define MATCH(s, m) (su_casenmatch(s, m, n = sizeof(m) - 1))
  380. if (c >= 'a' && c <= 'z')
  381. c += 'A' - 'a';
  382. switch (c) {
  383. case 'C': if (MATCH(s, "CONNECT")) code = http_method_connect; break;
  384. case 'D': if (MATCH(s, "DELETE")) code = http_method_delete; break;
  385. case 'G': if (MATCH(s, "GET")) code = http_method_get; break;
  386. case 'H': if (MATCH(s, "HEAD")) code = http_method_head; break;
  387. case 'O': if (MATCH(s, "OPTIONS")) code = http_method_options; break;
  388. case 'P': if (MATCH(s, "POST")) code = http_method_post;
  389. else
  390. if (MATCH(s, "PUT")) code = http_method_put;
  391. break;
  392. case 'T': if (MATCH(s, "TRACE")) code = http_method_trace; break;
  393. }
  394. #undef MATCH
  395. if (!code || IS_NON_WS(s[n])) {
  396. /* Unknown method */
  397. code = http_method_unknown;
  398. name = s;
  399. for (n = 0; IS_UNRESERVED(s[n]); n++)
  400. ;
  401. if (s[n]) {
  402. if (!IS_LWS(s[n]))
  403. return http_method_invalid;
  404. if (nname)
  405. s[n++] = '\0';
  406. }
  407. }
  408. else {
  409. name = methods[code];
  410. }
  411. while (IS_LWS(s[n]))
  412. n++;
  413. *ss = (s + n);
  414. if (nname) *nname = name;
  415. return (http_method_t)code;
  416. }
  417. /** Get method enum corresponding to method name */
  418. http_method_t http_method_code(char const *name)
  419. {
  420. /* Note that http_method_d() does not change string if nname is NULL */
  421. return http_method_d((char **)&name, NULL);
  422. }
  423. /**Parse HTTP query string.
  424. *
  425. * The function http_query_parse() searches for the given keys in HTTP @a
  426. * query. For each key, a query element (in the form name=value) is searched
  427. * from the query string. If a query element has a beginning matching with
  428. * the key, a copy of the rest of the element is returned in corresponding
  429. * return_value argument.
  430. *
  431. * @note The @a query string will be modified.
  432. *
  433. * @return
  434. * The function http_query_parse() returns number keys that matched within
  435. * the @a query string.
  436. */
  437. issize_t http_query_parse(char *query,
  438. /* char const *key, char **return_value, */
  439. ...)
  440. {
  441. va_list ap;
  442. char *q, *q_next;
  443. char *name, *value, **return_value;
  444. char const *key;
  445. size_t namelen, valuelen, keylen;
  446. isize_t N;
  447. int has_value;
  448. if (!query)
  449. return -1;
  450. for (q = query, N = 0; *q; q = q_next) {
  451. namelen = strcspn(q, "=&");
  452. valuelen = namelen + strcspn(q + namelen, "&");
  453. q_next = q + valuelen;
  454. if (*q_next)
  455. *q_next++ = '\0';
  456. value = q + namelen;
  457. has_value = (*value) != '\0'; /* is the part in form of name=value? */
  458. if (has_value)
  459. *value++ = '\0';
  460. name = url_unescape(q, q);
  461. if (has_value) {
  462. namelen = strlen(name);
  463. name[namelen] = '=';
  464. url_unescape(name + namelen + 1, value);
  465. }
  466. va_start(ap, query);
  467. while ((key = va_arg(ap, char const *))) {
  468. return_value = va_arg(ap, char **);
  469. keylen = strlen(key);
  470. if (strncmp(key, name, keylen) == 0) {
  471. *return_value = name + keylen;
  472. N++;
  473. }
  474. }
  475. va_end(ap);
  476. }
  477. return N;
  478. }