|
- /*
- * This file is part of the Sofia-SIP package
- *
- * Copyright (C) 2005 Nokia Corporation.
- *
- * Contact: Pekka Pessi <pekka.pessi@nokia.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1 of
- * the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- *
- */
- /**@CFILE url.c
- *
- * Implementation of basic URL parsing and handling.
- *
- * @author Pekka Pessi <Pekka.Pessi@nokia.com>
- *
- * @date Created: Thu Jun 29 22:44:37 2000 ppessi
- */
- #include "config.h"
- #include <sofia-sip/su_alloc.h>
- #include <sofia-sip/bnf.h>
- #include <sofia-sip/hostdomain.h>
- #include <sofia-sip/url.h>
- #include <sofia-sip/string0.h>
- #include <stdio.h>
- #include <string.h>
- #include <stdlib.h>
- #include <assert.h>
- #include <ctype.h>
- #include <limits.h>
- /**@def URL_PRINT_FORMAT
- * Format string used when printing url with printf().
- *
- * The macro URL_PRINT_FORMAT is used in format string of printf() or
- * similar printing functions. A URL can be printed like this:
- * @code
- * printf("%s received URL " URL_PRINT_FORMAT "\n",
- * my_name, URL_PRINT_ARGS(url));
- * @endcode
- */
- /** @def URL_PRINT_ARGS(u)
- * Argument list used when printing url with printf().
- *
- * The macro URL_PRINT_ARGS() is used to create a stdarg list for printf()
- * or similar printing functions. Using it, a URL can be printed like this:
- *
- * @code
- * printf("%s received URL " URL_PRINT_FORMAT "\n",
- * my_name, URL_PRINT_ARGS(url));
- * @endcode
- */
/* Character classes of the generic URI syntax, as string literals. */
#define RESERVED        ";/?:@&=+$,"
#define DELIMS          "<>#%\""
#define UNWISE          "{}|\\^[]`"
#define EXCLUDED        RESERVED DELIMS UNWISE

#define UNRESERVED      "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
                        "abcdefghijklmnopqrstuvwxyz" \
                        "0123456789" \
                        "-_.!~*'()"

/** Test whether character @a u must be escaped.
 *
 * The three masks are bitmaps for the ranges 32..63, 64..95 and 96..127;
 * the most significant bit of each mask stands for the first character of
 * its range. Control characters, space, DEL and everything above DEL are
 * always excluded.
 *
 * @a u is evaluated several times: pass a plain variable.
 * The shifts use an unsigned one, so bit 31 ('@' and '`') is well defined.
 */
#define IS_EXCLUDED(u, m32, m64, m96)                           \
  ((u) <= ' '                                                   \
   || (u) >= '\177'                                             \
   || ((u) < 64 ? ((m32) & (1U << (63 - (u))))                  \
       : ((u) < 96 ? ((m64) & (1U << (95 - (u))))               \
          : /* u < 128 */ ((m96) & (1U << (127 - (u)))))) != 0)

/** Initialize the three masks from an optional list of reserved characters.
 *
 * With a NULL @a reserved the full reserved+excluded set is used; otherwise
 * the masks start from the excluded-only set and every character listed in
 * @a reserved is added. @a reserved must be a modifiable pointer lvalue:
 * it is advanced to the end of the list.
 */
#define MASKS_WITH_RESERVED(reserved, m32, m64, m96)            \
  do {                                                          \
    if ((reserved) == NULL) {                                   \
      m32 = 0xbe19003f, m64 = 0x8000001e, m96 = 0x8000001d;     \
    } else {                                                    \
      m32 = 0xb400000a, m64 = 0x0000001e, m96 = 0x8000001d;     \
                                                                \
      for (; (reserved)[0]; (reserved)++) {                     \
        unsigned r = (reserved)[0];                             \
        RESERVE(r, m32, m64, m96);                              \
      }                                                         \
    }                                                           \
  } while (0)

/** Mark character code @a r (32..127) as reserved in the masks. */
#define RESERVE(r, m32, m64, m96)                               \
  do {                                                          \
    if ((r) < 32)                                               \
      ;                                                         \
    else if ((r) < 64)                                          \
      m32 |= 1U << (63 - (r));                                  \
    else if ((r) < 96)                                          \
      m64 |= 1U << (95 - (r));                                  \
    else if ((r) < 128)                                         \
      m96 |= 1U << (127 - (r));                                 \
  } while (0)

/** Clear every character listed in @a allowed from the masks.
 *
 * @a allowed may be NULL; otherwise it must be a modifiable pointer lvalue,
 * it is advanced to the end of the list.
 */
#define MASKS_WITH_ALLOWED(allowed, mask32, mask64, mask96)     \
  do {                                                          \
    if (allowed) {                                              \
      for (; (allowed)[0]; (allowed)++) {                       \
        unsigned a = (allowed)[0];                              \
        ALLOW(a, mask32, mask64, mask96);                       \
      }                                                         \
    }                                                           \
  } while (0)

/** Mark character code @a a (32..127) as allowed in the masks. */
#define ALLOW(a, mask32, mask64, mask96)                        \
  do {                                                          \
    if ((a) < 32)                                               \
      ;                                                         \
    else if ((a) < 64)                                          \
      mask32 &= ~(1U << (63 - (a)));                            \
    else if ((a) < 96)                                          \
      mask64 &= ~(1U << (95 - (a)));                            \
    else if ((a) < 128)                                         \
      mask96 &= ~(1U << (127 - (a)));                           \
  } while (0)

#define NUL '\0'
#define NULNULNUL '\0', '\0', '\0'

/* Masks excluding reserved, delimiter and unwise characters. */
#define RMASK1 0xbe19003f
#define RMASK2 0x8000001e
#define RMASK3 0x8000001d

/* Mask triplets, usable wherever three mask arguments are expected. */
#define RESERVED_MASK 0xbe19003f, 0x8000001e, 0x8000001d
#define URIC_MASK     0xb400000a, 0x0000001e, 0x8000001d

/* @a m is a mask triplet such as RESERVED_MASK; it expands to three
 * arguments before IS_EXCLUDED() is rescanned. */
#define IS_EXCLUDED_MASK(u, m) IS_EXCLUDED(u, m)
- /* Internal prototypes */
- static char *url_canonize(char *d, char const *s, size_t n,
- unsigned syn33,
- char const allowed[]);
- static char *url_canonize2(char *d, char const *s, size_t n,
- unsigned syn33,
- unsigned m32, unsigned m64, unsigned m96);
- static int url_tel_cmp_numbers(char const *A, char const *B);
- /**Test if string contains excluded or url-reserved characters.
- *
- *
- *
- * @param s string to be searched
- *
- * @retval 0 if no reserved characters were found.
- * @retval l if a reserved character was found.
- */
- int url_reserved_p(char const *s)
- {
- if (s)
- while (*s) {
- unsigned char u = *s++;
- if (IS_EXCLUDED(u, RMASK1, RMASK2, RMASK3))
- return 1;
- }
- return 0;
- }
- /** Calculate length of string when escaped with %-notation.
- *
- * Calculate the length of string @a s when the excluded or reserved
- * characters in it have been escaped.
- *
- * @param s String with reserved URL characters. [IN
- * @param reserved Optional array of reserved characters [IN]
- *
- * @return
- * The number of characters in corresponding but escaped string.
- *
- * You can handle a part of URL with reserved characters like this:
- * @code
- * if (url_reserved_p(s)) {
- * n = malloc(url_esclen(s, NULL) + 1);
- * if (n) url_escape(n, s);
- * } else {
- * n = malloc(strlen(s) + 1);
- * if (n) strcpy(n, s);
- * }
- * @endcode
- */
- isize_t url_esclen(char const *s, char const reserved[])
- {
- size_t n;
- unsigned mask32, mask64, mask96;
- MASKS_WITH_RESERVED(reserved, mask32, mask64, mask96);
- for (n = 0; s && *s; n++) {
- unsigned char u = *s++;
- if (IS_EXCLUDED(u, mask32, mask64, mask96))
- n += 2;
- }
- return (isize_t)n;
- }
/** Escape a string.
 *
 * Copies the string @a s to the array @a d and appends a terminating NUL.
 * Every character selected by the mask built from @a reserved is written
 * as a three-character "%XX" sequence with uppercase hexadecimal digits;
 * for instance, @c "$%#" is copied as @c "%24%25%23". The destination must
 * be large enough for the escaped copy (see url_esclen()).
 *
 * @param d         Destination buffer [OUT]
 * @param s         String to be copied [IN]; NULL yields an empty string
 * @param reserved  Array of reserved characters [IN], or NULL for the
 *                  default set
 *
 * @return Pointer to the destination array.
 */
char *url_escape(char *d, char const *s, char const reserved[])
{
  static char const hex_digits[] = "0123456789ABCDEF";
  char *out = d;
  unsigned m32, m64, m96;

  MASKS_WITH_RESERVED(reserved, m32, m64, m96);

  if (s != NULL) {
    for (; *s != '\0'; s++) {
      unsigned char ch = *s;

      if (IS_EXCLUDED(ch, m32, m64, m96)) {
        out[0] = '%';
        out[1] = hex_digits[ch >> 4];
        out[2] = hex_digits[ch & 15];
        out += 3;
      }
      else {
        *out++ = ch;
      }
    }
  }

  *out = '\0';

  return d;
}
/**Unescape url-escaped string fragment.
 *
 * Unescapes at most @a n characters from string @a s into the buffer @a d.
 * Processing stops early at a NUL in @a s. Every complete %-escaped triplet
 * with two hexadecimal digits is replaced by the byte it encodes, for
 * instance, @c "%40%25%23" is copied as @c "@%#"; any other '%' is copied
 * as such. This function does not store a terminating NUL in @a d.
 *
 * @a d may be NULL (only the length is calculated) or equal to @a s
 * (unescape in place). @a n bytes is always enough for the result.
 *
 * @param d  destination buffer
 * @param s  string to be unescaped
 * @param n  maximum number of characters to unescape
 *
 * @return Length of unescaped string
 *
 * @NEW_1_12_4.
 */
size_t url_unescape_to(char *d, char const *s, size_t n)
{
  size_t in, out;

  if (s == NULL)
    return 0;

  /* Everything before the first '%' can be copied in one go */
  in = out = strncspn(s, n, "%");

  if (d && d != s)
    memmove(d, s, in);

  while (in < n) {
    char ch = s[in++];

    if (ch == '\0')
      break;

    if (ch == '%' && in + 1 < n && IS_HEX(s[in]) && IS_HEX(s[in + 1])) {
#define UNHEX(a) (a - (a >= 'a' ? 'a' - 10 : (a >= 'A' ? 'A' - 10 : '0')))
      ch = (UNHEX(s[in]) << 4) | UNHEX(s[in + 1]);
#undef UNHEX
      in += 2;
    }

    if (d)
      d[out] = ch;
    out++;
  }

  return out;
}
/**Unescape url-escaped string.
 *
 * Unescapes the whole string @a s into the buffer @a d with
 * url_unescape_to() and then appends the terminating NUL. All %-escaped
 * triplets in @a s are unescaped, for instance, @c "%40%25%23" is copied
 * as @c "@%#". The destination array @a d must be large enough to receive
 * the unescaped copy.
 *
 * @param d  destination buffer (nothing is stored if NULL)
 * @param s  string to be copied
 *
 * @return Pointer to the destination buffer.
 */
char *url_unescape(char *d, char const *s)
{
  size_t const length = url_unescape_to(d, s, SIZE_MAX);

  if (d == NULL)
    return NULL;

  d[length] = '\0';

  return d;
}
- /** Canonize a URL component */
- static
- char *url_canonize(char *d, char const *s, size_t n,
- unsigned syn33,
- char const allowed[])
- {
- unsigned mask32 = 0xbe19003f, mask64 = 0x8000001e, mask96 = 0x8000001d;
- MASKS_WITH_ALLOWED(allowed, mask32, mask64, mask96);
- return url_canonize2(d, s, n, syn33, mask32, mask64, mask96);
- }
/** Bit for character @a c in a "syn33" bitmap.
 *
 * The bitmap covers the 32 characters '!' (33) .. '@' (64).
 */
#define SYN33(c) (1U << ((c) - 33))

/** Test whether character @a c is marked in the bitmap @a syn33.
 *
 * Characters outside 33..64 have no bit in the map and never match; the
 * range test also keeps the shift count within 0..31, so the shift is
 * always defined. @a c is evaluated several times.
 */
#define IS_SYN33(syn33, c) \
  ((c) >= 33 && (c) <= 64 && ((syn33) & (1U << ((c) - 33))) != 0)

#define UNHEX(a) (a - (a >= 'a' ? 'a' - 10 : (a >= 'A' ? 'A' - 10 : '0')))

/** Canonize a URL component (with precomputed mask).
 *
 * Copies at most @a n characters of @a s to @a d:
 * - an unescaped character is copied as such; if it is excluded by the
 *   masks and not marked in @a syn33, canonization fails,
 * - a %-escape of a character that is not excluded by the masks is replaced
 *   by the character itself,
 * - any other %-escape is kept, with its hex digits converted to uppercase,
 * - a '%' not followed by two hex digits is an error.
 *
 * When @a d equals @a s, the prefix before the first '%' is skipped
 * without being checked.
 *
 * NOTE(review): the two characters after a '%' are read without being
 * checked against @a n.
 *
 * @return Pointer to the terminating NUL stored in @a d, or NULL upon an
 *         error.
 */
static
char *url_canonize2(char *d, char const * const s, size_t n,
                    unsigned syn33,
                    unsigned m32, unsigned m64, unsigned m96)
{
  size_t i = 0;

  /* Check the bound first so that s[n] is never read */
  if (d == s)
    for (; i < n && s[i]; d++, i++)
      if (s[i] == '%')
        break;

  for (; i < n && s[i]; d++, i++) {
    unsigned char c = s[i], h1, h2;

    if (c != '%') {
      if (!IS_SYN33(syn33, c) && IS_EXCLUDED(c, m32, m64, m96))
        return NULL;
      *d = c;
      continue;
    }

    h1 = s[i + 1];
    if (!h1) {
      *d = '\0';
      return NULL;
    }

    h2 = s[i + 2];

    if (!IS_HEX(h1) || !IS_HEX(h2)) {
      *d = '\0';
      return NULL;
    }

    c = (UNHEX(h1) << 4) | UNHEX(h2);

    if (!IS_EXCLUDED(c, m32, m64, m96)) {
      /* Convert hex to normal character */
      *d = c, i += 2;
      continue;
    }

    /* Convert hex to uppercase; h1 and h2 are known to be hex digits */
    if (h1 >= 'a' /* && h1 <= 'f' */)
      h1 = h1 - 'a' + 'A';
    if (h2 >= 'a' /* && h2 <= 'f' */)
      h2 = h2 - 'a' + 'A';

    d[0] = '%', d[1] = h1, d[2] = h2;

    d += 2, i += 2;
  }

  *d = '\0';

  return d;
}

#undef UNHEX
#define UNHEX(a) (a - (a >= 'a' ? 'a' - 10 : (a >= 'A' ? 'A' - 10 : '0')))

/** Canonize a URL component (with precomputed mask).
 *
 * This version does not flag error if *s contains character that should
 * be escaped: unescaped characters are always copied as such. A %-escape
 * of a character not excluded by the masks is replaced by the character
 * itself; any other %-escape is kept with uppercase hex digits. A '%' not
 * followed by two hex digits is an error.
 *
 * When @a d equals @a s, the prefix before the first '%' is skipped.
 *
 * @return Pointer to the terminating NUL stored in @a d, or NULL upon an
 *         error.
 */
static
char *url_canonize3(char *d, char const * const s, size_t n,
                    unsigned m32, unsigned m64, unsigned m96)
{
  size_t i = 0;

  /* Check the bound first so that s[n] is never read */
  if (d == s)
    for (; i < n && s[i]; d++, i++)
      if (s[i] == '%')
        break;

  for (; i < n && s[i]; d++, i++) {
    unsigned char c = s[i], h1, h2;

    if (c != '%') {
      *d = c;
      continue;
    }

    h1 = s[i + 1];
    if (!h1) {
      *d = '\0';
      return NULL;
    }

    h2 = s[i + 2];

    if (!IS_HEX(h1) || !IS_HEX(h2)) {
      *d = '\0';
      return NULL;
    }

    c = (UNHEX(h1) << 4) | UNHEX(h2);

    if (!IS_EXCLUDED(c, m32, m64, m96)) {
      /* Convert hex to normal character */
      *d = c, i += 2;
      continue;
    }

    /* Convert hex to uppercase; h1 and h2 are known to be hex digits */
    if (h1 >= 'a' /* && h1 <= 'f' */)
      h1 = h1 - 'a' + 'A';
    if (h2 >= 'a' /* && h2 <= 'f' */)
      h2 = h2 - 'a' + 'A';

    d[0] = '%', d[1] = h1, d[2] = h2;

    d += 2, i += 2;
  }

  *d = '\0';

  return d;
}

#undef UNHEX
- /** Get URL scheme. */
- char const* url_scheme(enum url_type_e url_type)
- {
- switch (url_type) {
- case url_any: return "*";
- case url_sip: return "sip";
- case url_sips: return "sips";
- case url_tel: return "tel";
- case url_fax: return "fax";
- case url_modem: return "modem";
- case url_http: return "http";
- case url_https: return "https";
- case url_ftp: return "ftp";
- case url_file: return "file";
- case url_rtsp: return "rtsp";
- case url_rtspu: return "rtspu";
- case url_mailto: return "mailto";
- case url_im: return "im";
- case url_pres: return "pres";
- case url_cid: return "cid";
- case url_msrp: return "msrp";
- case url_msrps: return "msrps";
- case url_urn: return "urn";
- case url_wv: return "wv";
- default:
- assert(url_type == url_unknown);
- return NULL;
- }
- }
- su_inline
- int url_type_is_opaque(enum url_type_e url_type)
- {
- return
- url_type == url_invalid ||
- url_type == url_tel ||
- url_type == url_modem ||
- url_type == url_fax ||
- url_type == url_cid;
- }
- /** Init an url as given type */
- void url_init(url_t *url, enum url_type_e type)
- {
- memset(url, 0, sizeof(*url));
- url->url_type = type;
- if (type > url_unknown) {
- char const *scheme = url_scheme((enum url_type_e)url->url_type);
- if (scheme)
- url->url_scheme = scheme;
- }
- }
- /** Get url type */
- su_inline
- enum url_type_e url_get_type(char const *scheme, size_t len)
- {
- #define test_scheme(s) \
- if (len == strlen(#s) && !strncasecmp(scheme, #s, len)) return url_##s
- switch (scheme[0]) {
- case '*': if (strcmp(scheme, "*") == 0) return url_any;
- case 'c': case 'C':
- test_scheme(cid); break;
- case 'f': case 'F':
- test_scheme(ftp); test_scheme(file); test_scheme(fax); break;
- case 'h': case 'H':
- test_scheme(http); test_scheme(https); break;
- case 'i': case 'I':
- test_scheme(im); break;
- case 'm': case 'M':
- test_scheme(mailto); test_scheme(modem);
- test_scheme(msrp); test_scheme(msrps); break;
- case 'p': case 'P':
- test_scheme(pres); break;
- case 'r': case 'R':
- test_scheme(rtsp); test_scheme(rtspu); break;
- case 's': case 'S':
- test_scheme(sip); test_scheme(sips); break;
- case 't': case 'T':
- test_scheme(tel); break;
- case 'u': case 'U':
- test_scheme(urn); break;
- case 'w': case 'W':
- test_scheme(wv); break;
- default: break;
- }
- #undef test_scheme
- if (len != span_unreserved(scheme))
- return url_invalid;
- else
- return url_unknown;
- }
- /**
- * Decode a URL.
- *
- * This function decodes a (SIP) URL string to a url_t structure.
- *
- * The string is split in place: the separator characters are overwritten
- * with NUL and the fields of @a url are set to point into @a s. Only the
- * scheme and, for the URL types listed in the port switch below, the port
- * are canonized here.
- *
- * @param url structure to store the parsing result
- * @param s NUL-terminated string to be parsed
- *
- * @note The parsed string @a s will be modified when parsing it.
- *
- * @retval 0 if successful,
- * @retval -1 otherwise.
- */
- static
- int _url_d(url_t *url, char *s)
- {
- size_t n, p;
- char rest_c, *host, *user;
- int have_authority = 1; /* cleared below when url_type_is_opaque() says so */
- memset(url, 0, sizeof(*url));
- if (strcmp(s, "*") == 0) {
- url->url_type = url_any;
- url->url_scheme = "*";
- return 0;
- }
- n = strcspn(s, ":/?#"); /* length of a possible scheme prefix */
- if (n && s[n] == ':') {
- char *scheme;
- url->url_scheme = scheme = s; s[n] = '\0'; s = s + n + 1; /* cut scheme, continue after ':' */
- if (!(scheme = url_canonize(scheme, scheme, SIZE_MAX, 0, "+")))
- return -1;
- n = scheme - url->url_scheme; /* url_canonize() returned the end of the canonized scheme */
- url->url_type = url_get_type(url->url_scheme, n);
- have_authority = !url_type_is_opaque((enum url_type_e)url->url_type);
- }
- else {
- url->url_type = url_unknown;
- }
- user = NULL, host = s;
- if (url->url_type == url_sip || url->url_type == url_sips) {
- /* SIP URL may have /;? in user part but no path */
- /* user-unreserved = "&" / "=" / "+" / "$" / "," / ";" / "?" / "/" */
- /* Some #*@#* phones include unescaped # there, too */
- n = strcspn(s, "@/;?#");
- p = strcspn(s + n, "@"); /* look for '@' even after a /;?# */
- if (s[n + p] == '@') {
- n += p;
- user = s;
- host = s + n + 1;
- }
- n += strcspn(s + n, "/;?#"); /* end of the user@host:port part */
- }
- else if (have_authority) {
- if (url->url_type == url_wv) {
- /* WV URL may have / in user part */
- n = strcspn(s, "@#?;");
- if (s[n] == '@') {
- user = s;
- host = s + n + 1;
- n += strcspn(s + n, ";?#");
- }
- }
- else if (host[0] == '/' && host[1] != '/') {
- /* foo:/bar or /bar - no authority, just path */
- url->url_root = '/'; /* Absolute path */
- host = NULL, n = 0;
- }
- else {
- if (host[0] == '/' && host[1] == '/') {
- /* We have authority, / / foo or foo */
- host += 2; s += 2, url->url_root = '/';
- n = strcspn(s, "/?#@[]");
- }
- else
- n = strcspn(s, "@;/?#");
- if (s[n] == '@')
- user = host, host = user + n + 1;
- n += strcspn(s + n, ";/?#"); /* Find path, query and/or fragment */
- }
- }
- else /* !have_authority */ {
- user = host, host = NULL; /* the whole opaque part is stored as the user */
- if (url->url_type != url_invalid)
- n = strcspn(s, "/;?#"); /* Find params, query and/or fragment */
- else
- n = strcspn(s, "#");
- }
- rest_c = s[n]; s[n] = 0; s = rest_c ? s + n + 1 : NULL; /* remember the separator and cut the string there */
- if (user) {
- if (host) host[-1] = '\0'; /* overwrite the '@' between user and host */
- url->url_user = user;
- if (url->url_type != url_unknown) {
- n = strcspn(user, ":");
- if (user[n]) {
- user[n] = '\0';
- url->url_password = user + n + 1;
- }
- }
- }
- if (host) {
- url->url_host = host;
- /* IPv6 (and in some cases, IPv4) addresses are quoted with [] */
- if (host[0] == '[') {
- n = strcspn(host, "]");
- if (host[n] && (host[n + 1] == '\0' || host[n + 1] == ':'))
- n++; /* include the closing ']' in the host */
- else
- n = 0; /* malformed bracketed host: handled like an empty host below */
- }
- else {
- n = strcspn(host, ":");
- }
- /* We allow empty host by default */
- if (n == 0) switch (url->url_type) {
- case url_sip:
- case url_sips:
- case url_im:
- case url_pres:
- return -1;
- default:
- break;
- }
- if (host[n] == ':') {
- char *port = host + n + 1;
- url->url_port = port;
- switch (url->url_type) {
- case url_any:
- case url_sip:
- case url_sips:
- case url_http:
- case url_https:
- case url_ftp:
- case url_file:
- case url_rtsp:
- case url_rtspu:
- if (!url_canonize2(port, port, SIZE_MAX, 0, RESERVED_MASK))
- return -1;
- /* Check that port is really numeric or wildcard */
- /* Port can be *digit, empty string or "*" */
- while (*port >= '0' && *port <= '9')
- port++;
- if (port != url->url_port) {
- if (port[0] != '\0')
- return -1;
- }
- else if (port[0] == '\0')
- /* empty string */;
- else if (port[0] == '*' && port[1] == '\0')
- /* wildcard */;
- else
- return -1;
- }
- host[n] = 0; /* terminate the host at the ':' */
- }
- }
- if (rest_c == '/') {
- url->url_path = s; n = strcspn(s, "?#");
- rest_c = s[n]; s[n] = 0; s = rest_c ? s + n + 1 : NULL;
- }
- if (rest_c == ';') {
- url->url_params = s; n = strcspn(s, "?#");
- rest_c = s[n]; s[n] = 0; s = rest_c ? s + n + 1 : NULL;
- }
- if (rest_c == '?') {
- url->url_headers = s; n = strcspn(s, "#");
- rest_c = s[n]; s[n] = 0; s = rest_c ? s + n + 1 : NULL;
- }
- if (rest_c == '#') {
- url->url_fragment = s;
- rest_c = '\0'; /* everything has been consumed */
- }
- if (rest_c)
- return -1;
- return 0;
- }
- /* Unreserved things */
- /**
- * Decode a URL.
- *
- * This function decodes a URL string to a url_t structure.
- *
- * @param url structure to store the parsing result
- * @param s NUL-terminated string to be parsed
- *
- * @note The parsed string @a s will be modified when parsing it.
- *
- * @retval 0 if successful,
- * @retval -1 otherwise.
- */
- int url_d(url_t *url, char *s)
- {
- if (url == NULL || _url_d(url, s) < 0)
- return -1;
- /* Canonize URL */
- /* scheme is canonized by _url_d() */
- if (url->url_type == url_sip || url->url_type == url_sips) {
- # define SIP_USER_UNRESERVED "&=+$,;?/"
- s = (char *)url->url_user;
- if (s && !url_canonize(s, s, SIZE_MAX, 0, SIP_USER_UNRESERVED))
- return -1;
- /* Having different charset in user and password does not make sense */
- /* but that is how it is defined in RFC 3261 */
- # define SIP_PASS_UNRESERVED "&=+$,"
- s = (char *)url->url_password;
- if (s && !url_canonize(s, s, SIZE_MAX, 0, SIP_PASS_UNRESERVED))
- return -1;
- }
- else {
- # define USER_UNRESERVED "&=+$,;"
- s = (char *)url->url_user;
- if (s && !url_canonize(s, s, SIZE_MAX, 0, USER_UNRESERVED))
- return -1;
- # define PASS_UNRESERVED "&=+$,;:"
- s = (char *)url->url_password;
- if (s && !url_canonize(s, s, SIZE_MAX, 0, PASS_UNRESERVED))
- return -1;
- }
- s = (char *)url->url_host;
- if (s && !url_canonize2(s, s, SIZE_MAX, 0, RESERVED_MASK))
- return -1;
- /* port is canonized by _url_d() */
- s = (char *)url->url_path;
- if (s && !url_canonize(s, s, SIZE_MAX,
- /* Allow all URI characters but ? */
- /* Allow unescaped /;?@, - but do not convert */
- SYN33('/') | SYN33(';') | SYN33('=') | SYN33('@') |
- SYN33(','),
- /* Convert escaped :&+$ to unescaped */
- ":&+$"))
- return -1;
- s = (char *)url->url_params;
- if (s && !url_canonize(s, s, SIZE_MAX,
- /* Allow all URI characters but ? */
- /* Allow unescaped ;=@, - but do not convert */
- SYN33(';') | SYN33('=') | SYN33('@') | SYN33(','),
- /* Convert escaped /:&+$ to unescaped */
- "/:&+$"))
- return -1;
- /* Unhex alphanumeric and unreserved URI characters */
- s = (char *)url->url_headers;
- if (s && !url_canonize3(s, s, SIZE_MAX, RESERVED_MASK))
- return -1;
- /* Allow all URI characters (including reserved ones) */
- s = (char *)url->url_fragment;
- if (s && !url_canonize2(s, s, SIZE_MAX, 0, URIC_MASK))
- return -1;
- return 0;
- }
- /** Encode an URL.
- *
- * The function url_e() combines a URL from substrings in url_t structure
- * according the @ref url_syntax "URL syntax" presented above. The encoded
- * @a url is stored in a @a buffer of @a n bytes.
- *
- * @param buffer memory area to store the encoded @a url.
- * @param n size of @a buffer.
- * @param url URL to be encoded.
- *
- * @return
- * Return the number of bytes in the encoding.
- *
- * @note The function follows the convention set by C99 snprintf(). Even if
- * the result does not fit into the @a buffer and it is truncated, the
- * function returns the number of bytes in an untruncated encoding.
- */
- issize_t url_e(char buffer[], isize_t n, url_t const *url)
- {
- size_t i;
- char *b = buffer;
- size_t m = n;
- int do_copy = n > 0;
- if (url == NULL)
- return -1;
- if (URL_STRING_P(url)) {
- char const *u = (char *)url;
- i = strlen(u);
- if (!buffer)
- return i;
- if (i >= n) {
- memcpy(buffer, u, n - 2);
- buffer[n - 1] = '\0';
- } else {
- memcpy(buffer, u, i + 1);
- }
- return i;
- }
- if (url->url_type == url_any) {
- if (b && m > 0) {
- if (m > 1) strcpy(b, "*"); else b[0] = '\0';
- }
- return 1;
- }
- if (url->url_scheme && url->url_scheme[0]) {
- i = strlen(url->url_scheme) + 1;
- if (do_copy && (do_copy = i <= n)) {
- memcpy(b, url->url_scheme, i - 1);
- b[i - 1] = ':';
- }
- b += i; n -= i;
- }
- if (url->url_root && (url->url_host || url->url_user)) {
- if (do_copy && (do_copy = 2 <= n))
- memcpy(b, "//", 2);
- b += 2; n -= 2;
- }
- if (url->url_user) {
- i = strlen(url->url_user);
- if (do_copy && (do_copy = i <= n))
- memcpy(b, url->url_user, i);
- b += i; n -= i;
- if (url->url_password) {
- if (do_copy && (do_copy = 1 <= n))
- *b = ':';
- b++; n--;
- i = strlen(url->url_password);
- if (do_copy && (do_copy = i <= n))
- memcpy(b, url->url_password, i);
- b += i; n -= i;
- }
- if (url->url_host) {
- if (do_copy && (do_copy = 1 <= n))
- *b = '@';
- b++; n--;
- }
- }
- if (url->url_host) {
- i = strlen(url->url_host);
- if (do_copy && (do_copy = i <= n))
- memcpy(b, url->url_host, i);
- b += i; n -= i;
- if (url->url_port) {
- i = strlen(url->url_port) + 1;
- if (do_copy && (do_copy = i <= n)) {
- b[0] = ':';
- memcpy(b + 1, url->url_port, i - 1);
- }
- b += i; n -= i;
- }
- }
- if (url->url_path) {
- if (url->url_root) {
- if (do_copy && (do_copy = 1 <= n))
- b[0] = '/';
- b++, n--;
- }
- i = strlen(url->url_path);
- if (do_copy && (do_copy = i < n))
- memcpy(b, url->url_path, i);
- b += i; n -= i;
- }
- {
- static char const sep[] = ";?#";
- char const *pp[3];
- size_t j;
- pp[0] = url->url_params;
- pp[1] = url->url_headers;
- pp[2] = url->url_fragment;
- for (j = 0; j < 3; j++) {
- char const *p = pp[j];
- if (!p) continue;
- i = strlen(p) + 1;
- if (do_copy && (do_copy = i <= n)) {
- *b = sep[j];
- memcpy(b + 1, p, i - 1);
- }
- b += i; n -= i;
- }
- }
- if (do_copy && (1 <= n))
- *b = '\0';
- else if (buffer && m > 0)
- buffer[m - 1] = '\0';
- assert((size_t)(b - buffer) == (size_t)(m - n));
- /* This follows the snprintf(C99) return value,
- * Number of characters written (excluding NUL)
- */
- return b - buffer;
- }
- /** Calculate the length of URL when encoded.
- *
- */
- isize_t url_len(url_t const * url)
- {
- size_t rv = 0;
- if (url->url_scheme) rv += strlen(url->url_scheme) + 1; /* plus ':' */
- if (url->url_user) {
- rv += strlen(url->url_user);
- if (url->url_password)
- rv += strlen(url->url_password) + 1; /* plus ':' */
- rv += url->url_host != NULL; /* plus '@' */
- }
- if (url->url_host) rv += strlen(url->url_host);
- if (url->url_port) rv += strlen(url->url_port) + 1; /* plus ':' */
- if (url->url_path) rv += strlen(url->url_path) + 1; /* plus initial / */
- if (url->url_params) rv += strlen(url->url_params) + 1; /* plus initial ; */
- if (url->url_headers) rv += strlen(url->url_headers) + 1; /* plus '?' */
- if (url->url_fragment) rv += strlen(url->url_fragment) + 1; /* plus '#' */
- return rv;
- }
- /**@def URL_E(buf, end, url)
- * Encode an URL: use @a buf up to @a end.
- * @hideinitializer
- */
- /**
- * Calculate the size of strings associated with a #url_t sructure.
- *
- * @param url pointer to a #url_t structure or string
- * @return Number of bytes for URL
- */
- isize_t url_xtra(url_t const *url)
- {
- size_t xtra;
- if (URL_STRING_P(url)) {
- xtra = strlen((char const *)url) + 1;
- }
- else {
- size_t len_scheme, len_user, len_password,
- len_host, len_port, len_path, len_params,
- len_headers, len_fragment;
- len_scheme = (url->url_type <= url_unknown && url->url_scheme) ?
- strlen(url->url_scheme) + 1 : 0;
- len_user = url->url_user ? strlen(url->url_user) + 1 : 0;
- len_password = url->url_password ? strlen(url->url_password) + 1 : 0;
- len_host = url->url_host ? strlen(url->url_host) + 1 : 0;
- len_port = url->url_port ? strlen(url->url_port) + 1 : 0;
- len_path = url->url_path ? strlen(url->url_path) + 1 : 0;
- len_params = url->url_params ? strlen(url->url_params) + 1 : 0;
- len_headers = url->url_headers ? strlen(url->url_headers) + 1 : 0;
- len_fragment = url->url_fragment ? strlen(url->url_fragment) + 1 : 0;
- xtra =
- len_scheme + len_user + len_password + len_host + len_port +
- len_path + len_params + len_headers + len_fragment;
- }
- return xtra;
- }
- su_inline
- char *copy(char *buf, char *end, char const *src)
- {
- #if HAVE_MEMCCPY
- char *b = memccpy(buf, src, '\0', end - buf);
- if (b)
- return b;
- else
- return end + strlen(src + (end - buf)) + 1;
- #else
- for (; buf < end && (*buf = *src); buf++, src++)
- ;
- if (buf >= end)
- while (*src++)
- buf++;
- return buf + 1;
- #endif
- }
- /**
- * Duplicate the url.
- *
- * The function url_dup() copies the url structure @a src and the strings
- * attached to it to @a dst. The non-constant strings in @a src are copied
- * to @a buf. If the size of duplicated strings exceed @a bufsize, the
- * corresponding string fields in @a dst are set to NULL.
- *
- * If @a src is a string, it is copied to @a buf and decoded into @a dst
- * with url_d().
- *
- * The calling function can calculate the size of buffer required by calling
- * url_dup() with zero as @a bufsize and NULL as @a dst.
- * @param buf Buffer for non-constant strings copied from @a src.
- * @param bufsize Size of @a buf.
- * @param dst Destination URL structure.
- * @param src Source URL structure.
- *
- * @return Number of characters required for
- * duplicating the strings in @a src, or -1 if an error
- * occurred.
- */
- issize_t url_dup(char *buf, isize_t bufsize, url_t *dst, url_t const *src)
- {
- if (!src && !dst)
- return -1;
- else if (URL_STRING_P(src)) {
- size_t n = strlen((char *)src) + 1;
- if (n > bufsize || dst == NULL)
- return n; /* size query, or the string does not fit */
- strcpy(buf, (char *)src);
- memset(dst, 0, sizeof(*dst));
- if (url_d(dst, buf) < 0)
- return -1;
- return n;
- }
- else {
- char *b = buf;
- char *end = b + bufsize;
- char const **dstp;
- char const * const *srcp;
- url_t dst0[1]; /* scratch destination used when dst is NULL */
- if (dst == NULL)
- dst = dst0;
- memset(dst, 0, sizeof(*dst));
- if (!src)
- return 0;
- memset(dst->url_pad, 0, sizeof dst->url_pad);
- dst->url_type = src->url_type;
- dst->url_root = src->url_root;
- dstp = &dst->url_scheme; /* dstp and srcp walk the string fields in parallel, */
- srcp = &src->url_scheme; /* from url_scheme up to and including url_fragment */
- if (dst->url_type > url_unknown)
- *dstp = url_scheme((enum url_type_e)dst->url_type);
- if (*dstp != NULL)
- dstp++, srcp++; /* Skip scheme if it is constant */
- if (dst != dst0 && buf != NULL && bufsize != 0)
- for (; srcp <= &src->url_fragment; srcp++, dstp++)
- if (*srcp) {
- char *next = copy(b, end, *srcp);
- if (next > end)
- break; /* did not fit: this field and the rest stay NULL */
- *dstp = b, b = next;
- }
- for (; srcp <= &src->url_fragment; srcp++) /* count the strings that were not copied */
- if (*srcp) {
- b += strlen(*srcp) + 1;
- }
- return b - buf;
- }
- }
- /**@def URL_DUP(buf, end, dst, src)
- * Duplicate the url: use @a buf up to @a end. @HI
- *
- * The macro URL_DUP() duplicates the url. The non-constant strings in @a
- * src are copied to @a buf. However, no strings are copied past @a end.
- * In other words, the size of buffer is @a end - @a buf.
- *
- * The macro updates the buffer pointer @a buf, so that it points to the
- * first unused byte in the buffer. The buffer pointer @a buf is updated,
- * even if the buffer is too small for the duplicated strings.
- *
- * @param buf Buffer for non-constant strings copied from @a src.
- * @param end End of @a buf.
- * @param dst Destination URL structure.
- * @param src Source URL structure.
- *
- * @return
- * The macro URL_DUP() returns pointer to first unused byte in the
- * buffer @a buf.
- */
- /** Duplicate the url to memory allocated via home.
- *
- * The function url_hdup() duplicates (deep copies) an #url_t structure.
- * Alternatively, it can be passed a string; string is then copied and
- * parsed to the #url_t structure.
- *
- * The function url_hdup() allocates the destination structure from @a home
- * as a single memory block. It is possible to free the copied url structure
- * and all the associated strings using a single call to su_free().
- *
- * @param home memory home used to allocate new url object
- * @param src pointer to URL (or string)
- *
- * @return
- * The function url_hdup() returns a pointer to the newly allocated #url_t
- * structure, or NULL upon an error.
- */
- url_t *url_hdup(su_home_t *home, url_t const *src)
- {
- if (src) {
- size_t len = sizeof(*src) + url_xtra(src);
- url_t *dst = su_alloc(home, len);
- if (dst) {
- ssize_t actual;
- actual = url_dup((char *)(dst + 1), len - sizeof(*src), dst, src);
- if (actual < 0)
- su_free(home, dst), dst = NULL;
- else
- assert(len == sizeof(*src) + actual);
- }
- return dst;
- }
- else
- return NULL;
- }
- /** Convert an string to an url */
- url_t *url_make(su_home_t *h, char const *str)
- {
- return url_hdup(h, URL_STRING_MAKE(str)->us_url);
- }
- /** Print an URL */
- url_t *url_format(su_home_t *h, char const *fmt, ...)
- {
- url_t *url;
- char *us;
- va_list ap;
- va_start(ap, fmt);
- us = su_vsprintf(h, fmt, ap);
- va_end(ap);
- if (us == NULL)
- return NULL;
- url = url_hdup(h, URL_STRING_MAKE(us)->us_url);
- su_free(h, us);
- return url;
- }
- /** Convert @a url to a string allocated from @a home.
- *
- * @param home memory home to allocate the new string
- * @param url url to convert to string
- *
- * The @a url can be a string, too.
- *
- * @return Newly allocated conversion result, or NULL upon an error.
- */
- char *url_as_string(su_home_t *home, url_t const *url)
- {
- if (url) {
- int len = url_e(NULL, 0, url);
- char *b = su_alloc(home, len + 1);
- url_e(b, len + 1, url);
- return b;
- } else {
- return NULL;
- }
- }
/** Test if param @a tag matches to parameter string @a p.
 *
 * The comparison ignores case; the name in @a p must be followed by the
 * end of string, by ';' or by '='. Both arguments are evaluated more than
 * once, so they must not have side effects.
 */
#define URL_PARAM_MATCH(p, tag) \
  (strncasecmp((p), (tag), strlen(tag)) == 0 && \
   ((p)[strlen(tag)] == '\0' || \
    (p)[strlen(tag)] == ';' || \
    (p)[strlen(tag)] == '='))
- /**
- * Search for a parameter.
- *
- * This function searches for a parameter from a parameter list.
- *
- * If you want to test if there is parameter @b user=phone,
- * call this function like
- * @code if (url_param(url->url_param, "user=phone", NULL, 0))
- * @endcode
- *
- * @param params URL parameter string (excluding first semicolon)
- * @param tag parameter name
- * @param value string to which the parameter value is copied
- * @param vlen length of string reserved for value
- *
- * @retval positive length of parameter value (including final NUL) if found
- * @retval zero if not found.
- */
- isize_t url_param(char const *params,
- char const *tag,
- char value[], isize_t vlen)
- {
- size_t n, tlen, flen;
- char *p;
- if (!params)
- return 0;
- tlen = strlen(tag);
- if (tlen && tag[tlen - 1] == '=')
- tlen--;
- for (p = (char *)params; *p; p += n + 1) {
- n = strcspn(p, ";");
- if (n < tlen) {
- if (p[n]) continue; else break;
- }
- if (strncasecmp(p, tag, tlen) == 0) {
- if (n == tlen) {
- if (vlen > 0)
- value[0] = '\0';
- return 1;
- }
- if (p[tlen] != '=')
- continue;
- flen = n - tlen - 1;
- if (flen >= (size_t)vlen)
- return flen + 1;
- memcpy(value, p + tlen + 1, flen);
- value[flen] = '\0';
- return flen + 1;
- }
- if (!p[n])
- break;
- }
- return 0;
- }
- /** Check for a parameter.
- *
- * @deprecated
- * Bad grammar. Use url_has_param().
- */
- isize_t url_have_param(char const *params, char const *tag)
- {
- return url_param(params, tag, NULL, 0);
- }
- /** Check for a parameter. */
- int url_has_param(url_t const *url, char const *tag)
- {
- return url && url->url_params && url_param(url->url_params, tag, NULL, 0);
- }
- /** Add an parameter. */
- int url_param_add(su_home_t *h, url_t *url, char const *param)
- {
- /* XXX - should remove existing parameters with same name? */
- size_t n = url->url_params ? strlen(url->url_params) + 1: 0;
- size_t nn = strlen(param) + 1;
- char *s = su_alloc(h, n + nn);
- if (!s)
- return -1;
- if (url->url_params)
- strcpy(s, url->url_params)[n - 1] = ';';
- strcpy(s + n, param);
- url->url_params = s;
- return 0;
- }
/** Remove a named parameter from url_param string.
 *
 * Remove every occurrence of the named parameter and its possible value
 * from the URL parameter string (url_s##url_param). The name is matched
 * case-insensitively and must be followed by '=', ';' or end of string.
 * The string is edited in place; when leading parameters are removed the
 * returned pointer points further into the same buffer.
 *
 * @return Pointer to modified string, or NULL if nothing is left in there.
 * If @a name is NULL, @a params is returned unchanged.
 */
char *url_strip_param_string(char *params, char const *name)
{
  size_t name_len, pos;

  if (params == NULL || name == NULL)
    return params;

  name_len = strlen(name);
  pos = 0;

  while (params[pos] != '\0') {
    char *item = params + pos;
    size_t cut, tail;
    /* The character after the name is only examined if the name matched */
    int hit = strncasecmp(item, name, name_len) == 0 &&
      (item[name_len] == '=' || item[name_len] == ';' ||
       item[name_len] == '\0');

    if (!hit) {
      /* Move on to the parameter after the next semicolon, if any */
      pos += strcspn(item, ";");
      if (params[pos] == '\0')
	break;
      pos++;
      continue;
    }

    /* Length of "name[=value]" plus the semicolon following it */
    cut = name_len + strcspn(item + name_len, ";");
    if (item[cut] == ';')
      cut++;

    if (pos == 0) {
      /* Leading parameter: just start the string later */
      params += cut;
      continue;
    }

    tail = strlen(item + cut);
    if (tail == 0) {
      /* Last parameter: cut the string at the preceding semicolon */
      item[-1] = '\0';
      break;
    }

    memmove(item, item + cut, tail + 1);
  }

  return params[0] != '\0' ? params : NULL;
}
- int url_string_p(url_string_t const *url)
- {
- return URL_STRING_P(url);
- }
- int url_is_string(url_string_t const *url)
- {
- return URL_IS_STRING(url);
- }
- /** Strip transport-specific stuff. */
- static
- int url_strip_transport2(url_t *url, int modify)
- {
- char *p, *d;
- size_t n;
- int semi;
- if (url->url_type != url_sip && url->url_type != url_sips)
- return 0;
- if (url->url_port != NULL) {
- if (!modify)
- return 1;
- url->url_port = NULL;
- }
- if (!url->url_params)
- return 0;
- for (d = p = (char *)url->url_params; *p; p += n + semi) {
- n = strcspn(p, ";");
- semi = (p[n] != '\0');
- if (modify && n == 0)
- continue;
- if (URL_PARAM_MATCH(p, "method"))
- continue;
- if (URL_PARAM_MATCH(p, "maddr"))
- continue;
- if (URL_PARAM_MATCH(p, "ttl"))
- continue;
- if (URL_PARAM_MATCH(p, "transport"))
- continue;
- if (p != d) {
- if (d != url->url_params)
- d++;
- if (p != d) {
- if (!modify)
- return 1;
- memmove(d, p, n + 1);
- }
- }
- d += n;
- }
- if (d == p)
- return 0;
- else if (d + 1 == p) /* empty param */
- return 0;
- else if (!modify)
- return 1;
- if (d != url->url_params)
- *d = '\0';
- else
- url->url_params = NULL;
- return 1;
- }
- /** Strip transport-specific stuff.
- *
- * The function url_strip_transport() removes transport-specific parameters
- * from a SIP or SIPS URI. These parameters include:
- * - the port number
- * - "maddr=" parameter
- * - "transport=" parameter
- * - "ttl=" parameter
- * - "method=" parameter
- *
- * @note
- * The @a url must be a pointer to a URL structure. It is stripped in-place.
- *
- * @note
- * If the parameter string contains empty parameters, they are stripped, too.
- *
- * @return
- * The function url_strip_transport() returns @e true, if the URL was
- * modified, @e false otherwise.
- */
- int url_strip_transport(url_t *url)
- {
- return url_strip_transport2(url, 1);
- }
- /** Check for transport-specific stuff.
- *
- * The function url_have_transport() tests if there are transport-specific
- * parameters in a SIP or SIPS URI. These parameters include:
- * - the port number
- * - "maddr=" parameters
- * - "transport=" parameters
- *
- * @note
- * The @a url must be a pointer to a URL structure.
- *
- * @return The function url_have_transport() returns @e true, if the URL
- * contains transport parameters, @e false otherwise.
- */
- int url_have_transport(url_t const *url)
- {
- return url_strip_transport2((url_t *)url, 0);
- }
- /**Lazily compare two URLs.
- *
- * Compare essential parts of URLs: schema, host, port, and username.
- *
- * any_url compares 0 with any other URL.
- *
- * pres: and im: URIs compares 0 with SIP URIs.
- *
- * @note
- * The @a a and @a b must be pointers to URL structures.
- *
- * @note Currently, the url parameters are not compared. This is because the
- * url_cmp() is used to sort URLs: taking parameters into account makes that
- * impossible.
- */
- int url_cmp(url_t const *a, url_t const *b)
- {
- int rv;
- int url_type;
- if ((a && a->url_type == url_any) || (b && b->url_type == url_any))
- return 0;
- if (!a || !b)
- return (a != NULL) - (b != NULL);
- if ((rv = a->url_type - b->url_type)) {
- #if 0
- /* presence and instant messaging URLs match magically with SIP */
- enum url_type_e a_type = a->url_type;
- enum url_type_e b_type = b->url_type;
- if (a_type == url_im || a_type == url_pres)
- a_type = url_sip;
- if (b_type == url_im || b_type == url_pres)
- b_type = url_sip;
- if (a_type != b_type)
- #endif
- return rv;
- }
- url_type = a->url_type; /* Or b->url_type, they are equal! */
- if (url_type <= url_unknown &&
- ((rv = !a->url_scheme - !b->url_scheme) ||
- (a->url_scheme && b->url_scheme &&
- (rv = strcasecmp(a->url_scheme, b->url_scheme)))))
- return rv;
- if ((rv = host_cmp(a->url_host, b->url_host)))
- return rv;
- if (a->url_port != b->url_port) {
- char const *a_port;
- char const *b_port;
- if (url_type != url_sip && url_type != url_sips)
- a_port = b_port = url_port_default((enum url_type_e)url_type);
- else if (host_is_ip_address(a->url_host))
- a_port = b_port = url_port_default((enum url_type_e)url_type);
- else
- a_port = b_port = "";
- if (a->url_port) a_port = a->url_port;
- if (b->url_port) b_port = b->url_port;
- if ((rv = strcmp(a_port, b_port)))
- return rv;
- }
- if (a->url_user != b->url_user) {
- if (a->url_user == NULL) return -1;
- if (b->url_user == NULL) return +1;
- switch (url_type) {
- case url_tel: case url_modem: case url_fax:
- rv = url_tel_cmp_numbers(a->url_user, b->url_user);
- break;
- default:
- rv = strcmp(a->url_user, b->url_user);
- break;
- }
- if (rv)
- return rv;
- }
- #if 0
- if (a->url_path != b->url_path) {
- if (a->url_path == NULL) return -1;
- if (b->url_path == NULL) return +1;
- if ((rv = strcmp(a->url_path, b->url_path)))
- return rv;
- }
- #endif
- return 0;
- }
/** Value of the hexadecimal digit @a c; @a c must be a hex digit. */
static
int tel_cmp_unhex(int c)
{
  if (c >= 'a')
    return c - 'a' + 10;
  if (c >= 'A')
    return c - 'A' + 10;
  return c - '0';
}

/** Fetch next significant character of a telephone number.
 *
 * A %-escape followed by two hex digits is decoded first. Visual
 * separators (space, '-', '.', '(' and ')'), escaped or not, are skipped.
 * The read position @a *pp is advanced, but never past the terminating NUL.
 *
 * @return The character converted to lower case, or -1 at end of string.
 */
static
int tel_cmp_next(char const **pp)
{
  char const *s = *pp;
  int c = -1;

  while (*s) {
    c = (unsigned char)*s++;

    if (c == '%' &&
	isxdigit((unsigned char)s[0]) && isxdigit((unsigned char)s[1])) {
      c = (tel_cmp_unhex((unsigned char)s[0]) << 4) |
	tel_cmp_unhex((unsigned char)s[1]);
      s += 2;
    }

    if (c != ' ' && c != '-' && c != '.' && c != '(' && c != ')')
      break;

    c = -1;			/* Separator skipped, nothing found yet */
  }

  *pp = s;

  return c < 0 ? c : tolower(c);
}

/** Compare two telephone numbers.
 *
 * The numbers are compared character by character, ignoring case, visual
 * separators and %-escaping. Separators at the end of a number are
 * ignored like the ones in the middle of it.
 *
 * @retval 0 if the numbers are equal
 * @retval negative if @a A sorts before @a B (a number which is a prefix
 * of the other one sorts first)
 * @retval positive if @a A sorts after @a B
 */
static
int url_tel_cmp_numbers(char const *A, char const *B)
{
  for (;;) {
    int a = tel_cmp_next(&A);
    int b = tel_cmp_next(&B);

    if (a != b)
      return a - b;
    if (a < 0)			/* Both numbers ended at the same time */
      return 0;
  }
}
- /**Conservative comparison of urls.
- *
- * Compare all parts of URLs.
- *
- * @note
- * The @a a and @a b must be pointers to URL structures.
- *
- */
- int url_cmp_all(url_t const *a, url_t const *b)
- {
- int rv, url_type;
- if (!a || !b)
- return (a != NULL) - (b != NULL);
- if ((rv = a->url_type - b->url_type))
- return rv;
- url_type = a->url_type; /* Or b->url_type, they are equal! */
- if (url_type <= url_unknown &&
- ((rv = !a->url_scheme - !b->url_scheme) ||
- (a->url_scheme && b->url_scheme &&
- (rv = strcasecmp(a->url_scheme, b->url_scheme)))))
- return rv;
- if ((rv = a->url_root - b->url_root))
- return rv;
- if ((rv = host_cmp(a->url_host, b->url_host)))
- return rv;
- if (a->url_port != b->url_port) {
- char const *a_port;
- char const *b_port;
- if (url_type != url_sip && url_type != url_sips)
- a_port = b_port = url_port_default((enum url_type_e)url_type);
- else if (host_is_ip_address(a->url_host))
- a_port = b_port = url_port_default((enum url_type_e)url_type);
- else
- a_port = b_port = "";
- if (a->url_port) a_port = a->url_port;
- if (b->url_port) b_port = b->url_port;
- if ((rv = strcmp(a_port, b_port)))
- return rv;
- }
- if (a->url_user != b->url_user) {
- if (a->url_user == NULL) return -1;
- if (b->url_user == NULL) return +1;
- switch (url_type) {
- case url_tel: case url_modem: case url_fax:
- rv = url_tel_cmp_numbers(a->url_user, b->url_user);
- break;
- default:
- rv = strcmp(a->url_user, b->url_user);
- break;
- }
- if (rv)
- return rv;
- }
- if (a->url_path != b->url_path) {
- if (a->url_path == NULL) return -1;
- if (b->url_path == NULL) return +1;
- if ((rv = strcmp(a->url_path, b->url_path)))
- return rv;
- }
- if (a->url_params != b->url_params) {
- if (a->url_params == NULL) return -1;
- if (b->url_params == NULL) return +1;
- if ((rv = strcmp(a->url_params, b->url_params)))
- return rv;
- }
- if (a->url_headers != b->url_headers) {
- if (a->url_headers == NULL) return -1;
- if (b->url_headers == NULL) return +1;
- if ((rv = strcmp(a->url_headers, b->url_headers)))
- return rv;
- }
- if (a->url_headers != b->url_headers) {
- if (a->url_headers == NULL) return -1;
- if (b->url_headers == NULL) return +1;
- if ((rv = strcmp(a->url_headers, b->url_headers)))
- return rv;
- }
- if (a->url_fragment != b->url_fragment) {
- if (a->url_fragment == NULL) return -1;
- if (b->url_fragment == NULL) return +1;
- if ((rv = strcmp(a->url_fragment, b->url_fragment)))
- return rv;
- }
- return 0;
- }
- /** Return default port number corresponding to the url type */
- char const *url_port_default(enum url_type_e url_type)
- {
- switch (url_type) {
- case url_sip: /* "sip:" */
- return "5060";
- case url_sips: /* "sips:" */
- return "5061";
- case url_http: /* "http:" */
- return "80";
- case url_https: /* "https:" */
- return "443";
- case url_ftp: /* "ftp:" */
- case url_file: /* "file:" */
- return "21";
- case url_rtsp: /* "rtsp:" */
- case url_rtspu: /* "rtspu:" */
- return "554";
- case url_mailto: /* "mailto:" */
- return "25";
- case url_any: /* "*" */
- return "*";
- case url_msrp:
- case url_msrps:
- return "9999"; /* XXXX */
- case url_tel:
- case url_urn:
- case url_fax:
- case url_modem:
- case url_im:
- case url_pres:
- case url_cid:
- case url_wv:
- default: /* Unknown scheme */
- return "";
- }
- }
- /** Return default transport name corresponding to the url type */
- char const *url_tport_default(enum url_type_e url_type)
- {
- switch (url_type) {
- case url_sip:
- return "*";
- case url_sips:
- return "tls";
- case url_http:
- return "tcp";
- case url_https:
- return "tls";
- case url_ftp:
- case url_file:
- return "tcp";
- case url_rtsp:
- return "tcp";
- case url_rtspu:
- return "udp";
- case url_mailto:
- return "tcp";
- case url_msrp:
- return "tcp";
- case url_msrps:
- return "tls";
- case url_any: /* "*" */
- case url_tel:
- case url_fax:
- case url_modem:
- case url_im:
- case url_pres:
- case url_cid:
- case url_urn:
- case url_wv:
- default: /* Unknown scheme */
- return "*";
- }
- }
- /** Return the URL port string */
- char const *url_port(url_t const *u)
- {
- if (!u)
- return "";
- else if (u->url_port && u->url_port[0])
- return u->url_port;
- if (u->url_type == url_sips || u->url_type == url_sip)
- if (!host_is_ip_address(u->url_host))
- return "";
- return url_port_default((enum url_type_e)u->url_type);
- }
- /** Sanitize URL.
- *
- * The function url_sanitize() adds a scheme to an incomplete URL. It
- * modifies its parameter structure @a url. Currently, the function follows
- * simple heuristics:
- *
- * - URL with host name starting with @c ftp. is an FTP URL
- * - URL with host name starting with @c www. is an HTTP URL
- * - URL with host and path, e.g., @c host/foo;bar, is an HTTP URL
- * - URL with host name, no path is a SIP URL.
- *
- * @param url pointer to URL struct to be sanitized (IN/OUT)
- *
- * @return
- * The function url_sanitize() returns 0 if it considers URL to be
- * sane, and -1 otherwise.
- */
- int url_sanitize(url_t *url)
- {
- if (!url)
- return -1;
- else if (url->url_scheme != NULL)
- /* xyzzy */;
- else if (url->url_host == NULL)
- return -1;
- else if (strncasecmp(url->url_host, "ftp.", strlen("ftp.")) == 0)
- url->url_type = url_ftp, url->url_scheme = "ftp", url->url_root = '/';
- else if (strncasecmp(url->url_host, "www.", strlen("www.")) == 0
- || url->url_path)
- url->url_type = url_http, url->url_scheme = "http", url->url_root = '/';
- else
- url->url_type = url_sip, url->url_scheme = "sip";
- return 0;
- }
- #include <sofia-sip/su_md5.h>
- static
- void canon_update(su_md5_t *md5, char const *s, size_t n, char const *allow)
- {
- size_t i, j;
- for (i = 0, j = 0; i < n && s[i]; i++) {
- char c;
- if (s[i] == '%' && i + 2 < n && IS_HEX(s[i+1]) && IS_HEX(s[i+2])) {
- #define UNHEX(a) (a - (a >= 'a' ? 'a' - 10 : (a >= 'A' ? 'A' - 10 : '0')))
- c = (UNHEX(s[i+1]) << 4) | UNHEX(s[i+2]);
- #undef UNHEX
- if (c != '%' && c > ' ' && c < '\177' &&
- (!strchr(EXCLUDED, c) || strchr(allow, c))) {
- if (i != j)
- su_md5_iupdate(md5, s + j, i - j);
- su_md5_iupdate(md5, &c, 1);
- j = i + 3;
- }
- i += 2;
- }
- }
- if (i != j)
- su_md5_iupdate(md5, s + j, i - j);
- }
- /** Update MD5 sum with url-string contents.
- *
- * Feeds the scheme, the user part (any password is left out), the host
- * and the port found in the URL string @a s to @a md5. Parameters, path,
- * headers and fragment are not hashed.
- *
- * @param md5 MD5 context to update
- * @param s URL in string form; NULL, an empty string and "*" are all
- * hashed as the same fixed 4-byte pattern
- */
- static
- void url_string_update(su_md5_t *md5, char const *s)
- {
- size_t n, p;
- int have_authority = 1;
- enum url_type_e type = url_any;
- char const *at, *colon;
- char schema[48] = { 0 };
- if (s == NULL || strlen(s) == 0 || strcmp(s, "*") == 0) {
- su_md5_update(md5, "*\0\0*", 4);
- return;
- }
- n = strcspn(s, ":/?#"); /* Length of a possible scheme */
- if (n >= sizeof schema) { /* Does not fit in schema[]: s is not advanced */
- su_md5_update(md5, ":", 1);
- }
- else if (n && s[n] == ':' ) {
- at = url_canonize(schema, s, n, 0, "+"); /* at is used as the end of text in schema */
- type = url_get_type(schema, at - schema);
- su_md5_iupdate(md5, schema, at - schema);
- have_authority = !url_type_is_opaque(type);
- s += n + 1; /* Continue after the colon */
- }
- else {
- su_md5_update(md5, "", 1); /* No scheme: a single NUL byte is hashed */
- }
- if (type == url_sip || type == url_sips) {
- /* SIP URL may have /;? in user part but no path */
- /* user-unreserved = "&" / "=" / "+" / "$" / "," / ";" / "?" / "/" */
- /* Some #*@#* phones include unescaped # there, too */
- n = strcspn(s, "@/;?#");
- p = strcspn(s + n, "@"); /* Look for an at sign further on, too */
- if (s[n + p] == '@') {
- n += p;
- /* Ignore password in hash */
- colon = memchr(s, ':', n);
- p = colon ? (size_t)(colon - s) : n;
- canon_update(md5, s, p, SIP_USER_UNRESERVED);
- s += n + 1; n = 0; /* s now starts at the host */
- }
- else
- su_md5_iupdate(md5, "", 1); /* user */
- n += strcspn(s + n, "/;?#"); /* n is now the length of host and port */
- }
- else if (have_authority) {
- if (type == url_wv) { /* WV URL may have / in user part */
- n = strcspn(s, "@;?#");
- }
- else if (type != url_wv && s[0] == '/' && s[1] != '/') { /* type != url_wv always holds here */
- /* foo:/bar */
- su_md5_update(md5, "\0\0", 2); /* user, host */
- su_md5_striupdate(md5, url_port_default(type));
- return;
- }
- else if (s[0] == '/' && s[1] == '/') {
- /* We have authority, / / foo or foo */
- s += 2;
- n = strcspn(s, "/?#@[]");
- }
- else
- n = strcspn(s, "@;/?#");
- if (s[n] == '@') {
- /* Ignore password in hash */
- colon = type != url_unknown ? memchr(s, ':', n) : NULL;
- p = colon ? (size_t)(colon - s) : n;
- canon_update(md5, s, p, SIP_USER_UNRESERVED);
- s += n + 1;
- n = strcspn(s, "/;?#"); /* Until path, query or fragment */
- }
- else {
- su_md5_iupdate(md5, "", 1); /* user */
- n += strcspn(s + n, "/;?#"); /* Until path, query or fragment */
- }
- }
- else /* if (!have_authority) */ {
- n = strcspn(s, ":/;?#"); /* Until pass, path, query or fragment */
- canon_update(md5, s, n, ""); /* user */
- su_md5_update(md5, "\0", 1); /* host, no port */
- su_md5_striupdate(md5, url_port_default(type));
- return;
- }
- /* Here s points to the host and n covers the host and a possible port */
- if (n > 0 && s[0] == '[') { /* IPv6reference */
- colon = memchr(s, ']', n);
- /* A port is present only if a colon follows the closing bracket */
- if (colon == NULL || ++colon == s + n || *colon != ':')
- colon = NULL;
- }
- else
- colon = memchr(s, ':', n);
- if (colon) {
- canon_update(md5, s, colon - s, ""); /* host */
- canon_update(md5, colon + 1, (s + n) - (colon + 1), ""); /* port */
- }
- else {
- canon_update(md5, s, n, ""); /* host */
- su_md5_strupdate(md5, url_port_default(type)); /* port */
- }
- /* ignore parameters/path/headers.... */
- }
- /** Update md5 digest with contents of URL.
- *
- */
- void url_update(su_md5_t *md5, url_t const *url)
- {
- if (url_string_p((url_string_t *)url)) {
- url_string_update(md5, (char const *)url);
- }
- else {
- SU_MD5_STRI0UPDATE(md5, url->url_scheme);
- SU_MD5_STRI0UPDATE(md5, url->url_user);
- SU_MD5_STRI0UPDATE(md5, url->url_host);
- su_md5_striupdate(md5, URL_PORT(url));
- /* XXX - parameters/path.... */
- /* SU_MD5_STRI0UPDATE(md5, url->url_path); */
- }
- }
- /** Calculate a digest from URL contents. */
- void url_digest(void *hash, int hsize, url_t const *url, char const *key)
- {
- su_md5_t md5[1];
- uint8_t digest[SU_MD5_DIGEST_SIZE];
- su_md5_init(md5);
- if (key) su_md5_strupdate(md5, key);
- url_update(md5, url);
- su_md5_digest(md5, digest);
- if (hsize > SU_MD5_DIGEST_SIZE) {
- memset((char *)hash + SU_MD5_DIGEST_SIZE, 0, hsize - SU_MD5_DIGEST_SIZE);
- hsize = SU_MD5_DIGEST_SIZE;
- }
- memcpy(hash, digest, hsize);
- }
- /** Convert a URL query to a header string.
- *
- * URL query is converted by replacing each "=" in header name "=" value
- * pair with semicolon (":"), and the "&" separating header-name-value pairs
- * with line feed ("\n"). The "body" pseudoheader is moved last in the
- * string. The %-escaping is removed. Note that if the @a query contains %00,
- * the resulting string will be truncated.
- *
- * @param home memory home used to alloate string (if NULL, malloc() it)
- * @param query query part from SIP URL
- *
- * The result string is allocated from @a home, and it can be used as
- * argument to msg_header_parse_str(), msg_header_add_str() or
- * SIPTAG_HEADER_STR().
- *
- * @sa msg_header_add_str(), SIPTAG_HEADER_STR(),
- * sip_headers_as_url_query(), sip_url_query_as_taglist(),
- * @RFC3261 section 19.1.1 "Headers", #url_t, url_s#url_headers,
- * url_unescape(), url_unescape_to()
- *
- * @since New in @VERSION_1_12_4.
- */
- char *url_query_as_header_string(su_home_t *home,
- char const *query)
- {
- size_t i, j, n, b_start = 0, b_len = 0;
- char *s = su_strdup(home, query);
- if (!s)
- return NULL;
- for (i = 0, j = 0; s[i];) {
- n = strcspn(s + i, "=");
- if (!s[i + n])
- break;
- if (n == 4 && strncasecmp(s + i, "body", 4) == 0) {
- if (b_start)
- break;
- b_start = i + n + 1, b_len = strcspn(s + b_start, "&");
- i = b_start + b_len;
- if (s[i] != '\0') i += 1;
- continue;
- }
- if (i != j)
- memmove(s + j, s + i, n);
- s[j + n] = ':';
- i += n + 1, j += n + 1;
- n = strcspn(s + i, "&");
- j += url_unescape_to(s + j, s + i, n);
- i += n;
- if (s[i]) {
- s[j++] = '\n', i++;
- }
- }
- if (s[i])
- return (void)su_free(home, s), NULL;
- if (b_start) {
- s[j++] = '\n', s[j++] = '\n';
- j += url_unescape_to(s + j, query + b_start, b_len);
- }
- s[j] = '\0'; assert(j <= i);
- return s;
- }
|