2
0

url.c 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160
  1. /*
  2. * This file is part of the Sofia-SIP package
  3. *
  4. * Copyright (C) 2005 Nokia Corporation.
  5. *
  6. * Contact: Pekka Pessi <pekka.pessi@nokia.com>
  7. *
  8. * This library is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public License
  10. * as published by the Free Software Foundation; either version 2.1 of
  11. * the License, or (at your option) any later version.
  12. *
  13. * This library is distributed in the hope that it will be useful, but
  14. * WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * Lesser General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with this library; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
  21. * 02110-1301 USA
  22. *
  23. */
  24. /**@CFILE url.c
  25. *
  26. * Implementation of basic URL parsing and handling.
  27. *
  28. * @author Pekka Pessi <Pekka.Pessi@nokia.com>
  29. *
  30. * @date Created: Thu Jun 29 22:44:37 2000 ppessi
  31. */
  32. #include "config.h"
  33. #include <sofia-sip/su_alloc.h>
  34. #include <sofia-sip/bnf.h>
  35. #include <sofia-sip/hostdomain.h>
  36. #include <sofia-sip/url.h>
  37. #include <sofia-sip/string0.h>
  38. #include <stdio.h>
  39. #include <string.h>
  40. #include <stdlib.h>
  41. #include <assert.h>
  42. #include <ctype.h>
  43. #include <limits.h>
  44. /**@def URL_PRINT_FORMAT
  45. * Format string used when printing url with printf().
  46. *
  47. * The macro URL_PRINT_FORMAT is used in format string of printf() or
  48. * similar printing functions. A URL can be printed like this:
  49. * @code
  50. * printf("%s received URL " URL_PRINT_FORMAT "\n",
  51. * my_name, URL_PRINT_ARGS(url));
  52. * @endcode
  53. */
  54. /** @def URL_PRINT_ARGS(u)
  55. * Argument list used when printing url with printf().
  56. *
  57. * The macro URL_PRINT_ARGS() is used to create a stdarg list for printf()
  58. * or similar printing functions. Using it, a URL can be printed like this:
  59. *
  60. * @code
  61. * printf("%s received URL " URL_PRINT_FORMAT "\n",
  62. * my_name, URL_PRINT_ARGS(url));
  63. * @endcode
  64. */
  /* Character classes used when deciding what has to be %-escaped.
   * (The names presumably follow the generic URI syntax of RFC 2396.)
   */
  #define RESERVED        ";/?:@&=+$,"
  #define DELIMS          "<>#%\""
  #define UNWISE          "{}|\\^[]`"
  #define EXCLUDED        RESERVED DELIMS UNWISE
  #define UNRESERVED      "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
                          "abcdefghijklmnopqrstuvwxyz" \
                          "0123456789" \
                          "-_.!~*'()"

  /**Test whether character @a u has to be escaped.
   *
   * Everything up to and including space, and everything from DEL ('\177')
   * upwards, is always excluded.  The remaining characters are looked up in
   * three 32-bit masks: @a m32 covers 32..63, @a m64 covers 64..95 and
   * @a m96 covers 96..127.  The most significant bit of each mask stands for
   * the first character of its range.
   *
   * The shifted constant is unsigned so that testing the top bit ('@' and
   * '`') does not shift into the sign bit of an int.
   *
   * @note @a u is evaluated several times; pass a plain variable.
   */
  #define IS_EXCLUDED(u, m32, m64, m96)                         \
    ((u) <= ' '                                                 \
     || (u) >= '\177'                                           \
     || ((u) < 64 ? ((m32) & (1U << (63 - (u))))                \
         : ((u) < 96 ? ((m64) & (1U << (95 - (u))))             \
            : /*u < 128*/ ((m96) & (1U << (127 - (u)))))) != 0)

  /* Default masks: every character of EXCLUDED is marked. */
  #define RMASK1 0xbe19003f
  #define RMASK2 0x8000001e
  #define RMASK3 0x8000001d

  /** Mark character @a r (32..127) as excluded in the masks. */
  #define RESERVE(r, m32, m64, m96)             \
    do {                                        \
      if ((r) < 32)                             \
        ;                                       \
      else if ((r) < 64)                        \
        (m32) |= 1U << (63 - (r));              \
      else if ((r) < 96)                        \
        (m64) |= 1U << (95 - (r));              \
      else if ((r) < 128)                       \
        (m96) |= 1U << (127 - (r));             \
    } while (0)

  /**Initialize escape masks from an optional list of reserved characters.
   *
   * With a NULL @a reserved the masks exclude everything in EXCLUDED.
   * Otherwise only DELIMS and UNWISE are excluded by default and each
   * character listed in @a reserved is added.
   *
   * @note @a reserved must be a modifiable pointer variable; it is advanced
   * to the end of the list.
   */
  #define MASKS_WITH_RESERVED(reserved, m32, m64, m96)                  \
    do {                                                                \
      if ((reserved) == NULL) {                                         \
        (m32) = RMASK1, (m64) = RMASK2, (m96) = RMASK3;                 \
      } else {                                                          \
        (m32) = 0xb400000a, (m64) = 0x0000001e, (m96) = 0x8000001d;     \
                                                                        \
        for (; (reserved)[0]; (reserved)++) {                           \
          unsigned r = (unsigned char)(reserved)[0];                    \
          RESERVE(r, m32, m64, m96);                                    \
        }                                                               \
      }                                                                 \
    } while (0)

  /** Clear the excluded bit of character @a a (32..127) in the masks. */
  #define ALLOW(a, mask32, mask64, mask96)      \
    do {                                        \
      if ((a) < 32)                             \
        ;                                       \
      else if ((a) < 64)                        \
        (mask32) &= ~(1U << (63 - (a)));        \
      else if ((a) < 96)                        \
        (mask64) &= ~(1U << (95 - (a)));        \
      else if ((a) < 128)                       \
        (mask96) &= ~(1U << (127 - (a)));       \
    } while (0)

  /**Remove every character listed in @a allowed (may be NULL) from the masks.
   *
   * @note @a allowed must be a modifiable pointer variable; it is advanced
   * to the end of the list.
   */
  #define MASKS_WITH_ALLOWED(allowed, mask32, mask64, mask96)   \
    do {                                                        \
      if (allowed) {                                            \
        for (; (allowed)[0]; (allowed)++) {                     \
          unsigned a = (unsigned char)(allowed)[0];             \
          ALLOW(a, mask32, mask64, mask96);                     \
        }                                                       \
      }                                                         \
    } while (0)

  #define NUL '\0'
  #define NULNULNUL '\0', '\0', '\0'

  /* Mask triplets usable as the last three arguments of a call */
  #define RESERVED_MASK 0xbe19003f, 0x8000001e, 0x8000001d
  #define URIC_MASK     0xb400000a, 0x0000001e, 0x8000001d

  /* @a m is one of the mask triplets above; it expands to three arguments */
  #define IS_EXCLUDED_MASK(u, m) IS_EXCLUDED(u, m)
  /* Forward declarations of file-local helpers */

  /* Canonize a component; characters in allowed[] are left unescaped */
  static char *url_canonize(char *dst, char const *src, size_t len,
                            unsigned syn33,
                            char const *allowed);

  /* Canonize a component using precomputed exclusion masks */
  static char *url_canonize2(char *dst, char const *src, size_t len,
                             unsigned syn33,
                             unsigned m32, unsigned m64, unsigned m96);

  static int url_tel_cmp_numbers(char const *A, char const *B);
  133. /**Test if string contains excluded or url-reserved characters.
  134. *
  135. *
  136. *
  137. * @param s string to be searched
  138. *
  139. * @retval 0 if no reserved characters were found.
  140. * @retval l if a reserved character was found.
  141. */
  142. int url_reserved_p(char const *s)
  143. {
  144. if (s)
  145. while (*s) {
  146. unsigned char u = *s++;
  147. if (IS_EXCLUDED(u, RMASK1, RMASK2, RMASK3))
  148. return 1;
  149. }
  150. return 0;
  151. }
  152. /** Calculate length of string when escaped with %-notation.
  153. *
  154. * Calculate the length of string @a s when the excluded or reserved
  155. * characters in it have been escaped.
  156. *
  157. * @param s String with reserved URL characters. [IN
  158. * @param reserved Optional array of reserved characters [IN]
  159. *
  160. * @return
  161. * The number of characters in corresponding but escaped string.
  162. *
  163. * You can handle a part of URL with reserved characters like this:
  164. * @code
  165. * if (url_reserved_p(s)) {
  166. * n = malloc(url_esclen(s, NULL) + 1);
  167. * if (n) url_escape(n, s);
  168. * } else {
  169. * n = malloc(strlen(s) + 1);
  170. * if (n) strcpy(n, s);
  171. * }
  172. * @endcode
  173. */
  174. isize_t url_esclen(char const *s, char const reserved[])
  175. {
  176. size_t n;
  177. unsigned mask32, mask64, mask96;
  178. MASKS_WITH_RESERVED(reserved, mask32, mask64, mask96);
  179. for (n = 0; s && *s; n++) {
  180. unsigned char u = *s++;
  181. if (IS_EXCLUDED(u, mask32, mask64, mask96))
  182. n += 2;
  183. }
  184. return (isize_t)n;
  185. }
  /** Escape a string.
   *
   * Copies the string @a s to the array @a d and terminates the copy with
   * NUL. Every excluded or reserved character in @a s is written as a
   * %-escape with uppercase hexadecimal digits; for instance, @c "$%#" is
   * copied as @c "%24%25%23". A NULL @a s produces an empty string.
   *
   * The destination array @a d must be large enough to receive the escaped
   * copy and its terminator, i.e., url_esclen(s, reserved) + 1 bytes.
   *
   * @param d        Destination buffer [OUT]
   * @param s        String to be copied [IN]
   * @param reserved Array of reserved characters; NULL selects the default
   *                 reserved set [IN]
   *
   * @return Pointer to the destination array.
   */
  char *url_escape(char *d, char const *s, char const reserved[])
  {
    static char const hexdigits[] = "0123456789ABCDEF";
    char *out = d;
    unsigned mask32, mask64, mask96;

    MASKS_WITH_RESERVED(reserved, mask32, mask64, mask96);

    if (s != NULL) {
      for (; *s != '\0'; s++) {
        unsigned char ch = *s;

        if (IS_EXCLUDED(ch, mask32, mask64, mask96)) {
          out[0] = '%';
          out[1] = hexdigits[ch >> 4];
          out[2] = hexdigits[ch & 15];
          out += 3;
        }
        else {
          *out++ = ch;
        }
      }
    }

    *out = '\0';

    return d;
  }
  /**Unescape url-escaped string fragment.
   *
   * Unescape at most @a n characters from string @a s to the buffer @a d.
   * Processing also stops at the first NUL in @a s. All valid %-escaped
   * triplets in @a s are unescaped, for instance, @c "%40%25%23" is copied
   * as @c "@%#"; a '%' that is not followed by two hex digits within the
   * first @a n characters is copied as is.
   *
   * No terminating NUL is written to @a d. The result is never longer than
   * the input, so @a n bytes is always enough. If @a d is NULL, nothing is
   * stored and only the length is calculated. @a d may be equal to @a s.
   *
   * @param d destination buffer (may be NULL)
   * @param s string to be unescaped
   * @param n maximum number of characters to unescape
   *
   * @return Length of unescaped string, 0 if @a s is NULL.
   *
   * @NEW_1_12_4.
   */
  size_t url_unescape_to(char *d, char const *s, size_t n)
  {
    size_t rd, wr;

    if (!s)
      return 0;

    /* Everything before the first '%' can be copied verbatim */
    rd = wr = strncspn(s, n, "%");

    if (d != NULL && d != s)
      memmove(d, s, rd);

    while (rd < n && s[rd] != '\0') {
      char ch = s[rd++];

      if (ch == '%' && rd + 1 < n && IS_HEX(s[rd]) && IS_HEX(s[rd + 1])) {
  #define UNHEX(a) (a - (a >= 'a' ? 'a' - 10 : (a >= 'A' ? 'A' - 10 : '0')))
        ch = (UNHEX(s[rd]) << 4) | UNHEX(s[rd + 1]);
  #undef UNHEX
        rd += 2;
      }

      if (d != NULL)
        d[wr] = ch;
      wr++;
    }

    return wr;
  }
  /**Unescape url-escaped string.
   *
   * Unescape the whole string @a s to the buffer @a d and terminate the
   * result with NUL. All %-escaped triplets in @a s are unescaped, for
   * instance, @c "%40%25%23" is copied as @c "@%#". The destination array
   * @a d must be large enough to receive the unescaped copy and its
   * terminator.
   *
   * @param d destination buffer
   * @param s string to be copied
   *
   * @return Pointer to the destination buffer (NULL if @a d is NULL).
   */
  char *url_unescape(char *d, char const *s)
  {
    size_t len = url_unescape_to(d, s, SIZE_MAX);

    if (d == NULL)
      return NULL;

    d[len] = '\0';

    return d;
  }
  280. /** Canonize a URL component */
  281. static
  282. char *url_canonize(char *d, char const *s, size_t n,
  283. unsigned syn33,
  284. char const allowed[])
  285. {
  286. unsigned mask32 = 0xbe19003f, mask64 = 0x8000001e, mask96 = 0x8000001d;
  287. MASKS_WITH_ALLOWED(allowed, mask32, mask64, mask96);
  288. return url_canonize2(d, s, n, syn33, mask32, mask64, mask96);
  289. }
  /** Bit standing for character @a c (33..64) in a syn33 bit set. */
  #define SYN33(c) (1U << ((c) - 33))

  /**Test if character @a c belongs to the syn33 bit set.
   *
   * Only characters 33..64 can be members; any other value yields false
   * instead of shifting by a negative or too large count.
   */
  #define IS_SYN33(syn33, c) \
    ((c) >= 33 && (c) <= 64 && ((syn33) & (1U << ((c) - 33))) != 0)

  /** Canonize a URL component (with precomputed mask).
   *
   * Copies at most @a n characters of @a s to @a d. %-escapes of characters
   * that are not excluded by the masks are replaced with the character
   * itself; the hex digits of the remaining escapes are converted to
   * uppercase. The result is NUL-terminated.
   *
   * An unescaped character is accepted if it is in the @a syn33 set or not
   * excluded by the masks. When canonizing in place (@a d == @a s) the text
   * before the first '%' is left as it is and is not validated.
   *
   * @param d     destination buffer (may be the same as @a s)
   * @param s     component to canonize
   * @param n     maximum number of characters to read from @a s
   * @param syn33 set of characters 33..64 accepted without escaping
   * @param m32   exclusion mask for characters 32..63
   * @param m64   exclusion mask for characters 64..95
   * @param m96   exclusion mask for characters 96..127
   *
   * @return Pointer to the NUL terminating the result in @a d, or NULL if
   * @a s contains an unacceptable character or a malformed or truncated
   * %-escape.
   */
  static
  char *url_canonize2(char *d, char const * const s, size_t n,
                      unsigned syn33,
                      unsigned m32, unsigned m64, unsigned m96)
  {
    size_t i = 0;

    /* Check the bound before touching s[i] so that s[n] is never read */
    if (d == s)
      for (; i < n && s[i]; d++, i++)
        if (s[i] == '%')
          break;

  #define UNHEX(a) (a - (a >= 'a' ? 'a' - 10 : (a >= 'A' ? 'A' - 10 : '0')))

    for (; i < n && s[i]; d++, i++) {
      unsigned char c = s[i], h1, h2;

      if (c != '%') {
        if (!IS_SYN33(syn33, c) && IS_EXCLUDED(c, m32, m64, m96))
          return NULL;
        *d = c;
        continue;
      }

      /* Both hex digits must be inside the first n characters */
      if (n - i < 3) {
        *d = '\0';
        return NULL;
      }

      h1 = s[i + 1];
      if (!h1) {
        *d = '\0';
        return NULL;
      }
      h2 = s[i + 2];

      if (!IS_HEX(h1) || !IS_HEX(h2)) {
        *d = '\0';
        return NULL;
      }

      c = (UNHEX(h1) << 4) | UNHEX(h2);

      if (!IS_EXCLUDED(c, m32, m64, m96)) {
        /* Convert hex to normal character */
        *d = c, i += 2;
        continue;
      }

      /* Convert hex to uppercase */
      if (h1 >= 'a' /* && h1 <= 'f' */)
        h1 = h1 - 'a' + 'A';
      if (h2 >= 'a' /* && h2 <= 'f' */)
        h2 = h2 - 'a' + 'A';

      d[0] = '%', d[1] = h1, d[2] = h2;

      d += 2, i += 2;
    }

  #undef UNHEX

    *d = '\0';

    return d;
  }
  /** Canonize a URL component (with precomputed mask).
   *
   * This version does not flag error if *s contains character that should
   * be escaped: unescaped characters are copied as they are. %-escapes of
   * characters that are not excluded by the masks are replaced with the
   * character itself; the hex digits of the remaining escapes are converted
   * to uppercase. The result is NUL-terminated.
   *
   * @param d   destination buffer (may be the same as @a s)
   * @param s   component to canonize
   * @param n   maximum number of characters to read from @a s
   * @param m32 exclusion mask for characters 32..63
   * @param m64 exclusion mask for characters 64..95
   * @param m96 exclusion mask for characters 96..127
   *
   * @return Pointer to the NUL terminating the result in @a d, or NULL if
   * @a s contains a malformed or truncated %-escape.
   */
  static
  char *url_canonize3(char *d, char const * const s, size_t n,
                      unsigned m32, unsigned m64, unsigned m96)
  {
    size_t i = 0;

    /* Check the bound before touching s[i] so that s[n] is never read */
    if (d == s)
      for (; i < n && s[i]; d++, i++)
        if (s[i] == '%')
          break;

  #define UNHEX(a) (a - (a >= 'a' ? 'a' - 10 : (a >= 'A' ? 'A' - 10 : '0')))

    for (; i < n && s[i]; d++, i++) {
      unsigned char c = s[i], h1, h2;

      if (c != '%') {
        *d = c;
        continue;
      }

      /* Both hex digits must be inside the first n characters */
      if (n - i < 3) {
        *d = '\0';
        return NULL;
      }

      h1 = s[i + 1];
      if (!h1) {
        *d = '\0';
        return NULL;
      }
      h2 = s[i + 2];

      if (!IS_HEX(h1) || !IS_HEX(h2)) {
        *d = '\0';
        return NULL;
      }

      c = (UNHEX(h1) << 4) | UNHEX(h2);

      if (!IS_EXCLUDED(c, m32, m64, m96)) {
        /* Convert hex to normal character */
        *d = c, i += 2;
        continue;
      }

      /* Convert hex to uppercase */
      if (h1 >= 'a' /* && h1 <= 'f' */)
        h1 = h1 - 'a' + 'A';
      if (h2 >= 'a' /* && h2 <= 'f' */)
        h2 = h2 - 'a' + 'A';

      d[0] = '%', d[1] = h1, d[2] = h2;

      d += 2, i += 2;
    }

  #undef UNHEX

    *d = '\0';

    return d;
  }
  388. /** Get URL scheme. */
  389. char const* url_scheme(enum url_type_e url_type)
  390. {
  391. switch (url_type) {
  392. case url_any: return "*";
  393. case url_sip: return "sip";
  394. case url_sips: return "sips";
  395. case url_tel: return "tel";
  396. case url_fax: return "fax";
  397. case url_modem: return "modem";
  398. case url_http: return "http";
  399. case url_https: return "https";
  400. case url_ftp: return "ftp";
  401. case url_file: return "file";
  402. case url_rtsp: return "rtsp";
  403. case url_rtspu: return "rtspu";
  404. case url_mailto: return "mailto";
  405. case url_im: return "im";
  406. case url_pres: return "pres";
  407. case url_cid: return "cid";
  408. case url_msrp: return "msrp";
  409. case url_msrps: return "msrps";
  410. case url_urn: return "urn";
  411. case url_wv: return "wv";
  412. default:
  413. assert(url_type == url_unknown);
  414. return NULL;
  415. }
  416. }
  417. su_inline
  418. int url_type_is_opaque(enum url_type_e url_type)
  419. {
  420. return
  421. url_type == url_invalid ||
  422. url_type == url_tel ||
  423. url_type == url_modem ||
  424. url_type == url_fax ||
  425. url_type == url_cid;
  426. }
  427. /** Init an url as given type */
  428. void url_init(url_t *url, enum url_type_e type)
  429. {
  430. memset(url, 0, sizeof(*url));
  431. url->url_type = type;
  432. if (type > url_unknown) {
  433. char const *scheme = url_scheme((enum url_type_e)url->url_type);
  434. if (scheme)
  435. url->url_scheme = scheme;
  436. }
  437. }
  438. /** Get url type */
  439. su_inline
  440. enum url_type_e url_get_type(char const *scheme, size_t len)
  441. {
  442. #define test_scheme(s) \
  443. if (len == strlen(#s) && !strncasecmp(scheme, #s, len)) return url_##s
  444. switch (scheme[0]) {
  445. case '*': if (strcmp(scheme, "*") == 0) return url_any;
  446. case 'c': case 'C':
  447. test_scheme(cid); break;
  448. case 'f': case 'F':
  449. test_scheme(ftp); test_scheme(file); test_scheme(fax); break;
  450. case 'h': case 'H':
  451. test_scheme(http); test_scheme(https); break;
  452. case 'i': case 'I':
  453. test_scheme(im); break;
  454. case 'm': case 'M':
  455. test_scheme(mailto); test_scheme(modem);
  456. test_scheme(msrp); test_scheme(msrps); break;
  457. case 'p': case 'P':
  458. test_scheme(pres); break;
  459. case 'r': case 'R':
  460. test_scheme(rtsp); test_scheme(rtspu); break;
  461. case 's': case 'S':
  462. test_scheme(sip); test_scheme(sips); break;
  463. case 't': case 'T':
  464. test_scheme(tel); break;
  465. case 'u': case 'U':
  466. test_scheme(urn); break;
  467. case 'w': case 'W':
  468. test_scheme(wv); break;
  469. default: break;
  470. }
  471. #undef test_scheme
  472. if (len != span_unreserved(scheme))
  473. return url_invalid;
  474. else
  475. return url_unknown;
  476. }
  477. /**
  478. * Decode a URL.
  479. *
  480. * This function decodes a (SIP) URL string to a url_t structure.
  * The string is split in place: delimiters are overwritten with NUL and
  * the members of @a url are set to point into @a s. Only the scheme and
  * the port are canonized here; url_d() canonizes the other components.
  481. *
  482. * @param url structure to store the parsing result
  483. * @param s NUL-terminated string to be parsed
  484. *
  485. * @note The parsed string @a s will be modified when parsing it.
  486. *
  487. * @retval 0 if successful,
  488. * @retval -1 otherwise.
  489. */
  490. static
  491. int _url_d(url_t *url, char *s)
  492. {
  493. size_t n, p;
  494. char rest_c, *host, *user;
  495. int have_authority = 1;
  496. memset(url, 0, sizeof(*url));
  /* A lone "*" is the wildcard URL */
  497. if (strcmp(s, "*") == 0) {
  498. url->url_type = url_any;
  499. url->url_scheme = "*";
  500. return 0;
  501. }
  /* The scheme is a non-empty prefix ending at ':' with no '/', '?' or '#'
   * before the ':'. The ':' is replaced with NUL and the scheme is canonized
   * in place with '+' allowed; afterwards n is the canonized length. */
  502. n = strcspn(s, ":/?#");
  503. if (n && s[n] == ':') {
  504. char *scheme;
  505. url->url_scheme = scheme = s; s[n] = '\0'; s = s + n + 1;
  506. if (!(scheme = url_canonize(scheme, scheme, SIZE_MAX, 0, "+")))
  507. return -1;
  508. n = scheme - url->url_scheme;
  509. url->url_type = url_get_type(url->url_scheme, n);
  /* Opaque types (see url_type_is_opaque()) have no host part */
  510. have_authority = !url_type_is_opaque((enum url_type_e)url->url_type);
  511. }
  512. else {
  513. url->url_type = url_unknown;
  514. }
  /* Locate user and host. After this step s[n] is the delimiter ending the
   * user/host part (or the terminating NUL). */
  515. user = NULL, host = s;
  516. if (url->url_type == url_sip || url->url_type == url_sips) {
  517. /* SIP URL may have /;? in user part but no path */
  518. /* user-unreserved = "&" / "=" / "+" / "$" / "," / ";" / "?" / "/" */
  519. /* Some #*@#* phones include unescaped # there, too */
  520. n = strcspn(s, "@/;?#");
  /* Look for an '@' even beyond the first '/', ';', '?' or '#' */
  521. p = strcspn(s + n, "@");
  522. if (s[n + p] == '@') {
  523. n += p;
  524. user = s;
  525. host = s + n + 1;
  526. }
  527. n += strcspn(s + n, "/;?#");
  528. }
  529. else if (have_authority) {
  530. if (url->url_type == url_wv) {
  531. /* WV URL may have / in user part */
  532. n = strcspn(s, "@#?;");
  533. if (s[n] == '@') {
  534. user = s;
  535. host = s + n + 1;
  536. n += strcspn(s + n, ";?#");
  537. }
  538. }
  539. else if (host[0] == '/' && host[1] != '/') {
  540. /* foo:/bar or /bar - no authority, just path */
  541. url->url_root = '/'; /* Absolute path */
  542. host = NULL, n = 0;
  543. }
  544. else {
  545. if (host[0] == '/' && host[1] == '/') {
  546. /* We have authority, / / foo or foo */
  547. host += 2; s += 2, url->url_root = '/';
  548. n = strcspn(s, "/?#@[]");
  549. }
  550. else
  551. n = strcspn(s, "@;/?#");
  552. if (s[n] == '@')
  553. user = host, host = user + n + 1;
  554. n += strcspn(s + n, ";/?#"); /* Find path, query and/or fragment */
  555. }
  556. }
  557. else /* !have_authority */ {
  /* Opaque URL: everything up to the delimiter is stored as the user part */
  558. user = host, host = NULL;
  559. if (url->url_type != url_invalid)
  560. n = strcspn(s, "/;?#"); /* Find params, query and/or fragment */
  561. else
  562. n = strcspn(s, "#");
  563. }
  /* Cut the string at the delimiter and remember the delimiter in rest_c;
   * s then points past the delimiter, or is NULL at the end of the string */
  564. rest_c = s[n]; s[n] = 0; s = rest_c ? s + n + 1 : NULL;
  565. if (user) {
  /* When both user and host were found, host[-1] is the '@' between them */
  566. if (host) host[-1] = '\0';
  567. url->url_user = user;
  /* Except for unknown schemes, split the password off at the first ':' */
  568. if (url->url_type != url_unknown) {
  569. n = strcspn(user, ":");
  570. if (user[n]) {
  571. user[n] = '\0';
  572. url->url_password = user + n + 1;
  573. }
  574. }
  575. }
  576. if (host) {
  577. url->url_host = host;
  578. /* IPv6 (and in some cases, IPv4) addresses are quoted with [] */
  579. if (host[0] == '[') {
  /* n covers the bracketed address including ']' when the ']' is followed
   * by the end of the host or by ':'; otherwise n is 0 */
  580. n = strcspn(host, "]");
  581. if (host[n] && (host[n + 1] == '\0' || host[n + 1] == ':'))
  582. n++;
  583. else
  584. n = 0;
  585. }
  586. else {
  587. n = strcspn(host, ":");
  588. }
  589. /* We allow empty host by default */
  /* ... but not for the types listed below */
  590. if (n == 0) switch (url->url_type) {
  591. case url_sip:
  592. case url_sips:
  593. case url_im:
  594. case url_pres:
  595. return -1;
  596. default:
  597. break;
  598. }
  599. if (host[n] == ':') {
  600. char *port = host + n + 1;
  601. url->url_port = port;
  /* The port is canonized and validated only for the types listed below */
  602. switch (url->url_type) {
  603. case url_any:
  604. case url_sip:
  605. case url_sips:
  606. case url_http:
  607. case url_https:
  608. case url_ftp:
  609. case url_file:
  610. case url_rtsp:
  611. case url_rtspu:
  612. if (!url_canonize2(port, port, SIZE_MAX, 0, RESERVED_MASK))
  613. return -1;
  614. /* Check that port is really numeric or wildcard */
  615. /* Port can be *digit, empty string or "*" */
  616. while (*port >= '0' && *port <= '9')
  617. port++;
  618. if (port != url->url_port) {
  619. if (port[0] != '\0')
  620. return -1;
  621. }
  622. else if (port[0] == '\0')
  623. /* empty string */;
  624. else if (port[0] == '*' && port[1] == '\0')
  625. /* wildcard */;
  626. else
  627. return -1;
  628. }
  /* Terminate the host at the ':' preceding the port */
  629. host[n] = 0;
  630. }
  631. }
  /* The rest is processed in fixed order: path, params, headers, fragment.
   * Each part ends at the delimiter starting one of the following parts. */
  632. if (rest_c == '/') {
  633. url->url_path = s; n = strcspn(s, "?#");
  634. rest_c = s[n]; s[n] = 0; s = rest_c ? s + n + 1 : NULL;
  635. }
  636. if (rest_c == ';') {
  637. url->url_params = s; n = strcspn(s, "?#");
  638. rest_c = s[n]; s[n] = 0; s = rest_c ? s + n + 1 : NULL;
  639. }
  640. if (rest_c == '?') {
  641. url->url_headers = s; n = strcspn(s, "#");
  642. rest_c = s[n]; s[n] = 0; s = rest_c ? s + n + 1 : NULL;
  643. }
  644. if (rest_c == '#') {
  645. url->url_fragment = s;
  646. rest_c = '\0';
  647. }
  /* A delimiter that none of the steps above consumed is an error */
  648. if (rest_c)
  649. return -1;
  650. return 0;
  651. }
  652. /* Unreserved things */
  653. /**
  654. * Decode a URL.
  655. *
  656. * This function decodes a URL string to a url_t structure.
  657. *
  658. * @param url structure to store the parsing result
  659. * @param s NUL-terminated string to be parsed
  660. *
  661. * @note The parsed string @a s will be modified when parsing it.
  662. *
  663. * @retval 0 if successful,
  664. * @retval -1 otherwise.
  665. */
  666. int url_d(url_t *url, char *s)
  667. {
  668. if (url == NULL || _url_d(url, s) < 0)
  669. return -1;
  670. /* Canonize URL */
  671. /* scheme is canonized by _url_d() */
  672. if (url->url_type == url_sip || url->url_type == url_sips) {
  673. # define SIP_USER_UNRESERVED "&=+$,;?/"
  674. s = (char *)url->url_user;
  675. if (s && !url_canonize(s, s, SIZE_MAX, 0, SIP_USER_UNRESERVED))
  676. return -1;
  677. /* Having different charset in user and password does not make sense */
  678. /* but that is how it is defined in RFC 3261 */
  679. # define SIP_PASS_UNRESERVED "&=+$,"
  680. s = (char *)url->url_password;
  681. if (s && !url_canonize(s, s, SIZE_MAX, 0, SIP_PASS_UNRESERVED))
  682. return -1;
  683. }
  684. else {
  685. # define USER_UNRESERVED "&=+$,;"
  686. s = (char *)url->url_user;
  687. if (s && !url_canonize(s, s, SIZE_MAX, 0, USER_UNRESERVED))
  688. return -1;
  689. # define PASS_UNRESERVED "&=+$,;:"
  690. s = (char *)url->url_password;
  691. if (s && !url_canonize(s, s, SIZE_MAX, 0, PASS_UNRESERVED))
  692. return -1;
  693. }
  694. s = (char *)url->url_host;
  695. if (s && !url_canonize2(s, s, SIZE_MAX, 0, RESERVED_MASK))
  696. return -1;
  697. /* port is canonized by _url_d() */
  698. s = (char *)url->url_path;
  699. if (s && !url_canonize(s, s, SIZE_MAX,
  700. /* Allow all URI characters but ? */
  701. /* Allow unescaped /;?@, - but do not convert */
  702. SYN33('/') | SYN33(';') | SYN33('=') | SYN33('@') |
  703. SYN33(','),
  704. /* Convert escaped :&+$ to unescaped */
  705. ":&+$"))
  706. return -1;
  707. s = (char *)url->url_params;
  708. if (s && !url_canonize(s, s, SIZE_MAX,
  709. /* Allow all URI characters but ? */
  710. /* Allow unescaped ;=@, - but do not convert */
  711. SYN33(';') | SYN33('=') | SYN33('@') | SYN33(','),
  712. /* Convert escaped /:&+$ to unescaped */
  713. "/:&+$"))
  714. return -1;
  715. /* Unhex alphanumeric and unreserved URI characters */
  716. s = (char *)url->url_headers;
  717. if (s && !url_canonize3(s, s, SIZE_MAX, RESERVED_MASK))
  718. return -1;
  719. /* Allow all URI characters (including reserved ones) */
  720. s = (char *)url->url_fragment;
  721. if (s && !url_canonize2(s, s, SIZE_MAX, 0, URIC_MASK))
  722. return -1;
  723. return 0;
  724. }
  725. /** Encode an URL.
  726. *
  727. * The function url_e() combines a URL from substrings in url_t structure
  728. * according the @ref url_syntax "URL syntax" presented above. The encoded
  729. * @a url is stored in a @a buffer of @a n bytes.
  730. *
  731. * @param buffer memory area to store the encoded @a url.
  732. * @param n size of @a buffer.
  733. * @param url URL to be encoded.
  734. *
  735. * @return
  736. * Return the number of bytes in the encoding.
  737. *
  738. * @note The function follows the convention set by C99 snprintf(). Even if
  739. * the result does not fit into the @a buffer and it is truncated, the
  740. * function returns the number of bytes in an untruncated encoding.
  741. */
  742. issize_t url_e(char buffer[], isize_t n, url_t const *url)
  743. {
  744. size_t i;
  745. char *b = buffer;
  746. size_t m = n;
  747. int do_copy = n > 0;
  748. if (url == NULL)
  749. return -1;
  750. if (URL_STRING_P(url)) {
  751. char const *u = (char *)url;
  752. i = strlen(u);
  753. if (!buffer)
  754. return i;
  755. if (i >= n) {
  756. memcpy(buffer, u, n - 2);
  757. buffer[n - 1] = '\0';
  758. } else {
  759. memcpy(buffer, u, i + 1);
  760. }
  761. return i;
  762. }
  763. if (url->url_type == url_any) {
  764. if (b && m > 0) {
  765. if (m > 1) strcpy(b, "*"); else b[0] = '\0';
  766. }
  767. return 1;
  768. }
  769. if (url->url_scheme && url->url_scheme[0]) {
  770. i = strlen(url->url_scheme) + 1;
  771. if (do_copy && (do_copy = i <= n)) {
  772. memcpy(b, url->url_scheme, i - 1);
  773. b[i - 1] = ':';
  774. }
  775. b += i; n -= i;
  776. }
  777. if (url->url_root && (url->url_host || url->url_user)) {
  778. if (do_copy && (do_copy = 2 <= n))
  779. memcpy(b, "//", 2);
  780. b += 2; n -= 2;
  781. }
  782. if (url->url_user) {
  783. i = strlen(url->url_user);
  784. if (do_copy && (do_copy = i <= n))
  785. memcpy(b, url->url_user, i);
  786. b += i; n -= i;
  787. if (url->url_password) {
  788. if (do_copy && (do_copy = 1 <= n))
  789. *b = ':';
  790. b++; n--;
  791. i = strlen(url->url_password);
  792. if (do_copy && (do_copy = i <= n))
  793. memcpy(b, url->url_password, i);
  794. b += i; n -= i;
  795. }
  796. if (url->url_host) {
  797. if (do_copy && (do_copy = 1 <= n))
  798. *b = '@';
  799. b++; n--;
  800. }
  801. }
  802. if (url->url_host) {
  803. i = strlen(url->url_host);
  804. if (do_copy && (do_copy = i <= n))
  805. memcpy(b, url->url_host, i);
  806. b += i; n -= i;
  807. if (url->url_port) {
  808. i = strlen(url->url_port) + 1;
  809. if (do_copy && (do_copy = i <= n)) {
  810. b[0] = ':';
  811. memcpy(b + 1, url->url_port, i - 1);
  812. }
  813. b += i; n -= i;
  814. }
  815. }
  816. if (url->url_path) {
  817. if (url->url_root) {
  818. if (do_copy && (do_copy = 1 <= n))
  819. b[0] = '/';
  820. b++, n--;
  821. }
  822. i = strlen(url->url_path);
  823. if (do_copy && (do_copy = i < n))
  824. memcpy(b, url->url_path, i);
  825. b += i; n -= i;
  826. }
  827. {
  828. static char const sep[] = ";?#";
  829. char const *pp[3];
  830. size_t j;
  831. pp[0] = url->url_params;
  832. pp[1] = url->url_headers;
  833. pp[2] = url->url_fragment;
  834. for (j = 0; j < 3; j++) {
  835. char const *p = pp[j];
  836. if (!p) continue;
  837. i = strlen(p) + 1;
  838. if (do_copy && (do_copy = i <= n)) {
  839. *b = sep[j];
  840. memcpy(b + 1, p, i - 1);
  841. }
  842. b += i; n -= i;
  843. }
  844. }
  845. if (do_copy && (1 <= n))
  846. *b = '\0';
  847. else if (buffer && m > 0)
  848. buffer[m - 1] = '\0';
  849. assert((size_t)(b - buffer) == (size_t)(m - n));
  850. /* This follows the snprintf(C99) return value,
  851. * Number of characters written (excluding NUL)
  852. */
  853. return b - buffer;
  854. }
  855. /** Calculate the length of URL when encoded.
  856. *
  857. */
  858. isize_t url_len(url_t const * url)
  859. {
  860. size_t rv = 0;
  861. if (url->url_scheme) rv += strlen(url->url_scheme) + 1; /* plus ':' */
  862. if (url->url_user) {
  863. rv += strlen(url->url_user);
  864. if (url->url_password)
  865. rv += strlen(url->url_password) + 1; /* plus ':' */
  866. rv += url->url_host != NULL; /* plus '@' */
  867. }
  868. if (url->url_host) rv += strlen(url->url_host);
  869. if (url->url_port) rv += strlen(url->url_port) + 1; /* plus ':' */
  870. if (url->url_path) rv += strlen(url->url_path) + 1; /* plus initial / */
  871. if (url->url_params) rv += strlen(url->url_params) + 1; /* plus initial ; */
  872. if (url->url_headers) rv += strlen(url->url_headers) + 1; /* plus '?' */
  873. if (url->url_fragment) rv += strlen(url->url_fragment) + 1; /* plus '#' */
  874. return rv;
  875. }
  876. /**@def URL_E(buf, end, url)
  877. * Encode an URL: use @a buf up to @a end.
  878. * @hideinitializer
  879. */
  880. /**
  881. * Calculate the size of strings associated with a #url_t sructure.
  882. *
  883. * @param url pointer to a #url_t structure or string
  884. * @return Number of bytes for URL
  885. */
  886. isize_t url_xtra(url_t const *url)
  887. {
  888. size_t xtra;
  889. if (URL_STRING_P(url)) {
  890. xtra = strlen((char const *)url) + 1;
  891. }
  892. else {
  893. size_t len_scheme, len_user, len_password,
  894. len_host, len_port, len_path, len_params,
  895. len_headers, len_fragment;
  896. len_scheme = (url->url_type <= url_unknown && url->url_scheme) ?
  897. strlen(url->url_scheme) + 1 : 0;
  898. len_user = url->url_user ? strlen(url->url_user) + 1 : 0;
  899. len_password = url->url_password ? strlen(url->url_password) + 1 : 0;
  900. len_host = url->url_host ? strlen(url->url_host) + 1 : 0;
  901. len_port = url->url_port ? strlen(url->url_port) + 1 : 0;
  902. len_path = url->url_path ? strlen(url->url_path) + 1 : 0;
  903. len_params = url->url_params ? strlen(url->url_params) + 1 : 0;
  904. len_headers = url->url_headers ? strlen(url->url_headers) + 1 : 0;
  905. len_fragment = url->url_fragment ? strlen(url->url_fragment) + 1 : 0;
  906. xtra =
  907. len_scheme + len_user + len_password + len_host + len_port +
  908. len_path + len_params + len_headers + len_fragment;
  909. }
  910. return xtra;
  911. }
  912. su_inline
  913. char *copy(char *buf, char *end, char const *src)
  914. {
  915. #if HAVE_MEMCCPY
  916. char *b = memccpy(buf, src, '\0', end - buf);
  917. if (b)
  918. return b;
  919. else
  920. return end + strlen(src + (end - buf)) + 1;
  921. #else
  922. for (; buf < end && (*buf = *src); buf++, src++)
  923. ;
  924. if (buf >= end)
  925. while (*src++)
  926. buf++;
  927. return buf + 1;
  928. #endif
  929. }
  930. /**
  931. * Duplicate the url.
  932. *
  933. * The function url_dup() copies the url structure @a src and the strings
  934. * attached to it to @a url. The non-constant strings in @a src are copied
  935. * to @a buf. If the size of duplicated strings exceed @a bufsize, the
  936. * corresponding string fields in @a url are set to NULL.
  937. *
  938. * The calling function can calculate the size of buffer required by calling
  939. * url_dup() with zero as @a bufsize and NULL as @a dst.
  940. * @param buf Buffer for non-constant strings copied from @a src.
  941. * @param bufsize Size of @a buf.
  942. * @param dst Destination URL structure.
  943. * @param src Source URL structure.
  944. *
  945. * @return Number of characters required for
  946. * duplicating the strings in @a str, or -1 if an error
  947. * occurred.
  948. */
  949. issize_t url_dup(char *buf, isize_t bufsize, url_t *dst, url_t const *src)
  950. {
  951. if (!src && !dst)
  952. return -1;
  953. else if (URL_STRING_P(src)) {
  954. size_t n = strlen((char *)src) + 1;
  955. if (n > bufsize || dst == NULL)
  956. return n;
  957. strcpy(buf, (char *)src);
  958. memset(dst, 0, sizeof(*dst));
  959. if (url_d(dst, buf) < 0)
  960. return -1;
  961. return n;
  962. }
  963. else {
  964. char *b = buf;
  965. char *end = b + bufsize;
  966. char const **dstp;
  967. char const * const *srcp;
  968. url_t dst0[1];
  969. if (dst == NULL)
  970. dst = dst0;
  971. memset(dst, 0, sizeof(*dst));
  972. if (!src)
  973. return 0;
  974. memset(dst->url_pad, 0, sizeof dst->url_pad);
  975. dst->url_type = src->url_type;
  976. dst->url_root = src->url_root;
  977. dstp = &dst->url_scheme;
  978. srcp = &src->url_scheme;
  979. if (dst->url_type > url_unknown)
  980. *dstp = url_scheme((enum url_type_e)dst->url_type);
  981. if (*dstp != NULL)
  982. dstp++, srcp++; /* Skip scheme if it is constant */
  983. if (dst != dst0 && buf != NULL && bufsize != 0)
  984. for (; srcp <= &src->url_fragment; srcp++, dstp++)
  985. if (*srcp) {
  986. char *next = copy(b, end, *srcp);
  987. if (next > end)
  988. break;
  989. *dstp = b, b = next;
  990. }
  991. for (; srcp <= &src->url_fragment; srcp++)
  992. if (*srcp) {
  993. b += strlen(*srcp) + 1;
  994. }
  995. return b - buf;
  996. }
  997. }
  998. /**@def URL_DUP(buf, end, dst, src)
  999. * Duplicate the url: use @a buf up to @a end. @HI
  1000. *
  1001. * The macro URL_DUP() duplicates the url. The non-constant strings in @a
  1002. * src are copied to @a buf. However, no strings are copied past @a end.
  1003. * In other words, the size of buffer is @a end - @a buf.
  1004. *
  1005. * The macro updates the buffer pointer @a buf, so that it points to the
  1006. * first unused byte in the buffer. The buffer pointer @a buf is updated,
  1007. * even if the buffer is too small for the duplicated strings.
  1008. *
  1009. * @param buf Buffer for non-constant strings copied from @a src.
  1010. * @param end End of @a buf.
  1011. * @param dst Destination URL structure.
  1012. * @param src Source URL structure.
  1013. *
  1014. * @return
  1015. * The macro URL_DUP() returns pointer to first unused byte in the
  1016. * buffer @a buf.
  1017. */
  1018. /** Duplicate the url to memory allocated via home.
  1019. *
  1020. * The function url_hdup() duplicates (deep copies) an #url_t structure.
  1021. * Alternatively, it can be passed a string; string is then copied and
  1022. * parsed to the #url_t structure.
  1023. *
  1024. * The function url_hdup() allocates the destination structure from @a home
  1025. * as a single memory block. It is possible to free the copied url structure
  1026. * and all the associated strings using a single call to su_free().
  1027. *
  1028. * @param home memory home used to allocate new url object
  1029. * @param src pointer to URL (or string)
  1030. *
  1031. * @return
  1032. * The function url_hdup() returns a pointer to the newly allocated #url_t
  1033. * structure, or NULL upon an error.
  1034. */
  1035. url_t *url_hdup(su_home_t *home, url_t const *src)
  1036. {
  1037. if (src) {
  1038. size_t len = sizeof(*src) + url_xtra(src);
  1039. url_t *dst = su_alloc(home, len);
  1040. if (dst) {
  1041. ssize_t actual;
  1042. actual = url_dup((char *)(dst + 1), len - sizeof(*src), dst, src);
  1043. if (actual < 0)
  1044. su_free(home, dst), dst = NULL;
  1045. else
  1046. assert(len == sizeof(*src) + actual);
  1047. }
  1048. return dst;
  1049. }
  1050. else
  1051. return NULL;
  1052. }
  1053. /** Convert an string to an url */
  1054. url_t *url_make(su_home_t *h, char const *str)
  1055. {
  1056. return url_hdup(h, URL_STRING_MAKE(str)->us_url);
  1057. }
  1058. /** Print an URL */
  1059. url_t *url_format(su_home_t *h, char const *fmt, ...)
  1060. {
  1061. url_t *url;
  1062. char *us;
  1063. va_list ap;
  1064. va_start(ap, fmt);
  1065. us = su_vsprintf(h, fmt, ap);
  1066. va_end(ap);
  1067. if (us == NULL)
  1068. return NULL;
  1069. url = url_hdup(h, URL_STRING_MAKE(us)->us_url);
  1070. su_free(h, us);
  1071. return url;
  1072. }
  1073. /** Convert @a url to a string allocated from @a home.
  1074. *
  1075. * @param home memory home to allocate the new string
  1076. * @param url url to convert to string
  1077. *
  1078. * The @a url can be a string, too.
  1079. *
  1080. * @return Newly allocated conversion result, or NULL upon an error.
  1081. */
  1082. char *url_as_string(su_home_t *home, url_t const *url)
  1083. {
  1084. if (url) {
  1085. int len = url_e(NULL, 0, url);
  1086. char *b = su_alloc(home, len + 1);
  1087. url_e(b, len + 1, url);
  1088. return b;
  1089. } else {
  1090. return NULL;
  1091. }
  1092. }
  1093. /** Test if param @a tag matches to parameter string @a p.
  1094. */
  1095. #define URL_PARAM_MATCH(p, tag) \
  1096. (strncasecmp(p, tag, strlen(tag)) == 0 && \
  1097. (p[strlen(tag)] == '\0' || p[strlen(tag)] == ';' || p[strlen(tag)] == '='))
  1098. /**
  1099. * Search for a parameter.
  1100. *
  1101. * This function searches for a parameter from a parameter list.
  1102. *
  1103. * If you want to test if there is parameter @b user=phone,
  1104. * call this function like
  1105. * @code if (url_param(url->url_param, "user=phone", NULL, 0))
  1106. * @endcode
  1107. *
  1108. * @param params URL parameter string (excluding first semicolon)
  1109. * @param tag parameter name
  1110. * @param value string to which the parameter value is copied
  1111. * @param vlen length of string reserved for value
  1112. *
  1113. * @retval positive length of parameter value (including final NUL) if found
  1114. * @retval zero if not found.
  1115. */
  1116. isize_t url_param(char const *params,
  1117. char const *tag,
  1118. char value[], isize_t vlen)
  1119. {
  1120. size_t n, tlen, flen;
  1121. char *p;
  1122. if (!params)
  1123. return 0;
  1124. tlen = strlen(tag);
  1125. if (tlen && tag[tlen - 1] == '=')
  1126. tlen--;
  1127. for (p = (char *)params; *p; p += n + 1) {
  1128. n = strcspn(p, ";");
  1129. if (n < tlen) {
  1130. if (p[n]) continue; else break;
  1131. }
  1132. if (strncasecmp(p, tag, tlen) == 0) {
  1133. if (n == tlen) {
  1134. if (vlen > 0)
  1135. value[0] = '\0';
  1136. return 1;
  1137. }
  1138. if (p[tlen] != '=')
  1139. continue;
  1140. flen = n - tlen - 1;
  1141. if (flen >= (size_t)vlen)
  1142. return flen + 1;
  1143. memcpy(value, p + tlen + 1, flen);
  1144. value[flen] = '\0';
  1145. return flen + 1;
  1146. }
  1147. if (!p[n])
  1148. break;
  1149. }
  1150. return 0;
  1151. }
  1152. /** Check for a parameter.
  1153. *
  1154. * @deprecated
  1155. * Bad grammar. Use url_has_param().
  1156. */
  1157. isize_t url_have_param(char const *params, char const *tag)
  1158. {
  1159. return url_param(params, tag, NULL, 0);
  1160. }
  1161. /** Check for a parameter. */
  1162. int url_has_param(url_t const *url, char const *tag)
  1163. {
  1164. return url && url->url_params && url_param(url->url_params, tag, NULL, 0);
  1165. }
  1166. /** Add an parameter. */
  1167. int url_param_add(su_home_t *h, url_t *url, char const *param)
  1168. {
  1169. /* XXX - should remove existing parameters with same name? */
  1170. size_t n = url->url_params ? strlen(url->url_params) + 1: 0;
  1171. size_t nn = strlen(param) + 1;
  1172. char *s = su_alloc(h, n + nn);
  1173. if (!s)
  1174. return -1;
  1175. if (url->url_params)
  1176. strcpy(s, url->url_params)[n - 1] = ';';
  1177. strcpy(s + n, param);
  1178. url->url_params = s;
  1179. return 0;
  1180. }
  1181. /** Remove a named parameter from url_param string.
  1182. *
  1183. * Remove a named parameter and its possible value from the URL parameter
  1184. * string (url_s##url_param).
  1185. *
  1186. * @return Pointer to modified string, or NULL if nothing is left in there.
  1187. */
  1188. char *url_strip_param_string(char *params, char const *name)
  1189. {
  1190. if (params && name) {
  1191. size_t i, n = strlen(name), remove, rest;
  1192. for (i = 0; params[i];) {
  1193. if (strncasecmp(params + i, name, n) ||
  1194. (params[i + n] != '=' && params[i + n] != ';' && params[i + n])) {
  1195. i = i + strcspn(params + i, ";");
  1196. if (!params[i++])
  1197. break;
  1198. continue;
  1199. }
  1200. remove = n + strcspn(params + i + n, ";");
  1201. if (params[i + remove] == ';')
  1202. remove++;
  1203. if (i == 0) {
  1204. params += remove;
  1205. continue;
  1206. }
  1207. rest = strlen(params + i + remove);
  1208. if (!rest) {
  1209. if (i == 0)
  1210. return NULL; /* removed everything */
  1211. params[i - 1] = '\0';
  1212. break;
  1213. }
  1214. memmove(params + i, params + i + remove, rest + 1);
  1215. }
  1216. if (!params[0])
  1217. return NULL;
  1218. }
  1219. return params;
  1220. }
  1221. int url_string_p(url_string_t const *url)
  1222. {
  1223. return URL_STRING_P(url);
  1224. }
  1225. int url_is_string(url_string_t const *url)
  1226. {
  1227. return URL_IS_STRING(url);
  1228. }
  1229. /** Strip transport-specific stuff. */
  1230. static
  1231. int url_strip_transport2(url_t *url, int modify)
  1232. {
  1233. char *p, *d;
  1234. size_t n;
  1235. int semi;
  1236. if (url->url_type != url_sip && url->url_type != url_sips)
  1237. return 0;
  1238. if (url->url_port != NULL) {
  1239. if (!modify)
  1240. return 1;
  1241. url->url_port = NULL;
  1242. }
  1243. if (!url->url_params)
  1244. return 0;
  1245. for (d = p = (char *)url->url_params; *p; p += n + semi) {
  1246. n = strcspn(p, ";");
  1247. semi = (p[n] != '\0');
  1248. if (modify && n == 0)
  1249. continue;
  1250. if (URL_PARAM_MATCH(p, "method"))
  1251. continue;
  1252. if (URL_PARAM_MATCH(p, "maddr"))
  1253. continue;
  1254. if (URL_PARAM_MATCH(p, "ttl"))
  1255. continue;
  1256. if (URL_PARAM_MATCH(p, "transport"))
  1257. continue;
  1258. if (p != d) {
  1259. if (d != url->url_params)
  1260. d++;
  1261. if (p != d) {
  1262. if (!modify)
  1263. return 1;
  1264. memmove(d, p, n + 1);
  1265. }
  1266. }
  1267. d += n;
  1268. }
  1269. if (d == p)
  1270. return 0;
  1271. else if (d + 1 == p) /* empty param */
  1272. return 0;
  1273. else if (!modify)
  1274. return 1;
  1275. if (d != url->url_params)
  1276. *d = '\0';
  1277. else
  1278. url->url_params = NULL;
  1279. return 1;
  1280. }
  1281. /** Strip transport-specific stuff.
  1282. *
  1283. * The function url_strip_transport() removes transport-specific parameters
  1284. * from a SIP or SIPS URI. These parameters include:
  1285. * - the port number
  1286. * - "maddr=" parameter
  1287. * - "transport=" parameter
  1288. * - "ttl=" parameter
  1289. * - "method=" parameter
  1290. *
  1291. * @note
  1292. * The @a url must be a pointer to a URL structure. It is stripped in-place.
  1293. *
  1294. * @note
  1295. * If the parameter string contains empty parameters, they are stripped, too.
  1296. *
  1297. * @return
  1298. * The function url_strip_transport() returns @e true, if the URL was
  1299. * modified, @e false otherwise.
  1300. */
  1301. int url_strip_transport(url_t *url)
  1302. {
  1303. return url_strip_transport2(url, 1);
  1304. }
  1305. /** Check for transport-specific stuff.
  1306. *
  1307. * The function url_have_transport() tests if there are transport-specific
  1308. * parameters in a SIP or SIPS URI. These parameters include:
  1309. * - the port number
  1310. * - "maddr=" parameters
  1311. * - "transport=" parameters
  1312. *
  1313. * @note
  1314. * The @a url must be a pointer to a URL structure.
  1315. *
  1316. * @return The function url_have_transport() returns @e true, if the URL
  1317. * contains transport parameters, @e false otherwise.
  1318. */
  1319. int url_have_transport(url_t const *url)
  1320. {
  1321. return url_strip_transport2((url_t *)url, 0);
  1322. }
  1323. /**Lazily compare two URLs.
  1324. *
  1325. * Compare essential parts of URLs: schema, host, port, and username.
  1326. *
  1327. * any_url compares 0 with any other URL.
  1328. *
  1329. * pres: and im: URIs compares 0 with SIP URIs.
  1330. *
  1331. * @note
  1332. * The @a a and @a b must be pointers to URL structures.
  1333. *
  1334. * @note Currently, the url parameters are not compared. This is because the
  1335. * url_cmp() is used to sort URLs: taking parameters into account makes that
  1336. * impossible.
  1337. */
  1338. int url_cmp(url_t const *a, url_t const *b)
  1339. {
  1340. int rv;
  1341. int url_type;
  1342. if ((a && a->url_type == url_any) || (b && b->url_type == url_any))
  1343. return 0;
  1344. if (!a || !b)
  1345. return (a != NULL) - (b != NULL);
  1346. if ((rv = a->url_type - b->url_type)) {
  1347. #if 0
  1348. /* presence and instant messaging URLs match magically with SIP */
  1349. enum url_type_e a_type = a->url_type;
  1350. enum url_type_e b_type = b->url_type;
  1351. if (a_type == url_im || a_type == url_pres)
  1352. a_type = url_sip;
  1353. if (b_type == url_im || b_type == url_pres)
  1354. b_type = url_sip;
  1355. if (a_type != b_type)
  1356. #endif
  1357. return rv;
  1358. }
  1359. url_type = a->url_type; /* Or b->url_type, they are equal! */
  1360. if (url_type <= url_unknown &&
  1361. ((rv = !a->url_scheme - !b->url_scheme) ||
  1362. (a->url_scheme && b->url_scheme &&
  1363. (rv = strcasecmp(a->url_scheme, b->url_scheme)))))
  1364. return rv;
  1365. if ((rv = host_cmp(a->url_host, b->url_host)))
  1366. return rv;
  1367. if (a->url_port != b->url_port) {
  1368. char const *a_port;
  1369. char const *b_port;
  1370. if (url_type != url_sip && url_type != url_sips)
  1371. a_port = b_port = url_port_default((enum url_type_e)url_type);
  1372. else if (host_is_ip_address(a->url_host))
  1373. a_port = b_port = url_port_default((enum url_type_e)url_type);
  1374. else
  1375. a_port = b_port = "";
  1376. if (a->url_port) a_port = a->url_port;
  1377. if (b->url_port) b_port = b->url_port;
  1378. if ((rv = strcmp(a_port, b_port)))
  1379. return rv;
  1380. }
  1381. if (a->url_user != b->url_user) {
  1382. if (a->url_user == NULL) return -1;
  1383. if (b->url_user == NULL) return +1;
  1384. switch (url_type) {
  1385. case url_tel: case url_modem: case url_fax:
  1386. rv = url_tel_cmp_numbers(a->url_user, b->url_user);
  1387. break;
  1388. default:
  1389. rv = strcmp(a->url_user, b->url_user);
  1390. break;
  1391. }
  1392. if (rv)
  1393. return rv;
  1394. }
  1395. #if 0
  1396. if (a->url_path != b->url_path) {
  1397. if (a->url_path == NULL) return -1;
  1398. if (b->url_path == NULL) return +1;
  1399. if ((rv = strcmp(a->url_path, b->url_path)))
  1400. return rv;
  1401. }
  1402. #endif
  1403. return 0;
  1404. }
  1405. static
  1406. int url_tel_cmp_numbers(char const *A, char const *B)
  1407. {
  1408. short a, b;
  1409. int rv;
  1410. while (*A && *B) {
  1411. #define UNHEX(a) (a - (a >= 'a' ? 'a' - 10 : (a >= 'A' ? 'A' - 10 : '0')))
  1412. /* Skip visual-separators */
  1413. do {
  1414. a = *A++;
  1415. if (a == '%' && IS_HEX(A[0]) && IS_HEX(A[1]))
  1416. a = (UNHEX(A[0]) << 4) | UNHEX(A[1]), A +=2;
  1417. } while (a == ' ' || a == '-' || a == '.' || a == '(' || a == ')');
  1418. if (isupper(a))
  1419. a = tolower(a);
  1420. do {
  1421. b = *B++;
  1422. if (b == '%' && IS_HEX(B[0]) && IS_HEX(B[1]))
  1423. b = (UNHEX(B[0]) << 4) | UNHEX(B[1]), B +=2;
  1424. } while (b == ' ' || b == '-' || b == '.' || b == '(' || b == ')');
  1425. if (isupper(b))
  1426. b = tolower(b);
  1427. if ((rv = a - b))
  1428. return rv;
  1429. }
  1430. return (int)*A - (int)*B;
  1431. }
  1432. /**Conservative comparison of urls.
  1433. *
  1434. * Compare all parts of URLs.
  1435. *
  1436. * @note
  1437. * The @a a and @a b must be pointers to URL structures.
  1438. *
  1439. */
  1440. int url_cmp_all(url_t const *a, url_t const *b)
  1441. {
  1442. int rv, url_type;
  1443. if (!a || !b)
  1444. return (a != NULL) - (b != NULL);
  1445. if ((rv = a->url_type - b->url_type))
  1446. return rv;
  1447. url_type = a->url_type; /* Or b->url_type, they are equal! */
  1448. if (url_type <= url_unknown &&
  1449. ((rv = !a->url_scheme - !b->url_scheme) ||
  1450. (a->url_scheme && b->url_scheme &&
  1451. (rv = strcasecmp(a->url_scheme, b->url_scheme)))))
  1452. return rv;
  1453. if ((rv = a->url_root - b->url_root))
  1454. return rv;
  1455. if ((rv = host_cmp(a->url_host, b->url_host)))
  1456. return rv;
  1457. if (a->url_port != b->url_port) {
  1458. char const *a_port;
  1459. char const *b_port;
  1460. if (url_type != url_sip && url_type != url_sips)
  1461. a_port = b_port = url_port_default((enum url_type_e)url_type);
  1462. else if (host_is_ip_address(a->url_host))
  1463. a_port = b_port = url_port_default((enum url_type_e)url_type);
  1464. else
  1465. a_port = b_port = "";
  1466. if (a->url_port) a_port = a->url_port;
  1467. if (b->url_port) b_port = b->url_port;
  1468. if ((rv = strcmp(a_port, b_port)))
  1469. return rv;
  1470. }
  1471. if (a->url_user != b->url_user) {
  1472. if (a->url_user == NULL) return -1;
  1473. if (b->url_user == NULL) return +1;
  1474. switch (url_type) {
  1475. case url_tel: case url_modem: case url_fax:
  1476. rv = url_tel_cmp_numbers(a->url_user, b->url_user);
  1477. break;
  1478. default:
  1479. rv = strcmp(a->url_user, b->url_user);
  1480. break;
  1481. }
  1482. if (rv)
  1483. return rv;
  1484. }
  1485. if (a->url_path != b->url_path) {
  1486. if (a->url_path == NULL) return -1;
  1487. if (b->url_path == NULL) return +1;
  1488. if ((rv = strcmp(a->url_path, b->url_path)))
  1489. return rv;
  1490. }
  1491. if (a->url_params != b->url_params) {
  1492. if (a->url_params == NULL) return -1;
  1493. if (b->url_params == NULL) return +1;
  1494. if ((rv = strcmp(a->url_params, b->url_params)))
  1495. return rv;
  1496. }
  1497. if (a->url_headers != b->url_headers) {
  1498. if (a->url_headers == NULL) return -1;
  1499. if (b->url_headers == NULL) return +1;
  1500. if ((rv = strcmp(a->url_headers, b->url_headers)))
  1501. return rv;
  1502. }
  1503. if (a->url_headers != b->url_headers) {
  1504. if (a->url_headers == NULL) return -1;
  1505. if (b->url_headers == NULL) return +1;
  1506. if ((rv = strcmp(a->url_headers, b->url_headers)))
  1507. return rv;
  1508. }
  1509. if (a->url_fragment != b->url_fragment) {
  1510. if (a->url_fragment == NULL) return -1;
  1511. if (b->url_fragment == NULL) return +1;
  1512. if ((rv = strcmp(a->url_fragment, b->url_fragment)))
  1513. return rv;
  1514. }
  1515. return 0;
  1516. }
  1517. /** Return default port number corresponding to the url type */
  1518. char const *url_port_default(enum url_type_e url_type)
  1519. {
  1520. switch (url_type) {
  1521. case url_sip: /* "sip:" */
  1522. return "5060";
  1523. case url_sips: /* "sips:" */
  1524. return "5061";
  1525. case url_http: /* "http:" */
  1526. return "80";
  1527. case url_https: /* "https:" */
  1528. return "443";
  1529. case url_ftp: /* "ftp:" */
  1530. case url_file: /* "file:" */
  1531. return "21";
  1532. case url_rtsp: /* "rtsp:" */
  1533. case url_rtspu: /* "rtspu:" */
  1534. return "554";
  1535. case url_mailto: /* "mailto:" */
  1536. return "25";
  1537. case url_any: /* "*" */
  1538. return "*";
  1539. case url_msrp:
  1540. case url_msrps:
  1541. return "9999"; /* XXXX */
  1542. case url_tel:
  1543. case url_urn:
  1544. case url_fax:
  1545. case url_modem:
  1546. case url_im:
  1547. case url_pres:
  1548. case url_cid:
  1549. case url_wv:
  1550. default: /* Unknown scheme */
  1551. return "";
  1552. }
  1553. }
  1554. /** Return default transport name corresponding to the url type */
  1555. char const *url_tport_default(enum url_type_e url_type)
  1556. {
  1557. switch (url_type) {
  1558. case url_sip:
  1559. return "*";
  1560. case url_sips:
  1561. return "tls";
  1562. case url_http:
  1563. return "tcp";
  1564. case url_https:
  1565. return "tls";
  1566. case url_ftp:
  1567. case url_file:
  1568. return "tcp";
  1569. case url_rtsp:
  1570. return "tcp";
  1571. case url_rtspu:
  1572. return "udp";
  1573. case url_mailto:
  1574. return "tcp";
  1575. case url_msrp:
  1576. return "tcp";
  1577. case url_msrps:
  1578. return "tls";
  1579. case url_any: /* "*" */
  1580. case url_tel:
  1581. case url_fax:
  1582. case url_modem:
  1583. case url_im:
  1584. case url_pres:
  1585. case url_cid:
  1586. case url_urn:
  1587. case url_wv:
  1588. default: /* Unknown scheme */
  1589. return "*";
  1590. }
  1591. }
  1592. /** Return the URL port string */
  1593. char const *url_port(url_t const *u)
  1594. {
  1595. if (!u)
  1596. return "";
  1597. else if (u->url_port && u->url_port[0])
  1598. return u->url_port;
  1599. if (u->url_type == url_sips || u->url_type == url_sip)
  1600. if (!host_is_ip_address(u->url_host))
  1601. return "";
  1602. return url_port_default((enum url_type_e)u->url_type);
  1603. }
  1604. /** Sanitize URL.
  1605. *
  1606. * The function url_sanitize() adds a scheme to an incomplete URL. It
  1607. * modifies its parameter structure @a url. Currently, the function follows
  1608. * simple heuristics:
  1609. *
  1610. * - URL with host name starting with @c ftp. is an FTP URL
  1611. * - URL with host name starting with @c www. is an HTTP URL
  1612. * - URL with host and path, e.g., @c host/foo;bar, is an HTTP URL
  1613. * - URL with host name, no path is a SIP URL.
  1614. *
  1615. * @param url pointer to URL struct to be sanitized (IN/OUT)
  1616. *
  1617. * @return
  1618. * The function url_sanitize() returns 0 if it considers URL to be
  1619. * sane, and -1 otherwise.
  1620. */
  1621. int url_sanitize(url_t *url)
  1622. {
  1623. if (!url)
  1624. return -1;
  1625. else if (url->url_scheme != NULL)
  1626. /* xyzzy */;
  1627. else if (url->url_host == NULL)
  1628. return -1;
  1629. else if (strncasecmp(url->url_host, "ftp.", strlen("ftp.")) == 0)
  1630. url->url_type = url_ftp, url->url_scheme = "ftp", url->url_root = '/';
  1631. else if (strncasecmp(url->url_host, "www.", strlen("www.")) == 0
  1632. || url->url_path)
  1633. url->url_type = url_http, url->url_scheme = "http", url->url_root = '/';
  1634. else
  1635. url->url_type = url_sip, url->url_scheme = "sip";
  1636. return 0;
  1637. }
  1638. #include <sofia-sip/su_md5.h>
  1639. static
  1640. void canon_update(su_md5_t *md5, char const *s, size_t n, char const *allow)
  1641. {
  1642. size_t i, j;
  1643. for (i = 0, j = 0; i < n && s[i]; i++) {
  1644. char c;
  1645. if (s[i] == '%' && i + 2 < n && IS_HEX(s[i+1]) && IS_HEX(s[i+2])) {
  1646. #define UNHEX(a) (a - (a >= 'a' ? 'a' - 10 : (a >= 'A' ? 'A' - 10 : '0')))
  1647. c = (UNHEX(s[i+1]) << 4) | UNHEX(s[i+2]);
  1648. #undef UNHEX
  1649. if (c != '%' && c > ' ' && c < '\177' &&
  1650. (!strchr(EXCLUDED, c) || strchr(allow, c))) {
  1651. if (i != j)
  1652. su_md5_iupdate(md5, s + j, i - j);
  1653. su_md5_iupdate(md5, &c, 1);
  1654. j = i + 3;
  1655. }
  1656. i += 2;
  1657. }
  1658. }
  1659. if (i != j)
  1660. su_md5_iupdate(md5, s + j, i - j);
  1661. }
/** Update MD5 sum with url-string contents.
 *
 * Splits the URL string @a s into scheme, user, host and port and feeds
 * them to @a md5. The password, path, parameters, headers and fragment
 * are left out. A NULL or empty string and "*" are all treated alike.
 *
 * NOTE(review): the su_md5_i*update() variants presumably hash without
 * regard to case, and the sequence fed here is presumably meant to match
 * what url_update() feeds for the parsed form of the same URL - confirm
 * against su_md5.h and the callers.
 */
static
void url_string_update(su_md5_t *md5, char const *s)
{
  size_t n, p;
  int have_authority = 1;
  enum url_type_e type = url_any;
  char const *at, *colon;
  char schema[48] = { 0 };

  if (s == NULL || strlen(s) == 0 || strcmp(s, "*") == 0) {
    su_md5_update(md5, "*\0\0*", 4);
    return;
  }

  /* Scheme: the text before the first ':', provided no '/', '?' or '#'
   * comes first and it fits into the schema buffer. */
  n = strcspn(s, ":/?#");
  if (n >= sizeof schema) {
    su_md5_update(md5, ":", 1);
  }
  else if (n && s[n] == ':' ) {
    at = url_canonize(schema, s, n, 0, "+");

    type = url_get_type(schema, at - schema);
    su_md5_iupdate(md5, schema, at - schema);

    have_authority = !url_type_is_opaque(type);
    s += n + 1;			/* continue after the ':' */
  }
  else {
    su_md5_update(md5, "", 1);
  }

  /* User part. On leaving this if-else chain, s points to the host and n
   * is the combined length of host and port. */
  if (type == url_sip || type == url_sips) {
    /* SIP URL may have /;? in user part but no path */
    /* user-unreserved = "&" / "=" / "+" / "$" / "," / ";" / "?" / "/" */
    /* Some #*@#* phones include unescaped # there, too */
    n = strcspn(s, "@/;?#");
    p = strcspn(s + n, "@");
    if (s[n + p] == '@') {
      n += p;
      /* Ignore password in hash */
      colon = memchr(s, ':', n);
      p = colon ? (size_t)(colon - s) : n;
      canon_update(md5, s, p, SIP_USER_UNRESERVED);
      s += n + 1; n = 0;
    }
    else
      su_md5_iupdate(md5, "", 1);  /* user */
    n += strcspn(s + n, "/;?#");
  }
  else if (have_authority) {
    if (type == url_wv) {    /* WV URL may have / in user part */
      n = strcspn(s, "@;?#");
    }
    else if (type != url_wv && s[0] == '/' && s[1] != '/') {
      /* foo:/bar */
      su_md5_update(md5, "\0\0", 2); /* user, host */
      su_md5_striupdate(md5, url_port_default(type));
      return;
    }
    else if (s[0] == '/' && s[1] == '/') {
      /* We have authority, / / foo or foo */
      s += 2;
      n = strcspn(s, "/?#@[]");
    }
    else
      n = strcspn(s, "@;/?#");

    if (s[n] == '@') {
      /* Ignore password in hash */
      colon = type != url_unknown ? memchr(s, ':', n) : NULL;
      p = colon ? (size_t)(colon - s) : n;
      canon_update(md5, s, p, SIP_USER_UNRESERVED);
      s += n + 1;
      n = strcspn(s, "/;?#");	/* Until path, query or fragment */
    }
    else {
      su_md5_iupdate(md5, "", 1);  /* user */
      n += strcspn(s + n, "/;?#");	/* Until path, query or fragment */
    }
  }
  else /* if (!have_authority) */ {
    n = strcspn(s, ":/;?#");	/* Until pass, path, query or fragment */

    canon_update(md5, s, n, ""); /* user */
    su_md5_update(md5, "\0", 1); /* host, no port */
    su_md5_striupdate(md5, url_port_default(type));
    return;
  }

  /* Find the ':' between host and port. Within an IPv6 reference only a
   * ':' right after the closing ']' counts. */
  if (n > 0 && s[0] == '[') {	/* IPv6reference */
    colon = memchr(s, ']', n);
    if (colon == NULL || ++colon == s + n || *colon != ':')
      colon = NULL;
  }
  else
    colon = memchr(s, ':', n);

  if (colon) {
    canon_update(md5, s, colon - s, ""); /* host */
    canon_update(md5, colon + 1, (s + n) - (colon + 1), "");
  }
  else {
    canon_update(md5, s, n, ""); /* host */
    su_md5_strupdate(md5, url_port_default(type));	/* port */
  }

  /* ignore parameters/path/headers.... */
}
  1761. /** Update md5 digest with contents of URL.
  1762. *
  1763. */
  1764. void url_update(su_md5_t *md5, url_t const *url)
  1765. {
  1766. if (url_string_p((url_string_t *)url)) {
  1767. url_string_update(md5, (char const *)url);
  1768. }
  1769. else {
  1770. SU_MD5_STRI0UPDATE(md5, url->url_scheme);
  1771. SU_MD5_STRI0UPDATE(md5, url->url_user);
  1772. SU_MD5_STRI0UPDATE(md5, url->url_host);
  1773. su_md5_striupdate(md5, URL_PORT(url));
  1774. /* XXX - parameters/path.... */
  1775. /* SU_MD5_STRI0UPDATE(md5, url->url_path); */
  1776. }
  1777. }
  1778. /** Calculate a digest from URL contents. */
  1779. void url_digest(void *hash, int hsize, url_t const *url, char const *key)
  1780. {
  1781. su_md5_t md5[1];
  1782. uint8_t digest[SU_MD5_DIGEST_SIZE];
  1783. su_md5_init(md5);
  1784. if (key) su_md5_strupdate(md5, key);
  1785. url_update(md5, url);
  1786. su_md5_digest(md5, digest);
  1787. if (hsize > SU_MD5_DIGEST_SIZE) {
  1788. memset((char *)hash + SU_MD5_DIGEST_SIZE, 0, hsize - SU_MD5_DIGEST_SIZE);
  1789. hsize = SU_MD5_DIGEST_SIZE;
  1790. }
  1791. memcpy(hash, digest, hsize);
  1792. }
  1793. /** Convert a URL query to a header string.
  1794. *
  1795. * URL query is converted by replacing each "=" in header name "=" value
  1796. * pair with semicolon (":"), and the "&" separating header-name-value pairs
  1797. * with line feed ("\n"). The "body" pseudoheader is moved last in the
  1798. * string. The %-escaping is removed. Note that if the @a query contains %00,
  1799. * the resulting string will be truncated.
  1800. *
  1801. * @param home memory home used to alloate string (if NULL, malloc() it)
  1802. * @param query query part from SIP URL
  1803. *
  1804. * The result string is allocated from @a home, and it can be used as
  1805. * argument to msg_header_parse_str(), msg_header_add_str() or
  1806. * SIPTAG_HEADER_STR().
  1807. *
  1808. * @sa msg_header_add_str(), SIPTAG_HEADER_STR(),
  1809. * sip_headers_as_url_query(), sip_url_query_as_taglist(),
  1810. * @RFC3261 section 19.1.1 "Headers", #url_t, url_s#url_headers,
  1811. * url_unescape(), url_unescape_to()
  1812. *
  1813. * @since New in @VERSION_1_12_4.
  1814. */
  1815. char *url_query_as_header_string(su_home_t *home,
  1816. char const *query)
  1817. {
  1818. size_t i, j, n, b_start = 0, b_len = 0;
  1819. char *s = su_strdup(home, query);
  1820. if (!s)
  1821. return NULL;
  1822. for (i = 0, j = 0; s[i];) {
  1823. n = strcspn(s + i, "=");
  1824. if (!s[i + n])
  1825. break;
  1826. if (n == 4 && strncasecmp(s + i, "body", 4) == 0) {
  1827. if (b_start)
  1828. break;
  1829. b_start = i + n + 1, b_len = strcspn(s + b_start, "&");
  1830. i = b_start + b_len;
  1831. if (s[i] != '\0') i += 1;
  1832. continue;
  1833. }
  1834. if (i != j)
  1835. memmove(s + j, s + i, n);
  1836. s[j + n] = ':';
  1837. i += n + 1, j += n + 1;
  1838. n = strcspn(s + i, "&");
  1839. j += url_unescape_to(s + j, s + i, n);
  1840. i += n;
  1841. if (s[i]) {
  1842. s[j++] = '\n', i++;
  1843. }
  1844. }
  1845. if (s[i])
  1846. return (void)su_free(home, s), NULL;
  1847. if (b_start) {
  1848. s[j++] = '\n', s[j++] = '\n';
  1849. j += url_unescape_to(s + j, query + b_start, b_len);
  1850. }
  1851. s[j] = '\0'; assert(j <= i);
  1852. return s;
  1853. }