xlate.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. /* Copyright 2000-2005 The Apache Software Foundation or its licensors, as
  2. * applicable.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "apu.h"
  17. #include "apu_config.h"
  18. #include "apr_lib.h"
  19. #include "apr_strings.h"
  20. #include "apr_portable.h"
  21. #include "apr_xlate.h"
/* If no implementation is available (APR_HAS_XLATE is false), only the
 * stub functions in the #else branch at the end of this file are compiled;
 * they report APR_ENOTIMPL (or -1 for apr_xlate_conv_byte).
 */
  25. #if APR_HAS_XLATE
  26. #ifdef HAVE_STDDEF_H
  27. #include <stddef.h> /* for NULL */
  28. #endif
  29. #if APR_HAVE_STRING_H
  30. #include <string.h>
  31. #endif
  32. #if APR_HAVE_STRINGS_H
  33. #include <strings.h>
  34. #endif
  35. #ifdef HAVE_ICONV_H
  36. #include <iconv.h>
  37. #endif
  38. #if APU_HAVE_APR_ICONV
  39. #include <apr_iconv.h>
  40. #endif
  41. #if defined(APU_ICONV_INBUF_CONST) || APU_HAVE_APR_ICONV
  42. #define ICONV_INBUF_TYPE const char **
  43. #else
  44. #define ICONV_INBUF_TYPE char **
  45. #endif
  46. #ifndef min
  47. #define min(x,y) ((x) <= (y) ? (x) : (y))
  48. #endif
  49. struct apr_xlate_t {
  50. apr_pool_t *pool;
  51. char *frompage;
  52. char *topage;
  53. char *sbcs_table;
  54. #if APU_HAVE_ICONV
  55. iconv_t ich;
  56. #elif APU_HAVE_APR_ICONV
  57. apr_iconv_t ich;
  58. #endif
  59. };
  60. static const char *handle_special_names(const char *page, apr_pool_t *pool)
  61. {
  62. if (page == APR_DEFAULT_CHARSET) {
  63. return apr_os_default_encoding(pool);
  64. }
  65. else if (page == APR_LOCALE_CHARSET) {
  66. return apr_os_locale_encoding(pool);
  67. }
  68. else {
  69. return page;
  70. }
  71. }
  72. static apr_status_t apr_xlate_cleanup(void *convset)
  73. {
  74. apr_xlate_t *old = convset;
  75. #if APU_HAVE_APR_ICONV
  76. if (old->ich != (apr_iconv_t)-1) {
  77. return apr_iconv_close(old->ich, old->pool);
  78. }
  79. #elif APU_HAVE_ICONV
  80. if (old->ich != (iconv_t)-1) {
  81. if (iconv_close(old->ich)) {
  82. int rv = errno;
  83. /* Sometimes, iconv is not good about setting errno. */
  84. return rv ? rv : APR_EINVAL;
  85. }
  86. }
  87. #endif
  88. return APR_SUCCESS;
  89. }
  90. #if APU_HAVE_ICONV
  91. static void check_sbcs(apr_xlate_t *convset)
  92. {
  93. char inbuf[256], outbuf[256];
  94. char *inbufptr = inbuf;
  95. char *outbufptr = outbuf;
  96. apr_size_t inbytes_left, outbytes_left;
  97. int i;
  98. apr_size_t translated;
  99. for (i = 0; i < sizeof(inbuf); i++) {
  100. inbuf[i] = i;
  101. }
  102. inbytes_left = outbytes_left = sizeof(inbuf);
  103. translated = iconv(convset->ich, (ICONV_INBUF_TYPE)&inbufptr,
  104. &inbytes_left, &outbufptr, &outbytes_left);
  105. if (translated != (apr_size_t)-1
  106. && inbytes_left == 0
  107. && outbytes_left == 0) {
  108. /* hurray... this is simple translation; save the table,
  109. * close the iconv descriptor
  110. */
  111. convset->sbcs_table = apr_palloc(convset->pool, sizeof(outbuf));
  112. memcpy(convset->sbcs_table, outbuf, sizeof(outbuf));
  113. iconv_close(convset->ich);
  114. convset->ich = (iconv_t)-1;
  115. /* TODO: add the table to the cache */
  116. }
  117. else {
  118. /* reset the iconv descriptor, since it's now in an undefined
  119. * state. */
  120. iconv_close(convset->ich);
  121. convset->ich = iconv_open(convset->topage, convset->frompage);
  122. }
  123. }
  124. #elif APU_HAVE_APR_ICONV
  125. static void check_sbcs(apr_xlate_t *convset)
  126. {
  127. char inbuf[256], outbuf[256];
  128. char *inbufptr = inbuf;
  129. char *outbufptr = outbuf;
  130. apr_size_t inbytes_left, outbytes_left;
  131. int i;
  132. apr_size_t translated;
  133. apr_status_t rv;
  134. for (i = 0; i < sizeof(inbuf); i++) {
  135. inbuf[i] = i;
  136. }
  137. inbytes_left = outbytes_left = sizeof(inbuf);
  138. rv = apr_iconv(convset->ich, (ICONV_INBUF_TYPE)&inbufptr,
  139. &inbytes_left, &outbufptr, &outbytes_left,
  140. &translated);
  141. if ((rv == APR_SUCCESS)
  142. && (translated != (apr_size_t)-1)
  143. && inbytes_left == 0
  144. && outbytes_left == 0) {
  145. /* hurray... this is simple translation; save the table,
  146. * close the iconv descriptor
  147. */
  148. convset->sbcs_table = apr_palloc(convset->pool, sizeof(outbuf));
  149. memcpy(convset->sbcs_table, outbuf, sizeof(outbuf));
  150. apr_iconv_close(convset->ich, convset->pool);
  151. convset->ich = (apr_iconv_t)-1;
  152. /* TODO: add the table to the cache */
  153. }
  154. else {
  155. /* reset the iconv descriptor, since it's now in an undefined
  156. * state. */
  157. apr_iconv_close(convset->ich, convset->pool);
  158. rv = apr_iconv_open(convset->topage, convset->frompage,
  159. convset->pool, &convset->ich);
  160. }
  161. }
  162. #endif /* APU_HAVE_APR_ICONV */
  163. static void make_identity_table(apr_xlate_t *convset)
  164. {
  165. int i;
  166. convset->sbcs_table = apr_palloc(convset->pool, 256);
  167. for (i = 0; i < 256; i++)
  168. convset->sbcs_table[i] = i;
  169. }
  170. APU_DECLARE(apr_status_t) apr_xlate_open(apr_xlate_t **convset,
  171. const char *topage,
  172. const char *frompage,
  173. apr_pool_t *pool)
  174. {
  175. apr_status_t rv;
  176. apr_xlate_t *new;
  177. int found = 0;
  178. *convset = NULL;
  179. topage = handle_special_names(topage, pool);
  180. frompage = handle_special_names(frompage, pool);
  181. new = (apr_xlate_t *)apr_pcalloc(pool, sizeof(apr_xlate_t));
  182. if (!new) {
  183. return APR_ENOMEM;
  184. }
  185. new->pool = pool;
  186. new->topage = apr_pstrdup(pool, topage);
  187. new->frompage = apr_pstrdup(pool, frompage);
  188. if (!new->topage || !new->frompage) {
  189. return APR_ENOMEM;
  190. }
  191. #ifdef TODO
  192. /* search cache of codepage pairs; we may be able to avoid the
  193. * expensive iconv_open()
  194. */
  195. set found to non-zero if found in the cache
  196. #endif
  197. if ((! found) && (strcmp(topage, frompage) == 0)) {
  198. /* to and from are the same */
  199. found = 1;
  200. make_identity_table(new);
  201. }
  202. #if APU_HAVE_APR_ICONV
  203. if (!found) {
  204. rv = apr_iconv_open(topage, frompage, pool, &new->ich);
  205. if (rv != APR_SUCCESS) {
  206. return rv;
  207. }
  208. found = 1;
  209. check_sbcs(new);
  210. } else
  211. new->ich = (apr_iconv_t)-1;
  212. #elif APU_HAVE_ICONV
  213. if (!found) {
  214. new->ich = iconv_open(topage, frompage);
  215. if (new->ich == (iconv_t)-1) {
  216. int rv = errno;
  217. /* Sometimes, iconv is not good about setting errno. */
  218. return rv ? rv : APR_EINVAL;
  219. }
  220. found = 1;
  221. check_sbcs(new);
  222. } else
  223. new->ich = (iconv_t)-1;
  224. #endif /* APU_HAVE_ICONV */
  225. if (found) {
  226. *convset = new;
  227. apr_pool_cleanup_register(pool, (void *)new, apr_xlate_cleanup,
  228. apr_pool_cleanup_null);
  229. rv = APR_SUCCESS;
  230. }
  231. else {
  232. rv = APR_EINVAL; /* iconv() would return EINVAL if it
  233. couldn't handle the pair */
  234. }
  235. return rv;
  236. }
  237. APU_DECLARE(apr_status_t) apr_xlate_sb_get(apr_xlate_t *convset, int *onoff)
  238. {
  239. *onoff = convset->sbcs_table != NULL;
  240. return APR_SUCCESS;
  241. }
  242. APU_DECLARE(apr_status_t) apr_xlate_conv_buffer(apr_xlate_t *convset,
  243. const char *inbuf,
  244. apr_size_t *inbytes_left,
  245. char *outbuf,
  246. apr_size_t *outbytes_left)
  247. {
  248. apr_status_t status = APR_SUCCESS;
  249. #if APU_HAVE_APR_ICONV
  250. if (convset->ich != (apr_iconv_t)-1) {
  251. const char *inbufptr = inbuf;
  252. apr_size_t translated;
  253. char *outbufptr = outbuf;
  254. status = apr_iconv(convset->ich, &inbufptr, inbytes_left,
  255. &outbufptr, outbytes_left, &translated);
  256. /* If everything went fine but we ran out of buffer, don't
  257. * report it as an error. Caller needs to look at the two
  258. * bytes-left values anyway.
  259. *
  260. * There are three expected cases where rc is -1. In each of
  261. * these cases, *inbytes_left != 0.
  262. * a) the non-error condition where we ran out of output
  263. * buffer
  264. * b) the non-error condition where we ran out of input (i.e.,
  265. * the last input character is incomplete)
  266. * c) the error condition where the input is invalid
  267. */
  268. switch (status) {
  269. case E2BIG: /* out of space on output */
  270. status = 0; /* change table lookup code below if you
  271. make this an error */
  272. break;
  273. case EINVAL: /* input character not complete (yet) */
  274. status = APR_INCOMPLETE;
  275. break;
  276. case EILSEQ: /* bad input byte */
  277. status = APR_EINVAL;
  278. break;
  279. /* Sometimes, iconv is not good about setting errno. */
  280. case 0:
  281. if (*inbytes_left)
  282. status = APR_INCOMPLETE;
  283. break;
  284. default:
  285. break;
  286. }
  287. }
  288. else
  289. #elif APU_HAVE_ICONV
  290. if (convset->ich != (iconv_t)-1) {
  291. const char *inbufptr = inbuf;
  292. char *outbufptr = outbuf;
  293. apr_size_t translated;
  294. translated = iconv(convset->ich, (ICONV_INBUF_TYPE)&inbufptr,
  295. inbytes_left, &outbufptr, outbytes_left);
  296. /* If everything went fine but we ran out of buffer, don't
  297. * report it as an error. Caller needs to look at the two
  298. * bytes-left values anyway.
  299. *
  300. * There are three expected cases where rc is -1. In each of
  301. * these cases, *inbytes_left != 0.
  302. * a) the non-error condition where we ran out of output
  303. * buffer
  304. * b) the non-error condition where we ran out of input (i.e.,
  305. * the last input character is incomplete)
  306. * c) the error condition where the input is invalid
  307. */
  308. if (translated == (apr_size_t)-1) {
  309. int rv = errno;
  310. switch (rv) {
  311. case E2BIG: /* out of space on output */
  312. status = 0; /* change table lookup code below if you
  313. make this an error */
  314. break;
  315. case EINVAL: /* input character not complete (yet) */
  316. status = APR_INCOMPLETE;
  317. break;
  318. case EILSEQ: /* bad input byte */
  319. status = APR_EINVAL;
  320. break;
  321. /* Sometimes, iconv is not good about setting errno. */
  322. case 0:
  323. status = APR_INCOMPLETE;
  324. break;
  325. default:
  326. status = rv;
  327. break;
  328. }
  329. }
  330. }
  331. else
  332. #endif
  333. if (inbuf) {
  334. int to_convert = min(*inbytes_left, *outbytes_left);
  335. int converted = to_convert;
  336. char *table = convset->sbcs_table;
  337. while (to_convert) {
  338. *outbuf = table[(unsigned char)*inbuf];
  339. ++outbuf;
  340. ++inbuf;
  341. --to_convert;
  342. }
  343. *inbytes_left -= converted;
  344. *outbytes_left -= converted;
  345. }
  346. return status;
  347. }
  348. APU_DECLARE(apr_int32_t) apr_xlate_conv_byte(apr_xlate_t *convset,
  349. unsigned char inchar)
  350. {
  351. if (convset->sbcs_table) {
  352. return convset->sbcs_table[inchar];
  353. }
  354. else {
  355. return -1;
  356. }
  357. }
  358. APU_DECLARE(apr_status_t) apr_xlate_close(apr_xlate_t *convset)
  359. {
  360. return apr_pool_cleanup_run(convset->pool, convset, apr_xlate_cleanup);
  361. }
  362. #else /* !APR_HAS_XLATE */
  363. APU_DECLARE(apr_status_t) apr_xlate_open(apr_xlate_t **convset,
  364. const char *topage,
  365. const char *frompage,
  366. apr_pool_t *pool)
  367. {
  368. return APR_ENOTIMPL;
  369. }
  370. APU_DECLARE(apr_status_t) apr_xlate_sb_get(apr_xlate_t *convset, int *onoff)
  371. {
  372. return APR_ENOTIMPL;
  373. }
  374. APU_DECLARE(apr_int32_t) apr_xlate_conv_byte(apr_xlate_t *convset,
  375. unsigned char inchar)
  376. {
  377. return (-1);
  378. }
  379. APU_DECLARE(apr_status_t) apr_xlate_conv_buffer(apr_xlate_t *convset,
  380. const char *inbuf,
  381. apr_size_t *inbytes_left,
  382. char *outbuf,
  383. apr_size_t *outbytes_left)
  384. {
  385. return APR_ENOTIMPL;
  386. }
  387. APU_DECLARE(apr_status_t) apr_xlate_close(apr_xlate_t *convset)
  388. {
  389. return APR_ENOTIMPL;
  390. }
  391. #endif /* APR_HAS_XLATE */