utils.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438
  1. /*
  2. * Utility routines
  3. *
  4. * Copyright 1998,2000 Bertho A. Stultiens
  5. *
  6. * This library is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * This library is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with this library; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  19. */
  #include "config.h"
  #include <stdio.h>
  #include <stdlib.h>
  #include <stdarg.h>
  #include <string.h>
  #include <assert.h>
  #include <ctype.h>
  #include <errno.h>
  #include "wmc.h"
  #include "winnls.h"
  #include "utils.h"
  30. #define SUPPRESS_YACC_ERROR_MESSAGE
  31. static void generic_msg(const char *s, const char *t, va_list ap)
  32. {
  33. fprintf(stderr, "%s:%d:%d: %s: ", input_name ? input_name : "stdin", line_number, char_number, t);
  34. vfprintf(stderr, s, ap);
  35. }
  /*
   * Parser error callback.
   *
   * This routine must not exit: the grammar relies on the error token to
   * pin down the syntactic error in the source, and YACC calls this same
   * routine to print a message just before the error token is reduced.
   * The separate routine 'xyyerror' is the one that prints a real message
   * and exits.
   *
   * When SUPPRESS_YACC_ERROR_MESSAGE is defined nothing is printed at all.
   * Always returns 1.
   */
  int mcy_error(const char *s, ...)
  {
  #ifdef SUPPRESS_YACC_ERROR_MESSAGE
      (void)s;    /* message intentionally dropped */
  #else
      va_list args;

      va_start(args, s);
      generic_msg(s, "Yacc error", args);
      va_end(args);
  #endif
      return 1;
  }
  /*
   * Report a located error (tagged "Error") through generic_msg and
   * terminate the process with exit status 1.
   */
  int xyyerror(const char *s, ...)
  {
      va_list args;

      va_start(args, s);
      generic_msg(s, "Error", args);
      va_end(args);

      exit(1);
      return 1;   /* not reached; satisfies the int return type */
  }
  /*
   * Report a located warning (tagged "Warning") through generic_msg.
   * Execution continues; the return value is always 0.
   */
  int mcy_warning(const char *s, ...)
  {
      va_list args;

      va_start(args, s);
      generic_msg(s, "Warning", args);
      va_end(args);

      return 0;
  }
  /*
   * Report an internal (tool) error to stderr and terminate with exit
   * status 3.
   *
   * file, line - location in the tool's own source that detected the problem
   * s, ...     - printf-style message; no newline is appended
   */
  void internal_error(const char *file, int line, const char *s, ...)
  {
      va_list args;

      fprintf(stderr, "Internal error (please report) %s %d: ", file, line);

      va_start(args, s);
      vfprintf(stderr, s, args);
      va_end(args);

      exit(3);
  }
  /*
   * Print "Error: " followed by the printf-style message and the textual
   * description of the current errno value (via perror), then terminate
   * with exit status 2.
   *
   * errno is captured on entry and restored right before perror(): the
   * intervening stdio calls are allowed to modify errno, which would
   * otherwise make the reported reason unrelated to the failure the caller
   * is describing.
   */
  void fatal_perror( const char *msg, ... )
  {
      const int saved_errno = errno;
      va_list valist;

      va_start( valist, msg );
      fprintf(stderr, "Error: ");
      vfprintf( stderr, msg, valist );
      va_end( valist );

      errno = saved_errno;
      perror( " " );
      exit(2);
  }
  /*
   * Print "Error: " followed by the printf-style message to stderr and
   * terminate with exit status 2. No newline is appended.
   */
  void error(const char *s, ...)
  {
      va_list args;

      fprintf(stderr, "Error: ");

      va_start(args, s);
      vfprintf(stderr, s, args);
      va_end(args);

      exit(2);
  }
  /*
   * Print "Warning: " followed by the printf-style message to stderr and
   * return to the caller. No newline is appended.
   */
  void warning(const char *s, ...)
  {
      va_list args;

      fprintf(stderr, "Warning: ");

      va_start(args, s);
      vfprintf(stderr, s, args);
      va_end(args);
  }
  107. int unistrlen(const WCHAR *s)
  108. {
  109. int n;
  110. for(n = 0; *s; n++, s++)
  111. ;
  112. return n;
  113. }
  114. WCHAR *unistrcpy(WCHAR *dst, const WCHAR *src)
  115. {
  116. WCHAR *t = dst;
  117. while(*src)
  118. *t++ = *src++;
  119. *t = 0;
  120. return dst;
  121. }
  122. WCHAR *xunistrdup(const WCHAR * str)
  123. {
  124. WCHAR *s;
  125. assert(str != NULL);
  126. s = xmalloc((unistrlen(str)+1) * sizeof(WCHAR));
  127. return unistrcpy(s, str);
  128. }
  129. int unistricmp(const WCHAR *s1, const WCHAR *s2)
  130. {
  131. int i;
  132. int once = 0;
  133. static const char warn[] = "Don't know the uppercase equivalent of non ascii characters;"
  134. "comparison might yield wrong results";
  135. while(*s1 && *s2)
  136. {
  137. if((*s1 & 0xffff) > 0x7f || (*s2 & 0xffff) > 0x7f)
  138. {
  139. if(!once)
  140. {
  141. once++;
  142. mcy_warning(warn);
  143. }
  144. i = *s1++ - *s2++;
  145. }
  146. else
  147. i = toupper(*s1++) - toupper(*s2++);
  148. if(i)
  149. return i;
  150. }
  151. if((*s1 & 0xffff) > 0x7f || (*s2 & 0xffff) > 0x7f)
  152. {
  153. if(!once)
  154. mcy_warning(warn);
  155. return *s1 - *s2;
  156. }
  157. else
  158. return toupper(*s1) - toupper(*s2);
  159. }
  160. int unistrcmp(const WCHAR *s1, const WCHAR *s2)
  161. {
  162. int i;
  163. while(*s1 && *s2)
  164. {
  165. i = *s1++ - *s2++;
  166. if(i)
  167. return i;
  168. }
  169. return *s1 - *s2;
  170. }
  171. WCHAR *utf8_to_unicode( const char *src, int srclen, int *dstlen )
  172. {
  173. static const char utf8_length[128] =
  174. {
  175. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80-0x8f */
  176. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90-0x9f */
  177. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0-0xaf */
  178. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb0-0xbf */
  179. 0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xc0-0xcf */
  180. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xd0-0xdf */
  181. 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xe0-0xef */
  182. 3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0 /* 0xf0-0xff */
  183. };
  184. static const unsigned char utf8_mask[4] = { 0x7f, 0x1f, 0x0f, 0x07 };
  185. const char *srcend = src + srclen;
  186. int len, res;
  187. WCHAR *ret, *dst;
  188. dst = ret = xmalloc( (srclen + 1) * sizeof(WCHAR) );
  189. while (src < srcend)
  190. {
  191. unsigned char ch = *src++;
  192. if (ch < 0x80) /* special fast case for 7-bit ASCII */
  193. {
  194. *dst++ = ch;
  195. continue;
  196. }
  197. len = utf8_length[ch - 0x80];
  198. if (len && src + len <= srcend)
  199. {
  200. res = ch & utf8_mask[len];
  201. switch (len)
  202. {
  203. case 3:
  204. if ((ch = *src ^ 0x80) >= 0x40) break;
  205. res = (res << 6) | ch;
  206. src++;
  207. if (res < 0x10) break;
  208. case 2:
  209. if ((ch = *src ^ 0x80) >= 0x40) break;
  210. res = (res << 6) | ch;
  211. if (res >= 0x110000 >> 6) break;
  212. src++;
  213. if (res < 0x20) break;
  214. if (res >= 0xd800 >> 6 && res <= 0xdfff >> 6) break;
  215. case 1:
  216. if ((ch = *src ^ 0x80) >= 0x40) break;
  217. res = (res << 6) | ch;
  218. src++;
  219. if (res < 0x80) break;
  220. if (res <= 0xffff) *dst++ = res;
  221. else
  222. {
  223. res -= 0x10000;
  224. *dst++ = 0xd800 | (res >> 10);
  225. *dst++ = 0xdc00 | (res & 0x3ff);
  226. }
  227. continue;
  228. }
  229. }
  230. *dst++ = 0xfffd;
  231. }
  232. *dst = 0;
  233. *dstlen = dst - ret;
  234. return ret;
  235. }
  236. char *unicode_to_utf8( const WCHAR *src, int srclen, int *dstlen )
  237. {
  238. char *ret, *dst;
  239. dst = ret = xmalloc( srclen * 3 + 1 );
  240. for ( ; srclen; srclen--, src++)
  241. {
  242. unsigned int ch = *src;
  243. if (ch < 0x80) /* 0x00-0x7f: 1 byte */
  244. {
  245. *dst++ = ch;
  246. continue;
  247. }
  248. if (ch < 0x800) /* 0x80-0x7ff: 2 bytes */
  249. {
  250. dst[1] = 0x80 | (ch & 0x3f);
  251. ch >>= 6;
  252. dst[0] = 0xc0 | ch;
  253. dst += 2;
  254. continue;
  255. }
  256. if (ch >= 0xd800 && ch <= 0xdbff && srclen > 1 && src[1] >= 0xdc00 && src[1] <= 0xdfff)
  257. {
  258. /* 0x10000-0x10ffff: 4 bytes */
  259. ch = 0x10000 + ((ch & 0x3ff) << 10) + (src[1] & 0x3ff);
  260. dst[3] = 0x80 | (ch & 0x3f);
  261. ch >>= 6;
  262. dst[2] = 0x80 | (ch & 0x3f);
  263. ch >>= 6;
  264. dst[1] = 0x80 | (ch & 0x3f);
  265. ch >>= 6;
  266. dst[0] = 0xf0 | ch;
  267. dst += 4;
  268. src++;
  269. srclen--;
  270. continue;
  271. }
  272. if (ch >= 0xd800 && ch <= 0xdfff) ch = 0xfffd; /* invalid surrogate pair */
  273. /* 0x800-0xffff: 3 bytes */
  274. dst[2] = 0x80 | (ch & 0x3f);
  275. ch >>= 6;
  276. dst[1] = 0x80 | (ch & 0x3f);
  277. ch >>= 6;
  278. dst[0] = 0xe0 | ch;
  279. dst += 3;
  280. }
  281. *dst = 0;
  282. *dstlen = dst - ret;
  283. return ret;
  284. }
  285. #ifdef _WIN32
  /*
   * Windows build: report whether codepage 'id' is usable by forwarding
   * the question to IsValidCodePage(). Returns its result unchanged.
   */
  int is_valid_codepage(int id)
  {
      const int valid = IsValidCodePage( id );

      return valid;
  }
  290. WCHAR *codepage_to_unicode( int codepage, const char *src, int srclen, int *dstlen )
  291. {
  292. WCHAR *dst = xmalloc( (srclen + 1) * sizeof(WCHAR) );
  293. DWORD ret = MultiByteToWideChar( codepage, MB_ERR_INVALID_CHARS, src, srclen, dst, srclen );
  294. if (!ret) return NULL;
  295. dst[ret] = 0;
  296. *dstlen = ret;
  297. return dst;
  298. }
  299. #else /* _WIN32 */
  /*
   * Per-codepage conversion data, filled in by init_nls_info() from the
   * contents of a loaded c_NNN.nls file. The pointers refer into that
   * loaded data.
   */
  struct nls_info
  {
      unsigned short codepage;        /* codepage number (header word 1); 0 marks an unused cache slot */
      unsigned short unidef;          /* header word 4; the WCHAR that signals "no mapping" in the tables */
      unsigned short trans_unidef;    /* header word 6; the input value that legitimately maps to unidef */
      unsigned short *cp2uni;         /* 256-entry byte -> WCHAR table */
      unsigned short *dbcs_offsets;   /* per-lead-byte offsets into the double-byte tables, or NULL */
  };

  /* cache of loaded codepages; entries are used from index 0 upwards */
  static struct nls_info nlsinfo[128];
  309. static void init_nls_info( struct nls_info *info, unsigned short *ptr )
  310. {
  311. unsigned short hdr_size = ptr[0];
  312. info->codepage = ptr[1];
  313. info->unidef = ptr[4];
  314. info->trans_unidef = ptr[6];
  315. ptr += hdr_size;
  316. info->cp2uni = ++ptr;
  317. ptr += 256;
  318. if (*ptr++) ptr += 256; /* glyph table */
  319. info->dbcs_offsets = *ptr ? ptr + 1 : NULL;
  320. }
  321. static const struct nls_info *get_nls_info( unsigned int codepage )
  322. {
  323. struct stat st;
  324. unsigned short *data;
  325. char *path;
  326. unsigned int i;
  327. int fd;
  328. for (i = 0; i < ARRAY_SIZE(nlsinfo) && nlsinfo[i].codepage; i++)
  329. if (nlsinfo[i].codepage == codepage) return &nlsinfo[i];
  330. assert( i < ARRAY_SIZE(nlsinfo) );
  331. for (i = 0; nlsdirs[i]; i++)
  332. {
  333. path = strmake( "%s/c_%03u.nls", nlsdirs[i], codepage );
  334. if ((fd = open( path, O_RDONLY )) != -1) break;
  335. free( path );
  336. }
  337. if (!nlsdirs[i]) return NULL;
  338. fstat( fd, &st );
  339. data = xmalloc( st.st_size );
  340. if (read( fd, data, st.st_size ) != st.st_size) error( "failed to load %s\n", path );
  341. close( fd );
  342. free( path );
  343. init_nls_info( &nlsinfo[i], data );
  344. return &nlsinfo[i];
  345. }
  346. int is_valid_codepage(int cp)
  347. {
  348. return cp == CP_UTF8 || get_nls_info( cp );
  349. }
  350. WCHAR *codepage_to_unicode( int codepage, const char *src, int srclen, int *dstlen )
  351. {
  352. const struct nls_info *info = get_nls_info( codepage );
  353. unsigned int i;
  354. WCHAR dbch, *dst = xmalloc( (srclen + 1) * sizeof(WCHAR) );
  355. if (!info) error( "codepage %u not supported\n", codepage );
  356. if (info->dbcs_offsets)
  357. {
  358. for (i = 0; srclen; i++, srclen--, src++)
  359. {
  360. unsigned short off = info->dbcs_offsets[(unsigned char)*src];
  361. if (off)
  362. {
  363. if (srclen == 1) return NULL;
  364. dbch = (src[0] << 8) | (unsigned char)src[1];
  365. src++;
  366. srclen--;
  367. dst[i] = info->dbcs_offsets[off + (unsigned char)*src];
  368. if (dst[i] == info->unidef && dbch != info->trans_unidef) return NULL;
  369. }
  370. else
  371. {
  372. dst[i] = info->cp2uni[(unsigned char)*src];
  373. if (dst[i] == info->unidef && *src != info->trans_unidef) return NULL;
  374. }
  375. }
  376. }
  377. else
  378. {
  379. for (i = 0; i < srclen; i++)
  380. {
  381. dst[i] = info->cp2uni[(unsigned char)src[i]];
  382. if (dst[i] == info->unidef && src[i] != info->trans_unidef) return NULL;
  383. }
  384. }
  385. dst[i] = 0;
  386. *dstlen = i;
  387. return dst;
  388. }
  389. #endif /* _WIN32 */
  /*
   * Shared output buffer state; zero-initialised as file-scope objects.
   * NOTE(review): presumably the buffer base, the current write offset and
   * the allocated capacity - confirm against the code that uses them.
   */
  unsigned char *output_buffer;
  size_t output_buffer_pos;
  size_t output_buffer_size;