xsltlocale.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520
  1. /*
  2. * xsltlocale.c: locale handling
  3. *
  4. * Reference:
  5. * RFC 3066: Tags for the Identification of Languages
  6. * http://www.ietf.org/rfc/rfc3066.txt
  7. * ISO 639-1, ISO 3166-1
  8. *
  9. * Author: Nick Wellnhofer
  10. * winapi port: Roumen Petrov
  11. */
  12. #define IN_LIBXSLT
  13. #include "libxslt.h"
  14. #include <string.h>
  15. #include <libxml/xmlmemory.h>
  16. #include "xsltlocale.h"
  17. #include "xsltutils.h"
  /*
   * ASCII-only character helpers (locale independent on purpose).
   * Every macro argument is parenthesized so that compound expressions such
   * as "cond ? a : b" expand correctly; each argument is evaluated only once,
   * so calls like TOLOWER(*p++) remain safe.
   */
  #define TOUPPER(c) ((c) & ~0x20)
  #define TOLOWER(c) ((c) | 0x20)
  /* True only for 'A'..'Z' and 'a'..'z'; relies on unsigned wrap-around. */
  #define ISALPHA(c) ((unsigned)(TOUPPER(c) - 'A') < 26)
  /* Maximum lengths of the tag parts, without terminating null character */
  #define XSLTMAX_ISO639LANGLEN 8
  #define XSLTMAX_ISO3166CNTRYLEN 8
  /* <lang>-<cntry> */
  #define XSLTMAX_LANGTAGLEN (XSLTMAX_ISO639LANGLEN+1+XSLTMAX_ISO3166CNTRYLEN)
  26. static const xmlChar* xsltDefaultRegion(const xmlChar *localeName);
  27. #ifdef XSLT_LOCALE_WINAPI
  28. xmlRMutexPtr xsltLocaleMutex = NULL;
  29. struct xsltRFC1766Info_s {
  30. /*note typedef unsigned char xmlChar !*/
  31. xmlChar tag[XSLTMAX_LANGTAGLEN+1];
  32. /*note typedef LCID xsltLocale !*/
  33. xsltLocale lcid;
  34. };
  35. typedef struct xsltRFC1766Info_s xsltRFC1766Info;
  36. static int xsltLocaleListSize = 0;
  37. static xsltRFC1766Info *xsltLocaleList = NULL;
  38. static xsltLocale
  39. xslt_locale_WINAPI(const xmlChar *languageTag) {
  40. int k;
  41. xsltRFC1766Info *p = xsltLocaleList;
  42. for (k=0; k<xsltLocaleListSize; k++, p++)
  43. if (xmlStrcmp(p->tag, languageTag) == 0) return p->lcid;
  44. return((xsltLocale)0);
  45. }
  46. static void xsltEnumSupportedLocales(void);
  47. #endif
  /**
   * xsltFreeLocales:
   *
   * Shutdown hook of the locale support. Only the WINAPI build holds any
   * state: the locale table is detached and released while the locale
   * mutex is held. Other builds have nothing to do.
   */
  void
  xsltFreeLocales(void) {
  #if defined(XSLT_LOCALE_WINAPI)
      xsltRFC1766Info *table;

      xmlRMutexLock(xsltLocaleMutex);
      table = xsltLocaleList;
      xsltLocaleList = NULL;
      xmlFree(table);
      xmlRMutexUnlock(xsltLocaleMutex);
  #endif
  }
  62. /**
  63. * xsltNewLocale:
  64. * @languageTag: RFC 3066 language tag
  65. *
  66. * Creates a new locale of an opaque system dependent type based on the
  67. * language tag.
  68. *
  69. * Returns the locale or NULL on error or if no matching locale was found
  70. */
  71. xsltLocale
  72. xsltNewLocale(const xmlChar *languageTag) {
  73. #ifdef XSLT_LOCALE_POSIX
  74. xsltLocale locale;
  75. char localeName[XSLTMAX_LANGTAGLEN+6]; /* 6 chars for ".utf8\0" */
  76. const xmlChar *p = languageTag;
  77. const char *region = NULL;
  78. char *q = localeName;
  79. int i, llen;
  80. /* Convert something like "pt-br" to "pt_BR.utf8" */
  81. if (languageTag == NULL)
  82. return(NULL);
  83. for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
  84. *q++ = TOLOWER(*p++);
  85. if (i == 0)
  86. return(NULL);
  87. llen = i;
  88. if (*p) {
  89. if (*p++ != '-')
  90. return(NULL);
  91. *q++ = '_';
  92. for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
  93. *q++ = TOUPPER(*p++);
  94. if (i == 0 || *p)
  95. return(NULL);
  96. memcpy(q, ".utf8", 6);
  97. locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
  98. if (locale != NULL)
  99. return(locale);
  100. /* Continue without using country code */
  101. q = localeName + llen;
  102. }
  103. /* Try locale without territory, e.g. for Esperanto (eo) */
  104. memcpy(q, ".utf8", 6);
  105. locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
  106. if (locale != NULL)
  107. return(locale);
  108. /* Try to find most common country for language */
  109. if (llen != 2)
  110. return(NULL);
  111. region = (char *)xsltDefaultRegion((xmlChar *)localeName);
  112. if (region == NULL)
  113. return(NULL);
  114. q = localeName + llen;
  115. *q++ = '_';
  116. *q++ = region[0];
  117. *q++ = region[1];
  118. memcpy(q, ".utf8", 6);
  119. locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
  120. return(locale);
  121. #endif
  122. #ifdef XSLT_LOCALE_WINAPI
  123. {
  124. xsltLocale locale = (xsltLocale)0;
  125. xmlChar localeName[XSLTMAX_LANGTAGLEN+1];
  126. xmlChar *q = localeName;
  127. const xmlChar *p = languageTag;
  128. int i, llen;
  129. const xmlChar *region = NULL;
  130. if (languageTag == NULL) goto end;
  131. xsltEnumSupportedLocales();
  132. for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
  133. *q++ = TOLOWER(*p++);
  134. if (i == 0) goto end;
  135. llen = i;
  136. *q++ = '-';
  137. if (*p) { /*if country tag is given*/
  138. if (*p++ != '-') goto end;
  139. for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
  140. *q++ = TOUPPER(*p++);
  141. if (i == 0 || *p) goto end;
  142. *q = '\0';
  143. locale = xslt_locale_WINAPI(localeName);
  144. if (locale != (xsltLocale)0) goto end;
  145. }
  146. /* Try to find most common country for language */
  147. region = xsltDefaultRegion(localeName);
  148. if (region == NULL) goto end;
  149. strcpy((char *) localeName + llen + 1, (char *) region);
  150. locale = xslt_locale_WINAPI(localeName);
  151. end:
  152. return(locale);
  153. }
  154. #endif
  155. #ifdef XSLT_LOCALE_NONE
  156. return(NULL);
  157. #endif
  158. }
  159. static const xmlChar*
  160. xsltDefaultRegion(const xmlChar *localeName) {
  161. xmlChar c;
  162. /* region should be xmlChar, but gcc warns on all string assignments */
  163. const char *region = NULL;
  164. c = localeName[1];
  165. /* This is based on the locales from glibc 2.3.3 */
  166. switch (localeName[0]) {
  167. case 'a':
  168. if (c == 'a' || c == 'm') region = "ET";
  169. else if (c == 'f') region = "ZA";
  170. else if (c == 'n') region = "ES";
  171. else if (c == 'r') region = "AE";
  172. else if (c == 'z') region = "AZ";
  173. break;
  174. case 'b':
  175. if (c == 'e') region = "BY";
  176. else if (c == 'g') region = "BG";
  177. else if (c == 'n') region = "BD";
  178. else if (c == 'r') region = "FR";
  179. else if (c == 's') region = "BA";
  180. break;
  181. case 'c':
  182. if (c == 'a') region = "ES";
  183. else if (c == 's') region = "CZ";
  184. else if (c == 'y') region = "GB";
  185. break;
  186. case 'd':
  187. if (c == 'a') region = "DK";
  188. else if (c == 'e') region = "DE";
  189. break;
  190. case 'e':
  191. if (c == 'l') region = "GR";
  192. else if (c == 'n' || c == 'o') region = "US";
  193. else if (c == 's' || c == 'u') region = "ES";
  194. else if (c == 't') region = "EE";
  195. break;
  196. case 'f':
  197. if (c == 'a') region = "IR";
  198. else if (c == 'i') region = "FI";
  199. else if (c == 'o') region = "FO";
  200. else if (c == 'r') region = "FR";
  201. break;
  202. case 'g':
  203. if (c == 'a') region = "IE";
  204. else if (c == 'l') region = "ES";
  205. else if (c == 'v') region = "GB";
  206. break;
  207. case 'h':
  208. if (c == 'e') region = "IL";
  209. else if (c == 'i') region = "IN";
  210. else if (c == 'r') region = "HT";
  211. else if (c == 'u') region = "HU";
  212. break;
  213. case 'i':
  214. if (c == 'd') region = "ID";
  215. else if (c == 's') region = "IS";
  216. else if (c == 't') region = "IT";
  217. else if (c == 'w') region = "IL";
  218. break;
  219. case 'j':
  220. if (c == 'a') region = "JP";
  221. break;
  222. case 'k':
  223. if (c == 'l') region = "GL";
  224. else if (c == 'o') region = "KR";
  225. else if (c == 'w') region = "GB";
  226. break;
  227. case 'l':
  228. if (c == 't') region = "LT";
  229. else if (c == 'v') region = "LV";
  230. break;
  231. case 'm':
  232. if (c == 'k') region = "MK";
  233. else if (c == 'l' || c == 'r') region = "IN";
  234. else if (c == 'n') region = "MN";
  235. else if (c == 's') region = "MY";
  236. else if (c == 't') region = "MT";
  237. break;
  238. case 'n':
  239. if (c == 'b' || c == 'n' || c == 'o') region = "NO";
  240. else if (c == 'e') region = "NP";
  241. else if (c == 'l') region = "NL";
  242. break;
  243. case 'o':
  244. if (c == 'm') region = "ET";
  245. break;
  246. case 'p':
  247. if (c == 'a') region = "IN";
  248. else if (c == 'l') region = "PL";
  249. else if (c == 't') region = "PT";
  250. break;
  251. case 'r':
  252. if (c == 'o') region = "RO";
  253. else if (c == 'u') region = "RU";
  254. break;
  255. case 's':
  256. switch (c) {
  257. case 'e': region = "NO"; break;
  258. case 'h': region = "YU"; break;
  259. case 'k': region = "SK"; break;
  260. case 'l': region = "SI"; break;
  261. case 'o': region = "ET"; break;
  262. case 'q': region = "AL"; break;
  263. case 't': region = "ZA"; break;
  264. case 'v': region = "SE"; break;
  265. }
  266. break;
  267. case 't':
  268. if (c == 'a' || c == 'e') region = "IN";
  269. else if (c == 'h') region = "TH";
  270. else if (c == 'i') region = "ER";
  271. else if (c == 'r') region = "TR";
  272. else if (c == 't') region = "RU";
  273. break;
  274. case 'u':
  275. if (c == 'k') region = "UA";
  276. else if (c == 'r') region = "PK";
  277. break;
  278. case 'v':
  279. if (c == 'i') region = "VN";
  280. break;
  281. case 'w':
  282. if (c == 'a') region = "BE";
  283. break;
  284. case 'x':
  285. if (c == 'h') region = "ZA";
  286. break;
  287. case 'z':
  288. if (c == 'h') region = "CN";
  289. else if (c == 'u') region = "ZA";
  290. break;
  291. }
  292. return((xmlChar *)region);
  293. }
  294. /**
  295. * xsltFreeLocale:
  296. * @locale: the locale to free
  297. *
  298. * Frees a locale created with xsltNewLocale
  299. */
  300. void
  301. xsltFreeLocale(xsltLocale locale) {
  302. #ifdef XSLT_LOCALE_POSIX
  303. freelocale(locale);
  304. #endif
  305. }
  306. /**
  307. * xsltStrxfrm:
  308. * @locale: locale created with xsltNewLocale
  309. * @string: UTF-8 string to transform
  310. *
  311. * Transforms a string according to locale. The transformed string must then be
  312. * compared with xsltLocaleStrcmp and freed with xmlFree.
  313. *
  314. * Returns the transformed string or NULL on error
  315. */
  316. xsltLocaleChar *
  317. xsltStrxfrm(xsltLocale locale, const xmlChar *string)
  318. {
  319. #ifdef XSLT_LOCALE_NONE
  320. return(NULL);
  321. #else
  322. size_t xstrlen, r;
  323. xsltLocaleChar *xstr;
  324. #ifdef XSLT_LOCALE_POSIX
  325. xstrlen = strxfrm_l(NULL, (const char *)string, 0, locale) + 1;
  326. xstr = (xsltLocaleChar *) xmlMalloc(xstrlen);
  327. if (xstr == NULL) {
  328. xsltTransformError(NULL, NULL, NULL,
  329. "xsltStrxfrm : out of memory error\n");
  330. return(NULL);
  331. }
  332. r = strxfrm_l((char *)xstr, (const char *)string, xstrlen, locale);
  333. #endif
  334. #ifdef XSLT_LOCALE_WINAPI
  335. xstrlen = MultiByteToWideChar(CP_UTF8, 0, (char *) string, -1, NULL, 0);
  336. if (xstrlen == 0) {
  337. xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar check failed\n");
  338. return(NULL);
  339. }
  340. xstr = (xsltLocaleChar*) xmlMalloc(xstrlen * sizeof(xsltLocaleChar));
  341. if (xstr == NULL) {
  342. xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : out of memory\n");
  343. return(NULL);
  344. }
  345. r = MultiByteToWideChar(CP_UTF8, 0, (char *) string, -1, xstr, xstrlen);
  346. if (r == 0) {
  347. xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar failed\n");
  348. xmlFree(xstr);
  349. return(NULL);
  350. }
  351. return(xstr);
  352. #endif /* XSLT_LOCALE_WINAPI */
  353. if (r >= xstrlen) {
  354. xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : strxfrm failed\n");
  355. xmlFree(xstr);
  356. return(NULL);
  357. }
  358. return(xstr);
  359. #endif /* XSLT_LOCALE_NONE */
  360. }
  361. /**
  362. * xsltLocaleStrcmp:
  363. * @locale: a locale identifier
  364. * @str1: a string transformed with xsltStrxfrm
  365. * @str2: a string transformed with xsltStrxfrm
  366. *
  367. * Compares two strings transformed with xsltStrxfrm
  368. *
  369. * Returns a value < 0 if str1 sorts before str2,
  370. * a value > 0 if str1 sorts after str2,
  371. * 0 if str1 and str2 are equal wrt sorting
  372. */
  373. int
  374. xsltLocaleStrcmp(xsltLocale locale, const xsltLocaleChar *str1, const xsltLocaleChar *str2) {
  375. (void)locale;
  376. #ifdef XSLT_LOCALE_WINAPI
  377. {
  378. int ret;
  379. if (str1 == str2) return(0);
  380. if (str1 == NULL) return(-1);
  381. if (str2 == NULL) return(1);
  382. ret = CompareStringW(locale, 0, str1, -1, str2, -1);
  383. if (ret == 0) {
  384. xsltTransformError(NULL, NULL, NULL, "xsltLocaleStrcmp : CompareStringW fail\n");
  385. return(0);
  386. }
  387. return(ret - 2);
  388. }
  389. #else
  390. return(xmlStrcmp(str1, str2));
  391. #endif
  392. }
  393. #ifdef XSLT_LOCALE_WINAPI
  394. /**
  395. * xsltCountSupportedLocales:
  396. * @lcid: not used
  397. *
  398. * callback used to count locales
  399. *
  400. * Returns TRUE
  401. */
  402. BOOL CALLBACK
  403. xsltCountSupportedLocales(LPSTR lcid) {
  404. (void) lcid;
  405. ++xsltLocaleListSize;
  406. return(TRUE);
  407. }
  408. /**
  409. * xsltIterateSupportedLocales:
  410. * @lcid: not used
  411. *
  412. * callback used to track locales
  413. *
  414. * Returns TRUE if not at the end of the array
  415. */
  416. BOOL CALLBACK
  417. xsltIterateSupportedLocales(LPSTR lcid) {
  418. static int count = 0;
  419. xmlChar iso639lang [XSLTMAX_ISO639LANGLEN +1];
  420. xmlChar iso3136ctry[XSLTMAX_ISO3166CNTRYLEN+1];
  421. int k, l;
  422. xsltRFC1766Info *p = xsltLocaleList + count;
  423. k = sscanf(lcid, "%lx", (long*)&p->lcid);
  424. if (k < 1) goto end;
  425. /*don't count terminating null character*/
  426. k = GetLocaleInfoA(p->lcid, LOCALE_SISO639LANGNAME,
  427. (char *) iso639lang, sizeof(iso639lang));
  428. if (--k < 1) goto end;
  429. l = GetLocaleInfoA(p->lcid, LOCALE_SISO3166CTRYNAME,
  430. (char *) iso3136ctry, sizeof(iso3136ctry));
  431. if (--l < 1) goto end;
  432. { /*fill results*/
  433. xmlChar *q = p->tag;
  434. memcpy(q, iso639lang, k);
  435. q += k;
  436. *q++ = '-';
  437. memcpy(q, iso3136ctry, l);
  438. q += l;
  439. *q = '\0';
  440. }
  441. ++count;
  442. end:
  443. return((count < xsltLocaleListSize) ? TRUE : FALSE);
  444. }
  445. static void
  446. xsltEnumSupportedLocales(void) {
  447. xmlRMutexLock(xsltLocaleMutex);
  448. if (xsltLocaleListSize <= 0) {
  449. size_t len;
  450. EnumSystemLocalesA(xsltCountSupportedLocales, LCID_SUPPORTED);
  451. len = xsltLocaleListSize * sizeof(xsltRFC1766Info);
  452. xsltLocaleList = xmlMalloc(len);
  453. memset(xsltLocaleList, 0, len);
  454. EnumSystemLocalesA(xsltIterateSupportedLocales, LCID_SUPPORTED);
  455. }
  456. xmlRMutexUnlock(xsltLocaleMutex);
  457. }
  458. #endif /*def XSLT_LOCALE_WINAPI*/