123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520 |
- /*
- * xsltlocale.c: locale handling
- *
- * Reference:
- * RFC 3066: Tags for the Identification of Languages
- * http://www.ietf.org/rfc/rfc3066.txt
- * ISO 639-1, ISO 3166-1
- *
- * Author: Nick Wellnhofer
- * winapi port: Roumen Petrov
- */
- #define IN_LIBXSLT
- #include "libxslt.h"
- #include <string.h>
- #include <libxml/xmlmemory.h>
- #include "xsltlocale.h"
- #include "xsltutils.h"
/*
 * ASCII-only character helpers. TOUPPER clears bit 0x20 and TOLOWER sets it,
 * so the result is only meaningful for characters that pass ISALPHA.
 * ISALPHA is true exactly for 'A'..'Z' and 'a'..'z'.
 * Arguments are parenthesized so that expressions can be passed safely;
 * each macro still evaluates its argument exactly once.
 */
#define TOUPPER(c) ((c) & ~0x20)
#define TOLOWER(c) ((c) | 0x20)
#define ISALPHA(c) ((unsigned)(TOUPPER(c) - 'A') < 26)

/* Maximum lengths, not counting the terminating null character */
#define XSLTMAX_ISO639LANGLEN       8
#define XSLTMAX_ISO3166CNTRYLEN     8
/* Longest supported tag: <lang>-<cntry> */
#define XSLTMAX_LANGTAGLEN \
    (XSLTMAX_ISO639LANGLEN + 1 + XSLTMAX_ISO3166CNTRYLEN)
- static const xmlChar* xsltDefaultRegion(const xmlChar *localeName);
- #ifdef XSLT_LOCALE_WINAPI
- xmlRMutexPtr xsltLocaleMutex = NULL;
- struct xsltRFC1766Info_s {
- /*note typedef unsigned char xmlChar !*/
- xmlChar tag[XSLTMAX_LANGTAGLEN+1];
- /*note typedef LCID xsltLocale !*/
- xsltLocale lcid;
- };
- typedef struct xsltRFC1766Info_s xsltRFC1766Info;
- static int xsltLocaleListSize = 0;
- static xsltRFC1766Info *xsltLocaleList = NULL;
- static xsltLocale
- xslt_locale_WINAPI(const xmlChar *languageTag) {
- int k;
- xsltRFC1766Info *p = xsltLocaleList;
- for (k=0; k<xsltLocaleListSize; k++, p++)
- if (xmlStrcmp(p->tag, languageTag) == 0) return p->lcid;
- return((xsltLocale)0);
- }
- static void xsltEnumSupportedLocales(void);
- #endif
/**
 * xsltFreeLocales:
 *
 * Shutdown hook of the locale support. In the WINAPI build it releases the
 * table of supported locales while holding xsltLocaleMutex; in other builds
 * it does nothing.
 */
void
xsltFreeLocales(void) {
#ifdef XSLT_LOCALE_WINAPI
    xsltRFC1766Info *stale;

    xmlRMutexLock(xsltLocaleMutex);
    /* detach the table first, then release it; xsltLocaleListSize is
       intentionally left untouched here */
    stale = xsltLocaleList;
    xsltLocaleList = NULL;
    xmlFree(stale);
    xmlRMutexUnlock(xsltLocaleMutex);
#endif
}
- /**
- * xsltNewLocale:
- * @languageTag: RFC 3066 language tag
- *
- * Creates a new locale of an opaque system dependent type based on the
- * language tag.
- *
- * Returns the locale or NULL on error or if no matching locale was found
- */
- xsltLocale
- xsltNewLocale(const xmlChar *languageTag) {
- #ifdef XSLT_LOCALE_POSIX
- xsltLocale locale;
- char localeName[XSLTMAX_LANGTAGLEN+6]; /* 6 chars for ".utf8\0" */
- const xmlChar *p = languageTag;
- const char *region = NULL;
- char *q = localeName;
- int i, llen;
- /* Convert something like "pt-br" to "pt_BR.utf8" */
- if (languageTag == NULL)
- return(NULL);
- for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
- *q++ = TOLOWER(*p++);
- if (i == 0)
- return(NULL);
- llen = i;
- if (*p) {
- if (*p++ != '-')
- return(NULL);
- *q++ = '_';
- for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
- *q++ = TOUPPER(*p++);
- if (i == 0 || *p)
- return(NULL);
- memcpy(q, ".utf8", 6);
- locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
- if (locale != NULL)
- return(locale);
- /* Continue without using country code */
- q = localeName + llen;
- }
- /* Try locale without territory, e.g. for Esperanto (eo) */
- memcpy(q, ".utf8", 6);
- locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
- if (locale != NULL)
- return(locale);
- /* Try to find most common country for language */
- if (llen != 2)
- return(NULL);
- region = (char *)xsltDefaultRegion((xmlChar *)localeName);
- if (region == NULL)
- return(NULL);
- q = localeName + llen;
- *q++ = '_';
- *q++ = region[0];
- *q++ = region[1];
- memcpy(q, ".utf8", 6);
- locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
- return(locale);
- #endif
- #ifdef XSLT_LOCALE_WINAPI
- {
- xsltLocale locale = (xsltLocale)0;
- xmlChar localeName[XSLTMAX_LANGTAGLEN+1];
- xmlChar *q = localeName;
- const xmlChar *p = languageTag;
- int i, llen;
- const xmlChar *region = NULL;
- if (languageTag == NULL) goto end;
- xsltEnumSupportedLocales();
- for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
- *q++ = TOLOWER(*p++);
- if (i == 0) goto end;
- llen = i;
- *q++ = '-';
- if (*p) { /*if country tag is given*/
- if (*p++ != '-') goto end;
- for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
- *q++ = TOUPPER(*p++);
- if (i == 0 || *p) goto end;
- *q = '\0';
- locale = xslt_locale_WINAPI(localeName);
- if (locale != (xsltLocale)0) goto end;
- }
- /* Try to find most common country for language */
- region = xsltDefaultRegion(localeName);
- if (region == NULL) goto end;
- strcpy((char *) localeName + llen + 1, (char *) region);
- locale = xslt_locale_WINAPI(localeName);
- end:
- return(locale);
- }
- #endif
- #ifdef XSLT_LOCALE_NONE
- return(NULL);
- #endif
- }
- static const xmlChar*
- xsltDefaultRegion(const xmlChar *localeName) {
- xmlChar c;
- /* region should be xmlChar, but gcc warns on all string assignments */
- const char *region = NULL;
- c = localeName[1];
- /* This is based on the locales from glibc 2.3.3 */
- switch (localeName[0]) {
- case 'a':
- if (c == 'a' || c == 'm') region = "ET";
- else if (c == 'f') region = "ZA";
- else if (c == 'n') region = "ES";
- else if (c == 'r') region = "AE";
- else if (c == 'z') region = "AZ";
- break;
- case 'b':
- if (c == 'e') region = "BY";
- else if (c == 'g') region = "BG";
- else if (c == 'n') region = "BD";
- else if (c == 'r') region = "FR";
- else if (c == 's') region = "BA";
- break;
- case 'c':
- if (c == 'a') region = "ES";
- else if (c == 's') region = "CZ";
- else if (c == 'y') region = "GB";
- break;
- case 'd':
- if (c == 'a') region = "DK";
- else if (c == 'e') region = "DE";
- break;
- case 'e':
- if (c == 'l') region = "GR";
- else if (c == 'n' || c == 'o') region = "US";
- else if (c == 's' || c == 'u') region = "ES";
- else if (c == 't') region = "EE";
- break;
- case 'f':
- if (c == 'a') region = "IR";
- else if (c == 'i') region = "FI";
- else if (c == 'o') region = "FO";
- else if (c == 'r') region = "FR";
- break;
- case 'g':
- if (c == 'a') region = "IE";
- else if (c == 'l') region = "ES";
- else if (c == 'v') region = "GB";
- break;
- case 'h':
- if (c == 'e') region = "IL";
- else if (c == 'i') region = "IN";
- else if (c == 'r') region = "HT";
- else if (c == 'u') region = "HU";
- break;
- case 'i':
- if (c == 'd') region = "ID";
- else if (c == 's') region = "IS";
- else if (c == 't') region = "IT";
- else if (c == 'w') region = "IL";
- break;
- case 'j':
- if (c == 'a') region = "JP";
- break;
- case 'k':
- if (c == 'l') region = "GL";
- else if (c == 'o') region = "KR";
- else if (c == 'w') region = "GB";
- break;
- case 'l':
- if (c == 't') region = "LT";
- else if (c == 'v') region = "LV";
- break;
- case 'm':
- if (c == 'k') region = "MK";
- else if (c == 'l' || c == 'r') region = "IN";
- else if (c == 'n') region = "MN";
- else if (c == 's') region = "MY";
- else if (c == 't') region = "MT";
- break;
- case 'n':
- if (c == 'b' || c == 'n' || c == 'o') region = "NO";
- else if (c == 'e') region = "NP";
- else if (c == 'l') region = "NL";
- break;
- case 'o':
- if (c == 'm') region = "ET";
- break;
- case 'p':
- if (c == 'a') region = "IN";
- else if (c == 'l') region = "PL";
- else if (c == 't') region = "PT";
- break;
- case 'r':
- if (c == 'o') region = "RO";
- else if (c == 'u') region = "RU";
- break;
- case 's':
- switch (c) {
- case 'e': region = "NO"; break;
- case 'h': region = "YU"; break;
- case 'k': region = "SK"; break;
- case 'l': region = "SI"; break;
- case 'o': region = "ET"; break;
- case 'q': region = "AL"; break;
- case 't': region = "ZA"; break;
- case 'v': region = "SE"; break;
- }
- break;
- case 't':
- if (c == 'a' || c == 'e') region = "IN";
- else if (c == 'h') region = "TH";
- else if (c == 'i') region = "ER";
- else if (c == 'r') region = "TR";
- else if (c == 't') region = "RU";
- break;
- case 'u':
- if (c == 'k') region = "UA";
- else if (c == 'r') region = "PK";
- break;
- case 'v':
- if (c == 'i') region = "VN";
- break;
- case 'w':
- if (c == 'a') region = "BE";
- break;
- case 'x':
- if (c == 'h') region = "ZA";
- break;
- case 'z':
- if (c == 'h') region = "CN";
- else if (c == 'u') region = "ZA";
- break;
- }
- return((xmlChar *)region);
- }
- /**
- * xsltFreeLocale:
- * @locale: the locale to free
- *
- * Frees a locale created with xsltNewLocale
- */
- void
- xsltFreeLocale(xsltLocale locale) {
- #ifdef XSLT_LOCALE_POSIX
- freelocale(locale);
- #endif
- }
- /**
- * xsltStrxfrm:
- * @locale: locale created with xsltNewLocale
- * @string: UTF-8 string to transform
- *
- * Transforms a string according to locale. The transformed string must then be
- * compared with xsltLocaleStrcmp and freed with xmlFree.
- *
- * Returns the transformed string or NULL on error
- */
- xsltLocaleChar *
- xsltStrxfrm(xsltLocale locale, const xmlChar *string)
- {
- #ifdef XSLT_LOCALE_NONE
- return(NULL);
- #else
- size_t xstrlen, r;
- xsltLocaleChar *xstr;
- #ifdef XSLT_LOCALE_POSIX
- xstrlen = strxfrm_l(NULL, (const char *)string, 0, locale) + 1;
- xstr = (xsltLocaleChar *) xmlMalloc(xstrlen);
- if (xstr == NULL) {
- xsltTransformError(NULL, NULL, NULL,
- "xsltStrxfrm : out of memory error\n");
- return(NULL);
- }
- r = strxfrm_l((char *)xstr, (const char *)string, xstrlen, locale);
- #endif
- #ifdef XSLT_LOCALE_WINAPI
- xstrlen = MultiByteToWideChar(CP_UTF8, 0, (char *) string, -1, NULL, 0);
- if (xstrlen == 0) {
- xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar check failed\n");
- return(NULL);
- }
- xstr = (xsltLocaleChar*) xmlMalloc(xstrlen * sizeof(xsltLocaleChar));
- if (xstr == NULL) {
- xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : out of memory\n");
- return(NULL);
- }
- r = MultiByteToWideChar(CP_UTF8, 0, (char *) string, -1, xstr, xstrlen);
- if (r == 0) {
- xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar failed\n");
- xmlFree(xstr);
- return(NULL);
- }
- return(xstr);
- #endif /* XSLT_LOCALE_WINAPI */
- if (r >= xstrlen) {
- xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : strxfrm failed\n");
- xmlFree(xstr);
- return(NULL);
- }
- return(xstr);
- #endif /* XSLT_LOCALE_NONE */
- }
- /**
- * xsltLocaleStrcmp:
- * @locale: a locale identifier
- * @str1: a string transformed with xsltStrxfrm
- * @str2: a string transformed with xsltStrxfrm
- *
- * Compares two strings transformed with xsltStrxfrm
- *
- * Returns a value < 0 if str1 sorts before str2,
- * a value > 0 if str1 sorts after str2,
- * 0 if str1 and str2 are equal wrt sorting
- */
- int
- xsltLocaleStrcmp(xsltLocale locale, const xsltLocaleChar *str1, const xsltLocaleChar *str2) {
- (void)locale;
- #ifdef XSLT_LOCALE_WINAPI
- {
- int ret;
- if (str1 == str2) return(0);
- if (str1 == NULL) return(-1);
- if (str2 == NULL) return(1);
- ret = CompareStringW(locale, 0, str1, -1, str2, -1);
- if (ret == 0) {
- xsltTransformError(NULL, NULL, NULL, "xsltLocaleStrcmp : CompareStringW fail\n");
- return(0);
- }
- return(ret - 2);
- }
- #else
- return(xmlStrcmp(str1, str2));
- #endif
- }
- #ifdef XSLT_LOCALE_WINAPI
- /**
- * xsltCountSupportedLocales:
- * @lcid: not used
- *
- * callback used to count locales
- *
- * Returns TRUE
- */
- BOOL CALLBACK
- xsltCountSupportedLocales(LPSTR lcid) {
- (void) lcid;
- ++xsltLocaleListSize;
- return(TRUE);
- }
- /**
- * xsltIterateSupportedLocales:
- * @lcid: not used
- *
- * callback used to track locales
- *
- * Returns TRUE if not at the end of the array
- */
- BOOL CALLBACK
- xsltIterateSupportedLocales(LPSTR lcid) {
- static int count = 0;
- xmlChar iso639lang [XSLTMAX_ISO639LANGLEN +1];
- xmlChar iso3136ctry[XSLTMAX_ISO3166CNTRYLEN+1];
- int k, l;
- xsltRFC1766Info *p = xsltLocaleList + count;
- k = sscanf(lcid, "%lx", (long*)&p->lcid);
- if (k < 1) goto end;
- /*don't count terminating null character*/
- k = GetLocaleInfoA(p->lcid, LOCALE_SISO639LANGNAME,
- (char *) iso639lang, sizeof(iso639lang));
- if (--k < 1) goto end;
- l = GetLocaleInfoA(p->lcid, LOCALE_SISO3166CTRYNAME,
- (char *) iso3136ctry, sizeof(iso3136ctry));
- if (--l < 1) goto end;
- { /*fill results*/
- xmlChar *q = p->tag;
- memcpy(q, iso639lang, k);
- q += k;
- *q++ = '-';
- memcpy(q, iso3136ctry, l);
- q += l;
- *q = '\0';
- }
- ++count;
- end:
- return((count < xsltLocaleListSize) ? TRUE : FALSE);
- }
- static void
- xsltEnumSupportedLocales(void) {
- xmlRMutexLock(xsltLocaleMutex);
- if (xsltLocaleListSize <= 0) {
- size_t len;
- EnumSystemLocalesA(xsltCountSupportedLocales, LCID_SUPPORTED);
- len = xsltLocaleListSize * sizeof(xsltRFC1766Info);
- xsltLocaleList = xmlMalloc(len);
- memset(xsltLocaleList, 0, len);
- EnumSystemLocalesA(xsltIterateSupportedLocales, LCID_SUPPORTED);
- }
- xmlRMutexUnlock(xsltLocaleMutex);
- }
- #endif /*def XSLT_LOCALE_WINAPI*/
|