mcl.c 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688
  1. /*
  2. * Wine Message Compiler lexical scanner
  3. *
  4. * Copyright 2000 Bertho A. Stultiens (BS)
  5. *
  6. * This library is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * This library is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with this library; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  19. */
  20. #include "config.h"
  21. #include <stdio.h>
  22. #include <stdlib.h>
  23. #include <ctype.h>
  24. #include <assert.h>
  25. #include <string.h>
  26. #include "wmc.h"
  27. #include "utils.h"
  28. #include "lang.h"
  29. #include "mcy.tab.h"
  30. /*
  31. * Keywords are case insensitive. All normal input is treated as
  32. * being in codepage iso-8859-1 for ascii input files (unicode
  33. * page 0) and as equivalent unicode if unicode input is selected.
  34. * All normal input, which is not part of a message text, is
  35. * enforced to be unicode page 0. Otherwise an error will be
  36. * generated. The normal file data should only be ASCII because
  37. * that is the basic definition of the grammar.
  38. *
  39. * Byteorder or unicode input is determined automatically by
  40. * reading the first 8 bytes and checking them against unicode
  41. * page 0 byteorder (hibyte must be 0).
  42. * -- FIXME --
  43. * Alternatively, the input is checked against a special byte
  44. * sequence to identify the file.
  45. * -- FIXME --
  46. *
  47. *
  48. * Keywords:
  49. * Codepages
  50. * Facility
  51. * FacilityNames
  52. * LanguageNames
  53. * MessageId
  54. * MessageIdTypedef
  55. * Severity
  56. * SeverityNames
  57. * SymbolicName
  58. *
  59. * Default added identifiers for classes:
  60. * SeverityNames:
  61. * Success = 0x0
  62. * Informational = 0x1
  63. * Warning = 0x2
  64. * Error = 0x3
  65. * FacilityNames:
  66. * System = 0x0FF
  67. * Application = 0xFFF
  68. *
  69. * The 'Codepages' keyword is a wmc extension.
  70. */
  71. static const WCHAR ustr_application[] = { 'A', 'p', 'p', 'l', 'i', 'c', 'a', 't', 'i', 'o', 'n', 0 };
  72. static const WCHAR ustr_codepages[] = { 'C', 'o', 'd', 'e', 'p', 'a', 'g', 'e', 's', 0 };
  73. static const WCHAR ustr_english[] = { 'E', 'n', 'g', 'l', 'i', 's', 'h', 0 };
  74. static const WCHAR ustr_error[] = { 'E', 'r', 'r', 'o', 'r', 0 };
  75. static const WCHAR ustr_facility[] = { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 0 };
  76. static const WCHAR ustr_facilitynames[] = { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
  77. static const WCHAR ustr_informational[] = { 'I', 'n', 'f', 'o', 'r', 'm', 'a', 't', 'i', 'o', 'n', 'a', 'l', 0 };
  78. static const WCHAR ustr_language[] = { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 0};
  79. static const WCHAR ustr_languagenames[] = { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 'N', 'a', 'm', 'e', 's', 0};
  80. static const WCHAR ustr_messageid[] = { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 0 };
  81. static const WCHAR ustr_messageidtypedef[] = { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 'T', 'y', 'p', 'e', 'd', 'e', 'f', 0 };
  82. static const WCHAR ustr_dxgi[] = { 'D', 'x', 'g', 'i', 0 };
  83. static const WCHAR ustr_null[] = { 'N', 'u', 'l', 'l', 0 };
  84. static const WCHAR ustr_outputbase[] = { 'O', 'u', 't', 'p', 'u', 't', 'B', 'a', 's', 'e', 0 };
  85. static const WCHAR ustr_severity[] = { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 0 };
  86. static const WCHAR ustr_severitynames[] = { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
  87. static const WCHAR ustr_success[] = { 'S', 'u', 'c', 'c', 'e', 's', 's', 0 };
  88. static const WCHAR ustr_symbolicname[] = { 'S', 'y', 'm', 'b', 'o', 'l', 'i', 'c', 'N', 'a', 'm', 'e', 0 };
  89. static const WCHAR ustr_system[] = { 'S', 'y', 's', 't', 'e', 'm', 0 };
  90. static const WCHAR ustr_warning[] = { 'W', 'a', 'r', 'n', 'i', 'n', 'g', 0 };
  91. static const WCHAR ustr_msg00001[] = { 'm', 's', 'g', '0', '0', '0', '0', '1', 0 };
  /*
   * This table is to beat any form of "expression building" to check for
   * correct filename characters. It is also used for ident checks.
   * FIXME: use it more consistently.
   *
   * Every entry is a bit set of the CH_* flags below, indexed by a character
   * value in the range 0x00 - 0xff. The entries for '*', '?' and '.' also
   * carry the 0x10 / 0x20 bits of the two flags that are commented out.
   *
   * The element type is unsigned char on purpose: entries such as 0x80
   * (CH_INVALID) and 0xc0 do not fit into a signed char and would otherwise
   * be stored as negative numbers on platforms where plain char is signed.
   */
  #define CH_SHORTNAME 0x01
  #define CH_LONGNAME 0x02
  #define CH_IDENT 0x04
  #define CH_NUMBER 0x08
  /*#define CH_WILDCARD 0x10*/
  /*#define CH_DOT 0x20*/
  #define CH_PUNCT 0x40
  #define CH_INVALID 0x80
  static const unsigned char char_table[256] = {
      0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x00 - 0x07 */
      0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x08 - 0x0F */
      0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x10 - 0x17 */
      0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x18 - 0x1F */
      0x80, 0x03, 0x80, 0x03, 0x03, 0x03, 0x03, 0x03, /* 0x20 - 0x27 " !"#$%&'" */
      0x43, 0x43, 0x10, 0x80, 0x03, 0x03, 0x22, 0x80, /* 0x28 - 0x2F "()*+,-./" */
      0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, /* 0x30 - 0x37 "01234567" */
      0x0b, 0x0b, 0xc0, 0x80, 0x80, 0x80, 0x80, 0x10, /* 0x38 - 0x3F "89:;<=>?" */
      0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x40 - 0x47 "@ABCDEFG" */
      0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x48 - 0x4F "HIJKLMNO" */
      0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x50 - 0x57 "PQRSTUVW" */
      0x07, 0x07, 0x07, 0x80, 0x80, 0x80, 0x80, 0x07, /* 0x58 - 0x5F "XYZ[\]^_" */
      0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x60 - 0x67 "`abcdefg" */
      0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x68 - 0x6F "hijklmno" */
      0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x70 - 0x77 "pqrstuvw" */
      0x07, 0x07, 0x07, 0x03, 0x80, 0x03, 0x03, 0x80, /* 0x78 - 0x7F "xyz{|}~ " */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x80 - 0x87 */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x88 - 0x8F */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x90 - 0x97 */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x98 - 0x9F */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA0 - 0xA7 */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA8 - 0xAF */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB0 - 0xB7 */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB8 - 0xBF */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC0 - 0xC7 */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC8 - 0xCF */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD0 - 0xD7 */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD8 - 0xDF */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE0 - 0xE7 */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE8 - 0xEF */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xF0 - 0xF7 */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x80, /* 0xF8 - 0xFF */
  };
  /*
   * Check whether a character value lies in unicode page 0, i.e. has no
   * bits set above the low eight. Yields 1 for 0x00 - 0xff and 0 for
   * everything else, negative values such as EOF included.
   */
  static int isisochar(int ch)
  {
      return (ch & ~0xff) == 0;
  }
  /* Codepage currently in effect; fill_inputbuffer() uses it to convert ASCII-mode input */
  static int codepage;

  /*
   * Make cp the codepage in effect from now on.
   */
  void set_codepage(int cp)
  {
      codepage = cp;
  }
  148. /*
  149. * Input functions
  150. */
  151. #define INPUTBUFFER_SIZE 2048 /* Must be larger than 4 and approx. large enough to hold a line */
  152. static int nungetstack = 0;
  153. static int allocungetstack = 0;
  154. static char *ungetstack = NULL;
  155. static int ninputbuffer = 0;
  156. static WCHAR inputbuffer[INPUTBUFFER_SIZE];
  157. /*
  158. * Fill the input buffer with *one* line of input.
  159. * The line is '\n' terminated so that scanning
  160. * messages with translation works as expected
  161. * (otherwise we cannot pre-translate because the
  162. * language is first known one line before the
  163. * actual message).
  164. */
  165. static int fill_inputbuffer(void)
  166. {
  167. static enum input_mode { INPUT_UNKNOWN, INPUT_ASCII, INPUT_UTF8, INPUT_UNICODE } mode;
  168. static int swapped;
  169. static unsigned char utf8_bom[3] = { 0xef, 0xbb, 0xbf };
  170. WCHAR *wbuf;
  171. int i, pos = 0, len = 0;
  172. char buffer[INPUTBUFFER_SIZE];
  173. if (mode == INPUT_UNKNOWN)
  174. {
  175. len = fread( buffer, 1, 8, yyin );
  176. wbuf = (WCHAR *)buffer;
  177. if (len >= 3 && !memcmp( buffer, utf8_bom, 3 ))
  178. {
  179. mode = INPUT_UTF8;
  180. memmove( buffer, buffer + 3, len - 3 );
  181. len -= 3;
  182. }
  183. else if (len == 8)
  184. {
  185. if (wbuf[0] == 0xfeff || wbuf[0] == 0xfffe)
  186. {
  187. mode = INPUT_UNICODE;
  188. pos = 1;
  189. swapped = (wbuf[0] == 0xfffe);
  190. }
  191. else if (!((wbuf[0] | wbuf[1] | wbuf[2] | wbuf[3]) & 0xff00))
  192. {
  193. mode = INPUT_UNICODE;
  194. }
  195. else if (!((wbuf[0] | wbuf[1] | wbuf[2] | wbuf[3]) & 0x00ff))
  196. {
  197. mode = INPUT_UNICODE;
  198. swapped = 1;
  199. }
  200. }
  201. if (mode == INPUT_UNICODE)
  202. {
  203. len = 4 - pos;
  204. memcpy( inputbuffer, wbuf + pos, len * sizeof(WCHAR) );
  205. }
  206. else if (mode == INPUT_UNKNOWN) mode = unicodein ? INPUT_UTF8 : INPUT_ASCII;
  207. }
  208. switch (mode)
  209. {
  210. case INPUT_ASCII:
  211. if (!fgets( buffer + len, sizeof(buffer) - len, yyin )) break;
  212. wbuf = codepage_to_unicode( codepage, buffer, strlen(buffer), &ninputbuffer );
  213. memcpy( inputbuffer, wbuf, ninputbuffer * sizeof(WCHAR) );
  214. free( wbuf );
  215. return 1;
  216. case INPUT_UTF8:
  217. if (!fgets( buffer + len, sizeof(buffer) - len, yyin )) break;
  218. wbuf = utf8_to_unicode( buffer, strlen(buffer), &ninputbuffer );
  219. memcpy( inputbuffer, wbuf, ninputbuffer * sizeof(WCHAR) );
  220. free( wbuf );
  221. return 1;
  222. case INPUT_UNICODE:
  223. len += fread( inputbuffer + len, sizeof(WCHAR), INPUTBUFFER_SIZE - len, yyin );
  224. if (!len) break;
  225. if (swapped) for (i = 0; i < len; i++) inputbuffer[i] = (inputbuffer[i] << 8) | (inputbuffer[i] >> 8);
  226. ninputbuffer = len;
  227. return 1;
  228. case INPUT_UNKNOWN:
  229. break;
  230. }
  231. if (ferror(yyin)) xyyerror( "Fatal: reading input failed\n" );
  232. return 0;
  233. }
  234. static int get_unichar(void)
  235. {
  236. static WCHAR *b = NULL;
  237. char_number++;
  238. if(nungetstack)
  239. return ungetstack[--nungetstack];
  240. if(!ninputbuffer)
  241. {
  242. if(!fill_inputbuffer())
  243. return EOF;
  244. b = inputbuffer;
  245. }
  246. ninputbuffer--;
  247. return *b++;
  248. }
  249. static void unget_unichar(int ch)
  250. {
  251. if(ch == EOF)
  252. return;
  253. char_number--;
  254. if(nungetstack == allocungetstack)
  255. {
  256. allocungetstack += 32;
  257. ungetstack = xrealloc(ungetstack, allocungetstack * sizeof(*ungetstack));
  258. }
  259. ungetstack[nungetstack++] = (WCHAR)ch;
  260. }
  /*
   * Normal character stack.
   * Used for number scanning.
   */
  static char *charstack = NULL;    /* The collected characters */
  static int alloccharstack = 0;    /* Number of elements allocated for charstack */
  static int ncharstack = 0;        /* Number of elements in use */

  /*
   * Forget the stack's contents. The storage is kept for reuse.
   */
  static void empty_char_stack(void)
  {
      ncharstack = 0;
  }
  272. static void push_char(int ch)
  273. {
  274. if(ncharstack == alloccharstack)
  275. {
  276. alloccharstack += 32;
  277. charstack = xrealloc(charstack, alloccharstack * sizeof(*charstack));
  278. }
  279. charstack[ncharstack++] = (char)ch;
  280. }
  281. static int tos_char_stack(void)
  282. {
  283. if(!ncharstack)
  284. return 0;
  285. else
  286. return (int)(charstack[ncharstack-1] & 0xff);
  287. }
  288. static char *get_char_stack(void)
  289. {
  290. return charstack;
  291. }
  292. /*
  293. * Unicode character stack.
  294. * Used for general scanner.
  295. */
  296. static int nunicharstack = 0;
  297. static int allocunicharstack = 0;
  298. static WCHAR *unicharstack = NULL;
  299. static void empty_unichar_stack(void)
  300. {
  301. nunicharstack = 0;
  302. }
  303. static void push_unichar(int ch)
  304. {
  305. if(nunicharstack == allocunicharstack)
  306. {
  307. allocunicharstack += 128;
  308. unicharstack = xrealloc(unicharstack, allocunicharstack * sizeof(*unicharstack));
  309. }
  310. unicharstack[nunicharstack++] = (WCHAR)ch;
  311. }
  #if 0
  /*
   * Peek at the unicode character pushed last without removing it.
   * Returns 0 when the stack is empty. Compiled out with '#if 0'.
   */
  static int tos_unichar_stack(void)
  {
      return nunicharstack ? (int)(unicharstack[nunicharstack - 1] & 0xffff) : 0;
  }
  #endif
  321. static WCHAR *get_unichar_stack(void)
  322. {
  323. return unicharstack;
  324. }
  325. /*
  326. * Number scanner
  327. *
  328. * state | ch | next state
  329. * ------+-----------------+--------------------------
  330. * 0 | [0] | 1
  331. * 0 | [1-9] | 4
  332. * 0 | . | error (should never occur)
  333. * 1 | [xX] | 2
  334. * 1 | [0-7] | 3
  335. * 1 | [89a-wyzA-WYZ_] | error invalid digit
  336. * 1 | . | return 0
  337. * 2 | [0-9a-fA-F] | 2
  338. * 2 | [g-zG-Z_] | error invalid hex digit
  339. * 2 | . | return (hex-number) if TOS != [xX] else error
  340. * 3 | [0-7] | 3
  341. * 3 | [89a-zA-Z_] | error invalid octal digit
  342. * 3 | . | return (octal-number)
  343. * 4 | [0-9] | 4
  344. * 4 | [a-zA-Z_] | error invalid decimal digit
  345. * 4 | . | return (decimal-number)
  346. *
  347. * All non-identifier characters [^a-zA-Z_0-9] terminate the scan
  348. * and return the value. This is not entirely correct, but close
  349. * enough (should check punctuators as trailing context, but the
  350. * char_table is not adapted to that and it is questionable whether
  351. * it is worth the trouble).
  352. * All non-iso-8859-1 characters are an error.
  353. */
  354. static int scan_number(int ch)
  355. {
  356. int state = 0;
  357. int base = 10;
  358. empty_char_stack();
  359. while(1)
  360. {
  361. if(!isisochar(ch))
  362. xyyerror("Invalid digit\n");
  363. switch(state)
  364. {
  365. case 0:
  366. if(isdigit(ch))
  367. {
  368. push_char(ch);
  369. if(ch == '0')
  370. state = 1;
  371. else
  372. state = 4;
  373. }
  374. else
  375. internal_error(__FILE__, __LINE__, "Non-digit in first number-scanner state\n");
  376. break;
  377. case 1:
  378. if(ch == 'x' || ch == 'X')
  379. {
  380. push_char(ch);
  381. state = 2;
  382. }
  383. else if(ch >= '0' && ch <= '7')
  384. {
  385. push_char(ch);
  386. state = 3;
  387. }
  388. else if(isalpha(ch) || ch == '_')
  389. xyyerror("Invalid number digit\n");
  390. else
  391. {
  392. unget_unichar(ch);
  393. mcy_lval.num = 0;
  394. return tNUMBER;
  395. }
  396. break;
  397. case 2:
  398. if(isxdigit(ch))
  399. push_char(ch);
  400. else if(isalpha(ch) || ch == '_' || !isxdigit(tos_char_stack()))
  401. xyyerror("Invalid hex digit\n");
  402. else
  403. {
  404. base = 16;
  405. goto finish;
  406. }
  407. break;
  408. case 3:
  409. if(ch >= '0' && ch <= '7')
  410. push_char(ch);
  411. else if(isalnum(ch) || ch == '_')
  412. xyyerror("Invalid octal digit\n");
  413. else
  414. {
  415. base = 8;
  416. goto finish;
  417. }
  418. break;
  419. case 4:
  420. if(isdigit(ch))
  421. push_char(ch);
  422. else if(isalnum(ch) || ch == '_')
  423. xyyerror("Invalid decimal digit\n");
  424. else
  425. {
  426. base = 10;
  427. goto finish;
  428. }
  429. break;
  430. default:
  431. internal_error(__FILE__, __LINE__, "Invalid state in number-scanner\n");
  432. }
  433. ch = get_unichar();
  434. }
  435. finish:
  436. unget_unichar(ch);
  437. push_char(0);
  438. mcy_lval.num = strtoul(get_char_stack(), NULL, base);
  439. return tNUMBER;
  440. }
  441. static void newline(void)
  442. {
  443. line_number++;
  444. char_number = 1;
  445. }
  446. static int unisort(const void *p1, const void *p2)
  447. {
  448. return unistricmp(((const struct token *)p1)->name, ((const struct token *)p2)->name);
  449. }
  450. static struct token *tokentable = NULL;
  451. static int ntokentable = 0;
  452. struct token *lookup_token(const WCHAR *s)
  453. {
  454. struct token tok;
  455. tok.name = s;
  456. return (struct token *)bsearch(&tok, tokentable, ntokentable, sizeof(*tokentable), unisort);
  457. }
  458. void add_token(enum tok_enum type, const WCHAR *name, int tok, int cp, const WCHAR *alias, int fix)
  459. {
  460. ntokentable++;
  461. tokentable = xrealloc(tokentable, ntokentable * sizeof(*tokentable));
  462. tokentable[ntokentable-1].type = type;
  463. tokentable[ntokentable-1].name = name;
  464. tokentable[ntokentable-1].token = tok;
  465. tokentable[ntokentable-1].codepage = cp;
  466. tokentable[ntokentable-1].alias = alias;
  467. tokentable[ntokentable-1].fixed = fix;
  468. qsort(tokentable, ntokentable, sizeof(*tokentable), unisort);
  469. }
  470. void get_tokentable(struct token **tab, int *len)
  471. {
  472. assert(tab != NULL);
  473. assert(len != NULL);
  474. *tab = tokentable;
  475. *len = ntokentable;
  476. }
  477. /*
  478. * The scanner
  479. *
  480. */
  481. int mcy_lex(void)
  482. {
  483. static const WCHAR ustr_dot1[] = { '.', '\n', 0 };
  484. static const WCHAR ustr_dot2[] = { '.', '\r', '\n', 0 };
  485. static int isinit = 0;
  486. int ch;
  487. if(!isinit)
  488. {
  489. isinit++;
  490. set_codepage(WMC_DEFAULT_CODEPAGE);
  491. add_token(tok_keyword, ustr_codepages, tCODEPAGE, 0, NULL, 0);
  492. add_token(tok_keyword, ustr_facility, tFACILITY, 0, NULL, 1);
  493. add_token(tok_keyword, ustr_facilitynames, tFACNAMES, 0, NULL, 1);
  494. add_token(tok_keyword, ustr_language, tLANGUAGE, 0, NULL, 1);
  495. add_token(tok_keyword, ustr_languagenames, tLANNAMES, 0, NULL, 1);
  496. add_token(tok_keyword, ustr_messageid, tMSGID, 0, NULL, 1);
  497. add_token(tok_keyword, ustr_messageidtypedef, tTYPEDEF, 0, NULL, 1);
  498. add_token(tok_keyword, ustr_outputbase, tBASE, 0, NULL, 1);
  499. add_token(tok_keyword, ustr_severity, tSEVERITY, 0, NULL, 1);
  500. add_token(tok_keyword, ustr_severitynames, tSEVNAMES, 0, NULL, 1);
  501. add_token(tok_keyword, ustr_symbolicname, tSYMNAME, 0, NULL, 1);
  502. add_token(tok_severity, ustr_error, 0x03, 0, NULL, 0);
  503. add_token(tok_severity, ustr_warning, 0x02, 0, NULL, 0);
  504. add_token(tok_severity, ustr_informational, 0x01, 0, NULL, 0);
  505. add_token(tok_severity, ustr_success, 0x00, 0, NULL, 0);
  506. add_token(tok_facility, ustr_application, 0xFFF, 0, NULL, 0);
  507. add_token(tok_facility, ustr_system, 0x0FF, 0, NULL, 0);
  508. add_token(tok_facility, ustr_dxgi, 0x87a, 0, NULL, 0);
  509. add_token(tok_facility, ustr_null, 0x000, 0, NULL, 0);
  510. add_token(tok_language, ustr_english, 0x409, 437, ustr_msg00001, 0);
  511. }
  512. empty_unichar_stack();
  513. while(1)
  514. {
  515. if(want_line)
  516. {
  517. while((ch = get_unichar()) != '\n')
  518. {
  519. if(ch == EOF)
  520. xyyerror("Unexpected EOF\n");
  521. push_unichar(ch);
  522. }
  523. newline();
  524. push_unichar(ch);
  525. push_unichar(0);
  526. if(!unistrcmp(ustr_dot1, get_unichar_stack()) || !unistrcmp(ustr_dot2, get_unichar_stack()))
  527. {
  528. want_line = 0;
  529. /* Reset the codepage to our default after each message */
  530. set_codepage(WMC_DEFAULT_CODEPAGE);
  531. return tMSGEND;
  532. }
  533. mcy_lval.str = xunistrdup(get_unichar_stack());
  534. return tLINE;
  535. }
  536. ch = get_unichar();
  537. if(ch == EOF)
  538. return EOF;
  539. if(ch == '\n')
  540. {
  541. newline();
  542. if(want_nl)
  543. {
  544. want_nl = 0;
  545. return tNL;
  546. }
  547. continue;
  548. }
  549. if(isisochar(ch))
  550. {
  551. if(want_file)
  552. {
  553. int n = 0;
  554. while(n < 8 && isisochar(ch))
  555. {
  556. int t = char_table[ch];
  557. if((t & CH_PUNCT) || !(t & CH_SHORTNAME))
  558. break;
  559. push_unichar(ch);
  560. n++;
  561. ch = get_unichar();
  562. }
  563. unget_unichar(ch);
  564. push_unichar(0);
  565. want_file = 0;
  566. mcy_lval.str = xunistrdup(get_unichar_stack());
  567. return tFILE;
  568. }
  569. if(char_table[ch] & CH_IDENT)
  570. {
  571. struct token *tok;
  572. while(isisochar(ch) && (char_table[ch] & (CH_IDENT|CH_NUMBER)))
  573. {
  574. push_unichar(ch);
  575. ch = get_unichar();
  576. }
  577. unget_unichar(ch);
  578. push_unichar(0);
  579. if(!(tok = lookup_token(get_unichar_stack())))
  580. {
  581. mcy_lval.str = xunistrdup(get_unichar_stack());
  582. return tIDENT;
  583. }
  584. switch(tok->type)
  585. {
  586. case tok_keyword:
  587. return tok->token;
  588. case tok_language:
  589. codepage = tok->codepage;
  590. /* Fall through */
  591. case tok_severity:
  592. case tok_facility:
  593. mcy_lval.tok = tok;
  594. return tTOKEN;
  595. default:
  596. internal_error(__FILE__, __LINE__, "Invalid token type encountered\n");
  597. }
  598. }
  599. if(isspace(ch)) /* Ignore space */
  600. continue;
  601. if(isdigit(ch))
  602. return scan_number(ch);
  603. }
  604. switch(ch)
  605. {
  606. case ':':
  607. case '=':
  608. case '+':
  609. case '(':
  610. case ')':
  611. return ch;
  612. case ';':
  613. while(ch != '\n' && ch != EOF)
  614. {
  615. push_unichar(ch);
  616. ch = get_unichar();
  617. }
  618. newline();
  619. push_unichar(ch); /* Include the newline */
  620. push_unichar(0);
  621. mcy_lval.str = xunistrdup(get_unichar_stack());
  622. return tCOMMENT;
  623. default:
  624. xyyerror("Invalid character '%c' (0x%04x)\n", isisochar(ch) && isprint(ch) ? ch : '.', ch);
  625. }
  626. }
  627. }