parser.l 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666
  1. /* -*-C-*-
  2. *
  3. * Copyright 1998-2000 Bertho A. Stultiens (BS)
  4. *
  5. * This library is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU Lesser General Public
  7. * License as published by the Free Software Foundation; either
  8. * version 2.1 of the License, or (at your option) any later version.
  9. *
  10. * This library is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Lesser General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Lesser General Public
  16. * License along with this library; if not, write to the Free Software
  17. * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  18. *
  19. * History:
  20. * 21-May-2000 BS - Fixed the ident requirement of resource names
  21. * which can be keywords.
  22. * 30-Apr-2000 BS - Reintegration into the wine-tree
  23. * 11-Jan-2000 BS - Very drastic cleanup because we don't have a
  24. * preprocessor in here anymore.
  25. * 02-Jan-2000 BS - Removed the preprocessor code
  26. * 23-Dec-1999 BS - Removed the copyright for Martin von Loewis.
  27. * There is really nothing left of his code in
  28. * this parser.
  29. * 20-Jun-1998 BS - Changed the filename conversion. Filenames are
  30. * case-sensitive under *nix, but not under dos.
  31. * default behaviour is to convert to lower case.
  32. * - All backslashes are converted to forward and
  33. * both single and double slash is recognized as
  34. * MS/Borland does.
  35. * - Fixed a bug in 'yywf' case that prevented
  36. * double quoted names to be scanned properly.
  37. *
  38. * 19-May-1998 BS - Started to build a preprocessor.
  39. * - Changed keyword processing completely to
  40. * table-lookups.
  41. *
  42. * 20-Apr-1998 BS - Added ';' comment stripping
  43. *
  44. * 17-Apr-1998 BS - Made the win32 keywords optional when compiling in
  45. * 16bit mode
  46. *
  47. * 15-Apr-1998 BS - Changed string handling to include escapes
  48. * - Added unicode string handling (no codepage
  49. * translation though).
  50. * - 'Borrowed' the main idea of string scanning from
  51. * the flex manual pages.
  52. * - Added conditional handling of scanning depending
  53. * on the state of the parser. This was mainly required
  54. * to distinguish a file to load or raw data that
  55. * follows. MS's definition of filenames is rather
  56. * complex... It can be unquoted or double quoted. If
  57. * double quoted, then the '\\' char is not automatically
  58. * escaped according to Borland's rc compiler, but it
  59. * accepts both "\\path\\file.rc" and "\path\file.rc".
  60. * This makes life very hard! I go for the escaped
  61. * version, as this seems to be the documented way...
  62. * - Single quoted strings are now parsed and converted
  63. * here.
  64. * - Added comment stripping. The implementation is
  65. * 'borrowed' from the flex manpages.
  66. * - Rebuild string processing so that it may contain
  67. * escaped '\0'.
  68. */
  69. /* Exclusive string handling */
  70. %x tkstr
  71. /* Exclusive unicode string handling */
  72. %x tklstr
  73. /* Exclusive rcdata single quoted data handling */
  74. %x tkrcd
  75. /* Exclusive comment eating... */
  76. %x comment
  77. /* Set when stripping c-junk */
  78. %x pp_cstrip
  79. /* Set when scanning #line style directives */
  80. %x pp_line
  81. /* Set when scanning #pragma */
  82. %x pp_pragma
  83. %x pp_code_page
  84. %option stack
  85. %option noinput nounput noyy_top_state noyywrap
  86. %option 8bit never-interactive
  87. %option prefix="parser_"
  88. /* Some shortcut definitions */
  89. ws [ \f\t\r]
  90. %{
  91. /*#define LEX_DEBUG*/
  92. #include "config.h"
  93. #include <stdio.h>
  94. #include <stdlib.h>
  95. #include <string.h>
  96. #include <ctype.h>
  97. #include <assert.h>
  98. #include <errno.h>
  99. #include <limits.h>
  100. #define YY_NO_UNISTD_H
  101. #include "../tools.h"
  102. #include "wrc.h"
  103. #include "utils.h"
  104. #include "parser.h"
  105. #include "newstruc.h"
  106. #include "wpp_private.h"
  107. #include "parser.tab.h"
  108. /* Always update the current character position within a line */
  109. #define YY_USER_ACTION char_number+=yyleng; wanted_id = want_id; want_id = 0;
  110. #define YY_USER_INIT current_codepage = utf8_input ? CP_UTF8 : -1;
  111. static void addcchar(char c);
  112. static void addwchar(WCHAR s);
  113. static string_t *get_buffered_cstring(void);
  114. static string_t *get_buffered_wstring(void);
  115. static string_t *make_string(char *s);
  116. static char *cbuffer; /* Buffers for string collection */
  117. static int cbufidx;
  118. static int cbufalloc = 0;
  119. static WCHAR *wbuffer;
  120. static int wbufidx;
  121. static int wbufalloc = 0;
  122. static int current_codepage = -1; /* use language default */
  123. /*
  124. * This one is a bit tricky.
  125. * We set 'want_id' in the parser to get the first
  126. * identifier we get across in the scanner, but we
  127. * also want it to be reset at nearly any token we
  128. * see. Exceptions are:
  129. * - newlines
  130. * - comments
  131. * - whitespace
  132. *
  133. * The scanner will automatically reset 'want_id'
  134. * after *each* scanner reduction and puts is value
  135. * into the var below. In this way we can see the
  136. * state after the YY_RULE_SETUP (i.e. the user action;
  137. * see above) and don't have to worry too much when
  138. * it needs to be reset.
  139. */
  140. static int wanted_id = 0;
  141. static int save_wanted_id; /* To save across comment reductions */
  142. struct keyword {
  143. const char *keyword;
  144. int token;
  145. int isextension;
  146. int needcase;
  147. int alwayskw;
  148. };
  149. static struct keyword keywords[] = {
  150. { "ACCELERATORS", tACCELERATORS, 0, 0, 0},
  151. { "ALT", tALT, 0, 0, 0},
  152. { "ASCII", tASCII, 0, 0, 0},
  153. { "AUTO3STATE", tAUTO3STATE, 1, 0, 0},
  154. { "AUTOCHECKBOX", tAUTOCHECKBOX, 1, 0, 0},
  155. { "AUTORADIOBUTTON", tAUTORADIOBUTTON, 1, 0, 0},
  156. { "BEGIN", tBEGIN, 0, 0, 0},
  157. { "BITMAP", tBITMAP, 0, 0, 0},
  158. { "BLOCK", tBLOCK, 0, 0, 0},
  159. { "BUTTON", tBUTTON, 1, 0, 0},
  160. { "CAPTION", tCAPTION, 0, 0, 0},
  161. { "CHARACTERISTICS", tCHARACTERISTICS, 1, 0, 0},
  162. { "CHECKBOX", tCHECKBOX, 0, 0, 0},
  163. { "CHECKED", tCHECKED, 0, 0, 0},
  164. { "CLASS", tCLASS, 0, 0, 0},
  165. { "COMBOBOX", tCOMBOBOX, 0, 0, 0},
  166. { "CONTROL", tCONTROL, 0, 0, 0},
  167. { "CTEXT", tCTEXT, 0, 0, 0},
  168. { "CURSOR", tCURSOR, 0, 0, 0},
  169. { "DEFPUSHBUTTON", tDEFPUSHBUTTON, 0, 0, 0},
  170. { "DIALOG", tDIALOG, 0, 0, 0},
  171. { "DIALOGEX", tDIALOGEX, 1, 0, 0},
  172. { "DISCARDABLE", tDISCARDABLE, 0, 0, 0},
  173. { "DLGINIT", tDLGINIT, 0, 0, 0},
  174. { "EDITTEXT", tEDITTEXT, 0, 0, 0},
  175. { "END", tEND, 0, 0, 0},
  176. { "EXSTYLE", tEXSTYLE, 0, 0, 0},
  177. { "FILEFLAGS", tFILEFLAGS, 0, 0, 0},
  178. { "FILEFLAGSMASK", tFILEFLAGSMASK, 0, 0, 0},
  179. { "FILEOS", tFILEOS, 0, 0, 0},
  180. { "FILESUBTYPE", tFILESUBTYPE, 0, 0, 0},
  181. { "FILETYPE", tFILETYPE, 0, 0, 0},
  182. { "FILEVERSION", tFILEVERSION, 0, 0, 0},
  183. { "FIXED", tFIXED, 0, 0, 0},
  184. { "FONT", tFONT, 0, 0, 0},
  185. { "FONTDIR", tFONTDIR, 0, 0, 0}, /* This is a Borland BRC extension */
  186. { "GRAYED", tGRAYED, 0, 0, 0},
  187. { "GROUPBOX", tGROUPBOX, 0, 0, 0},
  188. { "HELP", tHELP, 0, 0, 0},
  189. { "HTML", tHTML, 0, 0, 0},
  190. { "ICON", tICON, 0, 0, 0},
  191. { "IMPURE", tIMPURE, 0, 0, 0},
  192. { "INACTIVE", tINACTIVE, 0, 0, 0},
  193. { "LANGUAGE", tLANGUAGE, 1, 0, 1},
  194. { "LISTBOX", tLISTBOX, 0, 0, 0},
  195. { "LOADONCALL", tLOADONCALL, 0, 0, 0},
  196. { "LTEXT", tLTEXT, 0, 0, 0},
  197. { "MENU", tMENU, 0, 0, 0},
  198. { "MENUBARBREAK", tMENUBARBREAK, 0, 0, 0},
  199. { "MENUBREAK", tMENUBREAK, 0, 0, 0},
  200. { "MENUEX", tMENUEX, 1, 0, 0},
  201. { "MENUITEM", tMENUITEM, 0, 0, 0},
  202. { "MESSAGETABLE", tMESSAGETABLE, 1, 0, 0},
  203. { "MOVEABLE", tMOVEABLE, 0, 0, 0},
  204. { "NOINVERT", tNOINVERT, 0, 0, 0},
  205. { "NOT", tNOT, 0, 0, 0},
  206. { "POPUP", tPOPUP, 0, 0, 0},
  207. { "PRELOAD", tPRELOAD, 0, 0, 0},
  208. { "PRODUCTVERSION", tPRODUCTVERSION, 0, 0, 0},
  209. { "PURE", tPURE, 0, 0, 0},
  210. { "PUSHBUTTON", tPUSHBUTTON, 0, 0, 0},
  211. { "RADIOBUTTON", tRADIOBUTTON, 0, 0, 0},
  212. { "RCDATA", tRCDATA, 0, 0, 0},
  213. { "RTEXT", tRTEXT, 0, 0, 0},
  214. { "SCROLLBAR", tSCROLLBAR, 0, 0, 0},
  215. { "SEPARATOR", tSEPARATOR, 0, 0, 0},
  216. { "SHIFT", tSHIFT, 0, 0, 0},
  217. { "STATE3", tSTATE3, 1, 0, 0},
  218. { "STRING", tSTRING, 0, 0, 0},
  219. { "STRINGTABLE", tSTRINGTABLE, 0, 0, 1},
  220. { "STYLE", tSTYLE, 0, 0, 0},
  221. { "TOOLBAR", tTOOLBAR, 1, 0, 0},
  222. { "VALUE", tVALUE, 0, 0, 0},
  223. { "VERSION", tVERSION, 1, 0, 0},
  224. { "VERSIONINFO", tVERSIONINFO, 0, 0, 0},
  225. { "VIRTKEY", tVIRTKEY, 0, 0, 0}
  226. };
  227. #define NKEYWORDS (sizeof(keywords)/sizeof(keywords[0]))
  228. #define KWP(p) ((const struct keyword *)(p))
  229. static int kw_cmp_func(const void *s1, const void *s2)
  230. {
  231. int ret;
  232. ret = compare_striA(KWP(s1)->keyword, KWP(s2)->keyword);
  233. if(!ret && (KWP(s1)->needcase || KWP(s2)->needcase))
  234. return strcmp(KWP(s1)->keyword, KWP(s2)->keyword);
  235. else
  236. return ret;
  237. }
  238. #define KW_BSEARCH
  239. #define DO_SORT
  240. static struct keyword *iskeyword(char *kw)
  241. {
  242. struct keyword *kwp;
  243. struct keyword key;
  244. key.keyword = kw;
  245. key.needcase = 0;
  246. #ifdef DO_SORT
  247. {
  248. /* Make sure that it is sorted for bsearsh */
  249. static int sorted = 0;
  250. if(!sorted)
  251. {
  252. qsort(keywords, NKEYWORDS, sizeof(keywords[0]), kw_cmp_func);
  253. sorted = 1;
  254. }
  255. }
  256. #endif
  257. #ifdef KW_BSEARCH
  258. kwp = bsearch(&key, keywords, NKEYWORDS, sizeof(keywords[0]), kw_cmp_func);
  259. #else
  260. {
  261. int i;
  262. for(i = 0; i < NKEYWORDS; i++)
  263. {
  264. if(!kw_cmp_func(&key, &keywords[i]))
  265. break;
  266. }
  267. if(i < NKEYWORDS)
  268. kwp = &keywords[i];
  269. else
  270. kwp = NULL;
  271. }
  272. #endif
  273. if(kwp == NULL || (kwp->isextension && !extensions))
  274. return NULL;
  275. else
  276. return kwp;
  277. }
  278. /* converts an integer in string form to an unsigned long and prints an error
  279. * on overflow */
  280. static unsigned long xstrtoul(const char *nptr, char **endptr, int base)
  281. {
  282. unsigned long l;
  283. errno = 0;
  284. l = strtoul(nptr, endptr, base);
  285. if (l == ULONG_MAX && errno == ERANGE)
  286. parser_error("integer constant %s is too large", nptr);
  287. return l;
  288. }
  289. %}
  290. /*
  291. **************************************************************************
  292. * The flexer starts here
  293. **************************************************************************
  294. */
  295. %%
  296. /*
  297. * Catch the GCC-style line statements here and parse them.
  298. * This has the advantage that you can #include at any
  299. * stage in the resource file.
  300. * The preprocessor generates line directives in the format:
  301. * # <linenum> "filename" <codes>
  302. *
  303. * Codes can be a sequence of:
  304. * - 1 start of new file
  305. * - 2 returning to previous
  306. * - 3 system header
  307. * - 4 interpret as C-code
  308. *
  309. * 4 is not used and 1 mutually excludes 2
  310. * Anyhow, we are not really interested in these at all
  311. * because we only want to know the linenumber and
  312. * filename.
  313. */
  314. <INITIAL,pp_cstrip>^{ws}*\#{ws}*pragma{ws}+ yy_push_state(pp_pragma);
  315. <INITIAL,pp_cstrip>^{ws}*\#{ws}* yy_push_state(pp_line);
  316. <pp_line>[^\n]* {
  317. int lineno, len;
  318. char *cptr;
  319. char *fname;
  320. yy_pop_state();
  321. lineno = (int)strtol(yytext, &cptr, 10);
  322. if(!lineno)
  323. parser_error("Malformed '#...' line-directive; invalid linenumber");
  324. fname = strchr(cptr, '"');
  325. if(!fname)
  326. parser_error("Malformed '#...' line-directive; missing filename");
  327. fname++;
  328. cptr = strchr(fname, '"');
  329. if(!cptr)
  330. parser_error("Malformed '#...' line-directive; missing terminating \"");
  331. *cptr = '\0';
  332. line_number = lineno - 1; /* We didn't read the newline */
  333. input_name = xstrdup(fname);
  334. /* ignore contents of C include files */
  335. len = strlen(input_name);
  336. if (len > 1 && !strcasecmp( input_name + len - 2, ".h" ))
  337. BEGIN(pp_cstrip);
  338. else
  339. BEGIN(INITIAL);
  340. }
  341. <pp_pragma>code_page[^\n]* yyless(9); yy_pop_state(); yy_push_state(pp_code_page);
  342. <pp_pragma>[^\n]* yy_pop_state(); if (pedantic) parser_warning("Unrecognized #pragma directive '%s'\n",yytext);
  343. <pp_code_page>\({ws}*default{ws}*\)[^\n]* current_codepage = -1; yy_pop_state();
  344. <pp_code_page>\({ws}*utf8{ws}*\)[^\n]* current_codepage = CP_UTF8; yy_pop_state();
  345. <pp_code_page>\({ws}*[0-9]+{ws}*\)[^\n]* {
  346. char *p = yytext;
  347. yy_pop_state();
  348. while (*p < '0' || *p > '9') p++;
  349. current_codepage = strtol( p, NULL, 10 );
  350. if (!is_valid_codepage( current_codepage ))
  351. {
  352. parser_error("Codepage %d not supported", current_codepage);
  353. current_codepage = 0;
  354. }
  355. }
  356. <pp_code_page>[^\n]* yy_pop_state(); parser_error("Malformed #pragma code_page directive");
  357. /*
  358. * Strip everything until a ';' taking
  359. * into account braces {} for structures,
  360. * classes and enums.
  361. */
  362. <pp_cstrip>\n line_number++; char_number = 1;
  363. <pp_cstrip>. ; /* ignore */
  364. \{ return tBEGIN;
  365. \} return tEND;
  366. [0-9]+[lL]? { parser_lval.num = xstrtoul(yytext, 0, 10);
  367. return (yytext[yyleng-1] == 'L' || yytext[yyleng-1] == 'l') ? tLNUMBER : tNUMBER; }
  368. 0[xX][0-9A-Fa-f]+[lL]? { parser_lval.num = xstrtoul(yytext, 0, 16);
  369. return (yytext[yyleng-1] == 'L' || yytext[yyleng-1] == 'l') ? tLNUMBER : tNUMBER; }
  370. 0[oO][0-7]+[lL]? { parser_lval.num = xstrtoul(yytext+2, 0, 8);
  371. return (yytext[yyleng-1] == 'L' || yytext[yyleng-1] == 'l') ? tLNUMBER : tNUMBER; }
  372. [A-Za-z_0-9./\\][A-Za-z_0-9./\\\-]* {
  373. struct keyword *tok = iskeyword(yytext);
  374. if(tok)
  375. {
  376. if(wanted_id && !tok->alwayskw)
  377. {
  378. parser_lval.str = make_string(yytext);
  379. return tIDENT;
  380. }
  381. else
  382. return tok->token;
  383. }
  384. else
  385. {
  386. parser_lval.str = make_string(yytext);
  387. return tIDENT;
  388. }
  389. }
  390. /*
  391. * Wide string scanning
  392. */
  393. L\" {
  394. yy_push_state(tklstr);
  395. wbufidx = 0;
  396. if(!win32)
  397. parser_error("16bit resource contains unicode strings");
  398. }
  399. <tklstr>\"{ws}+ |
  400. <tklstr>\" {
  401. yy_pop_state();
  402. parser_lval.str = get_buffered_wstring();
  403. return tSTRING;
  404. }
  405. <tklstr>\\[0-7]{1,6} { /* octal escape sequence */
  406. unsigned int result;
  407. result = strtoul(yytext+1, 0, 8);
  408. if ( result > 0xffff )
  409. parser_error("Character constant out of range");
  410. addwchar((WCHAR)result);
  411. }
  412. <tklstr>\\x[0-9a-fA-F]{4} { /* hex escape sequence */
  413. unsigned int result;
  414. result = strtoul(yytext+2, 0, 16);
  415. addwchar((WCHAR)result);
  416. }
  417. <tklstr>\\x[0-9a-fA-F]{1,3} { parser_error("Invalid hex escape sequence '%s'", yytext); }
  418. <tklstr>\\[0-9]+ parser_error("Bad escape sequence");
  419. <tklstr>\\\n{ws}* line_number++; char_number = 1; /* backslash at EOL continues string after leading whitespace on next line */
  420. <tklstr>\\a addwchar('\a');
  421. <tklstr>\\b addwchar('\b');
  422. <tklstr>\\f addwchar('\f');
  423. <tklstr>\\n addwchar('\n');
  424. <tklstr>\\r addwchar('\r');
  425. <tklstr>\\t addwchar('\t');
  426. <tklstr>\\v addwchar('\v');
  427. <tklstr>\\. {
  428. if (yytext[1] & 0x80)
  429. parser_error("Invalid char %u in wide string", (unsigned char)yytext[1]);
  430. addwchar(yytext[1]);
  431. }
  432. <tklstr>\\\r\n addwchar(yytext[2]); line_number++; char_number = 1;
  433. <tklstr>\"\" addwchar('\"'); /* "bla""bla" -> "bla\"bla" */
  434. <tklstr>\\\"\" addwchar('\"'); /* "bla\""bla" -> "bla\"bla" */
  435. <tklstr>\"{ws}+\" ; /* "bla" "bla" -> "blabla" */
  436. <tklstr>[^\\\n\"]+ {
  437. char *yptr = yytext;
  438. while(*yptr) /* FIXME: codepage translation */
  439. {
  440. if (*yptr & 0x80)
  441. parser_error("Invalid char %u in wide string", (unsigned char)*yptr);
  442. addwchar(*yptr++ & 0xff);
  443. }
  444. }
  445. <tklstr>\n parser_error("Unterminated string");
  446. /*
  447. * Normal string scanning
  448. */
  449. \" yy_push_state(tkstr); cbufidx = 0;
  450. <tkstr>\"{ws}+ |
  451. <tkstr>\" {
  452. yy_pop_state();
  453. parser_lval.str = get_buffered_cstring();
  454. return tSTRING;
  455. }
  456. <tkstr>\\[0-7]{1,3} { /* octal escape sequence */
  457. int result;
  458. result = strtol(yytext+1, 0, 8);
  459. if ( result > 0xff )
  460. parser_error("Character constant out of range");
  461. addcchar((char)result);
  462. }
  463. <tkstr>\\x[0-9a-fA-F]{2} { /* hex escape sequence */
  464. int result;
  465. result = strtol(yytext+2, 0, 16);
  466. addcchar((char)result);
  467. }
  468. <tkstr>\\x[0-9a-fA-F] { parser_error("Invalid hex escape sequence '%s'", yytext); }
  469. <tkstr>\\[0-9]+ parser_error("Bad escape sequence");
  470. <tkstr>\\\n{ws}* line_number++; char_number = 1; /* backslash at EOL continues string after leading whitespace on next line */
  471. <tkstr>\\a addcchar('\a');
  472. <tkstr>\\b addcchar('\b');
  473. <tkstr>\\f addcchar('\f');
  474. <tkstr>\\n addcchar('\n');
  475. <tkstr>\\r addcchar('\r');
  476. <tkstr>\\t addcchar('\t');
  477. <tkstr>\\v addcchar('\v');
  478. <tkstr>\\. addcchar(yytext[1]);
  479. <tkstr>\\\r\n addcchar(yytext[2]); line_number++; char_number = 1;
  480. <tkstr>[^\\\n\"]+ {
  481. char *yptr = yytext;
  482. while(*yptr)
  483. addcchar(*yptr++);
  484. }
  485. <tkstr>\"\" addcchar('\"'); /* "bla""bla" -> "bla\"bla" */
  486. <tkstr>\\\"\" addcchar('\"'); /* "bla\""bla" -> "bla\"bla" */
  487. <tkstr>\"{ws}+\" ; /* "bla" "bla" -> "blabla" */
  488. <tkstr>\n parser_error("Unterminated string");
  489. /*
  490. * Raw data scanning
  491. */
  492. \' yy_push_state(tkrcd); cbufidx = 0;
  493. <tkrcd>\' {
  494. yy_pop_state();
  495. parser_lval.raw = new_raw_data();
  496. parser_lval.raw->size = cbufidx;
  497. parser_lval.raw->data = xmalloc(parser_lval.raw->size);
  498. memcpy(parser_lval.raw->data, cbuffer, parser_lval.raw->size);
  499. return tRAWDATA;
  500. }
  501. <tkrcd>[0-9a-fA-F]{2} {
  502. int result;
  503. result = strtol(yytext, 0, 16);
  504. addcchar((char)result);
  505. }
  506. <tkrcd>{ws}+ ; /* Ignore space */
  507. <tkrcd>\n line_number++; char_number = 1;
  508. <tkrcd>. parser_error("Malformed data-line");
  509. /*
  510. * Comment stripping
  511. * Should never occur after preprocessing
  512. */
  513. <INITIAL,pp_cstrip>"/*" {
  514. yy_push_state(comment);
  515. save_wanted_id = wanted_id;
  516. if(!no_preprocess)
  517. parser_warning("Found comments after preprocessing, please report\n");
  518. }
  519. <comment>[^*\n]* ;
  520. <comment>"*"+[^*/\n]* ;
  521. <comment>\n line_number++; char_number = 1;
  522. <comment>"*"+"/" yy_pop_state(); want_id = save_wanted_id;
  523. ;[^\n]* want_id = wanted_id; /* not really comment, but left-over c-junk */
  524. "//"[^\n]* want_id = wanted_id; if(!no_preprocess) parser_warning("Found comments after preprocessing, please report\n");
  525. \n {
  526. want_id = wanted_id;
  527. line_number++;
  528. char_number = 1;
  529. if(want_nl)
  530. {
  531. want_nl = 0;
  532. return tNL;
  533. }
  534. }
  535. {ws}+ want_id = wanted_id; /* Eat whitespace */
  536. <INITIAL>[ -~] return yytext[0];
  537. <*>.|\n {
  538. /* Catch all rule to find any unmatched text */
  539. if(*yytext == '\n')
  540. {
  541. line_number++;
  542. char_number = 1;
  543. }
  544. parser_error("Unmatched text '%c' (0x%02x) YY_START=%d",
  545. isprint((unsigned char)*yytext) ? *yytext : '.', *yytext, YY_START);
  546. }
  547. %%
  548. /* These dup functions copy the enclosed '\0' from
  549. * the resource string.
  550. */
  551. static void addcchar(char c)
  552. {
  553. if(cbufidx >= cbufalloc)
  554. {
  555. cbufalloc += 1024;
  556. cbuffer = xrealloc(cbuffer, cbufalloc * sizeof(cbuffer[0]));
  557. if(cbufalloc > 65536)
  558. parser_warning("Reallocating string buffer larger than 64kB\n");
  559. }
  560. cbuffer[cbufidx++] = c;
  561. }
  562. static void addwchar(WCHAR s)
  563. {
  564. if(wbufidx >= wbufalloc)
  565. {
  566. wbufalloc += 1024;
  567. wbuffer = xrealloc(wbuffer, wbufalloc * sizeof(wbuffer[0]));
  568. if(wbufalloc > 65536)
  569. parser_warning("Reallocating wide string buffer larger than 64kB\n");
  570. }
  571. wbuffer[wbufidx++] = s;
  572. }
  573. static string_t *get_buffered_cstring(void)
  574. {
  575. string_t *str = new_string();
  576. str->size = cbufidx;
  577. str->type = str_char;
  578. str->str.cstr = xmalloc(cbufidx+1);
  579. memcpy(str->str.cstr, cbuffer, cbufidx);
  580. str->str.cstr[cbufidx] = '\0';
  581. if (!current_codepage || current_codepage == -1 || !win32) /* store as ANSI string */
  582. {
  583. if (!current_codepage) parser_error("Codepage set to Unicode only, cannot use ASCII string here");
  584. return str;
  585. }
  586. else /* convert to Unicode before storing */
  587. {
  588. string_t *str_w = convert_string_unicode( str, current_codepage );
  589. if (check_valid_utf8( str, current_codepage ))
  590. parser_warning( "string \"%s\" seems to be UTF-8 but codepage %u is in use, maybe use --utf8?\n",
  591. str->str.cstr, current_codepage );
  592. free_string( str );
  593. return str_w;
  594. }
  595. }
  596. static string_t *get_buffered_wstring(void)
  597. {
  598. string_t *str = new_string();
  599. str->size = wbufidx;
  600. str->type = str_unicode;
  601. str->str.wstr = xmalloc((wbufidx+1)*sizeof(WCHAR));
  602. memcpy(str->str.wstr, wbuffer, wbufidx*sizeof(WCHAR));
  603. str->str.wstr[wbufidx] = 0;
  604. return str;
  605. }
  606. static string_t *make_string(char *s)
  607. {
  608. string_t *ret, *str = new_string();
  609. str->size = strlen(s);
  610. str->type = str_char;
  611. str->str.cstr = xmalloc(str->size+1);
  612. memcpy(str->str.cstr, s, str->size+1);
  613. if (current_codepage <= 0 || !win32) return str;
  614. ret = convert_string_unicode( str, current_codepage );
  615. free_string( str );
  616. return ret;
  617. }