xmltok_impl.c 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841
  1. /*
  2. Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
  3. See the file copying.txt for copying permission.
  4. */
  /*
   * Default validity hook: when the including encoding does not provide
   * IS_INVALID_CHAR, no multi-byte sequence is ever reported as invalid.
   */
  #ifndef IS_INVALID_CHAR
  #define IS_INVALID_CHAR(enc, ptr, n) (0)
  #endif

  /*
   * Switch case for the lead byte of an n-byte character, for contexts in
   * which any well-formed character is acceptable.
   *
   *   - fewer than n bytes left before `end` -> return XML_TOK_PARTIAL_CHAR
   *   - IS_INVALID_CHAR rejects the sequence -> *nextTokPtr = ptr and
   *                                             return XML_TOK_INVALID
   *   - otherwise                            -> step ptr over the n bytes
   *
   * The expansion uses `enc` and `end` from the enclosing scope; they are not
   * macro parameters.
   */
  #define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
      case BT_LEAD ## n: \
        if (end - ptr < n) \
          return XML_TOK_PARTIAL_CHAR; \
        if (IS_INVALID_CHAR(enc, ptr, n)) { \
          *(nextTokPtr) = (ptr); \
          return XML_TOK_INVALID; \
        } \
        ptr += n; \
        break;

  /*
   * All the "this byte cannot simply be skipped" switch cases in one place:
   * the 2-, 3- and 4-byte lead cases above, plus BT_NONXML, BT_MALFORM and
   * BT_TRAIL, which always yield XML_TOK_INVALID with *nextTokPtr = ptr.
   *
   * The expansion deliberately ends without a semicolon; the use site
   * supplies it.
   */
  #define INVALID_CASES(ptr, nextTokPtr) \
      INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
      INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
      INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
      case BT_NONXML: \
      case BT_MALFORM: \
      case BT_TRAIL: \
        *(nextTokPtr) = (ptr); \
        return XML_TOK_INVALID
  /*
   * Switch case for the lead byte of an n-byte character inside a name.
   * Returns XML_TOK_PARTIAL_CHAR when fewer than n bytes remain, reports
   * XML_TOK_INVALID (with *nextTokPtr = ptr) when IS_NAME_CHAR does not accept
   * the character, and otherwise advances ptr by n.
   */
  #define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
      case BT_LEAD ## n: \
        if (end - ptr < n) \
          return XML_TOK_PARTIAL_CHAR; \
        if (!IS_NAME_CHAR(enc, ptr, n)) { \
          *nextTokPtr = ptr; \
          return XML_TOK_INVALID; \
        } \
        ptr += n; \
        break;

  /*
   * Switch cases that consume one name character.
   *
   * BT_NONASCII is first checked with IS_NAME_CHAR_MINBPC and, if accepted,
   * falls through to the single-unit case shared with BT_NMSTRT, BT_HEX,
   * BT_DIGIT, BT_NAME and BT_MINUS, which advances ptr by MINBPC(enc).
   * Multi-byte lead bytes are handled by CHECK_NAME_CASE for n = 2, 3, 4.
   */
  #define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
      case BT_NONASCII: \
        if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
          *nextTokPtr = ptr; \
          return XML_TOK_INVALID; \
        } \
        /* fall through */ \
      case BT_NMSTRT: \
      case BT_HEX: \
      case BT_DIGIT: \
      case BT_NAME: \
      case BT_MINUS: \
        ptr += MINBPC(enc); \
        break; \
      CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
      CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
      CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
  /*
   * Switch case for the lead byte of an n-byte character at the start of a
   * name.  Returns XML_TOK_PARTIAL_CHAR when fewer than n bytes remain,
   * reports XML_TOK_INVALID (with *nextTokPtr = ptr) when IS_NMSTRT_CHAR does
   * not accept the character, and otherwise advances ptr by n.
   */
  #define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
      case BT_LEAD ## n: \
        if (end - ptr < n) \
          return XML_TOK_PARTIAL_CHAR; \
        if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
          *nextTokPtr = ptr; \
          return XML_TOK_INVALID; \
        } \
        ptr += n; \
        break;

  /*
   * Switch cases that consume one name-start character.
   *
   * BT_NONASCII is first checked with IS_NMSTRT_CHAR_MINBPC and, if accepted,
   * falls through to the case shared with BT_NMSTRT and BT_HEX, which
   * advances ptr by MINBPC(enc).  Multi-byte lead bytes are handled by
   * CHECK_NMSTRT_CASE for n = 2, 3, 4.
   */
  #define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
      case BT_NONASCII: \
        if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
          *nextTokPtr = ptr; \
          return XML_TOK_INVALID; \
        } \
        /* fall through */ \
      case BT_NMSTRT: \
      case BT_HEX: \
        ptr += MINBPC(enc); \
        break; \
      CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
      CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
      CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
  /*
   * Name decoration for the functions in this file.  If whoever includes the
   * file has not defined PREFIX, identifiers are used undecorated.
   */
  #if !defined(PREFIX)
  #define PREFIX(ident) ident
  #endif
  79. /* ptr points to character following "<!-" */
  80. static
  81. int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
  82. const char **nextTokPtr)
  83. {
  84. if (ptr != end) {
  85. if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
  86. *nextTokPtr = ptr;
  87. return XML_TOK_INVALID;
  88. }
  89. ptr += MINBPC(enc);
  90. while (ptr != end) {
  91. switch (BYTE_TYPE(enc, ptr)) {
  92. INVALID_CASES(ptr, nextTokPtr);
  93. case BT_MINUS:
  94. if ((ptr += MINBPC(enc)) == end)
  95. return XML_TOK_PARTIAL;
  96. if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
  97. if ((ptr += MINBPC(enc)) == end)
  98. return XML_TOK_PARTIAL;
  99. if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  100. *nextTokPtr = ptr;
  101. return XML_TOK_INVALID;
  102. }
  103. *nextTokPtr = ptr + MINBPC(enc);
  104. return XML_TOK_COMMENT;
  105. }
  106. break;
  107. default:
  108. ptr += MINBPC(enc);
  109. break;
  110. }
  111. }
  112. }
  113. return XML_TOK_PARTIAL;
  114. }
  115. /* ptr points to character following "<!" */
  116. static
  117. int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
  118. const char **nextTokPtr)
  119. {
  120. if (ptr == end)
  121. return XML_TOK_PARTIAL;
  122. switch (BYTE_TYPE(enc, ptr)) {
  123. case BT_MINUS:
  124. return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  125. case BT_LSQB:
  126. *nextTokPtr = ptr + MINBPC(enc);
  127. return XML_TOK_COND_SECT_OPEN;
  128. case BT_NMSTRT:
  129. case BT_HEX:
  130. ptr += MINBPC(enc);
  131. break;
  132. default:
  133. *nextTokPtr = ptr;
  134. return XML_TOK_INVALID;
  135. }
  136. while (ptr != end) {
  137. switch (BYTE_TYPE(enc, ptr)) {
  138. case BT_PERCNT:
  139. if (ptr + MINBPC(enc) == end)
  140. return XML_TOK_PARTIAL;
  141. /* don't allow <!ENTITY% foo "whatever"> */
  142. switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
  143. case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
  144. *nextTokPtr = ptr;
  145. return XML_TOK_INVALID;
  146. }
  147. /* fall through */
  148. case BT_S: case BT_CR: case BT_LF:
  149. *nextTokPtr = ptr;
  150. return XML_TOK_DECL_OPEN;
  151. case BT_NMSTRT:
  152. case BT_HEX:
  153. ptr += MINBPC(enc);
  154. break;
  155. default:
  156. *nextTokPtr = ptr;
  157. return XML_TOK_INVALID;
  158. }
  159. }
  160. return XML_TOK_PARTIAL;
  161. }
  162. static
  163. int PREFIX(checkPiTarget)(const ENCODING * enc ATTR_UNUSED,
  164. const char * ptr,
  165. const char * end,
  166. int * tokPtr)
  167. {
  168. int upper = 0;
  169. *tokPtr = XML_TOK_PI;
  170. if (end - ptr != MINBPC(enc)*3)
  171. return 1;
  172. switch (BYTE_TO_ASCII(enc, ptr)) {
  173. case ASCII_x:
  174. break;
  175. case ASCII_X:
  176. upper = 1;
  177. break;
  178. default:
  179. return 1;
  180. }
  181. ptr += MINBPC(enc);
  182. switch (BYTE_TO_ASCII(enc, ptr)) {
  183. case ASCII_m:
  184. break;
  185. case ASCII_M:
  186. upper = 1;
  187. break;
  188. default:
  189. return 1;
  190. }
  191. ptr += MINBPC(enc);
  192. switch (BYTE_TO_ASCII(enc, ptr)) {
  193. case ASCII_l:
  194. break;
  195. case ASCII_L:
  196. upper = 1;
  197. break;
  198. default:
  199. return 1;
  200. }
  201. if (upper)
  202. return 0;
  203. *tokPtr = XML_TOK_XML_DECL;
  204. return 1;
  205. }
  206. /* ptr points to character following "<?" */
  207. static
  208. int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
  209. const char **nextTokPtr)
  210. {
  211. int tok;
  212. const char *target = ptr;
  213. if (ptr == end)
  214. return XML_TOK_PARTIAL;
  215. switch (BYTE_TYPE(enc, ptr)) {
  216. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  217. default:
  218. *nextTokPtr = ptr;
  219. return XML_TOK_INVALID;
  220. }
  221. while (ptr != end) {
  222. switch (BYTE_TYPE(enc, ptr)) {
  223. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  224. case BT_S: case BT_CR: case BT_LF:
  225. if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
  226. *nextTokPtr = ptr;
  227. return XML_TOK_INVALID;
  228. }
  229. ptr += MINBPC(enc);
  230. while (ptr != end) {
  231. switch (BYTE_TYPE(enc, ptr)) {
  232. INVALID_CASES(ptr, nextTokPtr);
  233. case BT_QUEST:
  234. ptr += MINBPC(enc);
  235. if (ptr == end)
  236. return XML_TOK_PARTIAL;
  237. if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  238. *nextTokPtr = ptr + MINBPC(enc);
  239. return tok;
  240. }
  241. break;
  242. default:
  243. ptr += MINBPC(enc);
  244. break;
  245. }
  246. }
  247. return XML_TOK_PARTIAL;
  248. case BT_QUEST:
  249. if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
  250. *nextTokPtr = ptr;
  251. return XML_TOK_INVALID;
  252. }
  253. ptr += MINBPC(enc);
  254. if (ptr == end)
  255. return XML_TOK_PARTIAL;
  256. if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  257. *nextTokPtr = ptr + MINBPC(enc);
  258. return tok;
  259. }
  260. /* fall through */
  261. default:
  262. *nextTokPtr = ptr;
  263. return XML_TOK_INVALID;
  264. }
  265. }
  266. return XML_TOK_PARTIAL;
  267. }
  268. static
  269. int PREFIX(scanCdataSection)(const ENCODING * enc ATTR_UNUSED,
  270. const char * ptr,
  271. const char * end,
  272. const char ** nextTokPtr)
  273. {
  274. static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB };
  275. int i;
  276. /* CDATA[ */
  277. if (end - ptr < 6 * MINBPC(enc))
  278. return XML_TOK_PARTIAL;
  279. for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
  280. if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
  281. *nextTokPtr = ptr;
  282. return XML_TOK_INVALID;
  283. }
  284. }
  285. *nextTokPtr = ptr;
  286. return XML_TOK_CDATA_SECT_OPEN;
  287. }
  288. static
  289. int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
  290. const char **nextTokPtr)
  291. {
  292. if (ptr == end)
  293. return XML_TOK_NONE;
  294. if (MINBPC(enc) > 1) {
  295. size_t n = end - ptr;
  296. if (n & (MINBPC(enc) - 1)) {
  297. n &= ~(MINBPC(enc) - 1);
  298. if (n == 0)
  299. return XML_TOK_PARTIAL;
  300. end = ptr + n;
  301. }
  302. }
  303. switch (BYTE_TYPE(enc, ptr)) {
  304. case BT_RSQB:
  305. ptr += MINBPC(enc);
  306. if (ptr == end)
  307. return XML_TOK_PARTIAL;
  308. if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
  309. break;
  310. ptr += MINBPC(enc);
  311. if (ptr == end)
  312. return XML_TOK_PARTIAL;
  313. if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  314. ptr -= MINBPC(enc);
  315. break;
  316. }
  317. *nextTokPtr = ptr + MINBPC(enc);
  318. return XML_TOK_CDATA_SECT_CLOSE;
  319. case BT_CR:
  320. ptr += MINBPC(enc);
  321. if (ptr == end)
  322. return XML_TOK_PARTIAL;
  323. if (BYTE_TYPE(enc, ptr) == BT_LF)
  324. ptr += MINBPC(enc);
  325. *nextTokPtr = ptr;
  326. return XML_TOK_DATA_NEWLINE;
  327. case BT_LF:
  328. *nextTokPtr = ptr + MINBPC(enc);
  329. return XML_TOK_DATA_NEWLINE;
  330. INVALID_CASES(ptr, nextTokPtr);
  331. default:
  332. ptr += MINBPC(enc);
  333. break;
  334. }
  335. while (ptr != end) {
  336. switch (BYTE_TYPE(enc, ptr)) {
  337. #define LEAD_CASE(n) \
  338. case BT_LEAD ## n: \
  339. if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
  340. *nextTokPtr = ptr; \
  341. return XML_TOK_DATA_CHARS; \
  342. } \
  343. ptr += n; \
  344. break;
  345. LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  346. #undef LEAD_CASE
  347. case BT_NONXML:
  348. case BT_MALFORM:
  349. case BT_TRAIL:
  350. case BT_CR:
  351. case BT_LF:
  352. case BT_RSQB:
  353. *nextTokPtr = ptr;
  354. return XML_TOK_DATA_CHARS;
  355. default:
  356. ptr += MINBPC(enc);
  357. break;
  358. }
  359. }
  360. *nextTokPtr = ptr;
  361. return XML_TOK_DATA_CHARS;
  362. }
  363. /* ptr points to character following "</" */
  364. static
  365. int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
  366. const char **nextTokPtr)
  367. {
  368. if (ptr == end)
  369. return XML_TOK_PARTIAL;
  370. switch (BYTE_TYPE(enc, ptr)) {
  371. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  372. default:
  373. *nextTokPtr = ptr;
  374. return XML_TOK_INVALID;
  375. }
  376. while (ptr != end) {
  377. switch (BYTE_TYPE(enc, ptr)) {
  378. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  379. case BT_S: case BT_CR: case BT_LF:
  380. for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
  381. switch (BYTE_TYPE(enc, ptr)) {
  382. case BT_S: case BT_CR: case BT_LF:
  383. break;
  384. case BT_GT:
  385. *nextTokPtr = ptr + MINBPC(enc);
  386. return XML_TOK_END_TAG;
  387. default:
  388. *nextTokPtr = ptr;
  389. return XML_TOK_INVALID;
  390. }
  391. }
  392. return XML_TOK_PARTIAL;
  393. case BT_COLON:
  394. /* no need to check qname syntax here, since end-tag must match exactly */
  395. ptr += MINBPC(enc);
  396. break;
  397. case BT_GT:
  398. *nextTokPtr = ptr + MINBPC(enc);
  399. return XML_TOK_END_TAG;
  400. default:
  401. *nextTokPtr = ptr;
  402. return XML_TOK_INVALID;
  403. }
  404. }
  405. return XML_TOK_PARTIAL;
  406. }
  407. /* ptr points to character following "&#X" */
  408. static
  409. int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end,
  410. const char **nextTokPtr)
  411. {
  412. if (ptr != end) {
  413. switch (BYTE_TYPE(enc, ptr)) {
  414. case BT_DIGIT:
  415. case BT_HEX:
  416. break;
  417. default:
  418. *nextTokPtr = ptr;
  419. return XML_TOK_INVALID;
  420. }
  421. for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
  422. switch (BYTE_TYPE(enc, ptr)) {
  423. case BT_DIGIT:
  424. case BT_HEX:
  425. break;
  426. case BT_SEMI:
  427. *nextTokPtr = ptr + MINBPC(enc);
  428. return XML_TOK_CHAR_REF;
  429. default:
  430. *nextTokPtr = ptr;
  431. return XML_TOK_INVALID;
  432. }
  433. }
  434. }
  435. return XML_TOK_PARTIAL;
  436. }
  437. /* ptr points to character following "&#" */
  438. static
  439. int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
  440. const char **nextTokPtr)
  441. {
  442. if (ptr != end) {
  443. if (CHAR_MATCHES(enc, ptr, ASCII_x))
  444. return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  445. switch (BYTE_TYPE(enc, ptr)) {
  446. case BT_DIGIT:
  447. break;
  448. default:
  449. *nextTokPtr = ptr;
  450. return XML_TOK_INVALID;
  451. }
  452. for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
  453. switch (BYTE_TYPE(enc, ptr)) {
  454. case BT_DIGIT:
  455. break;
  456. case BT_SEMI:
  457. *nextTokPtr = ptr + MINBPC(enc);
  458. return XML_TOK_CHAR_REF;
  459. default:
  460. *nextTokPtr = ptr;
  461. return XML_TOK_INVALID;
  462. }
  463. }
  464. }
  465. return XML_TOK_PARTIAL;
  466. }
  467. /* ptr points to character following "&" */
  468. static
  469. int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
  470. const char **nextTokPtr)
  471. {
  472. if (ptr == end)
  473. return XML_TOK_PARTIAL;
  474. switch (BYTE_TYPE(enc, ptr)) {
  475. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  476. case BT_NUM:
  477. return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  478. default:
  479. *nextTokPtr = ptr;
  480. return XML_TOK_INVALID;
  481. }
  482. while (ptr != end) {
  483. switch (BYTE_TYPE(enc, ptr)) {
  484. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  485. case BT_SEMI:
  486. *nextTokPtr = ptr + MINBPC(enc);
  487. return XML_TOK_ENTITY_REF;
  488. default:
  489. *nextTokPtr = ptr;
  490. return XML_TOK_INVALID;
  491. }
  492. }
  493. return XML_TOK_PARTIAL;
  494. }
  495. /* ptr points to character following first character of attribute name */
  496. static
  497. int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
  498. const char **nextTokPtr)
  499. {
  500. int hadColon = 0;
  501. while (ptr != end) {
  502. switch (BYTE_TYPE(enc, ptr)) {
  503. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  504. case BT_COLON:
  505. if (hadColon) {
  506. *nextTokPtr = ptr;
  507. return XML_TOK_INVALID;
  508. }
  509. hadColon = 1;
  510. ptr += MINBPC(enc);
  511. if (ptr == end)
  512. return XML_TOK_PARTIAL;
  513. switch (BYTE_TYPE(enc, ptr)) {
  514. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  515. default:
  516. *nextTokPtr = ptr;
  517. return XML_TOK_INVALID;
  518. }
  519. break;
  520. case BT_S: case BT_CR: case BT_LF:
  521. for (;;) {
  522. int t;
  523. ptr += MINBPC(enc);
  524. if (ptr == end)
  525. return XML_TOK_PARTIAL;
  526. t = BYTE_TYPE(enc, ptr);
  527. if (t == BT_EQUALS)
  528. break;
  529. switch (t) {
  530. case BT_S:
  531. case BT_LF:
  532. case BT_CR:
  533. break;
  534. default:
  535. *nextTokPtr = ptr;
  536. return XML_TOK_INVALID;
  537. }
  538. }
  539. /* fall through */
  540. case BT_EQUALS:
  541. {
  542. int open;
  543. hadColon = 0;
  544. for (;;) {
  545. ptr += MINBPC(enc);
  546. if (ptr == end)
  547. return XML_TOK_PARTIAL;
  548. open = BYTE_TYPE(enc, ptr);
  549. if (open == BT_QUOT || open == BT_APOS)
  550. break;
  551. switch (open) {
  552. case BT_S:
  553. case BT_LF:
  554. case BT_CR:
  555. break;
  556. default:
  557. *nextTokPtr = ptr;
  558. return XML_TOK_INVALID;
  559. }
  560. }
  561. ptr += MINBPC(enc);
  562. /* in attribute value */
  563. for (;;) {
  564. int t;
  565. if (ptr == end)
  566. return XML_TOK_PARTIAL;
  567. t = BYTE_TYPE(enc, ptr);
  568. if (t == open)
  569. break;
  570. switch (t) {
  571. INVALID_CASES(ptr, nextTokPtr);
  572. case BT_AMP:
  573. {
  574. int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
  575. if (tok <= 0) {
  576. if (tok == XML_TOK_INVALID)
  577. *nextTokPtr = ptr;
  578. return tok;
  579. }
  580. break;
  581. }
  582. case BT_LT:
  583. *nextTokPtr = ptr;
  584. return XML_TOK_INVALID;
  585. default:
  586. ptr += MINBPC(enc);
  587. break;
  588. }
  589. }
  590. ptr += MINBPC(enc);
  591. if (ptr == end)
  592. return XML_TOK_PARTIAL;
  593. switch (BYTE_TYPE(enc, ptr)) {
  594. case BT_S:
  595. case BT_CR:
  596. case BT_LF:
  597. break;
  598. case BT_SOL:
  599. goto sol;
  600. case BT_GT:
  601. goto gt;
  602. default:
  603. *nextTokPtr = ptr;
  604. return XML_TOK_INVALID;
  605. }
  606. /* ptr points to closing quote */
  607. for (;;) {
  608. ptr += MINBPC(enc);
  609. if (ptr == end)
  610. return XML_TOK_PARTIAL;
  611. switch (BYTE_TYPE(enc, ptr)) {
  612. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  613. case BT_S: case BT_CR: case BT_LF:
  614. continue;
  615. case BT_GT:
  616. gt:
  617. *nextTokPtr = ptr + MINBPC(enc);
  618. return XML_TOK_START_TAG_WITH_ATTS;
  619. case BT_SOL:
  620. sol:
  621. ptr += MINBPC(enc);
  622. if (ptr == end)
  623. return XML_TOK_PARTIAL;
  624. if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  625. *nextTokPtr = ptr;
  626. return XML_TOK_INVALID;
  627. }
  628. *nextTokPtr = ptr + MINBPC(enc);
  629. return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
  630. default:
  631. *nextTokPtr = ptr;
  632. return XML_TOK_INVALID;
  633. }
  634. break;
  635. }
  636. break;
  637. }
  638. default:
  639. *nextTokPtr = ptr;
  640. return XML_TOK_INVALID;
  641. }
  642. }
  643. return XML_TOK_PARTIAL;
  644. }
  645. /* ptr points to character following "<" */
  646. static
  647. int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
  648. const char **nextTokPtr)
  649. {
  650. int hadColon;
  651. if (ptr == end)
  652. return XML_TOK_PARTIAL;
  653. switch (BYTE_TYPE(enc, ptr)) {
  654. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  655. case BT_EXCL:
  656. if ((ptr += MINBPC(enc)) == end)
  657. return XML_TOK_PARTIAL;
  658. switch (BYTE_TYPE(enc, ptr)) {
  659. case BT_MINUS:
  660. return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  661. case BT_LSQB:
  662. return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  663. }
  664. *nextTokPtr = ptr;
  665. return XML_TOK_INVALID;
  666. case BT_QUEST:
  667. return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  668. case BT_SOL:
  669. return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  670. default:
  671. *nextTokPtr = ptr;
  672. return XML_TOK_INVALID;
  673. }
  674. hadColon = 0;
  675. /* we have a start-tag */
  676. while (ptr != end) {
  677. switch (BYTE_TYPE(enc, ptr)) {
  678. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  679. case BT_COLON:
  680. if (hadColon) {
  681. *nextTokPtr = ptr;
  682. return XML_TOK_INVALID;
  683. }
  684. hadColon = 1;
  685. ptr += MINBPC(enc);
  686. if (ptr == end)
  687. return XML_TOK_PARTIAL;
  688. switch (BYTE_TYPE(enc, ptr)) {
  689. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  690. default:
  691. *nextTokPtr = ptr;
  692. return XML_TOK_INVALID;
  693. }
  694. break;
  695. case BT_S: case BT_CR: case BT_LF:
  696. {
  697. ptr += MINBPC(enc);
  698. while (ptr != end) {
  699. switch (BYTE_TYPE(enc, ptr)) {
  700. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  701. case BT_GT:
  702. goto gt;
  703. case BT_SOL:
  704. goto sol;
  705. case BT_S: case BT_CR: case BT_LF:
  706. ptr += MINBPC(enc);
  707. continue;
  708. default:
  709. *nextTokPtr = ptr;
  710. return XML_TOK_INVALID;
  711. }
  712. return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
  713. }
  714. return XML_TOK_PARTIAL;
  715. }
  716. case BT_GT:
  717. gt:
  718. *nextTokPtr = ptr + MINBPC(enc);
  719. return XML_TOK_START_TAG_NO_ATTS;
  720. case BT_SOL:
  721. sol:
  722. ptr += MINBPC(enc);
  723. if (ptr == end)
  724. return XML_TOK_PARTIAL;
  725. if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  726. *nextTokPtr = ptr;
  727. return XML_TOK_INVALID;
  728. }
  729. *nextTokPtr = ptr + MINBPC(enc);
  730. return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
  731. default:
  732. *nextTokPtr = ptr;
  733. return XML_TOK_INVALID;
  734. }
  735. }
  736. return XML_TOK_PARTIAL;
  737. }
  /*
   * Switch case for the lead byte of an n-byte character inside character
   * data.  If the sequence is cut off by `end` or rejected by
   * IS_INVALID_CHAR, the data run seen so far is returned as
   * XML_TOK_DATA_CHARS with *nextTokPtr at the lead byte; otherwise ptr steps
   * over the n bytes.
   *
   * Uses `enc`, `ptr`, `end` and `nextTokPtr` from the enclosing scope.  The
   * expansion ends without a semicolon; the use site supplies it.
   */
  #define LEAD_CASE(n) \
      case BT_LEAD ## n: \
        if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
          *nextTokPtr = ptr; \
          return XML_TOK_DATA_CHARS; \
        } \
        ptr += n; \
        break
  746. static void
  747. PREFIX(chopToWholeCharacters)(const char * const inputStart,
  748. const char * const inputEnd,
  749. const char ** const choppedEndP) {
  750. if (MINBPC(enc) > 1) {
  751. size_t const length = inputEnd - inputStart;
  752. if (length & (MINBPC(enc) - 1)) {
  753. size_t const roundedLen = length & ~(MINBPC(enc) - 1);
  754. *choppedEndP = inputStart + roundedLen;
  755. } else
  756. *choppedEndP = inputEnd;
  757. } else
  758. *choppedEndP = inputEnd;
  759. }
  760. static void
  761. PREFIX(processBtRsqb)(const ENCODING * const enc ATTR_UNUSED,
  762. const char * const start,
  763. const char * const end,
  764. unsigned int * const countP,
  765. bool * const invalidP) {
  766. if (start + MINBPC(enc) < end) {
  767. if (!CHAR_MATCHES(enc, start + MINBPC(enc), ASCII_RSQB)) {
  768. *countP = MINBPC(enc);
  769. *invalidP = false;
  770. } else {
  771. if (start + 2*MINBPC(enc) < end) {
  772. if (!CHAR_MATCHES(enc, start + 2*MINBPC(enc), ASCII_GT)) {
  773. *countP = MINBPC(enc);
  774. *invalidP = false;
  775. } else {
  776. *countP = 2 * MINBPC(enc);
  777. *invalidP = true;
  778. }
  779. } else {
  780. *countP = 0;
  781. *invalidP = false;
  782. }
  783. }
  784. } else {
  785. *countP = 0;
  786. *invalidP = false;
  787. }
  788. }
  789. static int
  790. PREFIX(contentTok)(const ENCODING * const enc,
  791. const char * const inputStart,
  792. const char * const inputEnd,
  793. const char ** const nextTokPtr) {
  794. /*----------------------------------------------------------------------------
  795. Parse off a token from the string that starts at 'inputStart' and ends at
  796. 'inputEnd'. Return the class of that token.
  797. Return *nextTokPtr pointing just after the parsed-off token in the string.
  798. Sometimes, there is no token we can parse, so our return value is a
  799. disposition code indicating that situation and *nextTokPtr points to the
  800. beginning of the string.
  801. -----------------------------------------------------------------------------*/
  802. if (inputEnd == inputStart) {
  803. *nextTokPtr = inputStart;
  804. return XML_TOK_NONE;
  805. } else {
  806. const char * ptr;
  807. const char * end;
  808. /* The virtual end of the string; we look at only whole
  809. characters; e.g. if there are 2 bytes per character and the
  810. buffer is 9 bytes, we look at only the first 8 and 'end' points
  811. after the 8th byte.
  812. */
  813. PREFIX(chopToWholeCharacters)(inputStart, inputEnd, &end);
  814. if (end == inputStart) {
  815. *nextTokPtr = inputStart;
  816. return XML_TOK_PARTIAL;
  817. }
  818. ptr = inputStart; /* Start at the beginning */
  819. switch (BYTE_TYPE(enc, ptr)) {
  820. case BT_LT:
  821. return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  822. case BT_AMP:
  823. return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  824. case BT_CR:
  825. ptr += MINBPC(enc);
  826. if (ptr == end)
  827. return XML_TOK_TRAILING_CR;
  828. if (BYTE_TYPE(enc, ptr) == BT_LF)
  829. ptr += MINBPC(enc);
  830. *nextTokPtr = ptr;
  831. return XML_TOK_DATA_NEWLINE;
  832. case BT_LF:
  833. *nextTokPtr = ptr + MINBPC(enc);
  834. return XML_TOK_DATA_NEWLINE;
  835. case BT_RSQB:
  836. ptr += MINBPC(enc);
  837. if (ptr == end)
  838. return XML_TOK_TRAILING_RSQB;
  839. if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
  840. break;
  841. ptr += MINBPC(enc);
  842. if (ptr == end)
  843. return XML_TOK_TRAILING_RSQB;
  844. if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  845. ptr -= MINBPC(enc);
  846. break;
  847. }
  848. *nextTokPtr = ptr;
  849. return XML_TOK_INVALID;
  850. INVALID_CASES(ptr, nextTokPtr);
  851. default:
  852. ptr += MINBPC(enc);
  853. break;
  854. }
  855. while (ptr < end) {
  856. switch (BYTE_TYPE(enc, ptr)) {
  857. LEAD_CASE(2);
  858. LEAD_CASE(3);
  859. LEAD_CASE(4);
  860. case BT_RSQB: {
  861. bool invalid;
  862. unsigned int count;
  863. PREFIX(processBtRsqb)(enc, ptr, end, &count, &invalid);
  864. ptr += count;
  865. if (invalid) {
  866. *nextTokPtr = ptr;
  867. return XML_TOK_INVALID;
  868. }
  869. }
  870. /* fall through */
  871. case BT_AMP:
  872. case BT_LT:
  873. case BT_NONXML:
  874. case BT_MALFORM:
  875. case BT_TRAIL:
  876. case BT_CR:
  877. case BT_LF:
  878. *nextTokPtr = ptr;
  879. return XML_TOK_DATA_CHARS;
  880. default:
  881. ptr += MINBPC(enc);
  882. break;
  883. }
  884. }
  885. *nextTokPtr = ptr;
  886. return XML_TOK_DATA_CHARS;
  887. }
  888. }
  889. #undef LEAD_CASE
  890. /* ptr points to character following "%" */
  891. static
  892. int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
  893. const char **nextTokPtr)
  894. {
  895. if (ptr == end)
  896. return XML_TOK_PARTIAL;
  897. switch (BYTE_TYPE(enc, ptr)) {
  898. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  899. case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
  900. *nextTokPtr = ptr;
  901. return XML_TOK_PERCENT;
  902. default:
  903. *nextTokPtr = ptr;
  904. return XML_TOK_INVALID;
  905. }
  906. while (ptr != end) {
  907. switch (BYTE_TYPE(enc, ptr)) {
  908. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  909. case BT_SEMI:
  910. *nextTokPtr = ptr + MINBPC(enc);
  911. return XML_TOK_PARAM_ENTITY_REF;
  912. default:
  913. *nextTokPtr = ptr;
  914. return XML_TOK_INVALID;
  915. }
  916. }
  917. return XML_TOK_PARTIAL;
  918. }
  919. static
  920. int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
  921. const char **nextTokPtr)
  922. {
  923. if (ptr == end)
  924. return XML_TOK_PARTIAL;
  925. switch (BYTE_TYPE(enc, ptr)) {
  926. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  927. default:
  928. *nextTokPtr = ptr;
  929. return XML_TOK_INVALID;
  930. }
  931. while (ptr != end) {
  932. switch (BYTE_TYPE(enc, ptr)) {
  933. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  934. case BT_CR: case BT_LF: case BT_S:
  935. case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
  936. *nextTokPtr = ptr;
  937. return XML_TOK_POUND_NAME;
  938. default:
  939. *nextTokPtr = ptr;
  940. return XML_TOK_INVALID;
  941. }
  942. }
  943. return -XML_TOK_POUND_NAME;
  944. }
  945. static
  946. int PREFIX(scanLit)(int open, const ENCODING *enc,
  947. const char *ptr, const char *end,
  948. const char **nextTokPtr)
  949. {
  950. while (ptr != end) {
  951. int t = BYTE_TYPE(enc, ptr);
  952. switch (t) {
  953. INVALID_CASES(ptr, nextTokPtr);
  954. case BT_QUOT:
  955. case BT_APOS:
  956. ptr += MINBPC(enc);
  957. if (t != open)
  958. break;
  959. if (ptr == end)
  960. return -XML_TOK_LITERAL;
  961. *nextTokPtr = ptr;
  962. switch (BYTE_TYPE(enc, ptr)) {
  963. case BT_S: case BT_CR: case BT_LF:
  964. case BT_GT: case BT_PERCNT: case BT_LSQB:
  965. return XML_TOK_LITERAL;
  966. default:
  967. return XML_TOK_INVALID;
  968. }
  969. default:
  970. ptr += MINBPC(enc);
  971. break;
  972. }
  973. }
  974. return XML_TOK_PARTIAL;
  975. }
  976. static
  977. int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
  978. const char **nextTokPtr)
  979. {
  980. int tok;
  981. if (ptr == end)
  982. return XML_TOK_NONE;
  983. if (MINBPC(enc) > 1) {
  984. size_t n = end - ptr;
  985. if (n & (MINBPC(enc) - 1)) {
  986. n &= ~(MINBPC(enc) - 1);
  987. if (n == 0)
  988. return XML_TOK_PARTIAL;
  989. end = ptr + n;
  990. }
  991. }
  992. switch (BYTE_TYPE(enc, ptr)) {
  993. case BT_QUOT:
  994. return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
  995. case BT_APOS:
  996. return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
  997. case BT_LT:
  998. {
  999. ptr += MINBPC(enc);
  1000. if (ptr == end)
  1001. return XML_TOK_PARTIAL;
  1002. switch (BYTE_TYPE(enc, ptr)) {
  1003. case BT_EXCL:
  1004. return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  1005. case BT_QUEST:
  1006. return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  1007. case BT_NMSTRT:
  1008. case BT_HEX:
  1009. case BT_NONASCII:
  1010. case BT_LEAD2:
  1011. case BT_LEAD3:
  1012. case BT_LEAD4:
  1013. *nextTokPtr = ptr - MINBPC(enc);
  1014. return XML_TOK_INSTANCE_START;
  1015. }
  1016. *nextTokPtr = ptr;
  1017. return XML_TOK_INVALID;
  1018. }
  1019. case BT_CR:
  1020. if (ptr + MINBPC(enc) == end)
  1021. return -XML_TOK_PROLOG_S;
  1022. /* fall through */
  1023. case BT_S: case BT_LF:
  1024. for (;;) {
  1025. ptr += MINBPC(enc);
  1026. if (ptr == end)
  1027. break;
  1028. switch (BYTE_TYPE(enc, ptr)) {
  1029. case BT_S: case BT_LF:
  1030. break;
  1031. case BT_CR:
  1032. /* don't split CR/LF pair */
  1033. if (ptr + MINBPC(enc) != end)
  1034. break;
  1035. /* fall through */
  1036. default:
  1037. *nextTokPtr = ptr;
  1038. return XML_TOK_PROLOG_S;
  1039. }
  1040. }
  1041. *nextTokPtr = ptr;
  1042. return XML_TOK_PROLOG_S;
  1043. case BT_PERCNT:
  1044. return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  1045. case BT_COMMA:
  1046. *nextTokPtr = ptr + MINBPC(enc);
  1047. return XML_TOK_COMMA;
  1048. case BT_LSQB:
  1049. *nextTokPtr = ptr + MINBPC(enc);
  1050. return XML_TOK_OPEN_BRACKET;
  1051. case BT_RSQB:
  1052. ptr += MINBPC(enc);
  1053. if (ptr == end)
  1054. return -XML_TOK_CLOSE_BRACKET;
  1055. if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
  1056. if (ptr + MINBPC(enc) == end)
  1057. return XML_TOK_PARTIAL;
  1058. if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
  1059. *nextTokPtr = ptr + 2*MINBPC(enc);
  1060. return XML_TOK_COND_SECT_CLOSE;
  1061. }
  1062. }
  1063. *nextTokPtr = ptr;
  1064. return XML_TOK_CLOSE_BRACKET;
  1065. case BT_LPAR:
  1066. *nextTokPtr = ptr + MINBPC(enc);
  1067. return XML_TOK_OPEN_PAREN;
  1068. case BT_RPAR:
  1069. ptr += MINBPC(enc);
  1070. if (ptr == end)
  1071. return -XML_TOK_CLOSE_PAREN;
  1072. switch (BYTE_TYPE(enc, ptr)) {
  1073. case BT_AST:
  1074. *nextTokPtr = ptr + MINBPC(enc);
  1075. return XML_TOK_CLOSE_PAREN_ASTERISK;
  1076. case BT_QUEST:
  1077. *nextTokPtr = ptr + MINBPC(enc);
  1078. return XML_TOK_CLOSE_PAREN_QUESTION;
  1079. case BT_PLUS:
  1080. *nextTokPtr = ptr + MINBPC(enc);
  1081. return XML_TOK_CLOSE_PAREN_PLUS;
  1082. case BT_CR: case BT_LF: case BT_S:
  1083. case BT_GT: case BT_COMMA: case BT_VERBAR:
  1084. case BT_RPAR:
  1085. *nextTokPtr = ptr;
  1086. return XML_TOK_CLOSE_PAREN;
  1087. }
  1088. *nextTokPtr = ptr;
  1089. return XML_TOK_INVALID;
  1090. case BT_VERBAR:
  1091. *nextTokPtr = ptr + MINBPC(enc);
  1092. return XML_TOK_OR;
  1093. case BT_GT:
  1094. *nextTokPtr = ptr + MINBPC(enc);
  1095. return XML_TOK_DECL_CLOSE;
  1096. case BT_NUM:
  1097. return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  1098. #define LEAD_CASE(n) \
  1099. case BT_LEAD ## n: \
  1100. if (end - ptr < n) \
  1101. return XML_TOK_PARTIAL_CHAR; \
  1102. if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
  1103. ptr += n; \
  1104. tok = XML_TOK_NAME; \
  1105. break; \
  1106. } \
  1107. if (IS_NAME_CHAR(enc, ptr, n)) { \
  1108. ptr += n; \
  1109. tok = XML_TOK_NMTOKEN; \
  1110. break; \
  1111. } \
  1112. *nextTokPtr = ptr; \
  1113. return XML_TOK_INVALID;
  1114. LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  1115. #undef LEAD_CASE
  1116. case BT_NMSTRT:
  1117. case BT_HEX:
  1118. tok = XML_TOK_NAME;
  1119. ptr += MINBPC(enc);
  1120. break;
  1121. case BT_DIGIT:
  1122. case BT_NAME:
  1123. case BT_MINUS:
  1124. case BT_COLON:
  1125. tok = XML_TOK_NMTOKEN;
  1126. ptr += MINBPC(enc);
  1127. break;
  1128. case BT_NONASCII:
  1129. if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
  1130. ptr += MINBPC(enc);
  1131. tok = XML_TOK_NAME;
  1132. break;
  1133. }
  1134. if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
  1135. ptr += MINBPC(enc);
  1136. tok = XML_TOK_NMTOKEN;
  1137. break;
  1138. }
  1139. /* fall through */
  1140. default:
  1141. *nextTokPtr = ptr;
  1142. return XML_TOK_INVALID;
  1143. }
  1144. while (ptr != end) {
  1145. switch (BYTE_TYPE(enc, ptr)) {
  1146. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  1147. case BT_GT: case BT_RPAR: case BT_COMMA:
  1148. case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
  1149. case BT_S: case BT_CR: case BT_LF:
  1150. *nextTokPtr = ptr;
  1151. return tok;
  1152. case BT_COLON:
  1153. ptr += MINBPC(enc);
  1154. switch (tok) {
  1155. case XML_TOK_NAME:
  1156. if (ptr == end)
  1157. return XML_TOK_PARTIAL;
  1158. tok = XML_TOK_PREFIXED_NAME;
  1159. switch (BYTE_TYPE(enc, ptr)) {
  1160. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  1161. default:
  1162. tok = XML_TOK_NMTOKEN;
  1163. break;
  1164. }
  1165. break;
  1166. case XML_TOK_PREFIXED_NAME:
  1167. tok = XML_TOK_NMTOKEN;
  1168. break;
  1169. }
  1170. break;
  1171. case BT_PLUS:
  1172. if (tok == XML_TOK_NMTOKEN) {
  1173. *nextTokPtr = ptr;
  1174. return XML_TOK_INVALID;
  1175. }
  1176. *nextTokPtr = ptr + MINBPC(enc);
  1177. return XML_TOK_NAME_PLUS;
  1178. case BT_AST:
  1179. if (tok == XML_TOK_NMTOKEN) {
  1180. *nextTokPtr = ptr;
  1181. return XML_TOK_INVALID;
  1182. }
  1183. *nextTokPtr = ptr + MINBPC(enc);
  1184. return XML_TOK_NAME_ASTERISK;
  1185. case BT_QUEST:
  1186. if (tok == XML_TOK_NMTOKEN) {
  1187. *nextTokPtr = ptr;
  1188. return XML_TOK_INVALID;
  1189. }
  1190. *nextTokPtr = ptr + MINBPC(enc);
  1191. return XML_TOK_NAME_QUESTION;
  1192. default:
  1193. *nextTokPtr = ptr;
  1194. return XML_TOK_INVALID;
  1195. }
  1196. }
  1197. return -tok;
  1198. }
  1199. static
  1200. int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
  1201. const char **nextTokPtr)
  1202. {
  1203. const char *start;
  1204. if (ptr == end)
  1205. return XML_TOK_NONE;
  1206. start = ptr;
  1207. while (ptr != end) {
  1208. switch (BYTE_TYPE(enc, ptr)) {
  1209. #define LEAD_CASE(n) \
  1210. case BT_LEAD ## n: ptr += n; break;
  1211. LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  1212. #undef LEAD_CASE
  1213. case BT_AMP:
  1214. if (ptr == start)
  1215. return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  1216. *nextTokPtr = ptr;
  1217. return XML_TOK_DATA_CHARS;
  1218. case BT_LT:
  1219. /* this is for inside entity references */
  1220. *nextTokPtr = ptr;
  1221. return XML_TOK_INVALID;
  1222. case BT_LF:
  1223. if (ptr == start) {
  1224. *nextTokPtr = ptr + MINBPC(enc);
  1225. return XML_TOK_DATA_NEWLINE;
  1226. }
  1227. *nextTokPtr = ptr;
  1228. return XML_TOK_DATA_CHARS;
  1229. case BT_CR:
  1230. if (ptr == start) {
  1231. ptr += MINBPC(enc);
  1232. if (ptr == end)
  1233. return XML_TOK_TRAILING_CR;
  1234. if (BYTE_TYPE(enc, ptr) == BT_LF)
  1235. ptr += MINBPC(enc);
  1236. *nextTokPtr = ptr;
  1237. return XML_TOK_DATA_NEWLINE;
  1238. }
  1239. *nextTokPtr = ptr;
  1240. return XML_TOK_DATA_CHARS;
  1241. case BT_S:
  1242. if (ptr == start) {
  1243. *nextTokPtr = ptr + MINBPC(enc);
  1244. return XML_TOK_ATTRIBUTE_VALUE_S;
  1245. }
  1246. *nextTokPtr = ptr;
  1247. return XML_TOK_DATA_CHARS;
  1248. default:
  1249. ptr += MINBPC(enc);
  1250. break;
  1251. }
  1252. }
  1253. *nextTokPtr = ptr;
  1254. return XML_TOK_DATA_CHARS;
  1255. }
  1256. static
  1257. int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
  1258. const char **nextTokPtr)
  1259. {
  1260. const char *start;
  1261. if (ptr == end)
  1262. return XML_TOK_NONE;
  1263. start = ptr;
  1264. while (ptr != end) {
  1265. switch (BYTE_TYPE(enc, ptr)) {
  1266. #define LEAD_CASE(n) \
  1267. case BT_LEAD ## n: ptr += n; break;
  1268. LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  1269. #undef LEAD_CASE
  1270. case BT_AMP:
  1271. if (ptr == start)
  1272. return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  1273. *nextTokPtr = ptr;
  1274. return XML_TOK_DATA_CHARS;
  1275. case BT_PERCNT:
  1276. if (ptr == start)
  1277. return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  1278. *nextTokPtr = ptr;
  1279. return XML_TOK_DATA_CHARS;
  1280. case BT_LF:
  1281. if (ptr == start) {
  1282. *nextTokPtr = ptr + MINBPC(enc);
  1283. return XML_TOK_DATA_NEWLINE;
  1284. }
  1285. *nextTokPtr = ptr;
  1286. return XML_TOK_DATA_CHARS;
  1287. case BT_CR:
  1288. if (ptr == start) {
  1289. ptr += MINBPC(enc);
  1290. if (ptr == end)
  1291. return XML_TOK_TRAILING_CR;
  1292. if (BYTE_TYPE(enc, ptr) == BT_LF)
  1293. ptr += MINBPC(enc);
  1294. *nextTokPtr = ptr;
  1295. return XML_TOK_DATA_NEWLINE;
  1296. }
  1297. *nextTokPtr = ptr;
  1298. return XML_TOK_DATA_CHARS;
  1299. default:
  1300. ptr += MINBPC(enc);
  1301. break;
  1302. }
  1303. }
  1304. *nextTokPtr = ptr;
  1305. return XML_TOK_DATA_CHARS;
  1306. }
  1307. static
  1308. int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
  1309. const char **nextTokPtr)
  1310. {
  1311. int level = 0;
  1312. if (MINBPC(enc) > 1) {
  1313. size_t n = end - ptr;
  1314. if (n & (MINBPC(enc) - 1)) {
  1315. n &= ~(MINBPC(enc) - 1);
  1316. end = ptr + n;
  1317. }
  1318. }
  1319. while (ptr != end) {
  1320. switch (BYTE_TYPE(enc, ptr)) {
  1321. INVALID_CASES(ptr, nextTokPtr);
  1322. case BT_LT:
  1323. if ((ptr += MINBPC(enc)) == end)
  1324. return XML_TOK_PARTIAL;
  1325. if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
  1326. if ((ptr += MINBPC(enc)) == end)
  1327. return XML_TOK_PARTIAL;
  1328. if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
  1329. ++level;
  1330. ptr += MINBPC(enc);
  1331. }
  1332. }
  1333. break;
  1334. case BT_RSQB:
  1335. if ((ptr += MINBPC(enc)) == end)
  1336. return XML_TOK_PARTIAL;
  1337. if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
  1338. if ((ptr += MINBPC(enc)) == end)
  1339. return XML_TOK_PARTIAL;
  1340. if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  1341. ptr += MINBPC(enc);
  1342. if (level == 0) {
  1343. *nextTokPtr = ptr;
  1344. return XML_TOK_IGNORE_SECT;
  1345. }
  1346. --level;
  1347. }
  1348. }
  1349. break;
  1350. default:
  1351. ptr += MINBPC(enc);
  1352. break;
  1353. }
  1354. }
  1355. return XML_TOK_PARTIAL;
  1356. }
  1357. static
  1358. int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
  1359. const char **badPtr)
  1360. {
  1361. ptr += MINBPC(enc);
  1362. end -= MINBPC(enc);
  1363. for (; ptr != end; ptr += MINBPC(enc)) {
  1364. switch (BYTE_TYPE(enc, ptr)) {
  1365. case BT_DIGIT:
  1366. case BT_HEX:
  1367. case BT_MINUS:
  1368. case BT_APOS:
  1369. case BT_LPAR:
  1370. case BT_RPAR:
  1371. case BT_PLUS:
  1372. case BT_COMMA:
  1373. case BT_SOL:
  1374. case BT_EQUALS:
  1375. case BT_QUEST:
  1376. case BT_CR:
  1377. case BT_LF:
  1378. case BT_SEMI:
  1379. case BT_EXCL:
  1380. case BT_AST:
  1381. case BT_PERCNT:
  1382. case BT_NUM:
  1383. case BT_COLON:
  1384. break;
  1385. case BT_S:
  1386. if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
  1387. *badPtr = ptr;
  1388. return 0;
  1389. }
  1390. break;
  1391. case BT_NAME:
  1392. case BT_NMSTRT:
  1393. if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
  1394. break;
  1395. default:
  1396. switch (BYTE_TO_ASCII(enc, ptr)) {
  1397. case 0x24: /* $ */
  1398. case 0x40: /* @ */
  1399. break;
  1400. default:
  1401. *badPtr = ptr;
  1402. return 0;
  1403. }
  1404. break;
  1405. }
  1406. }
  1407. return 1;
  1408. }
  1409. /* This must only be called for a well-formed start-tag or empty element tag.
  1410. Returns the number of attributes. Pointers to the first attsMax attributes
  1411. are stored in atts. */
  1412. static
  1413. int PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
  1414. int attsMax, ATTRIBUTE *atts)
  1415. {
  1416. enum { other, inName, inValue } state = inName;
  1417. int nAtts = 0;
  1418. int open = 0; /* defined when state == inValue;
  1419. initialization just to shut up compilers */
  1420. for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
  1421. switch (BYTE_TYPE(enc, ptr)) {
  1422. #define START_NAME \
  1423. if (state == other) { \
  1424. if (nAtts < attsMax) { \
  1425. atts[nAtts].name = ptr; \
  1426. atts[nAtts].normalized = 1; \
  1427. } \
  1428. state = inName; \
  1429. }
  1430. #define LEAD_CASE(n) \
  1431. case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
  1432. LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  1433. #undef LEAD_CASE
  1434. case BT_NONASCII:
  1435. case BT_NMSTRT:
  1436. case BT_HEX:
  1437. START_NAME
  1438. break;
  1439. #undef START_NAME
  1440. case BT_QUOT:
  1441. if (state != inValue) {
  1442. if (nAtts < attsMax)
  1443. atts[nAtts].valuePtr = ptr + MINBPC(enc);
  1444. state = inValue;
  1445. open = BT_QUOT;
  1446. }
  1447. else if (open == BT_QUOT) {
  1448. state = other;
  1449. if (nAtts < attsMax)
  1450. atts[nAtts].valueEnd = ptr;
  1451. nAtts++;
  1452. }
  1453. break;
  1454. case BT_APOS:
  1455. if (state != inValue) {
  1456. if (nAtts < attsMax)
  1457. atts[nAtts].valuePtr = ptr + MINBPC(enc);
  1458. state = inValue;
  1459. open = BT_APOS;
  1460. }
  1461. else if (open == BT_APOS) {
  1462. state = other;
  1463. if (nAtts < attsMax)
  1464. atts[nAtts].valueEnd = ptr;
  1465. nAtts++;
  1466. }
  1467. break;
  1468. case BT_AMP:
  1469. if (nAtts < attsMax)
  1470. atts[nAtts].normalized = 0;
  1471. break;
  1472. case BT_S:
  1473. if (state == inName)
  1474. state = other;
  1475. else if (state == inValue
  1476. && nAtts < attsMax
  1477. && atts[nAtts].normalized
  1478. && (ptr == atts[nAtts].valuePtr
  1479. || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
  1480. || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
  1481. || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
  1482. atts[nAtts].normalized = 0;
  1483. break;
  1484. case BT_CR: case BT_LF:
  1485. /* This case ensures that the first attribute name is counted
  1486. Apart from that we could just change state on the quote. */
  1487. if (state == inName)
  1488. state = other;
  1489. else if (state == inValue && nAtts < attsMax)
  1490. atts[nAtts].normalized = 0;
  1491. break;
  1492. case BT_GT:
  1493. case BT_SOL:
  1494. if (state != inValue)
  1495. return nAtts;
  1496. break;
  1497. default:
  1498. break;
  1499. }
  1500. }
  1501. /* not reached */
  1502. }
  1503. static
  1504. int PREFIX(charRefNumber)(const ENCODING *enc ATTR_UNUSED, const char *ptr)
  1505. {
  1506. int result = 0;
  1507. /* skip &# */
  1508. ptr += 2*MINBPC(enc);
  1509. if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
  1510. for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
  1511. int c = BYTE_TO_ASCII(enc, ptr);
  1512. switch (c) {
  1513. case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
  1514. case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
  1515. result <<= 4;
  1516. result |= (c - ASCII_0);
  1517. break;
  1518. case ASCII_A: case ASCII_B: case ASCII_C: case ASCII_D: case ASCII_E: case ASCII_F:
  1519. result <<= 4;
  1520. result += 10 + (c - ASCII_A);
  1521. break;
  1522. case ASCII_a: case ASCII_b: case ASCII_c: case ASCII_d: case ASCII_e: case ASCII_f:
  1523. result <<= 4;
  1524. result += 10 + (c - ASCII_a);
  1525. break;
  1526. }
  1527. if (result >= 0x110000)
  1528. return -1;
  1529. }
  1530. }
  1531. else {
  1532. for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
  1533. int c = BYTE_TO_ASCII(enc, ptr);
  1534. result *= 10;
  1535. result += (c - ASCII_0);
  1536. if (result >= 0x110000)
  1537. return -1;
  1538. }
  1539. }
  1540. return checkCharRefNumber(result);
  1541. }
  1542. static
  1543. int PREFIX(predefinedEntityName)(const ENCODING * enc ATTR_UNUSED,
  1544. const char * ptr,
  1545. const char * end)
  1546. {
  1547. switch ((end - ptr)/MINBPC(enc)) {
  1548. case 2:
  1549. if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
  1550. switch (BYTE_TO_ASCII(enc, ptr)) {
  1551. case ASCII_l:
  1552. return ASCII_LT;
  1553. case ASCII_g:
  1554. return ASCII_GT;
  1555. }
  1556. }
  1557. break;
  1558. case 3:
  1559. if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
  1560. ptr += MINBPC(enc);
  1561. if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
  1562. ptr += MINBPC(enc);
  1563. if (CHAR_MATCHES(enc, ptr, ASCII_p))
  1564. return ASCII_AMP;
  1565. }
  1566. }
  1567. break;
  1568. case 4:
  1569. switch (BYTE_TO_ASCII(enc, ptr)) {
  1570. case ASCII_q:
  1571. ptr += MINBPC(enc);
  1572. if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
  1573. ptr += MINBPC(enc);
  1574. if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
  1575. ptr += MINBPC(enc);
  1576. if (CHAR_MATCHES(enc, ptr, ASCII_t))
  1577. return ASCII_QUOT;
  1578. }
  1579. }
  1580. break;
  1581. case ASCII_a:
  1582. ptr += MINBPC(enc);
  1583. if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
  1584. ptr += MINBPC(enc);
  1585. if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
  1586. ptr += MINBPC(enc);
  1587. if (CHAR_MATCHES(enc, ptr, ASCII_s))
  1588. return ASCII_APOS;
  1589. }
  1590. }
  1591. break;
  1592. }
  1593. }
  1594. return 0;
  1595. }
  1596. static
  1597. int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
  1598. {
  1599. for (;;) {
  1600. switch (BYTE_TYPE(enc, ptr1)) {
  1601. #define LEAD_CASE(n) \
  1602. case BT_LEAD ## n: \
  1603. if (*ptr1++ != *ptr2++) \
  1604. return 0;
  1605. LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
  1606. #undef LEAD_CASE
  1607. /* fall through */
  1608. if (*ptr1++ != *ptr2++)
  1609. return 0;
  1610. break;
  1611. case BT_NONASCII:
  1612. case BT_NMSTRT:
  1613. case BT_COLON:
  1614. case BT_HEX:
  1615. case BT_DIGIT:
  1616. case BT_NAME:
  1617. case BT_MINUS:
  1618. if (*ptr2++ != *ptr1++)
  1619. return 0;
  1620. if (MINBPC(enc) > 1) {
  1621. if (*ptr2++ != *ptr1++)
  1622. return 0;
  1623. if (MINBPC(enc) > 2) {
  1624. if (*ptr2++ != *ptr1++)
  1625. return 0;
  1626. if (MINBPC(enc) > 3) {
  1627. if (*ptr2++ != *ptr1++)
  1628. return 0;
  1629. }
  1630. }
  1631. }
  1632. break;
  1633. default:
  1634. if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
  1635. return 1;
  1636. switch (BYTE_TYPE(enc, ptr2)) {
  1637. case BT_LEAD2:
  1638. case BT_LEAD3:
  1639. case BT_LEAD4:
  1640. case BT_NONASCII:
  1641. case BT_NMSTRT:
  1642. case BT_COLON:
  1643. case BT_HEX:
  1644. case BT_DIGIT:
  1645. case BT_NAME:
  1646. case BT_MINUS:
  1647. return 0;
  1648. default:
  1649. return 1;
  1650. }
  1651. }
  1652. }
  1653. /* not reached */
  1654. }
  1655. static
  1656. int PREFIX(nameMatchesAscii)(const ENCODING * enc ATTR_UNUSED,
  1657. const char * ptr1,
  1658. const char * end1,
  1659. const char * ptr2)
  1660. {
  1661. for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
  1662. if (ptr1 == end1)
  1663. return 0;
  1664. if (!CHAR_MATCHES(enc, ptr1, *ptr2))
  1665. return 0;
  1666. }
  1667. return ptr1 == end1;
  1668. }
  1669. #define LEAD_CASE(n) case BT_LEAD ## n: ptr += n; break
  1670. static size_t
  1671. PREFIX(nameLength)(const ENCODING * const enc,
  1672. const char * const start) {
  1673. const char * ptr;
  1674. for (ptr = start;;) {
  1675. switch (BYTE_TYPE(enc, ptr)) {
  1676. LEAD_CASE(2);
  1677. LEAD_CASE(3);
  1678. LEAD_CASE(4);
  1679. case BT_NONASCII:
  1680. case BT_NMSTRT:
  1681. case BT_COLON:
  1682. case BT_HEX:
  1683. case BT_DIGIT:
  1684. case BT_NAME:
  1685. case BT_MINUS:
  1686. ptr += MINBPC(enc);
  1687. break;
  1688. default:
  1689. return ptr - start;
  1690. }
  1691. }
  1692. }
  1693. #undef LEAD_CASE
  1694. static
  1695. const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr)
  1696. {
  1697. for (;;) {
  1698. switch (BYTE_TYPE(enc, ptr)) {
  1699. case BT_LF:
  1700. case BT_CR:
  1701. case BT_S:
  1702. ptr += MINBPC(enc);
  1703. break;
  1704. default:
  1705. return ptr;
  1706. }
  1707. }
  1708. }
  1709. #define LEAD_CASE(n) \
  1710. case BT_LEAD ## n: \
  1711. ptr += n; \
  1712. break
  1713. static void
  1714. PREFIX(updatePosition)(const ENCODING * const enc,
  1715. const char * const start,
  1716. const char * const end,
  1717. POSITION * const posP) {
  1718. const char * ptr;
  1719. for (ptr = start; ptr < end;) {
  1720. switch (BYTE_TYPE(enc, ptr)) {
  1721. LEAD_CASE(2);
  1722. LEAD_CASE(3);
  1723. LEAD_CASE(4);
  1724. case BT_LF:
  1725. posP->columnNumber = (unsigned)-1;
  1726. ++posP->lineNumber;
  1727. ptr += MINBPC(enc);
  1728. break;
  1729. case BT_CR:
  1730. ++posP->lineNumber;
  1731. ptr += MINBPC(enc);
  1732. if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
  1733. ptr += MINBPC(enc);
  1734. posP->columnNumber = (unsigned)-1;
  1735. break;
  1736. default:
  1737. ptr += MINBPC(enc);
  1738. break;
  1739. }
  1740. ++posP->columnNumber;
  1741. }
  1742. }
  1743. #undef LEAD_CASE
  1744. #undef DO_LEAD_CASE
  1745. #undef MULTIBYTE_CASES
  1746. #undef INVALID_CASES
  1747. #undef CHECK_NAME_CASE
  1748. #undef CHECK_NAME_CASES
  1749. #undef CHECK_NMSTRT_CASE
  1750. #undef CHECK_NMSTRT_CASES