encoding.c 142 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
7377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954395539563957395839593960396139623963396439653966396739683969397039713972397339743975397639773978
  1. /*
  2. * encoding.c : implements the encoding conversion functions needed for XML
  3. *
  4. * Related specs:
  5. * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
  6. * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
  7. * [ISO-10646] UTF-8 and UTF-16 in Annexes
  8. * [ISO-8859-1] ISO Latin-1 characters codes.
  9. * [UNICODE] The Unicode Consortium, "The Unicode Standard --
  10. * Worldwide Character Encoding -- Version 1.0", Addison-
  11. * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
  12. * described in Unicode Technical Report #4.
  13. * [US-ASCII] Coded Character Set--7-bit American Standard Code for
  14. * Information Interchange, ANSI X3.4-1986.
  15. *
  16. * See Copyright for the status of this software.
  17. *
  18. * daniel@veillard.com
  19. *
  20. * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
  21. */
  22. #define IN_LIBXML
  23. #include "libxml.h"
  24. #include <string.h>
  25. #include <limits.h>
  26. #ifdef HAVE_CTYPE_H
  27. #include <ctype.h>
  28. #endif
  29. #ifdef HAVE_STDLIB_H
  30. #include <stdlib.h>
  31. #endif
  32. #ifdef LIBXML_ICONV_ENABLED
  33. #ifdef HAVE_ERRNO_H
  34. #include <errno.h>
  35. #endif
  36. #endif
  37. #include <libxml/encoding.h>
  38. #include <libxml/xmlmemory.h>
  39. #ifdef LIBXML_HTML_ENABLED
  40. #include <libxml/HTMLparser.h>
  41. #endif
  42. #include <libxml/globals.h>
  43. #include <libxml/xmlerror.h>
  44. #include "buf.h"
  45. #include "enc.h"
  46. static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
  47. static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
  48. typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
  49. typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
  50. struct _xmlCharEncodingAlias {
  51. const char *name;
  52. const char *alias;
  53. };
  54. static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
  55. static int xmlCharEncodingAliasesNb = 0;
  56. static int xmlCharEncodingAliasesMax = 0;
  57. #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
  58. #if 0
  59. #define DEBUG_ENCODING /* Define this to get encoding traces */
  60. #endif
  61. #else
  62. #ifdef LIBXML_ISO8859X_ENABLED
  63. static void xmlRegisterCharEncodingHandlersISO8859x (void);
  64. #endif
  65. #endif
  66. static int xmlLittleEndian = 1;
  67. /**
  68. * xmlEncodingErrMemory:
  69. * @extra: extra information
  70. *
  71. * Handle an out of memory condition
  72. */
  73. static void
  74. xmlEncodingErrMemory(const char *extra)
  75. {
  76. __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
  77. }
  78. /**
  79. * xmlErrEncoding:
  80. * @error: the error number
  81. * @msg: the error message
  82. *
  83. * n encoding error
  84. */
  85. static void LIBXML_ATTR_FORMAT(2,0)
  86. xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
  87. {
  88. __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
  89. XML_FROM_I18N, error, XML_ERR_FATAL,
  90. NULL, 0, val, NULL, NULL, 0, 0, msg, val);
  91. }
  92. #ifdef LIBXML_ICU_ENABLED
  93. static uconv_t*
  94. openIcuConverter(const char* name, int toUnicode)
  95. {
  96. UErrorCode status = U_ZERO_ERROR;
  97. uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
  98. if (conv == NULL)
  99. return NULL;
  100. conv->pivot_source = conv->pivot_buf;
  101. conv->pivot_target = conv->pivot_buf;
  102. conv->uconv = ucnv_open(name, &status);
  103. if (U_FAILURE(status))
  104. goto error;
  105. status = U_ZERO_ERROR;
  106. if (toUnicode) {
  107. ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
  108. NULL, NULL, NULL, &status);
  109. }
  110. else {
  111. ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
  112. NULL, NULL, NULL, &status);
  113. }
  114. if (U_FAILURE(status))
  115. goto error;
  116. status = U_ZERO_ERROR;
  117. conv->utf8 = ucnv_open("UTF-8", &status);
  118. if (U_SUCCESS(status))
  119. return conv;
  120. error:
  121. if (conv->uconv)
  122. ucnv_close(conv->uconv);
  123. xmlFree(conv);
  124. return NULL;
  125. }
  126. static void
  127. closeIcuConverter(uconv_t *conv)
  128. {
  129. if (conv != NULL) {
  130. ucnv_close(conv->uconv);
  131. ucnv_close(conv->utf8);
  132. xmlFree(conv);
  133. }
  134. }
  135. #endif /* LIBXML_ICU_ENABLED */
  136. /************************************************************************
  137. * *
  138. * Conversions To/From UTF8 encoding *
  139. * *
  140. ************************************************************************/
  /**
   * asciiToUTF8:
   * @out:  a pointer to an array of bytes to store the result
   * @outlen:  the length of @out
   * @in:  a pointer to an array of ASCII chars
   * @inlen:  the length of @in
   *
   * Take a block of ASCII chars in and try to convert it to an UTF-8
   * block of chars out. ASCII bytes are valid UTF-8 as they are, so the
   * conversion is a checked copy that needs exactly one output byte per
   * input byte. Conversion stops early when @out is full.
   *
   * Returns the number of bytes written if success, or -1 if @out, @outlen
   *     or @inlen is NULL or if a byte >= 0x80 is found in the input.
   *     A NULL @in is treated as an empty input and returns 0.
   * The value of @inlen after return is the number of octets consumed
   *     (up to, but not including, an offending byte).
   * The value of @outlen after return is the number of octets produced.
   */
  static int
  asciiToUTF8(unsigned char* out, int *outlen,
                const unsigned char* in, int *inlen) {
      const unsigned char *src = in;
      const unsigned char *srcend;
      unsigned char *dst = out;
      unsigned char *dstend;
      int ret = 0;

      if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
          return(-1);
      if (in == NULL) {
          /* nothing to convert */
          *outlen = 0;
          *inlen = 0;
          return(0);
      }

      srcend = in + (*inlen);
      dstend = out + (*outlen);

      /*
       * One output byte per input byte: keep going as long as there is
       * input left and at least one free byte in the output buffer.
       */
      while ((src < srcend) && (dst < dstend)) {
          if (*src >= 0x80) {
              /* not an ASCII byte: report what was converted so far */
              ret = -1;
              break;
          }
          *dst++ = *src++;
      }

      *outlen = (int) (dst - out);
      *inlen = (int) (src - in);
      return((ret < 0) ? ret : *outlen);
  }
  182. #ifdef LIBXML_OUTPUT_ENABLED
  /**
   * UTF8Toascii:
   * @out:  a pointer to an array of bytes to store the result
   * @outlen:  the length of @out
   * @in:  a pointer to an array of UTF-8 chars
   * @inlen:  the length of @in
   *
   * Take a block of UTF-8 chars in and try to convert it to an ASCII
   * block of chars out.
   *
   * Only single-byte UTF-8 sequences can be represented in ASCII. A
   * complete multi-byte sequence is therefore always a transcoding
   * failure: a well-formed one encodes a code point >= 0x80, and anything
   * else (bad continuation byte, overlong form) is malformed input.
   * A multi-byte sequence that is cut off by the end of @in is not an
   * error; conversion simply stops in front of it.
   *
   * Returns the number of bytes written if success, -2 if the transcoding
   *     fails, or -1 if @out, @outlen or @inlen is NULL. A NULL @in is an
   *     initialization call and returns 0.
   * The value of @inlen after return is the number of octets consumed
   *     (on -2, the offset of the character that could not be converted).
   * The value of @outlen after return is the number of octets produced.
   */
  static int
  UTF8Toascii(unsigned char* out, int *outlen,
                const unsigned char* in, int *inlen) {
      const unsigned char *instart = in;
      const unsigned char *inend;
      unsigned char *outstart = out;
      unsigned char *outend;
      int ret = 0;

      if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
      if (in == NULL) {
          /*
           * initialization nothing to do
           */
          *outlen = 0;
          *inlen = 0;
          return(0);
      }
      inend = in + (*inlen);
      outend = out + (*outlen);

      /* Invariant: `in` always points at the first unconverted character. */
      while (in < inend) {
          unsigned int lead = *in;
          int trailing;

          if (lead < 0x80) {
              if (out >= outend)
                  break;
              *out++ = (unsigned char) lead;
              in++;
              continue;
          }

          if ((lead < 0xC0) || (lead >= 0xF8)) {
              /* continuation byte in leading position, or invalid lead */
              ret = -2;
              break;
          }

          trailing = (lead < 0xE0) ? 1 : ((lead < 0xF0) ? 2 : 3);
          if (inend - in <= trailing) {
              /* sequence is cut off by the end of the input */
              break;
          }

          /* complete multi-byte sequence: no chance for this in Ascii */
          ret = -2;
          break;
      }

      *outlen = (int) (out - outstart);
      *inlen = (int) (in - instart);
      return((ret < 0) ? ret : *outlen);
  }
  262. #endif /* LIBXML_OUTPUT_ENABLED */
  /**
   * isolat1ToUTF8:
   * @out:  a pointer to an array of bytes to store the result
   * @outlen:  the length of @out
   * @in:  a pointer to an array of ISO Latin 1 chars
   * @inlen:  the length of @in
   *
   * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
   * block of chars out. Bytes below 0x80 are copied as they are, every
   * other byte becomes a two-byte UTF-8 sequence. Conversion stops as soon
   * as the next character no longer fits into @out.
   *
   * Returns the number of bytes written if success, or -1 if one of the
   *     arguments is NULL.
   * The value of @inlen after return is the number of octets consumed.
   * The value of @outlen after return is the number of octets produced.
   */
  int
  isolat1ToUTF8(unsigned char* out, int *outlen,
                const unsigned char* in, int *inlen) {
      const unsigned char *src;
      const unsigned char *srcend;
      unsigned char *dst;
      unsigned char *dstend;

      if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
          return(-1);

      src = in;
      srcend = in + (*inlen);
      dst = out;
      dstend = out + *outlen;

      while (src < srcend) {
          unsigned char ch = *src;

          if (ch < 0x80) {
              /* plain ASCII: one byte of output */
              if (dst >= dstend)
                  break;
              *dst++ = ch;
          } else {
              /* 0x80..0xFF: two bytes of output */
              if (dstend - dst < 2)
                  break;
              *dst++ = ((ch >> 6) & 0x1F) | 0xC0;
              *dst++ = (ch & 0x3F) | 0x80;
          }
          src++;
      }

      *outlen = dst - out;
      *inlen = src - in;
      return(*outlen);
  }
  /**
   * UTF8ToUTF8:
   * @out:  a pointer to an array of bytes to store the result
   * @outlen:  the length of @out
   * @inb:  a pointer to an array of UTF-8 chars
   * @inlenb:  the length of @inb in bytes
   *
   * No op copy operation for UTF8 handling: copies as many bytes as fit,
   * i.e. the smaller of *@outlen and *@inlenb, without looking at them.
   *
   * Returns the number of bytes written, or -1 if @out, @outlen or @inlenb
   *     is NULL or if the number of bytes to copy would be negative.
   *     A NULL @inb is an initialization call and returns 0.
   * The values of *@outlen and *@inlenb after a successful return are both
   *     the number of octets copied.
   */
  static int
  UTF8ToUTF8(unsigned char* out, int *outlen,
             const unsigned char* inb, int *inlenb)
  {
      int count;

      if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
          return(-1);

      if (inb == NULL) {
          /* inb == NULL means output is initialized. */
          *inlenb = 0;
          *outlen = 0;
          return(0);
      }

      /* Copy whatever fits: min(input length, output length). */
      count = (*inlenb < *outlen) ? *inlenb : *outlen;
      if (count < 0)
          return(-1);

      /*
       * FIXME: Conversion functions must assure valid UTF-8, so we have
       * to check for UTF-8 validity. Preferably, this converter shouldn't
       * be used at all.
       */
      memcpy(out, inb, count);

      *inlenb = count;
      *outlen = count;
      return(count);
  }
  351. #ifdef LIBXML_OUTPUT_ENABLED
  /**
   * UTF8Toisolat1:
   * @out:  a pointer to an array of bytes to store the result
   * @outlen:  the length of @out
   * @in:  a pointer to an array of UTF-8 chars
   * @inlen:  the length of @in
   *
   * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
   * block of chars out. Each sequence is decoded to its code point and
   * written as one byte when the value is at most 0xFF. A sequence cut off
   * by the end of @in, or a full @out, stops the conversion without error.
   *
   * Returns the number of bytes written if success, -2 if the transcoding
   *     fails (invalid lead or continuation byte, or a value above 0xFF),
   *     or -1 if @out, @outlen or @inlen is NULL. A NULL @in is an
   *     initialization call and returns 0.
   * The value of @inlen after return is the number of octets consumed,
   *     counting completely converted characters only.
   * The value of @outlen after return is the number of octets produced.
   */
  int
  UTF8Toisolat1(unsigned char* out, int *outlen,
                const unsigned char* in, int *inlen) {
      const unsigned char *cur;
      const unsigned char *done;
      const unsigned char *srcend;
      unsigned char *dst;
      unsigned char *dstend;
      int status = 0;

      if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
      if (in == NULL) {
          /*
           * initialization nothing to do
           */
          *outlen = 0;
          *inlen = 0;
          return(0);
      }

      cur = in;
      done = in;
      srcend = in + (*inlen);
      dst = out;
      dstend = out + (*outlen);

      while (cur < srcend) {
          unsigned int lead = *cur++;
          unsigned int value;
          int extra;

          if (lead < 0x80) {
              value = lead;
              extra = 0;
          } else if ((lead < 0xC0) || (lead >= 0xF8)) {
              /* trailing byte in leading position, or no chance in IsoLat1 */
              status = -2;
              break;
          } else if (lead < 0xE0) {
              value = lead & 0x1F;
              extra = 1;
          } else if (lead < 0xF0) {
              value = lead & 0x0F;
              extra = 2;
          } else {
              value = lead & 0x07;
              extra = 3;
          }

          /* sequence cut off by the end of the input: stop before it */
          if (srcend - cur < extra)
              break;

          while (extra > 0) {
              unsigned int cont = *cur++;

              if ((cont & 0xC0) != 0x80) {
                  status = -2;
                  break;
              }
              value = (value << 6) | (cont & 0x3F);
              extra--;
          }
          if (status != 0)
              break;

          /* value is now a single decoded code point */
          if (value > 0xFF) {
              /* no chance for this in IsoLat1 */
              status = -2;
              break;
          }
          if (dst >= dstend)
              break;
          *dst++ = value;
          done = cur;
      }

      *outlen = dst - out;
      *inlen = done - in;
      return((status != 0) ? status : *outlen);
  }
  437. #endif /* LIBXML_OUTPUT_ENABLED */
  /**
   * UTF16LEToUTF8:
   * @out:  a pointer to an array of bytes to store the result
   * @outlen:  the length of @out
   * @inb:  a pointer to an array of UTF-16LE passed as a byte array
   * @inlenb:  the length of @inb in bytes
   *
   * Take a block of UTF-16LE code units in and try to convert it to an
   * UTF-8 block of chars out.
   *
   * The input is read byte by byte (low byte first), so the result does not
   * depend on the byte order or alignment requirements of the machine.
   * A trailing odd byte, or a high surrogate whose partner has not arrived
   * yet, is left unconsumed. A character is only written when all of its
   * UTF-8 bytes fit into @out.
   *
   * Returns the number of bytes written, or -1 if @out, @outlen or @inlenb
   *     is NULL, or -2 if the transcoding fails (if @inb is not a valid
   *     UTF-16 string: unpaired high or low surrogate). A NULL @inb is
   *     treated as an empty input and returns 0.
   * The value of *@inlenb after return is the number of octets consumed,
   *     counting completely converted characters only.
   * The value of *@outlen after return is the number of octets produced.
   */
  static int
  UTF16LEToUTF8(unsigned char* out, int *outlen,
                const unsigned char* inb, int *inlenb)
  {
      unsigned char *outstart = out;
      unsigned char *outend;
      const unsigned char *in = inb;
      const unsigned char *inend;
      int ret = 0;

      if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
          return(-1);
      if (inb == NULL) {
          *outlen = 0;
          *inlenb = 0;
          return(0);
      }

      outend = out + *outlen;
      inend = inb + *inlenb;

      /* Invariant: `in` points at the first unconverted code unit. */
      while (inend - in >= 2) {
          const unsigned char *next = in + 2;
          unsigned int c = in[0] | ((unsigned int) in[1] << 8);
          int need;

          if ((c & 0xFC00) == 0xDC00) {
              /* low surrogate without a preceding high surrogate */
              ret = -2;
              break;
          }
          if ((c & 0xFC00) == 0xD800) {    /* surrogates */
              unsigned int d;

              if (inend - next < 2) {
                  /* second half not available yet */
                  break;
              }
              d = next[0] | ((unsigned int) next[1] << 8);
              if ((d & 0xFC00) != 0xDC00) {
                  ret = -2;
                  break;
              }
              c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
              next += 2;
          }

          /* c is a single code point: make sure all its bytes fit */
          if (c < 0x80)
              need = 1;
          else if (c < 0x800)
              need = 2;
          else if (c < 0x10000)
              need = 3;
          else
              need = 4;
          if (outend - out < need)
              break;

          if (c < 0x80) {
              *out++ = (unsigned char) c;
          } else if (c < 0x800) {
              *out++ = (unsigned char) (0xC0 | (c >> 6));
              *out++ = (unsigned char) (0x80 | (c & 0x3F));
          } else if (c < 0x10000) {
              *out++ = (unsigned char) (0xE0 | (c >> 12));
              *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
              *out++ = (unsigned char) (0x80 | (c & 0x3F));
          } else {
              *out++ = (unsigned char) (0xF0 | (c >> 18));
              *out++ = (unsigned char) (0x80 | ((c >> 12) & 0x3F));
              *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
              *out++ = (unsigned char) (0x80 | (c & 0x3F));
          }
          in = next;
      }

      *outlen = (int) (out - outstart);
      *inlenb = (int) (in - inb);
      return((ret < 0) ? ret : *outlen);
  }
  527. #ifdef LIBXML_OUTPUT_ENABLED
  /**
   * UTF8ToUTF16LE:
   * @outb:  a pointer to an array of bytes to store the result
   * @outlen:  the length of @outb, in bytes
   * @in:  a pointer to an array of UTF-8 chars
   * @inlen:  the length of @in, in bytes
   *
   * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
   * block of chars out. No byte order mark is written.
   *
   * The output is produced byte by byte, so @outb needs no particular
   * alignment and the result does not depend on the host byte order.
   * Conversion stops, without error, when the next character does not fit
   * in @outb or when @in ends in the middle of a multi-byte sequence; only
   * whole characters are consumed and produced.
   *
   * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
   *     NULL, or -2 if the input is not well-formed UTF-8 or encodes a
   *     value above 0x10FFFF.
   * Except when -1 is returned, *@outlen is set to the number of bytes
   * written and *@inlen to the number of input bytes consumed.
   */
  static int
  UTF8ToUTF16LE(unsigned char* outb, int *outlen,
              const unsigned char* in, int *inlen)
  {
      unsigned char *out = outb;
      unsigned char *outend;
      const unsigned char *const instart = in;
      const unsigned char *processed = in;
      const unsigned char *inend;
      unsigned int c, d, hi, lo;
      int trailing;

      /* UTF16LE encoding has no BOM */
      if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
      if (in == NULL) {
          *outlen = 0;
          *inlen = 0;
          return(0);
      }
      inend = in + *inlen;
      /* only whole 16-bit units can be stored: ignore an odd trailing byte */
      outend = outb + ((*outlen > 0) ? (*outlen / 2) * 2 : 0);

      while (in < inend) {
          d = *in++;
          if (d < 0x80) { c = d; trailing = 0; }
          else if (d < 0xC0) goto malformed;   /* trailing byte in leading position */
          else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
          else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
          else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
          else goto malformed;                 /* no chance for this in UTF-16 */

          /* sequence cut by the end of the input: keep it for the next call */
          if (inend - in < trailing)
              break;

          for ( ; trailing; trailing--) {
              d = *in++;
              /* a non-continuation byte here makes the whole sequence invalid */
              if ((d & 0xC0) != 0x80)
                  goto malformed;
              c = (c << 6) | (d & 0x3F);
          }

          if (c < 0x10000) {
              if (outend - out < 2)
                  break;
              *out++ = (unsigned char) (c & 0xFF);
              *out++ = (unsigned char) (c >> 8);
          } else if (c < 0x110000) {
              /* needs a surrogate pair, i.e. two 16-bit units */
              if (outend - out < 4)
                  break;
              c -= 0x10000;
              hi = 0xD800 | (c >> 10);
              lo = 0xDC00 | (c & 0x03FF);
              *out++ = (unsigned char) (hi & 0xFF);
              *out++ = (unsigned char) (hi >> 8);
              *out++ = (unsigned char) (lo & 0xFF);
              *out++ = (unsigned char) (lo >> 8);
          } else {
              /* beyond the range UTF-16 can encode */
              goto malformed;
          }
          processed = in;
      }
      *outlen = (int) (out - outb);
      *inlen = (int) (processed - instart);
      return(*outlen);

  malformed:
      /* report what was converted before the offending sequence */
      *outlen = (int) (out - outb);
      *inlen = (int) (processed - instart);
      return(-2);
  }
  /**
   * UTF8ToUTF16:
   * @outb:  a pointer to an array of bytes to store the result
   * @outlen:  the length of @outb
   * @in:  a pointer to an array of UTF-8 chars, or NULL for the
   *       initialization call
   * @inlen:  the length of @in
   *
   * Take a block of UTF-8 chars in and try to convert it to an UTF-16
   * block of chars out.
   *
   * When @in is NULL this is the initialization call: the bytes 0xFF 0xFE
   * (the little-endian Byte Order Mark) are written if @outb has room for
   * them. Any other call is forwarded unchanged to UTF8ToUTF16LE().
   *
   * Returns the number of bytes written, or the error code returned by
   *     UTF8ToUTF16LE(). The initialization call returns 2 when the mark
   *     was written, 0 when @outb is too small, and -1 if @outb, @outlen
   *     or @inlen is NULL.
   */
  static int
  UTF8ToUTF16(unsigned char* outb, int *outlen,
              const unsigned char* in, int *inlen)
  {
      if (in != NULL)
          return (UTF8ToUTF16LE(outb, outlen, in, inlen));

      /*
       * initialization, add the Byte Order Mark for UTF-16LE
       * (same NULL argument policy as UTF8ToUTF16LE)
       */
      if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
          return(-1);

      *inlen = 0;
      if (*outlen < 2) {
          /* no room for the mark: report that nothing was written */
          *outlen = 0;
          return(0);
      }
      outb[0] = 0xFF;
      outb[1] = 0xFE;
      *outlen = 2;
  #ifdef DEBUG_ENCODING
      xmlGenericError(xmlGenericErrorContext,
              "Added FFFE Byte Order Mark\n");
  #endif
      return(2);
  }
  669. #endif /* LIBXML_OUTPUT_ENABLED */
  /**
   * UTF16BEToUTF8:
   * @out:  a pointer to an array of bytes to store the result
   * @outlen:  the length of @out, in bytes
   * @inb:  a pointer to an array of UTF-16BE passed as a byte array
   * @inlenb:  the length of @inb, in bytes
   *
   * Take a block of big-endian UTF-16 in and try to convert it to an UTF-8
   * block of chars out. The input is read byte by byte, so @inb needs no
   * particular alignment and the host byte order is irrelevant.
   *
   * An odd trailing input byte is ignored. Conversion stops, without error,
   * when the next character does not fit completely in @out or when the
   * input ends between the two halves of a surrogate pair; a partial UTF-8
   * sequence is never written.
   *
   * Returns the number of bytes written, or -2 if a high surrogate is not
   *     followed by a low surrogate.
   * On return *@outlen is the number of bytes written and *@inlenb the
   * number of input bytes consumed; after a -2 both describe the data
   * converted before the offending unit.
   */
  static int
  UTF16BEToUTF8(unsigned char* out, int *outlen,
              const unsigned char* inb, int *inlenb)
  {
      unsigned char *const outstart = out;
      unsigned char *const outend = out + *outlen;
      const unsigned char *in = inb;
      const unsigned char *processed = inb;
      const unsigned char *inend;
      unsigned int c, d;
      unsigned char lead;
      int bits, need;

      if ((*inlenb % 2) == 1)
          (*inlenb)--;
      inend = inb + *inlenb;

      while (inend - in >= 2) {
          c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
          in += 2;
          if ((c & 0xFC00) == 0xD800) {    /* surrogates */
              /* pair split by the end of the input: wait for the rest */
              if (inend - in < 2)
                  break;
              d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
              in += 2;
              if ((d & 0xFC00) != 0xDC00) {
                  *outlen = (int) (out - outstart);
                  *inlenb = (int) (processed - inb);
                  return(-2);
              }
              c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
          }

          /* c is a single code point: size the sequence before writing */
          if (c < 0x80) {
              need = 1; lead = (unsigned char) c;
          } else if (c < 0x800) {
              need = 2; lead = (unsigned char) (((c >> 6) & 0x1F) | 0xC0);
          } else if (c < 0x10000) {
              need = 3; lead = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
          } else {
              need = 4; lead = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
          }
          /* never emit a truncated sequence: leave the char for next call */
          if (outend - out < need)
              break;

          *out++ = lead;
          for (bits = (need - 2) * 6; bits >= 0; bits -= 6)
              *out++ = (unsigned char) (((c >> bits) & 0x3F) | 0x80);
          processed = in;
      }
      *outlen = (int) (out - outstart);
      *inlenb = (int) (processed - inb);
      return(*outlen);
  }
  758. #ifdef LIBXML_OUTPUT_ENABLED
  /**
   * UTF8ToUTF16BE:
   * @outb:  a pointer to an array of bytes to store the result
   * @outlen:  the length of @outb, in bytes
   * @in:  a pointer to an array of UTF-8 chars
   * @inlen:  the length of @in, in bytes
   *
   * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
   * block of chars out. No byte order mark is written.
   *
   * The output is produced byte by byte, so @outb needs no particular
   * alignment and the result does not depend on the host byte order.
   * Conversion stops, without error, when the next character does not fit
   * in @outb or when @in ends in the middle of a multi-byte sequence; only
   * whole characters are consumed and produced.
   *
   * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
   *     NULL, or -2 if the input is not well-formed UTF-8 or encodes a
   *     value above 0x10FFFF.
   * Except when -1 is returned, *@outlen is set to the number of bytes
   * (not 16-bit units) written and *@inlen to the number of input bytes
   * consumed.
   */
  static int
  UTF8ToUTF16BE(unsigned char* outb, int *outlen,
              const unsigned char* in, int *inlen)
  {
      unsigned char *out = outb;
      unsigned char *outend;
      const unsigned char *const instart = in;
      const unsigned char *processed = in;
      const unsigned char *inend;
      unsigned int c, d, hi, lo;
      int trailing;

      /* UTF-16BE has no BOM */
      if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
      if (in == NULL) {
          *outlen = 0;
          *inlen = 0;
          return(0);
      }
      inend = in + *inlen;
      /* only whole 16-bit units can be stored: ignore an odd trailing byte */
      outend = outb + ((*outlen > 0) ? (*outlen / 2) * 2 : 0);

      while (in < inend) {
          d = *in++;
          if (d < 0x80) { c = d; trailing = 0; }
          else if (d < 0xC0) goto malformed;   /* trailing byte in leading position */
          else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
          else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
          else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
          else goto malformed;                 /* no chance for this in UTF-16 */

          /* sequence cut by the end of the input: keep it for the next call */
          if (inend - in < trailing)
              break;

          for ( ; trailing; trailing--) {
              d = *in++;
              /* a non-continuation byte here makes the whole sequence invalid */
              if ((d & 0xC0) != 0x80)
                  goto malformed;
              c = (c << 6) | (d & 0x3F);
          }

          if (c < 0x10000) {
              if (outend - out < 2)
                  break;
              *out++ = (unsigned char) (c >> 8);
              *out++ = (unsigned char) (c & 0xFF);
          } else if (c < 0x110000) {
              /* needs a surrogate pair, i.e. two 16-bit units */
              if (outend - out < 4)
                  break;
              c -= 0x10000;
              hi = 0xD800 | (c >> 10);
              lo = 0xDC00 | (c & 0x03FF);
              *out++ = (unsigned char) (hi >> 8);
              *out++ = (unsigned char) (hi & 0xFF);
              *out++ = (unsigned char) (lo >> 8);
              *out++ = (unsigned char) (lo & 0xFF);
          } else {
              /* beyond the range UTF-16 can encode */
              goto malformed;
          }
          processed = in;
      }
      *outlen = (int) (out - outb);
      *inlen = (int) (processed - instart);
      return(*outlen);

  malformed:
      /* byte counts of what was converted before the offending sequence */
      *outlen = (int) (out - outb);
      *inlen = (int) (processed - instart);
      return(-2);
  }
  859. #endif /* LIBXML_OUTPUT_ENABLED */
  860. /************************************************************************
  861. * *
  862. * Generic encoding handling routines *
  863. * *
  864. ************************************************************************/
  865. /**
  866. * xmlDetectCharEncoding:
  867. * @in: a pointer to the first bytes of the XML entity, must be at least
  868. * 2 bytes long (at least 4 if encoding is UTF4 variant).
  869. * @len: pointer to the length of the buffer
  870. *
  871. * Guess the encoding of the entity using the first bytes of the entity content
  872. * according to the non-normative appendix F of the XML-1.0 recommendation.
  873. *
  874. * Returns one of the XML_CHAR_ENCODING_... values.
  875. */
  876. xmlCharEncoding
  877. xmlDetectCharEncoding(const unsigned char* in, int len)
  878. {
  879. if (in == NULL)
  880. return(XML_CHAR_ENCODING_NONE);
  881. if (len >= 4) {
  882. if ((in[0] == 0x00) && (in[1] == 0x00) &&
  883. (in[2] == 0x00) && (in[3] == 0x3C))
  884. return(XML_CHAR_ENCODING_UCS4BE);
  885. if ((in[0] == 0x3C) && (in[1] == 0x00) &&
  886. (in[2] == 0x00) && (in[3] == 0x00))
  887. return(XML_CHAR_ENCODING_UCS4LE);
  888. if ((in[0] == 0x00) && (in[1] == 0x00) &&
  889. (in[2] == 0x3C) && (in[3] == 0x00))
  890. return(XML_CHAR_ENCODING_UCS4_2143);
  891. if ((in[0] == 0x00) && (in[1] == 0x3C) &&
  892. (in[2] == 0x00) && (in[3] == 0x00))
  893. return(XML_CHAR_ENCODING_UCS4_3412);
  894. if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
  895. (in[2] == 0xA7) && (in[3] == 0x94))
  896. return(XML_CHAR_ENCODING_EBCDIC);
  897. if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
  898. (in[2] == 0x78) && (in[3] == 0x6D))
  899. return(XML_CHAR_ENCODING_UTF8);
  900. /*
  901. * Although not part of the recommendation, we also
  902. * attempt an "auto-recognition" of UTF-16LE and
  903. * UTF-16BE encodings.
  904. */
  905. if ((in[0] == 0x3C) && (in[1] == 0x00) &&
  906. (in[2] == 0x3F) && (in[3] == 0x00))
  907. return(XML_CHAR_ENCODING_UTF16LE);
  908. if ((in[0] == 0x00) && (in[1] == 0x3C) &&
  909. (in[2] == 0x00) && (in[3] == 0x3F))
  910. return(XML_CHAR_ENCODING_UTF16BE);
  911. }
  912. if (len >= 3) {
  913. /*
  914. * Errata on XML-1.0 June 20 2001
  915. * We now allow an UTF8 encoded BOM
  916. */
  917. if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
  918. (in[2] == 0xBF))
  919. return(XML_CHAR_ENCODING_UTF8);
  920. }
  921. /* For UTF-16 we can recognize by the BOM */
  922. if (len >= 2) {
  923. if ((in[0] == 0xFE) && (in[1] == 0xFF))
  924. return(XML_CHAR_ENCODING_UTF16BE);
  925. if ((in[0] == 0xFF) && (in[1] == 0xFE))
  926. return(XML_CHAR_ENCODING_UTF16LE);
  927. }
  928. return(XML_CHAR_ENCODING_NONE);
  929. }
  930. /**
  931. * xmlCleanupEncodingAliases:
  932. *
  933. * Unregisters all aliases
  934. */
  935. void
  936. xmlCleanupEncodingAliases(void) {
  937. int i;
  938. if (xmlCharEncodingAliases == NULL)
  939. return;
  940. for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
  941. if (xmlCharEncodingAliases[i].name != NULL)
  942. xmlFree((char *) xmlCharEncodingAliases[i].name);
  943. if (xmlCharEncodingAliases[i].alias != NULL)
  944. xmlFree((char *) xmlCharEncodingAliases[i].alias);
  945. }
  946. xmlCharEncodingAliasesNb = 0;
  947. xmlCharEncodingAliasesMax = 0;
  948. xmlFree(xmlCharEncodingAliases);
  949. xmlCharEncodingAliases = NULL;
  950. }
  951. /**
  952. * xmlGetEncodingAlias:
  953. * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
  954. *
  955. * Lookup an encoding name for the given alias.
  956. *
  957. * Returns NULL if not found, otherwise the original name
  958. */
  959. const char *
  960. xmlGetEncodingAlias(const char *alias) {
  961. int i;
  962. char upper[100];
  963. if (alias == NULL)
  964. return(NULL);
  965. if (xmlCharEncodingAliases == NULL)
  966. return(NULL);
  967. for (i = 0;i < 99;i++) {
  968. upper[i] = toupper(alias[i]);
  969. if (upper[i] == 0) break;
  970. }
  971. upper[i] = 0;
  972. /*
  973. * Walk down the list looking for a definition of the alias
  974. */
  975. for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
  976. if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
  977. return(xmlCharEncodingAliases[i].name);
  978. }
  979. }
  980. return(NULL);
  981. }
  982. /**
  983. * xmlAddEncodingAlias:
  984. * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
  985. * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
  986. *
  987. * Registers an alias @alias for an encoding named @name. Existing alias
  988. * will be overwritten.
  989. *
  990. * Returns 0 in case of success, -1 in case of error
  991. */
  992. int
  993. xmlAddEncodingAlias(const char *name, const char *alias) {
  994. int i;
  995. char upper[100];
  996. if ((name == NULL) || (alias == NULL))
  997. return(-1);
  998. for (i = 0;i < 99;i++) {
  999. upper[i] = toupper(alias[i]);
  1000. if (upper[i] == 0) break;
  1001. }
  1002. upper[i] = 0;
  1003. if (xmlCharEncodingAliases == NULL) {
  1004. xmlCharEncodingAliasesNb = 0;
  1005. xmlCharEncodingAliasesMax = 20;
  1006. xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
  1007. xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
  1008. if (xmlCharEncodingAliases == NULL)
  1009. return(-1);
  1010. } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
  1011. xmlCharEncodingAliasesMax *= 2;
  1012. xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
  1013. xmlRealloc(xmlCharEncodingAliases,
  1014. xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
  1015. }
  1016. /*
  1017. * Walk down the list looking for a definition of the alias
  1018. */
  1019. for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
  1020. if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
  1021. /*
  1022. * Replace the definition.
  1023. */
  1024. xmlFree((char *) xmlCharEncodingAliases[i].name);
  1025. xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
  1026. return(0);
  1027. }
  1028. }
  1029. /*
  1030. * Add the definition
  1031. */
  1032. xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
  1033. xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
  1034. xmlCharEncodingAliasesNb++;
  1035. return(0);
  1036. }
  1037. /**
  1038. * xmlDelEncodingAlias:
  1039. * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
  1040. *
  1041. * Unregisters an encoding alias @alias
  1042. *
  1043. * Returns 0 in case of success, -1 in case of error
  1044. */
  1045. int
  1046. xmlDelEncodingAlias(const char *alias) {
  1047. int i;
  1048. if (alias == NULL)
  1049. return(-1);
  1050. if (xmlCharEncodingAliases == NULL)
  1051. return(-1);
  1052. /*
  1053. * Walk down the list looking for a definition of the alias
  1054. */
  1055. for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
  1056. if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
  1057. xmlFree((char *) xmlCharEncodingAliases[i].name);
  1058. xmlFree((char *) xmlCharEncodingAliases[i].alias);
  1059. xmlCharEncodingAliasesNb--;
  1060. memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
  1061. sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
  1062. return(0);
  1063. }
  1064. }
  1065. return(-1);
  1066. }
  1067. /**
  1068. * xmlParseCharEncoding:
  1069. * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
  1070. *
  1071. * Compare the string to the encoding schemes already known. Note
  1072. * that the comparison is case insensitive accordingly to the section
  1073. * [XML] 4.3.3 Character Encoding in Entities.
  1074. *
  1075. * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
  1076. * if not recognized.
  1077. */
  1078. xmlCharEncoding
  1079. xmlParseCharEncoding(const char* name)
  1080. {
  1081. const char *alias;
  1082. char upper[500];
  1083. int i;
  1084. if (name == NULL)
  1085. return(XML_CHAR_ENCODING_NONE);
  1086. /*
  1087. * Do the alias resolution
  1088. */
  1089. alias = xmlGetEncodingAlias(name);
  1090. if (alias != NULL)
  1091. name = alias;
  1092. for (i = 0;i < 499;i++) {
  1093. upper[i] = toupper(name[i]);
  1094. if (upper[i] == 0) break;
  1095. }
  1096. upper[i] = 0;
  1097. if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
  1098. if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
  1099. if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
  1100. /*
  1101. * NOTE: if we were able to parse this, the endianness of UTF16 is
  1102. * already found and in use
  1103. */
  1104. if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
  1105. if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
  1106. if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
  1107. if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
  1108. if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
  1109. /*
  1110. * NOTE: if we were able to parse this, the endianness of UCS4 is
  1111. * already found and in use
  1112. */
  1113. if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
  1114. if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
  1115. if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
  1116. if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
  1117. if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
  1118. if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
  1119. if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
  1120. if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
  1121. if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
  1122. if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
  1123. if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
  1124. if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
  1125. if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
  1126. if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
  1127. if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
  1128. if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
  1129. if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
  1130. if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
  1131. if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
  1132. #ifdef DEBUG_ENCODING
  1133. xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
  1134. #endif
  1135. return(XML_CHAR_ENCODING_ERROR);
  1136. }
  1137. /**
  1138. * xmlGetCharEncodingName:
  1139. * @enc: the encoding
  1140. *
  1141. * The "canonical" name for XML encoding.
  1142. * C.f. http://www.w3.org/TR/REC-xml#charencoding
  1143. * Section 4.3.3 Character Encoding in Entities
  1144. *
  1145. * Returns the canonical name for the given encoding
  1146. */
  1147. const char*
  1148. xmlGetCharEncodingName(xmlCharEncoding enc) {
  1149. switch (enc) {
  1150. case XML_CHAR_ENCODING_ERROR:
  1151. return(NULL);
  1152. case XML_CHAR_ENCODING_NONE:
  1153. return(NULL);
  1154. case XML_CHAR_ENCODING_UTF8:
  1155. return("UTF-8");
  1156. case XML_CHAR_ENCODING_UTF16LE:
  1157. return("UTF-16");
  1158. case XML_CHAR_ENCODING_UTF16BE:
  1159. return("UTF-16");
  1160. case XML_CHAR_ENCODING_EBCDIC:
  1161. return("EBCDIC");
  1162. case XML_CHAR_ENCODING_UCS4LE:
  1163. return("ISO-10646-UCS-4");
  1164. case XML_CHAR_ENCODING_UCS4BE:
  1165. return("ISO-10646-UCS-4");
  1166. case XML_CHAR_ENCODING_UCS4_2143:
  1167. return("ISO-10646-UCS-4");
  1168. case XML_CHAR_ENCODING_UCS4_3412:
  1169. return("ISO-10646-UCS-4");
  1170. case XML_CHAR_ENCODING_UCS2:
  1171. return("ISO-10646-UCS-2");
  1172. case XML_CHAR_ENCODING_8859_1:
  1173. return("ISO-8859-1");
  1174. case XML_CHAR_ENCODING_8859_2:
  1175. return("ISO-8859-2");
  1176. case XML_CHAR_ENCODING_8859_3:
  1177. return("ISO-8859-3");
  1178. case XML_CHAR_ENCODING_8859_4:
  1179. return("ISO-8859-4");
  1180. case XML_CHAR_ENCODING_8859_5:
  1181. return("ISO-8859-5");
  1182. case XML_CHAR_ENCODING_8859_6:
  1183. return("ISO-8859-6");
  1184. case XML_CHAR_ENCODING_8859_7:
  1185. return("ISO-8859-7");
  1186. case XML_CHAR_ENCODING_8859_8:
  1187. return("ISO-8859-8");
  1188. case XML_CHAR_ENCODING_8859_9:
  1189. return("ISO-8859-9");
  1190. case XML_CHAR_ENCODING_2022_JP:
  1191. return("ISO-2022-JP");
  1192. case XML_CHAR_ENCODING_SHIFT_JIS:
  1193. return("Shift-JIS");
  1194. case XML_CHAR_ENCODING_EUC_JP:
  1195. return("EUC-JP");
  1196. case XML_CHAR_ENCODING_ASCII:
  1197. return(NULL);
  1198. }
  1199. return(NULL);
  1200. }
  /************************************************************************
   *									*
   *		Char encoding handlers					*
   *									*
   ************************************************************************/

  /*
   * Registry of the encoding handlers. xmlInitCharEncodingHandlers()
   * allocates @handlers with room for MAX_ENCODING_HANDLERS pointers and
   * xmlRegisterCharEncodingHandler() rejects any registration beyond that.
   * @nbCharEncodingHandler is the number of slots currently in use.
   */
  /* the size should be growable, but it's not a big deal ... */
  #define MAX_ENCODING_HANDLERS 50
  static xmlCharEncodingHandlerPtr *handlers = NULL;
  static int nbCharEncodingHandler = 0;

  /*
   * The default is UTF-8 for XML, that's also the default used for the
   * parser internals, so the default encoding handler is NULL
   */
  static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
  1215. /**
  1216. * xmlNewCharEncodingHandler:
  1217. * @name: the encoding name, in UTF-8 format (ASCII actually)
  1218. * @input: the xmlCharEncodingInputFunc to read that encoding
  1219. * @output: the xmlCharEncodingOutputFunc to write that encoding
  1220. *
  1221. * Create and registers an xmlCharEncodingHandler.
  1222. *
  1223. * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
  1224. */
  1225. xmlCharEncodingHandlerPtr
  1226. xmlNewCharEncodingHandler(const char *name,
  1227. xmlCharEncodingInputFunc input,
  1228. xmlCharEncodingOutputFunc output) {
  1229. xmlCharEncodingHandlerPtr handler;
  1230. const char *alias;
  1231. char upper[500];
  1232. int i;
  1233. char *up = NULL;
  1234. /*
  1235. * Do the alias resolution
  1236. */
  1237. alias = xmlGetEncodingAlias(name);
  1238. if (alias != NULL)
  1239. name = alias;
  1240. /*
  1241. * Keep only the uppercase version of the encoding.
  1242. */
  1243. if (name == NULL) {
  1244. xmlEncodingErr(XML_I18N_NO_NAME,
  1245. "xmlNewCharEncodingHandler : no name !\n", NULL);
  1246. return(NULL);
  1247. }
  1248. for (i = 0;i < 499;i++) {
  1249. upper[i] = toupper(name[i]);
  1250. if (upper[i] == 0) break;
  1251. }
  1252. upper[i] = 0;
  1253. up = xmlMemStrdup(upper);
  1254. if (up == NULL) {
  1255. xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
  1256. return(NULL);
  1257. }
  1258. /*
  1259. * allocate and fill-up an handler block.
  1260. */
  1261. handler = (xmlCharEncodingHandlerPtr)
  1262. xmlMalloc(sizeof(xmlCharEncodingHandler));
  1263. if (handler == NULL) {
  1264. xmlFree(up);
  1265. xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
  1266. return(NULL);
  1267. }
  1268. memset(handler, 0, sizeof(xmlCharEncodingHandler));
  1269. handler->input = input;
  1270. handler->output = output;
  1271. handler->name = up;
  1272. #ifdef LIBXML_ICONV_ENABLED
  1273. handler->iconv_in = NULL;
  1274. handler->iconv_out = NULL;
  1275. #endif
  1276. #ifdef LIBXML_ICU_ENABLED
  1277. handler->uconv_in = NULL;
  1278. handler->uconv_out = NULL;
  1279. #endif
  1280. /*
  1281. * registers and returns the handler.
  1282. */
  1283. xmlRegisterCharEncodingHandler(handler);
  1284. #ifdef DEBUG_ENCODING
  1285. xmlGenericError(xmlGenericErrorContext,
  1286. "Registered encoding handler for %s\n", name);
  1287. #endif
  1288. return(handler);
  1289. }
  1290. /**
  1291. * xmlInitCharEncodingHandlers:
  1292. *
  1293. * Initialize the char encoding support, it registers the default
  1294. * encoding supported.
  1295. * NOTE: while public, this function usually doesn't need to be called
  1296. * in normal processing.
  1297. */
  1298. void
  1299. xmlInitCharEncodingHandlers(void) {
  1300. unsigned short int tst = 0x1234;
  1301. unsigned char *ptr = (unsigned char *) &tst;
  1302. if (handlers != NULL) return;
  1303. handlers = (xmlCharEncodingHandlerPtr *)
  1304. xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
  1305. if (*ptr == 0x12) xmlLittleEndian = 0;
  1306. else if (*ptr == 0x34) xmlLittleEndian = 1;
  1307. else {
  1308. xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
  1309. "Odd problem at endianness detection\n", NULL);
  1310. }
  1311. if (handlers == NULL) {
  1312. xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
  1313. return;
  1314. }
  1315. xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
  1316. #ifdef LIBXML_OUTPUT_ENABLED
  1317. xmlUTF16LEHandler =
  1318. xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
  1319. xmlUTF16BEHandler =
  1320. xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
  1321. xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
  1322. xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
  1323. xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
  1324. xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
  1325. #ifdef LIBXML_HTML_ENABLED
  1326. xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
  1327. #endif
  1328. #else
  1329. xmlUTF16LEHandler =
  1330. xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
  1331. xmlUTF16BEHandler =
  1332. xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
  1333. xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
  1334. xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
  1335. xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
  1336. xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
  1337. #endif /* LIBXML_OUTPUT_ENABLED */
  1338. #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
  1339. #ifdef LIBXML_ISO8859X_ENABLED
  1340. xmlRegisterCharEncodingHandlersISO8859x ();
  1341. #endif
  1342. #endif
  1343. }
  1344. /**
  1345. * xmlCleanupCharEncodingHandlers:
  1346. *
  1347. * Cleanup the memory allocated for the char encoding support, it
  1348. * unregisters all the encoding handlers and the aliases.
  1349. */
  1350. void
  1351. xmlCleanupCharEncodingHandlers(void) {
  1352. xmlCleanupEncodingAliases();
  1353. if (handlers == NULL) return;
  1354. for (;nbCharEncodingHandler > 0;) {
  1355. nbCharEncodingHandler--;
  1356. if (handlers[nbCharEncodingHandler] != NULL) {
  1357. if (handlers[nbCharEncodingHandler]->name != NULL)
  1358. xmlFree(handlers[nbCharEncodingHandler]->name);
  1359. xmlFree(handlers[nbCharEncodingHandler]);
  1360. }
  1361. }
  1362. xmlFree(handlers);
  1363. handlers = NULL;
  1364. nbCharEncodingHandler = 0;
  1365. xmlDefaultCharEncodingHandler = NULL;
  1366. }
  1367. /**
  1368. * xmlRegisterCharEncodingHandler:
  1369. * @handler: the xmlCharEncodingHandlerPtr handler block
  1370. *
  1371. * Register the char encoding handler, surprising, isn't it ?
  1372. */
  1373. void
  1374. xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
  1375. if (handlers == NULL) xmlInitCharEncodingHandlers();
  1376. if ((handler == NULL) || (handlers == NULL)) {
  1377. xmlEncodingErr(XML_I18N_NO_HANDLER,
  1378. "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
  1379. goto free_handler;
  1380. }
  1381. if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
  1382. xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
  1383. "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
  1384. "MAX_ENCODING_HANDLERS");
  1385. goto free_handler;
  1386. }
  1387. handlers[nbCharEncodingHandler++] = handler;
  1388. return;
  1389. free_handler:
  1390. if (handler != NULL) {
  1391. if (handler->name != NULL) {
  1392. xmlFree(handler->name);
  1393. }
  1394. xmlFree(handler);
  1395. }
  1396. }
  1397. /**
  1398. * xmlGetCharEncodingHandler:
  1399. * @enc: an xmlCharEncoding value.
  1400. *
  1401. * Search in the registered set the handler able to read/write that encoding.
  1402. *
  1403. * Returns the handler or NULL if not found
  1404. */
  1405. xmlCharEncodingHandlerPtr
  1406. xmlGetCharEncodingHandler(xmlCharEncoding enc) {
  1407. xmlCharEncodingHandlerPtr handler;
  1408. if (handlers == NULL) xmlInitCharEncodingHandlers();
  1409. switch (enc) {
  1410. case XML_CHAR_ENCODING_ERROR:
  1411. return(NULL);
  1412. case XML_CHAR_ENCODING_NONE:
  1413. return(NULL);
  1414. case XML_CHAR_ENCODING_UTF8:
  1415. return(NULL);
  1416. case XML_CHAR_ENCODING_UTF16LE:
  1417. return(xmlUTF16LEHandler);
  1418. case XML_CHAR_ENCODING_UTF16BE:
  1419. return(xmlUTF16BEHandler);
  1420. case XML_CHAR_ENCODING_EBCDIC:
  1421. handler = xmlFindCharEncodingHandler("EBCDIC");
  1422. if (handler != NULL) return(handler);
  1423. handler = xmlFindCharEncodingHandler("ebcdic");
  1424. if (handler != NULL) return(handler);
  1425. handler = xmlFindCharEncodingHandler("EBCDIC-US");
  1426. if (handler != NULL) return(handler);
  1427. handler = xmlFindCharEncodingHandler("IBM-037");
  1428. if (handler != NULL) return(handler);
  1429. break;
  1430. case XML_CHAR_ENCODING_UCS4BE:
  1431. handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
  1432. if (handler != NULL) return(handler);
  1433. handler = xmlFindCharEncodingHandler("UCS-4");
  1434. if (handler != NULL) return(handler);
  1435. handler = xmlFindCharEncodingHandler("UCS4");
  1436. if (handler != NULL) return(handler);
  1437. break;
  1438. case XML_CHAR_ENCODING_UCS4LE:
  1439. handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
  1440. if (handler != NULL) return(handler);
  1441. handler = xmlFindCharEncodingHandler("UCS-4");
  1442. if (handler != NULL) return(handler);
  1443. handler = xmlFindCharEncodingHandler("UCS4");
  1444. if (handler != NULL) return(handler);
  1445. break;
  1446. case XML_CHAR_ENCODING_UCS4_2143:
  1447. break;
  1448. case XML_CHAR_ENCODING_UCS4_3412:
  1449. break;
  1450. case XML_CHAR_ENCODING_UCS2:
  1451. handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
  1452. if (handler != NULL) return(handler);
  1453. handler = xmlFindCharEncodingHandler("UCS-2");
  1454. if (handler != NULL) return(handler);
  1455. handler = xmlFindCharEncodingHandler("UCS2");
  1456. if (handler != NULL) return(handler);
  1457. break;
  1458. /*
  1459. * We used to keep ISO Latin encodings native in the
  1460. * generated data. This led to so many problems that
  1461. * this has been removed. One can still change this
  1462. * back by registering no-ops encoders for those
  1463. */
  1464. case XML_CHAR_ENCODING_8859_1:
  1465. handler = xmlFindCharEncodingHandler("ISO-8859-1");
  1466. if (handler != NULL) return(handler);
  1467. break;
  1468. case XML_CHAR_ENCODING_8859_2:
  1469. handler = xmlFindCharEncodingHandler("ISO-8859-2");
  1470. if (handler != NULL) return(handler);
  1471. break;
  1472. case XML_CHAR_ENCODING_8859_3:
  1473. handler = xmlFindCharEncodingHandler("ISO-8859-3");
  1474. if (handler != NULL) return(handler);
  1475. break;
  1476. case XML_CHAR_ENCODING_8859_4:
  1477. handler = xmlFindCharEncodingHandler("ISO-8859-4");
  1478. if (handler != NULL) return(handler);
  1479. break;
  1480. case XML_CHAR_ENCODING_8859_5:
  1481. handler = xmlFindCharEncodingHandler("ISO-8859-5");
  1482. if (handler != NULL) return(handler);
  1483. break;
  1484. case XML_CHAR_ENCODING_8859_6:
  1485. handler = xmlFindCharEncodingHandler("ISO-8859-6");
  1486. if (handler != NULL) return(handler);
  1487. break;
  1488. case XML_CHAR_ENCODING_8859_7:
  1489. handler = xmlFindCharEncodingHandler("ISO-8859-7");
  1490. if (handler != NULL) return(handler);
  1491. break;
  1492. case XML_CHAR_ENCODING_8859_8:
  1493. handler = xmlFindCharEncodingHandler("ISO-8859-8");
  1494. if (handler != NULL) return(handler);
  1495. break;
  1496. case XML_CHAR_ENCODING_8859_9:
  1497. handler = xmlFindCharEncodingHandler("ISO-8859-9");
  1498. if (handler != NULL) return(handler);
  1499. break;
  1500. case XML_CHAR_ENCODING_2022_JP:
  1501. handler = xmlFindCharEncodingHandler("ISO-2022-JP");
  1502. if (handler != NULL) return(handler);
  1503. break;
  1504. case XML_CHAR_ENCODING_SHIFT_JIS:
  1505. handler = xmlFindCharEncodingHandler("SHIFT-JIS");
  1506. if (handler != NULL) return(handler);
  1507. handler = xmlFindCharEncodingHandler("SHIFT_JIS");
  1508. if (handler != NULL) return(handler);
  1509. handler = xmlFindCharEncodingHandler("Shift_JIS");
  1510. if (handler != NULL) return(handler);
  1511. break;
  1512. case XML_CHAR_ENCODING_EUC_JP:
  1513. handler = xmlFindCharEncodingHandler("EUC-JP");
  1514. if (handler != NULL) return(handler);
  1515. break;
  1516. default:
  1517. break;
  1518. }
  1519. #ifdef DEBUG_ENCODING
  1520. xmlGenericError(xmlGenericErrorContext,
  1521. "No handler found for encoding %d\n", enc);
  1522. #endif
  1523. return(NULL);
  1524. }
  1525. /**
  1526. * xmlFindCharEncodingHandler:
  1527. * @name: a string describing the char encoding.
  1528. *
  1529. * Search in the registered set the handler able to read/write that encoding.
  1530. *
  1531. * Returns the handler or NULL if not found
  1532. */
  1533. xmlCharEncodingHandlerPtr
  1534. xmlFindCharEncodingHandler(const char *name) {
  1535. const char *nalias;
  1536. const char *norig;
  1537. xmlCharEncoding alias;
  1538. #ifdef LIBXML_ICONV_ENABLED
  1539. xmlCharEncodingHandlerPtr enc;
  1540. iconv_t icv_in, icv_out;
  1541. #endif /* LIBXML_ICONV_ENABLED */
  1542. #ifdef LIBXML_ICU_ENABLED
  1543. xmlCharEncodingHandlerPtr encu;
  1544. uconv_t *ucv_in, *ucv_out;
  1545. #endif /* LIBXML_ICU_ENABLED */
  1546. char upper[100];
  1547. int i;
  1548. if (handlers == NULL) xmlInitCharEncodingHandlers();
  1549. if (name == NULL) return(xmlDefaultCharEncodingHandler);
  1550. if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
  1551. /*
  1552. * Do the alias resolution
  1553. */
  1554. norig = name;
  1555. nalias = xmlGetEncodingAlias(name);
  1556. if (nalias != NULL)
  1557. name = nalias;
  1558. /*
  1559. * Check first for directly registered encoding names
  1560. */
  1561. for (i = 0;i < 99;i++) {
  1562. upper[i] = toupper(name[i]);
  1563. if (upper[i] == 0) break;
  1564. }
  1565. upper[i] = 0;
  1566. if (handlers != NULL) {
  1567. for (i = 0;i < nbCharEncodingHandler; i++) {
  1568. if (!strcmp(upper, handlers[i]->name)) {
  1569. #ifdef DEBUG_ENCODING
  1570. xmlGenericError(xmlGenericErrorContext,
  1571. "Found registered handler for encoding %s\n", name);
  1572. #endif
  1573. return(handlers[i]);
  1574. }
  1575. }
  1576. }
  1577. #ifdef LIBXML_ICONV_ENABLED
  1578. /* check whether iconv can handle this */
  1579. icv_in = iconv_open("UTF-8", name);
  1580. icv_out = iconv_open(name, "UTF-8");
  1581. if (icv_in == (iconv_t) -1) {
  1582. icv_in = iconv_open("UTF-8", upper);
  1583. }
  1584. if (icv_out == (iconv_t) -1) {
  1585. icv_out = iconv_open(upper, "UTF-8");
  1586. }
  1587. if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
  1588. enc = (xmlCharEncodingHandlerPtr)
  1589. xmlMalloc(sizeof(xmlCharEncodingHandler));
  1590. if (enc == NULL) {
  1591. iconv_close(icv_in);
  1592. iconv_close(icv_out);
  1593. return(NULL);
  1594. }
  1595. memset(enc, 0, sizeof(xmlCharEncodingHandler));
  1596. enc->name = xmlMemStrdup(name);
  1597. enc->input = NULL;
  1598. enc->output = NULL;
  1599. enc->iconv_in = icv_in;
  1600. enc->iconv_out = icv_out;
  1601. #ifdef DEBUG_ENCODING
  1602. xmlGenericError(xmlGenericErrorContext,
  1603. "Found iconv handler for encoding %s\n", name);
  1604. #endif
  1605. return enc;
  1606. } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
  1607. xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
  1608. "iconv : problems with filters for '%s'\n", name);
  1609. }
  1610. #endif /* LIBXML_ICONV_ENABLED */
  1611. #ifdef LIBXML_ICU_ENABLED
  1612. /* check whether icu can handle this */
  1613. ucv_in = openIcuConverter(name, 1);
  1614. ucv_out = openIcuConverter(name, 0);
  1615. if (ucv_in != NULL && ucv_out != NULL) {
  1616. encu = (xmlCharEncodingHandlerPtr)
  1617. xmlMalloc(sizeof(xmlCharEncodingHandler));
  1618. if (encu == NULL) {
  1619. closeIcuConverter(ucv_in);
  1620. closeIcuConverter(ucv_out);
  1621. return(NULL);
  1622. }
  1623. memset(encu, 0, sizeof(xmlCharEncodingHandler));
  1624. encu->name = xmlMemStrdup(name);
  1625. encu->input = NULL;
  1626. encu->output = NULL;
  1627. encu->uconv_in = ucv_in;
  1628. encu->uconv_out = ucv_out;
  1629. #ifdef DEBUG_ENCODING
  1630. xmlGenericError(xmlGenericErrorContext,
  1631. "Found ICU converter handler for encoding %s\n", name);
  1632. #endif
  1633. return encu;
  1634. } else if (ucv_in != NULL || ucv_out != NULL) {
  1635. closeIcuConverter(ucv_in);
  1636. closeIcuConverter(ucv_out);
  1637. xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
  1638. "ICU converter : problems with filters for '%s'\n", name);
  1639. }
  1640. #endif /* LIBXML_ICU_ENABLED */
  1641. #ifdef DEBUG_ENCODING
  1642. xmlGenericError(xmlGenericErrorContext,
  1643. "No handler found for encoding %s\n", name);
  1644. #endif
  1645. /*
  1646. * Fallback using the canonical names
  1647. */
  1648. alias = xmlParseCharEncoding(norig);
  1649. if (alias != XML_CHAR_ENCODING_ERROR) {
  1650. const char* canon;
  1651. canon = xmlGetCharEncodingName(alias);
  1652. if ((canon != NULL) && (strcmp(name, canon))) {
  1653. return(xmlFindCharEncodingHandler(canon));
  1654. }
  1655. }
  1656. /* If "none of the above", give up */
  1657. return(NULL);
  1658. }
  1659. /************************************************************************
  1660. * *
  1661. * ICONV based generic conversion functions *
  1662. * *
  1663. ************************************************************************/
  1664. #ifdef LIBXML_ICONV_ENABLED
  /**
   * xmlIconvWrapper:
   * @cd:  iconv converter data structure
   * @out:  a pointer to an array of bytes to store the result
   * @outlen:  the length of @out
   * @in:  a pointer to an array of input bytes
   * @inlen:  the length of @in
   *
   * Run a single iconv() call over the given buffers and translate its
   * outcome into the return codes used by the encoding layer.
   *
   * Returns 0 if success, or
   *     -1 by lack of space (also when one of the arguments is NULL), or
   *     -2 if the transcoding fails (for *in is not valid utf8 string or
   *        the result of transformation can't fit into the encoding we want), or
   *     -3 if there the last byte can't form a single output char.
   *
   * The value of @inlen after return is the number of octets consumed
   *     as the return value is 0, else unpredictable.
   * The value of @outlen after return is the number of octets produced.
   */
  static int
  xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                  const unsigned char *in, int *inlen) {
      const char *src = (const char *) in;
      char *dst = (char *) out;
      size_t srcLeft;
      size_t dstLeft;
      int rc;

      if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
          if (outlen != NULL)
              *outlen = 0;
          return(-1);
      }

      srcLeft = *inlen;
      dstLeft = *outlen;
      rc = iconv(cd, (ICONV_CONST char **) &src, &srcLeft, &dst, &dstLeft);

      /* iconv() leaves the remaining byte counts; turn them into used counts */
      *inlen -= srcLeft;
      *outlen -= dstLeft;

      if ((srcLeft == 0) && (rc != -1))
          return(0);

      /* Something went wrong or input is left over: classify through errno. */
  #ifdef EILSEQ
      if (errno == EILSEQ)
          return(-2);
  #endif
  #ifdef E2BIG
      if (errno == E2BIG)
          return(-1);
  #endif
      /* EINVAL (truncated sequence) and any other error are reported alike */
      return(-3);
  }
  1721. #endif /* LIBXML_ICONV_ENABLED */
  1722. /************************************************************************
  1723. * *
  1724. * ICU based generic conversion functions *
  1725. * *
  1726. ************************************************************************/
  1727. #ifdef LIBXML_ICU_ENABLED
  1728. /**
  1729. * xmlUconvWrapper:
  1730. * @cd: ICU uconverter data structure
  1731. * @toUnicode : non-zero if toUnicode. 0 otherwise.
  1732. * @out: a pointer to an array of bytes to store the result
  1733. * @outlen: the length of @out
  1734. * @in: a pointer to an array of input bytes
  1735. * @inlen: the length of @in
  1736. * @flush: if true, indicates end of input
  1737. *
  1738. * Returns 0 if success, or
  1739. * -1 by lack of space, or
  1740. * -2 if the transcoding fails (for *in is not valid utf8 string or
  1741. * the result of transformation can't fit into the encoding we want), or
  1742. * -3 if there the last byte can't form a single output char.
  1743. *
  1744. * The value of @inlen after return is the number of octets consumed
  1745. * as the return value is positive, else unpredictable.
  1746. * The value of @outlen after return is the number of octets produced.
  1747. */
  1748. static int
  1749. xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
  1750. const unsigned char *in, int *inlen, int flush) {
  1751. const char *ucv_in = (const char *) in;
  1752. char *ucv_out = (char *) out;
  1753. UErrorCode err = U_ZERO_ERROR;
  1754. if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
  1755. if (outlen != NULL) *outlen = 0;
  1756. return(-1);
  1757. }
  1758. if (toUnicode) {
  1759. /* encoding => UTF-16 => UTF-8 */
  1760. ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
  1761. &ucv_in, ucv_in + *inlen, cd->pivot_buf,
  1762. &cd->pivot_source, &cd->pivot_target,
  1763. cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
  1764. } else {
  1765. /* UTF-8 => UTF-16 => encoding */
  1766. ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
  1767. &ucv_in, ucv_in + *inlen, cd->pivot_buf,
  1768. &cd->pivot_source, &cd->pivot_target,
  1769. cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
  1770. }
  1771. *inlen = ucv_in - (const char*) in;
  1772. *outlen = ucv_out - (char *) out;
  1773. if (U_SUCCESS(err)) {
  1774. /* reset pivot buf if this is the last call for input (flush==TRUE) */
  1775. if (flush)
  1776. cd->pivot_source = cd->pivot_target = cd->pivot_buf;
  1777. return 0;
  1778. }
  1779. if (err == U_BUFFER_OVERFLOW_ERROR)
  1780. return -1;
  1781. if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
  1782. return -2;
  1783. return -3;
  1784. }
  1785. #endif /* LIBXML_ICU_ENABLED */
  1786. /************************************************************************
  1787. * *
  1788. * The real API used by libxml for on-the-fly conversion *
  1789. * *
  1790. ************************************************************************/
  1791. /**
  1792. * xmlEncInputChunk:
  1793. * @handler: encoding handler
  1794. * @out: a pointer to an array of bytes to store the result
  1795. * @outlen: the length of @out
  1796. * @in: a pointer to an array of input bytes
  1797. * @inlen: the length of @in
  1798. * @flush: flush (ICU-related)
  1799. *
  1800. * Returns 0 if success, or
  1801. * -1 by lack of space, or
  1802. * -2 if the transcoding fails (for *in is not valid utf8 string or
  1803. * the result of transformation can't fit into the encoding we want), or
  1804. * -3 if there the last byte can't form a single output char.
  1805. *
  1806. * The value of @inlen after return is the number of octets consumed
  1807. * as the return value is 0, else unpredictable.
  1808. * The value of @outlen after return is the number of octets produced.
  1809. */
  1810. static int
  1811. xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
  1812. int *outlen, const unsigned char *in, int *inlen, int flush) {
  1813. int ret;
  1814. (void)flush;
  1815. if (handler->input != NULL) {
  1816. ret = handler->input(out, outlen, in, inlen);
  1817. if (ret > 0)
  1818. ret = 0;
  1819. }
  1820. #ifdef LIBXML_ICONV_ENABLED
  1821. else if (handler->iconv_in != NULL) {
  1822. ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
  1823. }
  1824. #endif /* LIBXML_ICONV_ENABLED */
  1825. #ifdef LIBXML_ICU_ENABLED
  1826. else if (handler->uconv_in != NULL) {
  1827. ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
  1828. flush);
  1829. }
  1830. #endif /* LIBXML_ICU_ENABLED */
  1831. else {
  1832. *outlen = 0;
  1833. *inlen = 0;
  1834. ret = -2;
  1835. }
  1836. return(ret);
  1837. }
  1838. /**
  1839. * xmlEncOutputChunk:
  1840. * @handler: encoding handler
  1841. * @out: a pointer to an array of bytes to store the result
  1842. * @outlen: the length of @out
  1843. * @in: a pointer to an array of input bytes
  1844. * @inlen: the length of @in
  1845. *
  1846. * Returns 0 if success, or
  1847. * -1 by lack of space, or
  1848. * -2 if the transcoding fails (for *in is not valid utf8 string or
  1849. * the result of transformation can't fit into the encoding we want), or
  1850. * -3 if there the last byte can't form a single output char.
  1851. * -4 if no output function was found.
  1852. *
  1853. * The value of @inlen after return is the number of octets consumed
  1854. * as the return value is 0, else unpredictable.
  1855. * The value of @outlen after return is the number of octets produced.
  1856. */
  1857. static int
  1858. xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
  1859. int *outlen, const unsigned char *in, int *inlen) {
  1860. int ret;
  1861. if (handler->output != NULL) {
  1862. ret = handler->output(out, outlen, in, inlen);
  1863. if (ret > 0)
  1864. ret = 0;
  1865. }
  1866. #ifdef LIBXML_ICONV_ENABLED
  1867. else if (handler->iconv_out != NULL) {
  1868. ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
  1869. }
  1870. #endif /* LIBXML_ICONV_ENABLED */
  1871. #ifdef LIBXML_ICU_ENABLED
  1872. else if (handler->uconv_out != NULL) {
  1873. ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
  1874. 1);
  1875. }
  1876. #endif /* LIBXML_ICU_ENABLED */
  1877. else {
  1878. *outlen = 0;
  1879. *inlen = 0;
  1880. ret = -4;
  1881. }
  1882. return(ret);
  1883. }
  1884. /**
  1885. * xmlCharEncFirstLineInt:
  1886. * @handler: char encoding transformation data structure
  1887. * @out: an xmlBuffer for the output.
  1888. * @in: an xmlBuffer for the input
  1889. * @len: number of bytes to convert for the first line, or -1
  1890. *
  1891. * Front-end for the encoding handler input function, but handle only
  1892. * the very first line, i.e. limit itself to 45 chars.
  1893. *
  1894. * Returns the number of byte written if success, or
  1895. * -1 general error
  1896. * -2 if the transcoding fails (for *in is not valid utf8 string or
  1897. * the result of transformation can't fit into the encoding we want), or
  1898. */
  1899. int
  1900. xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
  1901. xmlBufferPtr in, int len) {
  1902. int ret;
  1903. int written;
  1904. int toconv;
  1905. if (handler == NULL) return(-1);
  1906. if (out == NULL) return(-1);
  1907. if (in == NULL) return(-1);
  1908. /* calculate space available */
  1909. written = out->size - out->use - 1; /* count '\0' */
  1910. toconv = in->use;
  1911. /*
  1912. * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
  1913. * 45 chars should be sufficient to reach the end of the encoding
  1914. * declaration without going too far inside the document content.
  1915. * on UTF-16 this means 90bytes, on UCS4 this means 180
  1916. * The actual value depending on guessed encoding is passed as @len
  1917. * if provided
  1918. */
  1919. if (len >= 0) {
  1920. if (toconv > len)
  1921. toconv = len;
  1922. } else {
  1923. if (toconv > 180)
  1924. toconv = 180;
  1925. }
  1926. if (toconv * 2 >= written) {
  1927. xmlBufferGrow(out, toconv * 2);
  1928. written = out->size - out->use - 1;
  1929. }
  1930. ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
  1931. in->content, &toconv, 0);
  1932. xmlBufferShrink(in, toconv);
  1933. out->use += written;
  1934. out->content[out->use] = 0;
  1935. if (ret == -1) ret = -3;
  1936. #ifdef DEBUG_ENCODING
  1937. switch (ret) {
  1938. case 0:
  1939. xmlGenericError(xmlGenericErrorContext,
  1940. "converted %d bytes to %d bytes of input\n",
  1941. toconv, written);
  1942. break;
  1943. case -1:
  1944. xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
  1945. toconv, written, in->use);
  1946. break;
  1947. case -2:
  1948. xmlGenericError(xmlGenericErrorContext,
  1949. "input conversion failed due to input error\n");
  1950. break;
  1951. case -3:
  1952. xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
  1953. toconv, written, in->use);
  1954. break;
  1955. default:
  1956. xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
  1957. }
  1958. #endif /* DEBUG_ENCODING */
  1959. /*
  1960. * Ignore when input buffer is not on a boundary
  1961. */
  1962. if (ret == -3) ret = 0;
  1963. if (ret == -1) ret = 0;
  1964. return(written ? written : ret);
  1965. }
  1966. /**
  1967. * xmlCharEncFirstLine:
  1968. * @handler: char encoding transformation data structure
  1969. * @out: an xmlBuffer for the output.
  1970. * @in: an xmlBuffer for the input
  1971. *
  1972. * Front-end for the encoding handler input function, but handle only
  1973. * the very first line, i.e. limit itself to 45 chars.
  1974. *
  1975. * Returns the number of byte written if success, or
  1976. * -1 general error
  1977. * -2 if the transcoding fails (for *in is not valid utf8 string or
  1978. * the result of transformation can't fit into the encoding we want), or
  1979. */
  1980. int
  1981. xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
  1982. xmlBufferPtr in) {
  1983. return(xmlCharEncFirstLineInt(handler, out, in, -1));
  1984. }
  1985. /**
  1986. * xmlCharEncFirstLineInput:
  1987. * @input: a parser input buffer
  1988. * @len: number of bytes to convert for the first line, or -1
  1989. *
  1990. * Front-end for the encoding handler input function, but handle only
  1991. * the very first line. Point is that this is based on autodetection
  1992. * of the encoding and once that first line is converted we may find
  1993. * out that a different decoder is needed to process the input.
  1994. *
  1995. * Returns the number of byte written if success, or
  1996. * -1 general error
  1997. * -2 if the transcoding fails (for *in is not valid utf8 string or
  1998. * the result of transformation can't fit into the encoding we want), or
  1999. */
  2000. int
  2001. xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
  2002. {
  2003. int ret;
  2004. size_t written;
  2005. size_t toconv;
  2006. int c_in;
  2007. int c_out;
  2008. xmlBufPtr in;
  2009. xmlBufPtr out;
  2010. if ((input == NULL) || (input->encoder == NULL) ||
  2011. (input->buffer == NULL) || (input->raw == NULL))
  2012. return (-1);
  2013. out = input->buffer;
  2014. in = input->raw;
  2015. toconv = xmlBufUse(in);
  2016. if (toconv == 0)
  2017. return (0);
  2018. written = xmlBufAvail(out) - 1; /* count '\0' */
  2019. /*
  2020. * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
  2021. * 45 chars should be sufficient to reach the end of the encoding
  2022. * declaration without going too far inside the document content.
  2023. * on UTF-16 this means 90bytes, on UCS4 this means 180
  2024. * The actual value depending on guessed encoding is passed as @len
  2025. * if provided
  2026. */
  2027. if (len >= 0) {
  2028. if (toconv > (unsigned int) len)
  2029. toconv = len;
  2030. } else {
  2031. if (toconv > 180)
  2032. toconv = 180;
  2033. }
  2034. if (toconv * 2 >= written) {
  2035. xmlBufGrow(out, toconv * 2);
  2036. written = xmlBufAvail(out) - 1;
  2037. }
  2038. if (written > 360)
  2039. written = 360;
  2040. c_in = toconv;
  2041. c_out = written;
  2042. ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
  2043. xmlBufContent(in), &c_in, 0);
  2044. xmlBufShrink(in, c_in);
  2045. xmlBufAddLen(out, c_out);
  2046. if (ret == -1)
  2047. ret = -3;
  2048. switch (ret) {
  2049. case 0:
  2050. #ifdef DEBUG_ENCODING
  2051. xmlGenericError(xmlGenericErrorContext,
  2052. "converted %d bytes to %d bytes of input\n",
  2053. c_in, c_out);
  2054. #endif
  2055. break;
  2056. case -1:
  2057. #ifdef DEBUG_ENCODING
  2058. xmlGenericError(xmlGenericErrorContext,
  2059. "converted %d bytes to %d bytes of input, %d left\n",
  2060. c_in, c_out, (int)xmlBufUse(in));
  2061. #endif
  2062. break;
  2063. case -3:
  2064. #ifdef DEBUG_ENCODING
  2065. xmlGenericError(xmlGenericErrorContext,
  2066. "converted %d bytes to %d bytes of input, %d left\n",
  2067. c_in, c_out, (int)xmlBufUse(in));
  2068. #endif
  2069. break;
  2070. case -2: {
  2071. char buf[50];
  2072. const xmlChar *content = xmlBufContent(in);
  2073. snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
  2074. content[0], content[1],
  2075. content[2], content[3]);
  2076. buf[49] = 0;
  2077. xmlEncodingErr(XML_I18N_CONV_FAILED,
  2078. "input conversion failed due to input error, bytes %s\n",
  2079. buf);
  2080. }
  2081. }
  2082. /*
  2083. * Ignore when input buffer is not on a boundary
  2084. */
  2085. if (ret == -3) ret = 0;
  2086. if (ret == -1) ret = 0;
  2087. return(c_out ? c_out : ret);
  2088. }
  2089. /**
  2090. * xmlCharEncInput:
  2091. * @input: a parser input buffer
  2092. * @flush: try to flush all the raw buffer
  2093. *
  2094. * Generic front-end for the encoding handler on parser input
  2095. *
  2096. * Returns the number of byte written if success, or
  2097. * -1 general error
  2098. * -2 if the transcoding fails (for *in is not valid utf8 string or
  2099. * the result of transformation can't fit into the encoding we want), or
  2100. */
  2101. int
  2102. xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
  2103. {
  2104. int ret;
  2105. size_t written;
  2106. size_t toconv;
  2107. int c_in;
  2108. int c_out;
  2109. xmlBufPtr in;
  2110. xmlBufPtr out;
  2111. if ((input == NULL) || (input->encoder == NULL) ||
  2112. (input->buffer == NULL) || (input->raw == NULL))
  2113. return (-1);
  2114. out = input->buffer;
  2115. in = input->raw;
  2116. toconv = xmlBufUse(in);
  2117. if (toconv == 0)
  2118. return (0);
  2119. if ((toconv > 64 * 1024) && (flush == 0))
  2120. toconv = 64 * 1024;
  2121. written = xmlBufAvail(out);
  2122. if (written > 0)
  2123. written--; /* count '\0' */
  2124. if (toconv * 2 >= written) {
  2125. xmlBufGrow(out, toconv * 2);
  2126. written = xmlBufAvail(out);
  2127. if (written > 0)
  2128. written--; /* count '\0' */
  2129. }
  2130. if ((written > 128 * 1024) && (flush == 0))
  2131. written = 128 * 1024;
  2132. c_in = toconv;
  2133. c_out = written;
  2134. ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
  2135. xmlBufContent(in), &c_in, flush);
  2136. xmlBufShrink(in, c_in);
  2137. xmlBufAddLen(out, c_out);
  2138. if (ret == -1)
  2139. ret = -3;
  2140. switch (ret) {
  2141. case 0:
  2142. #ifdef DEBUG_ENCODING
  2143. xmlGenericError(xmlGenericErrorContext,
  2144. "converted %d bytes to %d bytes of input\n",
  2145. c_in, c_out);
  2146. #endif
  2147. break;
  2148. case -1:
  2149. #ifdef DEBUG_ENCODING
  2150. xmlGenericError(xmlGenericErrorContext,
  2151. "converted %d bytes to %d bytes of input, %d left\n",
  2152. c_in, c_out, (int)xmlBufUse(in));
  2153. #endif
  2154. break;
  2155. case -3:
  2156. #ifdef DEBUG_ENCODING
  2157. xmlGenericError(xmlGenericErrorContext,
  2158. "converted %d bytes to %d bytes of input, %d left\n",
  2159. c_in, c_out, (int)xmlBufUse(in));
  2160. #endif
  2161. break;
  2162. case -2: {
  2163. char buf[50];
  2164. const xmlChar *content = xmlBufContent(in);
  2165. snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
  2166. content[0], content[1],
  2167. content[2], content[3]);
  2168. buf[49] = 0;
  2169. xmlEncodingErr(XML_I18N_CONV_FAILED,
  2170. "input conversion failed due to input error, bytes %s\n",
  2171. buf);
  2172. }
  2173. }
  2174. /*
  2175. * Ignore when input buffer is not on a boundary
  2176. */
  2177. if (ret == -3)
  2178. ret = 0;
  2179. return (c_out? c_out : ret);
  2180. }
  2181. /**
  2182. * xmlCharEncInFunc:
  2183. * @handler: char encoding transformation data structure
  2184. * @out: an xmlBuffer for the output.
  2185. * @in: an xmlBuffer for the input
  2186. *
  2187. * Generic front-end for the encoding handler input function
  2188. *
  2189. * Returns the number of byte written if success, or
  2190. * -1 general error
  2191. * -2 if the transcoding fails (for *in is not valid utf8 string or
  2192. * the result of transformation can't fit into the encoding we want), or
  2193. */
  2194. int
  2195. xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
  2196. xmlBufferPtr in)
  2197. {
  2198. int ret;
  2199. int written;
  2200. int toconv;
  2201. if (handler == NULL)
  2202. return (-1);
  2203. if (out == NULL)
  2204. return (-1);
  2205. if (in == NULL)
  2206. return (-1);
  2207. toconv = in->use;
  2208. if (toconv == 0)
  2209. return (0);
  2210. written = out->size - out->use -1; /* count '\0' */
  2211. if (toconv * 2 >= written) {
  2212. xmlBufferGrow(out, out->size + toconv * 2);
  2213. written = out->size - out->use - 1;
  2214. }
  2215. ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
  2216. in->content, &toconv, 1);
  2217. xmlBufferShrink(in, toconv);
  2218. out->use += written;
  2219. out->content[out->use] = 0;
  2220. if (ret == -1)
  2221. ret = -3;
  2222. switch (ret) {
  2223. case 0:
  2224. #ifdef DEBUG_ENCODING
  2225. xmlGenericError(xmlGenericErrorContext,
  2226. "converted %d bytes to %d bytes of input\n",
  2227. toconv, written);
  2228. #endif
  2229. break;
  2230. case -1:
  2231. #ifdef DEBUG_ENCODING
  2232. xmlGenericError(xmlGenericErrorContext,
  2233. "converted %d bytes to %d bytes of input, %d left\n",
  2234. toconv, written, in->use);
  2235. #endif
  2236. break;
  2237. case -3:
  2238. #ifdef DEBUG_ENCODING
  2239. xmlGenericError(xmlGenericErrorContext,
  2240. "converted %d bytes to %d bytes of input, %d left\n",
  2241. toconv, written, in->use);
  2242. #endif
  2243. break;
  2244. case -2: {
  2245. char buf[50];
  2246. snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
  2247. in->content[0], in->content[1],
  2248. in->content[2], in->content[3]);
  2249. buf[49] = 0;
  2250. xmlEncodingErr(XML_I18N_CONV_FAILED,
  2251. "input conversion failed due to input error, bytes %s\n",
  2252. buf);
  2253. }
  2254. }
  2255. /*
  2256. * Ignore when input buffer is not on a boundary
  2257. */
  2258. if (ret == -3)
  2259. ret = 0;
  2260. return (written? written : ret);
  2261. }
  2262. #ifdef LIBXML_OUTPUT_ENABLED
  2263. /**
  2264. * xmlCharEncOutput:
  2265. * @output: a parser output buffer
  2266. * @init: is this an initialization call without data
  2267. *
  2268. * Generic front-end for the encoding handler on parser output
  2269. * a first call with @init == 1 has to be made first to initiate the
  2270. * output in case of non-stateless encoding needing to initiate their
  2271. * state or the output (like the BOM in UTF16).
  2272. * In case of UTF8 sequence conversion errors for the given encoder,
  2273. * the content will be automatically remapped to a CharRef sequence.
  2274. *
  2275. * Returns the number of byte written if success, or
  2276. * -1 general error
  2277. * -2 if the transcoding fails (for *in is not valid utf8 string or
  2278. * the result of transformation can't fit into the encoding we want), or
  2279. */
  2280. int
  2281. xmlCharEncOutput(xmlOutputBufferPtr output, int init)
  2282. {
  2283. int ret;
  2284. size_t written;
  2285. int writtentot = 0;
  2286. size_t toconv;
  2287. int c_in;
  2288. int c_out;
  2289. xmlBufPtr in;
  2290. xmlBufPtr out;
  2291. if ((output == NULL) || (output->encoder == NULL) ||
  2292. (output->buffer == NULL) || (output->conv == NULL))
  2293. return (-1);
  2294. out = output->conv;
  2295. in = output->buffer;
  2296. retry:
  2297. written = xmlBufAvail(out);
  2298. if (written > 0)
  2299. written--; /* count '\0' */
  2300. /*
  2301. * First specific handling of the initialization call
  2302. */
  2303. if (init) {
  2304. c_in = 0;
  2305. c_out = written;
  2306. /* TODO: Check return value. */
  2307. xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
  2308. NULL, &c_in);
  2309. xmlBufAddLen(out, c_out);
  2310. #ifdef DEBUG_ENCODING
  2311. xmlGenericError(xmlGenericErrorContext,
  2312. "initialized encoder\n");
  2313. #endif
  2314. return(c_out);
  2315. }
  2316. /*
  2317. * Conversion itself.
  2318. */
  2319. toconv = xmlBufUse(in);
  2320. if (toconv == 0)
  2321. return (0);
  2322. if (toconv > 64 * 1024)
  2323. toconv = 64 * 1024;
  2324. if (toconv * 4 >= written) {
  2325. xmlBufGrow(out, toconv * 4);
  2326. written = xmlBufAvail(out) - 1;
  2327. }
  2328. if (written > 256 * 1024)
  2329. written = 256 * 1024;
  2330. c_in = toconv;
  2331. c_out = written;
  2332. ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
  2333. xmlBufContent(in), &c_in);
  2334. xmlBufShrink(in, c_in);
  2335. xmlBufAddLen(out, c_out);
  2336. writtentot += c_out;
  2337. if (ret == -1) {
  2338. if (c_out > 0) {
  2339. /* Can be a limitation of iconv or uconv */
  2340. goto retry;
  2341. }
  2342. ret = -3;
  2343. }
  2344. /*
  2345. * Attempt to handle error cases
  2346. */
  2347. switch (ret) {
  2348. case 0:
  2349. #ifdef DEBUG_ENCODING
  2350. xmlGenericError(xmlGenericErrorContext,
  2351. "converted %d bytes to %d bytes of output\n",
  2352. c_in, c_out);
  2353. #endif
  2354. break;
  2355. case -1:
  2356. #ifdef DEBUG_ENCODING
  2357. xmlGenericError(xmlGenericErrorContext,
  2358. "output conversion failed by lack of space\n");
  2359. #endif
  2360. break;
  2361. case -3:
  2362. #ifdef DEBUG_ENCODING
  2363. xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
  2364. c_in, c_out, (int) xmlBufUse(in));
  2365. #endif
  2366. break;
  2367. case -4:
  2368. xmlEncodingErr(XML_I18N_NO_OUTPUT,
  2369. "xmlCharEncOutFunc: no output function !\n", NULL);
  2370. ret = -1;
  2371. break;
  2372. case -2: {
  2373. xmlChar charref[20];
  2374. int len = (int) xmlBufUse(in);
  2375. xmlChar *content = xmlBufContent(in);
  2376. int cur, charrefLen;
  2377. cur = xmlGetUTF8Char(content, &len);
  2378. if (cur <= 0)
  2379. break;
  2380. #ifdef DEBUG_ENCODING
  2381. xmlGenericError(xmlGenericErrorContext,
  2382. "handling output conversion error\n");
  2383. xmlGenericError(xmlGenericErrorContext,
  2384. "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
  2385. content[0], content[1],
  2386. content[2], content[3]);
  2387. #endif
  2388. /*
  2389. * Removes the UTF8 sequence, and replace it by a charref
  2390. * and continue the transcoding phase, hoping the error
  2391. * did not mangle the encoder state.
  2392. */
  2393. charrefLen = snprintf((char *) &charref[0], sizeof(charref),
  2394. "&#%d;", cur);
  2395. xmlBufShrink(in, len);
  2396. xmlBufGrow(out, charrefLen * 4);
  2397. c_out = xmlBufAvail(out) - 1;
  2398. c_in = charrefLen;
  2399. ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
  2400. charref, &c_in);
  2401. if ((ret < 0) || (c_in != charrefLen)) {
  2402. char buf[50];
  2403. snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
  2404. content[0], content[1],
  2405. content[2], content[3]);
  2406. buf[49] = 0;
  2407. xmlEncodingErr(XML_I18N_CONV_FAILED,
  2408. "output conversion failed due to conv error, bytes %s\n",
  2409. buf);
  2410. if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
  2411. content[0] = ' ';
  2412. break;
  2413. }
  2414. xmlBufAddLen(out, c_out);
  2415. writtentot += c_out;
  2416. goto retry;
  2417. }
  2418. }
  2419. return(writtentot ? writtentot : ret);
  2420. }
  2421. #endif
  2422. /**
  2423. * xmlCharEncOutFunc:
  2424. * @handler: char encoding transformation data structure
  2425. * @out: an xmlBuffer for the output.
  2426. * @in: an xmlBuffer for the input
  2427. *
  2428. * Generic front-end for the encoding handler output function
  2429. * a first call with @in == NULL has to be made firs to initiate the
  2430. * output in case of non-stateless encoding needing to initiate their
  2431. * state or the output (like the BOM in UTF16).
  2432. * In case of UTF8 sequence conversion errors for the given encoder,
  2433. * the content will be automatically remapped to a CharRef sequence.
  2434. *
  2435. * Returns the number of byte written if success, or
  2436. * -1 general error
  2437. * -2 if the transcoding fails (for *in is not valid utf8 string or
  2438. * the result of transformation can't fit into the encoding we want), or
  2439. */
  2440. int
  2441. xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
  2442. xmlBufferPtr in) {
  2443. int ret;
  2444. int written;
  2445. int writtentot = 0;
  2446. int toconv;
  2447. int output = 0;
  2448. if (handler == NULL) return(-1);
  2449. if (out == NULL) return(-1);
  2450. retry:
  2451. written = out->size - out->use;
  2452. if (written > 0)
  2453. written--; /* Gennady: count '/0' */
  2454. /*
  2455. * First specific handling of in = NULL, i.e. the initialization call
  2456. */
  2457. if (in == NULL) {
  2458. toconv = 0;
  2459. /* TODO: Check return value. */
  2460. xmlEncOutputChunk(handler, &out->content[out->use], &written,
  2461. NULL, &toconv);
  2462. out->use += written;
  2463. out->content[out->use] = 0;
  2464. #ifdef DEBUG_ENCODING
  2465. xmlGenericError(xmlGenericErrorContext,
  2466. "initialized encoder\n");
  2467. #endif
  2468. return(0);
  2469. }
  2470. /*
  2471. * Conversion itself.
  2472. */
  2473. toconv = in->use;
  2474. if (toconv == 0)
  2475. return(0);
  2476. if (toconv * 4 >= written) {
  2477. xmlBufferGrow(out, toconv * 4);
  2478. written = out->size - out->use - 1;
  2479. }
  2480. ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
  2481. in->content, &toconv);
  2482. xmlBufferShrink(in, toconv);
  2483. out->use += written;
  2484. writtentot += written;
  2485. out->content[out->use] = 0;
  2486. if (ret == -1) {
  2487. if (written > 0) {
  2488. /* Can be a limitation of iconv or uconv */
  2489. goto retry;
  2490. }
  2491. ret = -3;
  2492. }
  2493. if (ret >= 0) output += ret;
  2494. /*
  2495. * Attempt to handle error cases
  2496. */
  2497. switch (ret) {
  2498. case 0:
  2499. #ifdef DEBUG_ENCODING
  2500. xmlGenericError(xmlGenericErrorContext,
  2501. "converted %d bytes to %d bytes of output\n",
  2502. toconv, written);
  2503. #endif
  2504. break;
  2505. case -1:
  2506. #ifdef DEBUG_ENCODING
  2507. xmlGenericError(xmlGenericErrorContext,
  2508. "output conversion failed by lack of space\n");
  2509. #endif
  2510. break;
  2511. case -3:
  2512. #ifdef DEBUG_ENCODING
  2513. xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
  2514. toconv, written, in->use);
  2515. #endif
  2516. break;
  2517. case -4:
  2518. xmlEncodingErr(XML_I18N_NO_OUTPUT,
  2519. "xmlCharEncOutFunc: no output function !\n", NULL);
  2520. ret = -1;
  2521. break;
  2522. case -2: {
  2523. xmlChar charref[20];
  2524. int len = in->use;
  2525. const xmlChar *utf = (const xmlChar *) in->content;
  2526. int cur, charrefLen;
  2527. cur = xmlGetUTF8Char(utf, &len);
  2528. if (cur <= 0)
  2529. break;
  2530. #ifdef DEBUG_ENCODING
  2531. xmlGenericError(xmlGenericErrorContext,
  2532. "handling output conversion error\n");
  2533. xmlGenericError(xmlGenericErrorContext,
  2534. "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
  2535. in->content[0], in->content[1],
  2536. in->content[2], in->content[3]);
  2537. #endif
  2538. /*
  2539. * Removes the UTF8 sequence, and replace it by a charref
  2540. * and continue the transcoding phase, hoping the error
  2541. * did not mangle the encoder state.
  2542. */
  2543. charrefLen = snprintf((char *) &charref[0], sizeof(charref),
  2544. "&#%d;", cur);
  2545. xmlBufferShrink(in, len);
  2546. xmlBufferGrow(out, charrefLen * 4);
  2547. written = out->size - out->use - 1;
  2548. toconv = charrefLen;
  2549. ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
  2550. charref, &toconv);
  2551. if ((ret < 0) || (toconv != charrefLen)) {
  2552. char buf[50];
  2553. snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
  2554. in->content[0], in->content[1],
  2555. in->content[2], in->content[3]);
  2556. buf[49] = 0;
  2557. xmlEncodingErr(XML_I18N_CONV_FAILED,
  2558. "output conversion failed due to conv error, bytes %s\n",
  2559. buf);
  2560. if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
  2561. in->content[0] = ' ';
  2562. break;
  2563. }
  2564. out->use += written;
  2565. writtentot += written;
  2566. out->content[out->use] = 0;
  2567. goto retry;
  2568. }
  2569. }
  2570. return(writtentot ? writtentot : ret);
  2571. }
  2572. /**
  2573. * xmlCharEncCloseFunc:
  2574. * @handler: char encoding transformation data structure
  2575. *
  2576. * Generic front-end for encoding handler close function
  2577. *
  2578. * Returns 0 if success, or -1 in case of error
  2579. */
  2580. int
  2581. xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
  2582. int ret = 0;
  2583. int tofree = 0;
  2584. int i, handler_in_list = 0;
  2585. if (handler == NULL) return(-1);
  2586. if (handler->name == NULL) return(-1);
  2587. if (handlers != NULL) {
  2588. for (i = 0;i < nbCharEncodingHandler; i++) {
  2589. if (handler == handlers[i]) {
  2590. handler_in_list = 1;
  2591. break;
  2592. }
  2593. }
  2594. }
  2595. #ifdef LIBXML_ICONV_ENABLED
  2596. /*
  2597. * Iconv handlers can be used only once, free the whole block.
  2598. * and the associated icon resources.
  2599. */
  2600. if ((handler_in_list == 0) &&
  2601. ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
  2602. tofree = 1;
  2603. if (handler->iconv_out != NULL) {
  2604. if (iconv_close(handler->iconv_out))
  2605. ret = -1;
  2606. handler->iconv_out = NULL;
  2607. }
  2608. if (handler->iconv_in != NULL) {
  2609. if (iconv_close(handler->iconv_in))
  2610. ret = -1;
  2611. handler->iconv_in = NULL;
  2612. }
  2613. }
  2614. #endif /* LIBXML_ICONV_ENABLED */
  2615. #ifdef LIBXML_ICU_ENABLED
  2616. if ((handler_in_list == 0) &&
  2617. ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
  2618. tofree = 1;
  2619. if (handler->uconv_out != NULL) {
  2620. closeIcuConverter(handler->uconv_out);
  2621. handler->uconv_out = NULL;
  2622. }
  2623. if (handler->uconv_in != NULL) {
  2624. closeIcuConverter(handler->uconv_in);
  2625. handler->uconv_in = NULL;
  2626. }
  2627. }
  2628. #endif
  2629. if (tofree) {
  2630. /* free up only dynamic handlers iconv/uconv */
  2631. if (handler->name != NULL)
  2632. xmlFree(handler->name);
  2633. handler->name = NULL;
  2634. xmlFree(handler);
  2635. }
  2636. #ifdef DEBUG_ENCODING
  2637. if (ret)
  2638. xmlGenericError(xmlGenericErrorContext,
  2639. "failed to close the encoding handler\n");
  2640. else
  2641. xmlGenericError(xmlGenericErrorContext,
  2642. "closed the encoding handler\n");
  2643. #endif
  2644. return(ret);
  2645. }
  2646. /**
  2647. * xmlByteConsumed:
  2648. * @ctxt: an XML parser context
  2649. *
  2650. * This function provides the current index of the parser relative
  2651. * to the start of the current entity. This function is computed in
  2652. * bytes from the beginning starting at zero and finishing at the
  2653. * size in byte of the file if parsing a file. The function is
  2654. * of constant cost if the input is UTF-8 but can be costly if run
  2655. * on non-UTF-8 input.
  2656. *
  2657. * Returns the index in bytes from the beginning of the entity or -1
  2658. * in case the index could not be computed.
  2659. */
  2660. long
  2661. xmlByteConsumed(xmlParserCtxtPtr ctxt) {
  2662. xmlParserInputPtr in;
  2663. if (ctxt == NULL) return(-1);
  2664. in = ctxt->input;
  2665. if (in == NULL) return(-1);
  2666. if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
  2667. unsigned int unused = 0;
  2668. xmlCharEncodingHandler * handler = in->buf->encoder;
  2669. /*
  2670. * Encoding conversion, compute the number of unused original
  2671. * bytes from the input not consumed and subtract that from
  2672. * the raw consumed value, this is not a cheap operation
  2673. */
  2674. if (in->end - in->cur > 0) {
  2675. unsigned char convbuf[32000];
  2676. const unsigned char *cur = (const unsigned char *)in->cur;
  2677. int toconv = in->end - in->cur, written = 32000;
  2678. int ret;
  2679. do {
  2680. toconv = in->end - cur;
  2681. written = 32000;
  2682. ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
  2683. cur, &toconv);
  2684. if (ret < 0) {
  2685. if (written > 0)
  2686. ret = -2;
  2687. else
  2688. return(-1);
  2689. }
  2690. unused += written;
  2691. cur += toconv;
  2692. } while (ret == -2);
  2693. }
  2694. if (in->buf->rawconsumed < unused)
  2695. return(-1);
  2696. return(in->buf->rawconsumed - unused);
  2697. }
  2698. return(in->consumed + (in->cur - in->base));
  2699. }
  2700. #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
  2701. #ifdef LIBXML_ISO8859X_ENABLED
  /**
   * UTF8ToISO8859x:
   * @out:  a pointer to an array of bytes to store the result
   * @outlen:  the length of @out
   * @in:  a pointer to an array of UTF-8 chars
   * @inlen:  the length of @in
   * @xlattable: the 2-level transcoding table
   *
   * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
   * block of chars out.  Conversion stops early, without error, once @out
   * is full; the remaining input is simply left unconsumed.
   *
   * The first 32 bytes of @xlattable are indexed by the low 5 bits of a
   * 2-byte lead, the next 16 bytes by the low 4 bits of a 3-byte lead, and
   * the 64-byte blocks that follow (starting at offset 48) are indexed by
   * the low 6 bits of a trailing byte.  A zero result means the character
   * is not in the target character set.
   *
   * Returns the number of bytes written if success, -2 if the transcoding
   *     fails, -3 if the input ends in the middle of a sequence, or -1 if
   *     an argument is invalid.
   * The value of @inlen after return is the number of octets consumed
   * (complete, successfully converted sequences only).
   * The value of @outlen after return is the number of octets produced.
   */
  static int
  UTF8ToISO8859x(unsigned char* out, int *outlen,
                 const unsigned char* in, int *inlen,
                 unsigned char const *xlattable) {
      const unsigned char* outstart = out;
      const unsigned char* outend;
      const unsigned char* inend;
      const unsigned char* instart = in;
      const unsigned char* processed = in;
      int status = 0;

      if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
          (xlattable == NULL))
          return(-1);
      if (in == NULL) {
          /*
           * initialization nothing to do
           */
          *outlen = 0;
          *inlen = 0;
          return(0);
      }

      /* Negative lengths are treated as empty buffers. */
      outend = out + ((*outlen > 0) ? *outlen : 0);
      inend = in + ((*inlen > 0) ? *inlen : 0);

      /* Each iteration emits at most one byte, so one free slot suffices. */
      while ((in < inend) && (out < outend)) {
          unsigned char d = *in++;

          if (d < 0x80) {
              *out++ = d;
          } else if (d < 0xC0) {
              /* trailing byte in leading position */
              status = -2;
              break;
          } else if (d < 0xE0) {
              unsigned char c;

              if (!(in < inend)) {
                  /* trailing byte not in input buffer */
                  status = -3;
                  break;
              }
              c = *in++;
              if ((c & 0xC0) != 0x80) {
                  /* not a trailing byte */
                  status = -2;
                  break;
              }
              c = c & 0x3F;
              d = d & 0x1F;
              d = xlattable [48 + c + xlattable [d] * 64];
              if (d == 0) {
                  /* not in character set */
                  status = -2;
                  break;
              }
              *out++ = d;
          } else if (d < 0xF0) {
              unsigned char c1;
              unsigned char c2;

              if ((inend - in) < 2) {
                  /* trailing bytes not in input buffer */
                  status = -3;
                  break;
              }
              c1 = *in++;
              if ((c1 & 0xC0) != 0x80) {
                  /* not a trailing byte (c1) */
                  status = -2;
                  break;
              }
              c2 = *in++;
              if ((c2 & 0xC0) != 0x80) {
                  /* not a trailing byte (c2) */
                  status = -2;
                  break;
              }
              c1 = c1 & 0x3F;
              c2 = c2 & 0x3F;
              d = d & 0x0F;
              d = xlattable [48 + c2 + xlattable [48 + c1 +
                              xlattable [32 + d] * 64] * 64];
              if (d == 0) {
                  /* not in character set */
                  status = -2;
                  break;
              }
              *out++ = d;
          } else {
              /* cannot transcode >= U+010000 */
              status = -2;
              break;
          }
          /* Only complete, converted sequences count as consumed. */
          processed = in;
      }

      *outlen = (int) (out - outstart);
      *inlen = (int) (processed - instart);
      return((status < 0) ? status : *outlen);
  }
  /**
   * ISO8859xToUTF8
   * @out:  a pointer to an array of bytes to store the result
   * @outlen:  the length of @out
   * @in:  a pointer to an array of ISO 8859-* chars
   * @inlen:  the length of @in
   * @unicodetable:  code points for the bytes 0x80..0xFF, indexed by
   *                 (byte - 0x80); a zero entry marks an undefined byte
   *
   * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
   * block of chars out.  Conversion stops early, without error, when @out
   * runs short of space.
   *
   * Returns the number of bytes written if success, or -1 if an argument
   *     is invalid or an undefined code point is met.
   * The value of @inlen after return is the number of octets consumed
   * The value of @outlen after return is the number of octets produced.
   */
  static int
  ISO8859xToUTF8(unsigned char* out, int *outlen,
                 const unsigned char* in, int *inlen,
                 unsigned short const *unicodetable) {
      unsigned char* outstart = out;
      unsigned char* outend;
      const unsigned char* instart = in;
      const unsigned char* inend;
      const unsigned char* instop;
      unsigned int c;

      if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
          (in == NULL) || (unicodetable == NULL))
          return(-1);
      outend = out + *outlen;
      inend = in + *inlen;
      instop = inend;

      /*
       * Main loop: runs while at least 3 output bytes are free, which is
       * the most a single input byte can expand to.  The room is computed
       * as a difference so that no pointer before @out is ever formed.
       */
      while ((in < inend) && ((outend - out) > 2)) {
          if (*in >= 0x80) {
              c = unicodetable [*in - 0x80];
              if (c == 0) {
                  /* undefined code point */
                  *outlen = (int) (out - outstart);
                  *inlen = (int) (in - instart);
                  return (-1);
              }
              if (c < 0x800) {
                  *out++ = ((c >>  6) & 0x1F) | 0xC0;
                  *out++ = (c & 0x3F) | 0x80;
              } else {
                  *out++ = ((c >>  12) & 0x0F) | 0xE0;
                  *out++ = ((c >>  6) & 0x3F) | 0x80;
                  *out++ = (c & 0x3F) | 0x80;
              }
              ++in;
          }
          /* Never copy more ASCII bytes than the output can still hold. */
          if (instop - in > outend - out) instop = in + (outend - out);
          /* Bounds check first: in may already be one past the input. */
          while ((in < instop) && (*in < 0x80)) {
              *out++ = *in++;
          }
      }
      /* Up to two ASCII bytes may still fit in the space left over. */
      if ((in < inend) && (out < outend) && (*in < 0x80)) {
          *out++ = *in++;
      }
      if ((in < inend) && (out < outend) && (*in < 0x80)) {
          *out++ = *in++;
      }
      *outlen = (int) (out - outstart);
      *inlen = (int) (in - instart);
      return (*outlen);
  }
  2882. /************************************************************************
  2883. * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
  2884. ************************************************************************/
  /*
   * ISO-8859-2 lookup table: 128 entries of 16-bit values.
   * NOTE(review): presumably passed as the unicodetable argument of
   * ISO8859xToUTF8(), which indexes it with (byte - 0x80) and treats a
   * zero entry as an undefined code point -- confirm against the callers.
   */
  2885. static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
  2886. 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
  2887. 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
  2888. 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
  2889. 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
  2890. 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
  2891. 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
  2892. 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
  2893. 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
  2894. 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
  2895. 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
  2896. 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
  2897. 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
  2898. 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
  2899. 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
  2900. 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
  2901. 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
  2902. };
  /*
   * ISO-8859-2 transcoding table: 48 + 6 * 64 bytes initialised from
   * string literals.
   * NOTE(review): presumably passed as the xlattable argument of
   * UTF8ToISO8859x(), which reads the leading 48 bytes as block indexes
   * and the rest as 64-byte blocks addressed as 48 + index * 64 -- confirm
   * against the callers.
   */
  2903. static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
  2904. "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
  2905. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2906. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2907. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2908. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2909. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2910. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2911. "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
  2912. "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
  2913. "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
  2914. "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
  2915. "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
  2916. "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
  2917. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2918. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
  2919. "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
  2920. "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
  2921. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2922. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2923. "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
  2924. "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
  2925. "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
  2926. "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
  2927. "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
  2928. "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
  2929. "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
  2930. "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
  2931. };
  /*
   * ISO-8859-3 lookup table: 128 entries of 16-bit values.
   * NOTE(review): presumably passed as the unicodetable argument of
   * ISO8859xToUTF8(), which indexes it with (byte - 0x80) and treats a
   * zero entry as an undefined code point -- confirm against the callers.
   */
  2932. static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
  2933. 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
  2934. 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
  2935. 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
  2936. 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
  2937. 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
  2938. 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
  2939. 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
  2940. 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
  2941. 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
  2942. 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
  2943. 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
  2944. 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
  2945. 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
  2946. 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
  2947. 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
  2948. 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
  2949. };
  /*
   * ISO-8859-3 transcoding table: 48 + 7 * 64 bytes initialised from
   * string literals.
   * NOTE(review): presumably passed as the xlattable argument of
   * UTF8ToISO8859x(), which reads the leading 48 bytes as block indexes
   * and the rest as 64-byte blocks addressed as 48 + index * 64 -- confirm
   * against the callers.
   */
  2950. static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
  2951. "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
  2952. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2953. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2954. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2955. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2956. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2957. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2958. "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
  2959. "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
  2960. "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
  2961. "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
  2962. "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
  2963. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
  2964. "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
  2965. "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2966. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2967. "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
  2968. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2969. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2970. "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2971. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2972. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2973. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2974. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  2975. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
  2976. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
  2977. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
  2978. "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
  2979. "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
  2980. "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
  2981. "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
  2982. };
  /*
   * ISO-8859-4 lookup table: 128 entries of 16-bit values.
   * NOTE(review): presumably passed as the unicodetable argument of
   * ISO8859xToUTF8(), which indexes it with (byte - 0x80) and treats a
   * zero entry as an undefined code point -- confirm against the callers.
   */
  2983. static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
  2984. 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
  2985. 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
  2986. 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
  2987. 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
  2988. 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
  2989. 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
  2990. 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
  2991. 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
  2992. 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
  2993. 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
  2994. 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
  2995. 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
  2996. 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
  2997. 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
  2998. 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
  2999. 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
  3000. };
  /*
   * ISO-8859-4 transcoding table: 48 + 6 * 64 bytes initialised from
   * string literals.
   * NOTE(review): presumably passed as the xlattable argument of
   * UTF8ToISO8859x(), which reads the leading 48 bytes as block indexes
   * and the rest as 64-byte blocks addressed as 48 + index * 64 -- confirm
   * against the callers.
   */
  3001. static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
  3002. "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
  3003. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3004. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3005. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3006. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3007. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3008. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3009. "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
  3010. "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
  3011. "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
  3012. "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
  3013. "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
  3014. "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
  3015. "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
  3016. "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
  3017. "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
  3018. "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
  3019. "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
  3020. "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
  3021. "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
  3022. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
  3023. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3024. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3025. "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
  3026. "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
  3027. "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
  3028. "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
  3029. };
  /*
   * ISO-8859-5 lookup table: 128 entries of 16-bit values.
   * NOTE(review): presumably passed as the unicodetable argument of
   * ISO8859xToUTF8(), which indexes it with (byte - 0x80) and treats a
   * zero entry as an undefined code point -- confirm against the callers.
   */
  3030. static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
  3031. 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
  3032. 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
  3033. 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
  3034. 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
  3035. 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
  3036. 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
  3037. 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
  3038. 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
  3039. 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
  3040. 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
  3041. 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
  3042. 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
  3043. 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
  3044. 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
  3045. 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
  3046. 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
  3047. };
  /*
   * ISO-8859-5 transcoding table: 48 + 6 * 64 bytes initialised from
   * string literals.
   * NOTE(review): presumably passed as the xlattable argument of
   * UTF8ToISO8859x(), which reads the leading 48 bytes as block indexes
   * and the rest as 64-byte blocks addressed as 48 + index * 64 -- confirm
   * against the callers.
   */
  3048. static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
  3049. "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3050. "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3051. "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3052. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3053. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3054. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3055. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3056. "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
  3057. "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
  3058. "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
  3059. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3060. "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
  3061. "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
  3062. "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
  3063. "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
  3064. "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
  3065. "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
  3066. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3067. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3068. "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3069. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3070. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3071. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3072. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3073. "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3074. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3075. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3076. };
  /*
   * 128 16-bit values for ISO 8859-6.  Entries 0..31 are 0x0080..0x009f
   * in order; the remaining rows hold mostly 0x06xx values.
   * NOTE(review): presumably the Unicode code point for each byte
   * 0x80..0xFF, indexed by (byte - 0x80), with 0x0000 marking bytes that
   * have no assigned character -- the reader of this table is outside
   * this chunk, confirm there.
   */
  3077. static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
  3078. 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
  3079. 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
  3080. 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
  3081. 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
  3082. 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
  3083. 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
  3084. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  3085. 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
  3086. 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
  3087. 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
  3088. 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
  3089. 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  3090. 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
  3091. 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
  3092. 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  3093. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  3094. };
  /*
   * Byte table of 48 + 5 * 64 entries for ISO 8859-6, written as 23
   * concatenated 16-byte string literals.  The literal fills the array
   * exactly, so no terminating NUL is stored (valid C, rejected by C++).
   * NOTE(review): the size expression suggests a 48-byte index header
   * followed by five 64-byte pages whose non-zero bytes are ISO 8859-6
   * output bytes; the lookup routine is outside this chunk -- confirm.
   */
  3095. static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
  3096. "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3097. "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
  3098. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3099. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3100. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3101. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3102. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3103. "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
  3104. "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
  3105. "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
  3106. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3107. "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3108. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3109. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3110. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3111. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
  3112. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
  3113. "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
  3114. "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
  3115. "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
  3116. "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3117. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3118. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3119. };
  /*
   * 128 16-bit values for ISO 8859-7.  Entries 0..31 are 0x0080..0x009f
   * in order; the remaining rows hold mostly 0x03xx values.
   * NOTE(review): presumably the Unicode code point for each byte
   * 0x80..0xFF, indexed by (byte - 0x80), with 0x0000 marking bytes that
   * have no assigned character -- the reader of this table is outside
   * this chunk, confirm there.
   */
  3120. static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
  3121. 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
  3122. 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
  3123. 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
  3124. 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
  3125. 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
  3126. 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
  3127. 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
  3128. 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
  3129. 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
  3130. 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
  3131. 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
  3132. 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
  3133. 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
  3134. 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
  3135. 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
  3136. 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
  3137. };
  /*
   * Byte table of 48 + 7 * 64 entries for ISO 8859-7, written as 31
   * concatenated 16-byte string literals.  The literal fills the array
   * exactly, so no terminating NUL is stored (valid C, rejected by C++).
   * NOTE(review): the size expression suggests a 48-byte index header
   * followed by seven 64-byte pages whose non-zero bytes are ISO 8859-7
   * output bytes; the lookup routine is outside this chunk -- confirm.
   */
  3138. static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
  3139. "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
  3140. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3141. "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3142. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3143. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3144. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3145. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3146. "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
  3147. "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
  3148. "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
  3149. "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
  3150. "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3151. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3152. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3153. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3154. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3155. "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
  3156. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3157. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3158. "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3159. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3160. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3161. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3162. "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
  3163. "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
  3164. "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
  3165. "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
  3166. "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
  3167. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3168. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3169. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3170. };
  /*
   * 128 16-bit values for ISO 8859-8.  Entries 0..31 are 0x0080..0x009f
   * in order; the last four rows hold mostly 0x05xx values.
   * NOTE(review): presumably the Unicode code point for each byte
   * 0x80..0xFF, indexed by (byte - 0x80), with 0x0000 marking bytes that
   * have no assigned character -- the reader of this table is outside
   * this chunk, confirm there.
   */
  3171. static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
  3172. 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
  3173. 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
  3174. 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
  3175. 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
  3176. 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
  3177. 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
  3178. 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
  3179. 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
  3180. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  3181. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  3182. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  3183. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
  3184. 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
  3185. 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
  3186. 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
  3187. 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
  3188. };
  /*
   * Byte table of 48 + 7 * 64 entries for ISO 8859-8, written as 31
   * concatenated 16-byte string literals.  The literal fills the array
   * exactly, so no terminating NUL is stored (valid C, rejected by C++).
   * NOTE(review): the size expression suggests a 48-byte index header
   * followed by seven 64-byte pages whose non-zero bytes are ISO 8859-8
   * output bytes; the lookup routine is outside this chunk -- confirm.
   */
  3189. static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
  3190. "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3191. "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
  3192. "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3193. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3194. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3195. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3196. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3197. "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
  3198. "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
  3199. "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
  3200. "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
  3201. "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3202. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3203. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3204. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3205. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3206. "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
  3207. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3208. "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
  3209. "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3210. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3211. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3212. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3213. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
  3214. "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
  3215. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3216. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3217. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3218. "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
  3219. "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
  3220. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3221. };
  /*
   * 128 16-bit values for ISO 8859-9.  Every entry equals 0x0080 + index
   * except six (0x011e, 0x0130, 0x015e, 0x011f, 0x0131, 0x015f); no entry
   * is 0x0000.
   * NOTE(review): presumably the Unicode code point for each byte
   * 0x80..0xFF, indexed by (byte - 0x80) -- the reader of this table is
   * outside this chunk, confirm there.
   */
  3222. static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
  3223. 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
  3224. 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
  3225. 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
  3226. 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
  3227. 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
  3228. 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
  3229. 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
  3230. 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
  3231. 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
  3232. 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
  3233. 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
  3234. 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
  3235. 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
  3236. 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
  3237. 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
  3238. 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
  3239. };
  /*
   * Byte table of 48 + 5 * 64 entries for ISO 8859-9, written as 23
   * concatenated 16-byte string literals.  The literal fills the array
   * exactly, so no terminating NUL is stored (valid C, rejected by C++).
   * NOTE(review): the size expression suggests a 48-byte index header
   * followed by five 64-byte pages whose non-zero bytes are ISO 8859-9
   * output bytes; the lookup routine is outside this chunk -- confirm.
   */
  3240. static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
  3241. "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3242. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3243. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3244. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3245. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3246. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3247. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3248. "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
  3249. "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
  3250. "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
  3251. "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
  3252. "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
  3253. "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
  3254. "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
  3255. "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
  3256. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3257. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
  3258. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3259. "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3260. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3261. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
  3262. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3263. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3264. };
  /*
   * 128 16-bit values for ISO 8859-10.  Entries 0..31 are 0x0080..0x009f
   * in order; the remaining rows mix 0x00xx and 0x01xx values plus one
   * 0x2015.  No entry is 0x0000.
   * NOTE(review): presumably the Unicode code point for each byte
   * 0x80..0xFF, indexed by (byte - 0x80) -- the reader of this table is
   * outside this chunk, confirm there.
   */
  3265. static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
  3266. 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
  3267. 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
  3268. 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
  3269. 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
  3270. 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
  3271. 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
  3272. 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
  3273. 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
  3274. 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
  3275. 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
  3276. 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
  3277. 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
  3278. 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
  3279. 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
  3280. 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
  3281. 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
  3282. };
  /*
   * Byte table of 48 + 7 * 64 entries for ISO 8859-10, written as 31
   * concatenated 16-byte string literals.  The literal fills the array
   * exactly, so no terminating NUL is stored (valid C, rejected by C++).
   * NOTE(review): the size expression suggests a 48-byte index header
   * followed by seven 64-byte pages whose non-zero bytes are ISO 8859-10
   * output bytes; the lookup routine is outside this chunk -- confirm.
   */
  3283. static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
  3284. "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3285. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3286. "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3287. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3288. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3289. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3290. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3291. "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
  3292. "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
  3293. "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
  3294. "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
  3295. "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
  3296. "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
  3297. "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
  3298. "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
  3299. "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
  3300. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3301. "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
  3302. "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
  3303. "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3304. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3305. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3306. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3307. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3308. "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3309. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3310. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3311. "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
  3312. "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
  3313. "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
  3314. "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
  3315. };
  /*
   * 128 16-bit values for ISO 8859-11.  Entries 0..31 are 0x0080..0x009f
   * in order and entry 32 is 0x00a0; every other non-zero entry is a
   * 0x0exx value.
   * NOTE(review): presumably the Unicode code point for each byte
   * 0x80..0xFF, indexed by (byte - 0x80), with 0x0000 marking bytes that
   * have no assigned character -- the reader of this table is outside
   * this chunk, confirm there.
   */
  3316. static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
  3317. 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
  3318. 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
  3319. 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
  3320. 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
  3321. 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
  3322. 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
  3323. 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
  3324. 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
  3325. 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
  3326. 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
  3327. 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
  3328. 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
  3329. 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
  3330. 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
  3331. 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
  3332. 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
  3333. };
  /*
   * Byte table of 48 + 6 * 64 entries for ISO 8859-11, written as 27
   * concatenated 16-byte string literals.  The literal fills the array
   * exactly, so no terminating NUL is stored (valid C, rejected by C++).
   * NOTE(review): the size expression suggests a 48-byte index header
   * followed by six 64-byte pages whose non-zero bytes are ISO 8859-11
   * output bytes; the lookup routine is outside this chunk -- confirm.
   */
  3334. static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
  3335. "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3336. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3337. "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3338. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3339. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3340. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3341. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3342. "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
  3343. "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
  3344. "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3345. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3346. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3347. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3348. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3349. "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
  3350. "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
  3351. "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
  3352. "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
  3353. "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
  3354. "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3355. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3356. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3357. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3358. "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
  3359. "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
  3360. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3361. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3362. };
  /*
   * 128 16-bit values for ISO 8859-13.  Entries 0..31 are 0x0080..0x009f
   * in order; the remaining rows mix 0x00xx, 0x01xx and a few 0x20xx
   * values.  No entry is 0x0000.
   * NOTE(review): presumably the Unicode code point for each byte
   * 0x80..0xFF, indexed by (byte - 0x80) -- the reader of this table is
   * outside this chunk, confirm there.
   */
  3363. static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
  3364. 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
  3365. 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
  3366. 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
  3367. 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
  3368. 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
  3369. 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
  3370. 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
  3371. 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
  3372. 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
  3373. 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
  3374. 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
  3375. 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
  3376. 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
  3377. 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
  3378. 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
  3379. 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
  3380. };
  /*
   * Byte table of 48 + 7 * 64 entries for ISO 8859-13, written as 31
   * concatenated 16-byte string literals.  The literal fills the array
   * exactly, so no terminating NUL is stored (valid C, rejected by C++).
   * NOTE(review): the size expression suggests a 48-byte index header
   * followed by seven 64-byte pages whose non-zero bytes are ISO 8859-13
   * output bytes; the lookup routine is outside this chunk -- confirm.
   */
  3381. static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
  3382. "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3383. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3384. "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3385. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3386. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3387. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3388. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3389. "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
  3390. "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
  3391. "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
  3392. "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
  3393. "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3394. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3395. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3396. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3397. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3398. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
  3399. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3400. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3401. "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
  3402. "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
  3403. "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
  3404. "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
  3405. "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
  3406. "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
  3407. "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
  3408. "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
  3409. "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
  3410. "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
  3411. "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
  3412. "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
  3413. };
  /*
   * 128 16-bit values for ISO 8859-14.  Entries 0..31 are 0x0080..0x009f
   * in order; the remaining rows mix 0x00xx, 0x01xx and 0x1exx values.
   * No entry is 0x0000.
   * NOTE(review): presumably the Unicode code point for each byte
   * 0x80..0xFF, indexed by (byte - 0x80) -- the reader of this table is
   * outside this chunk, confirm there.
   */
  3414. static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
  3415. 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
  3416. 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
  3417. 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
  3418. 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
  3419. 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
  3420. 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
  3421. 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
  3422. 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
  3423. 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
  3424. 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
  3425. 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
  3426. 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
  3427. 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
  3428. 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
  3429. 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
  3430. 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
  3431. };
  /*
   * Byte table of 48 + 10 * 64 entries for ISO 8859-14, written as 43
   * concatenated 16-byte string literals.  The literal fills the array
   * exactly, so no terminating NUL is stored (valid C, rejected by C++).
   * NOTE(review): the size expression suggests a 48-byte index header
   * followed by ten 64-byte pages whose non-zero bytes are ISO 8859-14
   * output bytes; the lookup routine is outside this chunk -- confirm.
   */
  3432. static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
  3433. "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3434. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3435. "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3436. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3437. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3438. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3439. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3440. "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
  3441. "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
  3442. "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
  3443. "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3444. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3445. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3446. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3447. "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
  3448. "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
  3449. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
  3450. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3451. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3452. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
  3453. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3454. "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3455. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3456. "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3457. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3458. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3459. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3460. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3461. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3462. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3463. "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3464. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3465. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3466. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3467. "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
  3468. "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3469. "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
  3470. "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
  3471. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3472. "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
  3473. "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
  3474. "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
  3475. "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
  3476. };
  /*
   * 128 16-bit values for ISO 8859-15.  Every entry equals 0x0080 + index
   * except eight (0x20ac, 0x0160, 0x0161, 0x017d, 0x017e, 0x0152, 0x0153,
   * 0x0178); no entry is 0x0000.
   * NOTE(review): presumably the Unicode code point for each byte
   * 0x80..0xFF, indexed by (byte - 0x80) -- the reader of this table is
   * outside this chunk, confirm there.
   */
  3477. static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
  3478. 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
  3479. 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
  3480. 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
  3481. 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
  3482. 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
  3483. 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
  3484. 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
  3485. 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
  3486. 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
  3487. 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
  3488. 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
  3489. 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
  3490. 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
  3491. 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
  3492. 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
  3493. 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
  3494. };
  /*
   * Byte table of 48 + 6 * 64 entries for ISO 8859-15, written as 27
   * concatenated 16-byte string literals.  The literal fills the array
   * exactly, so no terminating NUL is stored (valid C, rejected by C++).
   * NOTE(review): the size expression suggests a 48-byte index header
   * followed by six 64-byte pages whose non-zero bytes are ISO 8859-15
   * output bytes; the lookup routine is outside this chunk -- confirm.
   */
  3495. static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
  3496. "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3497. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3498. "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3499. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3500. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3501. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3502. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3503. "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
  3504. "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
  3505. "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
  3506. "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
  3507. "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3508. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3509. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3510. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3511. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3512. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3513. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
  3514. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3515. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3516. "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3517. "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  3518. "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
  3519. "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
  3520. "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
  3521. "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
  3522. "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
  3523. };
/*
 * xmlunicodetable_ISO8859_16:
 *
 * Auto-generated table of 128 16-bit values that ISO8859_16ToUTF8()
 * hands to ISO8859xToUTF8().  The first 32 entries are 0x0080..0x009f
 * in order.
 *
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-16 byte (0x80 + i) -- confirm against ISO8859xToUTF8().
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Auto-generated lookup table that UTF8ToISO8859_16() hands to
 * UTF8ToISO8859x().  Its declared size is 48 leading bytes plus
 * 9 blocks of 64 bytes (624 bytes in total); the string literals below
 * supply exactly that many bytes, so no terminating NUL is stored.
 *
 * NOTE(review): presumably the leading bytes select one of the 64-byte
 * blocks and the block entries are ISO-8859-16 byte values, with 0x00
 * standing for "no mapping" -- confirm against UTF8ToISO8859x().
 */
static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
"\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
"\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
"\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
"\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
"\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
"\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
"\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
"\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
"\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
"\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
  3583. /*
  3584. * auto-generated functions for ISO-8859-2 .. ISO-8859-16
  3585. */
  3586. static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
  3587. const unsigned char* in, int *inlen) {
  3588. return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
  3589. }
  3590. static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
  3591. const unsigned char* in, int *inlen) {
  3592. return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
  3593. }
  3594. static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
  3595. const unsigned char* in, int *inlen) {
  3596. return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
  3597. }
  3598. static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
  3599. const unsigned char* in, int *inlen) {
  3600. return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
  3601. }
  3602. static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
  3603. const unsigned char* in, int *inlen) {
  3604. return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
  3605. }
  3606. static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
  3607. const unsigned char* in, int *inlen) {
  3608. return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
  3609. }
  3610. static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
  3611. const unsigned char* in, int *inlen) {
  3612. return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
  3613. }
  3614. static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
  3615. const unsigned char* in, int *inlen) {
  3616. return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
  3617. }
  3618. static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
  3619. const unsigned char* in, int *inlen) {
  3620. return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
  3621. }
  3622. static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
  3623. const unsigned char* in, int *inlen) {
  3624. return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
  3625. }
  3626. static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
  3627. const unsigned char* in, int *inlen) {
  3628. return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
  3629. }
  3630. static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
  3631. const unsigned char* in, int *inlen) {
  3632. return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
  3633. }
  3634. static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
  3635. const unsigned char* in, int *inlen) {
  3636. return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
  3637. }
  3638. static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
  3639. const unsigned char* in, int *inlen) {
  3640. return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
  3641. }
  3642. static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
  3643. const unsigned char* in, int *inlen) {
  3644. return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
  3645. }
  3646. static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
  3647. const unsigned char* in, int *inlen) {
  3648. return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
  3649. }
  3650. static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
  3651. const unsigned char* in, int *inlen) {
  3652. return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
  3653. }
  3654. static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
  3655. const unsigned char* in, int *inlen) {
  3656. return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
  3657. }
  3658. static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
  3659. const unsigned char* in, int *inlen) {
  3660. return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
  3661. }
  3662. static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
  3663. const unsigned char* in, int *inlen) {
  3664. return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
  3665. }
  3666. static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
  3667. const unsigned char* in, int *inlen) {
  3668. return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
  3669. }
  3670. static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
  3671. const unsigned char* in, int *inlen) {
  3672. return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
  3673. }
  3674. static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
  3675. const unsigned char* in, int *inlen) {
  3676. return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
  3677. }
  3678. static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
  3679. const unsigned char* in, int *inlen) {
  3680. return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
  3681. }
  3682. static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
  3683. const unsigned char* in, int *inlen) {
  3684. return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
  3685. }
  3686. static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
  3687. const unsigned char* in, int *inlen) {
  3688. return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
  3689. }
  3690. static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
  3691. const unsigned char* in, int *inlen) {
  3692. return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
  3693. }
  3694. static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
  3695. const unsigned char* in, int *inlen) {
  3696. return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
  3697. }
  3698. static void
  3699. xmlRegisterCharEncodingHandlersISO8859x (void) {
  3700. xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
  3701. xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
  3702. xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
  3703. xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
  3704. xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
  3705. xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
  3706. xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
  3707. xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
  3708. xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
  3709. xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
  3710. xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
  3711. xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
  3712. xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
  3713. xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
  3714. }
  3715. #endif
  3716. #endif
  3717. #define bottom_encoding
  3718. #include "elfgcchack.h"