tif_lzw.c 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167
  1. /* $Id: tif_lzw.c,v 1.45 2011-04-02 20:54:09 bfriesen Exp $ */
  2. /*
  3. * Copyright (c) 1988-1997 Sam Leffler
  4. * Copyright (c) 1991-1997 Silicon Graphics, Inc.
  5. *
  6. * Permission to use, copy, modify, distribute, and sell this software and
  7. * its documentation for any purpose is hereby granted without fee, provided
  8. * that (i) the above copyright notices and this permission notice appear in
  9. * all copies of the software and related documentation, and (ii) the names of
  10. * Sam Leffler and Silicon Graphics may not be used in any advertising or
  11. * publicity relating to the software without the specific, prior written
  12. * permission of Sam Leffler and Silicon Graphics.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
  15. * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
  16. * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
  17. *
  18. * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
  19. * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
  20. * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
  21. * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF
  22. * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
  23. * OF THIS SOFTWARE.
  24. */
  25. #include "tiffiop.h"
  26. #ifdef LZW_SUPPORT
  27. /*
  28. * TIFF Library.
  29. * Rev 5.0 Lempel-Ziv & Welch Compression Support
  30. *
  31. * This code is derived from the compress program whose code is
  32. * derived from software contributed to Berkeley by James A. Woods,
  33. * derived from original work by Spencer Thomas and Joseph Orost.
  34. *
  35. * The original Berkeley copyright notice appears below in its entirety.
  36. */
  37. #include "tif_predict.h"
  38. #include <stdio.h>
  39. /*
  40. * NB: The 5.0 spec describes a different algorithm than Aldus
  41. * implements. Specifically, Aldus does code length transitions
  42. * one code earlier than should be done (for real LZW).
  43. * Earlier versions of this library implemented the correct
  44. * LZW algorithm, but emitted codes in a bit order opposite
  45. * to the TIFF spec. Thus, to maintain compatibility w/ Aldus
  46. * we interpret MSB-LSB ordered codes to be images written w/
  47. * old versions of this library, but otherwise adhere to the
  48. * Aldus "off by one" algorithm.
  49. *
  50. * Future revisions to the TIFF spec are expected to "clarify this issue".
  51. */
  52. #define LZW_COMPAT /* include backwards compatibility code */
  53. /*
  54. * Each strip of data is supposed to be terminated by a CODE_EOI.
  55. * If the following #define is included, the decoder will also
  56. * check for end-of-strip w/o seeing this code. This makes the
  57. * library more robust, but also slower.
  58. */
  59. #define LZW_CHECKEOS /* include checks for strips w/o EOI code */
  60. #define MAXCODE(n) ((1L<<(n))-1)
  61. /*
  62. * The TIFF spec specifies that encoded bit
  63. * strings range from 9 to 12 bits.
  64. */
  65. #define BITS_MIN 9 /* start with 9 bits */
  66. #define BITS_MAX 12 /* max of 12 bit strings */
  67. /* predefined codes */
  68. #define CODE_CLEAR 256 /* code to clear string table */
  69. #define CODE_EOI 257 /* end-of-information code */
  70. #define CODE_FIRST 258 /* first free code entry */
  71. #define CODE_MAX MAXCODE(BITS_MAX)
  72. #define HSIZE 9001L /* 91% occupancy */
  73. #define HSHIFT (13-8)
  74. #ifdef LZW_COMPAT
  75. /* NB: +1024 is for compatibility with old files */
  76. #define CSIZE (MAXCODE(BITS_MAX)+1024L)
  77. #else
  78. #define CSIZE (MAXCODE(BITS_MAX)+1L)
  79. #endif
  80. /*
  81. * State block for each open TIFF file using LZW
  82. * compression/decompression. Note that the predictor
  83. * state block must be first in this data structure.
  84. */
  85. typedef struct {
  86. TIFFPredictorState predict; /* predictor super class */
  87. unsigned short nbits; /* # of bits/code */
  88. unsigned short maxcode; /* maximum code for lzw_nbits */
  89. unsigned short free_ent; /* next free entry in hash table */
  90. long nextdata; /* next bits of i/o */
  91. long nextbits; /* # of valid bits in lzw_nextdata */
  92. int rw_mode; /* preserve rw_mode from init */
  93. } LZWBaseState;
  94. #define lzw_nbits base.nbits
  95. #define lzw_maxcode base.maxcode
  96. #define lzw_free_ent base.free_ent
  97. #define lzw_nextdata base.nextdata
  98. #define lzw_nextbits base.nextbits
  99. /*
  100. * Encoding-specific state.
  101. */
  102. typedef uint16 hcode_t; /* codes fit in 16 bits */
  103. typedef struct {
  104. long hash;
  105. hcode_t code;
  106. } hash_t;
  107. /*
  108. * Decoding-specific state.
  109. */
  110. typedef struct code_ent {
  111. struct code_ent *next;
  112. unsigned short length; /* string len, including this token */
  113. unsigned char value; /* data value */
  114. unsigned char firstchar; /* first token of string */
  115. } code_t;
  116. typedef int (*decodeFunc)(TIFF*, uint8*, tmsize_t, uint16);
  117. typedef struct {
  118. LZWBaseState base;
  119. /* Decoding specific data */
  120. long dec_nbitsmask; /* lzw_nbits 1 bits, right adjusted */
  121. long dec_restart; /* restart count */
  122. #ifdef LZW_CHECKEOS
  123. uint64 dec_bitsleft; /* available bits in raw data */
  124. #endif
  125. decodeFunc dec_decode; /* regular or backwards compatible */
  126. code_t* dec_codep; /* current recognized code */
  127. code_t* dec_oldcodep; /* previously recognized code */
  128. code_t* dec_free_entp; /* next free entry */
  129. code_t* dec_maxcodep; /* max available entry */
  130. code_t* dec_codetab; /* kept separate for small machines */
  131. /* Encoding specific data */
  132. int enc_oldcode; /* last code encountered */
  133. long enc_checkpoint; /* point at which to clear table */
  134. #define CHECK_GAP 10000 /* enc_ratio check interval */
  135. long enc_ratio; /* current compression ratio */
  136. long enc_incount; /* (input) data bytes encoded */
  137. long enc_outcount; /* encoded (output) bytes */
  138. uint8* enc_rawlimit; /* bound on tif_rawdata buffer */
  139. hash_t* enc_hashtab; /* kept separate for small machines */
  140. } LZWCodecState;
  141. #define LZWState(tif) ((LZWBaseState*) (tif)->tif_data)
  142. #define DecoderState(tif) ((LZWCodecState*) LZWState(tif))
  143. #define EncoderState(tif) ((LZWCodecState*) LZWState(tif))
  144. static int LZWDecode(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s);
  145. #ifdef LZW_COMPAT
  146. static int LZWDecodeCompat(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s);
  147. #endif
  148. static void cl_hash(LZWCodecState*);
  149. /*
  150. * LZW Decoder.
  151. */
  152. #ifdef LZW_CHECKEOS
  153. /*
  154. * This check shouldn't be necessary because each
  155. * strip is suppose to be terminated with CODE_EOI.
  156. */
  157. #define NextCode(_tif, _sp, _bp, _code, _get) { \
  158. if ((_sp)->dec_bitsleft < (uint64)nbits) { \
  159. TIFFWarningExt(_tif->tif_clientdata, module, \
  160. "LZWDecode: Strip %d not terminated with EOI code", \
  161. _tif->tif_curstrip); \
  162. _code = CODE_EOI; \
  163. } else { \
  164. _get(_sp,_bp,_code); \
  165. (_sp)->dec_bitsleft -= nbits; \
  166. } \
  167. }
  168. #else
  169. #define NextCode(tif, sp, bp, code, get) get(sp, bp, code)
  170. #endif
  171. static int
  172. LZWFixupTags(TIFF* tif)
  173. {
  174. (void) tif;
  175. return (1);
  176. }
  177. static int
  178. LZWSetupDecode(TIFF* tif)
  179. {
  180. static const char module[] = "LZWSetupDecode";
  181. LZWCodecState* sp = DecoderState(tif);
  182. int code;
  183. if( sp == NULL )
  184. {
  185. /*
  186. * Allocate state block so tag methods have storage to record
  187. * values.
  188. */
  189. tif->tif_data = (uint8*) _TIFFmalloc(sizeof(LZWCodecState));
  190. if (tif->tif_data == NULL)
  191. {
  192. TIFFErrorExt(tif->tif_clientdata, module, "No space for LZW state block");
  193. return (0);
  194. }
  195. DecoderState(tif)->dec_codetab = NULL;
  196. DecoderState(tif)->dec_decode = NULL;
  197. /*
  198. * Setup predictor setup.
  199. */
  200. (void) TIFFPredictorInit(tif);
  201. sp = DecoderState(tif);
  202. }
  203. assert(sp != NULL);
  204. if (sp->dec_codetab == NULL) {
  205. sp->dec_codetab = (code_t*)_TIFFmalloc(CSIZE*sizeof (code_t));
  206. if (sp->dec_codetab == NULL) {
  207. TIFFErrorExt(tif->tif_clientdata, module,
  208. "No space for LZW code table");
  209. return (0);
  210. }
  211. /*
  212. * Pre-load the table.
  213. */
  214. code = 255;
  215. do {
  216. sp->dec_codetab[code].value = code;
  217. sp->dec_codetab[code].firstchar = code;
  218. sp->dec_codetab[code].length = 1;
  219. sp->dec_codetab[code].next = NULL;
  220. } while (code--);
  221. /*
  222. * Zero-out the unused entries
  223. */
  224. _TIFFmemset(&sp->dec_codetab[CODE_CLEAR], 0,
  225. (CODE_FIRST - CODE_CLEAR) * sizeof (code_t));
  226. }
  227. return (1);
  228. }
  229. /*
  230. * Setup state for decoding a strip.
  231. */
  232. static int
  233. LZWPreDecode(TIFF* tif, uint16 s)
  234. {
  235. static const char module[] = "LZWPreDecode";
  236. LZWCodecState *sp = DecoderState(tif);
  237. (void) s;
  238. assert(sp != NULL);
  239. if( sp->dec_codetab == NULL )
  240. {
  241. tif->tif_setupdecode( tif );
  242. }
  243. /*
  244. * Check for old bit-reversed codes.
  245. */
  246. if (tif->tif_rawdata[0] == 0 && (tif->tif_rawdata[1] & 0x1)) {
  247. #ifdef LZW_COMPAT
  248. if (!sp->dec_decode) {
  249. TIFFWarningExt(tif->tif_clientdata, module,
  250. "Old-style LZW codes, convert file");
  251. /*
  252. * Override default decoding methods with
  253. * ones that deal with the old coding.
  254. * Otherwise the predictor versions set
  255. * above will call the compatibility routines
  256. * through the dec_decode method.
  257. */
  258. tif->tif_decoderow = LZWDecodeCompat;
  259. tif->tif_decodestrip = LZWDecodeCompat;
  260. tif->tif_decodetile = LZWDecodeCompat;
  261. /*
  262. * If doing horizontal differencing, must
  263. * re-setup the predictor logic since we
  264. * switched the basic decoder methods...
  265. */
  266. (*tif->tif_setupdecode)(tif);
  267. sp->dec_decode = LZWDecodeCompat;
  268. }
  269. sp->lzw_maxcode = MAXCODE(BITS_MIN);
  270. #else /* !LZW_COMPAT */
  271. if (!sp->dec_decode) {
  272. TIFFErrorExt(tif->tif_clientdata, module,
  273. "Old-style LZW codes not supported");
  274. sp->dec_decode = LZWDecode;
  275. }
  276. return (0);
  277. #endif/* !LZW_COMPAT */
  278. } else {
  279. sp->lzw_maxcode = MAXCODE(BITS_MIN)-1;
  280. sp->dec_decode = LZWDecode;
  281. }
  282. sp->lzw_nbits = BITS_MIN;
  283. sp->lzw_nextbits = 0;
  284. sp->lzw_nextdata = 0;
  285. sp->dec_restart = 0;
  286. sp->dec_nbitsmask = MAXCODE(BITS_MIN);
  287. #ifdef LZW_CHECKEOS
  288. sp->dec_bitsleft = ((uint64)tif->tif_rawcc) << 3;
  289. #endif
  290. sp->dec_free_entp = sp->dec_codetab + CODE_FIRST;
  291. /*
  292. * Zero entries that are not yet filled in. We do
  293. * this to guard against bogus input data that causes
  294. * us to index into undefined entries. If you can
  295. * come up with a way to safely bounds-check input codes
  296. * while decoding then you can remove this operation.
  297. */
  298. _TIFFmemset(sp->dec_free_entp, 0, (CSIZE-CODE_FIRST)*sizeof (code_t));
  299. sp->dec_oldcodep = &sp->dec_codetab[-1];
  300. sp->dec_maxcodep = &sp->dec_codetab[sp->dec_nbitsmask-1];
  301. return (1);
  302. }
  303. /*
  304. * Decode a "hunk of data".
  305. */
  306. #define GetNextCode(sp, bp, code) { \
  307. nextdata = (nextdata<<8) | *(bp)++; \
  308. nextbits += 8; \
  309. if (nextbits < nbits) { \
  310. nextdata = (nextdata<<8) | *(bp)++; \
  311. nextbits += 8; \
  312. } \
  313. code = (hcode_t)((nextdata >> (nextbits-nbits)) & nbitsmask); \
  314. nextbits -= nbits; \
  315. }
  316. static void
  317. codeLoop(TIFF* tif, const char* module)
  318. {
  319. TIFFErrorExt(tif->tif_clientdata, module,
  320. "Bogus encoding, loop in the code table; scanline %d",
  321. tif->tif_row);
  322. }
  323. static int
  324. LZWDecode(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s)
  325. {
  326. static const char module[] = "LZWDecode";
  327. LZWCodecState *sp = DecoderState(tif);
  328. char *op = (char*) op0;
  329. long occ = (long) occ0;
  330. char *tp;
  331. unsigned char *bp;
  332. hcode_t code;
  333. int len;
  334. long nbits, nextbits, nextdata, nbitsmask;
  335. code_t *codep, *free_entp, *maxcodep, *oldcodep;
  336. (void) s;
  337. assert(sp != NULL);
  338. assert(sp->dec_codetab != NULL);
  339. /*
  340. Fail if value does not fit in long.
  341. */
  342. if ((tmsize_t) occ != occ0)
  343. return (0);
  344. /*
  345. * Restart interrupted output operation.
  346. */
  347. if (sp->dec_restart) {
  348. long residue;
  349. codep = sp->dec_codep;
  350. residue = codep->length - sp->dec_restart;
  351. if (residue > occ) {
  352. /*
  353. * Residue from previous decode is sufficient
  354. * to satisfy decode request. Skip to the
  355. * start of the decoded string, place decoded
  356. * values in the output buffer, and return.
  357. */
  358. sp->dec_restart += occ;
  359. do {
  360. codep = codep->next;
  361. } while (--residue > occ && codep);
  362. if (codep) {
  363. tp = op + occ;
  364. do {
  365. *--tp = codep->value;
  366. codep = codep->next;
  367. } while (--occ && codep);
  368. }
  369. return (1);
  370. }
  371. /*
  372. * Residue satisfies only part of the decode request.
  373. */
  374. op += residue, occ -= residue;
  375. tp = op;
  376. do {
  377. int t;
  378. --tp;
  379. t = codep->value;
  380. codep = codep->next;
  381. *tp = t;
  382. } while (--residue && codep);
  383. sp->dec_restart = 0;
  384. }
  385. bp = (unsigned char *)tif->tif_rawcp;
  386. nbits = sp->lzw_nbits;
  387. nextdata = sp->lzw_nextdata;
  388. nextbits = sp->lzw_nextbits;
  389. nbitsmask = sp->dec_nbitsmask;
  390. oldcodep = sp->dec_oldcodep;
  391. free_entp = sp->dec_free_entp;
  392. maxcodep = sp->dec_maxcodep;
  393. while (occ > 0) {
  394. NextCode(tif, sp, bp, code, GetNextCode);
  395. if (code == CODE_EOI)
  396. break;
  397. if (code == CODE_CLEAR) {
  398. free_entp = sp->dec_codetab + CODE_FIRST;
  399. _TIFFmemset(free_entp, 0,
  400. (CSIZE - CODE_FIRST) * sizeof (code_t));
  401. nbits = BITS_MIN;
  402. nbitsmask = MAXCODE(BITS_MIN);
  403. maxcodep = sp->dec_codetab + nbitsmask-1;
  404. NextCode(tif, sp, bp, code, GetNextCode);
  405. if (code == CODE_EOI)
  406. break;
  407. if (code >= CODE_CLEAR) {
  408. TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
  409. "LZWDecode: Corrupted LZW table at scanline %d",
  410. tif->tif_row);
  411. return (0);
  412. }
  413. *op++ = (char)code, occ--;
  414. oldcodep = sp->dec_codetab + code;
  415. continue;
  416. }
  417. codep = sp->dec_codetab + code;
  418. /*
  419. * Add the new entry to the code table.
  420. */
  421. if (free_entp < &sp->dec_codetab[0] ||
  422. free_entp >= &sp->dec_codetab[CSIZE]) {
  423. TIFFErrorExt(tif->tif_clientdata, module,
  424. "Corrupted LZW table at scanline %d",
  425. tif->tif_row);
  426. return (0);
  427. }
  428. free_entp->next = oldcodep;
  429. if (free_entp->next < &sp->dec_codetab[0] ||
  430. free_entp->next >= &sp->dec_codetab[CSIZE]) {
  431. TIFFErrorExt(tif->tif_clientdata, module,
  432. "Corrupted LZW table at scanline %d",
  433. tif->tif_row);
  434. return (0);
  435. }
  436. free_entp->firstchar = free_entp->next->firstchar;
  437. free_entp->length = free_entp->next->length+1;
  438. free_entp->value = (codep < free_entp) ?
  439. codep->firstchar : free_entp->firstchar;
  440. if (++free_entp > maxcodep) {
  441. if (++nbits > BITS_MAX) /* should not happen */
  442. nbits = BITS_MAX;
  443. nbitsmask = MAXCODE(nbits);
  444. maxcodep = sp->dec_codetab + nbitsmask-1;
  445. }
  446. oldcodep = codep;
  447. if (code >= 256) {
  448. /*
  449. * Code maps to a string, copy string
  450. * value to output (written in reverse).
  451. */
  452. if(codep->length == 0) {
  453. TIFFErrorExt(tif->tif_clientdata, module,
  454. "Wrong length of decoded string: "
  455. "data probably corrupted at scanline %d",
  456. tif->tif_row);
  457. return (0);
  458. }
  459. if (codep->length > occ) {
  460. /*
  461. * String is too long for decode buffer,
  462. * locate portion that will fit, copy to
  463. * the decode buffer, and setup restart
  464. * logic for the next decoding call.
  465. */
  466. sp->dec_codep = codep;
  467. do {
  468. codep = codep->next;
  469. } while (codep && codep->length > occ);
  470. if (codep) {
  471. sp->dec_restart = (long)occ;
  472. tp = op + occ;
  473. do {
  474. *--tp = codep->value;
  475. codep = codep->next;
  476. } while (--occ && codep);
  477. if (codep)
  478. codeLoop(tif, module);
  479. }
  480. break;
  481. }
  482. len = codep->length;
  483. tp = op + len;
  484. do {
  485. int t;
  486. --tp;
  487. t = codep->value;
  488. codep = codep->next;
  489. *tp = t;
  490. } while (codep && tp > op);
  491. if (codep) {
  492. codeLoop(tif, module);
  493. break;
  494. }
  495. assert(occ >= len);
  496. op += len, occ -= len;
  497. } else
  498. *op++ = (char)code, occ--;
  499. }
  500. tif->tif_rawcp = (uint8*) bp;
  501. sp->lzw_nbits = (unsigned short) nbits;
  502. sp->lzw_nextdata = nextdata;
  503. sp->lzw_nextbits = nextbits;
  504. sp->dec_nbitsmask = nbitsmask;
  505. sp->dec_oldcodep = oldcodep;
  506. sp->dec_free_entp = free_entp;
  507. sp->dec_maxcodep = maxcodep;
  508. if (occ > 0) {
  509. #if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__))
  510. TIFFErrorExt(tif->tif_clientdata, module,
  511. "Not enough data at scanline %d (short %I64d bytes)",
  512. tif->tif_row, (unsigned __int64) occ);
  513. #else
  514. TIFFErrorExt(tif->tif_clientdata, module,
  515. "Not enough data at scanline %d (short %llu bytes)",
  516. tif->tif_row, (unsigned long long) occ);
  517. #endif
  518. return (0);
  519. }
  520. return (1);
  521. }
  522. #ifdef LZW_COMPAT
  523. /*
  524. * Decode a "hunk of data" for old images.
  525. */
  526. #define GetNextCodeCompat(sp, bp, code) { \
  527. nextdata |= (unsigned long) *(bp)++ << nextbits; \
  528. nextbits += 8; \
  529. if (nextbits < nbits) { \
  530. nextdata |= (unsigned long) *(bp)++ << nextbits;\
  531. nextbits += 8; \
  532. } \
  533. code = (hcode_t)(nextdata & nbitsmask); \
  534. nextdata >>= nbits; \
  535. nextbits -= nbits; \
  536. }
  537. static int
  538. LZWDecodeCompat(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s)
  539. {
  540. static const char module[] = "LZWDecodeCompat";
  541. LZWCodecState *sp = DecoderState(tif);
  542. char *op = (char*) op0;
  543. long occ = (long) occ0;
  544. char *tp;
  545. unsigned char *bp;
  546. int code, nbits;
  547. long nextbits, nextdata, nbitsmask;
  548. code_t *codep, *free_entp, *maxcodep, *oldcodep;
  549. (void) s;
  550. assert(sp != NULL);
  551. /*
  552. Fail if value does not fit in long.
  553. */
  554. if ((tmsize_t) occ != occ0)
  555. return (0);
  556. /*
  557. * Restart interrupted output operation.
  558. */
  559. if (sp->dec_restart) {
  560. long residue;
  561. codep = sp->dec_codep;
  562. residue = codep->length - sp->dec_restart;
  563. if (residue > occ) {
  564. /*
  565. * Residue from previous decode is sufficient
  566. * to satisfy decode request. Skip to the
  567. * start of the decoded string, place decoded
  568. * values in the output buffer, and return.
  569. */
  570. sp->dec_restart += occ;
  571. do {
  572. codep = codep->next;
  573. } while (--residue > occ);
  574. tp = op + occ;
  575. do {
  576. *--tp = codep->value;
  577. codep = codep->next;
  578. } while (--occ);
  579. return (1);
  580. }
  581. /*
  582. * Residue satisfies only part of the decode request.
  583. */
  584. op += residue, occ -= residue;
  585. tp = op;
  586. do {
  587. *--tp = codep->value;
  588. codep = codep->next;
  589. } while (--residue);
  590. sp->dec_restart = 0;
  591. }
  592. bp = (unsigned char *)tif->tif_rawcp;
  593. nbits = sp->lzw_nbits;
  594. nextdata = sp->lzw_nextdata;
  595. nextbits = sp->lzw_nextbits;
  596. nbitsmask = sp->dec_nbitsmask;
  597. oldcodep = sp->dec_oldcodep;
  598. free_entp = sp->dec_free_entp;
  599. maxcodep = sp->dec_maxcodep;
  600. while (occ > 0) {
  601. NextCode(tif, sp, bp, code, GetNextCodeCompat);
  602. if (code == CODE_EOI)
  603. break;
  604. if (code == CODE_CLEAR) {
  605. free_entp = sp->dec_codetab + CODE_FIRST;
  606. _TIFFmemset(free_entp, 0,
  607. (CSIZE - CODE_FIRST) * sizeof (code_t));
  608. nbits = BITS_MIN;
  609. nbitsmask = MAXCODE(BITS_MIN);
  610. maxcodep = sp->dec_codetab + nbitsmask;
  611. NextCode(tif, sp, bp, code, GetNextCodeCompat);
  612. if (code == CODE_EOI)
  613. break;
  614. if (code >= CODE_CLEAR) {
  615. TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
  616. "LZWDecode: Corrupted LZW table at scanline %d",
  617. tif->tif_row);
  618. return (0);
  619. }
  620. *op++ = code, occ--;
  621. oldcodep = sp->dec_codetab + code;
  622. continue;
  623. }
  624. codep = sp->dec_codetab + code;
  625. /*
  626. * Add the new entry to the code table.
  627. */
  628. if (free_entp < &sp->dec_codetab[0] ||
  629. free_entp >= &sp->dec_codetab[CSIZE]) {
  630. TIFFErrorExt(tif->tif_clientdata, module,
  631. "Corrupted LZW table at scanline %d", tif->tif_row);
  632. return (0);
  633. }
  634. free_entp->next = oldcodep;
  635. if (free_entp->next < &sp->dec_codetab[0] ||
  636. free_entp->next >= &sp->dec_codetab[CSIZE]) {
  637. TIFFErrorExt(tif->tif_clientdata, module,
  638. "Corrupted LZW table at scanline %d", tif->tif_row);
  639. return (0);
  640. }
  641. free_entp->firstchar = free_entp->next->firstchar;
  642. free_entp->length = free_entp->next->length+1;
  643. free_entp->value = (codep < free_entp) ?
  644. codep->firstchar : free_entp->firstchar;
  645. if (++free_entp > maxcodep) {
  646. if (++nbits > BITS_MAX) /* should not happen */
  647. nbits = BITS_MAX;
  648. nbitsmask = MAXCODE(nbits);
  649. maxcodep = sp->dec_codetab + nbitsmask;
  650. }
  651. oldcodep = codep;
  652. if (code >= 256) {
  653. /*
  654. * Code maps to a string, copy string
  655. * value to output (written in reverse).
  656. */
  657. if(codep->length == 0) {
  658. TIFFErrorExt(tif->tif_clientdata, module,
  659. "Wrong length of decoded "
  660. "string: data probably corrupted at scanline %d",
  661. tif->tif_row);
  662. return (0);
  663. }
  664. if (codep->length > occ) {
  665. /*
  666. * String is too long for decode buffer,
  667. * locate portion that will fit, copy to
  668. * the decode buffer, and setup restart
  669. * logic for the next decoding call.
  670. */
  671. sp->dec_codep = codep;
  672. do {
  673. codep = codep->next;
  674. } while (codep->length > occ);
  675. sp->dec_restart = occ;
  676. tp = op + occ;
  677. do {
  678. *--tp = codep->value;
  679. codep = codep->next;
  680. } while (--occ);
  681. break;
  682. }
  683. assert(occ >= codep->length);
  684. op += codep->length, occ -= codep->length;
  685. tp = op;
  686. do {
  687. *--tp = codep->value;
  688. } while( (codep = codep->next) != NULL );
  689. } else
  690. *op++ = code, occ--;
  691. }
  692. tif->tif_rawcp = (uint8*) bp;
  693. sp->lzw_nbits = nbits;
  694. sp->lzw_nextdata = nextdata;
  695. sp->lzw_nextbits = nextbits;
  696. sp->dec_nbitsmask = nbitsmask;
  697. sp->dec_oldcodep = oldcodep;
  698. sp->dec_free_entp = free_entp;
  699. sp->dec_maxcodep = maxcodep;
  700. if (occ > 0) {
  701. #if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__))
  702. TIFFErrorExt(tif->tif_clientdata, module,
  703. "Not enough data at scanline %d (short %I64d bytes)",
  704. tif->tif_row, (unsigned __int64) occ);
  705. #else
  706. TIFFErrorExt(tif->tif_clientdata, module,
  707. "Not enough data at scanline %d (short %llu bytes)",
  708. tif->tif_row, (unsigned long long) occ);
  709. #endif
  710. return (0);
  711. }
  712. return (1);
  713. }
  714. #endif /* LZW_COMPAT */
  715. /*
  716. * LZW Encoding.
  717. */
  718. static int
  719. LZWSetupEncode(TIFF* tif)
  720. {
  721. static const char module[] = "LZWSetupEncode";
  722. LZWCodecState* sp = EncoderState(tif);
  723. assert(sp != NULL);
  724. sp->enc_hashtab = (hash_t*) _TIFFmalloc(HSIZE*sizeof (hash_t));
  725. if (sp->enc_hashtab == NULL) {
  726. TIFFErrorExt(tif->tif_clientdata, module,
  727. "No space for LZW hash table");
  728. return (0);
  729. }
  730. return (1);
  731. }
  732. /*
  733. * Reset encoding state at the start of a strip.
  734. */
  735. static int
  736. LZWPreEncode(TIFF* tif, uint16 s)
  737. {
  738. LZWCodecState *sp = EncoderState(tif);
  739. (void) s;
  740. assert(sp != NULL);
  741. if( sp->enc_hashtab == NULL )
  742. {
  743. tif->tif_setupencode( tif );
  744. }
  745. sp->lzw_nbits = BITS_MIN;
  746. sp->lzw_maxcode = MAXCODE(BITS_MIN);
  747. sp->lzw_free_ent = CODE_FIRST;
  748. sp->lzw_nextbits = 0;
  749. sp->lzw_nextdata = 0;
  750. sp->enc_checkpoint = CHECK_GAP;
  751. sp->enc_ratio = 0;
  752. sp->enc_incount = 0;
  753. sp->enc_outcount = 0;
  754. /*
  755. * The 4 here insures there is space for 2 max-sized
  756. * codes in LZWEncode and LZWPostDecode.
  757. */
  758. sp->enc_rawlimit = tif->tif_rawdata + tif->tif_rawdatasize-1 - 4;
  759. cl_hash(sp); /* clear hash table */
  760. sp->enc_oldcode = (hcode_t) -1; /* generates CODE_CLEAR in LZWEncode */
  761. return (1);
  762. }
  763. #define CALCRATIO(sp, rat) { \
  764. if (incount > 0x007fffff) { /* NB: shift will overflow */\
  765. rat = outcount >> 8; \
  766. rat = (rat == 0 ? 0x7fffffff : incount/rat); \
  767. } else \
  768. rat = (incount<<8) / outcount; \
  769. }
  770. #define PutNextCode(op, c) { \
  771. nextdata = (nextdata << nbits) | c; \
  772. nextbits += nbits; \
  773. *op++ = (unsigned char)(nextdata >> (nextbits-8)); \
  774. nextbits -= 8; \
  775. if (nextbits >= 8) { \
  776. *op++ = (unsigned char)(nextdata >> (nextbits-8)); \
  777. nextbits -= 8; \
  778. } \
  779. outcount += nbits; \
  780. }
  781. /*
  782. * Encode a chunk of pixels.
  783. *
  784. * Uses an open addressing double hashing (no chaining) on the
  785. * prefix code/next character combination. We do a variant of
  786. * Knuth's algorithm D (vol. 3, sec. 6.4) along with G. Knott's
  787. * relatively-prime secondary probe. Here, the modular division
  788. * first probe is gives way to a faster exclusive-or manipulation.
  789. * Also do block compression with an adaptive reset, whereby the
  790. * code table is cleared when the compression ratio decreases,
  791. * but after the table fills. The variable-length output codes
  792. * are re-sized at this point, and a CODE_CLEAR is generated
  793. * for the decoder.
  794. */
  795. static int
  796. LZWEncode(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s)
  797. {
  798. register LZWCodecState *sp = EncoderState(tif);
  799. register long fcode;
  800. register hash_t *hp;
  801. register int h, c;
  802. hcode_t ent;
  803. long disp;
  804. long incount, outcount, checkpoint;
  805. long nextdata, nextbits;
  806. int free_ent, maxcode, nbits;
  807. uint8* op;
  808. uint8* limit;
  809. (void) s;
  810. if (sp == NULL)
  811. return (0);
  812. assert(sp->enc_hashtab != NULL);
  813. /*
  814. * Load local state.
  815. */
  816. incount = sp->enc_incount;
  817. outcount = sp->enc_outcount;
  818. checkpoint = sp->enc_checkpoint;
  819. nextdata = sp->lzw_nextdata;
  820. nextbits = sp->lzw_nextbits;
  821. free_ent = sp->lzw_free_ent;
  822. maxcode = sp->lzw_maxcode;
  823. nbits = sp->lzw_nbits;
  824. op = tif->tif_rawcp;
  825. limit = sp->enc_rawlimit;
  826. ent = sp->enc_oldcode;
  827. if (ent == (hcode_t) -1 && cc > 0) {
  828. /*
  829. * NB: This is safe because it can only happen
  830. * at the start of a strip where we know there
  831. * is space in the data buffer.
  832. */
  833. PutNextCode(op, CODE_CLEAR);
  834. ent = *bp++; cc--; incount++;
  835. }
  836. while (cc > 0) {
  837. c = *bp++; cc--; incount++;
  838. fcode = ((long)c << BITS_MAX) + ent;
  839. h = (c << HSHIFT) ^ ent; /* xor hashing */
  840. #ifdef _WINDOWS
  841. /*
  842. * Check hash index for an overflow.
  843. */
  844. if (h >= HSIZE)
  845. h -= HSIZE;
  846. #endif
  847. hp = &sp->enc_hashtab[h];
  848. if (hp->hash == fcode) {
  849. ent = hp->code;
  850. continue;
  851. }
  852. if (hp->hash >= 0) {
  853. /*
  854. * Primary hash failed, check secondary hash.
  855. */
  856. disp = HSIZE - h;
  857. if (h == 0)
  858. disp = 1;
  859. do {
  860. /*
  861. * Avoid pointer arithmetic 'cuz of
  862. * wraparound problems with segments.
  863. */
  864. if ((h -= disp) < 0)
  865. h += HSIZE;
  866. hp = &sp->enc_hashtab[h];
  867. if (hp->hash == fcode) {
  868. ent = hp->code;
  869. goto hit;
  870. }
  871. } while (hp->hash >= 0);
  872. }
  873. /*
  874. * New entry, emit code and add to table.
  875. */
  876. /*
  877. * Verify there is space in the buffer for the code
  878. * and any potential Clear code that might be emitted
  879. * below. The value of limit is setup so that there
  880. * are at least 4 bytes free--room for 2 codes.
  881. */
  882. if (op > limit) {
  883. tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata);
  884. TIFFFlushData1(tif);
  885. op = tif->tif_rawdata;
  886. }
  887. PutNextCode(op, ent);
  888. ent = c;
  889. hp->code = free_ent++;
  890. hp->hash = fcode;
  891. if (free_ent == CODE_MAX-1) {
  892. /* table is full, emit clear code and reset */
  893. cl_hash(sp);
  894. sp->enc_ratio = 0;
  895. incount = 0;
  896. outcount = 0;
  897. free_ent = CODE_FIRST;
  898. PutNextCode(op, CODE_CLEAR);
  899. nbits = BITS_MIN;
  900. maxcode = MAXCODE(BITS_MIN);
  901. } else {
  902. /*
  903. * If the next entry is going to be too big for
  904. * the code size, then increase it, if possible.
  905. */
  906. if (free_ent > maxcode) {
  907. nbits++;
  908. assert(nbits <= BITS_MAX);
  909. maxcode = (int) MAXCODE(nbits);
  910. } else if (incount >= checkpoint) {
  911. long rat;
  912. /*
  913. * Check compression ratio and, if things seem
  914. * to be slipping, clear the hash table and
  915. * reset state. The compression ratio is a
  916. * 24+8-bit fractional number.
  917. */
  918. checkpoint = incount+CHECK_GAP;
  919. CALCRATIO(sp, rat);
  920. if (rat <= sp->enc_ratio) {
  921. cl_hash(sp);
  922. sp->enc_ratio = 0;
  923. incount = 0;
  924. outcount = 0;
  925. free_ent = CODE_FIRST;
  926. PutNextCode(op, CODE_CLEAR);
  927. nbits = BITS_MIN;
  928. maxcode = MAXCODE(BITS_MIN);
  929. } else
  930. sp->enc_ratio = rat;
  931. }
  932. }
  933. hit:
  934. ;
  935. }
  936. /*
  937. * Restore global state.
  938. */
  939. sp->enc_incount = incount;
  940. sp->enc_outcount = outcount;
  941. sp->enc_checkpoint = checkpoint;
  942. sp->enc_oldcode = ent;
  943. sp->lzw_nextdata = nextdata;
  944. sp->lzw_nextbits = nextbits;
  945. sp->lzw_free_ent = free_ent;
  946. sp->lzw_maxcode = maxcode;
  947. sp->lzw_nbits = nbits;
  948. tif->tif_rawcp = op;
  949. return (1);
  950. }
  951. /*
  952. * Finish off an encoded strip by flushing the last
  953. * string and tacking on an End Of Information code.
  954. */
  955. static int
  956. LZWPostEncode(TIFF* tif)
  957. {
  958. register LZWCodecState *sp = EncoderState(tif);
  959. uint8* op = tif->tif_rawcp;
  960. long nextbits = sp->lzw_nextbits;
  961. long nextdata = sp->lzw_nextdata;
  962. long outcount = sp->enc_outcount;
  963. int nbits = sp->lzw_nbits;
  964. if (op > sp->enc_rawlimit) {
  965. tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata);
  966. TIFFFlushData1(tif);
  967. op = tif->tif_rawdata;
  968. }
  969. if (sp->enc_oldcode != (hcode_t) -1) {
  970. PutNextCode(op, sp->enc_oldcode);
  971. sp->enc_oldcode = (hcode_t) -1;
  972. }
  973. PutNextCode(op, CODE_EOI);
  974. if (nextbits > 0)
  975. *op++ = (unsigned char)(nextdata << (8-nextbits));
  976. tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata);
  977. return (1);
  978. }
  979. /*
  980. * Reset encoding hash table.
  981. */
  982. static void
  983. cl_hash(LZWCodecState* sp)
  984. {
  985. register hash_t *hp = &sp->enc_hashtab[HSIZE-1];
  986. register long i = HSIZE-8;
  987. do {
  988. i -= 8;
  989. hp[-7].hash = -1;
  990. hp[-6].hash = -1;
  991. hp[-5].hash = -1;
  992. hp[-4].hash = -1;
  993. hp[-3].hash = -1;
  994. hp[-2].hash = -1;
  995. hp[-1].hash = -1;
  996. hp[ 0].hash = -1;
  997. hp -= 8;
  998. } while (i >= 0);
  999. for (i += 8; i > 0; i--, hp--)
  1000. hp->hash = -1;
  1001. }
  1002. static void
  1003. LZWCleanup(TIFF* tif)
  1004. {
  1005. (void)TIFFPredictorCleanup(tif);
  1006. assert(tif->tif_data != 0);
  1007. if (DecoderState(tif)->dec_codetab)
  1008. _TIFFfree(DecoderState(tif)->dec_codetab);
  1009. if (EncoderState(tif)->enc_hashtab)
  1010. _TIFFfree(EncoderState(tif)->enc_hashtab);
  1011. _TIFFfree(tif->tif_data);
  1012. tif->tif_data = NULL;
  1013. _TIFFSetDefaultCompressionState(tif);
  1014. }
  1015. int
  1016. TIFFInitLZW(TIFF* tif, int scheme)
  1017. {
  1018. static const char module[] = "TIFFInitLZW";
  1019. assert(scheme == COMPRESSION_LZW);
  1020. /*
  1021. * Allocate state block so tag methods have storage to record values.
  1022. */
  1023. tif->tif_data = (uint8*) _TIFFmalloc(sizeof (LZWCodecState));
  1024. if (tif->tif_data == NULL)
  1025. goto bad;
  1026. DecoderState(tif)->dec_codetab = NULL;
  1027. DecoderState(tif)->dec_decode = NULL;
  1028. EncoderState(tif)->enc_hashtab = NULL;
  1029. LZWState(tif)->rw_mode = tif->tif_mode;
  1030. /*
  1031. * Install codec methods.
  1032. */
  1033. tif->tif_fixuptags = LZWFixupTags;
  1034. tif->tif_setupdecode = LZWSetupDecode;
  1035. tif->tif_predecode = LZWPreDecode;
  1036. tif->tif_decoderow = LZWDecode;
  1037. tif->tif_decodestrip = LZWDecode;
  1038. tif->tif_decodetile = LZWDecode;
  1039. tif->tif_setupencode = LZWSetupEncode;
  1040. tif->tif_preencode = LZWPreEncode;
  1041. tif->tif_postencode = LZWPostEncode;
  1042. tif->tif_encoderow = LZWEncode;
  1043. tif->tif_encodestrip = LZWEncode;
  1044. tif->tif_encodetile = LZWEncode;
  1045. tif->tif_cleanup = LZWCleanup;
  1046. /*
  1047. * Setup predictor setup.
  1048. */
  1049. (void) TIFFPredictorInit(tif);
  1050. return (1);
  1051. bad:
  1052. TIFFErrorExt(tif->tif_clientdata, module,
  1053. "No space for LZW state block");
  1054. return (0);
  1055. }
  1056. /*
  1057. * Copyright (c) 1985, 1986 The Regents of the University of California.
  1058. * All rights reserved.
  1059. *
  1060. * This code is derived from software contributed to Berkeley by
  1061. * James A. Woods, derived from original work by Spencer Thomas
  1062. * and Joseph Orost.
  1063. *
  1064. * Redistribution and use in source and binary forms are permitted
  1065. * provided that the above copyright notice and this paragraph are
  1066. * duplicated in all such forms and that any documentation,
  1067. * advertising materials, and other materials related to such
  1068. * distribution and use acknowledge that the software was developed
  1069. * by the University of California, Berkeley. The name of the
  1070. * University may not be used to endorse or promote products derived
  1071. * from this software without specific prior written permission.
  1072. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
  1073. * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
  1074. * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  1075. */
  1076. #endif /* LZW_SUPPORT */
  1077. /* vim: set ts=8 sts=8 sw=8 noet: */
  1078. /*
  1079. * Local Variables:
  1080. * mode: c
  1081. * c-basic-offset: 8
  1082. * fill-column: 78
  1083. * End:
  1084. */