blob: fd9c7a0a74ba55c2dd0c8e976c86929cdb7f2649
1 | /* $Id: tif_lzw.c,v 1.45 2011-04-02 20:54:09 bfriesen Exp $ */ |
2 | |
3 | /* |
4 | * Copyright (c) 1988-1997 Sam Leffler |
5 | * Copyright (c) 1991-1997 Silicon Graphics, Inc. |
6 | * |
7 | * Permission to use, copy, modify, distribute, and sell this software and |
8 | * its documentation for any purpose is hereby granted without fee, provided |
9 | * that (i) the above copyright notices and this permission notice appear in |
10 | * all copies of the software and related documentation, and (ii) the names of |
11 | * Sam Leffler and Silicon Graphics may not be used in any advertising or |
12 | * publicity relating to the software without the specific, prior written |
13 | * permission of Sam Leffler and Silicon Graphics. |
14 | * |
15 | * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, |
16 | * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY |
17 | * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. |
18 | * |
19 | * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR |
20 | * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, |
21 | * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, |
22 | * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF |
23 | * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE |
24 | * OF THIS SOFTWARE. |
25 | */ |
26 | |
27 | #include "tiffiop.h" |
28 | #ifdef LZW_SUPPORT |
29 | /* |
30 | * TIFF Library. |
31 | * Rev 5.0 Lempel-Ziv & Welch Compression Support |
32 | * |
33 | * This code is derived from the compress program whose code is |
34 | * derived from software contributed to Berkeley by James A. Woods, |
35 | * derived from original work by Spencer Thomas and Joseph Orost. |
36 | * |
37 | * The original Berkeley copyright notice appears below in its entirety. |
38 | */ |
39 | #include "tif_predict.h" |
40 | |
41 | #include <stdio.h> |
42 | |
43 | /* |
44 | * NB: The 5.0 spec describes a different algorithm than Aldus |
45 | * implements. Specifically, Aldus does code length transitions |
46 | * one code earlier than should be done (for real LZW). |
47 | * Earlier versions of this library implemented the correct |
48 | * LZW algorithm, but emitted codes in a bit order opposite |
49 | * to the TIFF spec. Thus, to maintain compatibility w/ Aldus |
50 | * we interpret MSB-LSB ordered codes to be images written w/ |
51 | * old versions of this library, but otherwise adhere to the |
52 | * Aldus "off by one" algorithm. |
53 | * |
54 | * Future revisions to the TIFF spec are expected to "clarify this issue". |
55 | */ |
56 | #define LZW_COMPAT /* include backwards compatibility code */ |
57 | /* |
58 | * Each strip of data is supposed to be terminated by a CODE_EOI. |
59 | * If the following #define is included, the decoder will also |
60 | * check for end-of-strip w/o seeing this code. This makes the |
61 | * library more robust, but also slower. |
62 | */ |
63 | #define LZW_CHECKEOS /* include checks for strips w/o EOI code */ |
64 | |
65 | #define MAXCODE(n) ((1L<<(n))-1) |
66 | /* |
67 | * The TIFF spec specifies that encoded bit |
68 | * strings range from 9 to 12 bits. |
69 | */ |
70 | #define BITS_MIN 9 /* start with 9 bits */ |
71 | #define BITS_MAX 12 /* max of 12 bit strings */ |
72 | /* predefined codes */ |
73 | #define CODE_CLEAR 256 /* code to clear string table */ |
74 | #define CODE_EOI 257 /* end-of-information code */ |
75 | #define CODE_FIRST 258 /* first free code entry */ |
76 | #define CODE_MAX MAXCODE(BITS_MAX) |
77 | #define HSIZE 9001L /* 91% occupancy */ |
78 | #define HSHIFT (13-8) |
79 | #ifdef LZW_COMPAT |
80 | /* NB: +1024 is for compatibility with old files */ |
81 | #define CSIZE (MAXCODE(BITS_MAX)+1024L) |
82 | #else |
83 | #define CSIZE (MAXCODE(BITS_MAX)+1L) |
84 | #endif |
85 | |
86 | /* |
87 | * State block for each open TIFF file using LZW |
88 | * compression/decompression. Note that the predictor |
89 | * state block must be first in this data structure. |
90 | */ |
91 | typedef struct { |
92 | TIFFPredictorState predict; /* predictor super class */ |
93 | |
94 | unsigned short nbits; /* # of bits/code */ |
95 | unsigned short maxcode; /* maximum code for lzw_nbits */ |
96 | unsigned short free_ent; /* next free entry in hash table */ |
97 | long nextdata; /* next bits of i/o */ |
98 | long nextbits; /* # of valid bits in lzw_nextdata */ |
99 | |
100 | int rw_mode; /* preserve rw_mode from init */ |
101 | } LZWBaseState; |
102 | |
103 | #define lzw_nbits base.nbits |
104 | #define lzw_maxcode base.maxcode |
105 | #define lzw_free_ent base.free_ent |
106 | #define lzw_nextdata base.nextdata |
107 | #define lzw_nextbits base.nextbits |
108 | |
109 | /* |
110 | * Encoding-specific state. |
111 | */ |
112 | typedef uint16 hcode_t; /* codes fit in 16 bits */ |
113 | typedef struct { |
114 | long hash; |
115 | hcode_t code; |
116 | } hash_t; |
117 | |
118 | /* |
119 | * Decoding-specific state. |
120 | */ |
121 | typedef struct code_ent { |
122 | struct code_ent *next; |
123 | unsigned short length; /* string len, including this token */ |
124 | unsigned char value; /* data value */ |
125 | unsigned char firstchar; /* first token of string */ |
126 | } code_t; |
127 | |
128 | typedef int (*decodeFunc)(TIFF*, uint8*, tmsize_t, uint16); |
129 | |
130 | typedef struct { |
131 | LZWBaseState base; |
132 | |
133 | /* Decoding specific data */ |
134 | long dec_nbitsmask; /* lzw_nbits 1 bits, right adjusted */ |
135 | long dec_restart; /* restart count */ |
136 | #ifdef LZW_CHECKEOS |
137 | uint64 dec_bitsleft; /* available bits in raw data */ |
138 | #endif |
139 | decodeFunc dec_decode; /* regular or backwards compatible */ |
140 | code_t* dec_codep; /* current recognized code */ |
141 | code_t* dec_oldcodep; /* previously recognized code */ |
142 | code_t* dec_free_entp; /* next free entry */ |
143 | code_t* dec_maxcodep; /* max available entry */ |
144 | code_t* dec_codetab; /* kept separate for small machines */ |
145 | |
146 | /* Encoding specific data */ |
147 | int enc_oldcode; /* last code encountered */ |
148 | long enc_checkpoint; /* point at which to clear table */ |
149 | #define CHECK_GAP 10000 /* enc_ratio check interval */ |
150 | long enc_ratio; /* current compression ratio */ |
151 | long enc_incount; /* (input) data bytes encoded */ |
152 | long enc_outcount; /* encoded (output) bytes */ |
153 | uint8* enc_rawlimit; /* bound on tif_rawdata buffer */ |
154 | hash_t* enc_hashtab; /* kept separate for small machines */ |
155 | } LZWCodecState; |
156 | |
157 | #define LZWState(tif) ((LZWBaseState*) (tif)->tif_data) |
158 | #define DecoderState(tif) ((LZWCodecState*) LZWState(tif)) |
159 | #define EncoderState(tif) ((LZWCodecState*) LZWState(tif)) |
160 | |
161 | static int LZWDecode(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s); |
162 | #ifdef LZW_COMPAT |
163 | static int LZWDecodeCompat(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s); |
164 | #endif |
165 | static void cl_hash(LZWCodecState*); |
166 | |
167 | /* |
168 | * LZW Decoder. |
169 | */ |
170 | |
171 | #ifdef LZW_CHECKEOS |
172 | /* |
173 | * This check shouldn't be necessary because each |
174 | * strip is suppose to be terminated with CODE_EOI. |
175 | */ |
176 | #define NextCode(_tif, _sp, _bp, _code, _get) { \ |
177 | if ((_sp)->dec_bitsleft < (uint64)nbits) { \ |
178 | TIFFWarningExt(_tif->tif_clientdata, module, \ |
179 | "LZWDecode: Strip %d not terminated with EOI code", \ |
180 | _tif->tif_curstrip); \ |
181 | _code = CODE_EOI; \ |
182 | } else { \ |
183 | _get(_sp,_bp,_code); \ |
184 | (_sp)->dec_bitsleft -= nbits; \ |
185 | } \ |
186 | } |
187 | #else |
188 | #define NextCode(tif, sp, bp, code, get) get(sp, bp, code) |
189 | #endif |
190 | |
191 | static int |
192 | LZWFixupTags(TIFF* tif) |
193 | { |
194 | (void) tif; |
195 | return (1); |
196 | } |
197 | |
198 | static int |
199 | LZWSetupDecode(TIFF* tif) |
200 | { |
201 | static const char module[] = "LZWSetupDecode"; |
202 | LZWCodecState* sp = DecoderState(tif); |
203 | int code; |
204 | |
205 | if( sp == NULL ) |
206 | { |
207 | /* |
208 | * Allocate state block so tag methods have storage to record |
209 | * values. |
210 | */ |
211 | tif->tif_data = (uint8*) _TIFFmalloc(sizeof(LZWCodecState)); |
212 | if (tif->tif_data == NULL) |
213 | { |
214 | TIFFErrorExt(tif->tif_clientdata, module, "No space for LZW state block"); |
215 | return (0); |
216 | } |
217 | |
218 | DecoderState(tif)->dec_codetab = NULL; |
219 | DecoderState(tif)->dec_decode = NULL; |
220 | |
221 | /* |
222 | * Setup predictor setup. |
223 | */ |
224 | (void) TIFFPredictorInit(tif); |
225 | |
226 | sp = DecoderState(tif); |
227 | } |
228 | |
229 | assert(sp != NULL); |
230 | |
231 | if (sp->dec_codetab == NULL) { |
232 | sp->dec_codetab = (code_t*)_TIFFmalloc(CSIZE*sizeof (code_t)); |
233 | if (sp->dec_codetab == NULL) { |
234 | TIFFErrorExt(tif->tif_clientdata, module, |
235 | "No space for LZW code table"); |
236 | return (0); |
237 | } |
238 | /* |
239 | * Pre-load the table. |
240 | */ |
241 | code = 255; |
242 | do { |
243 | sp->dec_codetab[code].value = code; |
244 | sp->dec_codetab[code].firstchar = code; |
245 | sp->dec_codetab[code].length = 1; |
246 | sp->dec_codetab[code].next = NULL; |
247 | } while (code--); |
248 | /* |
249 | * Zero-out the unused entries |
250 | */ |
251 | _TIFFmemset(&sp->dec_codetab[CODE_CLEAR], 0, |
252 | (CODE_FIRST - CODE_CLEAR) * sizeof (code_t)); |
253 | } |
254 | return (1); |
255 | } |
256 | |
257 | /* |
258 | * Setup state for decoding a strip. |
259 | */ |
260 | static int |
261 | LZWPreDecode(TIFF* tif, uint16 s) |
262 | { |
263 | static const char module[] = "LZWPreDecode"; |
264 | LZWCodecState *sp = DecoderState(tif); |
265 | |
266 | (void) s; |
267 | assert(sp != NULL); |
268 | if( sp->dec_codetab == NULL ) |
269 | { |
270 | tif->tif_setupdecode( tif ); |
271 | } |
272 | |
273 | /* |
274 | * Check for old bit-reversed codes. |
275 | */ |
276 | if (tif->tif_rawdata[0] == 0 && (tif->tif_rawdata[1] & 0x1)) { |
277 | #ifdef LZW_COMPAT |
278 | if (!sp->dec_decode) { |
279 | TIFFWarningExt(tif->tif_clientdata, module, |
280 | "Old-style LZW codes, convert file"); |
281 | /* |
282 | * Override default decoding methods with |
283 | * ones that deal with the old coding. |
284 | * Otherwise the predictor versions set |
285 | * above will call the compatibility routines |
286 | * through the dec_decode method. |
287 | */ |
288 | tif->tif_decoderow = LZWDecodeCompat; |
289 | tif->tif_decodestrip = LZWDecodeCompat; |
290 | tif->tif_decodetile = LZWDecodeCompat; |
291 | /* |
292 | * If doing horizontal differencing, must |
293 | * re-setup the predictor logic since we |
294 | * switched the basic decoder methods... |
295 | */ |
296 | (*tif->tif_setupdecode)(tif); |
297 | sp->dec_decode = LZWDecodeCompat; |
298 | } |
299 | sp->lzw_maxcode = MAXCODE(BITS_MIN); |
300 | #else /* !LZW_COMPAT */ |
301 | if (!sp->dec_decode) { |
302 | TIFFErrorExt(tif->tif_clientdata, module, |
303 | "Old-style LZW codes not supported"); |
304 | sp->dec_decode = LZWDecode; |
305 | } |
306 | return (0); |
307 | #endif/* !LZW_COMPAT */ |
308 | } else { |
309 | sp->lzw_maxcode = MAXCODE(BITS_MIN)-1; |
310 | sp->dec_decode = LZWDecode; |
311 | } |
312 | sp->lzw_nbits = BITS_MIN; |
313 | sp->lzw_nextbits = 0; |
314 | sp->lzw_nextdata = 0; |
315 | |
316 | sp->dec_restart = 0; |
317 | sp->dec_nbitsmask = MAXCODE(BITS_MIN); |
318 | #ifdef LZW_CHECKEOS |
319 | sp->dec_bitsleft = ((uint64)tif->tif_rawcc) << 3; |
320 | #endif |
321 | sp->dec_free_entp = sp->dec_codetab + CODE_FIRST; |
322 | /* |
323 | * Zero entries that are not yet filled in. We do |
324 | * this to guard against bogus input data that causes |
325 | * us to index into undefined entries. If you can |
326 | * come up with a way to safely bounds-check input codes |
327 | * while decoding then you can remove this operation. |
328 | */ |
329 | _TIFFmemset(sp->dec_free_entp, 0, (CSIZE-CODE_FIRST)*sizeof (code_t)); |
330 | sp->dec_oldcodep = &sp->dec_codetab[-1]; |
331 | sp->dec_maxcodep = &sp->dec_codetab[sp->dec_nbitsmask-1]; |
332 | return (1); |
333 | } |
334 | |
335 | /* |
336 | * Decode a "hunk of data". |
337 | */ |
338 | #define GetNextCode(sp, bp, code) { \ |
339 | nextdata = (nextdata<<8) | *(bp)++; \ |
340 | nextbits += 8; \ |
341 | if (nextbits < nbits) { \ |
342 | nextdata = (nextdata<<8) | *(bp)++; \ |
343 | nextbits += 8; \ |
344 | } \ |
345 | code = (hcode_t)((nextdata >> (nextbits-nbits)) & nbitsmask); \ |
346 | nextbits -= nbits; \ |
347 | } |
348 | |
349 | static void |
350 | codeLoop(TIFF* tif, const char* module) |
351 | { |
352 | TIFFErrorExt(tif->tif_clientdata, module, |
353 | "Bogus encoding, loop in the code table; scanline %d", |
354 | tif->tif_row); |
355 | } |
356 | |
357 | static int |
358 | LZWDecode(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s) |
359 | { |
360 | static const char module[] = "LZWDecode"; |
361 | LZWCodecState *sp = DecoderState(tif); |
362 | char *op = (char*) op0; |
363 | long occ = (long) occ0; |
364 | char *tp; |
365 | unsigned char *bp; |
366 | hcode_t code; |
367 | int len; |
368 | long nbits, nextbits, nextdata, nbitsmask; |
369 | code_t *codep, *free_entp, *maxcodep, *oldcodep; |
370 | |
371 | (void) s; |
372 | assert(sp != NULL); |
373 | assert(sp->dec_codetab != NULL); |
374 | |
375 | /* |
376 | Fail if value does not fit in long. |
377 | */ |
378 | if ((tmsize_t) occ != occ0) |
379 | return (0); |
380 | /* |
381 | * Restart interrupted output operation. |
382 | */ |
383 | if (sp->dec_restart) { |
384 | long residue; |
385 | |
386 | codep = sp->dec_codep; |
387 | residue = codep->length - sp->dec_restart; |
388 | if (residue > occ) { |
389 | /* |
390 | * Residue from previous decode is sufficient |
391 | * to satisfy decode request. Skip to the |
392 | * start of the decoded string, place decoded |
393 | * values in the output buffer, and return. |
394 | */ |
395 | sp->dec_restart += occ; |
396 | do { |
397 | codep = codep->next; |
398 | } while (--residue > occ && codep); |
399 | if (codep) { |
400 | tp = op + occ; |
401 | do { |
402 | *--tp = codep->value; |
403 | codep = codep->next; |
404 | } while (--occ && codep); |
405 | } |
406 | return (1); |
407 | } |
408 | /* |
409 | * Residue satisfies only part of the decode request. |
410 | */ |
411 | op += residue, occ -= residue; |
412 | tp = op; |
413 | do { |
414 | int t; |
415 | --tp; |
416 | t = codep->value; |
417 | codep = codep->next; |
418 | *tp = t; |
419 | } while (--residue && codep); |
420 | sp->dec_restart = 0; |
421 | } |
422 | |
423 | bp = (unsigned char *)tif->tif_rawcp; |
424 | nbits = sp->lzw_nbits; |
425 | nextdata = sp->lzw_nextdata; |
426 | nextbits = sp->lzw_nextbits; |
427 | nbitsmask = sp->dec_nbitsmask; |
428 | oldcodep = sp->dec_oldcodep; |
429 | free_entp = sp->dec_free_entp; |
430 | maxcodep = sp->dec_maxcodep; |
431 | |
432 | while (occ > 0) { |
433 | NextCode(tif, sp, bp, code, GetNextCode); |
434 | if (code == CODE_EOI) |
435 | break; |
436 | if (code == CODE_CLEAR) { |
437 | free_entp = sp->dec_codetab + CODE_FIRST; |
438 | _TIFFmemset(free_entp, 0, |
439 | (CSIZE - CODE_FIRST) * sizeof (code_t)); |
440 | nbits = BITS_MIN; |
441 | nbitsmask = MAXCODE(BITS_MIN); |
442 | maxcodep = sp->dec_codetab + nbitsmask-1; |
443 | NextCode(tif, sp, bp, code, GetNextCode); |
444 | if (code == CODE_EOI) |
445 | break; |
446 | if (code >= CODE_CLEAR) { |
447 | TIFFErrorExt(tif->tif_clientdata, tif->tif_name, |
448 | "LZWDecode: Corrupted LZW table at scanline %d", |
449 | tif->tif_row); |
450 | return (0); |
451 | } |
452 | *op++ = (char)code, occ--; |
453 | oldcodep = sp->dec_codetab + code; |
454 | continue; |
455 | } |
456 | codep = sp->dec_codetab + code; |
457 | |
458 | /* |
459 | * Add the new entry to the code table. |
460 | */ |
461 | if (free_entp < &sp->dec_codetab[0] || |
462 | free_entp >= &sp->dec_codetab[CSIZE]) { |
463 | TIFFErrorExt(tif->tif_clientdata, module, |
464 | "Corrupted LZW table at scanline %d", |
465 | tif->tif_row); |
466 | return (0); |
467 | } |
468 | |
469 | free_entp->next = oldcodep; |
470 | if (free_entp->next < &sp->dec_codetab[0] || |
471 | free_entp->next >= &sp->dec_codetab[CSIZE]) { |
472 | TIFFErrorExt(tif->tif_clientdata, module, |
473 | "Corrupted LZW table at scanline %d", |
474 | tif->tif_row); |
475 | return (0); |
476 | } |
477 | free_entp->firstchar = free_entp->next->firstchar; |
478 | free_entp->length = free_entp->next->length+1; |
479 | free_entp->value = (codep < free_entp) ? |
480 | codep->firstchar : free_entp->firstchar; |
481 | if (++free_entp > maxcodep) { |
482 | if (++nbits > BITS_MAX) /* should not happen */ |
483 | nbits = BITS_MAX; |
484 | nbitsmask = MAXCODE(nbits); |
485 | maxcodep = sp->dec_codetab + nbitsmask-1; |
486 | } |
487 | oldcodep = codep; |
488 | if (code >= 256) { |
489 | /* |
490 | * Code maps to a string, copy string |
491 | * value to output (written in reverse). |
492 | */ |
493 | if(codep->length == 0) { |
494 | TIFFErrorExt(tif->tif_clientdata, module, |
495 | "Wrong length of decoded string: " |
496 | "data probably corrupted at scanline %d", |
497 | tif->tif_row); |
498 | return (0); |
499 | } |
500 | if (codep->length > occ) { |
501 | /* |
502 | * String is too long for decode buffer, |
503 | * locate portion that will fit, copy to |
504 | * the decode buffer, and setup restart |
505 | * logic for the next decoding call. |
506 | */ |
507 | sp->dec_codep = codep; |
508 | do { |
509 | codep = codep->next; |
510 | } while (codep && codep->length > occ); |
511 | if (codep) { |
512 | sp->dec_restart = (long)occ; |
513 | tp = op + occ; |
514 | do { |
515 | *--tp = codep->value; |
516 | codep = codep->next; |
517 | } while (--occ && codep); |
518 | if (codep) |
519 | codeLoop(tif, module); |
520 | } |
521 | break; |
522 | } |
523 | len = codep->length; |
524 | tp = op + len; |
525 | do { |
526 | int t; |
527 | --tp; |
528 | t = codep->value; |
529 | codep = codep->next; |
530 | *tp = t; |
531 | } while (codep && tp > op); |
532 | if (codep) { |
533 | codeLoop(tif, module); |
534 | break; |
535 | } |
536 | assert(occ >= len); |
537 | op += len, occ -= len; |
538 | } else |
539 | *op++ = (char)code, occ--; |
540 | } |
541 | |
542 | tif->tif_rawcp = (uint8*) bp; |
543 | sp->lzw_nbits = (unsigned short) nbits; |
544 | sp->lzw_nextdata = nextdata; |
545 | sp->lzw_nextbits = nextbits; |
546 | sp->dec_nbitsmask = nbitsmask; |
547 | sp->dec_oldcodep = oldcodep; |
548 | sp->dec_free_entp = free_entp; |
549 | sp->dec_maxcodep = maxcodep; |
550 | |
551 | if (occ > 0) { |
552 | #if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__)) |
553 | TIFFErrorExt(tif->tif_clientdata, module, |
554 | "Not enough data at scanline %d (short %I64d bytes)", |
555 | tif->tif_row, (unsigned __int64) occ); |
556 | #else |
557 | TIFFErrorExt(tif->tif_clientdata, module, |
558 | "Not enough data at scanline %d (short %llu bytes)", |
559 | tif->tif_row, (unsigned long long) occ); |
560 | #endif |
561 | return (0); |
562 | } |
563 | return (1); |
564 | } |
565 | |
566 | #ifdef LZW_COMPAT |
567 | /* |
568 | * Decode a "hunk of data" for old images. |
569 | */ |
570 | #define GetNextCodeCompat(sp, bp, code) { \ |
571 | nextdata |= (unsigned long) *(bp)++ << nextbits; \ |
572 | nextbits += 8; \ |
573 | if (nextbits < nbits) { \ |
574 | nextdata |= (unsigned long) *(bp)++ << nextbits;\ |
575 | nextbits += 8; \ |
576 | } \ |
577 | code = (hcode_t)(nextdata & nbitsmask); \ |
578 | nextdata >>= nbits; \ |
579 | nextbits -= nbits; \ |
580 | } |
581 | |
582 | static int |
583 | LZWDecodeCompat(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s) |
584 | { |
585 | static const char module[] = "LZWDecodeCompat"; |
586 | LZWCodecState *sp = DecoderState(tif); |
587 | char *op = (char*) op0; |
588 | long occ = (long) occ0; |
589 | char *tp; |
590 | unsigned char *bp; |
591 | int code, nbits; |
592 | long nextbits, nextdata, nbitsmask; |
593 | code_t *codep, *free_entp, *maxcodep, *oldcodep; |
594 | |
595 | (void) s; |
596 | assert(sp != NULL); |
597 | |
598 | /* |
599 | Fail if value does not fit in long. |
600 | */ |
601 | if ((tmsize_t) occ != occ0) |
602 | return (0); |
603 | |
604 | /* |
605 | * Restart interrupted output operation. |
606 | */ |
607 | if (sp->dec_restart) { |
608 | long residue; |
609 | |
610 | codep = sp->dec_codep; |
611 | residue = codep->length - sp->dec_restart; |
612 | if (residue > occ) { |
613 | /* |
614 | * Residue from previous decode is sufficient |
615 | * to satisfy decode request. Skip to the |
616 | * start of the decoded string, place decoded |
617 | * values in the output buffer, and return. |
618 | */ |
619 | sp->dec_restart += occ; |
620 | do { |
621 | codep = codep->next; |
622 | } while (--residue > occ); |
623 | tp = op + occ; |
624 | do { |
625 | *--tp = codep->value; |
626 | codep = codep->next; |
627 | } while (--occ); |
628 | return (1); |
629 | } |
630 | /* |
631 | * Residue satisfies only part of the decode request. |
632 | */ |
633 | op += residue, occ -= residue; |
634 | tp = op; |
635 | do { |
636 | *--tp = codep->value; |
637 | codep = codep->next; |
638 | } while (--residue); |
639 | sp->dec_restart = 0; |
640 | } |
641 | |
642 | bp = (unsigned char *)tif->tif_rawcp; |
643 | nbits = sp->lzw_nbits; |
644 | nextdata = sp->lzw_nextdata; |
645 | nextbits = sp->lzw_nextbits; |
646 | nbitsmask = sp->dec_nbitsmask; |
647 | oldcodep = sp->dec_oldcodep; |
648 | free_entp = sp->dec_free_entp; |
649 | maxcodep = sp->dec_maxcodep; |
650 | |
651 | while (occ > 0) { |
652 | NextCode(tif, sp, bp, code, GetNextCodeCompat); |
653 | if (code == CODE_EOI) |
654 | break; |
655 | if (code == CODE_CLEAR) { |
656 | free_entp = sp->dec_codetab + CODE_FIRST; |
657 | _TIFFmemset(free_entp, 0, |
658 | (CSIZE - CODE_FIRST) * sizeof (code_t)); |
659 | nbits = BITS_MIN; |
660 | nbitsmask = MAXCODE(BITS_MIN); |
661 | maxcodep = sp->dec_codetab + nbitsmask; |
662 | NextCode(tif, sp, bp, code, GetNextCodeCompat); |
663 | if (code == CODE_EOI) |
664 | break; |
665 | if (code >= CODE_CLEAR) { |
666 | TIFFErrorExt(tif->tif_clientdata, tif->tif_name, |
667 | "LZWDecode: Corrupted LZW table at scanline %d", |
668 | tif->tif_row); |
669 | return (0); |
670 | } |
671 | *op++ = code, occ--; |
672 | oldcodep = sp->dec_codetab + code; |
673 | continue; |
674 | } |
675 | codep = sp->dec_codetab + code; |
676 | |
677 | /* |
678 | * Add the new entry to the code table. |
679 | */ |
680 | if (free_entp < &sp->dec_codetab[0] || |
681 | free_entp >= &sp->dec_codetab[CSIZE]) { |
682 | TIFFErrorExt(tif->tif_clientdata, module, |
683 | "Corrupted LZW table at scanline %d", tif->tif_row); |
684 | return (0); |
685 | } |
686 | |
687 | free_entp->next = oldcodep; |
688 | if (free_entp->next < &sp->dec_codetab[0] || |
689 | free_entp->next >= &sp->dec_codetab[CSIZE]) { |
690 | TIFFErrorExt(tif->tif_clientdata, module, |
691 | "Corrupted LZW table at scanline %d", tif->tif_row); |
692 | return (0); |
693 | } |
694 | free_entp->firstchar = free_entp->next->firstchar; |
695 | free_entp->length = free_entp->next->length+1; |
696 | free_entp->value = (codep < free_entp) ? |
697 | codep->firstchar : free_entp->firstchar; |
698 | if (++free_entp > maxcodep) { |
699 | if (++nbits > BITS_MAX) /* should not happen */ |
700 | nbits = BITS_MAX; |
701 | nbitsmask = MAXCODE(nbits); |
702 | maxcodep = sp->dec_codetab + nbitsmask; |
703 | } |
704 | oldcodep = codep; |
705 | if (code >= 256) { |
706 | /* |
707 | * Code maps to a string, copy string |
708 | * value to output (written in reverse). |
709 | */ |
710 | if(codep->length == 0) { |
711 | TIFFErrorExt(tif->tif_clientdata, module, |
712 | "Wrong length of decoded " |
713 | "string: data probably corrupted at scanline %d", |
714 | tif->tif_row); |
715 | return (0); |
716 | } |
717 | if (codep->length > occ) { |
718 | /* |
719 | * String is too long for decode buffer, |
720 | * locate portion that will fit, copy to |
721 | * the decode buffer, and setup restart |
722 | * logic for the next decoding call. |
723 | */ |
724 | sp->dec_codep = codep; |
725 | do { |
726 | codep = codep->next; |
727 | } while (codep->length > occ); |
728 | sp->dec_restart = occ; |
729 | tp = op + occ; |
730 | do { |
731 | *--tp = codep->value; |
732 | codep = codep->next; |
733 | } while (--occ); |
734 | break; |
735 | } |
736 | assert(occ >= codep->length); |
737 | op += codep->length, occ -= codep->length; |
738 | tp = op; |
739 | do { |
740 | *--tp = codep->value; |
741 | } while( (codep = codep->next) != NULL ); |
742 | } else |
743 | *op++ = code, occ--; |
744 | } |
745 | |
746 | tif->tif_rawcp = (uint8*) bp; |
747 | sp->lzw_nbits = nbits; |
748 | sp->lzw_nextdata = nextdata; |
749 | sp->lzw_nextbits = nextbits; |
750 | sp->dec_nbitsmask = nbitsmask; |
751 | sp->dec_oldcodep = oldcodep; |
752 | sp->dec_free_entp = free_entp; |
753 | sp->dec_maxcodep = maxcodep; |
754 | |
755 | if (occ > 0) { |
756 | #if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__)) |
757 | TIFFErrorExt(tif->tif_clientdata, module, |
758 | "Not enough data at scanline %d (short %I64d bytes)", |
759 | tif->tif_row, (unsigned __int64) occ); |
760 | #else |
761 | TIFFErrorExt(tif->tif_clientdata, module, |
762 | "Not enough data at scanline %d (short %llu bytes)", |
763 | tif->tif_row, (unsigned long long) occ); |
764 | #endif |
765 | return (0); |
766 | } |
767 | return (1); |
768 | } |
769 | #endif /* LZW_COMPAT */ |
770 | |
771 | /* |
772 | * LZW Encoding. |
773 | */ |
774 | |
775 | static int |
776 | LZWSetupEncode(TIFF* tif) |
777 | { |
778 | static const char module[] = "LZWSetupEncode"; |
779 | LZWCodecState* sp = EncoderState(tif); |
780 | |
781 | assert(sp != NULL); |
782 | sp->enc_hashtab = (hash_t*) _TIFFmalloc(HSIZE*sizeof (hash_t)); |
783 | if (sp->enc_hashtab == NULL) { |
784 | TIFFErrorExt(tif->tif_clientdata, module, |
785 | "No space for LZW hash table"); |
786 | return (0); |
787 | } |
788 | return (1); |
789 | } |
790 | |
791 | /* |
792 | * Reset encoding state at the start of a strip. |
793 | */ |
794 | static int |
795 | LZWPreEncode(TIFF* tif, uint16 s) |
796 | { |
797 | LZWCodecState *sp = EncoderState(tif); |
798 | |
799 | (void) s; |
800 | assert(sp != NULL); |
801 | |
802 | if( sp->enc_hashtab == NULL ) |
803 | { |
804 | tif->tif_setupencode( tif ); |
805 | } |
806 | |
807 | sp->lzw_nbits = BITS_MIN; |
808 | sp->lzw_maxcode = MAXCODE(BITS_MIN); |
809 | sp->lzw_free_ent = CODE_FIRST; |
810 | sp->lzw_nextbits = 0; |
811 | sp->lzw_nextdata = 0; |
812 | sp->enc_checkpoint = CHECK_GAP; |
813 | sp->enc_ratio = 0; |
814 | sp->enc_incount = 0; |
815 | sp->enc_outcount = 0; |
816 | /* |
817 | * The 4 here insures there is space for 2 max-sized |
818 | * codes in LZWEncode and LZWPostDecode. |
819 | */ |
820 | sp->enc_rawlimit = tif->tif_rawdata + tif->tif_rawdatasize-1 - 4; |
821 | cl_hash(sp); /* clear hash table */ |
822 | sp->enc_oldcode = (hcode_t) -1; /* generates CODE_CLEAR in LZWEncode */ |
823 | return (1); |
824 | } |
825 | |
826 | #define CALCRATIO(sp, rat) { \ |
827 | if (incount > 0x007fffff) { /* NB: shift will overflow */\ |
828 | rat = outcount >> 8; \ |
829 | rat = (rat == 0 ? 0x7fffffff : incount/rat); \ |
830 | } else \ |
831 | rat = (incount<<8) / outcount; \ |
832 | } |
833 | #define PutNextCode(op, c) { \ |
834 | nextdata = (nextdata << nbits) | c; \ |
835 | nextbits += nbits; \ |
836 | *op++ = (unsigned char)(nextdata >> (nextbits-8)); \ |
837 | nextbits -= 8; \ |
838 | if (nextbits >= 8) { \ |
839 | *op++ = (unsigned char)(nextdata >> (nextbits-8)); \ |
840 | nextbits -= 8; \ |
841 | } \ |
842 | outcount += nbits; \ |
843 | } |
844 | |
845 | /* |
846 | * Encode a chunk of pixels. |
847 | * |
848 | * Uses an open addressing double hashing (no chaining) on the |
849 | * prefix code/next character combination. We do a variant of |
850 | * Knuth's algorithm D (vol. 3, sec. 6.4) along with G. Knott's |
851 | * relatively-prime secondary probe. Here, the modular division |
852 | * first probe is gives way to a faster exclusive-or manipulation. |
853 | * Also do block compression with an adaptive reset, whereby the |
854 | * code table is cleared when the compression ratio decreases, |
855 | * but after the table fills. The variable-length output codes |
856 | * are re-sized at this point, and a CODE_CLEAR is generated |
857 | * for the decoder. |
858 | */ |
859 | static int |
860 | LZWEncode(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s) |
861 | { |
862 | register LZWCodecState *sp = EncoderState(tif); |
863 | register long fcode; |
864 | register hash_t *hp; |
865 | register int h, c; |
866 | hcode_t ent; |
867 | long disp; |
868 | long incount, outcount, checkpoint; |
869 | long nextdata, nextbits; |
870 | int free_ent, maxcode, nbits; |
871 | uint8* op; |
872 | uint8* limit; |
873 | |
874 | (void) s; |
875 | if (sp == NULL) |
876 | return (0); |
877 | |
878 | assert(sp->enc_hashtab != NULL); |
879 | |
880 | /* |
881 | * Load local state. |
882 | */ |
883 | incount = sp->enc_incount; |
884 | outcount = sp->enc_outcount; |
885 | checkpoint = sp->enc_checkpoint; |
886 | nextdata = sp->lzw_nextdata; |
887 | nextbits = sp->lzw_nextbits; |
888 | free_ent = sp->lzw_free_ent; |
889 | maxcode = sp->lzw_maxcode; |
890 | nbits = sp->lzw_nbits; |
891 | op = tif->tif_rawcp; |
892 | limit = sp->enc_rawlimit; |
893 | ent = sp->enc_oldcode; |
894 | |
895 | if (ent == (hcode_t) -1 && cc > 0) { |
896 | /* |
897 | * NB: This is safe because it can only happen |
898 | * at the start of a strip where we know there |
899 | * is space in the data buffer. |
900 | */ |
901 | PutNextCode(op, CODE_CLEAR); |
902 | ent = *bp++; cc--; incount++; |
903 | } |
904 | while (cc > 0) { |
905 | c = *bp++; cc--; incount++; |
906 | fcode = ((long)c << BITS_MAX) + ent; |
907 | h = (c << HSHIFT) ^ ent; /* xor hashing */ |
908 | #ifdef _WINDOWS |
909 | /* |
910 | * Check hash index for an overflow. |
911 | */ |
912 | if (h >= HSIZE) |
913 | h -= HSIZE; |
914 | #endif |
915 | hp = &sp->enc_hashtab[h]; |
916 | if (hp->hash == fcode) { |
917 | ent = hp->code; |
918 | continue; |
919 | } |
920 | if (hp->hash >= 0) { |
921 | /* |
922 | * Primary hash failed, check secondary hash. |
923 | */ |
924 | disp = HSIZE - h; |
925 | if (h == 0) |
926 | disp = 1; |
927 | do { |
928 | /* |
929 | * Avoid pointer arithmetic 'cuz of |
930 | * wraparound problems with segments. |
931 | */ |
932 | if ((h -= disp) < 0) |
933 | h += HSIZE; |
934 | hp = &sp->enc_hashtab[h]; |
935 | if (hp->hash == fcode) { |
936 | ent = hp->code; |
937 | goto hit; |
938 | } |
939 | } while (hp->hash >= 0); |
940 | } |
941 | /* |
942 | * New entry, emit code and add to table. |
943 | */ |
944 | /* |
945 | * Verify there is space in the buffer for the code |
946 | * and any potential Clear code that might be emitted |
947 | * below. The value of limit is setup so that there |
948 | * are at least 4 bytes free--room for 2 codes. |
949 | */ |
950 | if (op > limit) { |
951 | tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata); |
952 | TIFFFlushData1(tif); |
953 | op = tif->tif_rawdata; |
954 | } |
955 | PutNextCode(op, ent); |
956 | ent = c; |
957 | hp->code = free_ent++; |
958 | hp->hash = fcode; |
959 | if (free_ent == CODE_MAX-1) { |
960 | /* table is full, emit clear code and reset */ |
961 | cl_hash(sp); |
962 | sp->enc_ratio = 0; |
963 | incount = 0; |
964 | outcount = 0; |
965 | free_ent = CODE_FIRST; |
966 | PutNextCode(op, CODE_CLEAR); |
967 | nbits = BITS_MIN; |
968 | maxcode = MAXCODE(BITS_MIN); |
969 | } else { |
970 | /* |
971 | * If the next entry is going to be too big for |
972 | * the code size, then increase it, if possible. |
973 | */ |
974 | if (free_ent > maxcode) { |
975 | nbits++; |
976 | assert(nbits <= BITS_MAX); |
977 | maxcode = (int) MAXCODE(nbits); |
978 | } else if (incount >= checkpoint) { |
979 | long rat; |
980 | /* |
981 | * Check compression ratio and, if things seem |
982 | * to be slipping, clear the hash table and |
983 | * reset state. The compression ratio is a |
984 | * 24+8-bit fractional number. |
985 | */ |
986 | checkpoint = incount+CHECK_GAP; |
987 | CALCRATIO(sp, rat); |
988 | if (rat <= sp->enc_ratio) { |
989 | cl_hash(sp); |
990 | sp->enc_ratio = 0; |
991 | incount = 0; |
992 | outcount = 0; |
993 | free_ent = CODE_FIRST; |
994 | PutNextCode(op, CODE_CLEAR); |
995 | nbits = BITS_MIN; |
996 | maxcode = MAXCODE(BITS_MIN); |
997 | } else |
998 | sp->enc_ratio = rat; |
999 | } |
1000 | } |
1001 | hit: |
1002 | ; |
1003 | } |
1004 | |
1005 | /* |
1006 | * Restore global state. |
1007 | */ |
1008 | sp->enc_incount = incount; |
1009 | sp->enc_outcount = outcount; |
1010 | sp->enc_checkpoint = checkpoint; |
1011 | sp->enc_oldcode = ent; |
1012 | sp->lzw_nextdata = nextdata; |
1013 | sp->lzw_nextbits = nextbits; |
1014 | sp->lzw_free_ent = free_ent; |
1015 | sp->lzw_maxcode = maxcode; |
1016 | sp->lzw_nbits = nbits; |
1017 | tif->tif_rawcp = op; |
1018 | return (1); |
1019 | } |
1020 | |
1021 | /* |
1022 | * Finish off an encoded strip by flushing the last |
1023 | * string and tacking on an End Of Information code. |
1024 | */ |
1025 | static int |
1026 | LZWPostEncode(TIFF* tif) |
1027 | { |
1028 | register LZWCodecState *sp = EncoderState(tif); |
1029 | uint8* op = tif->tif_rawcp; |
1030 | long nextbits = sp->lzw_nextbits; |
1031 | long nextdata = sp->lzw_nextdata; |
1032 | long outcount = sp->enc_outcount; |
1033 | int nbits = sp->lzw_nbits; |
1034 | |
1035 | if (op > sp->enc_rawlimit) { |
1036 | tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata); |
1037 | TIFFFlushData1(tif); |
1038 | op = tif->tif_rawdata; |
1039 | } |
1040 | if (sp->enc_oldcode != (hcode_t) -1) { |
1041 | PutNextCode(op, sp->enc_oldcode); |
1042 | sp->enc_oldcode = (hcode_t) -1; |
1043 | } |
1044 | PutNextCode(op, CODE_EOI); |
1045 | if (nextbits > 0) |
1046 | *op++ = (unsigned char)(nextdata << (8-nextbits)); |
1047 | tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata); |
1048 | return (1); |
1049 | } |
1050 | |
1051 | /* |
1052 | * Reset encoding hash table. |
1053 | */ |
1054 | static void |
1055 | cl_hash(LZWCodecState* sp) |
1056 | { |
1057 | register hash_t *hp = &sp->enc_hashtab[HSIZE-1]; |
1058 | register long i = HSIZE-8; |
1059 | |
1060 | do { |
1061 | i -= 8; |
1062 | hp[-7].hash = -1; |
1063 | hp[-6].hash = -1; |
1064 | hp[-5].hash = -1; |
1065 | hp[-4].hash = -1; |
1066 | hp[-3].hash = -1; |
1067 | hp[-2].hash = -1; |
1068 | hp[-1].hash = -1; |
1069 | hp[ 0].hash = -1; |
1070 | hp -= 8; |
1071 | } while (i >= 0); |
1072 | for (i += 8; i > 0; i--, hp--) |
1073 | hp->hash = -1; |
1074 | } |
1075 | |
1076 | static void |
1077 | LZWCleanup(TIFF* tif) |
1078 | { |
1079 | (void)TIFFPredictorCleanup(tif); |
1080 | |
1081 | assert(tif->tif_data != 0); |
1082 | |
1083 | if (DecoderState(tif)->dec_codetab) |
1084 | _TIFFfree(DecoderState(tif)->dec_codetab); |
1085 | |
1086 | if (EncoderState(tif)->enc_hashtab) |
1087 | _TIFFfree(EncoderState(tif)->enc_hashtab); |
1088 | |
1089 | _TIFFfree(tif->tif_data); |
1090 | tif->tif_data = NULL; |
1091 | |
1092 | _TIFFSetDefaultCompressionState(tif); |
1093 | } |
1094 | |
1095 | int |
1096 | TIFFInitLZW(TIFF* tif, int scheme) |
1097 | { |
1098 | static const char module[] = "TIFFInitLZW"; |
1099 | assert(scheme == COMPRESSION_LZW); |
1100 | /* |
1101 | * Allocate state block so tag methods have storage to record values. |
1102 | */ |
1103 | tif->tif_data = (uint8*) _TIFFmalloc(sizeof (LZWCodecState)); |
1104 | if (tif->tif_data == NULL) |
1105 | goto bad; |
1106 | DecoderState(tif)->dec_codetab = NULL; |
1107 | DecoderState(tif)->dec_decode = NULL; |
1108 | EncoderState(tif)->enc_hashtab = NULL; |
1109 | LZWState(tif)->rw_mode = tif->tif_mode; |
1110 | |
1111 | /* |
1112 | * Install codec methods. |
1113 | */ |
1114 | tif->tif_fixuptags = LZWFixupTags; |
1115 | tif->tif_setupdecode = LZWSetupDecode; |
1116 | tif->tif_predecode = LZWPreDecode; |
1117 | tif->tif_decoderow = LZWDecode; |
1118 | tif->tif_decodestrip = LZWDecode; |
1119 | tif->tif_decodetile = LZWDecode; |
1120 | tif->tif_setupencode = LZWSetupEncode; |
1121 | tif->tif_preencode = LZWPreEncode; |
1122 | tif->tif_postencode = LZWPostEncode; |
1123 | tif->tif_encoderow = LZWEncode; |
1124 | tif->tif_encodestrip = LZWEncode; |
1125 | tif->tif_encodetile = LZWEncode; |
1126 | tif->tif_cleanup = LZWCleanup; |
1127 | /* |
1128 | * Setup predictor setup. |
1129 | */ |
1130 | (void) TIFFPredictorInit(tif); |
1131 | return (1); |
1132 | bad: |
1133 | TIFFErrorExt(tif->tif_clientdata, module, |
1134 | "No space for LZW state block"); |
1135 | return (0); |
1136 | } |
1137 | |
1138 | /* |
1139 | * Copyright (c) 1985, 1986 The Regents of the University of California. |
1140 | * All rights reserved. |
1141 | * |
1142 | * This code is derived from software contributed to Berkeley by |
1143 | * James A. Woods, derived from original work by Spencer Thomas |
1144 | * and Joseph Orost. |
1145 | * |
1146 | * Redistribution and use in source and binary forms are permitted |
1147 | * provided that the above copyright notice and this paragraph are |
1148 | * duplicated in all such forms and that any documentation, |
1149 | * advertising materials, and other materials related to such |
1150 | * distribution and use acknowledge that the software was developed |
1151 | * by the University of California, Berkeley. The name of the |
1152 | * University may not be used to endorse or promote products derived |
1153 | * from this software without specific prior written permission. |
1154 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR |
1155 | * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED |
1156 | * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. |
1157 | */ |
1158 | #endif /* LZW_SUPPORT */ |
1159 | |
1160 | /* vim: set ts=8 sts=8 sw=8 noet: */ |
1161 | /* |
1162 | * Local Variables: |
1163 | * mode: c |
1164 | * c-basic-offset: 8 |
1165 | * fill-column: 78 |
1166 | * End: |
1167 | */ |
1168 |