summaryrefslogtreecommitdiff
path: root/libavcodec/texturedsp.c (plain)
blob: 5012245a26ea39c2d45c1bea7e826173fc94b9dd
1/*
2 * Texture block decompression
3 * Copyright (C) 2009 Benjamin Dobell, Glass Echidna
4 * Copyright (C) 2012 Matthäus G. "Anteru" Chajdas (http://anteru.net)
5 * Copyright (C) 2015 Vittorio Giovara <vittorio.giovara@gmail.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25#include <stddef.h>
26#include <stdint.h>
27
28#include "libavutil/attributes.h"
29#include "libavutil/common.h"
30#include "libavutil/intreadwrite.h"
31#include "libavutil/libm.h"
32
33#include "texturedsp.h"
34
/*
 * Pack four 8-bit components into one 32-bit word, R in the lowest byte and
 * A in the highest. Every argument is truncated to 8 bits first.
 *
 * The alpha term is widened to unsigned before shifting: a uint8_t promotes
 * to (signed) int, and shifting a value >= 128 left by 24 bits would overflow
 * that int, which is undefined behaviour. The result of the whole expression
 * is therefore unsigned.
 */
#define RGBA(r, g, b, a) (((uint8_t)(r) <<  0) | \
                          ((uint8_t)(g) <<  8) | \
                          ((uint8_t)(b) << 16) | \
                          ((unsigned)(uint8_t)(a) << 24))
39
40static av_always_inline void extract_color(uint32_t colors[4],
41 uint16_t color0,
42 uint16_t color1,
43 int dxtn, int alpha)
44{
45 int tmp;
46 uint8_t r0, g0, b0, r1, g1, b1;
47 uint8_t a = dxtn ? 0 : 255;
48
49 tmp = (color0 >> 11) * 255 + 16;
50 r0 = (uint8_t) ((tmp / 32 + tmp) / 32);
51 tmp = ((color0 & 0x07E0) >> 5) * 255 + 32;
52 g0 = (uint8_t) ((tmp / 64 + tmp) / 64);
53 tmp = (color0 & 0x001F) * 255 + 16;
54 b0 = (uint8_t) ((tmp / 32 + tmp) / 32);
55
56 tmp = (color1 >> 11) * 255 + 16;
57 r1 = (uint8_t) ((tmp / 32 + tmp) / 32);
58 tmp = ((color1 & 0x07E0) >> 5) * 255 + 32;
59 g1 = (uint8_t) ((tmp / 64 + tmp) / 64);
60 tmp = (color1 & 0x001F) * 255 + 16;
61 b1 = (uint8_t) ((tmp / 32 + tmp) / 32);
62
63 if (dxtn || color0 > color1) {
64 colors[0] = RGBA(r0, g0, b0, a);
65 colors[1] = RGBA(r1, g1, b1, a);
66 colors[2] = RGBA((2 * r0 + r1) / 3,
67 (2 * g0 + g1) / 3,
68 (2 * b0 + b1) / 3,
69 a);
70 colors[3] = RGBA((2 * r1 + r0) / 3,
71 (2 * g1 + g0) / 3,
72 (2 * b1 + b0) / 3,
73 a);
74 } else {
75 colors[0] = RGBA(r0, g0, b0, a);
76 colors[1] = RGBA(r1, g1, b1, a);
77 colors[2] = RGBA((r0 + r1) / 2,
78 (g0 + g1) / 2,
79 (b0 + b1) / 2,
80 a);
81 colors[3] = RGBA(0, 0, 0, alpha);
82 }
83}
84
/*
 * Decode one 8-byte DXT1 block into a 4x4 patch of 32-bit pixels.
 * Bytes 0-3 hold the two 16-bit endpoints, bytes 4-7 hold sixteen 2-bit
 * palette selectors, consumed least significant pair first. 'alpha' is
 * forwarded to extract_color() for palette entry 3 of the three-colour mode.
 */
static inline void dxt1_block_internal(uint8_t *dst, ptrdiff_t stride,
                                       const uint8_t *block, uint8_t alpha)
{
    uint32_t palette[4];
    uint32_t selectors = AV_RL32(block + 4);
    uint16_t endpoint0 = AV_RL16(block + 0);
    uint16_t endpoint1 = AV_RL16(block + 2);
    int row, col;

    extract_color(palette, endpoint0, endpoint1, 0, alpha);

    for (row = 0; row < 4; row++, dst += stride) {
        uint8_t *out = dst;

        for (col = 0; col < 4; col++, out += 4) {
            uint32_t pixel = palette[selectors & 3];

            selectors >>= 2;
            AV_WL32(out, pixel);
        }
    }
}
105
/**
 * Decompress one DXT1 block into a 4x4 patch of RGBA pixels at 'dst'.
 * The black palette entry of the three-colour mode is written with
 * alpha 255, so every output pixel is opaque.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int dxt1_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    const uint8_t opaque = 255;

    dxt1_block_internal(dst, stride, block, opaque);

    /* 4 bytes of endpoints + 4 bytes of selectors */
    return 8;
}
121
/**
 * Decompress one DXT1 block with 1-bit alpha into a 4x4 patch of RGBA
 * pixels at 'dst'. The black palette entry of the three-colour mode is
 * written with alpha 0; every other pixel has alpha 255.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int dxt1a_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    const uint8_t transparent = 0;

    dxt1_block_internal(dst, stride, block, transparent);

    /* 4 bytes of endpoints + 4 bytes of selectors */
    return 8;
}
138
/*
 * Decode one 16-byte DXT3 block into a 4x4 patch of 32-bit pixels.
 * Bytes 0-7 hold sixteen explicit 4-bit alpha values (one little-endian
 * 16-bit word per row), bytes 8-11 the two colour endpoints and bytes 12-15
 * the 2-bit colour selectors.
 */
static inline void dxt3_block_internal(uint8_t *dst, ptrdiff_t stride,
                                       const uint8_t *block)
{
    int x, y;
    uint32_t colors[4];
    uint16_t color0 = AV_RL16(block +  8);
    uint16_t color1 = AV_RL16(block + 10);
    uint32_t code   = AV_RL32(block + 12);

    /* dxtn = 1: four-colour mode, alpha byte of each palette entry is 0. */
    extract_color(colors, color0, color1, 1, 0);

    for (y = 0; y < 4; y++) {
        const uint16_t alpha_code = AV_RL16(block + 2 * y);

        for (x = 0; x < 4; x++) {
            /* Multiplying a nibble by 17 maps 0..15 onto 0..255. */
            const uint32_t alpha = ((alpha_code >> (4 * x)) & 0x0F) * 17;
            /* alpha is kept in an unsigned 32-bit type so that the shift
             * into the top byte cannot overflow a signed int. */
            const uint32_t pixel = colors[code & 3] | (alpha << 24);

            code >>= 2;
            AV_WL32(dst + x * 4, pixel);
        }
        dst += stride;
    }
}
169
170/** Convert a premultiplied alpha pixel to a straight alpha pixel. */
171static av_always_inline void premult2straight(uint8_t *src)
172{
173 int r = src[0];
174 int g = src[1];
175 int b = src[2];
176 int a = src[3]; /* unchanged */
177
178 src[0] = (uint8_t) r * a / 255;
179 src[1] = (uint8_t) g * a / 255;
180 src[2] = (uint8_t) b * a / 255;
181}
182
/**
 * Decompress one block of a DXT2 texture and store the resulting
 * RGBA pixels in 'dst'.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int dxt2_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    int i;

    /* DXT2 shares the DXT3 bitstream layout. */
    dxt3_block_internal(dst, stride, block);

    /* The decoded colours are premultiplied by alpha, while lavc outputs
     * (and swscale expects) straight alpha, so every one of the 16 pixels
     * is passed through premult2straight(), row by row. */
    for (i = 0; i < 16; i++)
        premult2straight(dst + (i & 3) * 4 + (i >> 2) * stride);

    return 16;
}
206
/**
 * Decompress one block of a DXT3 texture and store the resulting
 * RGBA pixels in 'dst'.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int dxt3_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    /* 8 bytes of explicit alpha followed by 8 bytes of colour data */
    enum { DXT3_BLOCK_BYTES = 16 };

    dxt3_block_internal(dst, stride, block);

    return DXT3_BLOCK_BYTES;
}
222
/**
 * Unpack a BC index block of sixteen 3-bit entries, stored as
 *   h g f e
 *   d c b a
 *   p o n m
 *   l k j i
 *
 * Bits packed as
 *  | h | g | f | e | d | c | b | a | // Entry
 *  |765 432 107 654 321 076 543 210| // Bit
 *  |0000000000111111111112222222222| // Byte
 *
 * into sixteen 8-bit indices, each in the range 0..7.
 *
 * @param dst receives 16 bytes.
 * @param src 6 bytes of packed indices.
 */
static void decompress_indices(uint8_t *dst, const uint8_t *src)
{
    int half;

    /* Each group of three bytes is one little-endian 24-bit word that
     * carries eight entries, lowest bits first. */
    for (half = 0; half < 2; half++, src += 3, dst += 8) {
        int packed = AV_RL24(src);
        int n;

        for (n = 0; n < 8; n++, packed >>= 3)
            dst[n] = packed & 0x7;
    }
}
252
/*
 * Decode one 16-byte DXT5 block into a 4x4 patch of 32-bit pixels.
 * Bytes 0-1 hold the two alpha endpoints, bytes 2-7 sixteen 3-bit alpha
 * selectors, bytes 8-11 the two colour endpoints and bytes 12-15 the 2-bit
 * colour selectors.
 */
static inline void dxt5_block_internal(uint8_t *dst, ptrdiff_t stride,
                                       const uint8_t *block)
{
    int x, y, i;
    uint32_t colors[4];
    uint8_t alpha_indices[16];
    uint8_t alpha_tab[8];
    uint16_t color0 = AV_RL16(block +  8);
    uint16_t color1 = AV_RL16(block + 10);
    uint32_t code   = AV_RL32(block + 12);
    uint8_t alpha0  = block[0];
    uint8_t alpha1  = block[1];

    decompress_indices(alpha_indices, block + 2);

    /* dxtn = 1: four-colour mode, alpha byte of each palette entry is 0. */
    extract_color(colors, color0, color1, 1, 0);

    /* The alpha palette depends only on the two endpoints, so build it once
     * per block instead of re-deriving it for every pixel. */
    alpha_tab[0] = alpha0;
    alpha_tab[1] = alpha1;
    if (alpha0 > alpha1) {
        /* six interpolated values */
        for (i = 2; i < 8; i++)
            alpha_tab[i] = (uint8_t) (((8 - i) * alpha0 +
                                       (i - 1) * alpha1) / 7);
    } else {
        /* four interpolated values plus the two range extremes */
        for (i = 2; i < 6; i++)
            alpha_tab[i] = (uint8_t) (((6 - i) * alpha0 +
                                       (i - 1) * alpha1) / 5);
        alpha_tab[6] = 0;
        alpha_tab[7] = 255;
    }

    for (y = 0; y < 4; y++) {
        for (x = 0; x < 4; x++) {
            /* Widen before shifting: a uint8_t promotes to signed int and
             * shifting a value >= 128 left by 24 would overflow it. */
            const uint32_t alpha = alpha_tab[alpha_indices[x + y * 4]];
            const uint32_t pixel = colors[code & 3] | (alpha << 24);

            code >>= 2;
            AV_WL32(dst + x * 4, pixel);
        }
        dst += stride;
    }
}
301
/**
 * Decompress one block of a DXT4 texture and store the resulting
 * RGBA pixels in 'dst'.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int dxt4_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    int row, col;

    /* DXT4 shares the DXT5 bitstream layout. */
    dxt5_block_internal(dst, stride, block);

    /* The decoded colours are premultiplied by alpha, while lavc outputs
     * (and swscale expects) straight alpha, so every pixel of the patch is
     * passed through premult2straight(). */
    for (row = 0; row < 4; row++) {
        uint8_t *px = dst + row * stride;

        for (col = 0; col < 4; col++, px += 4)
            premult2straight(px);
    }

    return 16;
}
325
/**
 * Decompress one block of a DXT5 texture and store the resulting
 * RGBA pixels in 'dst'.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int dxt5_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    /* 8 bytes of interpolated alpha followed by 8 bytes of colour data */
    enum { DXT5_BLOCK_BYTES = 16 };

    dxt5_block_internal(dst, stride, block);

    return DXT5_BLOCK_BYTES;
}
341
342/**
343 * Convert a YCoCg buffer to RGBA.
344 *
345 * @param src input buffer.
346 * @param scaled variant with scaled chroma components and opaque alpha.
347 */
348static av_always_inline void ycocg2rgba(uint8_t *src, int scaled)
349{
350 int r = src[0];
351 int g = src[1];
352 int b = src[2];
353 int a = src[3];
354
355 int s = scaled ? (b >> 3) + 1 : 1;
356 int y = a;
357 int co = (r - 128) / s;
358 int cg = (g - 128) / s;
359
360 src[0] = av_clip_uint8(y + co - cg);
361 src[1] = av_clip_uint8(y + cg);
362 src[2] = av_clip_uint8(y - co - cg);
363 src[3] = scaled ? 255 : b;
364}
365
/**
 * Decompress one block of a DXT5 texture with classic YCoCg and store
 * the resulting RGBA pixels in 'dst'. The output alpha is whatever
 * ycocg2rgba() produces in its unscaled mode, i.e. the decoded third
 * colour channel.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int dxt5y_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    int i;

    /* The bitstream is plain DXT5 with luma carried in the alpha channel:
     * decode it normally, then convert each pixel in place. */
    dxt5_block_internal(dst, stride, block);

    for (i = 0; i < 16; i++)
        ycocg2rgba(dst + (i & 3) * 4 + (i >> 2) * stride, 0);

    return 16;
}
389
/**
 * Decompress one block of a DXT5 texture with scaled YCoCg and store
 * the resulting RGBA pixels in 'dst'. Alpha component is fully opaque.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int dxt5ys_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    int row, col;

    /* The bitstream is plain DXT5 with luma carried in the alpha channel:
     * decode it normally, then convert each pixel in place. */
    dxt5_block_internal(dst, stride, block);

    for (row = 0; row < 4; row++) {
        uint8_t *px = dst + row * stride;

        for (col = 0; col < 4; col++, px += 4)
            ycocg2rgba(px, 1);
    }

    return 16;
}
413
/*
 * Write a 4x4 patch of grey, opaque pixels: the 3-bit selectors stored at
 * block + 2 pick one of the eight entries of 'color_tab' per pixel, and the
 * chosen value is replicated into R, G and B.
 */
static inline void rgtc_block_internal(uint8_t *dst, ptrdiff_t stride,
                                       const uint8_t *block,
                                       const int *color_tab)
{
    uint8_t selectors[16];
    const uint8_t *sel = selectors;
    int row, col;

    decompress_indices(selectors, block + 2);

    /* Only one or two channels are stored at most, since it only used to
     * compress specular (black and white) or normal (red and green) maps.
     * Although the standard says to zero out unused components, many
     * implementations fill all of them with the same value. */
    for (row = 0; row < 4; row++, dst += stride) {
        for (col = 0; col < 4; col++) {
            const int value = color_tab[*sel++];
            uint32_t pixel  = RGBA(value, value, value, 255U);

            AV_WL32(dst + col * 4, pixel);
        }
    }
}
437
/*
 * Decode one 8-byte RGTC1 block: two endpoint bytes followed by sixteen
 * 3-bit selectors. The eight-entry value table is derived from the
 * endpoints here; the per-pixel lookup is done by rgtc_block_internal().
 * 'sign' selects signed (non-zero) or unsigned (zero) endpoints.
 */
static inline void rgtc1_block_internal(uint8_t *dst, ptrdiff_t stride,
                                        const uint8_t *block, int sign)
{
    int color_table[8];
    int lo, hi, i;

    lo = sign ? ((int8_t) block[0]) + 128 : block[0];
    hi = sign ? ((int8_t) block[1]) + 128 : block[1];
    /* Signed endpoints lie in [-128 127]; after the +128 offset they are
     * handled exactly like unsigned ones. */

    color_table[0] = lo;
    color_table[1] = hi;

    if (lo > hi) {
        /* bit codes 010..111: six interpolated values */
        for (i = 2; i < 8; i++)
            color_table[i] = ((8 - i) * lo + (i - 1) * hi) / 7;
    } else {
        /* bit codes 010..101: four interpolated values */
        for (i = 2; i < 6; i++)
            color_table[i] = ((6 - i) * lo + (i - 1) * hi) / 5;
        color_table[6] = 0;   /* bit code 110: min range */
        color_table[7] = 255; /* bit code 111: max range */
    }

    rgtc_block_internal(dst, stride, block, color_table);
}
477
/**
 * Decompress one block of a RGTC1 texture with signed components
 * and store the resulting RGBA pixels in 'dst'.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int rgtc1s_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    const int is_signed = 1;

    rgtc1_block_internal(dst, stride, block, is_signed);

    /* 2 endpoint bytes + 6 bytes of selectors */
    return 8;
}
493
/**
 * Decompress one block of a RGTC1 texture with unsigned components
 * and store the resulting RGBA pixels in 'dst'.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int rgtc1u_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    const int is_signed = 0;

    rgtc1_block_internal(dst, stride, block, is_signed);

    /* 2 endpoint bytes + 6 bytes of selectors */
    return 8;
}
509
/*
 * Decode one 16-byte RGTC2 block: two consecutive RGTC1 sub-blocks that
 * provide the red and the green channel. Blue is derived from the other
 * two and alpha is set to 255. 'sign' is forwarded to the RGTC1 decoder.
 */
static inline void rgtc2_block_internal(uint8_t *dst, ptrdiff_t stride,
                                        const uint8_t *block, int sign)
{
    /* Two scratch 4x4 patches of 4-byte pixels, 16 bytes per row. */
    uint8_t red_patch[4 * 4 * 4];
    uint8_t green_patch[4 * 4 * 4];
    int row, col;

    /* Decompress the two channels separately and interleave them afterwards. */
    rgtc1_block_internal(red_patch,   16, block,     sign);
    rgtc1_block_internal(green_patch, 16, block + 8, sign);

    for (row = 0; row < 4; row++) {
        const uint8_t *rsrc = red_patch   + row * 16;
        const uint8_t *gsrc = green_patch + row * 16;
        uint8_t *out        = dst + row * stride;

        for (col = 0; col < 4; col++, out += 4) {
            const int r    = rsrc[col * 4];
            const int g    = gsrc[col * 4];
            const int rest = (255 * 255 - r * r - g * g) / 2;
            int b;

            /* B is rebuilt exactly like a normal map; 127 is used when
             * nothing is left under the square root. */
            if (rest > 0)
                b = lrint(sqrtf(rest));
            else
                b = 127;

            out[0] = r;
            out[1] = g;
            out[2] = b;
            out[3] = 255;
        }
    }
}
541
/**
 * Decompress one block of a RGTC2 texture with signed components
 * and store the resulting RGBA pixels in 'dst'. Alpha is fully opaque.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int rgtc2s_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    const int is_signed = 1;

    rgtc2_block_internal(dst, stride, block, is_signed);

    /* two 8-byte single-channel sub-blocks */
    return 16;
}
557
/**
 * Decompress one block of a RGTC2 texture with unsigned components
 * and store the resulting RGBA pixels in 'dst'. Alpha is fully opaque.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int rgtc2u_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    const int is_signed = 0;

    rgtc2_block_internal(dst, stride, block, is_signed);

    /* two 8-byte single-channel sub-blocks */
    return 16;
}
573
/**
 * Decompress one block of a 3Dc texture with unsigned components
 * and store the resulting RGBA pixels in 'dst'. Alpha is fully opaque.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int dxn3dc_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    int row, col;

    rgtc2_block_internal(dst, stride, block, 0);

    /* This is the 3Dc variant of RGTC2, with swapped R and G. */
    for (row = 0; row < 4; row++) {
        uint8_t *px = dst + row * stride;

        for (col = 0; col < 4; col++, px += 4) {
            const uint8_t first = px[0];

            px[0] = px[1];
            px[1] = first;
        }
    }

    return 16;
}
598
599av_cold void ff_texturedsp_init(TextureDSPContext *c)
600{
601 c->dxt1_block = dxt1_block;
602 c->dxt1a_block = dxt1a_block;
603 c->dxt2_block = dxt2_block;
604 c->dxt3_block = dxt3_block;
605 c->dxt4_block = dxt4_block;
606 c->dxt5_block = dxt5_block;
607 c->dxt5y_block = dxt5y_block;
608 c->dxt5ys_block = dxt5ys_block;
609 c->rgtc1s_block = rgtc1s_block;
610 c->rgtc1u_block = rgtc1u_block;
611 c->rgtc2s_block = rgtc2s_block;
612 c->rgtc2u_block = rgtc2u_block;
613 c->dxn3dc_block = dxn3dc_block;
614}
615