platform/external/ffmpeg.git - Unnamed repository; edit this file 'description' to name the repository.

1 /*
2  * NewTek SpeedHQ codec
3  * Copyright 2017 Steinar H. Gunderson
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * NewTek SpeedHQ decoder.
25  */
26
27 #define BITSTREAM_READER_LE
28
29 #include "libavutil/attributes.h"
30
31 #include "avcodec.h"
32 #include "get_bits.h"
33 #include "internal.h"
34 #include "libavutil/thread.h"
35 #include "mathops.h"
36 #include "mpeg12.h"
37 #include "mpeg12data.h"
38 #include "mpeg12vlc.h"
39
40 #define MAX_INDEX (64 - 1)
41
42 /*
43  * 5 bits makes for very small tables, with no more than two lookups needed
44  * for the longest (10-bit) codes.
45  */
46 #define ALPHA_VLC_BITS 5
47
48 typedef struct SHQContext {
49     AVCodecContext *avctx;
50     BlockDSPContext bdsp;
51     IDCTDSPContext idsp;
52     ScanTable intra_scantable;
53     int quant_matrix[64];
54     enum { SHQ_SUBSAMPLING_420, SHQ_SUBSAMPLING_422, SHQ_SUBSAMPLING_444 }
55         subsampling;
56     enum { SHQ_NO_ALPHA, SHQ_RLE_ALPHA, SHQ_DCT_ALPHA } alpha_type;
57 } SHQContext;
58
59
/*
 * AC codes: Very similar but not identical to MPEG-2.
 *
 * Each entry is { code, length in bits }. Entry k pairs with speedhq_run[k]
 * and speedhq_level[k]; the last two entries are the escape and EOB codes.
 * The table is not const because speedhq_static_init() bit-reverses the
 * codes in place before building the little-endian VLC.
 */
static uint16_t speedhq_vlc[123][2] = {
    /* run 0, levels 1..40 */
    {0x02, 2}, {0x06, 3}, {0x07, 4}, {0x1c, 5},
    {0x1d, 5}, {0x05, 6}, {0x04, 6}, {0x7b, 7},
    {0x7c, 7}, {0x23, 8}, {0x22, 8}, {0xfa, 8},
    {0xfb, 8}, {0xfe, 8}, {0xff, 8}, {0x1f,14},
    {0x1e,14}, {0x1d,14}, {0x1c,14}, {0x1b,14},
    {0x1a,14}, {0x19,14}, {0x18,14}, {0x17,14},
    {0x16,14}, {0x15,14}, {0x14,14}, {0x13,14},
    {0x12,14}, {0x11,14}, {0x10,14}, {0x18,15},
    {0x17,15}, {0x16,15}, {0x15,15}, {0x14,15},
    {0x13,15}, {0x12,15}, {0x11,15}, {0x10,15},

    /* run 1, levels 1..20 */
    {0x02, 3}, {0x06, 5}, {0x79, 7}, {0x27, 8},
    {0x20, 8}, {0x16,13}, {0x15,13}, {0x1f,15},
    {0x1e,15}, {0x1d,15}, {0x1c,15}, {0x1b,15},
    {0x1a,15}, {0x19,15}, {0x13,16}, {0x12,16},
    {0x11,16}, {0x10,16}, {0x18,13}, {0x17,13},

    /* run 2, levels 1..11 */
    {0x05, 5}, {0x07, 7}, {0xfc, 8}, {0x0c,10},
    {0x14,13}, {0x18,12}, {0x14,12}, {0x13,12},
    {0x10,12}, {0x1a,13}, {0x19,13},

    /* run 3, levels 1..5 */
    {0x07, 5}, {0x26, 8}, {0x1c,12}, {0x13,13}, {0x1b,12},

    /* run 4, levels 1..4 */
    {0x06, 6}, {0xfd, 8}, {0x12,12}, {0x1d,12},

    /* run 5, levels 1..3 */
    {0x07, 6}, {0x04, 9}, {0x12,13},

    /* run 6, levels 1..3 */
    {0x06, 7}, {0x1e,12}, {0x14,16},

    /* runs 7..16, levels 1..2 (one row per run) */
    {0x04, 7}, {0x15,12},
    {0x05, 7}, {0x11,12},
    {0x78, 7}, {0x11,13},
    {0x7a, 7}, {0x10,13},
    {0x21, 8}, {0x1a,16},
    {0x25, 8}, {0x19,16},
    {0x24, 8}, {0x18,16},
    {0x05, 9}, {0x17,16},
    {0x07, 9}, {0x16,16},
    {0x0d,10}, {0x15,16},

    /* runs 17..31, level 1 only */
    {0x1f,12}, {0x1a,12}, {0x19,12}, {0x17,12}, {0x16,12},
    {0x1f,13}, {0x1e,13}, {0x1d,13}, {0x1c,13}, {0x1b,13},
    {0x1f,16}, {0x1e,16}, {0x1d,16}, {0x1c,16}, {0x1b,16},

    {0x01,6}, /* escape */
    {0x06,4}, /* EOB */
};
96
/*
 * Level (coefficient magnitude) for each AC code. Entry k pairs with
 * speedhq_run[k] and speedhq_vlc[k]; the grouping below follows the run
 * values found in speedhq_run.
 */
static const uint8_t speedhq_level[121] = {
    /* run 0: levels 1..40 */
     1,  2,  3,  4,  5,  6,  7,  8,  9, 10,
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
    31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    /* run 1: levels 1..20 */
     1,  2,  3,  4,  5,  6,  7,  8,  9, 10,
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    /* run 2: levels 1..11 */
     1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
    /* run 3: levels 1..5 */
     1,  2,  3,  4,  5,
    /* run 4: levels 1..4 */
     1,  2,  3,  4,
    /* runs 5 and 6: levels 1..3 each */
     1,  2,  3,
     1,  2,  3,
    /* runs 7..16: levels 1..2 each */
     1,  2,  1,  2,  1,  2,  1,  2,  1,  2,
     1,  2,  1,  2,  1,  2,  1,  2,  1,  2,
    /* runs 17..31: level 1 only */
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,
};
115
/*
 * Zero-run length for each AC code. Entry k pairs with speedhq_level[k]
 * and speedhq_vlc[k].
 */
static const uint8_t speedhq_run[121] = {
    /* 40 codes with run 0 */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 20 codes with run 1 */
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
    /* 11 codes with run 2 */
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
    /* 5 codes with run 3, 4 codes with run 4 */
     3,  3,  3,  3,  3,
     4,  4,  4,  4,
    /* 3 codes each for runs 5 and 6 */
     5,  5,  5,
     6,  6,  6,
    /* 2 codes each for runs 7..16 */
     7,  7,  8,  8,  9,  9, 10, 10, 11, 11,
    12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
    /* 1 code each for runs 17..31 */
    17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
    27, 28, 29, 30, 31,
};
134
135 static RLTable ff_rl_speedhq = {
136     121,
137     121,
138     (const uint16_t (*)[])speedhq_vlc,
139     speedhq_run,
140     speedhq_level,
141 };
142
/*
 * Base quantization matrix in raster (row-major) order; compute_quant_matrix()
 * reorders it through ff_zigzag_direct and multiplies it by the per-packet
 * scale.
 *
 * NOTE: The first element is always 16, unscaled.
 */
static const uint8_t unscaled_quant_matrix[64] = {
    /* row 0 */ 16, 16, 19, 22, 26, 27, 29, 34,
    /* row 1 */ 16, 16, 22, 24, 27, 29, 34, 37,
    /* row 2 */ 19, 22, 26, 27, 29, 34, 34, 38,
    /* row 3 */ 22, 22, 26, 27, 29, 34, 37, 40,
    /* row 4 */ 22, 26, 27, 29, 32, 35, 40, 48,
    /* row 5 */ 26, 27, 29, 32, 35, 40, 48, 58,
    /* row 6 */ 26, 27, 29, 34, 38, 46, 56, 69,
    /* row 7 */ 27, 29, 35, 38, 46, 56, 69, 83,
};
154
155 static uint8_t ff_speedhq_static_rl_table_store[2][2*MAX_RUN + MAX_LEVEL + 3];
156
157 static VLC ff_dc_lum_vlc_le;
158 static VLC ff_dc_chroma_vlc_le;
159 static VLC ff_dc_alpha_run_vlc_le;
160 static VLC ff_dc_alpha_level_vlc_le;
161
162 static inline int decode_dc_le(GetBitContext *gb, int component)
163 {
164     int code, diff;
165
166     if (component == 0 || component == 3) {
167         code = get_vlc2(gb, ff_dc_lum_vlc_le.table, DC_VLC_BITS, 2);
168     } else {
169         code = get_vlc2(gb, ff_dc_chroma_vlc_le.table, DC_VLC_BITS, 2);
170     }
171     if (code < 0) {
172         av_log(NULL, AV_LOG_ERROR, "invalid dc code at\n");
173         return 0xffff;
174     }
175     if (!code) {
176         diff = 0;
177     } else {
178         diff = get_xbits_le(gb, code);
179     }
180     return diff;
181 }
182
183 static inline int decode_alpha_block(const SHQContext *s, GetBitContext *gb, uint8_t last_alpha[16], uint8_t *dest, int linesize)
184 {
185     uint8_t block[128];
186     int i = 0, x, y;
187
188     memset(block, 0, sizeof(block));
189
190     {
191         OPEN_READER(re, gb);
192
193         for ( ;; ) {
194             int run, level;
195
196             UPDATE_CACHE_LE(re, gb);
197             GET_VLC(run, re, gb, ff_dc_alpha_run_vlc_le.table, ALPHA_VLC_BITS, 2);
198
199             if (run < 0) break;
200             i += run;
201             if (i >= 128)
202                 return AVERROR_INVALIDDATA;
203
204             UPDATE_CACHE_LE(re, gb);
205             GET_VLC(level, re, gb, ff_dc_alpha_level_vlc_le.table, ALPHA_VLC_BITS, 2);
206             block[i++] = level;
207         }
208
209         CLOSE_READER(re, gb);
210     }
211
212     for (y = 0; y < 8; y++) {
213         for (x = 0; x < 16; x++) {
214             last_alpha[x] -= block[y * 16 + x];
215         }
216         memcpy(dest, last_alpha, 16);
217         dest += linesize;
218     }
219
220     return 0;
221 }
222
223 static inline int decode_dct_block(const SHQContext *s, GetBitContext *gb, int last_dc[4], int component, uint8_t *dest, int linesize)
224 {
225     const int *quant_matrix = s->quant_matrix;
226     const uint8_t *scantable = s->intra_scantable.permutated;
227     LOCAL_ALIGNED_16(int16_t, block, [64]);
228     int dc_offset;
229
230     s->bdsp.clear_block(block);
231
232     dc_offset = decode_dc_le(gb, component);
233     last_dc[component] -= dc_offset;  /* Note: Opposite of most codecs. */
234     block[scantable[0]] = last_dc[component];  /* quant_matrix[0] is always 16. */
235
236     /* Read AC coefficients. */
237     {
238         int i = 0;
239         OPEN_READER(re, gb);
240         for ( ;; ) {
241             int level, run;
242             UPDATE_CACHE_LE(re, gb);
243             GET_RL_VLC(level, run, re, gb, ff_rl_speedhq.rl_vlc[0],
244                        TEX_VLC_BITS, 2, 0);
245             if (level == 127) {
246                 break;
247             } else if (level) {
248                 i += run;
249                 if (i > MAX_INDEX)
250                     return AVERROR_INVALIDDATA;
251                 /* If next bit is 1, level = -level */
252                 level = (level ^ SHOW_SBITS(re, gb, 1)) -
253                         SHOW_SBITS(re, gb, 1);
254                 LAST_SKIP_BITS(re, gb, 1);
255             } else {
256                 /* Escape. */
257 #if MIN_CACHE_BITS < 6 + 6 + 12
258 #error MIN_CACHE_BITS is too small for the escape code, add UPDATE_CACHE
259 #endif
260                 run = SHOW_UBITS(re, gb, 6) + 1;
261                 SKIP_BITS(re, gb, 6);
262                 level = SHOW_UBITS(re, gb, 12) - 2048;
263                 LAST_SKIP_BITS(re, gb, 12);
264
265                 i += run;
266                 if (i > MAX_INDEX)
267                     return AVERROR_INVALIDDATA;
268             }
269
270             block[scantable[i]] = (level * quant_matrix[i]) >> 4;
271         }
272         CLOSE_READER(re, gb);
273     }
274
275     s->idsp.idct_put(dest, linesize, block);
276
277     return 0;
278 }
279
280 static int decode_speedhq_field(const SHQContext *s, const uint8_t *buf, int buf_size, AVFrame *frame, int field_number, int start, int end, int line_stride)
281 {
282     int ret, slice_number, slice_offsets[5];
283     int linesize_y  = frame->linesize[0] * line_stride;
284     int linesize_cb = frame->linesize[1] * line_stride;
285     int linesize_cr = frame->linesize[2] * line_stride;
286     int linesize_a;
287
288     if (s->alpha_type != SHQ_NO_ALPHA)
289         linesize_a = frame->linesize[3] * line_stride;
290
291     if (end < start || end - start < 3 || end > buf_size)
292         return AVERROR_INVALIDDATA;
293
294     slice_offsets[0] = start;
295     slice_offsets[4] = end;
296     for (slice_number = 1; slice_number < 4; slice_number++) {
297         uint32_t last_offset, slice_len;
298
299         last_offset = slice_offsets[slice_number - 1];
300         slice_len = AV_RL24(buf + last_offset);
301         slice_offsets[slice_number] = last_offset + slice_len;
302
303         if (slice_len < 3 || slice_offsets[slice_number] > end - 3)
304             return AVERROR_INVALIDDATA;
305     }
306
307     for (slice_number = 0; slice_number < 4; slice_number++) {
308         GetBitContext gb;
309         uint32_t slice_begin, slice_end;
310         int x, y;
311
312         slice_begin = slice_offsets[slice_number];
313         slice_end = slice_offsets[slice_number + 1];
314
315         if ((ret = init_get_bits8(&gb, buf + slice_begin + 3, slice_end - slice_begin - 3)) < 0)
316             return ret;
317
318         for (y = slice_number * 16 * line_stride; y < frame->height; y += line_stride * 64) {
319             uint8_t *dest_y, *dest_cb, *dest_cr, *dest_a;
320             int last_dc[4] = { 1024, 1024, 1024, 1024 };
321             uint8_t last_alpha[16];
322
323             memset(last_alpha, 255, sizeof(last_alpha));
324
325             dest_y = frame->data[0] + frame->linesize[0] * (y + field_number);
326             if (s->subsampling == SHQ_SUBSAMPLING_420) {
327                 dest_cb = frame->data[1] + frame->linesize[1] * (y/2 + field_number);
328                 dest_cr = frame->data[2] + frame->linesize[2] * (y/2 + field_number);
329             } else {
330                 dest_cb = frame->data[1] + frame->linesize[1] * (y + field_number);
331                 dest_cr = frame->data[2] + frame->linesize[2] * (y + field_number);
332             }
333             if (s->alpha_type != SHQ_NO_ALPHA) {
334                 dest_a = frame->data[3] + frame->linesize[3] * (y + field_number);
335             }
336
337             for (x = 0; x < frame->width; x += 16) {
338                 /* Decode the four luma blocks. */
339                 if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y, linesize_y)) < 0)
340                     return ret;
341                 if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8, linesize_y)) < 0)
342                     return ret;
343                 if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8 * linesize_y, linesize_y)) < 0)
344                     return ret;
345                 if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8 * linesize_y + 8, linesize_y)) < 0)
346                     return ret;
347
348                 /*
349                  * Decode the first chroma block. For 4:2:0, this is the only one;
350                  * for 4:2:2, it's the top block; for 4:4:4, it's the top-left block.
351                  */
352                 if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb, linesize_cb)) < 0)
353                     return ret;
354                 if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr, linesize_cr)) < 0)
355                     return ret;
356
357                 if (s->subsampling != SHQ_SUBSAMPLING_420) {
358                     /* For 4:2:2, this is the bottom block; for 4:4:4, it's the bottom-left block. */
359                     if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8 * linesize_cb, linesize_cb)) < 0)
360                         return ret;
361                     if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8 * linesize_cr, linesize_cr)) < 0)
362                         return ret;
363
364                     if (s->subsampling == SHQ_SUBSAMPLING_444) {
365                         /* Top-right and bottom-right blocks. */
366                         if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8, linesize_cb)) < 0)
367                             return ret;
368                         if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8, linesize_cr)) < 0)
369                             return ret;
370                         if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8 * linesize_cb + 8, linesize_cb)) < 0)
371                             return ret;
372                         if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8 * linesize_cr + 8, linesize_cr)) < 0)
373                             return ret;
374
375                         dest_cb += 8;
376                         dest_cr += 8;
377                     }
378                 }
379                 dest_y += 16;
380                 dest_cb += 8;
381                 dest_cr += 8;
382
383                 if (s->alpha_type == SHQ_RLE_ALPHA) {
384                     /* Alpha coded using 16x8 RLE blocks. */
385                     if ((ret = decode_alpha_block(s, &gb, last_alpha, dest_a, linesize_a)) < 0)
386                         return ret;
387                     if ((ret = decode_alpha_block(s, &gb, last_alpha, dest_a + 8 * linesize_a, linesize_a)) < 0)
388                         return ret;
389                     dest_a += 16;
390                 } else if (s->alpha_type == SHQ_DCT_ALPHA) {
391                     /* Alpha encoded exactly like luma. */
392                     if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a, linesize_a)) < 0)
393                         return ret;
394                     if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a + 8, linesize_a)) < 0)
395                         return ret;
396                     if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a + 8 * linesize_a, linesize_a)) < 0)
397                         return ret;
398                     if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a + 8 * linesize_a + 8, linesize_a)) < 0)
399                         return ret;
400                     dest_a += 16;
401                 }
402             }
403         }
404     }
405
406     return 0;
407 }
408
409 static void compute_quant_matrix(int *output, int qscale)
410 {
411     int i;
412     for (i = 0; i < 64; i++) output[i] = unscaled_quant_matrix[ff_zigzag_direct[i]] * qscale;
413 }
414
415 static int speedhq_decode_frame(AVCodecContext *avctx,
416                                 void *data, int *got_frame,
417                                 AVPacket *avpkt)
418 {
419     SHQContext * const s = avctx->priv_data;
420     const uint8_t *buf   = avpkt->data;
421     int buf_size         = avpkt->size;
422     AVFrame *frame       = data;
423     uint8_t quality;
424     uint32_t second_field_offset;
425     int ret;
426
427     if (buf_size < 4)
428         return AVERROR_INVALIDDATA;
429
430     quality = buf[0];
431     if (quality >= 100) {
432         return AVERROR_INVALIDDATA;
433     }
434
435     compute_quant_matrix(s->quant_matrix, 100 - quality);
436
437     second_field_offset = AV_RL24(buf + 1);
438     if (second_field_offset >= buf_size - 3) {
439         return AVERROR_INVALIDDATA;
440     }
441
442     avctx->coded_width = FFALIGN(avctx->width, 16);
443     avctx->coded_height = FFALIGN(avctx->height, 16);
444
445     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
446         return ret;
447     }
448     frame->key_frame = 1;
449
450     if (second_field_offset == 4) {
451         /*
452          * Overlapping first and second fields is used to signal
453          * encoding only a single field (the second field then comes
454          * as a separate, later frame).
455          */
456         frame->height >>= 1;
457         if ((ret = decode_speedhq_field(s, buf, buf_size, frame, 0, 4, buf_size, 1)) < 0)
458             return ret;
459     } else {
460         if ((ret = decode_speedhq_field(s, buf, buf_size, frame, 0, 4, second_field_offset, 2)) < 0)
461             return ret;
462         if ((ret = decode_speedhq_field(s, buf, buf_size, frame, 1, second_field_offset, buf_size, 2)) < 0)
463             return ret;
464     }
465
466     *got_frame = 1;
467     return buf_size;
468 }
469
470 /*
471  * Alpha VLC. Run and level are independently coded, and would be
472  * outside the default limits for MAX_RUN/MAX_LEVEL, so we don't
473  * bother with combining them into one table.
474  */
475 static av_cold void compute_alpha_vlcs(void)
476 {
477     uint16_t run_code[134], level_code[266];
478     uint8_t run_bits[134], level_bits[266];
479     int16_t run_symbols[134], level_symbols[266];
480     int entry, i, sign;
481
482     /* Initialize VLC for alpha run. */
483     entry = 0;
484
485     /* 0 -> 0. */
486     run_code[entry] = 0;
487     run_bits[entry] = 1;
488     run_symbols[entry] = 0;
489     ++entry;
490
491     /* 10xx -> xx plus 1. */
492     for (i = 0; i < 4; ++i) {
493         run_code[entry] = (i << 2) | 1;
494         run_bits[entry] = 4;
495         run_symbols[entry] = i + 1;
496         ++entry;
497     }
498
499     /* 111xxxxxxx -> xxxxxxx. */
500     for (i = 0; i < 128; ++i) {
501         run_code[entry] = (i << 3) | 7;
502         run_bits[entry] = 10;
503         run_symbols[entry] = i;
504         ++entry;
505     }
506
507     /* 110 -> EOB. */
508     run_code[entry] = 3;
509     run_bits[entry] = 3;
510     run_symbols[entry] = -1;
511     ++entry;
512
513     av_assert0(entry == FF_ARRAY_ELEMS(run_code));
514
515     INIT_LE_VLC_SPARSE_STATIC(&ff_dc_alpha_run_vlc_le, ALPHA_VLC_BITS,
516                               FF_ARRAY_ELEMS(run_code),
517                               run_bits, 1, 1,
518                               run_code, 2, 2,
519                               run_symbols, 2, 2, 160);
520
521     /* Initialize VLC for alpha level. */
522     entry = 0;
523
524     for (sign = 0; sign <= 1; ++sign) {
525         /* 1s -> -1 or +1 (depending on sign bit). */
526         level_code[entry] = (sign << 1) | 1;
527         level_bits[entry] = 2;
528         level_symbols[entry] = sign ? -1 : 1;
529         ++entry;
530
531         /* 01sxx -> xx plus 2 (2..5 or -2..-5, depending on sign bit). */
532         for (i = 0; i < 4; ++i) {
533             level_code[entry] = (i << 3) | (sign << 2) | 2;
534             level_bits[entry] = 5;
535             level_symbols[entry] = sign ? -(i + 2) : (i + 2);
536             ++entry;
537         }
538     }
539
540     /*
541      * 00xxxxxxxx -> xxxxxxxx, in two's complement. There are many codes
542      * here that would better be encoded in other ways (e.g. 0 would be
543      * encoded by increasing run, and +/- 1 would be encoded with a
544      * shorter code), but it doesn't hurt to allow everything.
545      */
546     for (i = 0; i < 256; ++i) {
547         level_code[entry] = i << 2;
548         level_bits[entry] = 10;
549         level_symbols[entry] = i;
550         ++entry;
551     }
552
553     av_assert0(entry == FF_ARRAY_ELEMS(level_code));
554
555     INIT_LE_VLC_SPARSE_STATIC(&ff_dc_alpha_level_vlc_le, ALPHA_VLC_BITS,
556                               FF_ARRAY_ELEMS(level_code),
557                               level_bits, 1, 1,
558                               level_code, 2, 2,
559                               level_symbols, 2, 2, 288);
560 }
561
/**
 * Reverse the bit order of a code that is `bits` bits long.
 *
 * The value is run through bitswap_32() (declared outside this file;
 * presumably a full 32-bit bit reversal) and shifted back down so that
 * the result again occupies the low `bits` bits.
 *
 * @param num  code value in its low `bits` bits
 * @param bits code length; callers in this file pass 2..16, and values
 *             above 32 are not supported
 * @return the bit-reversed code, or 0 for an empty code (bits <= 0)
 */
static uint32_t reverse(uint32_t num, int bits)
{
    /* Shifting a 32-bit value by 32 or more is undefined, so handle the empty code here. */
    if (bits <= 0)
        return 0;

    return bitswap_32(num) >> (32 - bits);
}
566
/**
 * Bit-reverse a whole table of variable-length codes.
 *
 * @param code          input codes
 * @param bits          length in bits of each input code
 * @param reversed_code receives reverse(code[k], bits[k]) for each entry
 * @param num_entries   number of entries to convert
 */
static void reverse_code(const uint16_t *code, const uint8_t *bits,
                         uint16_t *reversed_code, int num_entries)
{
    int remaining = num_entries;

    while (remaining-- > 0)
        *reversed_code++ = reverse(*code++, *bits++);
}
575
576 static av_cold void speedhq_static_init(void)
577 {
578     uint16_t ff_mpeg12_vlc_dc_lum_code_reversed[12];
579     uint16_t ff_mpeg12_vlc_dc_chroma_code_reversed[12];
580     int i;
581
582     /* Exactly the same as MPEG-2, except little-endian. */
583     reverse_code(ff_mpeg12_vlc_dc_lum_code,
584                  ff_mpeg12_vlc_dc_lum_bits,
585                  ff_mpeg12_vlc_dc_lum_code_reversed,
586                  12);
587     INIT_LE_VLC_STATIC(&ff_dc_lum_vlc_le, DC_VLC_BITS, 12,
588                        ff_mpeg12_vlc_dc_lum_bits, 1, 1,
589                        ff_mpeg12_vlc_dc_lum_code_reversed, 2, 2, 512);
590     reverse_code(ff_mpeg12_vlc_dc_chroma_code,
591                  ff_mpeg12_vlc_dc_chroma_bits,
592                  ff_mpeg12_vlc_dc_chroma_code_reversed,
593                  12);
594     INIT_LE_VLC_STATIC(&ff_dc_chroma_vlc_le, DC_VLC_BITS, 12,
595                        ff_mpeg12_vlc_dc_chroma_bits, 1, 1,
596                        ff_mpeg12_vlc_dc_chroma_code_reversed, 2, 2, 514);
597
598     /* Reverse the AC VLC, because INIT_VLC_LE wants it in that order. */
599     for (i = 0; i < FF_ARRAY_ELEMS(speedhq_vlc); ++i) {
600         speedhq_vlc[i][0] = reverse(speedhq_vlc[i][0], speedhq_vlc[i][1]);
601     }
602     ff_rl_init(&ff_rl_speedhq, ff_speedhq_static_rl_table_store);
603     INIT_2D_VLC_RL(ff_rl_speedhq, 674, INIT_VLC_LE);
604
605     compute_alpha_vlcs();
606 }
607
608 static av_cold int speedhq_decode_init(AVCodecContext *avctx)
609 {
610     int ret;
611     static AVOnce init_once = AV_ONCE_INIT;
612     SHQContext * const s = avctx->priv_data;
613
614     s->avctx = avctx;
615
616     ret = ff_thread_once(&init_once, speedhq_static_init);
617     if (ret)
618         return AVERROR_UNKNOWN;
619
620     ff_blockdsp_init(&s->bdsp, avctx);
621     ff_idctdsp_init(&s->idsp, avctx);
622     ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct);
623
624     switch (avctx->codec_tag) {
625     case MKTAG('S', 'H', 'Q', '0'):
626         s->subsampling = SHQ_SUBSAMPLING_420;
627         s->alpha_type = SHQ_NO_ALPHA;
628         avctx->pix_fmt = AV_PIX_FMT_YUV420P;
629         break;
630     case MKTAG('S', 'H', 'Q', '1'):
631         s->subsampling = SHQ_SUBSAMPLING_420;
632         s->alpha_type = SHQ_RLE_ALPHA;
633         avctx->pix_fmt = AV_PIX_FMT_YUVA420P;
634         break;
635     case MKTAG('S', 'H', 'Q', '2'):
636         s->subsampling = SHQ_SUBSAMPLING_422;
637         s->alpha_type = SHQ_NO_ALPHA;
638         avctx->pix_fmt = AV_PIX_FMT_YUV422P;
639         break;
640     case MKTAG('S', 'H', 'Q', '3'):
641         s->subsampling = SHQ_SUBSAMPLING_422;
642         s->alpha_type = SHQ_RLE_ALPHA;
643         avctx->pix_fmt = AV_PIX_FMT_YUVA422P;
644         break;
645     case MKTAG('S', 'H', 'Q', '4'):
646         s->subsampling = SHQ_SUBSAMPLING_444;
647         s->alpha_type = SHQ_NO_ALPHA;
648         avctx->pix_fmt = AV_PIX_FMT_YUV444P;
649         break;
650     case MKTAG('S', 'H', 'Q', '5'):
651         s->subsampling = SHQ_SUBSAMPLING_444;
652         s->alpha_type = SHQ_RLE_ALPHA;
653         avctx->pix_fmt = AV_PIX_FMT_YUVA444P;
654         break;
655     case MKTAG('S', 'H', 'Q', '7'):
656         s->subsampling = SHQ_SUBSAMPLING_422;
657         s->alpha_type = SHQ_DCT_ALPHA;
658         avctx->pix_fmt = AV_PIX_FMT_YUVA422P;
659         break;
660     case MKTAG('S', 'H', 'Q', '9'):
661         s->subsampling = SHQ_SUBSAMPLING_444;
662         s->alpha_type = SHQ_DCT_ALPHA;
663         avctx->pix_fmt = AV_PIX_FMT_YUVA444P;
664         break;
665     default:
666         av_log(avctx, AV_LOG_ERROR, "Unknown NewTek SpeedHQ FOURCC provided (%08X)\n",
667                avctx->codec_tag);
668         return AVERROR_INVALIDDATA;
669     }
670
671     /* This matches what NDI's RGB -> Y'CbCr 4:2:2 converter uses. */
672     avctx->colorspace = AVCOL_SPC_BT470BG;
673     avctx->chroma_sample_location = AVCHROMA_LOC_CENTER;
674
675     return 0;
676 }
677
678 AVCodec ff_speedhq_decoder = {
679     .name           = "speedhq",
680     .long_name      = NULL_IF_CONFIG_SMALL("NewTek SpeedHQ"),
681     .type           = AVMEDIA_TYPE_VIDEO,
682     .id             = AV_CODEC_ID_SPEEDHQ,
683     .priv_data_size = sizeof(SHQContext),
684     .init           = speedhq_decode_init,
685     .decode         = speedhq_decode_frame,
686     .capabilities   = AV_CODEC_CAP_DR1,
687 };
688