blob: 4d7310f6d4d149c82fa0e69ed95a1d60de26700d
1 | /* |
2 | * VP9 compatible video decoder |
3 | * |
4 | * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> |
5 | * Copyright (C) 2013 Clément Bœsch <u pkh me> |
6 | * |
7 | * This file is part of FFmpeg. |
8 | * |
9 | * FFmpeg is free software; you can redistribute it and/or |
10 | * modify it under the terms of the GNU Lesser General Public |
11 | * License as published by the Free Software Foundation; either |
12 | * version 2.1 of the License, or (at your option) any later version. |
13 | * |
14 | * FFmpeg is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17 | * Lesser General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU Lesser General Public |
20 | * License along with FFmpeg; if not, write to the Free Software |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 | */ |
23 | |
24 | #include "avcodec.h" |
25 | #include "get_bits.h" |
26 | #include "internal.h" |
27 | #include "profiles.h" |
28 | #include "thread.h" |
29 | #include "videodsp.h" |
30 | #include "vp56.h" |
31 | #include "vp9.h" |
32 | #include "vp9data.h" |
33 | #include "vp9dec.h" |
34 | #include "libavutil/avassert.h" |
35 | #include "libavutil/pixdesc.h" |
36 | |
37 | #define VP9_SYNCCODE 0x498342 |
38 | |
/* Release all references held by a VP9Frame.
 * Safe to call on a frame that was never (fully) allocated: the buffer
 * unref helpers are no-ops on NULL. */
static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
{
    // Hand the picture buffer back to the frame-threading framework first.
    ff_thread_release_buffer(avctx, &f->tf);
    // Drop our refs on the side-data buffers (segmentation map + mvs, hwaccel state).
    av_buffer_unref(&f->extradata);
    av_buffer_unref(&f->hwaccel_priv_buf);
    // These raw pointers aliased into the buffers just released; clear them
    // so no stale/dangling pointer survives in the frame struct.
    f->segmentation_map = NULL;
    f->hwaccel_picture_private = NULL;
}
47 | |
/* Allocate the picture buffer and per-frame side data for a VP9Frame.
 *
 * The extradata buffer packs two arrays back to back:
 *   [0, sz)   : segmentation map, one byte per 8x8 block
 *   [sz, ...) : VP9mvrefPair array, one entry per 8x8 block
 * where sz = 64 * sb_cols * sb_rows, i.e. the number of 8x8 units in the
 * superblock-aligned frame (64 such units per 64x64 superblock).
 *
 * Returns 0 on success, a negative AVERROR on failure (everything
 * partially allocated is released via vp9_frame_unref()). */
static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
{
    VP9Context *s = avctx->priv_data;
    int ret, sz;

    ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
    if (ret < 0)
        return ret;

    sz = 64 * s->sb_cols * s->sb_rows;
    f->extradata = av_buffer_allocz(sz * (1 + sizeof(VP9mvrefPair)));
    if (!f->extradata) {
        goto fail;
    }

    // Raw views into the single shared buffer (see layout above).
    f->segmentation_map = f->extradata->data;
    f->mv = (VP9mvrefPair *) (f->extradata->data + sz);

    if (avctx->hwaccel) {
        const AVHWAccel *hwaccel = avctx->hwaccel;
        av_assert0(!f->hwaccel_picture_private);
        if (hwaccel->frame_priv_data_size) {
            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
            if (!f->hwaccel_priv_buf)
                goto fail;
            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
        }
    }

    return 0;

fail:
    // Releases the thread buffer and any side data allocated so far.
    vp9_frame_unref(avctx, f);
    return AVERROR(ENOMEM);
}
83 | |
/* Make dst a new reference to src: the picture buffer, the packed
 * segmentation-map/mv side data and (if present) the hwaccel private data
 * are all ref-counted shares, not copies.
 * Returns 0 on success, a negative AVERROR on failure (dst is cleaned up). */
static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src)
{
    int ret;

    ret = ff_thread_ref_frame(&dst->tf, &src->tf);
    if (ret < 0)
        return ret;

    dst->extradata = av_buffer_ref(src->extradata);
    if (!dst->extradata)
        goto fail;

    // The raw pointers can be copied as-is: they alias into the buffer
    // whose ref we just took above.
    dst->segmentation_map = src->segmentation_map;
    dst->mv = src->mv;
    dst->uses_2pass = src->uses_2pass;

    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf)
            goto fail;
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;

fail:
    // Drops whatever refs were successfully taken before the failure.
    vp9_frame_unref(avctx, dst);
    return AVERROR(ENOMEM);
}
113 | |
/* (Re)configure the decoder for a frame size of w x h:
 *  - if size or pixel format changed, renegotiate the output format
 *    (offering hwaccel formats where compiled in) via ff_thread_get_format()
 *  - (re)allocate the per-superblock-column "above" context line buffers
 *  - reinitialize the DSP contexts when the bit depth changed.
 * Returns 0 on success or a negative AVERROR. */
static int update_size(AVCodecContext *avctx, int w, int h)
{
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + CONFIG_VP9_D3D11VA_HWACCEL + CONFIG_VP9_VAAPI_HWACCEL)
    // +2: one slot for the software pix_fmt, one for the AV_PIX_FMT_NONE terminator.
    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
    VP9Context *s = avctx->priv_data;
    uint8_t *p;
    int bytesperpixel = s->bytesperpixel, ret, cols, rows;

    av_assert0(w > 0 && h > 0);

    // Only renegotiate when the format or dimensions actually changed.
    if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
        if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
            return ret;

        // Offer hwaccel formats matching the bitstream's software format.
        switch (s->pix_fmt) {
        case AV_PIX_FMT_YUV420P:
#if CONFIG_VP9_DXVA2_HWACCEL
            *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
#endif
#if CONFIG_VP9_D3D11VA_HWACCEL
            *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
#endif
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;
#endif
            break;
        case AV_PIX_FMT_YUV420P10:
        case AV_PIX_FMT_YUV420P12:
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;
#endif
            break;
        }

        // Software format is always the last real entry, then the terminator.
        *fmtp++ = s->pix_fmt;
        *fmtp = AV_PIX_FMT_NONE;

        ret = ff_thread_get_format(avctx, pix_fmts);
        if (ret < 0)
            return ret;

        avctx->pix_fmt = ret;
        // Remember what we negotiated for, so we skip this next time.
        s->gf_fmt  = s->pix_fmt;
        s->w = w;
        s->h = h;
    }

    // Context buffers are sized in 8x8 block units.
    cols = (w + 7) >> 3;
    rows = (h + 7) >> 3;

    // Existing line buffers are still valid if block dimensions and format match.
    if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
        return 0;

    s->last_fmt  = s->pix_fmt;
    s->sb_cols   = (w + 63) >> 6;  // 64x64 superblock columns
    s->sb_rows   = (h + 63) >> 6;  // 64x64 superblock rows
    s->cols      = (w + 7) >> 3;   // 8x8 block columns
    s->rows      = (h + 7) >> 3;   // 8x8 block rows

    // Carve the single allocation below into the individual arrays;
    // each array holds (n) entries per superblock column.
#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
    av_freep(&s->intra_pred_data[0]);
    // FIXME we slightly over-allocate here for subsampled chroma, but a little
    // bit of padding shouldn't affect performance...
    // 128 = the sixteen uint8_t context lines below (4*16 + 8*8 entries);
    // 192 * bytesperpixel = 3 planes of 64-pixel intra edge data.
    p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
                                sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->intra_pred_data[0],  uint8_t *,             64 * bytesperpixel);
    assign(s->intra_pred_data[1],  uint8_t *,             64 * bytesperpixel);
    assign(s->intra_pred_data[2],  uint8_t *,             64 * bytesperpixel);
    assign(s->above_y_nnz_ctx,     uint8_t *,             16);
    assign(s->above_mode_ctx,      uint8_t *,             16);
    assign(s->above_mv_ctx,        VP56mv(*)[2],          16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *,             16);
    assign(s->above_uv_nnz_ctx[1], uint8_t *,             16);
    assign(s->above_partition_ctx, uint8_t *,              8);
    assign(s->above_skip_ctx,      uint8_t *,              8);
    assign(s->above_txfm_ctx,      uint8_t *,              8);
    assign(s->above_segpred_ctx,   uint8_t *,              8);
    assign(s->above_intra_ctx,     uint8_t *,              8);
    assign(s->above_comp_ctx,      uint8_t *,              8);
    assign(s->above_ref_ctx,       uint8_t *,              8);
    assign(s->above_filter_ctx,    uint8_t *,              8);
    assign(s->lflvl,               VP9Filter *,            1);
#undef assign

    // these will be re-allocated a little later
    av_freep(&s->b_base);
    av_freep(&s->block_base);

    // Bit depth changed: reinitialize the depth-dependent DSP function tables.
    if (s->s.h.bpp != s->last_bpp) {
        ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
        ff_videodsp_init(&s->vdsp, s->s.h.bpp);
        s->last_bpp = s->s.h.bpp;
    }

    return 0;
}
212 | |
/* (Re)allocate the block-mode and coefficient scratch buffers.
 *
 * In 2-pass decoding the whole frame's worth of blocks/coefficients must be
 * kept (one set per superblock), whereas single-pass decoding only ever
 * needs scratch space for one superblock at a time. The allocation mode is
 * remembered in block_alloc_using_2pass so we only reallocate on a change.
 *
 * block_base is carved into: luma coefs | 2 chroma coef planes |
 * luma eobs | 2 chroma eob planes (see pointer arithmetic below).
 * Returns 0 on success or AVERROR(ENOMEM). */
static int update_block_buffers(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;

    // Buffers exist and were allocated for the current pass mode: keep them.
    if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
        return 0;

    av_free(s->b_base);
    av_free(s->block_base);
    // Chroma sizes shrink with subsampling in each direction.
    chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
    chroma_eobs   = 16 * 16 >> (s->ss_h + s->ss_v);
    if (s->s.frames[CUR_FRAME].uses_2pass) {
        int sbs = s->sb_cols * s->sb_rows;

        s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
        s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                    16 * 16 + 2 * chroma_eobs) * sbs);
        if (!s->b_base || !s->block_base)
            return AVERROR(ENOMEM);
        s->uvblock_base[0] = s->block_base + sbs * 64 * 64 * bytesperpixel;
        s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
        s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
        s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
        s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
    } else {
        // Single pass: same layout, but only one superblock's worth of space.
        s->b_base = av_malloc(sizeof(VP9Block));
        s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                   16 * 16 + 2 * chroma_eobs);
        if (!s->b_base || !s->block_base)
            return AVERROR(ENOMEM);
        s->uvblock_base[0] = s->block_base + 64 * 64 * bytesperpixel;
        s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks * bytesperpixel;
        s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks * bytesperpixel);
        s->uveob_base[0] = s->eob_base + 16 * 16;
        s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
    }
    s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;

    return 0;
}
254 | |
255 | // The sign bit is at the end, not the start, of a bit sequence |
256 | static av_always_inline int get_sbits_inv(GetBitContext *gb, int n) |
257 | { |
258 | int v = get_bits(gb, n); |
259 | return get_bits1(gb) ? -v : v; |
260 | } |
261 | |
262 | static av_always_inline int inv_recenter_nonneg(int v, int m) |
263 | { |
264 | if (v > 2 * m) |
265 | return v; |
266 | if (v & 1) |
267 | return m - ((v + 1) >> 1); |
268 | return m + (v >> 1); |
269 | } |
270 | |
271 | // differential forward probability updates |
/* Decode a differentially-coded forward probability update.
 * p is the current probability in [1, 255]; the return value is the new
 * probability, also in [1, 255]. The order of range-coder reads below is
 * dictated by the bitstream and must not change. */
// differential forward probability updates
static int update_prob(VP56RangeCoder *c, int p)
{
    // Maps the VLC-decoded index d onto the actual absolute difference;
    // the first 20 entries give coarse, widely-spaced "cheap" updates.
    static const int inv_map_table[255] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253, 253,
    };
    int d;

    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A, 255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0, 254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */

    // VLC for d: successively longer codes cover ranges [0,16), [16,32),
    // [32,64), and [64,255) (the last with an extra precision bit for d>=65).
    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
    } else {
        d = vp8_rac_get_uint(c, 7);
        if (d >= 65)
            d = (d << 1) - 65 + vp8_rac_get(c);
        d += 64;
        av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
    }

    // Recenter the mapped difference around p, folding toward whichever
    // side of the [1, 255] range p is closer to.
    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
                    255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}
329 | |
/* Parse the color-config portion of the VP9 uncompressed header:
 * bit depth (profiles 2/3 only), colorspace, color range and chroma
 * subsampling. Fills in s->bpp_index / s->s.h.bpp / s->bytesperpixel,
 * s->ss_h / s->ss_v and s->pix_fmt, plus avctx colorspace/range.
 * The get_bits() call order is dictated by the bitstream; do not reorder.
 * Returns 0 on success or AVERROR_INVALIDDATA on reserved/invalid combos. */
static int read_colorspace_details(AVCodecContext *avctx)
{
    static const enum AVColorSpace colorspaces[8] = {
        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
    };
    VP9Context *s = avctx->priv_data;
    // Profiles 0/1 are fixed 8-bit; profiles 2/3 signal 10 vs 12 bit with one bit.
    int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12

    s->bpp_index = bits;
    s->s.h.bpp = 8 + bits * 2;
    s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
    avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
    if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
        static const enum AVPixelFormat pix_fmt_rgb[3] = {
            AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
        };
        // RGB is never subsampled and always full range.
        s->ss_h = s->ss_v = 0;
        avctx->color_range = AVCOL_RANGE_JPEG;
        s->pix_fmt = pix_fmt_rgb[bits];
        if (avctx->profile & 1) {
            // Odd profiles (1/3) carry a reserved bit here which must be zero.
            if (get_bits1(&s->gb)) {
                av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
                return AVERROR_INVALIDDATA;
            }
        } else {
            av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
                   avctx->profile);
            return AVERROR_INVALIDDATA;
        }
    } else {
        // Indexed by [bit-depth][vertical ss][horizontal ss].
        static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
            { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
              { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
            { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
              { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
            { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
              { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
        };
        avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
        if (avctx->profile & 1) {
            // Odd profiles signal subsampling explicitly; 4:2:0 is not
            // allowed there (that is what the even profiles are for).
            s->ss_h = get_bits1(&s->gb);
            s->ss_v = get_bits1(&s->gb);
            s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
            if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
                av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
                       avctx->profile);
                return AVERROR_INVALIDDATA;
            } else if (get_bits1(&s->gb)) {
                av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
                       avctx->profile);
                return AVERROR_INVALIDDATA;
            }
        } else {
            // Even profiles (0/2) are implicitly 4:2:0.
            s->ss_h = s->ss_v = 1;
            s->pix_fmt = pix_fmt_for_ss[bits][1][1];
        }
    }

    return 0;
}
391 | |
392 | static int decode_frame_header(AVCodecContext *avctx, |
393 | const uint8_t *data, int size, int *ref) |
394 | { |
395 | VP9Context *s = avctx->priv_data; |
396 | int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp; |
397 | int last_invisible; |
398 | const uint8_t *data2; |
399 | |
400 | /* general header */ |
401 | if ((ret = init_get_bits8(&s->gb, data, size)) < 0) { |
402 | av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n"); |
403 | return ret; |
404 | } |
405 | if (get_bits(&s->gb, 2) != 0x2) { // frame marker |
406 | av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n"); |
407 | return AVERROR_INVALIDDATA; |
408 | } |
409 | avctx->profile = get_bits1(&s->gb); |
410 | avctx->profile |= get_bits1(&s->gb) << 1; |
411 | if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb); |
412 | if (avctx->profile > 3) { |
413 | av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile); |
414 | return AVERROR_INVALIDDATA; |
415 | } |
416 | s->s.h.profile = avctx->profile; |
417 | if (get_bits1(&s->gb)) { |
418 | *ref = get_bits(&s->gb, 3); |
419 | return 0; |
420 | } |
421 | |
422 | s->last_keyframe = s->s.h.keyframe; |
423 | s->s.h.keyframe = !get_bits1(&s->gb); |
424 | |
425 | last_invisible = s->s.h.invisible; |
426 | s->s.h.invisible = !get_bits1(&s->gb); |
427 | s->s.h.errorres = get_bits1(&s->gb); |
428 | s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible; |
429 | |
430 | if (s->s.h.keyframe) { |
431 | if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode |
432 | av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n"); |
433 | return AVERROR_INVALIDDATA; |
434 | } |
435 | if ((ret = read_colorspace_details(avctx)) < 0) |
436 | return ret; |
437 | // for profile 1, here follows the subsampling bits |
438 | s->s.h.refreshrefmask = 0xff; |
439 | w = get_bits(&s->gb, 16) + 1; |
440 | h = get_bits(&s->gb, 16) + 1; |
441 | if (get_bits1(&s->gb)) // display size |
442 | skip_bits(&s->gb, 32); |
443 | } else { |
444 | s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0; |
445 | s->s.h.resetctx = s->s.h.errorres ? 0 : get_bits(&s->gb, 2); |
446 | if (s->s.h.intraonly) { |
447 | if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode |
448 | av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n"); |
449 | return AVERROR_INVALIDDATA; |
450 | } |
451 | if (avctx->profile >= 1) { |
452 | if ((ret = read_colorspace_details(avctx)) < 0) |
453 | return ret; |
454 | } else { |
455 | s->ss_h = s->ss_v = 1; |
456 | s->s.h.bpp = 8; |
457 | s->bpp_index = 0; |
458 | s->bytesperpixel = 1; |
459 | s->pix_fmt = AV_PIX_FMT_YUV420P; |
460 | avctx->colorspace = AVCOL_SPC_BT470BG; |
461 | avctx->color_range = AVCOL_RANGE_JPEG; |
462 | } |
463 | s->s.h.refreshrefmask = get_bits(&s->gb, 8); |
464 | w = get_bits(&s->gb, 16) + 1; |
465 | h = get_bits(&s->gb, 16) + 1; |
466 | if (get_bits1(&s->gb)) // display size |
467 | skip_bits(&s->gb, 32); |
468 | } else { |
469 | s->s.h.refreshrefmask = get_bits(&s->gb, 8); |
470 | s->s.h.refidx[0] = get_bits(&s->gb, 3); |
471 | s->s.h.signbias[0] = get_bits1(&s->gb) && !s->s.h.errorres; |
472 | s->s.h.refidx[1] = get_bits(&s->gb, 3); |
473 | s->s.h.signbias[1] = get_bits1(&s->gb) && !s->s.h.errorres; |
474 | s->s.h.refidx[2] = get_bits(&s->gb, 3); |
475 | s->s.h.signbias[2] = get_bits1(&s->gb) && !s->s.h.errorres; |
476 | if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] || |
477 | !s->s.refs[s->s.h.refidx[1]].f->buf[0] || |
478 | !s->s.refs[s->s.h.refidx[2]].f->buf[0]) { |
479 | av_log(avctx, AV_LOG_ERROR, "Not all references are available\n"); |
480 | return AVERROR_INVALIDDATA; |
481 | } |
482 | if (get_bits1(&s->gb)) { |
483 | w = s->s.refs[s->s.h.refidx[0]].f->width; |
484 | h = s->s.refs[s->s.h.refidx[0]].f->height; |
485 | } else if (get_bits1(&s->gb)) { |
486 | w = s->s.refs[s->s.h.refidx[1]].f->width; |
487 | h = s->s.refs[s->s.h.refidx[1]].f->height; |
488 | } else if (get_bits1(&s->gb)) { |
489 | w = s->s.refs[s->s.h.refidx[2]].f->width; |
490 | h = s->s.refs[s->s.h.refidx[2]].f->height; |
491 | } else { |
492 | w = get_bits(&s->gb, 16) + 1; |
493 | h = get_bits(&s->gb, 16) + 1; |
494 | } |
495 | // Note that in this code, "CUR_FRAME" is actually before we |
496 | // have formally allocated a frame, and thus actually represents |
497 | // the _last_ frame |
498 | s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w && |
499 | s->s.frames[CUR_FRAME].tf.f->height == h; |
500 | if (get_bits1(&s->gb)) // display size |
501 | skip_bits(&s->gb, 32); |
502 | s->s.h.highprecisionmvs = get_bits1(&s->gb); |
503 | s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE : |
504 | get_bits(&s->gb, 2); |
505 | s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] || |
506 | s->s.h.signbias[0] != s->s.h.signbias[2]; |
507 | if (s->s.h.allowcompinter) { |
508 | if (s->s.h.signbias[0] == s->s.h.signbias[1]) { |
509 | s->s.h.fixcompref = 2; |
510 | s->s.h.varcompref[0] = 0; |
511 | s->s.h.varcompref[1] = 1; |
512 | } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) { |
513 | s->s.h.fixcompref = 1; |
514 | s->s.h.varcompref[0] = 0; |
515 | s->s.h.varcompref[1] = 2; |
516 | } else { |
517 | s->s.h.fixcompref = 0; |
518 | s->s.h.varcompref[0] = 1; |
519 | s->s.h.varcompref[1] = 2; |
520 | } |
521 | } |
522 | } |
523 | } |
524 | s->s.h.refreshctx = s->s.h.errorres ? 0 : get_bits1(&s->gb); |
525 | s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb); |
526 | s->s.h.framectxid = c = get_bits(&s->gb, 2); |
527 | if (s->s.h.keyframe || s->s.h.intraonly) |
528 | s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes |
529 | |
530 | /* loopfilter header data */ |
531 | if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) { |
532 | // reset loopfilter defaults |
533 | s->s.h.lf_delta.ref[0] = 1; |
534 | s->s.h.lf_delta.ref[1] = 0; |
535 | s->s.h.lf_delta.ref[2] = -1; |
536 | s->s.h.lf_delta.ref[3] = -1; |
537 | s->s.h.lf_delta.mode[0] = 0; |
538 | s->s.h.lf_delta.mode[1] = 0; |
539 | memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat)); |
540 | } |
541 | s->s.h.filter.level = get_bits(&s->gb, 6); |
542 | sharp = get_bits(&s->gb, 3); |
543 | // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep |
544 | // the old cache values since they are still valid |
545 | if (s->s.h.filter.sharpness != sharp) |
546 | memset(s->filter_lut.lim_lut, 0, sizeof(s->filter_lut.lim_lut)); |
547 | s->s.h.filter.sharpness = sharp; |
548 | if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) { |
549 | if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) { |
550 | for (i = 0; i < 4; i++) |
551 | if (get_bits1(&s->gb)) |
552 | s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6); |
553 | for (i = 0; i < 2; i++) |
554 | if (get_bits1(&s->gb)) |
555 | s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6); |
556 | } |
557 | } |
558 | |
559 | /* quantization header data */ |
560 | s->s.h.yac_qi = get_bits(&s->gb, 8); |
561 | s->s.h.ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0; |
562 | s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0; |
563 | s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0; |
564 | s->s.h.lossless = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 && |
565 | s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0; |
566 | if (s->s.h.lossless) |
567 | avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS; |
568 | |
569 | /* segmentation header info */ |
570 | if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) { |
571 | if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) { |
572 | for (i = 0; i < 7; i++) |
573 | s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ? |
574 | get_bits(&s->gb, 8) : 255; |
575 | if ((s->s.h.segmentation.temporal = get_bits1(&s->gb))) |
576 | for (i = 0; i < 3; i++) |
577 | s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ? |
578 | get_bits(&s->gb, 8) : 255; |
579 | } |
580 | |
581 | if (get_bits1(&s->gb)) { |
582 | s->s.h.segmentation.absolute_vals = get_bits1(&s->gb); |
583 | for (i = 0; i < 8; i++) { |
584 | if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb))) |
585 | s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8); |
586 | if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb))) |
587 | s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6); |
588 | if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb))) |
589 | s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2); |
590 | s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb); |
591 | } |
592 | } |
593 | } |
594 | |
595 | // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas |
596 | for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) { |
597 | int qyac, qydc, quvac, quvdc, lflvl, sh; |
598 | |
599 | if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) { |
600 | if (s->s.h.segmentation.absolute_vals) |
601 | qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8); |
602 | else |
603 | qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8); |
604 | } else { |
605 | qyac = s->s.h.yac_qi; |
606 | } |
607 | qydc = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8); |
608 | quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8); |
609 | quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8); |
610 | qyac = av_clip_uintp2(qyac, 8); |
611 | |
612 | s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc]; |
613 | s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac]; |
614 | s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc]; |
615 | s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac]; |
616 | |
617 | sh = s->s.h.filter.level >= 32; |
618 | if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) { |
619 | if (s->s.h.segmentation.absolute_vals) |
620 | lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6); |
621 | else |
622 | lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6); |
623 | } else { |
624 | lflvl = s->s.h.filter.level; |
625 | } |
626 | if (s->s.h.lf_delta.enabled) { |
627 | s->s.h.segmentation.feat[i].lflvl[0][0] = |
628 | s->s.h.segmentation.feat[i].lflvl[0][1] = |
629 | av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6); |
630 | for (j = 1; j < 4; j++) { |
631 | s->s.h.segmentation.feat[i].lflvl[j][0] = |
632 | av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] + |
633 | s->s.h.lf_delta.mode[0]) * (1 << sh)), 6); |
634 | s->s.h.segmentation.feat[i].lflvl[j][1] = |
635 | av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] + |
636 | s->s.h.lf_delta.mode[1]) * (1 << sh)), 6); |
637 | } |
638 | } else { |
639 | memset(s->s.h.segmentation.feat[i].lflvl, lflvl, |
640 | sizeof(s->s.h.segmentation.feat[i].lflvl)); |
641 | } |
642 | } |
643 | |
644 | /* tiling info */ |
645 | if ((ret = update_size(avctx, w, h)) < 0) { |
646 | av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", |
647 | w, h, s->pix_fmt); |
648 | return ret; |
649 | } |
650 | for (s->s.h.tiling.log2_tile_cols = 0; |
651 | s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols); |
652 | s->s.h.tiling.log2_tile_cols++) ; |
653 | for (max = 0; (s->sb_cols >> max) >= 4; max++) ; |
654 | max = FFMAX(0, max - 1); |
655 | while (max > s->s.h.tiling.log2_tile_cols) { |
656 | if (get_bits1(&s->gb)) |
657 | s->s.h.tiling.log2_tile_cols++; |
658 | else |
659 | break; |
660 | } |
661 | s->s.h.tiling.log2_tile_rows = decode012(&s->gb); |
662 | s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows; |
663 | if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) { |
664 | s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols; |
665 | s->c_b = av_fast_realloc(s->c_b, &s->c_b_size, |
666 | sizeof(VP56RangeCoder) * s->s.h.tiling.tile_cols); |
667 | if (!s->c_b) { |
668 | av_log(avctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n"); |
669 | return AVERROR(ENOMEM); |
670 | } |
671 | } |
672 | |
673 | /* check reference frames */ |
674 | if (!s->s.h.keyframe && !s->s.h.intraonly) { |
675 | for (i = 0; i < 3; i++) { |
676 | AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f; |
677 | int refw = ref->width, refh = ref->height; |
678 | |
679 | if (ref->format != avctx->pix_fmt) { |
680 | av_log(avctx, AV_LOG_ERROR, |
681 | "Ref pixfmt (%s) did not match current frame (%s)", |
682 | av_get_pix_fmt_name(ref->format), |
683 | av_get_pix_fmt_name(avctx->pix_fmt)); |
684 | return AVERROR_INVALIDDATA; |
685 | } else if (refw == w && refh == h) { |
686 | s->mvscale[i][0] = s->mvscale[i][1] = 0; |
687 | } else { |
688 | if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) { |
689 | av_log(avctx, AV_LOG_ERROR, |
690 | "Invalid ref frame dimensions %dx%d for frame size %dx%d\n", |
691 | refw, refh, w, h); |
692 | return AVERROR_INVALIDDATA; |
693 | } |
694 | s->mvscale[i][0] = (refw << 14) / w; |
695 | s->mvscale[i][1] = (refh << 14) / h; |
696 | s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14; |
697 | s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14; |
698 | } |
699 | } |
700 | } |
701 | |
702 | if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) { |
703 | s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p = |
704 | s->prob_ctx[3].p = ff_vp9_default_probs; |
705 | memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs, |
706 | sizeof(ff_vp9_default_coef_probs)); |
707 | memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs, |
708 | sizeof(ff_vp9_default_coef_probs)); |
709 | memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs, |
710 | sizeof(ff_vp9_default_coef_probs)); |
711 | memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs, |
712 | sizeof(ff_vp9_default_coef_probs)); |
713 | } else if (s->s.h.intraonly && s->s.h.resetctx == 2) { |
714 | s->prob_ctx[c].p = ff_vp9_default_probs; |
715 | memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs, |
716 | sizeof(ff_vp9_default_coef_probs)); |
717 | } |
718 | |
719 | // next 16 bits is size of the rest of the header (arith-coded) |
720 | s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16); |
721 | s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8; |
722 | |
723 | data2 = align_get_bits(&s->gb); |
724 | if (size2 > size - (data2 - data)) { |
725 | av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n"); |
726 | return AVERROR_INVALIDDATA; |
727 | } |
728 | ret = ff_vp56_init_range_decoder(&s->c, data2, size2); |
729 | if (ret < 0) |
730 | return ret; |
731 | |
732 | if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit |
733 | av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n"); |
734 | return AVERROR_INVALIDDATA; |
735 | } |
736 | |
737 | if (s->s.h.keyframe || s->s.h.intraonly) { |
738 | memset(s->counts.coef, 0, sizeof(s->counts.coef)); |
739 | memset(s->counts.eob, 0, sizeof(s->counts.eob)); |
740 | } else { |
741 | memset(&s->counts, 0, sizeof(s->counts)); |
742 | } |
743 | /* FIXME is it faster to not copy here, but do it down in the fw updates |
744 | * as explicit copies if the fw update is missing (and skip the copy upon |
745 | * fw update)? */ |
746 | s->prob.p = s->prob_ctx[c].p; |
747 | |
748 | // txfm updates |
749 | if (s->s.h.lossless) { |
750 | s->s.h.txfmmode = TX_4X4; |
751 | } else { |
752 | s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2); |
753 | if (s->s.h.txfmmode == 3) |
754 | s->s.h.txfmmode += vp8_rac_get(&s->c); |
755 | |
756 | if (s->s.h.txfmmode == TX_SWITCHABLE) { |
757 | for (i = 0; i < 2; i++) |
758 | if (vp56_rac_get_prob_branchy(&s->c, 252)) |
759 | s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]); |
760 | for (i = 0; i < 2; i++) |
761 | for (j = 0; j < 2; j++) |
762 | if (vp56_rac_get_prob_branchy(&s->c, 252)) |
763 | s->prob.p.tx16p[i][j] = |
764 | update_prob(&s->c, s->prob.p.tx16p[i][j]); |
765 | for (i = 0; i < 2; i++) |
766 | for (j = 0; j < 3; j++) |
767 | if (vp56_rac_get_prob_branchy(&s->c, 252)) |
768 | s->prob.p.tx32p[i][j] = |
769 | update_prob(&s->c, s->prob.p.tx32p[i][j]); |
770 | } |
771 | } |
772 | |
773 | // coef updates |
774 | for (i = 0; i < 4; i++) { |
775 | uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i]; |
776 | if (vp8_rac_get(&s->c)) { |
777 | for (j = 0; j < 2; j++) |
778 | for (k = 0; k < 2; k++) |
779 | for (l = 0; l < 6; l++) |
780 | for (m = 0; m < 6; m++) { |
781 | uint8_t *p = s->prob.coef[i][j][k][l][m]; |
782 | uint8_t *r = ref[j][k][l][m]; |
783 | if (m >= 3 && l == 0) // dc only has 3 pt |
784 | break; |
785 | for (n = 0; n < 3; n++) { |
786 | if (vp56_rac_get_prob_branchy(&s->c, 252)) |
787 | p[n] = update_prob(&s->c, r[n]); |
788 | else |
789 | p[n] = r[n]; |
790 | } |
791 | p[3] = 0; |
792 | } |
793 | } else { |
794 | for (j = 0; j < 2; j++) |
795 | for (k = 0; k < 2; k++) |
796 | for (l = 0; l < 6; l++) |
797 | for (m = 0; m < 6; m++) { |
798 | uint8_t *p = s->prob.coef[i][j][k][l][m]; |
799 | uint8_t *r = ref[j][k][l][m]; |
800 | if (m > 3 && l == 0) // dc only has 3 pt |
801 | break; |
802 | memcpy(p, r, 3); |
803 | p[3] = 0; |
804 | } |
805 | } |
806 | if (s->s.h.txfmmode == i) |
807 | break; |
808 | } |
809 | |
810 | // mode updates |
811 | for (i = 0; i < 3; i++) |
812 | if (vp56_rac_get_prob_branchy(&s->c, 252)) |
813 | s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]); |
814 | if (!s->s.h.keyframe && !s->s.h.intraonly) { |
815 | for (i = 0; i < 7; i++) |
816 | for (j = 0; j < 3; j++) |
817 | if (vp56_rac_get_prob_branchy(&s->c, 252)) |
818 | s->prob.p.mv_mode[i][j] = |
819 | update_prob(&s->c, s->prob.p.mv_mode[i][j]); |
820 | |
821 | if (s->s.h.filtermode == FILTER_SWITCHABLE) |
822 | for (i = 0; i < 4; i++) |
823 | for (j = 0; j < 2; j++) |
824 | if (vp56_rac_get_prob_branchy(&s->c, 252)) |
825 | s->prob.p.filter[i][j] = |
826 | update_prob(&s->c, s->prob.p.filter[i][j]); |
827 | |
828 | for (i = 0; i < 4; i++) |
829 | if (vp56_rac_get_prob_branchy(&s->c, 252)) |
830 | s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]); |
831 | |
832 | if (s->s.h.allowcompinter) { |
833 | s->s.h.comppredmode = vp8_rac_get(&s->c); |
834 | if (s->s.h.comppredmode) |
835 | s->s.h.comppredmode += vp8_rac_get(&s->c); |
836 | if (s->s.h.comppredmode == PRED_SWITCHABLE) |
837 | for (i = 0; i < 5; i++) |
838 | if (vp56_rac_get_prob_branchy(&s->c, 252)) |
839 | s->prob.p.comp[i] = |
840 | update_prob(&s->c, s->prob.p.comp[i]); |
841 | } else { |
842 | s->s.h.comppredmode = PRED_SINGLEREF; |
843 | } |
844 | |
845 | if (s->s.h.comppredmode != PRED_COMPREF) { |
846 | for (i = 0; i < 5; i++) { |
847 | if (vp56_rac_get_prob_branchy(&s->c, 252)) |
848 | s->prob.p.single_ref[i][0] = |
849 | update_prob(&s->c, s->prob.p.single_ref[i][0]); |
850 | if (vp56_rac_get_prob_branchy(&s->c, 252)) |
851 | s->prob.p.single_ref[i][1] = |
852 | update_prob(&s->c, s->prob.p.single_ref[i][1]); |
853 | } |
854 | } |
855 | |
856 | if (s->s.h.comppredmode != PRED_SINGLEREF) { |
857 | for (i = 0; i < 5; i++) |
858 | if (vp56_rac_get_prob_branchy(&s->c, 252)) |
859 | s->prob.p.comp_ref[i] = |
860 | update_prob(&s->c, s->prob.p.comp_ref[i]); |
861 | } |
862 | |
863 | for (i = 0; i < 4; i++) |
864 | for (j = 0; j < 9; j++) |
865 | if (vp56_rac_get_prob_branchy(&s->c, 252)) |
866 | s->prob.p.y_mode[i][j] = |
867 | update_prob(&s->c, s->prob.p.y_mode[i][j]); |
868 | |
869 | for (i = 0; i < 4; i++) |
870 | for (j = 0; j < 4; j++) |
871 | for (k = 0; k < 3; k++) |
872 | if (vp56_rac_get_prob_branchy(&s->c, 252)) |
873 | s->prob.p.partition[3 - i][j][k] = |
874 | update_prob(&s->c, |
875 | s->prob.p.partition[3 - i][j][k]); |
876 | |
877 | // mv fields don't use the update_prob subexp model for some reason |
878 | for (i = 0; i < 3; i++) |
879 | if (vp56_rac_get_prob_branchy(&s->c, 252)) |
880 | s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1; |
881 | |
882 | for (i = 0; i < 2; i++) { |
883 | if (vp56_rac_get_prob_branchy(&s->c, 252)) |
884 | s->prob.p.mv_comp[i].sign = |
885 | (vp8_rac_get_uint(&s->c, 7) << 1) | 1; |
886 | |
887 | for (j = 0; j < 10; j++) |
888 | if (vp56_rac_get_prob_branchy(&s->c, 252)) |
889 | s->prob.p.mv_comp[i].classes[j] = |
890 | (vp8_rac_get_uint(&s->c, 7) << 1) | 1; |
891 | |
892 | if (vp56_rac_get_prob_branchy(&s->c, 252)) |
893 | s->prob.p.mv_comp[i].class0 = |
894 | (vp8_rac_get_uint(&s->c, 7) << 1) | 1; |
895 | |
896 | for (j = 0; j < 10; j++) |
897 | if (vp56_rac_get_prob_branchy(&s->c, 252)) |
898 | s->prob.p.mv_comp[i].bits[j] = |
899 | (vp8_rac_get_uint(&s->c, 7) << 1) | 1; |
900 | } |
901 | |
902 | for (i = 0; i < 2; i++) { |
903 | for (j = 0; j < 2; j++) |
904 | for (k = 0; k < 3; k++) |
905 | if (vp56_rac_get_prob_branchy(&s->c, 252)) |
906 | s->prob.p.mv_comp[i].class0_fp[j][k] = |
907 | (vp8_rac_get_uint(&s->c, 7) << 1) | 1; |
908 | |
909 | for (j = 0; j < 3; j++) |
910 | if (vp56_rac_get_prob_branchy(&s->c, 252)) |
911 | s->prob.p.mv_comp[i].fp[j] = |
912 | (vp8_rac_get_uint(&s->c, 7) << 1) | 1; |
913 | } |
914 | |
915 | if (s->s.h.highprecisionmvs) { |
916 | for (i = 0; i < 2; i++) { |
917 | if (vp56_rac_get_prob_branchy(&s->c, 252)) |
918 | s->prob.p.mv_comp[i].class0_hp = |
919 | (vp8_rac_get_uint(&s->c, 7) << 1) | 1; |
920 | |
921 | if (vp56_rac_get_prob_branchy(&s->c, 252)) |
922 | s->prob.p.mv_comp[i].hp = |
923 | (vp8_rac_get_uint(&s->c, 7) << 1) | 1; |
924 | } |
925 | } |
926 | } |
927 | |
928 | return (data2 - data) + size2; |
929 | } |
930 | |
/* Recursively decode one partition subtree of a 64x64 superblock.
 *
 * Reads a partition symbol for the block at (row, col) (in 8x8-block units)
 * and level bl from the arithmetic coder, then either decodes the block
 * directly or recurses into its halves/quadrants.  yoff/uvoff are byte
 * offsets of the block into the current frame's luma/chroma planes.  Blocks
 * whose bottom/right half lies outside the visible frame have fewer legal
 * partitions, hence the separate edge cases below.  Every partition decision
 * is tallied in s->counts.partition for backward probability adaptation. */
static void decode_sb(AVCodecContext *avctx, int row, int col, VP9Filter *lflvl,
                      ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    VP9Context *s = avctx->priv_data;
    /* probability context derived from the above/left partition maps */
    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
            (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
    /* keyframes/intra-only frames use the fixed default partition probs */
    const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] :
                                                     s->prob.p.partition[bl][c];
    enum BlockPartition bp;
    ptrdiff_t hbs = 4 >> bl; /* half the block size, in 8x8-block units */
    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int bytesperpixel = s->bytesperpixel;

    if (bl == BL_8X8) {
        /* leaf level: the partition symbol only selects the sub-8x8 layout */
        bp = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
        ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) { // FIXME why not <=?
        if (row + hbs < s->rows) { // FIXME why not <=?
            /* block fully inside the frame: all four partitions are legal */
            bp = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
            switch (bp) {
            case PARTITION_NONE:
                ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_H:
                ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                ff_vp9_decode_block(avctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_V:
                ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                ff_vp9_decode_block(avctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_SPLIT:
                /* recurse into the four quadrants at the next-smaller level */
                decode_sb(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(avctx, row, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb(avctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(avctx, row + hbs, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                break;
            default:
                av_assert0(0);
            }
        } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
            /* bottom half is outside the frame: only SPLIT or H possible,
             * coded as a single binary decision */
            bp = PARTITION_SPLIT;
            decode_sb(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
            decode_sb(avctx, row, col + hbs, lflvl,
                      yoff + 8 * hbs * bytesperpixel,
                      uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
        } else {
            bp = PARTITION_H;
            ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else if (row + hbs < s->rows) { // FIXME why not <=?
        /* right half is outside the frame: only SPLIT or V possible */
        if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
            bp = PARTITION_SPLIT;
            decode_sb(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
            yoff += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb(avctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        } else {
            bp = PARTITION_V;
            ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else {
        /* both halves outside the frame: SPLIT is implied, nothing coded */
        bp = PARTITION_SPLIT;
        decode_sb(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
    }
    s->counts.partition[bl][c][bp]++;
}
1009 | |
/* Second-pass variant of decode_sb(): instead of reading partition symbols
 * from the bitstream, replays the partition tree recorded during pass 1 in
 * the VP9Block array (b->bl/b->bp) and redecodes the blocks from the stored
 * data.  NOTE(review): s->b appears to be advanced inside
 * ff_vp9_decode_block() during pass 2 -- confirm against that function. */
static void decode_sb_mem(AVCodecContext *avctx, int row, int col, VP9Filter *lflvl,
                          ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    VP9Context *s = avctx->priv_data;
    VP9Block *b = s->b;
    ptrdiff_t hbs = 4 >> bl; /* half the block size, in 8x8-block units */
    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int bytesperpixel = s->bytesperpixel;

    if (bl == BL_8X8) {
        av_assert2(b->bl == BL_8X8);
        ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
    } else if (s->b->bl == bl) {
        /* the recorded partition terminates at this level */
        ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
        if (b->bp == PARTITION_H && row + hbs < s->rows) {
            yoff += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            ff_vp9_decode_block(avctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
        } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
            yoff += hbs * 8 * bytesperpixel;
            uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
            ff_vp9_decode_block(avctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
        }
    } else {
        /* recorded level is smaller: recurse, honoring frame-edge clipping
         * exactly as decode_sb() did in pass 1 */
        decode_sb_mem(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
        if (col + hbs < s->cols) { // FIXME why not <=?
            if (row + hbs < s->rows) {
                decode_sb_mem(avctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb_mem(avctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb_mem(avctx, row + hbs, col + hbs, lflvl,
                              yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
            } else {
                yoff += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                decode_sb_mem(avctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
            }
        } else if (row + hbs < s->rows) {
            yoff += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb_mem(avctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        }
    }
}
1058 | |
/* Compute the boundaries of tile number idx when n superblocks are split
 * into (1 << log2_n) tiles.  The interval is written to *start/*end in
 * 8x8-block units (superblock index << 3), half-open: [*start, *end). */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int first = (idx * n) >> log2_n;
    int last  = ((idx + 1) * n) >> log2_n;

    /* defensive clamp to the superblock count */
    if (first > n)
        first = n;
    if (last > n)
        last = n;

    /* superblock units -> 8x8-block units */
    *start = first << 3;
    *end   = last << 3;
}
1066 | |
/* Free the decoder's per-frame scratch allocations (intra prediction
 * backup rows, block struct array, coefficient block buffers).
 * NOTE(review): only element [0] of intra_pred_data is freed -- presumably
 * all planes share one allocation; confirm at the allocation site. */
static void free_buffers(VP9Context *s)
{
    av_freep(&s->intra_pred_data[0]);
    av_freep(&s->b_base);
    av_freep(&s->block_base);
}
1073 | |
/* Codec close callback: release every frame/reference buffer and free all
 * scratch memory.  Buffers are unreferenced before their AVFrame containers
 * are freed.  Safe to call on a partially initialized context (used as the
 * error path of init_frames()). */
static av_cold int vp9_decode_free(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    int i;

    /* internal frames (current, mvpair and segmap references) */
    for (i = 0; i < 3; i++) {
        if (s->s.frames[i].tf.f->buf[0])
            vp9_frame_unref(avctx, &s->s.frames[i]);
        av_frame_free(&s->s.frames[i].tf.f);
    }
    /* the 8 VP9 reference slots, current and pending generation */
    for (i = 0; i < 8; i++) {
        if (s->s.refs[i].f->buf[0])
            ff_thread_release_buffer(avctx, &s->s.refs[i]);
        av_frame_free(&s->s.refs[i].f);
        if (s->next_refs[i].f->buf[0])
            ff_thread_release_buffer(avctx, &s->next_refs[i]);
        av_frame_free(&s->next_refs[i].f);
    }
    free_buffers(s);
    /* per-tile range coder array */
    av_freep(&s->c_b);
    s->c_b_size = 0;

    return 0;
}
1098 | |
1099 | |
/* Decode one VP9 packet.
 *
 * Parses the frame header, sets up the current/reference frame state,
 * decodes all tiles (in one or two passes, depending on frame threading
 * and entropy-context adaptation), runs the loopfilter per superblock row,
 * and finally rotates the reference slots.  Returns the number of bytes
 * consumed (pkt->size) on success or a negative AVERROR. */
static int vp9_decode_frame(AVCodecContext *avctx, void *frame,
                            int *got_frame, AVPacket *pkt)
{
    const uint8_t *data = pkt->data;
    int size = pkt->size;
    VP9Context *s = avctx->priv_data;
    int ret, tile_row, tile_col, i, ref, row, col;
    /* the previous segmentation map can be kept if segmentation is off or
     * the map is not updated by this frame */
    int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
                            (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
    ptrdiff_t yoff, uvoff, ls_y, ls_uv;
    AVFrame *f;
    int bytesperpixel;

    if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
        return ret;
    } else if (ret == 0) {
        /* ret == 0 signals a "show existing frame" header: output the
         * requested reference slot directly, no pixel decoding */
        if (!s->s.refs[ref].f->buf[0]) {
            av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
            return AVERROR_INVALIDDATA;
        }
        if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
            return ret;
        ((AVFrame *)frame)->pts = pkt->pts;
#if FF_API_PKT_PTS
FF_DISABLE_DEPRECATION_WARNINGS
        ((AVFrame *)frame)->pkt_pts = pkt->pts;
FF_ENABLE_DEPRECATION_WARNINGS
#endif
        ((AVFrame *)frame)->pkt_dts = pkt->dts;
        /* carry all reference slots over unchanged */
        for (i = 0; i < 8; i++) {
            if (s->next_refs[i].f->buf[0])
                ff_thread_release_buffer(avctx, &s->next_refs[i]);
            if (s->s.refs[i].f->buf[0] &&
                (ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
                return ret;
        }
        *got_frame = 1;
        return pkt->size;
    }
    /* skip past the uncompressed header */
    data += ret;
    size -= ret;

    /* rotate the previous frame into the segmentation-map reference slot
     * (unless the old map is explicitly retained, see above) */
    if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
        if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
            vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
        if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
            (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
            return ret;
    }
    /* rotate the previous frame into the motion-vector reference slot */
    if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
        vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_MVPAIR]);
    if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
        (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
        return ret;
    /* allocate the frame to decode into */
    if (s->s.frames[CUR_FRAME].tf.f->buf[0])
        vp9_frame_unref(avctx, &s->s.frames[CUR_FRAME]);
    if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
        return ret;
    f = s->s.frames[CUR_FRAME].tf.f;
    f->key_frame = s->s.h.keyframe;
    f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
    ls_y = f->linesize[0];
    ls_uv =f->linesize[1];

    /* a stale segmentation map from a different resolution is unusable */
    if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
        (s->s.frames[REF_FRAME_MVPAIR].tf.f->width != s->s.frames[CUR_FRAME].tf.f->width ||
         s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
        vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
    }

    // ref frame setup
    for (i = 0; i < 8; i++) {
        if (s->next_refs[i].f->buf[0])
            ff_thread_release_buffer(avctx, &s->next_refs[i]);
        if (s->s.h.refreshrefmask & (1 << i)) {
            ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
        } else if (s->s.refs[i].f->buf[0]) {
            ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
        }
        /* NOTE(review): if neither branch above runs, 'ret' is tested with
         * a stale (non-negative at this point) value and the slot is left
         * empty -- confirm this is the intended behavior */
        if (ret < 0)
            return ret;
    }

    /* hardware decoding: hand the whole packet to the hwaccel and skip the
     * software tile loop entirely */
    if (avctx->hwaccel) {
        ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
        if (ret < 0)
            return ret;
        ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
        if (ret < 0)
            return ret;
        ret = avctx->hwaccel->end_frame(avctx);
        if (ret < 0)
            return ret;
        goto finish;
    }

    // main tile decode loop
    bytesperpixel = s->bytesperpixel;
    /* reset the above-row contexts for the whole frame */
    memset(s->above_partition_ctx, 0, s->cols);
    memset(s->above_skip_ctx, 0, s->cols);
    if (s->s.h.keyframe || s->s.h.intraonly) {
        memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
    } else {
        memset(s->above_mode_ctx, NEARESTMV, s->cols);
    }
    memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
    memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
    memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
    memset(s->above_segpred_ctx, 0, s->cols);
    /* two-pass decoding (parse in pass 1, reconstruct in pass 2) is used
     * with frame threading when the entropy context must be adapted */
    s->pass = s->s.frames[CUR_FRAME].uses_2pass =
        avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
    if ((ret = update_block_buffers(avctx)) < 0) {
        av_log(avctx, AV_LOG_ERROR,
               "Failed to allocate block buffers\n");
        return ret;
    }
    if (s->s.h.refreshctx && s->s.h.parallelmode) {
        /* parallel mode: the frame context is refreshed from the explicit
         * forward updates now, so dependent threads can start early */
        int j, k, l, m;

        for (i = 0; i < 4; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++)
                            memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
                                   s->prob.coef[i][j][k][l][m], 3);
            if (s->s.h.txfmmode == i)
                break;
        }
        s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
        ff_thread_finish_setup(avctx);
    } else if (!s->s.h.refreshctx) {
        ff_thread_finish_setup(avctx);
    }

    do {
        yoff = uvoff = 0;
        /* rewind the block/coefficient buffers for this pass */
        s->b = s->b_base;
        s->block = s->block_base;
        s->uvblock[0] = s->uvblock_base[0];
        s->uvblock[1] = s->uvblock_base[1];
        s->eob = s->eob_base;
        s->uveob[0] = s->uveob_base[0];
        s->uveob[1] = s->uveob_base[1];

        for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
            set_tile_offset(&s->tile_row_start, &s->tile_row_end,
                            tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
            if (s->pass != 2) {
                /* initialize one range decoder per tile column; each tile
                 * is preceded by a 32-bit size, except the last one */
                for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
                    int64_t tile_size;

                    if (tile_col == s->s.h.tiling.tile_cols - 1 &&
                        tile_row == s->s.h.tiling.tile_rows - 1) {
                        tile_size = size;
                    } else {
                        tile_size = AV_RB32(data);
                        data += 4;
                        size -= 4;
                    }
                    if (tile_size > size) {
                        ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
                        return AVERROR_INVALIDDATA;
                    }
                    ret = ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
                    if (ret < 0)
                        return ret;
                    if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
                        ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
                        return AVERROR_INVALIDDATA;
                    }
                    data += tile_size;
                    size -= tile_size;
                }
            }

            for (row = s->tile_row_start; row < s->tile_row_end;
                 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
                VP9Filter *lflvl_ptr = s->lflvl;
                ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;

                for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
                    set_tile_offset(&s->tile_col_start, &s->tile_col_end,
                                    tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);

                    if (s->pass != 2) {
                        /* reset the left-edge contexts at each tile start */
                        memset(s->left_partition_ctx, 0, 8);
                        memset(s->left_skip_ctx, 0, 8);
                        if (s->s.h.keyframe || s->s.h.intraonly) {
                            memset(s->left_mode_ctx, DC_PRED, 16);
                        } else {
                            memset(s->left_mode_ctx, NEARESTMV, 8);
                        }
                        memset(s->left_y_nnz_ctx, 0, 16);
                        memset(s->left_uv_nnz_ctx, 0, 32);
                        memset(s->left_segpred_ctx, 0, 8);

                        /* switch to this tile column's range decoder */
                        memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
                    }

                    for (col = s->tile_col_start;
                         col < s->tile_col_end;
                         col += 8, yoff2 += 64 * bytesperpixel,
                         uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                        // FIXME integrate with lf code (i.e. zero after each
                        // use, similar to invtxfm coefficients, or similar)
                        if (s->pass != 1) {
                            memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
                        }

                        if (s->pass == 2) {
                            decode_sb_mem(avctx, row, col, lflvl_ptr,
                                          yoff2, uvoff2, BL_64X64);
                        } else {
                            decode_sb(avctx, row, col, lflvl_ptr,
                                      yoff2, uvoff2, BL_64X64);
                        }
                    }
                    if (s->pass != 2)
                        /* save the decoder state so the next sb row of this
                         * tile resumes where this one stopped */
                        memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
                }

                if (s->pass == 1)
                    continue;

                // backup pre-loopfilter reconstruction data for intra
                // prediction of next row of sb64s
                if (row + 8 < s->rows) {
                    memcpy(s->intra_pred_data[0],
                           f->data[0] + yoff + 63 * ls_y,
                           8 * s->cols * bytesperpixel);
                    memcpy(s->intra_pred_data[1],
                           f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                           8 * s->cols * bytesperpixel >> s->ss_h);
                    memcpy(s->intra_pred_data[2],
                           f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                           8 * s->cols * bytesperpixel >> s->ss_h);
                }

                // loopfilter one row
                if (s->s.h.filter.level) {
                    yoff2 = yoff;
                    uvoff2 = uvoff;
                    lflvl_ptr = s->lflvl;
                    for (col = 0; col < s->cols;
                         col += 8, yoff2 += 64 * bytesperpixel,
                         uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                        ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col,
                                             yoff2, uvoff2);
                    }
                }

                // FIXME maybe we can make this more finegrained by running the
                // loopfilter per-block instead of after each sbrow
                // In fact that would also make intra pred left preparation easier?
                ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
            }
        }

        if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
            /* backward adaptation of the probabilities from the counts
             * gathered during this pass */
            ff_vp9_adapt_probs(s);
            ff_thread_finish_setup(avctx);
        }
    } while (s->pass++ == 1);
    ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);

finish:
    // ref frame setup
    for (i = 0; i < 8; i++) {
        if (s->s.refs[i].f->buf[0])
            ff_thread_release_buffer(avctx, &s->s.refs[i]);
        if (s->next_refs[i].f->buf[0] &&
            (ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
            return ret;
    }

    if (!s->s.h.invisible) {
        if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
            return ret;
        *got_frame = 1;
    }

    return pkt->size;
}
1384 | |
1385 | static void vp9_decode_flush(AVCodecContext *avctx) |
1386 | { |
1387 | VP9Context *s = avctx->priv_data; |
1388 | int i; |
1389 | |
1390 | for (i = 0; i < 3; i++) |
1391 | vp9_frame_unref(avctx, &s->s.frames[i]); |
1392 | for (i = 0; i < 8; i++) |
1393 | ff_thread_release_buffer(avctx, &s->s.refs[i]); |
1394 | } |
1395 | |
1396 | static int init_frames(AVCodecContext *avctx) |
1397 | { |
1398 | VP9Context *s = avctx->priv_data; |
1399 | int i; |
1400 | |
1401 | for (i = 0; i < 3; i++) { |
1402 | s->s.frames[i].tf.f = av_frame_alloc(); |
1403 | if (!s->s.frames[i].tf.f) { |
1404 | vp9_decode_free(avctx); |
1405 | av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i); |
1406 | return AVERROR(ENOMEM); |
1407 | } |
1408 | } |
1409 | for (i = 0; i < 8; i++) { |
1410 | s->s.refs[i].f = av_frame_alloc(); |
1411 | s->next_refs[i].f = av_frame_alloc(); |
1412 | if (!s->s.refs[i].f || !s->next_refs[i].f) { |
1413 | vp9_decode_free(avctx); |
1414 | av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i); |
1415 | return AVERROR(ENOMEM); |
1416 | } |
1417 | } |
1418 | |
1419 | return 0; |
1420 | } |
1421 | |
1422 | static av_cold int vp9_decode_init(AVCodecContext *avctx) |
1423 | { |
1424 | VP9Context *s = avctx->priv_data; |
1425 | |
1426 | avctx->internal->allocate_progress = 1; |
1427 | s->last_bpp = 0; |
1428 | s->s.h.filter.sharpness = -1; |
1429 | |
1430 | return init_frames(avctx); |
1431 | } |
1432 | |
1433 | #if HAVE_THREADS |
/* Per-thread init for frame threading: each worker context only needs its
 * own AVFrame containers; decoder state is synced separately via
 * vp9_decode_update_thread_context(). */
static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
{
    return init_frames(avctx);
}
1438 | |
/* Frame-threading state transfer: copy from the source thread's context
 * everything a dependent thread needs to continue decoding -- frame and
 * reference buffers, header fields that persist across frames, and the
 * saved probability contexts. */
static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    int i, ret;
    VP9Context *s = dst->priv_data, *ssrc = src->priv_data;

    /* re-reference the internal frames */
    for (i = 0; i < 3; i++) {
        if (s->s.frames[i].tf.f->buf[0])
            vp9_frame_unref(dst, &s->s.frames[i]);
        if (ssrc->s.frames[i].tf.f->buf[0]) {
            if ((ret = vp9_frame_ref(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
                return ret;
        }
    }
    /* the source's *pending* refs become this thread's current refs */
    for (i = 0; i < 8; i++) {
        if (s->s.refs[i].f->buf[0])
            ff_thread_release_buffer(dst, &s->s.refs[i]);
        if (ssrc->next_refs[i].f->buf[0]) {
            if ((ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
                return ret;
        }
    }

    /* header/state fields that carry over between frames */
    s->s.h.invisible = ssrc->s.h.invisible;
    s->s.h.keyframe = ssrc->s.h.keyframe;
    s->s.h.intraonly = ssrc->s.h.intraonly;
    s->ss_v = ssrc->ss_v;
    s->ss_h = ssrc->ss_h;
    s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
    s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
    s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
    s->bytesperpixel = ssrc->bytesperpixel;
    s->gf_fmt = ssrc->gf_fmt;
    s->w = ssrc->w;
    s->h = ssrc->h;
    s->s.h.bpp = ssrc->s.h.bpp;
    s->bpp_index = ssrc->bpp_index;
    s->pix_fmt = ssrc->pix_fmt;
    /* saved probability contexts and per-frame deltas/features */
    memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
    memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
    memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
           sizeof(s->s.h.segmentation.feat));

    return 0;
}
1483 | #endif |
1484 | |
/* Decoder registration: supports direct rendering (DR1) and frame-level
 * multithreading; the thread callbacks are compiled in only when thread
 * support is enabled. */
AVCodec ff_vp9_decoder = {
    .name                  = "vp9",
    .long_name             = NULL_IF_CONFIG_SMALL("Google VP9"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP9,
    .priv_data_size        = sizeof(VP9Context),
    .init                  = vp9_decode_init,
    .close                 = vp9_decode_free,
    .decode                = vp9_decode_frame,
    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
    .flush                 = vp9_decode_flush,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
    .profiles              = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
};
1500 |