blob: 3aef3cbb52331778724e0685f70dd71d2355cab6
1 | /* |
2 | * H.26L/H.264/AVC/JVT/14496-10/... decoder |
3 | * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> |
4 | * |
5 | * This file is part of FFmpeg. |
6 | * |
7 | * FFmpeg is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; either |
10 | * version 2.1 of the License, or (at your option) any later version. |
11 | * |
12 | * FFmpeg is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | * Lesser General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU Lesser General Public |
18 | * License along with FFmpeg; if not, write to the Free Software |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | */ |
21 | |
22 | /** |
23 | * @file |
24 | * H.264 / AVC / MPEG4 part10 codec. |
25 | * @author Michael Niedermayer <michaelni@gmx.at> |
26 | */ |
27 | |
28 | #define UNCHECKED_BITSTREAM_READER 1 |
29 | |
30 | #include "libavutil/avassert.h" |
31 | #include "libavutil/imgutils.h" |
32 | #include "libavutil/opt.h" |
33 | #include "internal.h" |
34 | #include "cabac.h" |
35 | #include "cabac_functions.h" |
36 | #include "dsputil.h" |
37 | #include "error_resilience.h" |
38 | #include "avcodec.h" |
39 | #include "mpegvideo.h" |
40 | #include "h264.h" |
41 | #include "h264data.h" |
42 | #include "h264chroma.h" |
43 | #include "h264_mvpred.h" |
44 | #include "golomb.h" |
45 | #include "mathops.h" |
46 | #include "rectangle.h" |
47 | #include "svq3.h" |
48 | #include "thread.h" |
49 | #include "vdpau_internal.h" |
50 | |
51 | #include <assert.h> |
52 | |
/* Forward declaration only; the definition of flush_change() is not
 * visible in this part of the file. */
53 | static void flush_change(H264Context *h); |
54 | |
/* Four size constants exported to the rest of the decoder.
 * NOTE(review): presumably the number of samples in one macroblock,
 * indexed by chroma format (monochrome, 4:2:0, 4:2:2, 4:4:4) -- confirm
 * against the users of this table. */
const uint16_t ff_h264_mb_sizes[4] = {
    256,
    384,
    512,
    768,
};
56 | |
57 | static const uint8_t rem6[QP_MAX_NUM + 1] = { |
58 | 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, |
59 | 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, |
60 | 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, |
61 | 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, |
62 | 0, 1, 2, 3, |
63 | }; |
64 | |
65 | static const uint8_t div6[QP_MAX_NUM + 1] = { |
66 | 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, |
67 | 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, |
68 | 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, |
69 | 10,10,10,11,11,11,11,11,11,12,12,12,12,12,12,13,13,13, 13, 13, 13, |
70 | 14,14,14,14, |
71 | }; |
72 | |
/* 4x4 field scan order. Each entry is the raster index a + b * 4 of one
 * coefficient; the array has one extra trailing element, which is
 * implicitly zero-initialized. */
#define FIELD_SCAN_POS4(a, b) ((a) + (b) * 4)
static const uint8_t field_scan[16 + 1] = {
    FIELD_SCAN_POS4(0, 0), FIELD_SCAN_POS4(0, 1),
    FIELD_SCAN_POS4(1, 0), FIELD_SCAN_POS4(0, 2),
    FIELD_SCAN_POS4(0, 3), FIELD_SCAN_POS4(1, 1),
    FIELD_SCAN_POS4(1, 2), FIELD_SCAN_POS4(1, 3),
    FIELD_SCAN_POS4(2, 0), FIELD_SCAN_POS4(2, 1),
    FIELD_SCAN_POS4(2, 2), FIELD_SCAN_POS4(2, 3),
    FIELD_SCAN_POS4(3, 0), FIELD_SCAN_POS4(3, 1),
    FIELD_SCAN_POS4(3, 2), FIELD_SCAN_POS4(3, 3),
};
#undef FIELD_SCAN_POS4
79 | |
/* 8x8 field scan order. Each entry is the raster index a + b * 8 of one
 * coefficient; the array has one extra trailing element, which is
 * implicitly zero-initialized. */
#define FIELD_SCAN_POS8(a, b) ((a) + (b) * 8)
static const uint8_t field_scan8x8[64 + 1] = {
    FIELD_SCAN_POS8(0, 0), FIELD_SCAN_POS8(0, 1), FIELD_SCAN_POS8(0, 2), FIELD_SCAN_POS8(1, 0),
    FIELD_SCAN_POS8(1, 1), FIELD_SCAN_POS8(0, 3), FIELD_SCAN_POS8(0, 4), FIELD_SCAN_POS8(1, 2),
    FIELD_SCAN_POS8(2, 0), FIELD_SCAN_POS8(1, 3), FIELD_SCAN_POS8(0, 5), FIELD_SCAN_POS8(0, 6),
    FIELD_SCAN_POS8(0, 7), FIELD_SCAN_POS8(1, 4), FIELD_SCAN_POS8(2, 1), FIELD_SCAN_POS8(3, 0),
    FIELD_SCAN_POS8(2, 2), FIELD_SCAN_POS8(1, 5), FIELD_SCAN_POS8(1, 6), FIELD_SCAN_POS8(1, 7),
    FIELD_SCAN_POS8(2, 3), FIELD_SCAN_POS8(3, 1), FIELD_SCAN_POS8(4, 0), FIELD_SCAN_POS8(3, 2),
    FIELD_SCAN_POS8(2, 4), FIELD_SCAN_POS8(2, 5), FIELD_SCAN_POS8(2, 6), FIELD_SCAN_POS8(2, 7),
    FIELD_SCAN_POS8(3, 3), FIELD_SCAN_POS8(4, 1), FIELD_SCAN_POS8(5, 0), FIELD_SCAN_POS8(4, 2),
    FIELD_SCAN_POS8(3, 4), FIELD_SCAN_POS8(3, 5), FIELD_SCAN_POS8(3, 6), FIELD_SCAN_POS8(3, 7),
    FIELD_SCAN_POS8(4, 3), FIELD_SCAN_POS8(5, 1), FIELD_SCAN_POS8(6, 0), FIELD_SCAN_POS8(5, 2),
    FIELD_SCAN_POS8(4, 4), FIELD_SCAN_POS8(4, 5), FIELD_SCAN_POS8(4, 6), FIELD_SCAN_POS8(4, 7),
    FIELD_SCAN_POS8(5, 3), FIELD_SCAN_POS8(6, 1), FIELD_SCAN_POS8(6, 2), FIELD_SCAN_POS8(5, 4),
    FIELD_SCAN_POS8(5, 5), FIELD_SCAN_POS8(5, 6), FIELD_SCAN_POS8(5, 7), FIELD_SCAN_POS8(6, 3),
    FIELD_SCAN_POS8(7, 0), FIELD_SCAN_POS8(7, 1), FIELD_SCAN_POS8(6, 4), FIELD_SCAN_POS8(6, 5),
    FIELD_SCAN_POS8(6, 6), FIELD_SCAN_POS8(6, 7), FIELD_SCAN_POS8(7, 2), FIELD_SCAN_POS8(7, 3),
    FIELD_SCAN_POS8(7, 4), FIELD_SCAN_POS8(7, 5), FIELD_SCAN_POS8(7, 6), FIELD_SCAN_POS8(7, 7),
};
#undef FIELD_SCAN_POS8
98 | |
/* 8x8 field scan order in the arrangement used for CAVLC (per the name).
 * Each entry is the raster index a + b * 8 of one coefficient; the array
 * has one extra trailing element, which is implicitly zero-initialized. */
#define FIELD_CAVLC_POS8(a, b) ((a) + (b) * 8)
static const uint8_t field_scan8x8_cavlc[64 + 1] = {
    FIELD_CAVLC_POS8(0, 0), FIELD_CAVLC_POS8(1, 1), FIELD_CAVLC_POS8(2, 0), FIELD_CAVLC_POS8(0, 7),
    FIELD_CAVLC_POS8(2, 2), FIELD_CAVLC_POS8(2, 3), FIELD_CAVLC_POS8(2, 4), FIELD_CAVLC_POS8(3, 3),
    FIELD_CAVLC_POS8(3, 4), FIELD_CAVLC_POS8(4, 3), FIELD_CAVLC_POS8(4, 4), FIELD_CAVLC_POS8(5, 3),
    FIELD_CAVLC_POS8(5, 5), FIELD_CAVLC_POS8(7, 0), FIELD_CAVLC_POS8(6, 6), FIELD_CAVLC_POS8(7, 4),
    FIELD_CAVLC_POS8(0, 1), FIELD_CAVLC_POS8(0, 3), FIELD_CAVLC_POS8(1, 3), FIELD_CAVLC_POS8(1, 4),
    FIELD_CAVLC_POS8(1, 5), FIELD_CAVLC_POS8(3, 1), FIELD_CAVLC_POS8(2, 5), FIELD_CAVLC_POS8(4, 1),
    FIELD_CAVLC_POS8(3, 5), FIELD_CAVLC_POS8(5, 1), FIELD_CAVLC_POS8(4, 5), FIELD_CAVLC_POS8(6, 1),
    FIELD_CAVLC_POS8(5, 6), FIELD_CAVLC_POS8(7, 1), FIELD_CAVLC_POS8(6, 7), FIELD_CAVLC_POS8(7, 5),
    FIELD_CAVLC_POS8(0, 2), FIELD_CAVLC_POS8(0, 4), FIELD_CAVLC_POS8(0, 5), FIELD_CAVLC_POS8(2, 1),
    FIELD_CAVLC_POS8(1, 6), FIELD_CAVLC_POS8(4, 0), FIELD_CAVLC_POS8(2, 6), FIELD_CAVLC_POS8(5, 0),
    FIELD_CAVLC_POS8(3, 6), FIELD_CAVLC_POS8(6, 0), FIELD_CAVLC_POS8(4, 6), FIELD_CAVLC_POS8(6, 2),
    FIELD_CAVLC_POS8(5, 7), FIELD_CAVLC_POS8(6, 4), FIELD_CAVLC_POS8(7, 2), FIELD_CAVLC_POS8(7, 6),
    FIELD_CAVLC_POS8(1, 0), FIELD_CAVLC_POS8(1, 2), FIELD_CAVLC_POS8(0, 6), FIELD_CAVLC_POS8(3, 0),
    FIELD_CAVLC_POS8(1, 7), FIELD_CAVLC_POS8(3, 2), FIELD_CAVLC_POS8(2, 7), FIELD_CAVLC_POS8(4, 2),
    FIELD_CAVLC_POS8(3, 7), FIELD_CAVLC_POS8(5, 2), FIELD_CAVLC_POS8(4, 7), FIELD_CAVLC_POS8(5, 4),
    FIELD_CAVLC_POS8(6, 3), FIELD_CAVLC_POS8(6, 5), FIELD_CAVLC_POS8(7, 3), FIELD_CAVLC_POS8(7, 7),
};
#undef FIELD_CAVLC_POS8
117 | |
/* 8x8 zigzag scan order in the arrangement used for CAVLC (per the name).
 * Each entry is the raster index a + b * 8 of one coefficient; the array
 * has one extra trailing element, which is implicitly zero-initialized.
 *
 * Relation stated by the original comment:
 *   zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[(i/4) + 16*(i%4)]
 * NOTE(review): zigzag_scan8x8 is defined elsewhere; verify the direction
 * of this mapping against that table. */
#define ZIGZAG_CAVLC_POS8(a, b) ((a) + (b) * 8)
static const uint8_t zigzag_scan8x8_cavlc[64 + 1] = {
    ZIGZAG_CAVLC_POS8(0, 0), ZIGZAG_CAVLC_POS8(1, 1), ZIGZAG_CAVLC_POS8(1, 2), ZIGZAG_CAVLC_POS8(2, 2),
    ZIGZAG_CAVLC_POS8(4, 1), ZIGZAG_CAVLC_POS8(0, 5), ZIGZAG_CAVLC_POS8(3, 3), ZIGZAG_CAVLC_POS8(7, 0),
    ZIGZAG_CAVLC_POS8(3, 4), ZIGZAG_CAVLC_POS8(1, 7), ZIGZAG_CAVLC_POS8(5, 3), ZIGZAG_CAVLC_POS8(6, 3),
    ZIGZAG_CAVLC_POS8(2, 7), ZIGZAG_CAVLC_POS8(6, 4), ZIGZAG_CAVLC_POS8(5, 6), ZIGZAG_CAVLC_POS8(7, 5),
    ZIGZAG_CAVLC_POS8(1, 0), ZIGZAG_CAVLC_POS8(2, 0), ZIGZAG_CAVLC_POS8(0, 3), ZIGZAG_CAVLC_POS8(3, 1),
    ZIGZAG_CAVLC_POS8(3, 2), ZIGZAG_CAVLC_POS8(0, 6), ZIGZAG_CAVLC_POS8(4, 2), ZIGZAG_CAVLC_POS8(6, 1),
    ZIGZAG_CAVLC_POS8(2, 5), ZIGZAG_CAVLC_POS8(2, 6), ZIGZAG_CAVLC_POS8(6, 2), ZIGZAG_CAVLC_POS8(5, 4),
    ZIGZAG_CAVLC_POS8(3, 7), ZIGZAG_CAVLC_POS8(7, 3), ZIGZAG_CAVLC_POS8(4, 7), ZIGZAG_CAVLC_POS8(7, 6),
    ZIGZAG_CAVLC_POS8(0, 1), ZIGZAG_CAVLC_POS8(3, 0), ZIGZAG_CAVLC_POS8(0, 4), ZIGZAG_CAVLC_POS8(4, 0),
    ZIGZAG_CAVLC_POS8(2, 3), ZIGZAG_CAVLC_POS8(1, 5), ZIGZAG_CAVLC_POS8(5, 1), ZIGZAG_CAVLC_POS8(5, 2),
    ZIGZAG_CAVLC_POS8(1, 6), ZIGZAG_CAVLC_POS8(3, 5), ZIGZAG_CAVLC_POS8(7, 1), ZIGZAG_CAVLC_POS8(4, 5),
    ZIGZAG_CAVLC_POS8(4, 6), ZIGZAG_CAVLC_POS8(7, 4), ZIGZAG_CAVLC_POS8(5, 7), ZIGZAG_CAVLC_POS8(6, 7),
    ZIGZAG_CAVLC_POS8(0, 2), ZIGZAG_CAVLC_POS8(2, 1), ZIGZAG_CAVLC_POS8(1, 3), ZIGZAG_CAVLC_POS8(5, 0),
    ZIGZAG_CAVLC_POS8(1, 4), ZIGZAG_CAVLC_POS8(2, 4), ZIGZAG_CAVLC_POS8(6, 0), ZIGZAG_CAVLC_POS8(4, 3),
    ZIGZAG_CAVLC_POS8(0, 7), ZIGZAG_CAVLC_POS8(4, 4), ZIGZAG_CAVLC_POS8(7, 2), ZIGZAG_CAVLC_POS8(3, 6),
    ZIGZAG_CAVLC_POS8(5, 5), ZIGZAG_CAVLC_POS8(6, 5), ZIGZAG_CAVLC_POS8(6, 6), ZIGZAG_CAVLC_POS8(7, 7),
};
#undef ZIGZAG_CAVLC_POS8
137 | |
/* Base values for the 4x4 dequantization coefficients: six rows of three
 * values each.
 * NOTE(review): the row is presumably selected by qp % 6 -- confirm at the
 * point of use. */
static const uint8_t dequant4_coeff_init[6][3] = {
    [0] = { 10, 13, 16 },
    [1] = { 11, 14, 18 },
    [2] = { 13, 16, 20 },
    [3] = { 14, 18, 23 },
    [4] = { 16, 20, 25 },
    [5] = { 18, 23, 29 },
};
146 | |
/* Sixteen indices, each in the range 0..5.
 * NOTE(review): presumably selects a column of dequant8_coeff_init for
 * each position of a 4x4 pattern -- confirm at the point of use. */
static const uint8_t dequant8_coeff_init_scan[16] = {
    0, 3, 4, 3,
    3, 1, 5, 1,
    4, 5, 2, 5,
    3, 1, 5, 1,
};
150 | |
/* Base values for the 8x8 dequantization coefficients: six rows of six
 * values each.
 * NOTE(review): the row is presumably selected by qp % 6 and the column by
 * dequant8_coeff_init_scan -- confirm at the point of use. */
static const uint8_t dequant8_coeff_init[6][6] = {
    [0] = { 20, 18, 32, 19, 25, 24 },
    [1] = { 22, 19, 35, 21, 28, 26 },
    [2] = { 26, 23, 42, 24, 33, 31 },
    [3] = { 28, 25, 45, 26, 35, 33 },
    [4] = { 32, 28, 51, 30, 40, 38 },
    [5] = { 36, 32, 58, 34, 46, 43 },
};
159 | |
/* Candidate pixel formats, in list order: one hardware-accelerated format
 * for each CONFIG_H264_*_HWACCEL option that is enabled at build time,
 * then the software format AV_PIX_FMT_YUV420P. AV_PIX_FMT_NONE terminates
 * the list.
 * NOTE(review): presumably handed to the pixel-format negotiation for
 * 4:2:0 streams -- confirm at the point of use. */
160 | static const enum AVPixelFormat h264_hwaccel_pixfmt_list_420[] = { |
161 | #if CONFIG_H264_DXVA2_HWACCEL |
162 | AV_PIX_FMT_DXVA2_VLD, |
163 | #endif |
164 | #if CONFIG_H264_VAAPI_HWACCEL |
165 | AV_PIX_FMT_VAAPI_VLD, |
166 | #endif |
167 | #if CONFIG_H264_VDA_HWACCEL |
168 | AV_PIX_FMT_VDA_VLD, |
169 | #endif |
170 | #if CONFIG_H264_VDPAU_HWACCEL |
171 | AV_PIX_FMT_VDPAU, |
172 | #endif |
173 | AV_PIX_FMT_YUV420P, |
174 | AV_PIX_FMT_NONE |
175 | }; |
176 | |
/* Candidate pixel formats, in list order: one hardware-accelerated format
 * for each CONFIG_H264_*_HWACCEL option that is enabled at build time,
 * then the software format AV_PIX_FMT_YUVJ420P. AV_PIX_FMT_NONE
 * terminates the list.
 * NOTE(review): presumably the full-range ("JPEG") counterpart of the
 * plain 4:2:0 list -- confirm at the point of use. */
177 | static const enum AVPixelFormat h264_hwaccel_pixfmt_list_jpeg_420[] = { |
178 | #if CONFIG_H264_DXVA2_HWACCEL |
179 | AV_PIX_FMT_DXVA2_VLD, |
180 | #endif |
181 | #if CONFIG_H264_VAAPI_HWACCEL |
182 | AV_PIX_FMT_VAAPI_VLD, |
183 | #endif |
184 | #if CONFIG_H264_VDA_HWACCEL |
185 | AV_PIX_FMT_VDA_VLD, |
186 | #endif |
187 | #if CONFIG_H264_VDPAU_HWACCEL |
188 | AV_PIX_FMT_VDPAU, |
189 | #endif |
190 | AV_PIX_FMT_YUVJ420P, |
191 | AV_PIX_FMT_NONE |
192 | }; |
193 | |
194 | int avpriv_h264_has_num_reorder_frames(AVCodecContext *avctx) |
195 | { |
196 | H264Context *h = avctx->priv_data; |
197 | return h ? h->sps.num_reorder_frames : 0; |
198 | } |
199 | |
200 | static void h264_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type, |
201 | int (*mv)[2][4][2], |
202 | int mb_x, int mb_y, int mb_intra, int mb_skipped) |
203 | { |
204 | H264Context *h = opaque; |
205 | |
206 | h->mb_x = mb_x; |
207 | h->mb_y = mb_y; |
208 | h->mb_xy = mb_x + mb_y * h->mb_stride; |
209 | memset(h->non_zero_count_cache, 0, sizeof(h->non_zero_count_cache)); |
210 | av_assert1(ref >= 0); |
211 | /* FIXME: It is possible albeit uncommon that slice references |
212 | * differ between slices. We take the easy approach and ignore |
213 | * it for now. If this turns out to have any relevance in |
214 | * practice then correct remapping should be added. */ |
215 | if (ref >= h->ref_count[0]) |
216 | ref = 0; |
217 | if (!h->ref_list[0][ref].f.data[0]) { |
218 | av_log(h->avctx, AV_LOG_DEBUG, "Reference not available for error concealing\n"); |
219 | ref = 0; |
220 | } |
221 | if ((h->ref_list[0][ref].reference&3) != 3) { |
222 | av_log(h->avctx, AV_LOG_DEBUG, "Reference invalid\n"); |
223 | return; |
224 | } |
225 | fill_rectangle(&h->cur_pic.ref_index[0][4 * h->mb_xy], |
226 | 2, 2, 2, ref, 1); |
227 | fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1); |
228 | fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8, |
229 | pack16to32((*mv)[0][0][0], (*mv)[0][0][1]), 4); |
230 | h->mb_mbaff = |
231 | h->mb_field_decoding_flag = 0; |
232 | ff_h264_hl_decode_mb(h); |
233 | } |
234 | |
235 | void ff_h264_draw_horiz_band(H264Context *h, int y, int height) |
236 | { |
237 | AVCodecContext *avctx = h->avctx; |
238 | Picture *cur = &h->cur_pic; |
239 | Picture *last = h->ref_list[0][0].f.data[0] ? &h->ref_list[0][0] : NULL; |
240 | const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt); |
241 | int vshift = desc->log2_chroma_h; |
242 | const int field_pic = h->picture_structure != PICT_FRAME; |
243 | if (field_pic) { |
244 | height <<= 1; |
245 | y <<= 1; |
246 | } |
247 | |
248 | height = FFMIN(height, avctx->height - y); |
249 | |
250 | if (field_pic && h->first_field && !(avctx->slice_flags & SLICE_FLAG_ALLOW_FIELD)) |
251 | return; |
252 | |
253 | if (avctx->draw_horiz_band) { |
254 | AVFrame *src; |
255 | int offset[AV_NUM_DATA_POINTERS]; |
256 | int i; |
257 | |
258 | if (cur->f.pict_type == AV_PICTURE_TYPE_B || h->low_delay || |
259 | (avctx->slice_flags & SLICE_FLAG_CODED_ORDER)) |
260 | src = &cur->f; |
261 | else if (last) |
262 | src = &last->f; |
263 | else |
264 | return; |
265 | |
266 | offset[0] = y * src->linesize[0]; |
267 | offset[1] = |
268 | offset[2] = (y >> vshift) * src->linesize[1]; |
269 | for (i = 3; i < AV_NUM_DATA_POINTERS; i++) |
270 | offset[i] = 0; |
271 | |
272 | emms_c(); |
273 | |
274 | avctx->draw_horiz_band(avctx, src, offset, |
275 | y, h->picture_structure, height); |
276 | } |
277 | } |
278 | |
279 | static void unref_picture(H264Context *h, Picture *pic) |
280 | { |
281 | int off = offsetof(Picture, tf) + sizeof(pic->tf); |
282 | int i; |
283 | |
284 | if (!pic->f.data[0]) |
285 | return; |
286 | |
287 | ff_thread_release_buffer(h->avctx, &pic->tf); |
288 | av_buffer_unref(&pic->hwaccel_priv_buf); |
289 | |
290 | av_buffer_unref(&pic->qscale_table_buf); |
291 | av_buffer_unref(&pic->mb_type_buf); |
292 | for (i = 0; i < 2; i++) { |
293 | av_buffer_unref(&pic->motion_val_buf[i]); |
294 | av_buffer_unref(&pic->ref_index_buf[i]); |
295 | } |
296 | |
297 | memset((uint8_t*)pic + off, 0, sizeof(*pic) - off); |
298 | } |
299 | |
300 | static void release_unused_pictures(H264Context *h, int remove_current) |
301 | { |
302 | int i; |
303 | |
304 | /* release non reference frames */ |
305 | for (i = 0; i < MAX_PICTURE_COUNT; i++) { |
306 | if (h->DPB[i].f.data[0] && !h->DPB[i].reference && |
307 | (remove_current || &h->DPB[i] != h->cur_pic_ptr)) { |
308 | unref_picture(h, &h->DPB[i]); |
309 | } |
310 | } |
311 | } |
312 | |
313 | static int ref_picture(H264Context *h, Picture *dst, Picture *src) |
314 | { |
315 | int ret, i; |
316 | |
317 | av_assert0(!dst->f.buf[0]); |
318 | av_assert0(src->f.buf[0]); |
319 | |
320 | src->tf.f = &src->f; |
321 | dst->tf.f = &dst->f; |
322 | ret = ff_thread_ref_frame(&dst->tf, &src->tf); |
323 | if (ret < 0) |
324 | goto fail; |
325 | |
326 | dst->qscale_table_buf = av_buffer_ref(src->qscale_table_buf); |
327 | dst->mb_type_buf = av_buffer_ref(src->mb_type_buf); |
328 | if (!dst->qscale_table_buf || !dst->mb_type_buf) |
329 | goto fail; |
330 | dst->qscale_table = src->qscale_table; |
331 | dst->mb_type = src->mb_type; |
332 | |
333 | for (i = 0; i < 2; i++) { |
334 | dst->motion_val_buf[i] = av_buffer_ref(src->motion_val_buf[i]); |
335 | dst->ref_index_buf[i] = av_buffer_ref(src->ref_index_buf[i]); |
336 | if (!dst->motion_val_buf[i] || !dst->ref_index_buf[i]) |
337 | goto fail; |
338 | dst->motion_val[i] = src->motion_val[i]; |
339 | dst->ref_index[i] = src->ref_index[i]; |
340 | } |
341 | |
342 | if (src->hwaccel_picture_private) { |
343 | dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf); |
344 | if (!dst->hwaccel_priv_buf) |
345 | goto fail; |
346 | dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data; |
347 | } |
348 | |
349 | for (i = 0; i < 2; i++) |
350 | dst->field_poc[i] = src->field_poc[i]; |
351 | |
352 | memcpy(dst->ref_poc, src->ref_poc, sizeof(src->ref_poc)); |
353 | memcpy(dst->ref_count, src->ref_count, sizeof(src->ref_count)); |
354 | |
355 | dst->poc = src->poc; |
356 | dst->frame_num = src->frame_num; |
357 | dst->mmco_reset = src->mmco_reset; |
358 | dst->pic_id = src->pic_id; |
359 | dst->long_ref = src->long_ref; |
360 | dst->mbaff = src->mbaff; |
361 | dst->field_picture = src->field_picture; |
362 | dst->needs_realloc = src->needs_realloc; |
363 | dst->reference = src->reference; |
364 | dst->sync = src->sync; |
365 | dst->crop = src->crop; |
366 | dst->crop_left = src->crop_left; |
367 | dst->crop_top = src->crop_top; |
368 | |
369 | return 0; |
370 | fail: |
371 | unref_picture(h, dst); |
372 | return ret; |
373 | } |
374 | |
375 | static int alloc_scratch_buffers(H264Context *h, int linesize) |
376 | { |
377 | int alloc_size = FFALIGN(FFABS(linesize) + 32, 32); |
378 | |
379 | if (h->bipred_scratchpad) |
380 | return 0; |
381 | |
382 | h->bipred_scratchpad = av_malloc(16 * 6 * alloc_size); |
383 | // edge emu needs blocksize + filter length - 1 |
384 | // (= 21x21 for h264) |
385 | h->edge_emu_buffer = av_mallocz(alloc_size * 2 * 21); |
386 | h->me.scratchpad = av_mallocz(alloc_size * 2 * 16 * 2); |
387 | |
388 | if (!h->bipred_scratchpad || !h->edge_emu_buffer || !h->me.scratchpad) { |
389 | av_freep(&h->bipred_scratchpad); |
390 | av_freep(&h->edge_emu_buffer); |
391 | av_freep(&h->me.scratchpad); |
392 | return AVERROR(ENOMEM); |
393 | } |
394 | |
395 | h->me.temp = h->me.scratchpad; |
396 | |
397 | return 0; |
398 | } |
399 | |
400 | static int init_table_pools(H264Context *h) |
401 | { |
402 | const int big_mb_num = h->mb_stride * (h->mb_height + 1) + 1; |
403 | const int mb_array_size = h->mb_stride * h->mb_height; |
404 | const int b4_stride = h->mb_width * 4 + 1; |
405 | const int b4_array_size = b4_stride * h->mb_height * 4; |
406 | |
407 | h->qscale_table_pool = av_buffer_pool_init(big_mb_num + h->mb_stride, |
408 | av_buffer_allocz); |
409 | h->mb_type_pool = av_buffer_pool_init((big_mb_num + h->mb_stride) * |
410 | sizeof(uint32_t), av_buffer_allocz); |
411 | h->motion_val_pool = av_buffer_pool_init(2 * (b4_array_size + 4) * |
412 | sizeof(int16_t), av_buffer_allocz); |
413 | h->ref_index_pool = av_buffer_pool_init(4 * mb_array_size, av_buffer_allocz); |
414 | |
415 | if (!h->qscale_table_pool || !h->mb_type_pool || !h->motion_val_pool || |
416 | !h->ref_index_pool) { |
417 | av_buffer_pool_uninit(&h->qscale_table_pool); |
418 | av_buffer_pool_uninit(&h->mb_type_pool); |
419 | av_buffer_pool_uninit(&h->motion_val_pool); |
420 | av_buffer_pool_uninit(&h->ref_index_pool); |
421 | return AVERROR(ENOMEM); |
422 | } |
423 | |
424 | return 0; |
425 | } |
426 | |
427 | static int alloc_picture(H264Context *h, Picture *pic) |
428 | { |
429 | int i, ret = 0; |
430 | |
431 | av_assert0(!pic->f.data[0]); |
432 | |
433 | pic->tf.f = &pic->f; |
434 | ret = ff_thread_get_buffer(h->avctx, &pic->tf, pic->reference ? |
435 | AV_GET_BUFFER_FLAG_REF : 0); |
436 | if (ret < 0) |
437 | goto fail; |
438 | |
439 | h->linesize = pic->f.linesize[0]; |
440 | h->uvlinesize = pic->f.linesize[1]; |
441 | pic->crop = h->sps.crop; |
442 | pic->crop_top = h->sps.crop_top; |
443 | pic->crop_left= h->sps.crop_left; |
444 | |
445 | if (h->avctx->hwaccel) { |
446 | const AVHWAccel *hwaccel = h->avctx->hwaccel; |
447 | av_assert0(!pic->hwaccel_picture_private); |
448 | if (hwaccel->priv_data_size) { |
449 | pic->hwaccel_priv_buf = av_buffer_allocz(hwaccel->priv_data_size); |
450 | if (!pic->hwaccel_priv_buf) |
451 | return AVERROR(ENOMEM); |
452 | pic->hwaccel_picture_private = pic->hwaccel_priv_buf->data; |
453 | } |
454 | } |
455 | |
456 | if (!h->qscale_table_pool) { |
457 | ret = init_table_pools(h); |
458 | if (ret < 0) |
459 | goto fail; |
460 | } |
461 | |
462 | pic->qscale_table_buf = av_buffer_pool_get(h->qscale_table_pool); |
463 | pic->mb_type_buf = av_buffer_pool_get(h->mb_type_pool); |
464 | if (!pic->qscale_table_buf || !pic->mb_type_buf) |
465 | goto fail; |
466 | |
467 | pic->mb_type = (uint32_t*)pic->mb_type_buf->data + 2 * h->mb_stride + 1; |
468 | pic->qscale_table = pic->qscale_table_buf->data + 2 * h->mb_stride + 1; |
469 | |
470 | for (i = 0; i < 2; i++) { |
471 | pic->motion_val_buf[i] = av_buffer_pool_get(h->motion_val_pool); |
472 | pic->ref_index_buf[i] = av_buffer_pool_get(h->ref_index_pool); |
473 | if (!pic->motion_val_buf[i] || !pic->ref_index_buf[i]) |
474 | goto fail; |
475 | |
476 | pic->motion_val[i] = (int16_t (*)[2])pic->motion_val_buf[i]->data + 4; |
477 | pic->ref_index[i] = pic->ref_index_buf[i]->data; |
478 | } |
479 | |
480 | return 0; |
481 | fail: |
482 | unref_picture(h, pic); |
483 | return (ret < 0) ? ret : AVERROR(ENOMEM); |
484 | } |
485 | |
486 | static inline int pic_is_unused(H264Context *h, Picture *pic) |
487 | { |
488 | if (pic->f.data[0] == NULL) |
489 | return 1; |
490 | if (pic->needs_realloc && !(pic->reference & DELAYED_PIC_REF)) |
491 | return 1; |
492 | return 0; |
493 | } |
494 | |
495 | static int find_unused_picture(H264Context *h) |
496 | { |
497 | int i; |
498 | |
499 | for (i = 0; i < MAX_PICTURE_COUNT; i++) { |
500 | if (pic_is_unused(h, &h->DPB[i])) |
501 | break; |
502 | } |
503 | if (i == MAX_PICTURE_COUNT) |
504 | return AVERROR_INVALIDDATA; |
505 | |
506 | if (h->DPB[i].needs_realloc) { |
507 | h->DPB[i].needs_realloc = 0; |
508 | unref_picture(h, &h->DPB[i]); |
509 | } |
510 | |
511 | return i; |
512 | } |
513 | |
514 | /** |
515 | * Check if the top & left blocks are available if needed and |
516 | * change the dc mode so it only uses the available blocks. |
517 | */ |
518 | int ff_h264_check_intra4x4_pred_mode(H264Context *h) |
519 | { |
520 | static const int8_t top[12] = { |
521 | -1, 0, LEFT_DC_PRED, -1, -1, -1, -1, -1, 0 |
522 | }; |
523 | static const int8_t left[12] = { |
524 | 0, -1, TOP_DC_PRED, 0, -1, -1, -1, 0, -1, DC_128_PRED |
525 | }; |
526 | int i; |
527 | |
528 | if (!(h->top_samples_available & 0x8000)) { |
529 | for (i = 0; i < 4; i++) { |
530 | int status = top[h->intra4x4_pred_mode_cache[scan8[0] + i]]; |
531 | if (status < 0) { |
532 | av_log(h->avctx, AV_LOG_ERROR, |
533 | "top block unavailable for requested intra4x4 mode %d at %d %d\n", |
534 | status, h->mb_x, h->mb_y); |
535 | return AVERROR_INVALIDDATA; |
536 | } else if (status) { |
537 | h->intra4x4_pred_mode_cache[scan8[0] + i] = status; |
538 | } |
539 | } |
540 | } |
541 | |
542 | if ((h->left_samples_available & 0x8888) != 0x8888) { |
543 | static const int mask[4] = { 0x8000, 0x2000, 0x80, 0x20 }; |
544 | for (i = 0; i < 4; i++) |
545 | if (!(h->left_samples_available & mask[i])) { |
546 | int status = left[h->intra4x4_pred_mode_cache[scan8[0] + 8 * i]]; |
547 | if (status < 0) { |
548 | av_log(h->avctx, AV_LOG_ERROR, |
549 | "left block unavailable for requested intra4x4 mode %d at %d %d\n", |
550 | status, h->mb_x, h->mb_y); |
551 | return AVERROR_INVALIDDATA; |
552 | } else if (status) { |
553 | h->intra4x4_pred_mode_cache[scan8[0] + 8 * i] = status; |
554 | } |
555 | } |
556 | } |
557 | |
558 | return 0; |
559 | } // FIXME cleanup like ff_h264_check_intra_pred_mode |
560 | |
561 | /** |
562 | * Check if the top & left blocks are available if needed and |
563 | * change the dc mode so it only uses the available blocks. |
564 | */ |
565 | int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma) |
566 | { |
567 | static const int8_t top[4] = { LEFT_DC_PRED8x8, 1, -1, -1 }; |
568 | static const int8_t left[5] = { TOP_DC_PRED8x8, -1, 2, -1, DC_128_PRED8x8 }; |
569 | |
570 | if (mode > 3U) { |
571 | av_log(h->avctx, AV_LOG_ERROR, |
572 | "out of range intra chroma pred mode at %d %d\n", |
573 | h->mb_x, h->mb_y); |
574 | return AVERROR_INVALIDDATA; |
575 | } |
576 | |
577 | if (!(h->top_samples_available & 0x8000)) { |
578 | mode = top[mode]; |
579 | if (mode < 0) { |
580 | av_log(h->avctx, AV_LOG_ERROR, |
581 | "top block unavailable for requested intra mode at %d %d\n", |
582 | h->mb_x, h->mb_y); |
583 | return AVERROR_INVALIDDATA; |
584 | } |
585 | } |
586 | |
587 | if ((h->left_samples_available & 0x8080) != 0x8080) { |
588 | mode = left[mode]; |
589 | if (is_chroma && (h->left_samples_available & 0x8080)) { |
590 | // mad cow disease mode, aka MBAFF + constrained_intra_pred |
591 | mode = ALZHEIMER_DC_L0T_PRED8x8 + |
592 | (!(h->left_samples_available & 0x8000)) + |
593 | 2 * (mode == DC_128_PRED8x8); |
594 | } |
595 | if (mode < 0) { |
596 | av_log(h->avctx, AV_LOG_ERROR, |
597 | "left block unavailable for requested intra mode at %d %d\n", |
598 | h->mb_x, h->mb_y); |
599 | return AVERROR_INVALIDDATA; |
600 | } |
601 | } |
602 | |
603 | return mode; |
604 | } |
605 | |
606 | const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, |
607 | int *dst_length, int *consumed, int length) |
608 | { |
609 | int i, si, di; |
610 | uint8_t *dst; |
611 | int bufidx; |
612 | |
613 | // src[0]&0x80; // forbidden bit |
614 | h->nal_ref_idc = src[0] >> 5; |
615 | h->nal_unit_type = src[0] & 0x1F; |
616 | |
617 | src++; |
618 | length--; |
619 | |
620 | #define STARTCODE_TEST \ |
621 | if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) { \ |
622 | if (src[i + 2] != 3) { \ |
623 | /* startcode, so we must be past the end */ \ |
624 | length = i; \ |
625 | } \ |
626 | break; \ |
627 | } |
628 | |
629 | #if HAVE_FAST_UNALIGNED |
630 | #define FIND_FIRST_ZERO \ |
631 | if (i > 0 && !src[i]) \ |
632 | i--; \ |
633 | while (src[i]) \ |
634 | i++ |
635 | |
636 | #if HAVE_FAST_64BIT |
637 | for (i = 0; i + 1 < length; i += 9) { |
638 | if (!((~AV_RN64A(src + i) & |
639 | (AV_RN64A(src + i) - 0x0100010001000101ULL)) & |
640 | 0x8000800080008080ULL)) |
641 | continue; |
642 | FIND_FIRST_ZERO; |
643 | STARTCODE_TEST; |
644 | i -= 7; |
645 | } |
646 | #else |
647 | for (i = 0; i + 1 < length; i += 5) { |
648 | if (!((~AV_RN32A(src + i) & |
649 | (AV_RN32A(src + i) - 0x01000101U)) & |
650 | 0x80008080U)) |
651 | continue; |
652 | FIND_FIRST_ZERO; |
653 | STARTCODE_TEST; |
654 | i -= 3; |
655 | } |
656 | #endif |
657 | #else |
658 | for (i = 0; i + 1 < length; i += 2) { |
659 | if (src[i]) |
660 | continue; |
661 | if (i > 0 && src[i - 1] == 0) |
662 | i--; |
663 | STARTCODE_TEST; |
664 | } |
665 | #endif |
666 | |
667 | // use second escape buffer for inter data |
668 | bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; |
669 | |
670 | si = h->rbsp_buffer_size[bufidx]; |
671 | av_fast_padded_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+MAX_MBPAIR_SIZE); |
672 | dst = h->rbsp_buffer[bufidx]; |
673 | |
674 | if (dst == NULL) |
675 | return NULL; |
676 | |
677 | if(i>=length-1){ //no escaped 0 |
678 | *dst_length= length; |
679 | *consumed= length+1; //+1 for the header |
680 | if(h->avctx->flags2 & CODEC_FLAG2_FAST){ |
681 | return src; |
682 | }else{ |
683 | memcpy(dst, src, length); |
684 | return dst; |
685 | } |
686 | } |
687 | |
688 | memcpy(dst, src, i); |
689 | si = di = i; |
690 | while (si + 2 < length) { |
691 | // remove escapes (very rare 1:2^22) |
692 | if (src[si + 2] > 3) { |
693 | dst[di++] = src[si++]; |
694 | dst[di++] = src[si++]; |
695 | } else if (src[si] == 0 && src[si + 1] == 0) { |
696 | if (src[si + 2] == 3) { // escape |
697 | dst[di++] = 0; |
698 | dst[di++] = 0; |
699 | si += 3; |
700 | continue; |
701 | } else // next start code |
702 | goto nsc; |
703 | } |
704 | |
705 | dst[di++] = src[si++]; |
706 | } |
707 | while (si < length) |
708 | dst[di++] = src[si++]; |
709 | |
710 | nsc: |
711 | memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE); |
712 | |
713 | *dst_length = di; |
714 | *consumed = si + 1; // +1 for the header |
715 | /* FIXME store exact number of bits in the getbitcontext |
716 | * (it is needed for decoding) */ |
717 | return dst; |
718 | } |
719 | |
720 | /** |
721 | * Identify the exact end of the bitstream |
722 | * @return the length of the trailing, or 0 if damaged |
723 | */ |
724 | static int decode_rbsp_trailing(H264Context *h, const uint8_t *src) |
725 | { |
726 | int v = *src; |
727 | int r; |
728 | |
729 | tprintf(h->avctx, "rbsp trailing %X\n", v); |
730 | |
731 | for (r = 1; r < 9; r++) { |
732 | if (v & 1) |
733 | return r; |
734 | v >>= 1; |
735 | } |
736 | return 0; |
737 | } |
738 | |
739 | static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, |
740 | int height, int y_offset, int list) |
741 | { |
742 | int raw_my = h->mv_cache[list][scan8[n]][1]; |
743 | int filter_height_down = (raw_my & 3) ? 3 : 0; |
744 | int full_my = (raw_my >> 2) + y_offset; |
745 | int bottom = full_my + filter_height_down + height; |
746 | |
747 | av_assert2(height >= 0); |
748 | |
749 | return FFMAX(0, bottom); |
750 | } |
751 | |
752 | static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, |
753 | int height, int y_offset, int list0, |
754 | int list1, int *nrefs) |
755 | { |
756 | int my; |
757 | |
758 | y_offset += 16 * (h->mb_y >> MB_FIELD(h)); |
759 | |
760 | if (list0) { |
761 | int ref_n = h->ref_cache[0][scan8[n]]; |
762 | Picture *ref = &h->ref_list[0][ref_n]; |
763 | |
764 | // Error resilience puts the current picture in the ref list. |
765 | // Don't try to wait on these as it will cause a deadlock. |
766 | // Fields can wait on each other, though. |
767 | if (ref->tf.progress->data != h->cur_pic.tf.progress->data || |
768 | (ref->reference & 3) != h->picture_structure) { |
769 | my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0); |
770 | if (refs[0][ref_n] < 0) |
771 | nrefs[0] += 1; |
772 | refs[0][ref_n] = FFMAX(refs[0][ref_n], my); |
773 | } |
774 | } |
775 | |
776 | if (list1) { |
777 | int ref_n = h->ref_cache[1][scan8[n]]; |
778 | Picture *ref = &h->ref_list[1][ref_n]; |
779 | |
780 | if (ref->tf.progress->data != h->cur_pic.tf.progress->data || |
781 | (ref->reference & 3) != h->picture_structure) { |
782 | my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1); |
783 | if (refs[1][ref_n] < 0) |
784 | nrefs[1] += 1; |
785 | refs[1][ref_n] = FFMAX(refs[1][ref_n], my); |
786 | } |
787 | } |
788 | } |
789 | |
790 | /** |
791 | * Wait until all reference frames are available for MC operations. |
792 | * |
793 | * @param h the H264 context |
794 | */ |
795 | static void await_references(H264Context *h) |
796 | { |
797 | const int mb_xy = h->mb_xy; |
798 | const int mb_type = h->cur_pic.mb_type[mb_xy]; |
799 | int refs[2][48]; |
800 | int nrefs[2] = { 0 }; |
801 | int ref, list; |
802 | |
803 | memset(refs, -1, sizeof(refs)); |
804 | |
805 | if (IS_16X16(mb_type)) { |
806 | get_lowest_part_y(h, refs, 0, 16, 0, |
807 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); |
808 | } else if (IS_16X8(mb_type)) { |
809 | get_lowest_part_y(h, refs, 0, 8, 0, |
810 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); |
811 | get_lowest_part_y(h, refs, 8, 8, 8, |
812 | IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs); |
813 | } else if (IS_8X16(mb_type)) { |
814 | get_lowest_part_y(h, refs, 0, 16, 0, |
815 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); |
816 | get_lowest_part_y(h, refs, 4, 16, 0, |
817 | IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs); |
818 | } else { |
819 | int i; |
820 | |
821 | av_assert2(IS_8X8(mb_type)); |
822 | |
823 | for (i = 0; i < 4; i++) { |
824 | const int sub_mb_type = h->sub_mb_type[i]; |
825 | const int n = 4 * i; |
826 | int y_offset = (i & 2) << 2; |
827 | |
828 | if (IS_SUB_8X8(sub_mb_type)) { |
829 | get_lowest_part_y(h, refs, n, 8, y_offset, |
830 | IS_DIR(sub_mb_type, 0, 0), |
831 | IS_DIR(sub_mb_type, 0, 1), |
832 | nrefs); |
833 | } else if (IS_SUB_8X4(sub_mb_type)) { |
834 | get_lowest_part_y(h, refs, n, 4, y_offset, |
835 | IS_DIR(sub_mb_type, 0, 0), |
836 | IS_DIR(sub_mb_type, 0, 1), |
837 | nrefs); |
838 | get_lowest_part_y(h, refs, n + 2, 4, y_offset + 4, |
839 | IS_DIR(sub_mb_type, 0, 0), |
840 | IS_DIR(sub_mb_type, 0, 1), |
841 | nrefs); |
842 | } else if (IS_SUB_4X8(sub_mb_type)) { |
843 | get_lowest_part_y(h, refs, n, 8, y_offset, |
844 | IS_DIR(sub_mb_type, 0, 0), |
845 | IS_DIR(sub_mb_type, 0, 1), |
846 | nrefs); |
847 | get_lowest_part_y(h, refs, n + 1, 8, y_offset, |
848 | IS_DIR(sub_mb_type, 0, 0), |
849 | IS_DIR(sub_mb_type, 0, 1), |
850 | nrefs); |
851 | } else { |
852 | int j; |
853 | av_assert2(IS_SUB_4X4(sub_mb_type)); |
854 | for (j = 0; j < 4; j++) { |
855 | int sub_y_offset = y_offset + 2 * (j & 2); |
856 | get_lowest_part_y(h, refs, n + j, 4, sub_y_offset, |
857 | IS_DIR(sub_mb_type, 0, 0), |
858 | IS_DIR(sub_mb_type, 0, 1), |
859 | nrefs); |
860 | } |
861 | } |
862 | } |
863 | } |
864 | |
865 | for (list = h->list_count - 1; list >= 0; list--) |
866 | for (ref = 0; ref < 48 && nrefs[list]; ref++) { |
867 | int row = refs[list][ref]; |
868 | if (row >= 0) { |
869 | Picture *ref_pic = &h->ref_list[list][ref]; |
870 | int ref_field = ref_pic->reference - 1; |
871 | int ref_field_picture = ref_pic->field_picture; |
872 | int pic_height = 16 * h->mb_height >> ref_field_picture; |
873 | |
874 | row <<= MB_MBAFF(h); |
875 | nrefs[list]--; |
876 | |
877 | if (!FIELD_PICTURE(h) && ref_field_picture) { // frame referencing two fields |
878 | ff_thread_await_progress(&ref_pic->tf, |
879 | FFMIN((row >> 1) - !(row & 1), |
880 | pic_height - 1), |
881 | 1); |
882 | ff_thread_await_progress(&ref_pic->tf, |
883 | FFMIN((row >> 1), pic_height - 1), |
884 | 0); |
885 | } else if (FIELD_PICTURE(h) && !ref_field_picture) { // field referencing one field of a frame |
886 | ff_thread_await_progress(&ref_pic->tf, |
887 | FFMIN(row * 2 + ref_field, |
888 | pic_height - 1), |
889 | 0); |
890 | } else if (FIELD_PICTURE(h)) { |
891 | ff_thread_await_progress(&ref_pic->tf, |
892 | FFMIN(row, pic_height - 1), |
893 | ref_field); |
894 | } else { |
895 | ff_thread_await_progress(&ref_pic->tf, |
896 | FFMIN(row, pic_height - 1), |
897 | 0); |
898 | } |
899 | } |
900 | } |
901 | } |
902 | |
/**
 * Motion-compensate one partition from a single reference picture.
 *
 * The motion vector is read from h->mv_cache[list][scan8[n]]; the luma
 * block is produced with qpix_op and the chroma blocks with chroma_op
 * (or with qpix_op as well when chroma_idc == 3).
 *
 * @param h             decoder context
 * @param pic           reference picture to read from
 * @param n             block index, used through scan8[] to address mv_cache
 * @param square        if zero, the luma op is applied a second time at
 *                      offset +delta in both source and destination
 * @param height        partition height; passed (halved for 4:2:0) to
 *                      chroma_op as its row count
 * @param delta         byte offset of the second luma op when !square
 * @param list          reference list whose motion vector is used
 * @param dest_y        luma destination
 * @param dest_cb       Cb destination
 * @param dest_cr       Cr destination
 * @param src_x_offset  horizontal position, scaled by 8 and added to the
 *                      cached vector's x component
 * @param src_y_offset  vertical position, scaled by 8 and added to the
 *                      cached vector's y component
 * @param qpix_op       table of luma functions, indexed by the fractional
 *                      part of the vector
 * @param chroma_op     chroma function
 * @param pixel_shift   shift applied to horizontal sample offsets to turn
 *                      them into byte offsets
 * @param chroma_idc    chroma format: 1 = 4:2:0, 2 = 4:2:2, 3 = 4:4:4
 */
static av_always_inline void mc_dir_part(H264Context *h, Picture *pic,
                                         int n, int square, int height,
                                         int delta, int list,
                                         uint8_t *dest_y, uint8_t *dest_cb,
                                         uint8_t *dest_cr,
                                         int src_x_offset, int src_y_offset,
                                         qpel_mc_func *qpix_op,
                                         h264_chroma_mc_func chroma_op,
                                         int pixel_shift, int chroma_idc)
{
    /* absolute source position: cached vector plus block position */
    const int mx      = h->mv_cache[list][scan8[n]][0] + src_x_offset * 8;
    int my            = h->mv_cache[list][scan8[n]][1] + src_y_offset * 8;
    /* the two low bits of each component select the luma function */
    const int luma_xy = (mx & 3) + ((my & 3) << 2);
    /* byte offset of the full-sample position inside a plane */
    ptrdiff_t offset  = ((mx >> 2) << pixel_shift) + (my >> 2) * h->mb_linesize;
    uint8_t *src_y    = pic->f.data[0] + offset;
    uint8_t *src_cb, *src_cr;
    int extra_width  = 0;
    int extra_height = 0;
    int emu = 0; /* set once the source block had to be edge-emulated */
    const int full_mx    = mx >> 2;
    const int full_my    = my >> 2;
    const int pic_width  = 16 * h->mb_width;
    const int pic_height = 16 * h->mb_height >> MB_FIELD(h);
    int ysh;

    /* shrink the directly readable area by 3 samples per side when the
     * vector has low bits set (presumably to account for the samples the
     * interpolation reads around the block -- TODO confirm) */
    if (mx & 7)
        extra_width -= 3;
    if (my & 7)
        extra_height -= 3;

    /* the block reaches outside the readable area: build a 21x21 copy
     * (16 + 5, starting 2 samples up/left) in edge_emu_buffer and read
     * from there instead */
    if (full_mx                <          0 - extra_width  ||
        full_my                <          0 - extra_height ||
        full_mx + 16 /*FIXME*/ > pic_width  + extra_width  ||
        full_my + 16 /*FIXME*/ > pic_height + extra_height) {
        h->vdsp.emulated_edge_mc(h->edge_emu_buffer, h->mb_linesize,
                                 src_y - (2 << pixel_shift) - 2 * h->mb_linesize,
                                 h->mb_linesize,
                                 16 + 5, 16 + 5 /*FIXME*/, full_mx - 2,
                                 full_my - 2, pic_width, pic_height);
        src_y = h->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
        emu   = 1;
    }

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); // FIXME try variable height perhaps?
    if (!square)
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);

    /* luma only when gray decoding is compiled in and requested */
    if (CONFIG_GRAY && h->flags & CODEC_FLAG_GRAY)
        return;

    if (chroma_idc == 3 /* yuv444 */) {
        /* chroma planes are handled exactly like luma: same offset, same
         * functions, same edge emulation (the buffer is reused per plane) */
        src_cb = pic->f.data[1] + offset;
        if (emu) {
            h->vdsp.emulated_edge_mc(h->edge_emu_buffer, h->mb_linesize,
                                     src_cb - (2 << pixel_shift) - 2 * h->mb_linesize,
                                     h->mb_linesize,
                                     16 + 5, 16 + 5 /*FIXME*/,
                                     full_mx - 2, full_my - 2,
                                     pic_width, pic_height);
            src_cb = h->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); // FIXME try variable height perhaps?
        if (!square)
            qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);

        src_cr = pic->f.data[2] + offset;
        if (emu) {
            h->vdsp.emulated_edge_mc(h->edge_emu_buffer, h->mb_linesize,
                                     src_cr - (2 << pixel_shift) - 2 * h->mb_linesize,
                                     h->mb_linesize,
                                     16 + 5, 16 + 5 /*FIXME*/,
                                     full_mx - 2, full_my - 2,
                                     pic_width, pic_height);
            src_cr = h->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); // FIXME try variable height perhaps?
        if (!square)
            qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);
        return;
    }

    /* vertical shift from vector units to chroma rows: 3 for 4:2:0,
     * 2 for 4:2:2 */
    ysh = 3 - (chroma_idc == 2 /* yuv422 */);
    if (chroma_idc == 1 /* yuv420 */ && MB_FIELD(h)) {
        // chroma offset when predicting from a field of opposite parity
        my  += 2 * ((h->mb_y & 1) - (pic->reference - 1));
        /* the adjusted vector may now leave the chroma plane vertically */
        emu |= (my >> 3) < 0 || (my >> 3) + 8 >= (pic_height >> 1);
    }

    src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) +
             (my >> ysh) * h->mb_uvlinesize;
    src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) +
             (my >> ysh) * h->mb_uvlinesize;

    /* Cb: emulate a 9 x (8 * chroma_idc + 1) block if needed, then predict */
    if (emu) {
        h->vdsp.emulated_edge_mc(h->edge_emu_buffer, h->mb_uvlinesize, src_cb, h->mb_uvlinesize,
                                 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
                                 pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
        src_cb = h->edge_emu_buffer;
    }
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize,
              height >> (chroma_idc == 1 /* yuv420 */),
              mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);

    /* Cr: same procedure; edge_emu_buffer is reused */
    if (emu) {
        h->vdsp.emulated_edge_mc(h->edge_emu_buffer, h->mb_uvlinesize, src_cr, h->mb_uvlinesize,
                                 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
                                 pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
        src_cr = h->edge_emu_buffer;
    }
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
              mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);
}
1015 | |
1016 | static av_always_inline void mc_part_std(H264Context *h, int n, int square, |
1017 | int height, int delta, |
1018 | uint8_t *dest_y, uint8_t *dest_cb, |
1019 | uint8_t *dest_cr, |
1020 | int x_offset, int y_offset, |
1021 | qpel_mc_func *qpix_put, |
1022 | h264_chroma_mc_func chroma_put, |
1023 | qpel_mc_func *qpix_avg, |
1024 | h264_chroma_mc_func chroma_avg, |
1025 | int list0, int list1, |
1026 | int pixel_shift, int chroma_idc) |
1027 | { |
1028 | qpel_mc_func *qpix_op = qpix_put; |
1029 | h264_chroma_mc_func chroma_op = chroma_put; |
1030 | |
1031 | dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize; |
1032 | if (chroma_idc == 3 /* yuv444 */) { |
1033 | dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize; |
1034 | dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize; |
1035 | } else if (chroma_idc == 2 /* yuv422 */) { |
1036 | dest_cb += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize; |
1037 | dest_cr += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize; |
1038 | } else { /* yuv420 */ |
1039 | dest_cb += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize; |
1040 | dest_cr += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize; |
1041 | } |
1042 | x_offset += 8 * h->mb_x; |
1043 | y_offset += 8 * (h->mb_y >> MB_FIELD(h)); |
1044 | |
1045 | if (list0) { |
1046 | Picture *ref = &h->ref_list[0][h->ref_cache[0][scan8[n]]]; |
1047 | mc_dir_part(h, ref, n, square, height, delta, 0, |
1048 | dest_y, dest_cb, dest_cr, x_offset, y_offset, |
1049 | qpix_op, chroma_op, pixel_shift, chroma_idc); |
1050 | |
1051 | qpix_op = qpix_avg; |
1052 | chroma_op = chroma_avg; |
1053 | } |
1054 | |
1055 | if (list1) { |
1056 | Picture *ref = &h->ref_list[1][h->ref_cache[1][scan8[n]]]; |
1057 | mc_dir_part(h, ref, n, square, height, delta, 1, |
1058 | dest_y, dest_cb, dest_cr, x_offset, y_offset, |
1059 | qpix_op, chroma_op, pixel_shift, chroma_idc); |
1060 | } |
1061 | } |
1062 | |
/**
 * Weighted motion compensation of one partition.
 *
 * When both lists are used, the list 0 prediction is written to the
 * destination, the list 1 prediction to h->bipred_scratchpad, and the two
 * are merged with the bi-weight functions. When a single list is used,
 * the prediction is written to the destination and then weighted in place.
 *
 * x_offset / y_offset locate the partition inside the macroblock; they
 * advance the destination pointers and, after the macroblock position is
 * added, give the source position passed to mc_dir_part().
 */
static av_always_inline void mc_part_weighted(H264Context *h, int n, int square,
                                              int height, int delta,
                                              uint8_t *dest_y, uint8_t *dest_cb,
                                              uint8_t *dest_cr,
                                              int x_offset, int y_offset,
                                              qpel_mc_func *qpix_put,
                                              h264_chroma_mc_func chroma_put,
                                              h264_weight_func luma_weight_op,
                                              h264_weight_func chroma_weight_op,
                                              h264_biweight_func luma_weight_avg,
                                              h264_biweight_func chroma_weight_avg,
                                              int list0, int list1,
                                              int pixel_shift, int chroma_idc)
{
    int chroma_height;

    dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
    if (chroma_idc == 3 /* yuv444 */) {
        /* chroma has luma geometry, so the luma weight functions are used */
        chroma_height     = height;
        chroma_weight_avg = luma_weight_avg;
        chroma_weight_op  = luma_weight_op;
        dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
        dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
    } else if (chroma_idc == 2 /* yuv422 */) {
        chroma_height = height;
        dest_cb += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
        dest_cr += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
    } else { /* yuv420 */
        chroma_height = height >> 1;
        dest_cb += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
        dest_cr += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
    }
    /* from here on the offsets are picture positions, not MB-relative */
    x_offset += 8 * h->mb_x;
    y_offset += 8 * (h->mb_y >> MB_FIELD(h));

    if (list0 && list1) {
        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        /* scratchpad layout: Cb at the start, Cr 16 samples to its right,
         * luma 16 chroma lines further down */
        uint8_t *tmp_cb = h->bipred_scratchpad;
        uint8_t *tmp_cr = h->bipred_scratchpad + (16 << pixel_shift);
        uint8_t *tmp_y  = h->bipred_scratchpad + 16 * h->mb_uvlinesize;
        int refn0       = h->ref_cache[0][scan8[n]];
        int refn1       = h->ref_cache[1][scan8[n]];

        /* list 0 prediction goes to the destination ... */
        mc_dir_part(h, &h->ref_list[0][refn0], n, square, height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put,
                    pixel_shift, chroma_idc);
        /* ... list 1 prediction to the scratchpad */
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put,
                    pixel_shift, chroma_idc);

        if (h->use_weight == 2) {
            /* weights come from the implicit_weight table; the pair sums
             * to 64 and is applied with a log2 denominator of 5 and no
             * offset, to luma and chroma alike */
            int weight0 = h->implicit_weight[refn0][refn1][h->mb_y & 1];
            int weight1 = 64 - weight0;
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize,
                            height, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize,
                              chroma_height, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize,
                              chroma_height, 5, weight0, weight1, 0);
        } else {
            /* weights ([..][0]) and offsets ([..][1]) come from the
             * luma_weight / chroma_weight tables; the offsets of both
             * references are summed */
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height,
                            h->luma_log2_weight_denom,
                            h->luma_weight[refn0][0][0],
                            h->luma_weight[refn1][1][0],
                            h->luma_weight[refn0][0][1] +
                            h->luma_weight[refn1][1][1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height,
                              h->chroma_log2_weight_denom,
                              h->chroma_weight[refn0][0][0][0],
                              h->chroma_weight[refn1][1][0][0],
                              h->chroma_weight[refn0][0][0][1] +
                              h->chroma_weight[refn1][1][0][1]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height,
                              h->chroma_log2_weight_denom,
                              h->chroma_weight[refn0][0][1][0],
                              h->chroma_weight[refn1][1][1][0],
                              h->chroma_weight[refn0][0][1][1] +
                              h->chroma_weight[refn1][1][1][1]);
        }
    } else {
        /* single reference: predict, then weight the result in place */
        int list     = list1 ? 1 : 0;
        int refn     = h->ref_cache[list][scan8[n]];
        Picture *ref = &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put, pixel_shift, chroma_idc);

        luma_weight_op(dest_y, h->mb_linesize, height,
                       h->luma_log2_weight_denom,
                       h->luma_weight[refn][list][0],
                       h->luma_weight[refn][list][1]);
        /* chroma is only weighted when use_weight_chroma is set */
        if (h->use_weight_chroma) {
            chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height,
                             h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][0][0],
                             h->chroma_weight[refn][list][0][1]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height,
                             h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][1][0],
                             h->chroma_weight[refn][list][1][1]);
        }
    }
}
1169 | |
1170 | static av_always_inline void prefetch_motion(H264Context *h, int list, |
1171 | int pixel_shift, int chroma_idc) |
1172 | { |
1173 | /* fetch pixels for estimated mv 4 macroblocks ahead |
1174 | * optimized for 64byte cache lines */ |
1175 | const int refn = h->ref_cache[list][scan8[0]]; |
1176 | if (refn >= 0) { |
1177 | const int mx = (h->mv_cache[list][scan8[0]][0] >> 2) + 16 * h->mb_x + 8; |
1178 | const int my = (h->mv_cache[list][scan8[0]][1] >> 2) + 16 * h->mb_y; |
1179 | uint8_t **src = h->ref_list[list][refn].f.data; |
1180 | int off = (mx << pixel_shift) + |
1181 | (my + (h->mb_x & 3) * 4) * h->mb_linesize + |
1182 | (64 << pixel_shift); |
1183 | h->vdsp.prefetch(src[0] + off, h->linesize, 4); |
1184 | if (chroma_idc == 3 /* yuv444 */) { |
1185 | h->vdsp.prefetch(src[1] + off, h->linesize, 4); |
1186 | h->vdsp.prefetch(src[2] + off, h->linesize, 4); |
1187 | } else { |
1188 | off= (((mx>>1)+64)<<pixel_shift) + ((my>>1) + (h->mb_x&7))*h->uvlinesize; |
1189 | h->vdsp.prefetch(src[1] + off, src[2] - src[1], 2); |
1190 | } |
1191 | } |
1192 | } |
1193 | |
1194 | static void free_tables(H264Context *h, int free_rbsp) |
1195 | { |
1196 | int i; |
1197 | H264Context *hx; |
1198 | |
1199 | av_freep(&h->intra4x4_pred_mode); |
1200 | av_freep(&h->chroma_pred_mode_table); |
1201 | av_freep(&h->cbp_table); |
1202 | av_freep(&h->mvd_table[0]); |
1203 | av_freep(&h->mvd_table[1]); |
1204 | av_freep(&h->direct_table); |
1205 | av_freep(&h->non_zero_count); |
1206 | av_freep(&h->slice_table_base); |
1207 | h->slice_table = NULL; |
1208 | av_freep(&h->list_counts); |
1209 | |
1210 | av_freep(&h->mb2b_xy); |
1211 | av_freep(&h->mb2br_xy); |
1212 | |
1213 | for (i = 0; i < 3; i++) |
1214 | av_freep(&h->visualization_buffer[i]); |
1215 | |
1216 | av_buffer_pool_uninit(&h->qscale_table_pool); |
1217 | av_buffer_pool_uninit(&h->mb_type_pool); |
1218 | av_buffer_pool_uninit(&h->motion_val_pool); |
1219 | av_buffer_pool_uninit(&h->ref_index_pool); |
1220 | |
1221 | if (free_rbsp && h->DPB) { |
1222 | for (i = 0; i < MAX_PICTURE_COUNT; i++) |
1223 | unref_picture(h, &h->DPB[i]); |
1224 | av_freep(&h->DPB); |
1225 | } else if (h->DPB) { |
1226 | for (i = 0; i < MAX_PICTURE_COUNT; i++) |
1227 | h->DPB[i].needs_realloc = 1; |
1228 | } |
1229 | |
1230 | h->cur_pic_ptr = NULL; |
1231 | |
1232 | for (i = 0; i < MAX_THREADS; i++) { |
1233 | hx = h->thread_context[i]; |
1234 | if (!hx) |
1235 | continue; |
1236 | av_freep(&hx->top_borders[1]); |
1237 | av_freep(&hx->top_borders[0]); |
1238 | av_freep(&hx->bipred_scratchpad); |
1239 | av_freep(&hx->edge_emu_buffer); |
1240 | av_freep(&hx->dc_val_base); |
1241 | av_freep(&hx->me.scratchpad); |
1242 | av_freep(&hx->er.mb_index2xy); |
1243 | av_freep(&hx->er.error_status_table); |
1244 | av_freep(&hx->er.er_temp_buffer); |
1245 | av_freep(&hx->er.mbintra_table); |
1246 | av_freep(&hx->er.mbskip_table); |
1247 | |
1248 | if (free_rbsp) { |
1249 | av_freep(&hx->rbsp_buffer[1]); |
1250 | av_freep(&hx->rbsp_buffer[0]); |
1251 | hx->rbsp_buffer_size[0] = 0; |
1252 | hx->rbsp_buffer_size[1] = 0; |
1253 | } |
1254 | if (i) |
1255 | av_freep(&h->thread_context[i]); |
1256 | } |
1257 | } |
1258 | |
1259 | static void init_dequant8_coeff_table(H264Context *h) |
1260 | { |
1261 | int i, j, q, x; |
1262 | const int max_qp = 51 + 6 * (h->sps.bit_depth_luma - 8); |
1263 | |
1264 | for (i = 0; i < 6; i++) { |
1265 | h->dequant8_coeff[i] = h->dequant8_buffer[i]; |
1266 | for (j = 0; j < i; j++) |
1267 | if (!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i], |
1268 | 64 * sizeof(uint8_t))) { |
1269 | h->dequant8_coeff[i] = h->dequant8_buffer[j]; |
1270 | break; |
1271 | } |
1272 | if (j < i) |
1273 | continue; |
1274 | |
1275 | for (q = 0; q < max_qp + 1; q++) { |
1276 | int shift = div6[q]; |
1277 | int idx = rem6[q]; |
1278 | for (x = 0; x < 64; x++) |
1279 | h->dequant8_coeff[i][q][(x >> 3) | ((x & 7) << 3)] = |
1280 | ((uint32_t)dequant8_coeff_init[idx][dequant8_coeff_init_scan[((x >> 1) & 12) | (x & 3)]] * |
1281 | h->pps.scaling_matrix8[i][x]) << shift; |
1282 | } |
1283 | } |
1284 | } |
1285 | |
1286 | static void init_dequant4_coeff_table(H264Context *h) |
1287 | { |
1288 | int i, j, q, x; |
1289 | const int max_qp = 51 + 6 * (h->sps.bit_depth_luma - 8); |
1290 | for (i = 0; i < 6; i++) { |
1291 | h->dequant4_coeff[i] = h->dequant4_buffer[i]; |
1292 | for (j = 0; j < i; j++) |
1293 | if (!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], |
1294 | 16 * sizeof(uint8_t))) { |
1295 | h->dequant4_coeff[i] = h->dequant4_buffer[j]; |
1296 | break; |
1297 | } |
1298 | if (j < i) |
1299 | continue; |
1300 | |
1301 | for (q = 0; q < max_qp + 1; q++) { |
1302 | int shift = div6[q] + 2; |
1303 | int idx = rem6[q]; |
1304 | for (x = 0; x < 16; x++) |
1305 | h->dequant4_coeff[i][q][(x >> 2) | ((x << 2) & 0xF)] = |
1306 | ((uint32_t)dequant4_coeff_init[idx][(x & 1) + ((x >> 2) & 1)] * |
1307 | h->pps.scaling_matrix4[i][x]) << shift; |
1308 | } |
1309 | } |
1310 | } |
1311 | |
1312 | static void init_dequant_tables(H264Context *h) |
1313 | { |
1314 | int i, x; |
1315 | init_dequant4_coeff_table(h); |
1316 | if (h->pps.transform_8x8_mode) |
1317 | init_dequant8_coeff_table(h); |
1318 | if (h->sps.transform_bypass) { |
1319 | for (i = 0; i < 6; i++) |
1320 | for (x = 0; x < 16; x++) |
1321 | h->dequant4_coeff[i][0][x] = 1 << 6; |
1322 | if (h->pps.transform_8x8_mode) |
1323 | for (i = 0; i < 6; i++) |
1324 | for (x = 0; x < 64; x++) |
1325 | h->dequant8_coeff[i][0][x] = 1 << 6; |
1326 | } |
1327 | } |
1328 | |
1329 | int ff_h264_alloc_tables(H264Context *h) |
1330 | { |
1331 | const int big_mb_num = h->mb_stride * (h->mb_height + 1); |
1332 | const int row_mb_num = 2*h->mb_stride*FFMAX(h->avctx->thread_count, 1); |
1333 | int x, y, i; |
1334 | |
1335 | FF_ALLOCZ_OR_GOTO(h->avctx, h->intra4x4_pred_mode, |
1336 | row_mb_num * 8 * sizeof(uint8_t), fail) |
1337 | FF_ALLOCZ_OR_GOTO(h->avctx, h->non_zero_count, |
1338 | big_mb_num * 48 * sizeof(uint8_t), fail) |
1339 | FF_ALLOCZ_OR_GOTO(h->avctx, h->slice_table_base, |
1340 | (big_mb_num + h->mb_stride) * sizeof(*h->slice_table_base), fail) |
1341 | FF_ALLOCZ_OR_GOTO(h->avctx, h->cbp_table, |
1342 | big_mb_num * sizeof(uint16_t), fail) |
1343 | FF_ALLOCZ_OR_GOTO(h->avctx, h->chroma_pred_mode_table, |
1344 | big_mb_num * sizeof(uint8_t), fail) |
1345 | FF_ALLOCZ_OR_GOTO(h->avctx, h->mvd_table[0], |
1346 | 16 * row_mb_num * sizeof(uint8_t), fail); |
1347 | FF_ALLOCZ_OR_GOTO(h->avctx, h->mvd_table[1], |
1348 | 16 * row_mb_num * sizeof(uint8_t), fail); |
1349 | FF_ALLOCZ_OR_GOTO(h->avctx, h->direct_table, |
1350 | 4 * big_mb_num * sizeof(uint8_t), fail); |
1351 | FF_ALLOCZ_OR_GOTO(h->avctx, h->list_counts, |
1352 | big_mb_num * sizeof(uint8_t), fail) |
1353 | |
1354 | memset(h->slice_table_base, -1, |
1355 | (big_mb_num + h->mb_stride) * sizeof(*h->slice_table_base)); |
1356 | h->slice_table = h->slice_table_base + h->mb_stride * 2 + 1; |
1357 | |
1358 | FF_ALLOCZ_OR_GOTO(h->avctx, h->mb2b_xy, |
1359 | big_mb_num * sizeof(uint32_t), fail); |
1360 | FF_ALLOCZ_OR_GOTO(h->avctx, h->mb2br_xy, |
1361 | big_mb_num * sizeof(uint32_t), fail); |
1362 | for (y = 0; y < h->mb_height; y++) |
1363 | for (x = 0; x < h->mb_width; x++) { |
1364 | const int mb_xy = x + y * h->mb_stride; |
1365 | const int b_xy = 4 * x + 4 * y * h->b_stride; |
1366 | |
1367 | h->mb2b_xy[mb_xy] = b_xy; |
1368 | h->mb2br_xy[mb_xy] = 8 * (FMO ? mb_xy : (mb_xy % (2 * h->mb_stride))); |
1369 | } |
1370 | |
1371 | if (!h->dequant4_coeff[0]) |
1372 | init_dequant_tables(h); |
1373 | |
1374 | if (!h->DPB) { |
1375 | h->DPB = av_mallocz_array(MAX_PICTURE_COUNT, sizeof(*h->DPB)); |
1376 | if (!h->DPB) |
1377 | return AVERROR(ENOMEM); |
1378 | for (i = 0; i < MAX_PICTURE_COUNT; i++) |
1379 | avcodec_get_frame_defaults(&h->DPB[i].f); |
1380 | avcodec_get_frame_defaults(&h->cur_pic.f); |
1381 | } |
1382 | |
1383 | return 0; |
1384 | |
1385 | fail: |
1386 | free_tables(h, 1); |
1387 | return AVERROR(ENOMEM); |
1388 | } |
1389 | |
1390 | /** |
1391 | * Mimic alloc_tables(), but for every context thread. |
1392 | */ |
1393 | static void clone_tables(H264Context *dst, H264Context *src, int i) |
1394 | { |
1395 | dst->intra4x4_pred_mode = src->intra4x4_pred_mode + i * 8 * 2 * src->mb_stride; |
1396 | dst->non_zero_count = src->non_zero_count; |
1397 | dst->slice_table = src->slice_table; |
1398 | dst->cbp_table = src->cbp_table; |
1399 | dst->mb2b_xy = src->mb2b_xy; |
1400 | dst->mb2br_xy = src->mb2br_xy; |
1401 | dst->chroma_pred_mode_table = src->chroma_pred_mode_table; |
1402 | dst->mvd_table[0] = src->mvd_table[0] + i * 8 * 2 * src->mb_stride; |
1403 | dst->mvd_table[1] = src->mvd_table[1] + i * 8 * 2 * src->mb_stride; |
1404 | dst->direct_table = src->direct_table; |
1405 | dst->list_counts = src->list_counts; |
1406 | dst->DPB = src->DPB; |
1407 | dst->cur_pic_ptr = src->cur_pic_ptr; |
1408 | dst->cur_pic = src->cur_pic; |
1409 | dst->bipred_scratchpad = NULL; |
1410 | dst->edge_emu_buffer = NULL; |
1411 | dst->me.scratchpad = NULL; |
1412 | ff_h264_pred_init(&dst->hpc, src->avctx->codec_id, src->sps.bit_depth_luma, |
1413 | src->sps.chroma_format_idc); |
1414 | } |
1415 | |
1416 | /** |
1417 | * Init context |
1418 | * Allocate buffers which are not shared amongst multiple threads. |
1419 | */ |
1420 | static int context_init(H264Context *h) |
1421 | { |
1422 | ERContext *er = &h->er; |
1423 | int mb_array_size = h->mb_height * h->mb_stride; |
1424 | int y_size = (2 * h->mb_width + 1) * (2 * h->mb_height + 1); |
1425 | int c_size = h->mb_stride * (h->mb_height + 1); |
1426 | int yc_size = y_size + 2 * c_size; |
1427 | int x, y, i; |
1428 | |
1429 | FF_ALLOCZ_OR_GOTO(h->avctx, h->top_borders[0], |
1430 | h->mb_width * 16 * 3 * sizeof(uint8_t) * 2, fail) |
1431 | FF_ALLOCZ_OR_GOTO(h->avctx, h->top_borders[1], |
1432 | h->mb_width * 16 * 3 * sizeof(uint8_t) * 2, fail) |
1433 | |
1434 | h->ref_cache[0][scan8[5] + 1] = |
1435 | h->ref_cache[0][scan8[7] + 1] = |
1436 | h->ref_cache[0][scan8[13] + 1] = |
1437 | h->ref_cache[1][scan8[5] + 1] = |
1438 | h->ref_cache[1][scan8[7] + 1] = |
1439 | h->ref_cache[1][scan8[13] + 1] = PART_NOT_AVAILABLE; |
1440 | |
1441 | if (CONFIG_ERROR_RESILIENCE) { |
1442 | /* init ER */ |
1443 | er->avctx = h->avctx; |
1444 | er->dsp = &h->dsp; |
1445 | er->decode_mb = h264_er_decode_mb; |
1446 | er->opaque = h; |
1447 | er->quarter_sample = 1; |
1448 | |
1449 | er->mb_num = h->mb_num; |
1450 | er->mb_width = h->mb_width; |
1451 | er->mb_height = h->mb_height; |
1452 | er->mb_stride = h->mb_stride; |
1453 | er->b8_stride = h->mb_width * 2 + 1; |
1454 | |
1455 | FF_ALLOCZ_OR_GOTO(h->avctx, er->mb_index2xy, (h->mb_num + 1) * sizeof(int), |
1456 | fail); // error ressilience code looks cleaner with this |
1457 | for (y = 0; y < h->mb_height; y++) |
1458 | for (x = 0; x < h->mb_width; x++) |
1459 | er->mb_index2xy[x + y * h->mb_width] = x + y * h->mb_stride; |
1460 | |
1461 | er->mb_index2xy[h->mb_height * h->mb_width] = (h->mb_height - 1) * |
1462 | h->mb_stride + h->mb_width; |
1463 | |
1464 | FF_ALLOCZ_OR_GOTO(h->avctx, er->error_status_table, |
1465 | mb_array_size * sizeof(uint8_t), fail); |
1466 | |
1467 | FF_ALLOC_OR_GOTO(h->avctx, er->mbintra_table, mb_array_size, fail); |
1468 | memset(er->mbintra_table, 1, mb_array_size); |
1469 | |
1470 | FF_ALLOCZ_OR_GOTO(h->avctx, er->mbskip_table, mb_array_size + 2, fail); |
1471 | |
1472 | FF_ALLOC_OR_GOTO(h->avctx, er->er_temp_buffer, h->mb_height * h->mb_stride, |
1473 | fail); |
1474 | |
1475 | FF_ALLOCZ_OR_GOTO(h->avctx, h->dc_val_base, yc_size * sizeof(int16_t), fail); |
1476 | er->dc_val[0] = h->dc_val_base + h->mb_width * 2 + 2; |
1477 | er->dc_val[1] = h->dc_val_base + y_size + h->mb_stride + 1; |
1478 | er->dc_val[2] = er->dc_val[1] + c_size; |
1479 | for (i = 0; i < yc_size; i++) |
1480 | h->dc_val_base[i] = 1024; |
1481 | } |
1482 | |
1483 | return 0; |
1484 | |
1485 | fail: |
1486 | return AVERROR(ENOMEM); // free_tables will clean up for us |
1487 | } |
1488 | |
1489 | static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size, |
1490 | int parse_extradata); |
1491 | |
1492 | int ff_h264_decode_extradata(H264Context *h, const uint8_t *buf, int size) |
1493 | { |
1494 | AVCodecContext *avctx = h->avctx; |
1495 | int ret; |
1496 | |
1497 | if (!buf || size <= 0) |
1498 | return -1; |
1499 | |
1500 | if (buf[0] == 1) { |
1501 | int i, cnt, nalsize; |
1502 | const unsigned char *p = buf; |
1503 | |
1504 | h->is_avc = 1; |
1505 | |
1506 | if (size < 7) { |
1507 | av_log(avctx, AV_LOG_ERROR, "avcC too short\n"); |
1508 | return AVERROR_INVALIDDATA; |
1509 | } |
1510 | /* sps and pps in the avcC always have length coded with 2 bytes, |
1511 | * so put a fake nal_length_size = 2 while parsing them */ |
1512 | h->nal_length_size = 2; |
1513 | // Decode sps from avcC |
1514 | cnt = *(p + 5) & 0x1f; // Number of sps |
1515 | p += 6; |
1516 | for (i = 0; i < cnt; i++) { |
1517 | nalsize = AV_RB16(p) + 2; |
1518 | if(nalsize > size - (p-buf)) |
1519 | return AVERROR_INVALIDDATA; |
1520 | ret = decode_nal_units(h, p, nalsize, 1); |
1521 | if (ret < 0) { |
1522 | av_log(avctx, AV_LOG_ERROR, |
1523 | "Decoding sps %d from avcC failed\n", i); |
1524 | return ret; |
1525 | } |
1526 | p += nalsize; |
1527 | } |
1528 | // Decode pps from avcC |
1529 | cnt = *(p++); // Number of pps |
1530 | for (i = 0; i < cnt; i++) { |
1531 | nalsize = AV_RB16(p) + 2; |
1532 | if(nalsize > size - (p-buf)) |
1533 | return AVERROR_INVALIDDATA; |
1534 | ret = decode_nal_units(h, p, nalsize, 1); |
1535 | if (ret < 0) { |
1536 | av_log(avctx, AV_LOG_ERROR, |
1537 | "Decoding pps %d from avcC failed\n", i); |
1538 | return ret; |
1539 | } |
1540 | p += nalsize; |
1541 | } |
1542 | // Now store right nal length size, that will be used to parse all other nals |
1543 | h->nal_length_size = (buf[4] & 0x03) + 1; |
1544 | } else { |
1545 | h->is_avc = 0; |
1546 | ret = decode_nal_units(h, buf, size, 1); |
1547 | if (ret < 0) |
1548 | return ret; |
1549 | } |
1550 | return size; |
1551 | } |
1552 | |
1553 | av_cold int ff_h264_decode_init(AVCodecContext *avctx) |
1554 | { |
1555 | H264Context *h = avctx->priv_data; |
1556 | int i; |
1557 | int ret; |
1558 | |
1559 | h->avctx = avctx; |
1560 | |
1561 | h->bit_depth_luma = 8; |
1562 | h->chroma_format_idc = 1; |
1563 | |
1564 | h->avctx->bits_per_raw_sample = 8; |
1565 | h->cur_chroma_format_idc = 1; |
1566 | |
1567 | ff_h264dsp_init(&h->h264dsp, 8, 1); |
1568 | av_assert0(h->sps.bit_depth_chroma == 0); |
1569 | ff_h264chroma_init(&h->h264chroma, h->sps.bit_depth_chroma); |
1570 | ff_h264qpel_init(&h->h264qpel, 8); |
1571 | ff_h264_pred_init(&h->hpc, h->avctx->codec_id, 8, 1); |
1572 | |
1573 | h->dequant_coeff_pps = -1; |
1574 | h->current_sps_id = -1; |
1575 | |
1576 | /* needed so that IDCT permutation is known early */ |
1577 | if (CONFIG_ERROR_RESILIENCE) |
1578 | ff_dsputil_init(&h->dsp, h->avctx); |
1579 | ff_videodsp_init(&h->vdsp, 8); |
1580 | |
1581 | memset(h->pps.scaling_matrix4, 16, 6 * 16 * sizeof(uint8_t)); |
1582 | memset(h->pps.scaling_matrix8, 16, 2 * 64 * sizeof(uint8_t)); |
1583 | |
1584 | h->picture_structure = PICT_FRAME; |
1585 | h->slice_context_count = 1; |
1586 | h->workaround_bugs = avctx->workaround_bugs; |
1587 | h->flags = avctx->flags; |
1588 | |
1589 | /* set defaults */ |
1590 | // s->decode_mb = ff_h263_decode_mb; |
1591 | if (!avctx->has_b_frames) |
1592 | h->low_delay = 1; |
1593 | |
1594 | avctx->chroma_sample_location = AVCHROMA_LOC_LEFT; |
1595 | |
1596 | ff_h264_decode_init_vlc(); |
1597 | |
1598 | ff_init_cabac_states(); |
1599 | |
1600 | h->pixel_shift = 0; |
1601 | h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8; |
1602 | |
1603 | h->thread_context[0] = h; |
1604 | h->outputed_poc = h->next_outputed_poc = INT_MIN; |
1605 | for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) |
1606 | h->last_pocs[i] = INT_MIN; |
1607 | h->prev_poc_msb = 1 << 16; |
1608 | h->prev_frame_num = -1; |
1609 | h->x264_build = -1; |
1610 | h->sei_fpa.frame_packing_arrangement_cancel_flag = -1; |
1611 | ff_h264_reset_sei(h); |
1612 | if (avctx->codec_id == AV_CODEC_ID_H264) { |
1613 | if (avctx->ticks_per_frame == 1) { |
1614 | if(h->avctx->time_base.den < INT_MAX/2) { |
1615 | h->avctx->time_base.den *= 2; |
1616 | } else |
1617 | h->avctx->time_base.num /= 2; |
1618 | } |
1619 | avctx->ticks_per_frame = 2; |
1620 | } |
1621 | |
1622 | if (avctx->extradata_size > 0 && avctx->extradata) { |
1623 | ret = ff_h264_decode_extradata(h, avctx->extradata, avctx->extradata_size); |
1624 | if (ret < 0) { |
1625 | ff_h264_free_context(h); |
1626 | return ret; |
1627 | } |
1628 | } |
1629 | |
1630 | if (h->sps.bitstream_restriction_flag && |
1631 | h->avctx->has_b_frames < h->sps.num_reorder_frames) { |
1632 | h->avctx->has_b_frames = h->sps.num_reorder_frames; |
1633 | h->low_delay = 0; |
1634 | } |
1635 | |
1636 | avctx->internal->allocate_progress = 1; |
1637 | |
1638 | flush_change(h); |
1639 | |
1640 | return 0; |
1641 | } |
1642 | |
/**
 * True if a lies in the half-open range [b, b + size).
 */
#define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b) + (size))))
#undef REBASE_PICTURE
/**
 * Translate a picture pointer from old_ctx's DPB to the entry with the
 * same index in new_ctx's DPB. Evaluates to NULL when pic is NULL or does
 * not point into old_ctx->DPB.
 *
 * All arguments are parenthesized so that expressions (e.g. &ctx) can be
 * passed; they are still evaluated more than once, so they must be free
 * of side effects.
 */
#define REBASE_PICTURE(pic, new_ctx, old_ctx)                  \
    (((pic) && (pic) >= (old_ctx)->DPB &&                      \
      (pic) < (old_ctx)->DPB + MAX_PICTURE_COUNT) ?            \
     &(new_ctx)->DPB[(pic) - (old_ctx)->DPB] : NULL)
1649 | |
1650 | static void copy_picture_range(Picture **to, Picture **from, int count, |
1651 | H264Context *new_base, |
1652 | H264Context *old_base) |
1653 | { |
1654 | int i; |
1655 | |
1656 | for (i = 0; i < count; i++) { |
1657 | assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) || |
1658 | IN_RANGE(from[i], old_base->DPB, |
1659 | sizeof(Picture) * MAX_PICTURE_COUNT) || |
1660 | !from[i])); |
1661 | to[i] = REBASE_PICTURE(from[i], new_base, old_base); |
1662 | } |
1663 | } |
1664 | |
/**
 * Make the parameter-set array "to" a deep copy of "from".
 *
 * Entries missing in "from" are freed in "to"; entries present in "from"
 * are allocated in "to" if necessary and then copied.
 *
 * @param to    destination array of count pointers
 * @param from  source array of count pointers
 * @param count number of entries in both arrays
 * @param size  size in bytes of one parameter set
 * @return 0 on success, AVERROR(ENOMEM) if an allocation fails
 */
static int copy_parameter_set(void **to, void **from, int count, int size)
{
    int idx;

    for (idx = 0; idx < count; idx++) {
        void *src = from[idx];

        if (!src) {
            /* slot unused in the source: drop any stale copy */
            if (to[idx])
                av_freep(&to[idx]);
            continue;
        }

        if (!to[idx]) {
            to[idx] = av_malloc(size);
            if (!to[idx])
                return AVERROR(ENOMEM);
        }

        memcpy(to[idx], src, size);
    }

    return 0;
}
1684 | |
1685 | static int decode_init_thread_copy(AVCodecContext *avctx) |
1686 | { |
1687 | H264Context *h = avctx->priv_data; |
1688 | |
1689 | if (!avctx->internal->is_copy) |
1690 | return 0; |
1691 | memset(h->sps_buffers, 0, sizeof(h->sps_buffers)); |
1692 | memset(h->pps_buffers, 0, sizeof(h->pps_buffers)); |
1693 | |
1694 | h->rbsp_buffer[0] = NULL; |
1695 | h->rbsp_buffer[1] = NULL; |
1696 | h->rbsp_buffer_size[0] = 0; |
1697 | h->rbsp_buffer_size[1] = 0; |
1698 | h->context_initialized = 0; |
1699 | |
1700 | return 0; |
1701 | } |
1702 | |
/**
 * Copy the members of *from into *to, starting at start_field and ending
 * just before end_field (end_field itself is not copied). Both pointers
 * must have the same structure type.
 *
 * The pointer arguments are parenthesized so that expressions such as
 * &ctx can be passed.
 */
#define copy_fields(to, from, start_field, end_field)                   \
    memcpy(&(to)->start_field, &(from)->start_field,                    \
           (char *)&(to)->end_field - (char *)&(to)->start_field)
1706 | |
1707 | static int h264_slice_header_init(H264Context *, int); |
1708 | |
1709 | static int h264_set_parameter_from_sps(H264Context *h); |
1710 | |
1711 | static int decode_update_thread_context(AVCodecContext *dst, |
1712 | const AVCodecContext *src) |
1713 | { |
1714 | H264Context *h = dst->priv_data, *h1 = src->priv_data; |
1715 | int inited = h->context_initialized, err = 0; |
1716 | int context_reinitialized = 0; |
1717 | int i, ret; |
1718 | |
1719 | if (dst == src) |
1720 | return 0; |
1721 | |
1722 | if (inited && |
1723 | (h->width != h1->width || |
1724 | h->height != h1->height || |
1725 | h->mb_width != h1->mb_width || |
1726 | h->mb_height != h1->mb_height || |
1727 | h->sps.bit_depth_luma != h1->sps.bit_depth_luma || |
1728 | h->sps.chroma_format_idc != h1->sps.chroma_format_idc || |
1729 | h->sps.colorspace != h1->sps.colorspace)) { |
1730 | |
1731 | /* set bits_per_raw_sample to the previous value. the check for changed |
1732 | * bit depth in h264_set_parameter_from_sps() uses it and sets it to |
1733 | * the current value */ |
1734 | h->avctx->bits_per_raw_sample = h->sps.bit_depth_luma; |
1735 | |
1736 | av_freep(&h->bipred_scratchpad); |
1737 | |
1738 | h->width = h1->width; |
1739 | h->height = h1->height; |
1740 | h->mb_height = h1->mb_height; |
1741 | h->mb_width = h1->mb_width; |
1742 | h->mb_num = h1->mb_num; |
1743 | h->mb_stride = h1->mb_stride; |
1744 | h->b_stride = h1->b_stride; |
1745 | // SPS/PPS |
1746 | if ((ret = copy_parameter_set((void **)h->sps_buffers, |
1747 | (void **)h1->sps_buffers, |
1748 | MAX_SPS_COUNT, sizeof(SPS))) < 0) |
1749 | return ret; |
1750 | h->sps = h1->sps; |
1751 | if ((ret = copy_parameter_set((void **)h->pps_buffers, |
1752 | (void **)h1->pps_buffers, |
1753 | MAX_PPS_COUNT, sizeof(PPS))) < 0) |
1754 | return ret; |
1755 | h->pps = h1->pps; |
1756 | |
1757 | if ((err = h264_slice_header_init(h, 1)) < 0) { |
1758 | av_log(h->avctx, AV_LOG_ERROR, "h264_slice_header_init() failed"); |
1759 | return err; |
1760 | } |
1761 | context_reinitialized = 1; |
1762 | |
1763 | #if 0 |
1764 | h264_set_parameter_from_sps(h); |
1765 | //Note we set context_reinitialized which will cause h264_set_parameter_from_sps to be reexecuted |
1766 | h->cur_chroma_format_idc = h1->cur_chroma_format_idc; |
1767 | #endif |
1768 | } |
1769 | /* update linesize on resize for h264. The h264 decoder doesn't |
1770 | * necessarily call ff_MPV_frame_start in the new thread */ |
1771 | h->linesize = h1->linesize; |
1772 | h->uvlinesize = h1->uvlinesize; |
1773 | |
1774 | /* copy block_offset since frame_start may not be called */ |
1775 | memcpy(h->block_offset, h1->block_offset, sizeof(h->block_offset)); |
1776 | |
1777 | if (!inited) { |
1778 | for (i = 0; i < MAX_SPS_COUNT; i++) |
1779 | av_freep(h->sps_buffers + i); |
1780 | |
1781 | for (i = 0; i < MAX_PPS_COUNT; i++) |
1782 | av_freep(h->pps_buffers + i); |
1783 | |
1784 | av_freep(&h->rbsp_buffer[0]); |
1785 | av_freep(&h->rbsp_buffer[1]); |
1786 | memcpy(h, h1, offsetof(H264Context, intra_pcm_ptr)); |
1787 | memcpy(&h->cabac, &h1->cabac, |
1788 | sizeof(H264Context) - offsetof(H264Context, cabac)); |
1789 | av_assert0((void*)&h->cabac == &h->mb_padding + 1); |
1790 | |
1791 | memset(h->sps_buffers, 0, sizeof(h->sps_buffers)); |
1792 | memset(h->pps_buffers, 0, sizeof(h->pps_buffers)); |
1793 | |
1794 | memset(&h->er, 0, sizeof(h->er)); |
1795 | memset(&h->me, 0, sizeof(h->me)); |
1796 | memset(&h->mb, 0, sizeof(h->mb)); |
1797 | memset(&h->mb_luma_dc, 0, sizeof(h->mb_luma_dc)); |
1798 | memset(&h->mb_padding, 0, sizeof(h->mb_padding)); |
1799 | |
1800 | h->avctx = dst; |
1801 | h->DPB = NULL; |
1802 | h->qscale_table_pool = NULL; |
1803 | h->mb_type_pool = NULL; |
1804 | h->ref_index_pool = NULL; |
1805 | h->motion_val_pool = NULL; |
1806 | for (i = 0; i < 2; i++) { |
1807 | h->rbsp_buffer[i] = NULL; |
1808 | h->rbsp_buffer_size[i] = 0; |
1809 | } |
1810 | |
1811 | if (h1->context_initialized) { |
1812 | h->context_initialized = 0; |
1813 | |
1814 | memset(&h->cur_pic, 0, sizeof(h->cur_pic)); |
1815 | avcodec_get_frame_defaults(&h->cur_pic.f); |
1816 | h->cur_pic.tf.f = &h->cur_pic.f; |
1817 | |
1818 | ret = ff_h264_alloc_tables(h); |
1819 | if (ret < 0) { |
1820 | av_log(dst, AV_LOG_ERROR, "Could not allocate memory for h264\n"); |
1821 | return ret; |
1822 | } |
1823 | ret = context_init(h); |
1824 | if (ret < 0) { |
1825 | av_log(dst, AV_LOG_ERROR, "context_init() failed.\n"); |
1826 | return ret; |
1827 | } |
1828 | } |
1829 | |
1830 | h->bipred_scratchpad = NULL; |
1831 | h->edge_emu_buffer = NULL; |
1832 | |
1833 | h->thread_context[0] = h; |
1834 | h->context_initialized = h1->context_initialized; |
1835 | } |
1836 | |
1837 | h->avctx->coded_height = h1->avctx->coded_height; |
1838 | h->avctx->coded_width = h1->avctx->coded_width; |
1839 | h->avctx->width = h1->avctx->width; |
1840 | h->avctx->height = h1->avctx->height; |
1841 | h->coded_picture_number = h1->coded_picture_number; |
1842 | h->first_field = h1->first_field; |
1843 | h->picture_structure = h1->picture_structure; |
1844 | h->qscale = h1->qscale; |
1845 | h->droppable = h1->droppable; |
1846 | h->data_partitioning = h1->data_partitioning; |
1847 | h->low_delay = h1->low_delay; |
1848 | |
1849 | for (i = 0; h->DPB && i < MAX_PICTURE_COUNT; i++) { |
1850 | unref_picture(h, &h->DPB[i]); |
1851 | if (h1->DPB[i].f.data[0] && |
1852 | (ret = ref_picture(h, &h->DPB[i], &h1->DPB[i])) < 0) |
1853 | return ret; |
1854 | } |
1855 | |
1856 | h->cur_pic_ptr = REBASE_PICTURE(h1->cur_pic_ptr, h, h1); |
1857 | unref_picture(h, &h->cur_pic); |
1858 | if (h1->cur_pic.f.buf[0] && (ret = ref_picture(h, &h->cur_pic, &h1->cur_pic)) < 0) |
1859 | return ret; |
1860 | |
1861 | h->workaround_bugs = h1->workaround_bugs; |
1862 | h->low_delay = h1->low_delay; |
1863 | h->droppable = h1->droppable; |
1864 | |
1865 | // extradata/NAL handling |
1866 | h->is_avc = h1->is_avc; |
1867 | |
1868 | // SPS/PPS |
1869 | if ((ret = copy_parameter_set((void **)h->sps_buffers, |
1870 | (void **)h1->sps_buffers, |
1871 | MAX_SPS_COUNT, sizeof(SPS))) < 0) |
1872 | return ret; |
1873 | h->sps = h1->sps; |
1874 | if ((ret = copy_parameter_set((void **)h->pps_buffers, |
1875 | (void **)h1->pps_buffers, |
1876 | MAX_PPS_COUNT, sizeof(PPS))) < 0) |
1877 | return ret; |
1878 | h->pps = h1->pps; |
1879 | |
1880 | // Dequantization matrices |
1881 | // FIXME these are big - can they be only copied when PPS changes? |
1882 | copy_fields(h, h1, dequant4_buffer, dequant4_coeff); |
1883 | |
1884 | for (i = 0; i < 6; i++) |
1885 | h->dequant4_coeff[i] = h->dequant4_buffer[0] + |
1886 | (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]); |
1887 | |
1888 | for (i = 0; i < 6; i++) |
1889 | h->dequant8_coeff[i] = h->dequant8_buffer[0] + |
1890 | (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]); |
1891 | |
1892 | h->dequant_coeff_pps = h1->dequant_coeff_pps; |
1893 | |
1894 | // POC timing |
1895 | copy_fields(h, h1, poc_lsb, redundant_pic_count); |
1896 | |
1897 | // reference lists |
1898 | copy_fields(h, h1, short_ref, cabac_init_idc); |
1899 | |
1900 | copy_picture_range(h->short_ref, h1->short_ref, 32, h, h1); |
1901 | copy_picture_range(h->long_ref, h1->long_ref, 32, h, h1); |
1902 | copy_picture_range(h->delayed_pic, h1->delayed_pic, |
1903 | MAX_DELAYED_PIC_COUNT + 2, h, h1); |
1904 | |
1905 | h->sync = h1->sync; |
1906 | |
1907 | if (context_reinitialized) |
1908 | h264_set_parameter_from_sps(h); |
1909 | |
1910 | if (!h->cur_pic_ptr) |
1911 | return 0; |
1912 | |
1913 | if (!h->droppable) { |
1914 | err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index); |
1915 | h->prev_poc_msb = h->poc_msb; |
1916 | h->prev_poc_lsb = h->poc_lsb; |
1917 | } |
1918 | h->prev_frame_num_offset = h->frame_num_offset; |
1919 | h->prev_frame_num = h->frame_num; |
1920 | h->outputed_poc = h->next_outputed_poc; |
1921 | |
1922 | return err; |
1923 | } |
1924 | |
1925 | static int h264_frame_start(H264Context *h) |
1926 | { |
1927 | Picture *pic; |
1928 | int i, ret; |
1929 | const int pixel_shift = h->pixel_shift; |
1930 | int c[4] = { |
1931 | 1<<(h->sps.bit_depth_luma-1), |
1932 | 1<<(h->sps.bit_depth_chroma-1), |
1933 | 1<<(h->sps.bit_depth_chroma-1), |
1934 | -1 |
1935 | }; |
1936 | |
1937 | if (!ff_thread_can_start_frame(h->avctx)) { |
1938 | av_log(h->avctx, AV_LOG_ERROR, "Attempt to start a frame outside SETUP state\n"); |
1939 | return -1; |
1940 | } |
1941 | |
1942 | release_unused_pictures(h, 1); |
1943 | h->cur_pic_ptr = NULL; |
1944 | |
1945 | i = find_unused_picture(h); |
1946 | if (i < 0) { |
1947 | av_log(h->avctx, AV_LOG_ERROR, "no frame buffer available\n"); |
1948 | return i; |
1949 | } |
1950 | pic = &h->DPB[i]; |
1951 | |
1952 | pic->reference = h->droppable ? 0 : h->picture_structure; |
1953 | pic->f.coded_picture_number = h->coded_picture_number++; |
1954 | pic->field_picture = h->picture_structure != PICT_FRAME; |
1955 | |
1956 | /* |
1957 | * Zero key_frame here; IDR markings per slice in frame or fields are ORed |
1958 | * in later. |
1959 | * See decode_nal_units(). |
1960 | */ |
1961 | pic->f.key_frame = 0; |
1962 | pic->sync = 0; |
1963 | pic->mmco_reset = 0; |
1964 | |
1965 | if ((ret = alloc_picture(h, pic)) < 0) |
1966 | return ret; |
1967 | if(!h->sync && !h->avctx->hwaccel && |
1968 | !(h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)) |
1969 | avpriv_color_frame(&pic->f, c); |
1970 | |
1971 | h->cur_pic_ptr = pic; |
1972 | unref_picture(h, &h->cur_pic); |
1973 | if ((ret = ref_picture(h, &h->cur_pic, h->cur_pic_ptr)) < 0) |
1974 | return ret; |
1975 | |
1976 | if (CONFIG_ERROR_RESILIENCE) { |
1977 | ff_er_frame_start(&h->er); |
1978 | h->er.last_pic = |
1979 | h->er.next_pic = NULL; |
1980 | } |
1981 | |
1982 | assert(h->linesize && h->uvlinesize); |
1983 | |
1984 | for (i = 0; i < 16; i++) { |
1985 | h->block_offset[i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 4 * h->linesize * ((scan8[i] - scan8[0]) >> 3); |
1986 | h->block_offset[48 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * h->linesize * ((scan8[i] - scan8[0]) >> 3); |
1987 | } |
1988 | for (i = 0; i < 16; i++) { |
1989 | h->block_offset[16 + i] = |
1990 | h->block_offset[32 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 4 * h->uvlinesize * ((scan8[i] - scan8[0]) >> 3); |
1991 | h->block_offset[48 + 16 + i] = |
1992 | h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * h->uvlinesize * ((scan8[i] - scan8[0]) >> 3); |
1993 | } |
1994 | |
1995 | // s->decode = (h->flags & CODEC_FLAG_PSNR) || !s->encoding || |
1996 | // h->cur_pic.reference /* || h->contains_intra */ || 1; |
1997 | |
1998 | /* We mark the current picture as non-reference after allocating it, so |
1999 | * that if we break out due to an error it can be released automatically |
2000 | * in the next ff_MPV_frame_start(). |
2001 | */ |
2002 | h->cur_pic_ptr->reference = 0; |
2003 | |
2004 | h->cur_pic_ptr->field_poc[0] = h->cur_pic_ptr->field_poc[1] = INT_MAX; |
2005 | |
2006 | h->next_output_pic = NULL; |
2007 | |
2008 | assert(h->cur_pic_ptr->long_ref == 0); |
2009 | |
2010 | return 0; |
2011 | } |
2012 | |
/**
 * Run setup operations that must be run after slice header decoding.
 * This includes finding the next displayed frame.
 *
 * The function fills in the interlacing/repeat/field-order flags of the
 * current picture, updates the reorder depth (avctx->has_b_frames), appends
 * the current picture to h->delayed_pic and, if a picture can be output,
 * stores it in h->next_output_pic.
 *
 * @param h h264 master context
 * @param setup_finished enough NALs have been read that we can call
 *                       ff_thread_finish_setup()
 */
static void decode_postinit(H264Context *h, int setup_finished)
{
    Picture *out = h->cur_pic_ptr;
    Picture *cur = h->cur_pic_ptr;
    int i, pics, out_of_order, out_idx;

    h->cur_pic_ptr->f.pict_type = h->pict_type;

    /* An output picture has already been selected; nothing more to do. */
    if (h->next_output_pic)
        return;

    /* h264_frame_start() sets both field POCs to INT_MAX, so a value of
     * INT_MAX here means at least one field POC has not been set yet. */
    if (cur->field_poc[0] == INT_MAX || cur->field_poc[1] == INT_MAX) {
        /* FIXME: if we have two PAFF fields in one packet, we can't start
         * the next thread here. If we have one field per packet, we can.
         * The check in decode_nal_units() is not good enough to find this
         * yet, so we assume the worst for now. */
        // if (setup_finished)
        //    ff_thread_finish_setup(h->avctx);
        return;
    }

    cur->f.interlaced_frame = 0;
    cur->f.repeat_pict      = 0;

    /* Signal interlacing information externally. */
    /* Prioritize picture timing SEI information over used
     * decoding process if it exists. */

    if (h->sps.pic_struct_present_flag) {
        switch (h->sei_pic_struct) {
        case SEI_PIC_STRUCT_FRAME:
            break;
        case SEI_PIC_STRUCT_TOP_FIELD:
        case SEI_PIC_STRUCT_BOTTOM_FIELD:
            cur->f.interlaced_frame = 1;
            break;
        case SEI_PIC_STRUCT_TOP_BOTTOM:
        case SEI_PIC_STRUCT_BOTTOM_TOP:
            if (FIELD_OR_MBAFF_PICTURE(h))
                cur->f.interlaced_frame = 1;
            else
                // try to flag soft telecine progressive
                cur->f.interlaced_frame = h->prev_interlaced_frame;
            break;
        case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
        case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
            /* Signal the possibility of telecined film externally
             * (pic_struct 5,6). From these hints, let the applications
             * decide if they apply deinterlacing. */
            cur->f.repeat_pict = 1;
            break;
        case SEI_PIC_STRUCT_FRAME_DOUBLING:
            cur->f.repeat_pict = 2;
            break;
        case SEI_PIC_STRUCT_FRAME_TRIPLING:
            cur->f.repeat_pict = 4;
            break;
        }

        /* If either of the two low bits of sei_ct_type is set and the
         * pic_struct is at most BOTTOM_TOP, bit 1 of sei_ct_type decides
         * the interlaced flag. */
        if ((h->sei_ct_type & 3) &&
            h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
            cur->f.interlaced_frame = (h->sei_ct_type & (1 << 1)) != 0;
    } else {
        /* Derive interlacing flag from used decoding process. */
        cur->f.interlaced_frame = FIELD_OR_MBAFF_PICTURE(h);
    }
    /* Remembered for the soft-telecine case above on the next picture. */
    h->prev_interlaced_frame = cur->f.interlaced_frame;

    if (cur->field_poc[0] != cur->field_poc[1]) {
        /* Derive top_field_first from field pocs. */
        cur->f.top_field_first = cur->field_poc[0] < cur->field_poc[1];
    } else {
        if (cur->f.interlaced_frame || h->sps.pic_struct_present_flag) {
            /* Use picture timing SEI information. Even if it is
             * information from a past frame, it is better than nothing. */
            if (h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM ||
                h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
                cur->f.top_field_first = 1;
            else
                cur->f.top_field_first = 0;
        } else {
            /* Most likely progressive */
            cur->f.top_field_first = 0;
        }
    }

    /* Hand the pending mmco_reset flag over to the picture and clear it
     * in the context. */
    cur->mmco_reset = h->mmco_reset;
    h->mmco_reset   = 0;
    // FIXME do something with unavailable reference frames

    /* Sort B-frames into display order */

    /* Raise the reorder depth to the value signalled in the SPS, if any. */
    if (h->sps.bitstream_restriction_flag &&
        h->avctx->has_b_frames < h->sps.num_reorder_frames) {
        h->avctx->has_b_frames = h->sps.num_reorder_frames;
        h->low_delay           = 0;
    }

    /* Under strict compliance and without signalled restrictions, use the
     * largest reorder depth. */
    if (h->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT &&
        !h->sps.bitstream_restriction_flag) {
        h->avctx->has_b_frames = MAX_DELAYED_PIC_COUNT - 1;
        h->low_delay           = 0;
    }

    /* Insert cur->poc into h->last_pocs: entries that are not greater than
     * cur->poc are shifted down by one slot (the value in slot 0 is
     * dropped) and cur->poc is stored just below the first greater entry.
     * Nothing is stored when cur->poc is smaller than last_pocs[0].
     * The loop leaves i at the index where it stopped. */
    for (i = 0; 1; i++) {
        if(i == MAX_DELAYED_PIC_COUNT || cur->poc < h->last_pocs[i]){
            if(i)
                h->last_pocs[i-1] = cur->poc;
            break;
        } else if(i) {
            h->last_pocs[i-1]= h->last_pocs[i];
        }
    }
    /* Number of slots from the stopping index to the end of last_pocs. */
    out_of_order = MAX_DELAYED_PIC_COUNT - i;
    /* Assume at least one picture of reordering for B pictures, or when the
     * two topmost recorded POCs are more than 2 apart. */
    if(   cur->f.pict_type == AV_PICTURE_TYPE_B
       || (h->last_pocs[MAX_DELAYED_PIC_COUNT-2] > INT_MIN && h->last_pocs[MAX_DELAYED_PIC_COUNT-1] - h->last_pocs[MAX_DELAYED_PIC_COUNT-2] > 2))
        out_of_order = FFMAX(out_of_order, 1);
    if (out_of_order == MAX_DELAYED_PIC_COUNT) {
        /* cur->poc is below every recorded POC: restart the POC history
         * from this picture and flag it with mmco_reset. */
        av_log(h->avctx, AV_LOG_VERBOSE, "Invalid POC %d<%d\n", cur->poc, h->last_pocs[0]);
        for (i = 1; i < MAX_DELAYED_PIC_COUNT; i++)
            h->last_pocs[i] = INT_MIN;
        h->last_pocs[0] = cur->poc;
        cur->mmco_reset = 1;
    } else if(h->avctx->has_b_frames < out_of_order && !h->sps.bitstream_restriction_flag){
        av_log(h->avctx, AV_LOG_VERBOSE, "Increasing reorder buffer to %d\n", out_of_order);
        h->avctx->has_b_frames = out_of_order;
        h->low_delay = 0;
    }

    /* Count the pictures currently queued (the list is NULL-terminated). */
    pics = 0;
    while (h->delayed_pic[pics])
        pics++;

    av_assert0(pics <= MAX_DELAYED_PIC_COUNT);

    /* Append the current picture; a picture with reference == 0 is tagged
     * with DELAYED_PIC_REF while it sits in the queue. */
    h->delayed_pic[pics++] = cur;
    if (cur->reference == 0)
        cur->reference = DELAYED_PIC_REF;

    /* Pick the queued picture with the lowest POC, scanning from the head
     * and stopping at the first later entry that is a key frame or has
     * mmco_reset set. */
    out     = h->delayed_pic[0];
    out_idx = 0;
    for (i = 1; h->delayed_pic[i] &&
                !h->delayed_pic[i]->f.key_frame &&
                !h->delayed_pic[i]->mmco_reset;
         i++)
        if (h->delayed_pic[i]->poc < out->poc) {
            out     = h->delayed_pic[i];
            out_idx = i;
        }
    if (h->avctx->has_b_frames == 0 &&
        (h->delayed_pic[0]->f.key_frame || h->delayed_pic[0]->mmco_reset))
        h->next_outputed_poc = INT_MIN;
    /* From here on out_of_order is a flag: the candidate's POC lies before
     * the last POC that was output. */
    out_of_order = out->poc < h->next_outputed_poc;

    if (out_of_order || pics > h->avctx->has_b_frames) {
        out->reference &= ~DELAYED_PIC_REF;
        // for frame threading, the owner must be the second field's thread or
        // else the first thread can release the picture and reuse it unsafely
        /* Remove the candidate from the queue by shifting the tail up. */
        for (i = out_idx; h->delayed_pic[i]; i++)
            h->delayed_pic[i] = h->delayed_pic[i + 1];
    }
    if (!out_of_order && pics > h->avctx->has_b_frames) {
        h->next_output_pic = out;
        /* If the new queue head is a key frame or carries mmco_reset, the
         * output POC tracking starts over. */
        if (out_idx == 0 && h->delayed_pic[0] && (h->delayed_pic[0]->f.key_frame || h->delayed_pic[0]->mmco_reset)) {
            h->next_outputed_poc = INT_MIN;
        } else
            h->next_outputed_poc = out->poc;
    } else {
        av_log(h->avctx, AV_LOG_DEBUG, "no picture %s\n", out_of_order ? "ooo" : "");
    }

    if (h->next_output_pic && h->next_output_pic->sync) {
        h->sync |= 2;
    }

    if (setup_finished && !h->avctx->hwaccel)
        ff_thread_finish_setup(h->avctx);
}
2199 | |
/**
 * Save picture rows of the current macroblock into h->top_borders.
 *
 * Within one top_borders entry luma comes first (16 bytes, or 32 bytes when
 * pixel_shift is set), followed by Cb and then Cr. The amount of chroma
 * copied depends on the chroma format (4:4:4, 4:2:2 or the remaining case).
 *
 * @param h          decoder context (mb_x, mb_y, flags, top_borders are used)
 * @param src_y      luma pointer for the macroblock
 * @param src_cb     Cb pointer for the macroblock
 * @param src_cr     Cr pointer for the macroblock
 * @param linesize   luma stride in bytes
 * @param uvlinesize chroma stride in bytes
 * @param simple     nonzero skips the MBAFF handling and always copies chroma
 *
 * NOTE(review): presumably src_y/src_cb/src_cr point at the first row of the
 * macroblock, so that the rows copied below are its last rows -- confirm
 * against the callers.
 */
static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y,
                                              uint8_t *src_cb, uint8_t *src_cr,
                                              int linesize, int uvlinesize,
                                              int simple)
{
    uint8_t *top_border;
    int top_idx = 1;
    const int pixel_shift = h->pixel_shift;
    int chroma444 = CHROMA444(h);
    int chroma422 = CHROMA422(h);

    /* Step back one row; all row offsets below are relative to this. */
    src_y  -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

    if (!simple && FRAME_MBAFF(h)) {
        if (h->mb_y & 1) {
            /* Odd macroblock row, MB_MBAFF() false: additionally store the
             * row one above the final one into top_borders[0]; the final
             * row goes to top_borders[1] in the common code below. */
            if (!MB_MBAFF(h)) {
                top_border = h->top_borders[0][h->mb_x];
                AV_COPY128(top_border, src_y + 15 * linesize);
                if (pixel_shift)
                    AV_COPY128(top_border + 16, src_y + 15 * linesize + 16);
                if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
                    if (chroma444) {
                        if (pixel_shift) {
                            AV_COPY128(top_border + 32, src_cb + 15 * uvlinesize);
                            AV_COPY128(top_border + 48, src_cb + 15 * uvlinesize + 16);
                            AV_COPY128(top_border + 64, src_cr + 15 * uvlinesize);
                            AV_COPY128(top_border + 80, src_cr + 15 * uvlinesize + 16);
                        } else {
                            AV_COPY128(top_border + 16, src_cb + 15 * uvlinesize);
                            AV_COPY128(top_border + 32, src_cr + 15 * uvlinesize);
                        }
                    } else if (chroma422) {
                        if (pixel_shift) {
                            AV_COPY128(top_border + 32, src_cb + 15 * uvlinesize);
                            AV_COPY128(top_border + 48, src_cr + 15 * uvlinesize);
                        } else {
                            AV_COPY64(top_border + 16, src_cb + 15 * uvlinesize);
                            AV_COPY64(top_border + 24, src_cr + 15 * uvlinesize);
                        }
                    } else {
                        if (pixel_shift) {
                            AV_COPY128(top_border + 32, src_cb + 7 * uvlinesize);
                            AV_COPY128(top_border + 48, src_cr + 7 * uvlinesize);
                        } else {
                            AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
                            AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
                        }
                    }
                }
            }
        } else if (MB_MBAFF(h)) {
            /* Even macroblock row with MB_MBAFF() set: store into slot 0. */
            top_idx = 0;
        } else
            /* Even macroblock row, MB_MBAFF() false: nothing is stored. */
            return;
    }

    top_border = h->top_borders[top_idx][h->mb_x];
    /* There are two lines saved, the line above the top macroblock
     * of a pair, and the line above the bottom macroblock. */
    AV_COPY128(top_border, src_y + 16 * linesize);
    if (pixel_shift)
        AV_COPY128(top_border + 16, src_y + 16 * linesize + 16);

    if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
        if (chroma444) {
            /* NOTE(review): this branch steps the chroma pointers with
             * linesize although they were moved back by uvlinesize above
             * and the MBAFF branch uses uvlinesize. This is only equivalent
             * if callers pass identical strides for 4:4:4 -- confirm. */
            if (pixel_shift) {
                AV_COPY128(top_border + 32, src_cb + 16 * linesize);
                AV_COPY128(top_border + 48, src_cb + 16 * linesize + 16);
                AV_COPY128(top_border + 64, src_cr + 16 * linesize);
                AV_COPY128(top_border + 80, src_cr + 16 * linesize + 16);
            } else {
                AV_COPY128(top_border + 16, src_cb + 16 * linesize);
                AV_COPY128(top_border + 32, src_cr + 16 * linesize);
            }
        } else if (chroma422) {
            if (pixel_shift) {
                AV_COPY128(top_border + 32, src_cb + 16 * uvlinesize);
                AV_COPY128(top_border + 48, src_cr + 16 * uvlinesize);
            } else {
                AV_COPY64(top_border + 16, src_cb + 16 * uvlinesize);
                AV_COPY64(top_border + 24, src_cr + 16 * uvlinesize);
            }
        } else {
            if (pixel_shift) {
                AV_COPY128(top_border + 32, src_cb + 8 * uvlinesize);
                AV_COPY128(top_border + 48, src_cr + 8 * uvlinesize);
            } else {
                AV_COPY64(top_border + 16, src_cb + 8 * uvlinesize);
                AV_COPY64(top_border + 24, src_cr + 8 * uvlinesize);
            }
        }
    }
}
2295 | |
/**
 * Exchange (or copy) the row above the current macroblock in the picture
 * with the rows stored in h->top_borders.
 *
 * @param h           decoder context
 * @param src_y       luma pointer for the macroblock
 * @param src_cb      Cb pointer for the macroblock
 * @param src_cr      Cr pointer for the macroblock
 * @param linesize    luma stride in bytes
 * @param uvlinesize  chroma stride in bytes
 * @param xchg        nonzero: swap the leading 8 samples of each row;
 *                    zero: only copy them (the remaining parts are always
 *                    swapped, see the XCHG() calls with a literal 1)
 * @param chroma444   nonzero if chroma uses the same layout as luma
 * @param simple      nonzero skips the MBAFF handling and always processes
 *                    chroma
 * @param pixel_shift 0 or 1; scales sample offsets to byte offsets
 */
static av_always_inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
                                            uint8_t *src_cb, uint8_t *src_cr,
                                            int linesize, int uvlinesize,
                                            int xchg, int chroma444,
                                            int simple, int pixel_shift)
{
    int deblock_topleft;
    int deblock_top;
    int top_idx = 1;
    uint8_t *top_border_m1;
    uint8_t *top_border;

    /* Select the top_borders slot; odd macroblock rows without MB_MBAFF()
     * need no exchange at all. */
    if (!simple && FRAME_MBAFF(h)) {
        if (h->mb_y & 1) {
            if (!MB_MBAFF(h))
                return;
        } else {
            top_idx = MB_MBAFF(h) ? 0 : 1;
        }
    }

    if (h->deblocking_filter == 2) {
        /* Only neighbours belonging to the current slice count. */
        deblock_topleft = h->slice_table[h->mb_xy - 1 - h->mb_stride] == h->slice_num;
        deblock_top     = h->top_type;
    } else {
        deblock_topleft = (h->mb_x > 0);
        deblock_top     = (h->mb_y > !!MB_FIELD(h));
    }

    /* Move to one row up and one sample left of the given pointers. */
    src_y  -= linesize   + 1 + pixel_shift;
    src_cb -= uvlinesize + 1 + pixel_shift;
    src_cr -= uvlinesize + 1 + pixel_shift;

    /* top_border_m1 is only dereferenced when deblock_topleft is set. */
    top_border_m1 = h->top_borders[top_idx][h->mb_x - 1];
    top_border    = h->top_borders[top_idx][h->mb_x];

/* Swap a and b when xchg is nonzero, otherwise copy (presumably from a into
 * b, assuming AV_COPY* takes the destination first -- confirm). Handles 8
 * samples: 16 bytes when pixel_shift is set, 8 bytes otherwise.
 * NOTE(review): the macro expands to a bare if/else chain and is not
 * #undef'ed within this function. */
#define XCHG(a, b, xchg)                        \
    if (pixel_shift) {                          \
        if (xchg) {                             \
            AV_SWAP64(b + 0, a + 0);            \
            AV_SWAP64(b + 8, a + 8);            \
        } else {                                \
            AV_COPY128(b, a);                   \
        }                                       \
    } else if (xchg)                            \
        AV_SWAP64(b, a);                        \
    else                                        \
        AV_COPY64(b, a);

    if (deblock_top) {
        if (deblock_topleft) {
            XCHG(top_border_m1 + (8 << pixel_shift),
                 src_y - (7 << pixel_shift), 1);
        }
        XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
        XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
        /* Top-right neighbour, unless this is the last macroblock column. */
        if (h->mb_x + 1 < h->mb_width) {
            XCHG(h->top_borders[top_idx][h->mb_x + 1],
                 src_y + (17 << pixel_shift), 1);
        }
        if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
            if (chroma444) {
                /* Chroma rows are handled like luma, at the chroma offsets
                 * inside the border entry. */
                if (deblock_topleft) {
                    XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                    XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
                }
                XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
                XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
                XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
                XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
                if (h->mb_x + 1 < h->mb_width) {
                    XCHG(h->top_borders[top_idx][h->mb_x + 1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
                    XCHG(h->top_borders[top_idx][h->mb_x + 1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
                }
            } else {
                /* Subsampled chroma: a single 8-sample group per plane. */
                if (deblock_topleft) {
                    XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                    XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
                }
                XCHG(top_border + (16 << pixel_shift), src_cb + 1 + pixel_shift, 1);
                XCHG(top_border + (24 << pixel_shift), src_cr + 1 + pixel_shift, 1);
            }
        }
    }
}
2381 | |
2382 | static av_always_inline int dctcoef_get(int16_t *mb, int high_bit_depth, |
2383 | int index) |
2384 | { |
2385 | if (high_bit_depth) { |
2386 | return AV_RN32A(((int32_t *)mb) + index); |
2387 | } else |
2388 | return AV_RN16A(mb + index); |
2389 | } |
2390 | |
2391 | static av_always_inline void dctcoef_set(int16_t *mb, int high_bit_depth, |
2392 | int index, int value) |
2393 | { |
2394 | if (high_bit_depth) { |
2395 | AV_WN32A(((int32_t *)mb) + index, value); |
2396 | } else |
2397 | AV_WN16A(mb + index, value); |
2398 | } |
2399 | |
/**
 * Intra-predict one plane of the current macroblock and, for the per-block
 * intra modes, add the residual directly after each block's prediction.
 *
 * For macroblock types matching IS_INTRA4x4() every 4x4 (or 8x8 when
 * IS_8x8DCT() is set) block is predicted and its residual added in turn.
 * For all other types a 16x16 prediction is done and the luma DC
 * coefficients are prepared in h->mb; the remaining residual is not added
 * here.
 *
 * @param h                decoder context
 * @param mb_type          type flags of the current macroblock
 * @param is_h264          nonzero for H.264; zero selects the SVQ3 paths
 * @param simple           not used by this function
 * @param transform_bypass nonzero: residual is added without a transform
 * @param pixel_shift      0 or 1; scales coefficient/sample offsets
 * @param block_offset     per-block destination offsets (16 per plane)
 * @param linesize         destination stride in bytes
 * @param dest_y           destination pointer for this plane
 * @param p                plane index; selects the coefficient range
 *                         (p * 256), the block_offset range (p * 16) and
 *                         the quantizer
 */
static av_always_inline void hl_decode_mb_predict_luma(H264Context *h,
                                                       int mb_type, int is_h264,
                                                       int simple,
                                                       int transform_bypass,
                                                       int pixel_shift,
                                                       int *block_offset,
                                                       int linesize,
                                                       uint8_t *dest_y, int p)
{
    void (*idct_add)(uint8_t *dst, int16_t *block, int stride);
    void (*idct_dc_add)(uint8_t *dst, int16_t *block, int stride);
    int i;
    /* Plane 0 uses the luma quantizer, other planes the chroma ones. */
    int qscale = p == 0 ? h->qscale : h->chroma_qp[p - 1];
    block_offset += 16 * p;
    if (IS_INTRA4x4(mb_type)) {
        if (IS_8x8DCT(mb_type)) {
            if (transform_bypass) {
                idct_dc_add =
                idct_add    = h->h264dsp.h264_add_pixels8_clear;
            } else {
                idct_dc_add = h->h264dsp.h264_idct8_dc_add;
                idct_add    = h->h264dsp.h264_idct8_add;
            }
            /* One iteration per 8x8 block (four 4x4 indices each). */
            for (i = 0; i < 16; i += 4) {
                uint8_t *const ptr = dest_y + block_offset[i];
                const int dir      = h->intra4x4_pred_mode_cache[scan8[i]];
                /* Bypass with profile_idc 244 and prediction mode 0 or 1:
                 * a combined predict-and-add routine is used. */
                if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) {
                    h->hpc.pred8x8l_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
                } else {
                    const int nnz = h->non_zero_count_cache[scan8[i + p * 16]];
                    h->hpc.pred8x8l[dir](ptr, (h->topleft_samples_available << i) & 0x8000,
                                         (h->topright_samples_available << i) & 0x4000, linesize);
                    if (nnz) {
                        /* A single nonzero coefficient that is the block's
                         * first one takes the dc-only routine. */
                        if (nnz == 1 && dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
                            idct_dc_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
                        else
                            idct_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
                    }
                }
            }
        } else {
            if (transform_bypass) {
                idct_dc_add  =
                idct_add     = h->h264dsp.h264_add_pixels4_clear;
            } else {
                idct_dc_add = h->h264dsp.h264_idct_dc_add;
                idct_add    = h->h264dsp.h264_idct_add;
            }
            for (i = 0; i < 16; i++) {
                uint8_t *const ptr = dest_y + block_offset[i];
                const int dir      = h->intra4x4_pred_mode_cache[scan8[i]];

                if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) {
                    h->hpc.pred4x4_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
                } else {
                    uint8_t *topright;
                    int nnz, tr;
                    uint64_t tr_high;
                    /* Only these two modes are given a top-right pointer. */
                    if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
                        const int topright_avail = (h->topright_samples_available << i) & 0x8000;
                        av_assert2(h->mb_y || linesize <= block_offset[i]);
                        if (!topright_avail) {
                            /* Top-right unavailable: replicate the last
                             * sample of the row above the block four times
                             * into a local and point at that instead. */
                            if (pixel_shift) {
                                tr_high  = ((uint16_t *)ptr)[3 - linesize / 2] * 0x0001000100010001ULL;
                                topright = (uint8_t *)&tr_high;
                            } else {
                                tr       = ptr[3 - linesize] * 0x01010101u;
                                topright = (uint8_t *)&tr;
                            }
                        } else
                            topright = ptr + (4 << pixel_shift) - linesize;
                    } else
                        topright = NULL;

                    h->hpc.pred4x4[dir](ptr, topright, linesize);
                    nnz = h->non_zero_count_cache[scan8[i + p * 16]];
                    if (nnz) {
                        if (is_h264) {
                            if (nnz == 1 && dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
                                idct_dc_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
                            else
                                idct_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
                        } else if (CONFIG_SVQ3_DECODER)
                            ff_svq3_add_idct_c(ptr, h->mb + i * 16 + p * 256, linesize, qscale, 0);
                    }
                }
            }
        }
    } else {
        h->hpc.pred16x16[h->intra16x16_pred_mode](dest_y, linesize);
        if (is_h264) {
            if (h->non_zero_count_cache[scan8[LUMA_DC_BLOCK_INDEX + p]]) {
                if (!transform_bypass)
                    h->h264dsp.h264_luma_dc_dequant_idct(h->mb + (p * 256 << pixel_shift),
                                                         h->mb_luma_dc[p],
                                                         h->dequant4_coeff[p][qscale][0]);
                else {
                    /* Bypass: copy the 16 DC values from mb_luma_dc into
                     * the first coefficient of each 4x4 block, using this
                     * index permutation. */
                    static const uint8_t dc_mapping[16] = {
                         0 * 16,  1 * 16,  4 * 16,  5 * 16,
                         2 * 16,  3 * 16,  6 * 16,  7 * 16,
                         8 * 16,  9 * 16, 12 * 16, 13 * 16,
                        10 * 16, 11 * 16, 14 * 16, 15 * 16
                    };
                    for (i = 0; i < 16; i++)
                        dctcoef_set(h->mb + (p * 256 << pixel_shift),
                                    pixel_shift, dc_mapping[i],
                                    dctcoef_get(h->mb_luma_dc[p],
                                                pixel_shift, i));
                }
            }
        } else if (CONFIG_SVQ3_DECODER)
            ff_svq3_luma_dc_dequant_idct_c(h->mb + p * 256,
                                           h->mb_luma_dc[p], qscale);
    }
}
2515 | |
/**
 * Add the residual of one plane of the current macroblock to the picture.
 *
 * Macroblock types matching IS_INTRA4x4() are skipped entirely here. For
 * the other types the residual in h->mb is added using the DSP routine that
 * matches the macroblock type and the transform_bypass setting.
 *
 * @param h                decoder context
 * @param mb_type          type flags of the current macroblock
 * @param is_h264          nonzero for H.264; zero selects the SVQ3 path
 * @param simple           not used by this function
 * @param transform_bypass nonzero: residual is added without a transform
 * @param pixel_shift      0 or 1; scales coefficient offsets
 * @param block_offset     per-block destination offsets (16 per plane)
 * @param linesize         destination stride in bytes
 * @param dest_y           destination pointer for this plane
 * @param p                plane index; selects the coefficient range
 *                         (p * 256) and the block_offset range (p * 16)
 */
static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type,
                                                    int is_h264, int simple,
                                                    int transform_bypass,
                                                    int pixel_shift,
                                                    int *block_offset,
                                                    int linesize,
                                                    uint8_t *dest_y, int p)
{
    void (*idct_add)(uint8_t *dst, int16_t *block, int stride);
    int i;
    block_offset += 16 * p;
    if (!IS_INTRA4x4(mb_type)) {
        if (is_h264) {
            if (IS_INTRA16x16(mb_type)) {
                if (transform_bypass) {
                    /* profile_idc 244 with one of these two prediction
                     * modes uses a dedicated whole-macroblock add routine. */
                    if (h->sps.profile_idc == 244 &&
                        (h->intra16x16_pred_mode == VERT_PRED8x8 ||
                         h->intra16x16_pred_mode == HOR_PRED8x8)) {
                        h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset,
                                                                      h->mb + (p * 256 << pixel_shift),
                                                                      linesize);
                    } else {
                        /* Add every 4x4 block that has coded coefficients
                         * or a nonzero first coefficient. */
                        for (i = 0; i < 16; i++)
                            if (h->non_zero_count_cache[scan8[i + p * 16]] ||
                                dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
                                h->h264dsp.h264_add_pixels4_clear(dest_y + block_offset[i],
                                                                  h->mb + (i * 16 + p * 256 << pixel_shift),
                                                                  linesize);
                    }
                } else {
                    h->h264dsp.h264_idct_add16intra(dest_y, block_offset,
                                                    h->mb + (p * 256 << pixel_shift),
                                                    linesize,
                                                    h->non_zero_count_cache + p * 5 * 8);
                }
            } else if (h->cbp & 15) {
                /* Not intra 16x16: only act when one of the low four cbp
                 * bits is set. */
                if (transform_bypass) {
                    /* Step over four 4x4 indices per 8x8 block. */
                    const int di = IS_8x8DCT(mb_type) ? 4 : 1;
                    idct_add = IS_8x8DCT(mb_type) ? h->h264dsp.h264_add_pixels8_clear
                                                  : h->h264dsp.h264_add_pixels4_clear;
                    for (i = 0; i < 16; i += di)
                        if (h->non_zero_count_cache[scan8[i + p * 16]])
                            idct_add(dest_y + block_offset[i],
                                     h->mb + (i * 16 + p * 256 << pixel_shift),
                                     linesize);
                } else {
                    if (IS_8x8DCT(mb_type))
                        h->h264dsp.h264_idct8_add4(dest_y, block_offset,
                                                   h->mb + (p * 256 << pixel_shift),
                                                   linesize,
                                                   h->non_zero_count_cache + p * 5 * 8);
                    else
                        h->h264dsp.h264_idct_add16(dest_y, block_offset,
                                                   h->mb + (p * 256 << pixel_shift),
                                                   linesize,
                                                   h->non_zero_count_cache + p * 5 * 8);
                }
            }
        } else if (CONFIG_SVQ3_DECODER) {
            /* SVQ3: per 4x4 block, using the SVQ3 inverse transform. */
            for (i = 0; i < 16; i++)
                if (h->non_zero_count_cache[scan8[i + p * 16]] || h->mb[i * 16 + p * 256]) {
                    // FIXME benchmark weird rule, & below
                    uint8_t *const ptr = dest_y + block_offset[i];
                    ff_svq3_add_idct_c(ptr, h->mb + i * 16 + p * 256, linesize,
                                       h->qscale, IS_INTRA(mb_type) ? 1 : 0);
                }
        }
    }
}
2585 | |
/* h264_mb_template.c is included three times, each time with a different
 * BITS/SIMPLE combination: (8, 1), (16, 1) and (16, 0).
 * NOTE(review): presumably each inclusion generates the hl_decode_mb_*
 * variants that ff_h264_hl_decode_mb() dispatches to -- confirm against
 * h264_mb_template.c. */
#define BITS   8
#define SIMPLE 1
#include "h264_mb_template.c"

/* Same SIMPLE setting, BITS switched to 16. */
#undef  BITS
#define BITS   16
#include "h264_mb_template.c"

/* BITS stays at 16, SIMPLE switched off. */
#undef  SIMPLE
#define SIMPLE 0
#include "h264_mb_template.c"
2597 | |
2598 | void ff_h264_hl_decode_mb(H264Context *h) |
2599 | { |
2600 | const int mb_xy = h->mb_xy; |
2601 | const int mb_type = h->cur_pic.mb_type[mb_xy]; |
2602 | int is_complex = CONFIG_SMALL || h->is_complex || |
2603 | IS_INTRA_PCM(mb_type) || h->qscale == 0; |
2604 | |
2605 | if (CHROMA444(h)) { |
2606 | if (is_complex || h->pixel_shift) |
2607 | hl_decode_mb_444_complex(h); |
2608 | else |
2609 | hl_decode_mb_444_simple_8(h); |
2610 | } else if (is_complex) { |
2611 | hl_decode_mb_complex(h); |
2612 | } else if (h->pixel_shift) { |
2613 | hl_decode_mb_simple_16(h); |
2614 | } else |
2615 | hl_decode_mb_simple_8(h); |
2616 | } |
2617 | |
2618 | int ff_pred_weight_table(H264Context *h) |
2619 | { |
2620 | int list, i; |
2621 | int luma_def, chroma_def; |
2622 | |
2623 | h->use_weight = 0; |
2624 | h->use_weight_chroma = 0; |
2625 | h->luma_log2_weight_denom = get_ue_golomb(&h->gb); |
2626 | if (h->sps.chroma_format_idc) |
2627 | h->chroma_log2_weight_denom = get_ue_golomb(&h->gb); |
2628 | luma_def = 1 << h->luma_log2_weight_denom; |
2629 | chroma_def = 1 << h->chroma_log2_weight_denom; |
2630 | |
2631 | for (list = 0; list < 2; list++) { |
2632 | h->luma_weight_flag[list] = 0; |
2633 | h->chroma_weight_flag[list] = 0; |
2634 | for (i = 0; i < h->ref_count[list]; i++) { |
2635 | int luma_weight_flag, chroma_weight_flag; |
2636 | |
2637 | luma_weight_flag = get_bits1(&h->gb); |
2638 | if (luma_weight_flag) { |
2639 | h->luma_weight[i][list][0] = get_se_golomb(&h->gb); |
2640 | h->luma_weight[i][list][1] = get_se_golomb(&h->gb); |
2641 | if (h->luma_weight[i][list][0] != luma_def || |
2642 | h->luma_weight[i][list][1] != 0) { |
2643 | h->use_weight = 1; |
2644 | h->luma_weight_flag[list] = 1; |
2645 | } |
2646 | } else { |
2647 | h->luma_weight[i][list][0] = luma_def; |
2648 | h->luma_weight[i][list][1] = 0; |
2649 | } |
2650 | |
2651 | if (h->sps.chroma_format_idc) { |
2652 | chroma_weight_flag = get_bits1(&h->gb); |
2653 | if (chroma_weight_flag) { |
2654 | int j; |
2655 | for (j = 0; j < 2; j++) { |
2656 | h->chroma_weight[i][list][j][0] = get_se_golomb(&h->gb); |
2657 | h->chroma_weight[i][list][j][1] = get_se_golomb(&h->gb); |
2658 | if (h->chroma_weight[i][list][j][0] != chroma_def || |
2659 | h->chroma_weight[i][list][j][1] != 0) { |
2660 | h->use_weight_chroma = 1; |
2661 | h->chroma_weight_flag[list] = 1; |
2662 | } |
2663 | } |
2664 | } else { |
2665 | int j; |
2666 | for (j = 0; j < 2; j++) { |
2667 | h->chroma_weight[i][list][j][0] = chroma_def; |
2668 | h->chroma_weight[i][list][j][1] = 0; |
2669 | } |
2670 | } |
2671 | } |
2672 | } |
2673 | if (h->slice_type_nos != AV_PICTURE_TYPE_B) |
2674 | break; |
2675 | } |
2676 | h->use_weight = h->use_weight || h->use_weight_chroma; |
2677 | return 0; |
2678 | } |
2679 | |
2680 | /** |
2681 | * Initialize implicit_weight table. |
2682 | * @param field 0/1 initialize the weight for interlaced MBAFF |
2683 | * -1 initializes the rest |
2684 | */ |
2685 | static void implicit_weight_table(H264Context *h, int field) |
2686 | { |
2687 | int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1; |
2688 | |
2689 | for (i = 0; i < 2; i++) { |
2690 | h->luma_weight_flag[i] = 0; |
2691 | h->chroma_weight_flag[i] = 0; |
2692 | } |
2693 | |
2694 | if (field < 0) { |
2695 | if (h->picture_structure == PICT_FRAME) { |
2696 | cur_poc = h->cur_pic_ptr->poc; |
2697 | } else { |
2698 | cur_poc = h->cur_pic_ptr->field_poc[h->picture_structure - 1]; |
2699 | } |
2700 | if (h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF(h) && |
2701 | h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2 * cur_poc) { |
2702 | h->use_weight = 0; |
2703 | h->use_weight_chroma = 0; |
2704 | return; |
2705 | } |
2706 | ref_start = 0; |
2707 | ref_count0 = h->ref_count[0]; |
2708 | ref_count1 = h->ref_count[1]; |
2709 | } else { |
2710 | cur_poc = h->cur_pic_ptr->field_poc[field]; |
2711 | ref_start = 16; |
2712 | ref_count0 = 16 + 2 * h->ref_count[0]; |
2713 | ref_count1 = 16 + 2 * h->ref_count[1]; |
2714 | } |
2715 | |
2716 | h->use_weight = 2; |
2717 | h->use_weight_chroma = 2; |
2718 | h->luma_log2_weight_denom = 5; |
2719 | h->chroma_log2_weight_denom = 5; |
2720 | |
2721 | for (ref0 = ref_start; ref0 < ref_count0; ref0++) { |
2722 | int poc0 = h->ref_list[0][ref0].poc; |
2723 | for (ref1 = ref_start; ref1 < ref_count1; ref1++) { |
2724 | int w = 32; |
2725 | if (!h->ref_list[0][ref0].long_ref && !h->ref_list[1][ref1].long_ref) { |
2726 | int poc1 = h->ref_list[1][ref1].poc; |
2727 | int td = av_clip(poc1 - poc0, -128, 127); |
2728 | if (td) { |
2729 | int tb = av_clip(cur_poc - poc0, -128, 127); |
2730 | int tx = (16384 + (FFABS(td) >> 1)) / td; |
2731 | int dist_scale_factor = (tb * tx + 32) >> 8; |
2732 | if (dist_scale_factor >= -64 && dist_scale_factor <= 128) |
2733 | w = 64 - dist_scale_factor; |
2734 | } |
2735 | } |
2736 | if (field < 0) { |
2737 | h->implicit_weight[ref0][ref1][0] = |
2738 | h->implicit_weight[ref0][ref1][1] = w; |
2739 | } else { |
2740 | h->implicit_weight[ref0][ref1][field] = w; |
2741 | } |
2742 | } |
2743 | } |
2744 | } |
2745 | |
2746 | /** |
2747 | * instantaneous decoder refresh. |
2748 | */ |
2749 | static void idr(H264Context *h) |
2750 | { |
2751 | int i; |
2752 | ff_h264_remove_all_refs(h); |
2753 | h->prev_frame_num = 0; |
2754 | h->prev_frame_num_offset = 0; |
2755 | h->prev_poc_msb = 1<<16; |
2756 | h->prev_poc_lsb = 0; |
2757 | for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) |
2758 | h->last_pocs[i] = INT_MIN; |
2759 | } |
2760 | |
2761 | /* forget old pics after a seek */ |
2762 | static void flush_change(H264Context *h) |
2763 | { |
2764 | int i, j; |
2765 | |
2766 | h->outputed_poc = h->next_outputed_poc = INT_MIN; |
2767 | h->prev_interlaced_frame = 1; |
2768 | idr(h); |
2769 | |
2770 | h->prev_frame_num = -1; |
2771 | if (h->cur_pic_ptr) { |
2772 | h->cur_pic_ptr->reference = 0; |
2773 | for (j=i=0; h->delayed_pic[i]; i++) |
2774 | if (h->delayed_pic[i] != h->cur_pic_ptr) |
2775 | h->delayed_pic[j++] = h->delayed_pic[i]; |
2776 | h->delayed_pic[j] = NULL; |
2777 | } |
2778 | h->first_field = 0; |
2779 | memset(h->ref_list[0], 0, sizeof(h->ref_list[0])); |
2780 | memset(h->ref_list[1], 0, sizeof(h->ref_list[1])); |
2781 | memset(h->default_ref_list[0], 0, sizeof(h->default_ref_list[0])); |
2782 | memset(h->default_ref_list[1], 0, sizeof(h->default_ref_list[1])); |
2783 | ff_h264_reset_sei(h); |
2784 | h->recovery_frame= -1; |
2785 | h->sync= 0; |
2786 | h->list_count = 0; |
2787 | h->current_slice = 0; |
2788 | h->mmco_reset = 1; |
2789 | } |
2790 | |
2791 | /* forget old pics after a seek */ |
2792 | static void flush_dpb(AVCodecContext *avctx) |
2793 | { |
2794 | H264Context *h = avctx->priv_data; |
2795 | int i; |
2796 | |
2797 | for (i = 0; i <= MAX_DELAYED_PIC_COUNT; i++) { |
2798 | if (h->delayed_pic[i]) |
2799 | h->delayed_pic[i]->reference = 0; |
2800 | h->delayed_pic[i] = NULL; |
2801 | } |
2802 | |
2803 | flush_change(h); |
2804 | |
2805 | if (h->DPB) |
2806 | for (i = 0; i < MAX_PICTURE_COUNT; i++) |
2807 | unref_picture(h, &h->DPB[i]); |
2808 | h->cur_pic_ptr = NULL; |
2809 | unref_picture(h, &h->cur_pic); |
2810 | |
2811 | h->mb_x = h->mb_y = 0; |
2812 | |
2813 | h->parse_context.state = -1; |
2814 | h->parse_context.frame_start_found = 0; |
2815 | h->parse_context.overread = 0; |
2816 | h->parse_context.overread_index = 0; |
2817 | h->parse_context.index = 0; |
2818 | h->parse_context.last_index = 0; |
2819 | } |
2820 | |
2821 | int ff_init_poc(H264Context *h, int pic_field_poc[2], int *pic_poc) |
2822 | { |
2823 | const int max_frame_num = 1 << h->sps.log2_max_frame_num; |
2824 | int field_poc[2]; |
2825 | |
2826 | h->frame_num_offset = h->prev_frame_num_offset; |
2827 | if (h->frame_num < h->prev_frame_num) |
2828 | h->frame_num_offset += max_frame_num; |
2829 | |
2830 | if (h->sps.poc_type == 0) { |
2831 | const int max_poc_lsb = 1 << h->sps.log2_max_poc_lsb; |
2832 | |
2833 | if (h->poc_lsb < h->prev_poc_lsb && |
2834 | h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb / 2) |
2835 | h->poc_msb = h->prev_poc_msb + max_poc_lsb; |
2836 | else if (h->poc_lsb > h->prev_poc_lsb && |
2837 | h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb / 2) |
2838 | h->poc_msb = h->prev_poc_msb - max_poc_lsb; |
2839 | else |
2840 | h->poc_msb = h->prev_poc_msb; |
2841 | field_poc[0] = |
2842 | field_poc[1] = h->poc_msb + h->poc_lsb; |
2843 | if (h->picture_structure == PICT_FRAME) |
2844 | field_poc[1] += h->delta_poc_bottom; |
2845 | } else if (h->sps.poc_type == 1) { |
2846 | int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc; |
2847 | int i; |
2848 | |
2849 | if (h->sps.poc_cycle_length != 0) |
2850 | abs_frame_num = h->frame_num_offset + h->frame_num; |
2851 | else |
2852 | abs_frame_num = 0; |
2853 | |
2854 | if (h->nal_ref_idc == 0 && abs_frame_num > 0) |
2855 | abs_frame_num--; |
2856 | |
2857 | expected_delta_per_poc_cycle = 0; |
2858 | for (i = 0; i < h->sps.poc_cycle_length; i++) |
2859 | // FIXME integrate during sps parse |
2860 | expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[i]; |
2861 | |
2862 | if (abs_frame_num > 0) { |
2863 | int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length; |
2864 | int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length; |
2865 | |
2866 | expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle; |
2867 | for (i = 0; i <= frame_num_in_poc_cycle; i++) |
2868 | expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[i]; |
2869 | } else |
2870 | expectedpoc = 0; |
2871 | |
2872 | if (h->nal_ref_idc == 0) |
2873 | expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic; |
2874 | |
2875 | field_poc[0] = expectedpoc + h->delta_poc[0]; |
2876 | field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field; |
2877 | |
2878 | if (h->picture_structure == PICT_FRAME) |
2879 | field_poc[1] += h->delta_poc[1]; |
2880 | } else { |
2881 | int poc = 2 * (h->frame_num_offset + h->frame_num); |
2882 | |
2883 | if (!h->nal_ref_idc) |
2884 | poc--; |
2885 | |
2886 | field_poc[0] = poc; |
2887 | field_poc[1] = poc; |
2888 | } |
2889 | |
2890 | if (h->picture_structure != PICT_BOTTOM_FIELD) |
2891 | pic_field_poc[0] = field_poc[0]; |
2892 | if (h->picture_structure != PICT_TOP_FIELD) |
2893 | pic_field_poc[1] = field_poc[1]; |
2894 | *pic_poc = FFMIN(pic_field_poc[0], pic_field_poc[1]); |
2895 | |
2896 | return 0; |
2897 | } |
2898 | |
2899 | /** |
2900 | * initialize scan tables |
2901 | */ |
2902 | static void init_scan_tables(H264Context *h) |
2903 | { |
2904 | int i; |
2905 | for (i = 0; i < 16; i++) { |
2906 | #define T(x) (x >> 2) | ((x << 2) & 0xF) |
2907 | h->zigzag_scan[i] = T(zigzag_scan[i]); |
2908 | h->field_scan[i] = T(field_scan[i]); |
2909 | #undef T |
2910 | } |
2911 | for (i = 0; i < 64; i++) { |
2912 | #define T(x) (x >> 3) | ((x & 7) << 3) |
2913 | h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]); |
2914 | h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]); |
2915 | h->field_scan8x8[i] = T(field_scan8x8[i]); |
2916 | h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]); |
2917 | #undef T |
2918 | } |
2919 | if (h->sps.transform_bypass) { // FIXME same ugly |
2920 | memcpy(h->zigzag_scan_q0 , zigzag_scan , sizeof(h->zigzag_scan_q0 )); |
2921 | memcpy(h->zigzag_scan8x8_q0 , ff_zigzag_direct , sizeof(h->zigzag_scan8x8_q0 )); |
2922 | memcpy(h->zigzag_scan8x8_cavlc_q0 , zigzag_scan8x8_cavlc , sizeof(h->zigzag_scan8x8_cavlc_q0)); |
2923 | memcpy(h->field_scan_q0 , field_scan , sizeof(h->field_scan_q0 )); |
2924 | memcpy(h->field_scan8x8_q0 , field_scan8x8 , sizeof(h->field_scan8x8_q0 )); |
2925 | memcpy(h->field_scan8x8_cavlc_q0 , field_scan8x8_cavlc , sizeof(h->field_scan8x8_cavlc_q0 )); |
2926 | } else { |
2927 | memcpy(h->zigzag_scan_q0 , h->zigzag_scan , sizeof(h->zigzag_scan_q0 )); |
2928 | memcpy(h->zigzag_scan8x8_q0 , h->zigzag_scan8x8 , sizeof(h->zigzag_scan8x8_q0 )); |
2929 | memcpy(h->zigzag_scan8x8_cavlc_q0 , h->zigzag_scan8x8_cavlc , sizeof(h->zigzag_scan8x8_cavlc_q0)); |
2930 | memcpy(h->field_scan_q0 , h->field_scan , sizeof(h->field_scan_q0 )); |
2931 | memcpy(h->field_scan8x8_q0 , h->field_scan8x8 , sizeof(h->field_scan8x8_q0 )); |
2932 | memcpy(h->field_scan8x8_cavlc_q0 , h->field_scan8x8_cavlc , sizeof(h->field_scan8x8_cavlc_q0 )); |
2933 | } |
2934 | } |
2935 | |
2936 | static int field_end(H264Context *h, int in_setup) |
2937 | { |
2938 | AVCodecContext *const avctx = h->avctx; |
2939 | int err = 0; |
2940 | h->mb_y = 0; |
2941 | |
2942 | if (CONFIG_H264_VDPAU_DECODER && |
2943 | h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU) |
2944 | ff_vdpau_h264_set_reference_frames(h); |
2945 | |
2946 | if (in_setup || !(avctx->active_thread_type & FF_THREAD_FRAME)) { |
2947 | if (!h->droppable) { |
2948 | err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index); |
2949 | h->prev_poc_msb = h->poc_msb; |
2950 | h->prev_poc_lsb = h->poc_lsb; |
2951 | } |
2952 | h->prev_frame_num_offset = h->frame_num_offset; |
2953 | h->prev_frame_num = h->frame_num; |
2954 | h->outputed_poc = h->next_outputed_poc; |
2955 | } |
2956 | |
2957 | if (avctx->hwaccel) { |
2958 | if (avctx->hwaccel->end_frame(avctx) < 0) |
2959 | av_log(avctx, AV_LOG_ERROR, |
2960 | "hardware accelerator failed to decode picture\n"); |
2961 | } |
2962 | |
2963 | if (CONFIG_H264_VDPAU_DECODER && |
2964 | h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU) |
2965 | ff_vdpau_h264_picture_complete(h); |
2966 | |
2967 | /* |
2968 | * FIXME: Error handling code does not seem to support interlaced |
2969 | * when slices span multiple rows |
2970 | * The ff_er_add_slice calls don't work right for bottom |
2971 | * fields; they cause massive erroneous error concealing |
2972 | * Error marking covers both fields (top and bottom). |
2973 | * This causes a mismatched s->error_count |
2974 | * and a bad error table. Further, the error count goes to |
2975 | * INT_MAX when called for bottom field, because mb_y is |
2976 | * past end by one (callers fault) and resync_mb_y != 0 |
2977 | * causes problems for the first MB line, too. |
2978 | */ |
2979 | if (CONFIG_ERROR_RESILIENCE && |
2980 | !FIELD_PICTURE(h) && h->current_slice && !h->sps.new) { |
2981 | h->er.cur_pic = h->cur_pic_ptr; |
2982 | ff_er_frame_end(&h->er); |
2983 | } |
2984 | if (!in_setup && !h->droppable) |
2985 | ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, |
2986 | h->picture_structure == PICT_BOTTOM_FIELD); |
2987 | emms_c(); |
2988 | |
2989 | h->current_slice = 0; |
2990 | |
2991 | return err; |
2992 | } |
2993 | |
2994 | /** |
2995 | * Replicate H264 "master" context to thread contexts. |
2996 | */ |
2997 | static int clone_slice(H264Context *dst, H264Context *src) |
2998 | { |
2999 | memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset)); |
3000 | dst->cur_pic_ptr = src->cur_pic_ptr; |
3001 | dst->cur_pic = src->cur_pic; |
3002 | dst->linesize = src->linesize; |
3003 | dst->uvlinesize = src->uvlinesize; |
3004 | dst->first_field = src->first_field; |
3005 | |
3006 | dst->prev_poc_msb = src->prev_poc_msb; |
3007 | dst->prev_poc_lsb = src->prev_poc_lsb; |
3008 | dst->prev_frame_num_offset = src->prev_frame_num_offset; |
3009 | dst->prev_frame_num = src->prev_frame_num; |
3010 | dst->short_ref_count = src->short_ref_count; |
3011 | |
3012 | memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref)); |
3013 | memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref)); |
3014 | memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list)); |
3015 | |
3016 | memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff)); |
3017 | memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff)); |
3018 | |
3019 | return 0; |
3020 | } |
3021 | |
3022 | /** |
3023 | * Compute profile from profile_idc and constraint_set?_flags. |
3024 | * |
3025 | * @param sps SPS |
3026 | * |
3027 | * @return profile as defined by FF_PROFILE_H264_* |
3028 | */ |
3029 | int ff_h264_get_profile(SPS *sps) |
3030 | { |
3031 | int profile = sps->profile_idc; |
3032 | |
3033 | switch (sps->profile_idc) { |
3034 | case FF_PROFILE_H264_BASELINE: |
3035 | // constraint_set1_flag set to 1 |
3036 | profile |= (sps->constraint_set_flags & 1 << 1) ? FF_PROFILE_H264_CONSTRAINED : 0; |
3037 | break; |
3038 | case FF_PROFILE_H264_HIGH_10: |
3039 | case FF_PROFILE_H264_HIGH_422: |
3040 | case FF_PROFILE_H264_HIGH_444_PREDICTIVE: |
3041 | // constraint_set3_flag set to 1 |
3042 | profile |= (sps->constraint_set_flags & 1 << 3) ? FF_PROFILE_H264_INTRA : 0; |
3043 | break; |
3044 | } |
3045 | |
3046 | return profile; |
3047 | } |
3048 | |
3049 | static int h264_set_parameter_from_sps(H264Context *h) |
3050 | { |
3051 | if (h->flags & CODEC_FLAG_LOW_DELAY || |
3052 | (h->sps.bitstream_restriction_flag && |
3053 | !h->sps.num_reorder_frames)) { |
3054 | if (h->avctx->has_b_frames > 1 || h->delayed_pic[0]) |
3055 | av_log(h->avctx, AV_LOG_WARNING, "Delayed frames seen. " |
3056 | "Reenabling low delay requires a codec flush.\n"); |
3057 | else |
3058 | h->low_delay = 1; |
3059 | } |
3060 | |
3061 | if (h->avctx->has_b_frames < 2) |
3062 | h->avctx->has_b_frames = !h->low_delay; |
3063 | |
3064 | if (h->sps.bit_depth_luma != h->sps.bit_depth_chroma) { |
3065 | avpriv_request_sample(h->avctx, |
3066 | "Different chroma and luma bit depth"); |
3067 | return AVERROR_PATCHWELCOME; |
3068 | } |
3069 | |
3070 | if (h->avctx->bits_per_raw_sample != h->sps.bit_depth_luma || |
3071 | h->cur_chroma_format_idc != h->sps.chroma_format_idc) { |
3072 | if (h->avctx->codec && |
3073 | h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU && |
3074 | (h->sps.bit_depth_luma != 8 || h->sps.chroma_format_idc > 1)) { |
3075 | av_log(h->avctx, AV_LOG_ERROR, |
3076 | "VDPAU decoding does not support video colorspace.\n"); |
3077 | return AVERROR_INVALIDDATA; |
3078 | } |
3079 | if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 14 && |
3080 | h->sps.bit_depth_luma != 11 && h->sps.bit_depth_luma != 13) { |
3081 | h->avctx->bits_per_raw_sample = h->sps.bit_depth_luma; |
3082 | h->cur_chroma_format_idc = h->sps.chroma_format_idc; |
3083 | h->pixel_shift = h->sps.bit_depth_luma > 8; |
3084 | |
3085 | ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma, |
3086 | h->sps.chroma_format_idc); |
3087 | ff_h264chroma_init(&h->h264chroma, h->sps.bit_depth_chroma); |
3088 | ff_h264qpel_init(&h->h264qpel, h->sps.bit_depth_luma); |
3089 | ff_h264_pred_init(&h->hpc, h->avctx->codec_id, h->sps.bit_depth_luma, |
3090 | h->sps.chroma_format_idc); |
3091 | |
3092 | if (CONFIG_ERROR_RESILIENCE) |
3093 | ff_dsputil_init(&h->dsp, h->avctx); |
3094 | ff_videodsp_init(&h->vdsp, h->sps.bit_depth_luma); |
3095 | } else { |
3096 | av_log(h->avctx, AV_LOG_ERROR, "Unsupported bit depth: %d\n", |
3097 | h->sps.bit_depth_luma); |
3098 | return AVERROR_INVALIDDATA; |
3099 | } |
3100 | } |
3101 | return 0; |
3102 | } |
3103 | |
3104 | static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback) |
3105 | { |
3106 | switch (h->sps.bit_depth_luma) { |
3107 | case 9: |
3108 | if (CHROMA444(h)) { |
3109 | if (h->avctx->colorspace == AVCOL_SPC_RGB) { |
3110 | return AV_PIX_FMT_GBRP9; |
3111 | } else |
3112 | return AV_PIX_FMT_YUV444P9; |
3113 | } else if (CHROMA422(h)) |
3114 | return AV_PIX_FMT_YUV422P9; |
3115 | else |
3116 | return AV_PIX_FMT_YUV420P9; |
3117 | break; |
3118 | case 10: |
3119 | if (CHROMA444(h)) { |
3120 | if (h->avctx->colorspace == AVCOL_SPC_RGB) { |
3121 | return AV_PIX_FMT_GBRP10; |
3122 | } else |
3123 | return AV_PIX_FMT_YUV444P10; |
3124 | } else if (CHROMA422(h)) |
3125 | return AV_PIX_FMT_YUV422P10; |
3126 | else |
3127 | return AV_PIX_FMT_YUV420P10; |
3128 | break; |
3129 | case 12: |
3130 | if (CHROMA444(h)) { |
3131 | if (h->avctx->colorspace == AVCOL_SPC_RGB) { |
3132 | return AV_PIX_FMT_GBRP12; |
3133 | } else |
3134 | return AV_PIX_FMT_YUV444P12; |
3135 | } else if (CHROMA422(h)) |
3136 | return AV_PIX_FMT_YUV422P12; |
3137 | else |
3138 | return AV_PIX_FMT_YUV420P12; |
3139 | break; |
3140 | case 14: |
3141 | if (CHROMA444(h)) { |
3142 | if (h->avctx->colorspace == AVCOL_SPC_RGB) { |
3143 | return AV_PIX_FMT_GBRP14; |
3144 | } else |
3145 | return AV_PIX_FMT_YUV444P14; |
3146 | } else if (CHROMA422(h)) |
3147 | return AV_PIX_FMT_YUV422P14; |
3148 | else |
3149 | return AV_PIX_FMT_YUV420P14; |
3150 | break; |
3151 | case 8: |
3152 | if (CHROMA444(h)) { |
3153 | if (h->avctx->colorspace == AVCOL_SPC_RGB) { |
3154 | av_log(h->avctx, AV_LOG_DEBUG, "Detected GBR colorspace.\n"); |
3155 | return AV_PIX_FMT_GBR24P; |
3156 | } else if (h->avctx->colorspace == AVCOL_SPC_YCGCO) { |
3157 | av_log(h->avctx, AV_LOG_WARNING, "Detected unsupported YCgCo colorspace.\n"); |
3158 | } |
3159 | return h->avctx->color_range == AVCOL_RANGE_JPEG ? AV_PIX_FMT_YUVJ444P |
3160 | : AV_PIX_FMT_YUV444P; |
3161 | } else if (CHROMA422(h)) { |
3162 | return h->avctx->color_range == AVCOL_RANGE_JPEG ? AV_PIX_FMT_YUVJ422P |
3163 | : AV_PIX_FMT_YUV422P; |
3164 | } else { |
3165 | int i; |
3166 | const enum AVPixelFormat * fmt = h->avctx->codec->pix_fmts ? |
3167 | h->avctx->codec->pix_fmts : |
3168 | h->avctx->color_range == AVCOL_RANGE_JPEG ? |
3169 | h264_hwaccel_pixfmt_list_jpeg_420 : |
3170 | h264_hwaccel_pixfmt_list_420; |
3171 | |
3172 | for (i=0; fmt[i] != AV_PIX_FMT_NONE; i++) |
3173 | if (fmt[i] == h->avctx->pix_fmt && !force_callback) |
3174 | return fmt[i]; |
3175 | return ff_thread_get_format(h->avctx, fmt); |
3176 | } |
3177 | break; |
3178 | default: |
3179 | av_log(h->avctx, AV_LOG_ERROR, |
3180 | "Unsupported bit depth: %d\n", h->sps.bit_depth_luma); |
3181 | return AVERROR_INVALIDDATA; |
3182 | } |
3183 | } |
3184 | |
3185 | /* export coded and cropped frame dimensions to AVCodecContext */ |
3186 | static int init_dimensions(H264Context *h) |
3187 | { |
3188 | int width = h->width - (h->sps.crop_right + h->sps.crop_left); |
3189 | int height = h->height - (h->sps.crop_top + h->sps.crop_bottom); |
3190 | av_assert0(h->sps.crop_right + h->sps.crop_left < (unsigned)h->width); |
3191 | av_assert0(h->sps.crop_top + h->sps.crop_bottom < (unsigned)h->height); |
3192 | |
3193 | /* handle container cropping */ |
3194 | if (!h->sps.crop && |
3195 | FFALIGN(h->avctx->width, 16) == h->width && |
3196 | FFALIGN(h->avctx->height, 16) == h->height) { |
3197 | width = h->avctx->width; |
3198 | height = h->avctx->height; |
3199 | } |
3200 | |
3201 | if (width <= 0 || height <= 0) { |
3202 | av_log(h->avctx, AV_LOG_ERROR, "Invalid cropped dimensions: %dx%d.\n", |
3203 | width, height); |
3204 | if (h->avctx->err_recognition & AV_EF_EXPLODE) |
3205 | return AVERROR_INVALIDDATA; |
3206 | |
3207 | av_log(h->avctx, AV_LOG_WARNING, "Ignoring cropping information.\n"); |
3208 | h->sps.crop_bottom = h->sps.crop_top = h->sps.crop_right = h->sps.crop_left = 0; |
3209 | h->sps.crop = 0; |
3210 | |
3211 | width = h->width; |
3212 | height = h->height; |
3213 | } |
3214 | |
3215 | h->avctx->coded_width = h->width; |
3216 | h->avctx->coded_height = h->height; |
3217 | h->avctx->width = width; |
3218 | h->avctx->height = height; |
3219 | |
3220 | return 0; |
3221 | } |
3222 | |
3223 | static int h264_slice_header_init(H264Context *h, int reinit) |
3224 | { |
3225 | int nb_slices = (HAVE_THREADS && |
3226 | h->avctx->active_thread_type & FF_THREAD_SLICE) ? |
3227 | h->avctx->thread_count : 1; |
3228 | int i, ret; |
3229 | |
3230 | h->avctx->sample_aspect_ratio = h->sps.sar; |
3231 | av_assert0(h->avctx->sample_aspect_ratio.den); |
3232 | av_pix_fmt_get_chroma_sub_sample(h->avctx->pix_fmt, |
3233 | &h->chroma_x_shift, &h->chroma_y_shift); |
3234 | |
3235 | if (h->sps.timing_info_present_flag) { |
3236 | int64_t den = h->sps.time_scale; |
3237 | if (h->x264_build < 44U) |
3238 | den *= 2; |
3239 | av_reduce(&h->avctx->time_base.num, &h->avctx->time_base.den, |
3240 | h->sps.num_units_in_tick, den, 1 << 30); |
3241 | } |
3242 | |
3243 | h->avctx->hwaccel = ff_find_hwaccel(h->avctx->codec->id, h->avctx->pix_fmt); |
3244 | |
3245 | if (reinit) |
3246 | free_tables(h, 0); |
3247 | h->first_field = 0; |
3248 | h->prev_interlaced_frame = 1; |
3249 | |
3250 | init_scan_tables(h); |
3251 | ret = ff_h264_alloc_tables(h); |
3252 | if (ret < 0) { |
3253 | av_log(h->avctx, AV_LOG_ERROR, |
3254 | "Could not allocate memory for h264\n"); |
3255 | return ret; |
3256 | } |
3257 | |
3258 | if (nb_slices > MAX_THREADS || (nb_slices > h->mb_height && h->mb_height)) { |
3259 | int max_slices; |
3260 | if (h->mb_height) |
3261 | max_slices = FFMIN(MAX_THREADS, h->mb_height); |
3262 | else |
3263 | max_slices = MAX_THREADS; |
3264 | av_log(h->avctx, AV_LOG_WARNING, "too many threads/slices (%d)," |
3265 | " reducing to %d\n", nb_slices, max_slices); |
3266 | nb_slices = max_slices; |
3267 | } |
3268 | h->slice_context_count = nb_slices; |
3269 | |
3270 | if (!HAVE_THREADS || !(h->avctx->active_thread_type & FF_THREAD_SLICE)) { |
3271 | ret = context_init(h); |
3272 | if (ret < 0) { |
3273 | av_log(h->avctx, AV_LOG_ERROR, "context_init() failed.\n"); |
3274 | return ret; |
3275 | } |
3276 | } else { |
3277 | for (i = 1; i < h->slice_context_count; i++) { |
3278 | H264Context *c; |
3279 | c = h->thread_context[i] = av_mallocz(sizeof(H264Context)); |
3280 | if (!c) |
3281 | return AVERROR(ENOMEM); |
3282 | c->avctx = h->avctx; |
3283 | if (CONFIG_ERROR_RESILIENCE) { |
3284 | c->dsp = h->dsp; |
3285 | } |
3286 | c->vdsp = h->vdsp; |
3287 | c->h264dsp = h->h264dsp; |
3288 | c->h264qpel = h->h264qpel; |
3289 | c->h264chroma = h->h264chroma; |
3290 | c->sps = h->sps; |
3291 | c->pps = h->pps; |
3292 | c->pixel_shift = h->pixel_shift; |
3293 | c->cur_chroma_format_idc = h->cur_chroma_format_idc; |
3294 | c->width = h->width; |
3295 | c->height = h->height; |
3296 | c->linesize = h->linesize; |
3297 | c->uvlinesize = h->uvlinesize; |
3298 | c->chroma_x_shift = h->chroma_x_shift; |
3299 | c->chroma_y_shift = h->chroma_y_shift; |
3300 | c->qscale = h->qscale; |
3301 | c->droppable = h->droppable; |
3302 | c->data_partitioning = h->data_partitioning; |
3303 | c->low_delay = h->low_delay; |
3304 | c->mb_width = h->mb_width; |
3305 | c->mb_height = h->mb_height; |
3306 | c->mb_stride = h->mb_stride; |
3307 | c->mb_num = h->mb_num; |
3308 | c->flags = h->flags; |
3309 | c->workaround_bugs = h->workaround_bugs; |
3310 | c->pict_type = h->pict_type; |
3311 | |
3312 | init_scan_tables(c); |
3313 | clone_tables(c, h, i); |
3314 | c->context_initialized = 1; |
3315 | } |
3316 | |
3317 | for (i = 0; i < h->slice_context_count; i++) |
3318 | if ((ret = context_init(h->thread_context[i])) < 0) { |
3319 | av_log(h->avctx, AV_LOG_ERROR, "context_init() failed.\n"); |
3320 | return ret; |
3321 | } |
3322 | } |
3323 | |
3324 | h->context_initialized = 1; |
3325 | |
3326 | return 0; |
3327 | } |
3328 | |
3329 | int ff_set_ref_count(H264Context *h) |
3330 | { |
3331 | int num_ref_idx_active_override_flag; |
3332 | |
3333 | // set defaults, might be overridden a few lines later |
3334 | h->ref_count[0] = h->pps.ref_count[0]; |
3335 | h->ref_count[1] = h->pps.ref_count[1]; |
3336 | |
3337 | if (h->slice_type_nos != AV_PICTURE_TYPE_I) { |
3338 | unsigned max[2]; |
3339 | max[0] = max[1] = h->picture_structure == PICT_FRAME ? 15 : 31; |
3340 | |
3341 | if (h->slice_type_nos == AV_PICTURE_TYPE_B) |
3342 | h->direct_spatial_mv_pred = get_bits1(&h->gb); |
3343 | num_ref_idx_active_override_flag = get_bits1(&h->gb); |
3344 | |
3345 | if (num_ref_idx_active_override_flag) { |
3346 | h->ref_count[0] = get_ue_golomb(&h->gb) + 1; |
3347 | if (h->slice_type_nos == AV_PICTURE_TYPE_B) { |
3348 | h->ref_count[1] = get_ue_golomb(&h->gb) + 1; |
3349 | } else |
3350 | // full range is spec-ok in this case, even for frames |
3351 | h->ref_count[1] = 1; |
3352 | } |
3353 | |
3354 | if (h->ref_count[0]-1 > max[0] || h->ref_count[1]-1 > max[1]){ |
3355 | av_log(h->avctx, AV_LOG_ERROR, "reference overflow %u > %u or %u > %u\n", h->ref_count[0]-1, max[0], h->ref_count[1]-1, max[1]); |
3356 | h->ref_count[0] = h->ref_count[1] = 0; |
3357 | return AVERROR_INVALIDDATA; |
3358 | } |
3359 | |
3360 | if (h->slice_type_nos == AV_PICTURE_TYPE_B) |
3361 | h->list_count = 2; |
3362 | else |
3363 | h->list_count = 1; |
3364 | } else { |
3365 | h->list_count = 0; |
3366 | h->ref_count[0] = h->ref_count[1] = 0; |
3367 | } |
3368 | |
3369 | return 0; |
3370 | } |
3371 | |
3372 | /** |
3373 | * Decode a slice header. |
3374 | * This will also call ff_MPV_common_init() and frame_start() as needed. |
3375 | * |
3376 | * @param h h264context |
3377 | * @param h0 h264 master context (differs from 'h' when doing sliced based |
3378 | * parallel decoding) |
3379 | * |
3380 | * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded |
3381 | */ |
3382 | static int decode_slice_header(H264Context *h, H264Context *h0) |
3383 | { |
3384 | unsigned int first_mb_in_slice; |
3385 | unsigned int pps_id; |
3386 | int ret; |
3387 | unsigned int slice_type, tmp, i, j; |
3388 | int last_pic_structure, last_pic_droppable; |
3389 | int must_reinit; |
3390 | int needs_reinit = 0; |
3391 | int field_pic_flag, bottom_field_flag; |
3392 | |
3393 | h->me.qpel_put = h->h264qpel.put_h264_qpel_pixels_tab; |
3394 | h->me.qpel_avg = h->h264qpel.avg_h264_qpel_pixels_tab; |
3395 | |
3396 | first_mb_in_slice = get_ue_golomb_long(&h->gb); |
3397 | |
3398 | if (first_mb_in_slice == 0) { // FIXME better field boundary detection |
3399 | if (h0->current_slice && FIELD_PICTURE(h)) { |
3400 | field_end(h, 1); |
3401 | } |
3402 | |
3403 | h0->current_slice = 0; |
3404 | if (!h0->first_field) { |
3405 | if (h->cur_pic_ptr && !h->droppable) { |
3406 | ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, |
3407 | h->picture_structure == PICT_BOTTOM_FIELD); |
3408 | } |
3409 | h->cur_pic_ptr = NULL; |
3410 | } |
3411 | } |
3412 | |
3413 | slice_type = get_ue_golomb_31(&h->gb); |
3414 | if (slice_type > 9) { |
3415 | av_log(h->avctx, AV_LOG_ERROR, |
3416 | "slice type too large (%d) at %d %d\n", |
3417 | slice_type, h->mb_x, h->mb_y); |
3418 | return AVERROR_INVALIDDATA; |
3419 | } |
3420 | if (slice_type > 4) { |
3421 | slice_type -= 5; |
3422 | h->slice_type_fixed = 1; |
3423 | } else |
3424 | h->slice_type_fixed = 0; |
3425 | |
3426 | slice_type = golomb_to_pict_type[slice_type]; |
3427 | h->slice_type = slice_type; |
3428 | h->slice_type_nos = slice_type & 3; |
3429 | |
3430 | // to make a few old functions happy, it's wrong though |
3431 | h->pict_type = h->slice_type; |
3432 | |
3433 | pps_id = get_ue_golomb(&h->gb); |
3434 | if (pps_id >= MAX_PPS_COUNT) { |
3435 | av_log(h->avctx, AV_LOG_ERROR, "pps_id %d out of range\n", pps_id); |
3436 | return AVERROR_INVALIDDATA; |
3437 | } |
3438 | if (!h0->pps_buffers[pps_id]) { |
3439 | av_log(h->avctx, AV_LOG_ERROR, |
3440 | "non-existing PPS %u referenced\n", |
3441 | pps_id); |
3442 | return AVERROR_INVALIDDATA; |
3443 | } |
3444 | h->pps = *h0->pps_buffers[pps_id]; |
3445 | |
3446 | if (!h0->sps_buffers[h->pps.sps_id]) { |
3447 | av_log(h->avctx, AV_LOG_ERROR, |
3448 | "non-existing SPS %u referenced\n", |
3449 | h->pps.sps_id); |
3450 | return AVERROR_INVALIDDATA; |
3451 | } |
3452 | |
3453 | if (h->pps.sps_id != h->current_sps_id || |
3454 | h0->sps_buffers[h->pps.sps_id]->new) { |
3455 | h0->sps_buffers[h->pps.sps_id]->new = 0; |
3456 | |
3457 | h->current_sps_id = h->pps.sps_id; |
3458 | h->sps = *h0->sps_buffers[h->pps.sps_id]; |
3459 | |
3460 | if (h->mb_width != h->sps.mb_width || |
3461 | h->mb_height != h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag) || |
3462 | h->avctx->bits_per_raw_sample != h->sps.bit_depth_luma || |
3463 | h->cur_chroma_format_idc != h->sps.chroma_format_idc |
3464 | ) |
3465 | needs_reinit = 1; |
3466 | |
3467 | if (h->bit_depth_luma != h->sps.bit_depth_luma || |
3468 | h->chroma_format_idc != h->sps.chroma_format_idc) { |
3469 | h->bit_depth_luma = h->sps.bit_depth_luma; |
3470 | h->chroma_format_idc = h->sps.chroma_format_idc; |
3471 | needs_reinit = 1; |
3472 | } |
3473 | if ((ret = h264_set_parameter_from_sps(h)) < 0) |
3474 | return ret; |
3475 | } |
3476 | |
3477 | h->avctx->profile = ff_h264_get_profile(&h->sps); |
3478 | h->avctx->level = h->sps.level_idc; |
3479 | h->avctx->refs = h->sps.ref_frame_count; |
3480 | |
3481 | must_reinit = (h->context_initialized && |
3482 | ( 16*h->sps.mb_width != h->avctx->coded_width |
3483 | || 16*h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag) != h->avctx->coded_height |
3484 | || h->avctx->bits_per_raw_sample != h->sps.bit_depth_luma |
3485 | || h->cur_chroma_format_idc != h->sps.chroma_format_idc |
3486 | || av_cmp_q(h->sps.sar, h->avctx->sample_aspect_ratio) |
3487 | || h->mb_width != h->sps.mb_width |
3488 | || h->mb_height != h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag) |
3489 | )); |
3490 | if (h0->avctx->pix_fmt != get_pixel_format(h0, 0)) |
3491 | must_reinit = 1; |
3492 | |
3493 | h->mb_width = h->sps.mb_width; |
3494 | h->mb_height = h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag); |
3495 | h->mb_num = h->mb_width * h->mb_height; |
3496 | h->mb_stride = h->mb_width + 1; |
3497 | |
3498 | h->b_stride = h->mb_width * 4; |
3499 | |
3500 | h->chroma_y_shift = h->sps.chroma_format_idc <= 1; // 400 uses yuv420p |
3501 | |
3502 | h->width = 16 * h->mb_width; |
3503 | h->height = 16 * h->mb_height; |
3504 | |
3505 | ret = init_dimensions(h); |
3506 | if (ret < 0) |
3507 | return ret; |
3508 | |
3509 | if (h->sps.video_signal_type_present_flag) { |
3510 | h->avctx->color_range = h->sps.full_range>0 ? AVCOL_RANGE_JPEG |
3511 | : AVCOL_RANGE_MPEG; |
3512 | if (h->sps.colour_description_present_flag) { |
3513 | if (h->avctx->colorspace != h->sps.colorspace) |
3514 | needs_reinit = 1; |
3515 | h->avctx->color_primaries = h->sps.color_primaries; |
3516 | h->avctx->color_trc = h->sps.color_trc; |
3517 | h->avctx->colorspace = h->sps.colorspace; |
3518 | } |
3519 | } |
3520 | |
3521 | if (h->context_initialized && |
3522 | (h->width != h->avctx->coded_width || |
3523 | h->height != h->avctx->coded_height || |
3524 | must_reinit || |
3525 | needs_reinit)) { |
3526 | if (h != h0) { |
3527 | av_log(h->avctx, AV_LOG_ERROR, "changing width/height on " |
3528 | "slice %d\n", h0->current_slice + 1); |
3529 | return AVERROR_INVALIDDATA; |
3530 | } |
3531 | |
3532 | flush_change(h); |
3533 | |
3534 | if ((ret = get_pixel_format(h, 1)) < 0) |
3535 | return ret; |
3536 | h->avctx->pix_fmt = ret; |
3537 | |
3538 | av_log(h->avctx, AV_LOG_INFO, "Reinit context to %dx%d, " |
3539 | "pix_fmt: %s\n", h->width, h->height, av_get_pix_fmt_name(h->avctx->pix_fmt)); |
3540 | |
3541 | if ((ret = h264_slice_header_init(h, 1)) < 0) { |
3542 | av_log(h->avctx, AV_LOG_ERROR, |
3543 | "h264_slice_header_init() failed\n"); |
3544 | return ret; |
3545 | } |
3546 | } |
3547 | if (!h->context_initialized) { |
3548 | if (h != h0) { |
3549 | av_log(h->avctx, AV_LOG_ERROR, |
3550 | "Cannot (re-)initialize context during parallel decoding.\n"); |
3551 | return AVERROR_PATCHWELCOME; |
3552 | } |
3553 | |
3554 | if ((ret = get_pixel_format(h, 1)) < 0) |
3555 | return ret; |
3556 | h->avctx->pix_fmt = ret; |
3557 | |
3558 | if ((ret = h264_slice_header_init(h, 0)) < 0) { |
3559 | av_log(h->avctx, AV_LOG_ERROR, |
3560 | "h264_slice_header_init() failed\n"); |
3561 | return ret; |
3562 | } |
3563 | } |
3564 | |
3565 | if (h == h0 && h->dequant_coeff_pps != pps_id) { |
3566 | h->dequant_coeff_pps = pps_id; |
3567 | init_dequant_tables(h); |
3568 | } |
3569 | |
3570 | h->frame_num = get_bits(&h->gb, h->sps.log2_max_frame_num); |
3571 | |
3572 | h->mb_mbaff = 0; |
3573 | h->mb_aff_frame = 0; |
3574 | last_pic_structure = h0->picture_structure; |
3575 | last_pic_droppable = h0->droppable; |
3576 | h->droppable = h->nal_ref_idc == 0; |
3577 | if (h->sps.frame_mbs_only_flag) { |
3578 | h->picture_structure = PICT_FRAME; |
3579 | } else { |
3580 | if (!h->sps.direct_8x8_inference_flag && slice_type == AV_PICTURE_TYPE_B) { |
3581 | av_log(h->avctx, AV_LOG_ERROR, "This stream was generated by a broken encoder, invalid 8x8 inference\n"); |
3582 | return -1; |
3583 | } |
3584 | field_pic_flag = get_bits1(&h->gb); |
3585 | if (field_pic_flag) { |
3586 | bottom_field_flag = get_bits1(&h->gb); |
3587 | h->picture_structure = PICT_TOP_FIELD + bottom_field_flag; |
3588 | } else { |
3589 | h->picture_structure = PICT_FRAME; |
3590 | h->mb_aff_frame = h->sps.mb_aff; |
3591 | } |
3592 | } |
3593 | h->mb_field_decoding_flag = h->picture_structure != PICT_FRAME; |
3594 | |
3595 | if (h0->current_slice != 0) { |
3596 | if (last_pic_structure != h->picture_structure || |
3597 | last_pic_droppable != h->droppable) { |
3598 | av_log(h->avctx, AV_LOG_ERROR, |
3599 | "Changing field mode (%d -> %d) between slices is not allowed\n", |
3600 | last_pic_structure, h->picture_structure); |
3601 | h->picture_structure = last_pic_structure; |
3602 | h->droppable = last_pic_droppable; |
3603 | return AVERROR_INVALIDDATA; |
3604 | } else if (!h0->cur_pic_ptr) { |
3605 | av_log(h->avctx, AV_LOG_ERROR, |
3606 | "unset cur_pic_ptr on %d. slice\n", |
3607 | h0->current_slice + 1); |
3608 | return AVERROR_INVALIDDATA; |
3609 | } |
3610 | } else { |
3611 | /* Shorten frame num gaps so we don't have to allocate reference |
3612 | * frames just to throw them away */ |
3613 | if (h->frame_num != h->prev_frame_num && h->prev_frame_num >= 0) { |
3614 | int unwrap_prev_frame_num = h->prev_frame_num; |
3615 | int max_frame_num = 1 << h->sps.log2_max_frame_num; |
3616 | |
3617 | if (unwrap_prev_frame_num > h->frame_num) |
3618 | unwrap_prev_frame_num -= max_frame_num; |
3619 | |
3620 | if ((h->frame_num - unwrap_prev_frame_num) > h->sps.ref_frame_count) { |
3621 | unwrap_prev_frame_num = (h->frame_num - h->sps.ref_frame_count) - 1; |
3622 | if (unwrap_prev_frame_num < 0) |
3623 | unwrap_prev_frame_num += max_frame_num; |
3624 | |
3625 | h->prev_frame_num = unwrap_prev_frame_num; |
3626 | } |
3627 | } |
3628 | |
3629 | /* See if we have a decoded first field looking for a pair... |
3630 | * Here, we're using that to see if we should mark previously |
3631 | * decode frames as "finished". |
3632 | * We have to do that before the "dummy" in-between frame allocation, |
3633 | * since that can modify h->cur_pic_ptr. */ |
3634 | if (h0->first_field) { |
3635 | assert(h0->cur_pic_ptr); |
3636 | assert(h0->cur_pic_ptr->f.data[0]); |
3637 | assert(h0->cur_pic_ptr->reference != DELAYED_PIC_REF); |
3638 | |
3639 | /* Mark old field/frame as completed */ |
3640 | if (h0->cur_pic_ptr->tf.owner == h0->avctx) { |
3641 | ff_thread_report_progress(&h0->cur_pic_ptr->tf, INT_MAX, |
3642 | last_pic_structure == PICT_BOTTOM_FIELD); |
3643 | } |
3644 | |
3645 | /* figure out if we have a complementary field pair */ |
3646 | if (!FIELD_PICTURE(h) || h->picture_structure == last_pic_structure) { |
3647 | /* Previous field is unmatched. Don't display it, but let it |
3648 | * remain for reference if marked as such. */ |
3649 | if (last_pic_structure != PICT_FRAME) { |
3650 | ff_thread_report_progress(&h0->cur_pic_ptr->tf, INT_MAX, |
3651 | last_pic_structure == PICT_TOP_FIELD); |
3652 | } |
3653 | } else { |
3654 | if (h0->cur_pic_ptr->frame_num != h->frame_num) { |
3655 | /* This and previous field were reference, but had |
3656 | * different frame_nums. Consider this field first in |
3657 | * pair. Throw away previous field except for reference |
3658 | * purposes. */ |
3659 | if (last_pic_structure != PICT_FRAME) { |
3660 | ff_thread_report_progress(&h0->cur_pic_ptr->tf, INT_MAX, |
3661 | last_pic_structure == PICT_TOP_FIELD); |
3662 | } |
3663 | } else { |
3664 | /* Second field in complementary pair */ |
3665 | if (!((last_pic_structure == PICT_TOP_FIELD && |
3666 | h->picture_structure == PICT_BOTTOM_FIELD) || |
3667 | (last_pic_structure == PICT_BOTTOM_FIELD && |
3668 | h->picture_structure == PICT_TOP_FIELD))) { |
3669 | av_log(h->avctx, AV_LOG_ERROR, |
3670 | "Invalid field mode combination %d/%d\n", |
3671 | last_pic_structure, h->picture_structure); |
3672 | h->picture_structure = last_pic_structure; |
3673 | h->droppable = last_pic_droppable; |
3674 | return AVERROR_INVALIDDATA; |
3675 | } else if (last_pic_droppable != h->droppable) { |
3676 | avpriv_request_sample(h->avctx, |
3677 | "Found reference and non-reference fields in the same frame, which"); |
3678 | h->picture_structure = last_pic_structure; |
3679 | h->droppable = last_pic_droppable; |
3680 | return AVERROR_PATCHWELCOME; |
3681 | } |
3682 | } |
3683 | } |
3684 | } |
3685 | |
3686 | while (h->frame_num != h->prev_frame_num && h->prev_frame_num >= 0 && !h0->first_field && |
3687 | h->frame_num != (h->prev_frame_num + 1) % (1 << h->sps.log2_max_frame_num)) { |
3688 | Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL; |
3689 | av_log(h->avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n", |
3690 | h->frame_num, h->prev_frame_num); |
3691 | if (!h->sps.gaps_in_frame_num_allowed_flag) |
3692 | for(i=0; i<FF_ARRAY_ELEMS(h->last_pocs); i++) |
3693 | h->last_pocs[i] = INT_MIN; |
3694 | ret = h264_frame_start(h); |
3695 | if (ret < 0) |
3696 | return ret; |
3697 | h->prev_frame_num++; |
3698 | h->prev_frame_num %= 1 << h->sps.log2_max_frame_num; |
3699 | h->cur_pic_ptr->frame_num = h->prev_frame_num; |
3700 | ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, 0); |
3701 | ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, 1); |
3702 | ret = ff_generate_sliding_window_mmcos(h, 1); |
3703 | if (ret < 0 && (h->avctx->err_recognition & AV_EF_EXPLODE)) |
3704 | return ret; |
3705 | ret = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index); |
3706 | if (ret < 0 && (h->avctx->err_recognition & AV_EF_EXPLODE)) |
3707 | return ret; |
3708 | /* Error concealment: If a ref is missing, copy the previous ref |
3709 | * in its place. |
3710 | * FIXME: Avoiding a memcpy would be nice, but ref handling makes |
3711 | * many assumptions about there being no actual duplicates. |
3712 | * FIXME: This does not copy padding for out-of-frame motion |
3713 | * vectors. Given we are concealing a lost frame, this probably |
3714 | * is not noticeable by comparison, but it should be fixed. */ |
3715 | if (h->short_ref_count) { |
3716 | if (prev) { |
3717 | av_image_copy(h->short_ref[0]->f.data, |
3718 | h->short_ref[0]->f.linesize, |
3719 | (const uint8_t **)prev->f.data, |
3720 | prev->f.linesize, |
3721 | h->avctx->pix_fmt, |
3722 | h->mb_width * 16, |
3723 | h->mb_height * 16); |
3724 | h->short_ref[0]->poc = prev->poc + 2; |
3725 | } |
3726 | h->short_ref[0]->frame_num = h->prev_frame_num; |
3727 | } |
3728 | } |
3729 | |
3730 | /* See if we have a decoded first field looking for a pair... |
3731 | * We're using that to see whether to continue decoding in that |
3732 | * frame, or to allocate a new one. */ |
3733 | if (h0->first_field) { |
3734 | assert(h0->cur_pic_ptr); |
3735 | assert(h0->cur_pic_ptr->f.data[0]); |
3736 | assert(h0->cur_pic_ptr->reference != DELAYED_PIC_REF); |
3737 | |
3738 | /* figure out if we have a complementary field pair */ |
3739 | if (!FIELD_PICTURE(h) || h->picture_structure == last_pic_structure) { |
3740 | /* Previous field is unmatched. Don't display it, but let it |
3741 | * remain for reference if marked as such. */ |
3742 | h0->cur_pic_ptr = NULL; |
3743 | h0->first_field = FIELD_PICTURE(h); |
3744 | } else { |
3745 | if (h0->cur_pic_ptr->frame_num != h->frame_num) { |
3746 | ff_thread_report_progress(&h0->cur_pic_ptr->tf, INT_MAX, |
3747 | h0->picture_structure==PICT_BOTTOM_FIELD); |
3748 | /* This and the previous field had different frame_nums. |
3749 | * Consider this field first in pair. Throw away previous |
3750 | * one except for reference purposes. */ |
3751 | h0->first_field = 1; |
3752 | h0->cur_pic_ptr = NULL; |
3753 | } else { |
3754 | /* Second field in complementary pair */ |
3755 | h0->first_field = 0; |
3756 | } |
3757 | } |
3758 | } else { |
3759 | /* Frame or first field in a potentially complementary pair */ |
3760 | h0->first_field = FIELD_PICTURE(h); |
3761 | } |
3762 | |
3763 | if (!FIELD_PICTURE(h) || h0->first_field) { |
3764 | if (h264_frame_start(h) < 0) { |
3765 | h0->first_field = 0; |
3766 | return AVERROR_INVALIDDATA; |
3767 | } |
3768 | } else { |
3769 | release_unused_pictures(h, 0); |
3770 | } |
3771 | /* Some macroblocks can be accessed before they're available in case |
3772 | * of lost slices, MBAFF or threading. */ |
3773 | if (FIELD_PICTURE(h)) { |
3774 | for(i = (h->picture_structure == PICT_BOTTOM_FIELD); i<h->mb_height; i++) |
3775 | memset(h->slice_table + i*h->mb_stride, -1, (h->mb_stride - (i+1==h->mb_height)) * sizeof(*h->slice_table)); |
3776 | } else { |
3777 | memset(h->slice_table, -1, |
3778 | (h->mb_height * h->mb_stride - 1) * sizeof(*h->slice_table)); |
3779 | } |
3780 | h0->last_slice_type = -1; |
3781 | } |
3782 | if (h != h0 && (ret = clone_slice(h, h0)) < 0) |
3783 | return ret; |
3784 | |
3785 | /* can't be in alloc_tables because linesize isn't known there. |
3786 | * FIXME: redo bipred weight to not require extra buffer? */ |
3787 | for (i = 0; i < h->slice_context_count; i++) |
3788 | if (h->thread_context[i]) { |
3789 | ret = alloc_scratch_buffers(h->thread_context[i], h->linesize); |
3790 | if (ret < 0) |
3791 | return ret; |
3792 | } |
3793 | |
3794 | h->cur_pic_ptr->frame_num = h->frame_num; // FIXME frame_num cleanup |
3795 | |
3796 | av_assert1(h->mb_num == h->mb_width * h->mb_height); |
3797 | if (first_mb_in_slice << FIELD_OR_MBAFF_PICTURE(h) >= h->mb_num || |
3798 | first_mb_in_slice >= h->mb_num) { |
3799 | av_log(h->avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n"); |
3800 | return AVERROR_INVALIDDATA; |
3801 | } |
3802 | h->resync_mb_x = h->mb_x = first_mb_in_slice % h->mb_width; |
3803 | h->resync_mb_y = h->mb_y = (first_mb_in_slice / h->mb_width) << |
3804 | FIELD_OR_MBAFF_PICTURE(h); |
3805 | if (h->picture_structure == PICT_BOTTOM_FIELD) |
3806 | h->resync_mb_y = h->mb_y = h->mb_y + 1; |
3807 | av_assert1(h->mb_y < h->mb_height); |
3808 | |
3809 | if (h->picture_structure == PICT_FRAME) { |
3810 | h->curr_pic_num = h->frame_num; |
3811 | h->max_pic_num = 1 << h->sps.log2_max_frame_num; |
3812 | } else { |
3813 | h->curr_pic_num = 2 * h->frame_num + 1; |
3814 | h->max_pic_num = 1 << (h->sps.log2_max_frame_num + 1); |
3815 | } |
3816 | |
3817 | if (h->nal_unit_type == NAL_IDR_SLICE) |
3818 | get_ue_golomb(&h->gb); /* idr_pic_id */ |
3819 | |
3820 | if (h->sps.poc_type == 0) { |
3821 | h->poc_lsb = get_bits(&h->gb, h->sps.log2_max_poc_lsb); |
3822 | |
3823 | if (h->pps.pic_order_present == 1 && h->picture_structure == PICT_FRAME) |
3824 | h->delta_poc_bottom = get_se_golomb(&h->gb); |
3825 | } |
3826 | |
3827 | if (h->sps.poc_type == 1 && !h->sps.delta_pic_order_always_zero_flag) { |
3828 | h->delta_poc[0] = get_se_golomb(&h->gb); |
3829 | |
3830 | if (h->pps.pic_order_present == 1 && h->picture_structure == PICT_FRAME) |
3831 | h->delta_poc[1] = get_se_golomb(&h->gb); |
3832 | } |
3833 | |
3834 | ff_init_poc(h, h->cur_pic_ptr->field_poc, &h->cur_pic_ptr->poc); |
3835 | |
3836 | if (h->pps.redundant_pic_cnt_present) |
3837 | h->redundant_pic_count = get_ue_golomb(&h->gb); |
3838 | |
3839 | ret = ff_set_ref_count(h); |
3840 | if (ret < 0) |
3841 | return ret; |
3842 | |
3843 | if (slice_type != AV_PICTURE_TYPE_I && |
3844 | (h0->current_slice == 0 || |
3845 | slice_type != h0->last_slice_type || |
3846 | memcmp(h0->last_ref_count, h0->ref_count, sizeof(h0->ref_count)))) { |
3847 | |
3848 | ff_h264_fill_default_ref_list(h); |
3849 | } |
3850 | |
3851 | if (h->slice_type_nos != AV_PICTURE_TYPE_I) { |
3852 | ret = ff_h264_decode_ref_pic_list_reordering(h); |
3853 | if (ret < 0) { |
3854 | h->ref_count[1] = h->ref_count[0] = 0; |
3855 | return ret; |
3856 | } |
3857 | } |
3858 | |
3859 | if ((h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P) || |
3860 | (h->pps.weighted_bipred_idc == 1 && |
3861 | h->slice_type_nos == AV_PICTURE_TYPE_B)) |
3862 | ff_pred_weight_table(h); |
3863 | else if (h->pps.weighted_bipred_idc == 2 && |
3864 | h->slice_type_nos == AV_PICTURE_TYPE_B) { |
3865 | implicit_weight_table(h, -1); |
3866 | } else { |
3867 | h->use_weight = 0; |
3868 | for (i = 0; i < 2; i++) { |
3869 | h->luma_weight_flag[i] = 0; |
3870 | h->chroma_weight_flag[i] = 0; |
3871 | } |
3872 | } |
3873 | |
3874 | // If frame-mt is enabled, only update mmco tables for the first slice |
3875 | // in a field. Subsequent slices can temporarily clobber h->mmco_index |
3876 | // or h->mmco, which will cause ref list mix-ups and decoding errors |
3877 | // further down the line. This may break decoding if the first slice is |
3878 | // corrupt, thus we only do this if frame-mt is enabled. |
3879 | if (h->nal_ref_idc) { |
3880 | ret = ff_h264_decode_ref_pic_marking(h0, &h->gb, |
3881 | !(h->avctx->active_thread_type & FF_THREAD_FRAME) || |
3882 | h0->current_slice == 0); |
3883 | if (ret < 0 && (h->avctx->err_recognition & AV_EF_EXPLODE)) |
3884 | return AVERROR_INVALIDDATA; |
3885 | } |
3886 | |
3887 | if (FRAME_MBAFF(h)) { |
3888 | ff_h264_fill_mbaff_ref_list(h); |
3889 | |
3890 | if (h->pps.weighted_bipred_idc == 2 && h->slice_type_nos == AV_PICTURE_TYPE_B) { |
3891 | implicit_weight_table(h, 0); |
3892 | implicit_weight_table(h, 1); |
3893 | } |
3894 | } |
3895 | |
3896 | if (h->slice_type_nos == AV_PICTURE_TYPE_B && !h->direct_spatial_mv_pred) |
3897 | ff_h264_direct_dist_scale_factor(h); |
3898 | ff_h264_direct_ref_list_init(h); |
3899 | |
3900 | if (h->slice_type_nos != AV_PICTURE_TYPE_I && h->pps.cabac) { |
3901 | tmp = get_ue_golomb_31(&h->gb); |
3902 | if (tmp > 2) { |
3903 | av_log(h->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n"); |
3904 | return AVERROR_INVALIDDATA; |
3905 | } |
3906 | h->cabac_init_idc = tmp; |
3907 | } |
3908 | |
3909 | h->last_qscale_diff = 0; |
3910 | tmp = h->pps.init_qp + get_se_golomb(&h->gb); |
3911 | if (tmp > 51 + 6 * (h->sps.bit_depth_luma - 8)) { |
3912 | av_log(h->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp); |
3913 | return AVERROR_INVALIDDATA; |
3914 | } |
3915 | h->qscale = tmp; |
3916 | h->chroma_qp[0] = get_chroma_qp(h, 0, h->qscale); |
3917 | h->chroma_qp[1] = get_chroma_qp(h, 1, h->qscale); |
3918 | // FIXME qscale / qp ... stuff |
3919 | if (h->slice_type == AV_PICTURE_TYPE_SP) |
3920 | get_bits1(&h->gb); /* sp_for_switch_flag */ |
3921 | if (h->slice_type == AV_PICTURE_TYPE_SP || |
3922 | h->slice_type == AV_PICTURE_TYPE_SI) |
3923 | get_se_golomb(&h->gb); /* slice_qs_delta */ |
3924 | |
3925 | h->deblocking_filter = 1; |
3926 | h->slice_alpha_c0_offset = 52; |
3927 | h->slice_beta_offset = 52; |
3928 | if (h->pps.deblocking_filter_parameters_present) { |
3929 | tmp = get_ue_golomb_31(&h->gb); |
3930 | if (tmp > 2) { |
3931 | av_log(h->avctx, AV_LOG_ERROR, |
3932 | "deblocking_filter_idc %u out of range\n", tmp); |
3933 | return AVERROR_INVALIDDATA; |
3934 | } |
3935 | h->deblocking_filter = tmp; |
3936 | if (h->deblocking_filter < 2) |
3937 | h->deblocking_filter ^= 1; // 1<->0 |
3938 | |
3939 | if (h->deblocking_filter) { |
3940 | h->slice_alpha_c0_offset += get_se_golomb(&h->gb) << 1; |
3941 | h->slice_beta_offset += get_se_golomb(&h->gb) << 1; |
3942 | if (h->slice_alpha_c0_offset > 104U || |
3943 | h->slice_beta_offset > 104U) { |
3944 | av_log(h->avctx, AV_LOG_ERROR, |
3945 | "deblocking filter parameters %d %d out of range\n", |
3946 | h->slice_alpha_c0_offset, h->slice_beta_offset); |
3947 | return AVERROR_INVALIDDATA; |
3948 | } |
3949 | } |
3950 | } |
3951 | |
3952 | if (h->avctx->skip_loop_filter >= AVDISCARD_ALL || |
3953 | (h->avctx->skip_loop_filter >= AVDISCARD_NONKEY && |
3954 | h->slice_type_nos != AV_PICTURE_TYPE_I) || |
3955 | (h->avctx->skip_loop_filter >= AVDISCARD_BIDIR && |
3956 | h->slice_type_nos == AV_PICTURE_TYPE_B) || |
3957 | (h->avctx->skip_loop_filter >= AVDISCARD_NONREF && |
3958 | h->nal_ref_idc == 0)) |
3959 | h->deblocking_filter = 0; |
3960 | |
3961 | if (h->deblocking_filter == 1 && h0->max_contexts > 1) { |
3962 | if (h->avctx->flags2 & CODEC_FLAG2_FAST) { |
3963 | /* Cheat slightly for speed: |
3964 | * Do not bother to deblock across slices. */ |
3965 | h->deblocking_filter = 2; |
3966 | } else { |
3967 | h0->max_contexts = 1; |
3968 | if (!h0->single_decode_warning) { |
3969 | av_log(h->avctx, AV_LOG_INFO, |
3970 | "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n"); |
3971 | h0->single_decode_warning = 1; |
3972 | } |
3973 | if (h != h0) { |
3974 | av_log(h->avctx, AV_LOG_ERROR, |
3975 | "Deblocking switched inside frame.\n"); |
3976 | return 1; |
3977 | } |
3978 | } |
3979 | } |
3980 | h->qp_thresh = 15 + 52 - |
3981 | FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - |
3982 | FFMAX3(0, |
3983 | h->pps.chroma_qp_index_offset[0], |
3984 | h->pps.chroma_qp_index_offset[1]) + |
3985 | 6 * (h->sps.bit_depth_luma - 8); |
3986 | |
3987 | h0->last_slice_type = slice_type; |
3988 | memcpy(h0->last_ref_count, h0->ref_count, sizeof(h0->last_ref_count)); |
3989 | h->slice_num = ++h0->current_slice; |
3990 | |
3991 | if (h->slice_num) |
3992 | h0->slice_row[(h->slice_num-1)&(MAX_SLICES-1)]= h->resync_mb_y; |
3993 | if ( h0->slice_row[h->slice_num&(MAX_SLICES-1)] + 3 >= h->resync_mb_y |
3994 | && h0->slice_row[h->slice_num&(MAX_SLICES-1)] <= h->resync_mb_y |
3995 | && h->slice_num >= MAX_SLICES) { |
3996 | //in case of ASO this check needs to be updated depending on how we decide to assign slice numbers in this case |
3997 | av_log(h->avctx, AV_LOG_WARNING, "Possibly too many slices (%d >= %d), increase MAX_SLICES and recompile if there are artifacts\n", h->slice_num, MAX_SLICES); |
3998 | } |
3999 | |
4000 | for (j = 0; j < 2; j++) { |
4001 | int id_list[16]; |
4002 | int *ref2frm = h->ref2frm[h->slice_num & (MAX_SLICES - 1)][j]; |
4003 | for (i = 0; i < 16; i++) { |
4004 | id_list[i] = 60; |
4005 | if (j < h->list_count && i < h->ref_count[j] && |
4006 | h->ref_list[j][i].f.buf[0]) { |
4007 | int k; |
4008 | AVBuffer *buf = h->ref_list[j][i].f.buf[0]->buffer; |
4009 | for (k = 0; k < h->short_ref_count; k++) |
4010 | if (h->short_ref[k]->f.buf[0]->buffer == buf) { |
4011 | id_list[i] = k; |
4012 | break; |
4013 | } |
4014 | for (k = 0; k < h->long_ref_count; k++) |
4015 | if (h->long_ref[k] && h->long_ref[k]->f.buf[0]->buffer == buf) { |
4016 | id_list[i] = h->short_ref_count + k; |
4017 | break; |
4018 | } |
4019 | } |
4020 | } |
4021 | |
4022 | ref2frm[0] = |
4023 | ref2frm[1] = -1; |
4024 | for (i = 0; i < 16; i++) |
4025 | ref2frm[i + 2] = 4 * id_list[i] + (h->ref_list[j][i].reference & 3); |
4026 | ref2frm[18 + 0] = |
4027 | ref2frm[18 + 1] = -1; |
4028 | for (i = 16; i < 48; i++) |
4029 | ref2frm[i + 4] = 4 * id_list[(i - 16) >> 1] + |
4030 | (h->ref_list[j][i].reference & 3); |
4031 | } |
4032 | |
4033 | if (h->ref_count[0]) h->er.last_pic = &h->ref_list[0][0]; |
4034 | if (h->ref_count[1]) h->er.next_pic = &h->ref_list[1][0]; |
4035 | h->er.ref_count = h->ref_count[0]; |
4036 | |
4037 | if (h->avctx->debug & FF_DEBUG_PICT_INFO) { |
4038 | av_log(h->avctx, AV_LOG_DEBUG, |
4039 | "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n", |
4040 | h->slice_num, |
4041 | (h->picture_structure == PICT_FRAME ? "F" : h->picture_structure == PICT_TOP_FIELD ? "T" : "B"), |
4042 | first_mb_in_slice, |
4043 | av_get_picture_type_char(h->slice_type), |
4044 | h->slice_type_fixed ? " fix" : "", |
4045 | h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "", |
4046 | pps_id, h->frame_num, |
4047 | h->cur_pic_ptr->field_poc[0], |
4048 | h->cur_pic_ptr->field_poc[1], |
4049 | h->ref_count[0], h->ref_count[1], |
4050 | h->qscale, |
4051 | h->deblocking_filter, |
4052 | h->slice_alpha_c0_offset / 2 - 26, h->slice_beta_offset / 2 - 26, |
4053 | h->use_weight, |
4054 | h->use_weight == 1 && h->use_weight_chroma ? "c" : "", |
4055 | h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""); |
4056 | } |
4057 | |
4058 | return 0; |
4059 | } |
4060 | |
4061 | int ff_h264_get_slice_type(const H264Context *h) |
4062 | { |
4063 | switch (h->slice_type) { |
4064 | case AV_PICTURE_TYPE_P: |
4065 | return 0; |
4066 | case AV_PICTURE_TYPE_B: |
4067 | return 1; |
4068 | case AV_PICTURE_TYPE_I: |
4069 | return 2; |
4070 | case AV_PICTURE_TYPE_SP: |
4071 | return 3; |
4072 | case AV_PICTURE_TYPE_SI: |
4073 | return 4; |
4074 | default: |
4075 | return AVERROR_INVALIDDATA; |
4076 | } |
4077 | } |
4078 | |
4079 | static av_always_inline void fill_filter_caches_inter(H264Context *h, |
4080 | int mb_type, int top_xy, |
4081 | int left_xy[LEFT_MBS], |
4082 | int top_type, |
4083 | int left_type[LEFT_MBS], |
4084 | int mb_xy, int list) |
4085 | { |
4086 | int b_stride = h->b_stride; |
4087 | int16_t(*mv_dst)[2] = &h->mv_cache[list][scan8[0]]; |
4088 | int8_t *ref_cache = &h->ref_cache[list][scan8[0]]; |
4089 | if (IS_INTER(mb_type) || IS_DIRECT(mb_type)) { |
4090 | if (USES_LIST(top_type, list)) { |
4091 | const int b_xy = h->mb2b_xy[top_xy] + 3 * b_stride; |
4092 | const int b8_xy = 4 * top_xy + 2; |
4093 | int (*ref2frm)[64] = (void*)(h->ref2frm[h->slice_table[top_xy] & (MAX_SLICES - 1)][0] + (MB_MBAFF(h) ? 20 : 2)); |
4094 | AV_COPY128(mv_dst - 1 * 8, h->cur_pic.motion_val[list][b_xy + 0]); |
4095 | ref_cache[0 - 1 * 8] = |
4096 | ref_cache[1 - 1 * 8] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 0]]; |
4097 | ref_cache[2 - 1 * 8] = |
4098 | ref_cache[3 - 1 * 8] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 1]]; |
4099 | } else { |
4100 | AV_ZERO128(mv_dst - 1 * 8); |
4101 | AV_WN32A(&ref_cache[0 - 1 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u); |
4102 | } |
4103 | |
4104 | if (!IS_INTERLACED(mb_type ^ left_type[LTOP])) { |
4105 | if (USES_LIST(left_type[LTOP], list)) { |
4106 | const int b_xy = h->mb2b_xy[left_xy[LTOP]] + 3; |
4107 | const int b8_xy = 4 * left_xy[LTOP] + 1; |
4108 | int (*ref2frm)[64] =(void*)( h->ref2frm[h->slice_table[left_xy[LTOP]] & (MAX_SLICES - 1)][0] + (MB_MBAFF(h) ? 20 : 2)); |
4109 | AV_COPY32(mv_dst - 1 + 0, h->cur_pic.motion_val[list][b_xy + b_stride * 0]); |
4110 | AV_COPY32(mv_dst - 1 + 8, h->cur_pic.motion_val[list][b_xy + b_stride * 1]); |
4111 | AV_COPY32(mv_dst - 1 + 16, h->cur_pic.motion_val[list][b_xy + b_stride * 2]); |
4112 | AV_COPY32(mv_dst - 1 + 24, h->cur_pic.motion_val[list][b_xy + b_stride * 3]); |
4113 | ref_cache[-1 + 0] = |
4114 | ref_cache[-1 + 8] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 2 * 0]]; |
4115 | ref_cache[-1 + 16] = |
4116 | ref_cache[-1 + 24] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 2 * 1]]; |
4117 | } else { |
4118 | AV_ZERO32(mv_dst - 1 + 0); |
4119 | AV_ZERO32(mv_dst - 1 + 8); |
4120 | AV_ZERO32(mv_dst - 1 + 16); |
4121 | AV_ZERO32(mv_dst - 1 + 24); |
4122 | ref_cache[-1 + 0] = |
4123 | ref_cache[-1 + 8] = |
4124 | ref_cache[-1 + 16] = |
4125 | ref_cache[-1 + 24] = LIST_NOT_USED; |
4126 | } |
4127 | } |
4128 | } |
4129 | |
4130 | if (!USES_LIST(mb_type, list)) { |
4131 | fill_rectangle(mv_dst, 4, 4, 8, pack16to32(0, 0), 4); |
4132 | AV_WN32A(&ref_cache[0 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u); |
4133 | AV_WN32A(&ref_cache[1 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u); |
4134 | AV_WN32A(&ref_cache[2 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u); |
4135 | AV_WN32A(&ref_cache[3 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u); |
4136 | return; |
4137 | } |
4138 | |
4139 | { |
4140 | int8_t *ref = &h->cur_pic.ref_index[list][4 * mb_xy]; |
4141 | int (*ref2frm)[64] = (void*)(h->ref2frm[h->slice_num & (MAX_SLICES - 1)][0] + (MB_MBAFF(h) ? 20 : 2)); |
4142 | uint32_t ref01 = (pack16to32(ref2frm[list][ref[0]], ref2frm[list][ref[1]]) & 0x00FF00FF) * 0x0101; |
4143 | uint32_t ref23 = (pack16to32(ref2frm[list][ref[2]], ref2frm[list][ref[3]]) & 0x00FF00FF) * 0x0101; |
4144 | AV_WN32A(&ref_cache[0 * 8], ref01); |
4145 | AV_WN32A(&ref_cache[1 * 8], ref01); |
4146 | AV_WN32A(&ref_cache[2 * 8], ref23); |
4147 | AV_WN32A(&ref_cache[3 * 8], ref23); |
4148 | } |
4149 | |
4150 | { |
4151 | int16_t(*mv_src)[2] = &h->cur_pic.motion_val[list][4 * h->mb_x + 4 * h->mb_y * b_stride]; |
4152 | AV_COPY128(mv_dst + 8 * 0, mv_src + 0 * b_stride); |
4153 | AV_COPY128(mv_dst + 8 * 1, mv_src + 1 * b_stride); |
4154 | AV_COPY128(mv_dst + 8 * 2, mv_src + 2 * b_stride); |
4155 | AV_COPY128(mv_dst + 8 * 3, mv_src + 3 * b_stride); |
4156 | } |
4157 | } |
4158 | |
4159 | /** |
4160 | * |
4161 | * @return non zero if the loop filter can be skipped |
4162 | */ |
4163 | static int fill_filter_caches(H264Context *h, int mb_type) |
4164 | { |
4165 | const int mb_xy = h->mb_xy; |
4166 | int top_xy, left_xy[LEFT_MBS]; |
4167 | int top_type, left_type[LEFT_MBS]; |
4168 | uint8_t *nnz; |
4169 | uint8_t *nnz_cache; |
4170 | |
4171 | top_xy = mb_xy - (h->mb_stride << MB_FIELD(h)); |
4172 | |
4173 | /* Wow, what a mess, why didn't they simplify the interlacing & intra |
4174 | * stuff, I can't imagine that these complex rules are worth it. */ |
4175 | |
4176 | left_xy[LBOT] = left_xy[LTOP] = mb_xy - 1; |
4177 | if (FRAME_MBAFF(h)) { |
4178 | const int left_mb_field_flag = IS_INTERLACED(h->cur_pic.mb_type[mb_xy - 1]); |
4179 | const int curr_mb_field_flag = IS_INTERLACED(mb_type); |
4180 | if (h->mb_y & 1) { |
4181 | if (left_mb_field_flag != curr_mb_field_flag) |
4182 | left_xy[LTOP] -= h->mb_stride; |
4183 | } else { |
4184 | if (curr_mb_field_flag) |
4185 | top_xy += h->mb_stride & |
4186 | (((h->cur_pic.mb_type[top_xy] >> 7) & 1) - 1); |
4187 | if (left_mb_field_flag != curr_mb_field_flag) |
4188 | left_xy[LBOT] += h->mb_stride; |
4189 | } |
4190 | } |
4191 | |
4192 | h->top_mb_xy = top_xy; |
4193 | h->left_mb_xy[LTOP] = left_xy[LTOP]; |
4194 | h->left_mb_xy[LBOT] = left_xy[LBOT]; |
4195 | { |
4196 | /* For sufficiently low qp, filtering wouldn't do anything. |
4197 | * This is a conservative estimate: could also check beta_offset |
4198 | * and more accurate chroma_qp. */ |
4199 | int qp_thresh = h->qp_thresh; // FIXME strictly we should store qp_thresh for each mb of a slice |
4200 | int qp = h->cur_pic.qscale_table[mb_xy]; |
4201 | if (qp <= qp_thresh && |
4202 | (left_xy[LTOP] < 0 || |
4203 | ((qp + h->cur_pic.qscale_table[left_xy[LTOP]] + 1) >> 1) <= qp_thresh) && |
4204 | (top_xy < 0 || |
4205 | ((qp + h->cur_pic.qscale_table[top_xy] + 1) >> 1) <= qp_thresh)) { |
4206 | if (!FRAME_MBAFF(h)) |
4207 | return 1; |
4208 | if ((left_xy[LTOP] < 0 || |
4209 | ((qp + h->cur_pic.qscale_table[left_xy[LBOT]] + 1) >> 1) <= qp_thresh) && |
4210 | (top_xy < h->mb_stride || |
4211 | ((qp + h->cur_pic.qscale_table[top_xy - h->mb_stride] + 1) >> 1) <= qp_thresh)) |
4212 | return 1; |
4213 | } |
4214 | } |
4215 | |
4216 | top_type = h->cur_pic.mb_type[top_xy]; |
4217 | left_type[LTOP] = h->cur_pic.mb_type[left_xy[LTOP]]; |
4218 | left_type[LBOT] = h->cur_pic.mb_type[left_xy[LBOT]]; |
4219 | if (h->deblocking_filter == 2) { |
4220 | if (h->slice_table[top_xy] != h->slice_num) |
4221 | top_type = 0; |
4222 | if (h->slice_table[left_xy[LBOT]] != h->slice_num) |
4223 | left_type[LTOP] = left_type[LBOT] = 0; |
4224 | } else { |
4225 | if (h->slice_table[top_xy] == 0xFFFF) |
4226 | top_type = 0; |
4227 | if (h->slice_table[left_xy[LBOT]] == 0xFFFF) |
4228 | left_type[LTOP] = left_type[LBOT] = 0; |
4229 | } |
4230 | h->top_type = top_type; |
4231 | h->left_type[LTOP] = left_type[LTOP]; |
4232 | h->left_type[LBOT] = left_type[LBOT]; |
4233 | |
4234 | if (IS_INTRA(mb_type)) |
4235 | return 0; |
4236 | |
4237 | fill_filter_caches_inter(h, mb_type, top_xy, left_xy, |
4238 | top_type, left_type, mb_xy, 0); |
4239 | if (h->list_count == 2) |
4240 | fill_filter_caches_inter(h, mb_type, top_xy, left_xy, |
4241 | top_type, left_type, mb_xy, 1); |
4242 | |
4243 | nnz = h->non_zero_count[mb_xy]; |
4244 | nnz_cache = h->non_zero_count_cache; |
4245 | AV_COPY32(&nnz_cache[4 + 8 * 1], &nnz[0]); |
4246 | AV_COPY32(&nnz_cache[4 + 8 * 2], &nnz[4]); |
4247 | AV_COPY32(&nnz_cache[4 + 8 * 3], &nnz[8]); |
4248 | AV_COPY32(&nnz_cache[4 + 8 * 4], &nnz[12]); |
4249 | h->cbp = h->cbp_table[mb_xy]; |
4250 | |
4251 | if (top_type) { |
4252 | nnz = h->non_zero_count[top_xy]; |
4253 | AV_COPY32(&nnz_cache[4 + 8 * 0], &nnz[3 * 4]); |
4254 | } |
4255 | |
4256 | if (left_type[LTOP]) { |
4257 | nnz = h->non_zero_count[left_xy[LTOP]]; |
4258 | nnz_cache[3 + 8 * 1] = nnz[3 + 0 * 4]; |
4259 | nnz_cache[3 + 8 * 2] = nnz[3 + 1 * 4]; |
4260 | nnz_cache[3 + 8 * 3] = nnz[3 + 2 * 4]; |
4261 | nnz_cache[3 + 8 * 4] = nnz[3 + 3 * 4]; |
4262 | } |
4263 | |
4264 | /* CAVLC 8x8dct requires NNZ values for residual decoding that differ |
4265 | * from what the loop filter needs */ |
4266 | if (!CABAC(h) && h->pps.transform_8x8_mode) { |
4267 | if (IS_8x8DCT(top_type)) { |
4268 | nnz_cache[4 + 8 * 0] = |
4269 | nnz_cache[5 + 8 * 0] = (h->cbp_table[top_xy] & 0x4000) >> 12; |
4270 | nnz_cache[6 + 8 * 0] = |
4271 | nnz_cache[7 + 8 * 0] = (h->cbp_table[top_xy] & 0x8000) >> 12; |
4272 | } |
4273 | if (IS_8x8DCT(left_type[LTOP])) { |
4274 | nnz_cache[3 + 8 * 1] = |
4275 | nnz_cache[3 + 8 * 2] = (h->cbp_table[left_xy[LTOP]] & 0x2000) >> 12; // FIXME check MBAFF |
4276 | } |
4277 | if (IS_8x8DCT(left_type[LBOT])) { |
4278 | nnz_cache[3 + 8 * 3] = |
4279 | nnz_cache[3 + 8 * 4] = (h->cbp_table[left_xy[LBOT]] & 0x8000) >> 12; // FIXME check MBAFF |
4280 | } |
4281 | |
4282 | if (IS_8x8DCT(mb_type)) { |
4283 | nnz_cache[scan8[0]] = |
4284 | nnz_cache[scan8[1]] = |
4285 | nnz_cache[scan8[2]] = |
4286 | nnz_cache[scan8[3]] = (h->cbp & 0x1000) >> 12; |
4287 | |
4288 | nnz_cache[scan8[0 + 4]] = |
4289 | nnz_cache[scan8[1 + 4]] = |
4290 | nnz_cache[scan8[2 + 4]] = |
4291 | nnz_cache[scan8[3 + 4]] = (h->cbp & 0x2000) >> 12; |
4292 | |
4293 | nnz_cache[scan8[0 + 8]] = |
4294 | nnz_cache[scan8[1 + 8]] = |
4295 | nnz_cache[scan8[2 + 8]] = |
4296 | nnz_cache[scan8[3 + 8]] = (h->cbp & 0x4000) >> 12; |
4297 | |
4298 | nnz_cache[scan8[0 + 12]] = |
4299 | nnz_cache[scan8[1 + 12]] = |
4300 | nnz_cache[scan8[2 + 12]] = |
4301 | nnz_cache[scan8[3 + 12]] = (h->cbp & 0x8000) >> 12; |
4302 | } |
4303 | } |
4304 | |
4305 | return 0; |
4306 | } |
4307 | |
4308 | static void loop_filter(H264Context *h, int start_x, int end_x) |
4309 | { |
4310 | uint8_t *dest_y, *dest_cb, *dest_cr; |
4311 | int linesize, uvlinesize, mb_x, mb_y; |
4312 | const int end_mb_y = h->mb_y + FRAME_MBAFF(h); |
4313 | const int old_slice_type = h->slice_type; |
4314 | const int pixel_shift = h->pixel_shift; |
4315 | const int block_h = 16 >> h->chroma_y_shift; |
4316 | |
4317 | if (h->deblocking_filter) { |
4318 | for (mb_x = start_x; mb_x < end_x; mb_x++) |
4319 | for (mb_y = end_mb_y - FRAME_MBAFF(h); mb_y <= end_mb_y; mb_y++) { |
4320 | int mb_xy, mb_type; |
4321 | mb_xy = h->mb_xy = mb_x + mb_y * h->mb_stride; |
4322 | h->slice_num = h->slice_table[mb_xy]; |
4323 | mb_type = h->cur_pic.mb_type[mb_xy]; |
4324 | h->list_count = h->list_counts[mb_xy]; |
4325 | |
4326 | if (FRAME_MBAFF(h)) |
4327 | h->mb_mbaff = |
4328 | h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type); |
4329 | |
4330 | h->mb_x = mb_x; |
4331 | h->mb_y = mb_y; |
4332 | dest_y = h->cur_pic.f.data[0] + |
4333 | ((mb_x << pixel_shift) + mb_y * h->linesize) * 16; |
4334 | dest_cb = h->cur_pic.f.data[1] + |
4335 | (mb_x << pixel_shift) * (8 << CHROMA444(h)) + |
4336 | mb_y * h->uvlinesize * block_h; |
4337 | dest_cr = h->cur_pic.f.data[2] + |
4338 | (mb_x << pixel_shift) * (8 << CHROMA444(h)) + |
4339 | mb_y * h->uvlinesize * block_h; |
4340 | // FIXME simplify above |
4341 | |
4342 | if (MB_FIELD(h)) { |
4343 | linesize = h->mb_linesize = h->linesize * 2; |
4344 | uvlinesize = h->mb_uvlinesize = h->uvlinesize * 2; |
4345 | if (mb_y & 1) { // FIXME move out of this function? |
4346 | dest_y -= h->linesize * 15; |
4347 | dest_cb -= h->uvlinesize * (block_h - 1); |
4348 | dest_cr -= h->uvlinesize * (block_h - 1); |
4349 | } |
4350 | } else { |
4351 | linesize = h->mb_linesize = h->linesize; |
4352 | uvlinesize = h->mb_uvlinesize = h->uvlinesize; |
4353 | } |
4354 | backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, |
4355 | uvlinesize, 0); |
4356 | if (fill_filter_caches(h, mb_type)) |
4357 | continue; |
4358 | h->chroma_qp[0] = get_chroma_qp(h, 0, h->cur_pic.qscale_table[mb_xy]); |
4359 | h->chroma_qp[1] = get_chroma_qp(h, 1, h->cur_pic.qscale_table[mb_xy]); |
4360 | |
4361 | if (FRAME_MBAFF(h)) { |
4362 | ff_h264_filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, |
4363 | linesize, uvlinesize); |
4364 | } else { |
4365 | ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, |
4366 | dest_cr, linesize, uvlinesize); |
4367 | } |
4368 | } |
4369 | } |
4370 | h->slice_type = old_slice_type; |
4371 | h->mb_x = end_x; |
4372 | h->mb_y = end_mb_y - FRAME_MBAFF(h); |
4373 | h->chroma_qp[0] = get_chroma_qp(h, 0, h->qscale); |
4374 | h->chroma_qp[1] = get_chroma_qp(h, 1, h->qscale); |
4375 | } |
4376 | |
4377 | static void predict_field_decoding_flag(H264Context *h) |
4378 | { |
4379 | const int mb_xy = h->mb_x + h->mb_y * h->mb_stride; |
4380 | int mb_type = (h->slice_table[mb_xy - 1] == h->slice_num) ? |
4381 | h->cur_pic.mb_type[mb_xy - 1] : |
4382 | (h->slice_table[mb_xy - h->mb_stride] == h->slice_num) ? |
4383 | h->cur_pic.mb_type[mb_xy - h->mb_stride] : 0; |
4384 | h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0; |
4385 | } |
4386 | |
4387 | /** |
4388 | * Draw edges and report progress for the last MB row. |
4389 | */ |
4390 | static void decode_finish_row(H264Context *h) |
4391 | { |
4392 | int top = 16 * (h->mb_y >> FIELD_PICTURE(h)); |
4393 | int pic_height = 16 * h->mb_height >> FIELD_PICTURE(h); |
4394 | int height = 16 << FRAME_MBAFF(h); |
4395 | int deblock_border = (16 + 4) << FRAME_MBAFF(h); |
4396 | |
4397 | if (h->deblocking_filter) { |
4398 | if ((top + height) >= pic_height) |
4399 | height += deblock_border; |
4400 | top -= deblock_border; |
4401 | } |
4402 | |
4403 | if (top >= pic_height || (top + height) < 0) |
4404 | return; |
4405 | |
4406 | height = FFMIN(height, pic_height - top); |
4407 | if (top < 0) { |
4408 | height = top + height; |
4409 | top = 0; |
4410 | } |
4411 | |
4412 | ff_h264_draw_horiz_band(h, top, height); |
4413 | |
4414 | if (h->droppable || h->er.error_occurred) |
4415 | return; |
4416 | |
4417 | ff_thread_report_progress(&h->cur_pic_ptr->tf, top + height - 1, |
4418 | h->picture_structure == PICT_BOTTOM_FIELD); |
4419 | } |
4420 | |
4421 | static void er_add_slice(H264Context *h, int startx, int starty, |
4422 | int endx, int endy, int status) |
4423 | { |
4424 | if (CONFIG_ERROR_RESILIENCE) { |
4425 | ERContext *er = &h->er; |
4426 | |
4427 | ff_er_add_slice(er, startx, starty, endx, endy, status); |
4428 | } |
4429 | } |
4430 | |
/**
 * Decode the macroblocks of one slice, starting at the position already
 * stored in the context (h->mb_x / h->mb_y).
 *
 * Has the (avctx, arg) signature needed to be passed to avctx->execute().
 *
 * @param avctx codec context
 * @param arg   pointer to a pointer to the H264Context to decode with
 * @return 0 when the slice ended normally, a negative error code otherwise
 */
static int decode_slice(struct AVCodecContext *avctx, void *arg)
{
    H264Context *h = *(void **)arg;
    /* first column of the current row that has not been loop filtered yet */
    int lf_x_start = h->mb_x;

    h->mb_skip_run = -1;

    /* sanity check: block_offset[] must match the current linesize / pixel_shift */
    av_assert0(h->block_offset[15] == (4 * ((scan8[15] - scan8[0]) & 7) << h->pixel_shift) + 4 * h->linesize * ((scan8[15] - scan8[0]) >> 3));

    h->is_complex = FRAME_MBAFF(h) || h->picture_structure != PICT_FRAME ||
                    avctx->codec_id != AV_CODEC_ID_H264 ||
                    (CONFIG_GRAY && (h->flags & CODEC_FLAG_GRAY));

    /* Without slice threading, for frame pictures: if the macroblock right
     * before this slice's resync point was not recorded as completely
     * decoded (MV, DC and AC end), remember that an error occurred. */
    if (!(h->avctx->active_thread_type & FF_THREAD_SLICE) && h->picture_structure == PICT_FRAME && h->er.error_status_table) {
        const int start_i  = av_clip(h->resync_mb_x + h->resync_mb_y * h->mb_width, 0, h->mb_num - 1);
        if (start_i) {
            int prev_status = h->er.error_status_table[h->er.mb_index2xy[start_i - 1]];
            prev_status &= ~ VP_START;
            if (prev_status != (ER_MV_END | ER_DC_END | ER_AC_END))
                h->er.error_occurred = 1;
        }
    }

    if (h->pps.cabac) {
        /* realign */
        align_get_bits(&h->gb);

        /* init cabac */
        ff_init_cabac_decoder(&h->cabac,
                              h->gb.buffer + get_bits_count(&h->gb) / 8,
                              (get_bits_left(&h->gb) + 7) / 8);

        ff_h264_init_cabac_states(h);

        /* CABAC macroblock loop; left only through one of the returns below */
        for (;;) {
            // START_TIMER
            int ret = ff_h264_decode_mb_cabac(h);
            int eos;
            // STOP_TIMER("decode_mb_cabac")

            if (ret >= 0)
                ff_h264_hl_decode_mb(h);

            // FIXME optimal? or let mb_decode decode 16x32 ?
            /* MBAFF: also decode the macroblock one row below, then
             * restore mb_y */
            if (ret >= 0 && FRAME_MBAFF(h)) {
                h->mb_y++;

                ret = ff_h264_decode_mb_cabac(h);

                if (ret >= 0)
                    ff_h264_hl_decode_mb(h);
                h->mb_y--;
            }
            eos = get_cabac_terminate(&h->cabac);

            /* With the FF_BUG_TRUNCATED workaround an overread of more than
             * 2 bytes is treated as a normal slice end instead of an error. */
            if ((h->workaround_bugs & FF_BUG_TRUNCATED) &&
                h->cabac.bytestream > h->cabac.bytestream_end + 2) {
                er_add_slice(h, h->resync_mb_x, h->resync_mb_y, h->mb_x - 1,
                             h->mb_y, ER_MB_END);
                if (h->mb_x >= lf_x_start)
                    loop_filter(h, lf_x_start, h->mb_x + 1);
                return 0;
            }
            if (h->cabac.bytestream > h->cabac.bytestream_end + 2 )
                av_log(h->avctx, AV_LOG_DEBUG, "bytestream overread %td\n", h->cabac.bytestream_end - h->cabac.bytestream);
            /* decode error, or overread of more than 4 bytes: give up on
             * this slice */
            if (ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 4) {
                av_log(h->avctx, AV_LOG_ERROR,
                       "error while decoding MB %d %d, bytestream (%td)\n",
                       h->mb_x, h->mb_y,
                       h->cabac.bytestream_end - h->cabac.bytestream);
                er_add_slice(h, h->resync_mb_x, h->resync_mb_y, h->mb_x,
                             h->mb_y, ER_MB_ERROR);
                return AVERROR_INVALIDDATA;
            }

            /* end of row: filter what is pending, finish the row and move
             * to the next one (two rows for field / MBAFF pictures) */
            if (++h->mb_x >= h->mb_width) {
                loop_filter(h, lf_x_start, h->mb_x);
                h->mb_x = lf_x_start = 0;
                decode_finish_row(h);
                ++h->mb_y;
                if (FIELD_OR_MBAFF_PICTURE(h)) {
                    ++h->mb_y;
                    if (FRAME_MBAFF(h) && h->mb_y < h->mb_height)
                        predict_field_decoding_flag(h);
                }
            }

            /* terminate bit seen, or past the last row: normal slice end */
            if (eos || h->mb_y >= h->mb_height) {
                tprintf(h->avctx, "slice end %d %d\n",
                        get_bits_count(&h->gb), h->gb.size_in_bits);
                er_add_slice(h, h->resync_mb_x, h->resync_mb_y, h->mb_x - 1,
                             h->mb_y, ER_MB_END);
                if (h->mb_x > lf_x_start)
                    loop_filter(h, lf_x_start, h->mb_x);
                return 0;
            }
        }
    } else {
        /* CAVLC macroblock loop; left only through one of the returns below */
        for (;;) {
            int ret = ff_h264_decode_mb_cavlc(h);

            if (ret >= 0)
                ff_h264_hl_decode_mb(h);

            // FIXME optimal? or let mb_decode decode 16x32 ?
            /* MBAFF: also decode the macroblock one row below, then
             * restore mb_y */
            if (ret >= 0 && FRAME_MBAFF(h)) {
                h->mb_y++;
                ret = ff_h264_decode_mb_cavlc(h);

                if (ret >= 0)
                    ff_h264_hl_decode_mb(h);
                h->mb_y--;
            }

            if (ret < 0) {
                av_log(h->avctx, AV_LOG_ERROR,
                       "error while decoding MB %d %d\n", h->mb_x, h->mb_y);
                er_add_slice(h, h->resync_mb_x, h->resync_mb_y, h->mb_x,
                             h->mb_y, ER_MB_ERROR);
                return ret;
            }

            /* end of row: filter what is pending, finish the row and move
             * to the next one (two rows for field / MBAFF pictures) */
            if (++h->mb_x >= h->mb_width) {
                loop_filter(h, lf_x_start, h->mb_x);
                h->mb_x = lf_x_start = 0;
                decode_finish_row(h);
                ++h->mb_y;
                if (FIELD_OR_MBAFF_PICTURE(h)) {
                    ++h->mb_y;
                    if (FRAME_MBAFF(h) && h->mb_y < h->mb_height)
                        predict_field_decoding_flag(h);
                }
                /* past the last row: the slice ends here */
                if (h->mb_y >= h->mb_height) {
                    tprintf(h->avctx, "slice end %d %d\n",
                            get_bits_count(&h->gb), h->gb.size_in_bits);

                    /* Success if all bits were consumed exactly; leftover
                     * bits are tolerated unless AV_EF_AGGRESSIVE is set.
                     * Everything else (including an overread) is an error. */
                    if (   get_bits_left(&h->gb) == 0
                        || get_bits_left(&h->gb) > 0 && !(h->avctx->err_recognition & AV_EF_AGGRESSIVE)) {
                        er_add_slice(h, h->resync_mb_x, h->resync_mb_y,
                                     h->mb_x - 1, h->mb_y,
                                     ER_MB_END);

                        return 0;
                    } else {
                        er_add_slice(h, h->resync_mb_x, h->resync_mb_y,
                                     h->mb_x, h->mb_y,
                                     ER_MB_END);

                        return AVERROR_INVALIDDATA;
                    }
                }
            }

            /* bitstream exhausted and no skip run pending: the slice ends
             * inside a row */
            if (get_bits_left(&h->gb) <= 0 && h->mb_skip_run <= 0) {
                tprintf(h->avctx, "slice end %d %d\n",
                        get_bits_count(&h->gb), h->gb.size_in_bits);

                if (get_bits_left(&h->gb) == 0) {
                    er_add_slice(h, h->resync_mb_x, h->resync_mb_y,
                                 h->mb_x - 1, h->mb_y,
                                 ER_MB_END);
                    if (h->mb_x > lf_x_start)
                        loop_filter(h, lf_x_start, h->mb_x);

                    return 0;
                } else {
                    /* negative bits left: more was read than available */
                    er_add_slice(h, h->resync_mb_x, h->resync_mb_y, h->mb_x,
                                 h->mb_y, ER_MB_ERROR);

                    return AVERROR_INVALIDDATA;
                }
            }
        }
    }
}
4606 | |
4607 | /** |
4608 | * Call decode_slice() for each context. |
4609 | * |
4610 | * @param h h264 master context |
4611 | * @param context_count number of contexts to execute |
4612 | */ |
4613 | static int execute_decode_slices(H264Context *h, int context_count) |
4614 | { |
4615 | AVCodecContext *const avctx = h->avctx; |
4616 | H264Context *hx; |
4617 | int i; |
4618 | |
4619 | if (h->avctx->hwaccel || |
4620 | h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU) |
4621 | return 0; |
4622 | if (context_count == 1) { |
4623 | return decode_slice(avctx, &h); |
4624 | } else { |
4625 | av_assert0(context_count > 0); |
4626 | for (i = 1; i < context_count; i++) { |
4627 | hx = h->thread_context[i]; |
4628 | if (CONFIG_ERROR_RESILIENCE) { |
4629 | hx->er.error_count = 0; |
4630 | } |
4631 | hx->x264_build = h->x264_build; |
4632 | } |
4633 | |
4634 | avctx->execute(avctx, decode_slice, h->thread_context, |
4635 | NULL, context_count, sizeof(void *)); |
4636 | |
4637 | /* pull back stuff from slices to master context */ |
4638 | hx = h->thread_context[context_count - 1]; |
4639 | h->mb_x = hx->mb_x; |
4640 | h->mb_y = hx->mb_y; |
4641 | h->droppable = hx->droppable; |
4642 | h->picture_structure = hx->picture_structure; |
4643 | if (CONFIG_ERROR_RESILIENCE) { |
4644 | for (i = 1; i < context_count; i++) |
4645 | h->er.error_count += h->thread_context[i]->er.error_count; |
4646 | } |
4647 | } |
4648 | |
4649 | return 0; |
4650 | } |
4651 | |
/* Three-byte 00 00 01 start code; decode_nal_units() passes it to
 * ff_vdpau_add_data_chunk() ahead of each slice's data. */
static const uint8_t start_code[] = { 0x00, 0x00, 0x01 };
4653 | |
/**
 * Split a buffer into NAL units and handle each one according to its type.
 *
 * NAL units are located either through length prefixes of
 * h->nal_length_size bytes (when h->is_avc is set) or by searching for
 * 00 00 01 start codes. When frame threading is active the buffer is
 * walked twice: pass 0 only determines nals_needed, pass 1 decodes.
 * Slices are collected into thread contexts and executed through
 * execute_decode_slices() once h->max_contexts are pending or the buffer
 * is exhausted.
 *
 * @param h               h264 master context
 * @param buf             input data
 * @param buf_size        size of buf in bytes
 * @param parse_extradata when non-zero, slice and data partition NAL units
 *                        are ignored (with a warning)
 * @return a negative value on error, otherwise the buffer index reached
 */
static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
                            int parse_extradata)
{
    AVCodecContext *const avctx = h->avctx;
    H264Context *hx; ///< thread context
    int buf_index;
    int context_count;
    int next_avc;
    /* start at pass 0 only if frame threading is active */
    int pass = !(avctx->active_thread_type & FF_THREAD_FRAME);
    int nals_needed = 0; ///< number of NALs that need decoding before the next frame thread starts
    int nal_index;
    int idr_cleared=0;
    int first_slice = 0;
    int ret = 0;

    h->nal_unit_type= 0;

    if(!h->slice_context_count)
         h->slice_context_count= 1;
    h->max_contexts = h->slice_context_count;
    /* unless the caller feeds chunks, every call starts a new picture
     * (or the second field of the current one) */
    if (!(avctx->flags2 & CODEC_FLAG2_CHUNKS)) {
        h->current_slice = 0;
        if (!h->first_field)
            h->cur_pic_ptr = NULL;
        ff_h264_reset_sei(h);
    }

    /* Heuristic re-detection of the framing for 4-byte length prefixes:
     * data starting with 00 00 00 01 whose following 32-bit value cannot be
     * a length is taken as start-code framed; a plausible first length
     * switches to length-prefixed mode. */
    if (h->nal_length_size == 4) {
        if (buf_size > 8 && AV_RB32(buf) == 1 && AV_RB32(buf+5) > (unsigned)buf_size) {
            h->is_avc = 0;
        }else if(buf_size > 3 && AV_RB32(buf) > 1 && AV_RB32(buf) <= (unsigned)buf_size)
            h->is_avc = 1;
    }

    for (; pass <= 1; pass++) {
        buf_index     = 0;
        context_count = 0;
        /* in length-prefixed mode force a length read on the first iteration */
        next_avc      = h->is_avc ? 0 : buf_size;
        nal_index     = 0;
        for (;;) {
            int consumed;
            int dst_length;
            int bit_length;
            const uint8_t *ptr;
            int i, nalsize = 0;
            int err;

            if (buf_index >= next_avc) {
                /* read the big-endian NAL length prefix */
                if (buf_index >= buf_size - h->nal_length_size)
                    break;
                nalsize = 0;
                for (i = 0; i < h->nal_length_size; i++)
                    nalsize = (nalsize << 8) | buf[buf_index++];
                if (nalsize <= 0 || nalsize > buf_size - buf_index) {
                    av_log(h->avctx, AV_LOG_ERROR,
                           "AVC: nal size %d\n", nalsize);
                    break;
                }
                next_avc = buf_index + nalsize;
            } else {
                // start code prefix search
                for (; buf_index + 3 < next_avc; buf_index++)
                    // This should always succeed in the first iteration.
                    if (buf[buf_index]     == 0 &&
                        buf[buf_index + 1] == 0 &&
                        buf[buf_index + 2] == 1)
                        break;

                if (buf_index + 3 >= buf_size) {
                    buf_index = buf_size;
                    break;
                }

                buf_index += 3;
                if (buf_index >= next_avc)
                    continue;
            }

            hx = h->thread_context[context_count];

            ptr = ff_h264_decode_nal(hx, buf + buf_index, &dst_length,
                                     &consumed, next_avc - buf_index);
            if (ptr == NULL || dst_length < 0) {
                ret = -1;
                goto end;
            }
            /* auto-detect streams needing the FF_BUG_TRUNCATED workaround:
             * the NAL is directly followed by 00 00 01 E0 */
            i = buf_index + consumed;
            if ((h->workaround_bugs & FF_BUG_AUTODETECT) && i + 3 < next_avc &&
                buf[i]     == 0x00 && buf[i + 1] == 0x00 &&
                buf[i + 2] == 0x01 && buf[i + 3] == 0xE0)
                h->workaround_bugs |= FF_BUG_TRUNCATED;

            /* strip trailing zero bytes, then the rbsp trailing bits */
            if (!(h->workaround_bugs & FF_BUG_TRUNCATED))
                while(dst_length > 0 && ptr[dst_length - 1] == 0)
                    dst_length--;
            bit_length = !dst_length ? 0
                                     : (8 * dst_length -
                                        decode_rbsp_trailing(h, ptr + dst_length - 1));

            if (h->avctx->debug & FF_DEBUG_STARTCODE)
                av_log(h->avctx, AV_LOG_DEBUG, "NAL %d/%d at %d/%d length %d pass %d\n", hx->nal_unit_type, hx->nal_ref_idc, buf_index, buf_size, dst_length, pass);

            if (h->is_avc && (nalsize != consumed) && nalsize)
                av_log(h->avctx, AV_LOG_DEBUG,
                       "AVC: Consumed only %d bytes instead of %d\n",
                       consumed, nalsize);

            buf_index += consumed;
            nal_index++;

            if (pass == 0) {
                /* packets can sometimes contain multiple PPS/SPS,
                 * e.g. two PAFF field pictures in one packet, or a demuxer
                 * which splits NALs strangely if so, when frame threading we
                 * can't start the next thread until we've read all of them */
                switch (hx->nal_unit_type) {
                case NAL_SPS:
                case NAL_PPS:
                    nals_needed = nal_index;
                    break;
                case NAL_DPA:
                case NAL_IDR_SLICE:
                case NAL_SLICE:
                    /* a slice counts if its first ue(v) field is 0 or if it
                     * is the first slice seen */
                    init_get_bits(&hx->gb, ptr, bit_length);
                    if (!get_ue_golomb(&hx->gb) || !first_slice)
                        nals_needed = nal_index;
                    if (!first_slice)
                        first_slice = hx->nal_unit_type;
                }
                continue;
            }

            /* remember the type of the first slice-like NAL of the packet */
            if (!first_slice)
                switch (hx->nal_unit_type) {
                case NAL_DPA:
                case NAL_IDR_SLICE:
                case NAL_SLICE:
                    first_slice = hx->nal_unit_type;
                }

            /* NOTE(review): this test reads nal_ref_idc / nal_unit_type from
             * h, not from hx, which ff_h264_decode_nal() was called with
             * above - confirm this is intended when several slice contexts
             * are in use. */
            if (avctx->skip_frame >= AVDISCARD_NONREF &&
                h->nal_ref_idc == 0 &&
                h->nal_unit_type != NAL_SEI)
                continue;

again:
            /* Ignore per frame NAL unit type during extradata
             * parsing. Decoding slices is not possible in codec init
             * with frame-mt */
            if (parse_extradata) {
                switch (hx->nal_unit_type) {
                case NAL_IDR_SLICE:
                case NAL_SLICE:
                case NAL_DPA:
                case NAL_DPB:
                case NAL_DPC:
                    av_log(h->avctx, AV_LOG_WARNING,
                           "Ignoring NAL %d in global header/extradata\n",
                           hx->nal_unit_type);
                    // fall through to next case
                case NAL_AUXILIARY_SLICE:
                    hx->nal_unit_type = NAL_FF_IGNORE;
                }
            }

            err = 0;

            switch (hx->nal_unit_type) {
            case NAL_IDR_SLICE:
                if (first_slice != NAL_IDR_SLICE) {
                    av_log(h->avctx, AV_LOG_ERROR,
                           "Invalid mix of idr and non-idr slices\n");
                    ret = -1;
                    goto end;
                }
                /* call idr() only once per invocation */
                if(!idr_cleared)
                    idr(h); // FIXME ensure we don't lose some frames if there is reordering
                idr_cleared = 1;
                /* fall through: an IDR slice is decoded like any other slice */
            case NAL_SLICE:
                init_get_bits(&hx->gb, ptr, bit_length);
                hx->intra_gb_ptr      =
                hx->inter_gb_ptr      = &hx->gb;
                hx->data_partitioning = 0;

                if ((err = decode_slice_header(hx, h)))
                    break;

                /* recovery point SEI bookkeeping */
                if (h->sei_recovery_frame_cnt >= 0 && (h->frame_num != h->sei_recovery_frame_cnt || hx->slice_type_nos != AV_PICTURE_TYPE_I))
                    h->valid_recovery_point = 1;

                if (   h->sei_recovery_frame_cnt >= 0
                    && (   h->recovery_frame<0
                        || ((h->recovery_frame - h->frame_num) & ((1 << h->sps.log2_max_frame_num)-1)) > h->sei_recovery_frame_cnt)) {
                    h->recovery_frame = (h->frame_num + h->sei_recovery_frame_cnt) %
                                        (1 << h->sps.log2_max_frame_num);

                    if (!h->valid_recovery_point)
                        h->recovery_frame = h->frame_num;
                }

                h->cur_pic_ptr->f.key_frame |=
                        (hx->nal_unit_type == NAL_IDR_SLICE);

                if (h->recovery_frame == h->frame_num) {
                    h->cur_pic_ptr->sync |= 1;
                    h->recovery_frame = -1;
                }

                h->sync |= !!h->cur_pic_ptr->f.key_frame;
                h->sync |= 3*!!(avctx->flags2 & CODEC_FLAG2_SHOW_ALL);
                h->cur_pic_ptr->sync |= h->sync;

                /* first slice of the picture: finish setup and start the
                 * hardware frame if applicable */
                if (h->current_slice == 1) {
                    if (!(avctx->flags2 & CODEC_FLAG2_CHUNKS))
                        decode_postinit(h, nal_index >= nals_needed);

                    /* NOTE(review): this early return bypasses the cleanup
                     * at the "end" label - confirm that is acceptable. */
                    if (h->avctx->hwaccel &&
                        (ret = h->avctx->hwaccel->start_frame(h->avctx, NULL, 0)) < 0)
                        return ret;
                    if (CONFIG_H264_VDPAU_DECODER &&
                        h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
                        ff_vdpau_h264_picture_start(h);
                }

                /* decode the slice unless it is redundant or discarded by
                 * the skip_frame setting */
                if (hx->redundant_pic_count == 0 &&
                    (avctx->skip_frame < AVDISCARD_NONREF ||
                     hx->nal_ref_idc) &&
                    (avctx->skip_frame < AVDISCARD_BIDIR  ||
                     hx->slice_type_nos != AV_PICTURE_TYPE_B) &&
                    (avctx->skip_frame < AVDISCARD_NONKEY ||
                     hx->slice_type_nos == AV_PICTURE_TYPE_I) &&
                    avctx->skip_frame < AVDISCARD_ALL) {
                    if (avctx->hwaccel) {
                        ret = avctx->hwaccel->decode_slice(avctx,
                                                           &buf[buf_index - consumed],
                                                           consumed);
                        /* NOTE(review): also bypasses the "end" cleanup */
                        if (ret < 0)
                            return ret;
                    } else if (CONFIG_H264_VDPAU_DECODER &&
                               h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU) {
                        ff_vdpau_add_data_chunk(h->cur_pic_ptr->f.data[0],
                                                start_code,
                                                sizeof(start_code));
                        ff_vdpau_add_data_chunk(h->cur_pic_ptr->f.data[0],
                                                &buf[buf_index - consumed],
                                                consumed);
                    } else
                        /* software path: queue this context for execution */
                        context_count++;
                }
                break;
            case NAL_DPA:
                init_get_bits(&hx->gb, ptr, bit_length);
                hx->intra_gb_ptr =
                hx->inter_gb_ptr = NULL;

                if ((err = decode_slice_header(hx, h)) < 0)
                    break;

                hx->data_partitioning = 1;
                break;
            case NAL_DPB:
                init_get_bits(&hx->intra_gb, ptr, bit_length);
                hx->intra_gb_ptr = &hx->intra_gb;
                break;
            case NAL_DPC:
                init_get_bits(&hx->inter_gb, ptr, bit_length);
                hx->inter_gb_ptr = &hx->inter_gb;

                av_log(h->avctx, AV_LOG_ERROR, "Partitioned H.264 support is incomplete\n");
                break;

                /* The unconditional break above makes the rest of this case
                 * unreachable, so partitioned slices are never queued. */
                if (hx->redundant_pic_count == 0 &&
                    hx->intra_gb_ptr &&
                    hx->data_partitioning &&
                    h->cur_pic_ptr && h->context_initialized &&
                    (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) &&
                    (avctx->skip_frame < AVDISCARD_BIDIR  ||
                     hx->slice_type_nos != AV_PICTURE_TYPE_B) &&
                    (avctx->skip_frame < AVDISCARD_NONKEY ||
                     hx->slice_type_nos == AV_PICTURE_TYPE_I) &&
                    avctx->skip_frame < AVDISCARD_ALL)
                    context_count++;
                break;
            case NAL_SEI:
                init_get_bits(&h->gb, ptr, bit_length);
                ff_h264_decode_sei(h);
                break;
            case NAL_SPS:
                init_get_bits(&h->gb, ptr, bit_length);
                /* on failure retry with the raw bytes up to next_avc
                 * (skipping the NAL header byte) instead of the data
                 * returned by ff_h264_decode_nal() */
                if (ff_h264_decode_seq_parameter_set(h) < 0 && (h->is_avc ? nalsize : 1)) {
                    av_log(h->avctx, AV_LOG_DEBUG,
                           "SPS decoding failure, trying again with the complete NAL\n");
                    if (h->is_avc)
                        av_assert0(next_avc - buf_index + consumed == nalsize);
                    /* the size in bits must fit in an int */
                    if ((next_avc - buf_index + consumed - 1) >= INT_MAX/8)
                        break;
                    init_get_bits(&h->gb, &buf[buf_index + 1 - consumed],
                                  8*(next_avc - buf_index + consumed - 1));
                    ff_h264_decode_seq_parameter_set(h);
                }

                break;
            case NAL_PPS:
                init_get_bits(&h->gb, ptr, bit_length);
                ff_h264_decode_picture_parameter_set(h, bit_length);
                break;
            case NAL_AUD:
            case NAL_END_SEQUENCE:
            case NAL_END_STREAM:
            case NAL_FILLER_DATA:
            case NAL_SPS_EXT:
            case NAL_AUXILIARY_SLICE:
                break;
            case NAL_PREFIX:
            case NAL_SUB_SPS:
            case NAL_SLC_EXT:
                /*0x1b is ts avc stream type set mvc once*/
                if (avctx->codec_tag == 0x1b) {
                    avctx->codec_tag = MKTAG('M', 'V', 'C', ' ');
                }
                break;
            case NAL_FF_IGNORE:
                break;
            default:
                av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n",
                       hx->nal_unit_type, bit_length);
            }

            /* all slice contexts are in use: decode the queued slices now */
            if (context_count == h->max_contexts) {
                execute_decode_slices(h, context_count);
                context_count = 0;
            }

            if (err < 0)
                av_log(h->avctx, AV_LOG_ERROR, "decode_slice_header error\n");
            else if (err == 1) {
                /* Slice could not be decoded in parallel mode, copy down
                 * NAL unit stuff to context 0 and restart. Note that
                 * rbsp_buffer is not transferred, but since we no longer
                 * run in parallel mode this should not be an issue. */
                h->nal_unit_type = hx->nal_unit_type;
                h->nal_ref_idc   = hx->nal_ref_idc;
                hx               = h;
                goto again;
            }
        }
    }
    /* decode whatever slices are still queued */
    if (context_count)
        execute_decode_slices(h, context_count);

end:
    /* clean up */
    /* report the picture as completely decoded */
    if (h->cur_pic_ptr && !h->droppable) {
        ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
                                  h->picture_structure == PICT_BOTTOM_FIELD);
    }

    return (ret < 0) ? ret : buf_index;
}
5013 | |
/**
 * Return the number of bytes consumed for building the current frame.
 *
 * A position of 0 is bumped to 1 so that the caller always advances, and
 * a position within 10 bytes of the end of the buffer is rounded up to
 * the whole buffer.
 *
 * @param pos      position reached in the buffer
 * @param buf_size size of the buffer
 * @return number of bytes to report as consumed
 */
static int get_consumed_bytes(int pos, int buf_size)
{
    /* avoid infinite loops (i doubt that is needed but ...) */
    const int consumed = pos ? pos : 1;

    return (consumed + 10 > buf_size) ? buf_size : consumed;
}
5026 | |
5027 | static int output_frame(H264Context *h, AVFrame *dst, Picture *srcp) |
5028 | { |
5029 | AVFrame *src = &srcp->f; |
5030 | int i; |
5031 | int ret = av_frame_ref(dst, src); |
5032 | if (ret < 0) |
5033 | return ret; |
5034 | |
5035 | av_dict_set(&dst->metadata, "stereo_mode", ff_h264_sei_stereo_mode(h), 0); |
5036 | |
5037 | if (!srcp->crop) |
5038 | return 0; |
5039 | |
5040 | for (i = 0; i < 3; i++) { |
5041 | int hshift = (i > 0) ? h->chroma_x_shift : 0; |
5042 | int vshift = (i > 0) ? h->chroma_y_shift : 0; |
5043 | int off = ((srcp->crop_left >> hshift) << h->pixel_shift) + |
5044 | (srcp->crop_top >> vshift) * dst->linesize[i]; |
5045 | dst->data[i] += off; |
5046 | } |
5047 | return 0; |
5048 | } |
5049 | |
/**
 * Decode one packet (the codec's .decode callback).
 *
 * An empty packet flushes one delayed picture. A packet that passes the
 * extradata sniffing below is forwarded to ff_h264_decode_extradata().
 * Everything else goes through decode_nal_units(), after which the next
 * output picture, if any, is returned.
 *
 * @param avctx     codec context
 * @param data      AVFrame that receives the output picture
 * @param got_frame set to 1 when a picture was stored in data
 * @param avpkt     input packet
 * @return number of bytes consumed, or a negative error code
 */
static int decode_frame(AVCodecContext *avctx, void *data,
                        int *got_frame, AVPacket *avpkt)
{
    const uint8_t *buf = avpkt->data;
    int buf_size       = avpkt->size;
    H264Context *h     = avctx->priv_data;
    AVFrame *pict      = data;
    int buf_index      = 0;
    Picture *out;
    int i, out_idx;
    int ret;

    h->flags = avctx->flags;

    /* end of stream, output what is still in the buffers */
    if (buf_size == 0) {
        /* also reached via goto after an end-of-sequence NAL (see below) */
 out:

        h->cur_pic_ptr = NULL;
        h->first_field = 0;

        // FIXME factorize this with the output code below
        /* pick the delayed picture with the lowest poc, looking no further
         * than the next key frame or mmco reset */
        out     = h->delayed_pic[0];
        out_idx = 0;
        for (i = 1;
             h->delayed_pic[i] &&
             !h->delayed_pic[i]->f.key_frame &&
             !h->delayed_pic[i]->mmco_reset;
             i++)
            if (h->delayed_pic[i]->poc < out->poc) {
                out     = h->delayed_pic[i];
                out_idx = i;
            }

        /* remove it from the list by shifting the following entries down */
        for (i = out_idx; h->delayed_pic[i]; i++)
            h->delayed_pic[i] = h->delayed_pic[i + 1];

        if (out) {
            out->reference &= ~DELAYED_PIC_REF;
            ret = output_frame(h, pict, out);
            if (ret < 0)
                return ret;
            *got_frame = 1;
        }

        return buf_index;
    }
    /* Sniff for a packet that is laid out like extradata: a leading byte of
     * 1, a non-zero count in the low 5 bits of byte 5, followed by
     * 16-bit-length-prefixed units whose first byte is 0x67, then a count
     * byte and units whose first byte is 0x68. Any mismatch falls back to
     * normal decoding. */
    if(h->is_avc && buf_size >= 9 && buf[0]==1 && buf[2]==0 && (buf[4]&0xFC)==0xFC && (buf[5]&0x1F) && buf[8]==0x67){
        int cnt= buf[5]&0x1f;
        const uint8_t *p= buf+6;
        while(cnt--){
            int nalsize= AV_RB16(p) + 2;
            if(nalsize > buf_size - (p-buf) || p[2]!=0x67)
                goto not_extra;
            p += nalsize;
        }
        cnt = *(p++);
        if(!cnt)
            goto not_extra;
        while(cnt--){
            int nalsize= AV_RB16(p) + 2;
            if(nalsize > buf_size - (p-buf) || p[2]!=0x68)
                goto not_extra;
            p += nalsize;
        }

        return ff_h264_decode_extradata(h, buf, buf_size);
    }
not_extra:

    buf_index = decode_nal_units(h, buf, buf_size, 0);
    if (buf_index < 0)
        return AVERROR_INVALIDDATA;

    /* an end-of-sequence NAL without a picture: flush a delayed picture */
    if (!h->cur_pic_ptr && h->nal_unit_type == NAL_END_SEQUENCE) {
        av_assert0(buf_index <= buf_size);
        goto out;
    }

    /* No picture was started. This is accepted silently when frames are
     * being skipped or the packet starts with "Q264"; otherwise it is an
     * error. */
    if (!(avctx->flags2 & CODEC_FLAG2_CHUNKS) && !h->cur_pic_ptr) {
        if (avctx->skip_frame >= AVDISCARD_NONREF ||
            buf_size >= 4 && !memcmp("Q264", buf, 4))
            return buf_size;
        av_log(avctx, AV_LOG_ERROR, "no frame!\n");
        return AVERROR_INVALIDDATA;
    }

    /* finish the field/frame: always in normal mode, in chunk mode only
     * once the last macroblock row has been reached */
    if (!(avctx->flags2 & CODEC_FLAG2_CHUNKS) ||
        (h->mb_y >= h->mb_height && h->mb_height)) {
        if (avctx->flags2 & CODEC_FLAG2_CHUNKS)
            decode_postinit(h, 1);

        field_end(h, 0);

        /* Wait for second field. */
        *got_frame = 0;
        if (h->next_output_pic && (h->next_output_pic->sync || h->sync>1)) {
            ret = output_frame(h, pict, h->next_output_pic);
            if (ret < 0)
                return ret;
            *got_frame = 1;
            if (CONFIG_MPEGVIDEO) {
                ff_print_debug_info2(h->avctx, h->next_output_pic, pict, h->er.mbskip_table,
                                    &h->low_delay,
                                    h->mb_width, h->mb_height, h->mb_stride, 1);
            }
        }
    }

    assert(pict->data[0] || !*got_frame);

    return get_consumed_bytes(buf_index, buf_size);
}
5163 | |
5164 | av_cold void ff_h264_free_context(H264Context *h) |
5165 | { |
5166 | int i; |
5167 | |
5168 | free_tables(h, 1); // FIXME cleanup init stuff perhaps |
5169 | |
5170 | for (i = 0; i < MAX_SPS_COUNT; i++) |
5171 | av_freep(h->sps_buffers + i); |
5172 | |
5173 | for (i = 0; i < MAX_PPS_COUNT; i++) |
5174 | av_freep(h->pps_buffers + i); |
5175 | } |
5176 | |
5177 | static av_cold int h264_decode_end(AVCodecContext *avctx) |
5178 | { |
5179 | H264Context *h = avctx->priv_data; |
5180 | |
5181 | ff_h264_remove_all_refs(h); |
5182 | ff_h264_free_context(h); |
5183 | |
5184 | unref_picture(h, &h->cur_pic); |
5185 | |
5186 | return 0; |
5187 | } |
5188 | |
/* Profile identifier -> display name table, referenced by the .profiles
 * field of both decoders below; terminated by an FF_PROFILE_UNKNOWN entry. */
static const AVProfile profiles[] = {
    { FF_PROFILE_H264_BASELINE,             "Baseline"              },
    { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline"  },
    { FF_PROFILE_H264_MAIN,                 "Main"                  },
    { FF_PROFILE_H264_EXTENDED,             "Extended"              },
    { FF_PROFILE_H264_HIGH,                 "High"                  },
    { FF_PROFILE_H264_HIGH_10,              "High 10"               },
    { FF_PROFILE_H264_HIGH_10_INTRA,        "High 10 Intra"         },
    { FF_PROFILE_H264_HIGH_422,             "High 4:2:2"            },
    { FF_PROFILE_H264_HIGH_422_INTRA,       "High 4:2:2 Intra"      },
    { FF_PROFILE_H264_HIGH_444,             "High 4:4:4"            },
    { FF_PROFILE_H264_HIGH_444_PREDICTIVE,  "High 4:4:4 Predictive" },
    { FF_PROFILE_H264_HIGH_444_INTRA,       "High 4:4:4 Intra"      },
    { FF_PROFILE_H264_CAVLC_444,            "CAVLC 4:4:4"           },
    { FF_PROFILE_UNKNOWN },
};
5205 | |
5206 | static const AVOption h264_options[] = { |
5207 | {"is_avc", "is avc", offsetof(H264Context, is_avc), FF_OPT_TYPE_INT, {.i64 = 0}, 0, 1, 0}, |
5208 | {"nal_length_size", "nal_length_size", offsetof(H264Context, nal_length_size), FF_OPT_TYPE_INT, {.i64 = 0}, 0, 4, 0}, |
5209 | {NULL} |
5210 | }; |
5211 | |
/* AVClass of the software decoder (ff_h264_decoder.priv_class); exposes
 * h264_options. */
static const AVClass h264_class = {
    .class_name = "H264 Decoder",
    .item_name  = av_default_item_name,
    .option     = h264_options,
    .version    = LIBAVUTIL_VERSION_INT,
};
5218 | |
/* AVClass of the VDPAU decoder (ff_h264_vdpau_decoder.priv_class); shares
 * h264_options with the software decoder and differs only in its name. */
static const AVClass h264_vdpau_class = {
    .class_name = "H264 VDPAU Decoder",
    .item_name  = av_default_item_name,
    .option     = h264_options,
    .version    = LIBAVUTIL_VERSION_INT,
};
5225 | |
/* Registration record of the software H.264 decoder. It advertises direct
 * rendering, delayed output, slice threading and frame threading; the
 * draw_horiz_band capability is commented out. The thread callbacks are
 * only set when threading support is compiled in. */
AVCodec ff_h264_decoder = {
    .name                  = "h264",
    .long_name             = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_H264,
    .priv_data_size        = sizeof(H264Context),
    .init                  = ff_h264_decode_init,
    .close                 = h264_decode_end,
    .decode                = decode_frame,
    .capabilities          = /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 |
                             CODEC_CAP_DELAY | CODEC_CAP_SLICE_THREADS |
                             CODEC_CAP_FRAME_THREADS,
    .flush                 = flush_dpb,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(decode_update_thread_context),
    .profiles              = NULL_IF_CONFIG_SMALL(profiles),
    .priv_class            = &h264_class,
};
5244 | |
#if CONFIG_H264_VDPAU_DECODER
/* Registration record of the VDPAU variant, compiled only when
 * CONFIG_H264_VDPAU_DECODER is set. It reuses the init/close/decode/flush
 * callbacks of the software decoder, carries CODEC_CAP_HWACCEL_VDPAU
 * instead of the threading capabilities and offers AV_PIX_FMT_VDPAU_H264
 * as its only pixel format. */
AVCodec ff_h264_vdpau_decoder = {
    .name           = "h264_vdpau",
    .long_name      = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_H264,
    .priv_data_size = sizeof(H264Context),
    .init           = ff_h264_decode_init,
    .close          = h264_decode_end,
    .decode         = decode_frame,
    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
    .flush          = flush_dpb,
    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_VDPAU_H264,
                                                   AV_PIX_FMT_NONE},
    .profiles       = NULL_IF_CONFIG_SMALL(profiles),
    .priv_class     = &h264_vdpau_class,
};
#endif
5263 |