blob: a7e627eba38c6bf3c49eae6c5469301e42fcbe34
1 | /* |
2 | * H.26L/H.264/AVC/JVT/14496-10/... SEI decoding |
3 | * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> |
4 | * |
5 | * This file is part of FFmpeg. |
6 | * |
7 | * FFmpeg is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; either |
10 | * version 2.1 of the License, or (at your option) any later version. |
11 | * |
12 | * FFmpeg is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | * Lesser General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU Lesser General Public |
18 | * License along with FFmpeg; if not, write to the Free Software |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | */ |
21 | |
22 | /** |
23 | * @file |
24 | * H.264 / AVC / MPEG-4 part10 SEI decoding. |
25 | * @author Michael Niedermayer <michaelni@gmx.at> |
26 | */ |
27 | |
28 | #include "avcodec.h" |
29 | #include "get_bits.h" |
30 | #include "golomb.h" |
31 | #include "h264_ps.h" |
32 | #include "h264_sei.h" |
33 | #include "internal.h" |
34 | |
/* Private error code returned by the SEI parsers in this file when a needed
 * SPS is not available. ff_h264_sei_decode() records it and keeps parsing the
 * remaining messages instead of aborting. */
#define AVERROR_PS_NOT_FOUND FFERRTAG(0xF8,'?','P','S')

/* Number of clock timestamp sets to parse in a picture timing SEI message,
 * indexed by the 4-bit pic_struct value (entries cover pic_struct 0..8). */
static const uint8_t sei_num_clock_ts_table[9] = {
    1, /* pic_struct 0 */
    1, /* pic_struct 1 */
    1, /* pic_struct 2 */
    2, /* pic_struct 3 */
    2, /* pic_struct 4 */
    3, /* pic_struct 5 */
    3, /* pic_struct 6 */
    2, /* pic_struct 7 */
    3, /* pic_struct 8 */
};
40 | |
41 | void ff_h264_sei_uninit(H264SEIContext *h) |
42 | { |
43 | h->recovery_point.recovery_frame_cnt = -1; |
44 | |
45 | h->picture_timing.dpb_output_delay = 0; |
46 | h->picture_timing.cpb_removal_delay = -1; |
47 | |
48 | h->picture_timing.present = 0; |
49 | h->buffering_period.present = 0; |
50 | h->frame_packing.present = 0; |
51 | h->display_orientation.present = 0; |
52 | h->afd.present = 0; |
53 | |
54 | h->a53_caption.a53_caption_size = 0; |
55 | av_freep(&h->a53_caption.a53_caption); |
56 | } |
57 | |
58 | static int decode_picture_timing(H264SEIPictureTiming *h, GetBitContext *gb, |
59 | const H264ParamSets *ps, void *logctx) |
60 | { |
61 | int i; |
62 | const SPS *sps = ps->sps; |
63 | |
64 | for (i = 0; i<MAX_SPS_COUNT; i++) |
65 | if ((!sps || !sps->log2_max_frame_num) && ps->sps_list[i]) |
66 | sps = (const SPS *)ps->sps_list[i]->data; |
67 | |
68 | if (!sps) { |
69 | av_log(logctx, AV_LOG_ERROR, "SPS unavailable in decode_picture_timing\n"); |
70 | return AVERROR_PS_NOT_FOUND; |
71 | } |
72 | |
73 | if (sps->nal_hrd_parameters_present_flag || |
74 | sps->vcl_hrd_parameters_present_flag) { |
75 | h->cpb_removal_delay = get_bits_long(gb, sps->cpb_removal_delay_length); |
76 | h->dpb_output_delay = get_bits_long(gb, sps->dpb_output_delay_length); |
77 | } |
78 | if (sps->pic_struct_present_flag) { |
79 | unsigned int i, num_clock_ts; |
80 | |
81 | h->pic_struct = get_bits(gb, 4); |
82 | h->ct_type = 0; |
83 | |
84 | if (h->pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING) |
85 | return AVERROR_INVALIDDATA; |
86 | |
87 | num_clock_ts = sei_num_clock_ts_table[h->pic_struct]; |
88 | |
89 | for (i = 0; i < num_clock_ts; i++) { |
90 | if (get_bits(gb, 1)) { /* clock_timestamp_flag */ |
91 | unsigned int full_timestamp_flag; |
92 | |
93 | h->ct_type |= 1 << get_bits(gb, 2); |
94 | skip_bits(gb, 1); /* nuit_field_based_flag */ |
95 | skip_bits(gb, 5); /* counting_type */ |
96 | full_timestamp_flag = get_bits(gb, 1); |
97 | skip_bits(gb, 1); /* discontinuity_flag */ |
98 | skip_bits(gb, 1); /* cnt_dropped_flag */ |
99 | skip_bits(gb, 8); /* n_frames */ |
100 | if (full_timestamp_flag) { |
101 | skip_bits(gb, 6); /* seconds_value 0..59 */ |
102 | skip_bits(gb, 6); /* minutes_value 0..59 */ |
103 | skip_bits(gb, 5); /* hours_value 0..23 */ |
104 | } else { |
105 | if (get_bits(gb, 1)) { /* seconds_flag */ |
106 | skip_bits(gb, 6); /* seconds_value range 0..59 */ |
107 | if (get_bits(gb, 1)) { /* minutes_flag */ |
108 | skip_bits(gb, 6); /* minutes_value 0..59 */ |
109 | if (get_bits(gb, 1)) /* hours_flag */ |
110 | skip_bits(gb, 5); /* hours_value 0..23 */ |
111 | } |
112 | } |
113 | } |
114 | if (sps->time_offset_length > 0) |
115 | skip_bits(gb, |
116 | sps->time_offset_length); /* time_offset */ |
117 | } |
118 | } |
119 | |
120 | av_log(logctx, AV_LOG_DEBUG, "ct_type:%X pic_struct:%d\n", |
121 | h->ct_type, h->pic_struct); |
122 | } |
123 | |
124 | h->present = 1; |
125 | return 0; |
126 | } |
127 | |
128 | static int decode_registered_user_data_afd(H264SEIAFD *h, GetBitContext *gb, int size) |
129 | { |
130 | int flag; |
131 | |
132 | if (size-- < 1) |
133 | return AVERROR_INVALIDDATA; |
134 | skip_bits(gb, 1); // 0 |
135 | flag = get_bits(gb, 1); // active_format_flag |
136 | skip_bits(gb, 6); // reserved |
137 | |
138 | if (flag) { |
139 | if (size-- < 1) |
140 | return AVERROR_INVALIDDATA; |
141 | skip_bits(gb, 4); // reserved |
142 | h->active_format_description = get_bits(gb, 4); |
143 | h->present = 1; |
144 | } |
145 | |
146 | return 0; |
147 | } |
148 | |
149 | static int decode_registered_user_data_closed_caption(H264SEIA53Caption *h, |
150 | GetBitContext *gb, void *logctx, |
151 | int size) |
152 | { |
153 | int flag; |
154 | int user_data_type_code; |
155 | int cc_count; |
156 | |
157 | if (size < 3) |
158 | return AVERROR(EINVAL); |
159 | |
160 | user_data_type_code = get_bits(gb, 8); |
161 | if (user_data_type_code == 0x3) { |
162 | skip_bits(gb, 1); // reserved |
163 | |
164 | flag = get_bits(gb, 1); // process_cc_data_flag |
165 | if (flag) { |
166 | skip_bits(gb, 1); // zero bit |
167 | cc_count = get_bits(gb, 5); |
168 | skip_bits(gb, 8); // reserved |
169 | size -= 2; |
170 | |
171 | if (cc_count && size >= cc_count * 3) { |
172 | const uint64_t new_size = (h->a53_caption_size + cc_count |
173 | * UINT64_C(3)); |
174 | int i, ret; |
175 | |
176 | if (new_size > INT_MAX) |
177 | return AVERROR(EINVAL); |
178 | |
179 | /* Allow merging of the cc data from two fields. */ |
180 | ret = av_reallocp(&h->a53_caption, new_size); |
181 | if (ret < 0) |
182 | return ret; |
183 | |
184 | for (i = 0; i < cc_count; i++) { |
185 | h->a53_caption[h->a53_caption_size++] = get_bits(gb, 8); |
186 | h->a53_caption[h->a53_caption_size++] = get_bits(gb, 8); |
187 | h->a53_caption[h->a53_caption_size++] = get_bits(gb, 8); |
188 | } |
189 | |
190 | skip_bits(gb, 8); // marker_bits |
191 | } |
192 | } |
193 | } else { |
194 | int i; |
195 | for (i = 0; i < size - 1; i++) |
196 | skip_bits(gb, 8); |
197 | } |
198 | |
199 | return 0; |
200 | } |
201 | |
202 | static int decode_registered_user_data(H264SEIContext *h, GetBitContext *gb, |
203 | void *logctx, int size) |
204 | { |
205 | uint32_t country_code; |
206 | uint32_t user_identifier; |
207 | |
208 | if (size < 7) |
209 | return AVERROR_INVALIDDATA; |
210 | size -= 7; |
211 | |
212 | country_code = get_bits(gb, 8); // itu_t_t35_country_code |
213 | if (country_code == 0xFF) { |
214 | skip_bits(gb, 8); // itu_t_t35_country_code_extension_byte |
215 | size--; |
216 | } |
217 | |
218 | /* itu_t_t35_payload_byte follows */ |
219 | skip_bits(gb, 8); // terminal provider code |
220 | skip_bits(gb, 8); // terminal provider oriented code |
221 | user_identifier = get_bits_long(gb, 32); |
222 | |
223 | switch (user_identifier) { |
224 | case MKBETAG('D', 'T', 'G', '1'): // afd_data |
225 | return decode_registered_user_data_afd(&h->afd, gb, size); |
226 | case MKBETAG('G', 'A', '9', '4'): // closed captions |
227 | return decode_registered_user_data_closed_caption(&h->a53_caption, gb, |
228 | logctx, size); |
229 | default: |
230 | skip_bits(gb, size * 8); |
231 | break; |
232 | } |
233 | |
234 | return 0; |
235 | } |
236 | |
237 | static int decode_unregistered_user_data(H264SEIUnregistered *h, GetBitContext *gb, |
238 | void *logctx, int size) |
239 | { |
240 | uint8_t *user_data; |
241 | int e, build, i; |
242 | |
243 | if (size < 16 || size >= INT_MAX - 16) |
244 | return AVERROR_INVALIDDATA; |
245 | |
246 | user_data = av_malloc(16 + size + 1); |
247 | if (!user_data) |
248 | return AVERROR(ENOMEM); |
249 | |
250 | for (i = 0; i < size + 16; i++) |
251 | user_data[i] = get_bits(gb, 8); |
252 | |
253 | user_data[i] = 0; |
254 | e = sscanf(user_data + 16, "x264 - core %d", &build); |
255 | if (e == 1 && build > 0) |
256 | h->x264_build = build; |
257 | if (e == 1 && build == 1 && !strncmp(user_data+16, "x264 - core 0000", 16)) |
258 | h->x264_build = 67; |
259 | |
260 | if (strlen(user_data + 16) > 0) |
261 | av_log(logctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data + 16); |
262 | |
263 | av_free(user_data); |
264 | return 0; |
265 | } |
266 | |
267 | static int decode_recovery_point(H264SEIRecoveryPoint *h, GetBitContext *gb) |
268 | { |
269 | h->recovery_frame_cnt = get_ue_golomb_long(gb); |
270 | |
271 | /* 1b exact_match_flag, |
272 | * 1b broken_link_flag, |
273 | * 2b changing_slice_group_idc */ |
274 | skip_bits(gb, 4); |
275 | |
276 | return 0; |
277 | } |
278 | |
279 | static int decode_buffering_period(H264SEIBufferingPeriod *h, GetBitContext *gb, |
280 | const H264ParamSets *ps, void *logctx) |
281 | { |
282 | unsigned int sps_id; |
283 | int sched_sel_idx; |
284 | const SPS *sps; |
285 | |
286 | sps_id = get_ue_golomb_31(gb); |
287 | if (sps_id > 31 || !ps->sps_list[sps_id]) { |
288 | av_log(logctx, AV_LOG_ERROR, |
289 | "non-existing SPS %d referenced in buffering period\n", sps_id); |
290 | return sps_id > 31 ? AVERROR_INVALIDDATA : AVERROR_PS_NOT_FOUND; |
291 | } |
292 | sps = (const SPS*)ps->sps_list[sps_id]->data; |
293 | |
294 | // NOTE: This is really so duplicated in the standard... See H.264, D.1.1 |
295 | if (sps->nal_hrd_parameters_present_flag) { |
296 | for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) { |
297 | h->initial_cpb_removal_delay[sched_sel_idx] = |
298 | get_bits_long(gb, sps->initial_cpb_removal_delay_length); |
299 | // initial_cpb_removal_delay_offset |
300 | skip_bits(gb, sps->initial_cpb_removal_delay_length); |
301 | } |
302 | } |
303 | if (sps->vcl_hrd_parameters_present_flag) { |
304 | for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) { |
305 | h->initial_cpb_removal_delay[sched_sel_idx] = |
306 | get_bits_long(gb, sps->initial_cpb_removal_delay_length); |
307 | // initial_cpb_removal_delay_offset |
308 | skip_bits(gb, sps->initial_cpb_removal_delay_length); |
309 | } |
310 | } |
311 | |
312 | h->present = 1; |
313 | return 0; |
314 | } |
315 | |
316 | static int decode_frame_packing_arrangement(H264SEIFramePacking *h, |
317 | GetBitContext *gb) |
318 | { |
319 | h->frame_packing_arrangement_id = get_ue_golomb_long(gb); |
320 | h->frame_packing_arrangement_cancel_flag = get_bits1(gb); |
321 | h->present = !h->frame_packing_arrangement_cancel_flag; |
322 | |
323 | if (h->present) { |
324 | h->frame_packing_arrangement_type = get_bits(gb, 7); |
325 | h->quincunx_sampling_flag = get_bits1(gb); |
326 | h->content_interpretation_type = get_bits(gb, 6); |
327 | |
328 | // the following skips: spatial_flipping_flag, frame0_flipped_flag, |
329 | // field_views_flag, current_frame_is_frame0_flag, |
330 | // frame0_self_contained_flag, frame1_self_contained_flag |
331 | skip_bits(gb, 6); |
332 | |
333 | if (!h->quincunx_sampling_flag && h->frame_packing_arrangement_type != 5) |
334 | skip_bits(gb, 16); // frame[01]_grid_position_[xy] |
335 | skip_bits(gb, 8); // frame_packing_arrangement_reserved_byte |
336 | h->frame_packing_arrangement_repetition_period = get_ue_golomb_long(gb); |
337 | } |
338 | skip_bits1(gb); // frame_packing_arrangement_extension_flag |
339 | |
340 | return 0; |
341 | } |
342 | |
343 | static int decode_display_orientation(H264SEIDisplayOrientation *h, |
344 | GetBitContext *gb) |
345 | { |
346 | h->present = !get_bits1(gb); |
347 | |
348 | if (h->present) { |
349 | h->hflip = get_bits1(gb); // hor_flip |
350 | h->vflip = get_bits1(gb); // ver_flip |
351 | |
352 | h->anticlockwise_rotation = get_bits(gb, 16); |
353 | get_ue_golomb_long(gb); // display_orientation_repetition_period |
354 | skip_bits1(gb); // display_orientation_extension_flag |
355 | } |
356 | |
357 | return 0; |
358 | } |
359 | |
360 | static int decode_green_metadata(H264SEIGreenMetaData *h, GetBitContext *gb) |
361 | { |
362 | h->green_metadata_type = get_bits(gb, 8); |
363 | |
364 | if (h->green_metadata_type == 0) { |
365 | h->period_type = get_bits(gb, 8); |
366 | |
367 | if (h->period_type == 2) |
368 | h->num_seconds = get_bits(gb, 16); |
369 | else if (h->period_type == 3) |
370 | h->num_pictures = get_bits(gb, 16); |
371 | |
372 | h->percent_non_zero_macroblocks = get_bits(gb, 8); |
373 | h->percent_intra_coded_macroblocks = get_bits(gb, 8); |
374 | h->percent_six_tap_filtering = get_bits(gb, 8); |
375 | h->percent_alpha_point_deblocking_instance = get_bits(gb, 8); |
376 | |
377 | } else if (h->green_metadata_type == 1) { |
378 | h->xsd_metric_type = get_bits(gb, 8); |
379 | h->xsd_metric_value = get_bits(gb, 16); |
380 | } |
381 | |
382 | return 0; |
383 | } |
384 | |
385 | int ff_h264_sei_decode(H264SEIContext *h, GetBitContext *gb, |
386 | const H264ParamSets *ps, void *logctx) |
387 | { |
388 | int master_ret = 0; |
389 | |
390 | while (get_bits_left(gb) > 16 && show_bits(gb, 16)) { |
391 | int type = 0; |
392 | unsigned size = 0; |
393 | unsigned next; |
394 | int ret = 0; |
395 | |
396 | do { |
397 | if (get_bits_left(gb) < 8) |
398 | return AVERROR_INVALIDDATA; |
399 | type += show_bits(gb, 8); |
400 | } while (get_bits(gb, 8) == 255); |
401 | |
402 | do { |
403 | if (get_bits_left(gb) < 8) |
404 | return AVERROR_INVALIDDATA; |
405 | size += show_bits(gb, 8); |
406 | } while (get_bits(gb, 8) == 255); |
407 | |
408 | if (size > get_bits_left(gb) / 8) { |
409 | av_log(logctx, AV_LOG_ERROR, "SEI type %d size %d truncated at %d\n", |
410 | type, 8*size, get_bits_left(gb)); |
411 | return AVERROR_INVALIDDATA; |
412 | } |
413 | next = get_bits_count(gb) + 8 * size; |
414 | |
415 | switch (type) { |
416 | case SEI_TYPE_PIC_TIMING: // Picture timing SEI |
417 | ret = decode_picture_timing(&h->picture_timing, gb, ps, logctx); |
418 | break; |
419 | case SEI_TYPE_USER_DATA_REGISTERED: |
420 | ret = decode_registered_user_data(h, gb, logctx, size); |
421 | break; |
422 | case SEI_TYPE_USER_DATA_UNREGISTERED: |
423 | ret = decode_unregistered_user_data(&h->unregistered, gb, logctx, size); |
424 | break; |
425 | case SEI_TYPE_RECOVERY_POINT: |
426 | ret = decode_recovery_point(&h->recovery_point, gb); |
427 | break; |
428 | case SEI_TYPE_BUFFERING_PERIOD: |
429 | ret = decode_buffering_period(&h->buffering_period, gb, ps, logctx); |
430 | break; |
431 | case SEI_TYPE_FRAME_PACKING: |
432 | ret = decode_frame_packing_arrangement(&h->frame_packing, gb); |
433 | break; |
434 | case SEI_TYPE_DISPLAY_ORIENTATION: |
435 | ret = decode_display_orientation(&h->display_orientation, gb); |
436 | break; |
437 | case SEI_TYPE_GREEN_METADATA: |
438 | ret = decode_green_metadata(&h->green_metadata, gb); |
439 | break; |
440 | default: |
441 | av_log(logctx, AV_LOG_DEBUG, "unknown SEI type %d\n", type); |
442 | } |
443 | if (ret < 0 && ret != AVERROR_PS_NOT_FOUND) |
444 | return ret; |
445 | if (ret < 0) |
446 | master_ret = ret; |
447 | |
448 | skip_bits_long(gb, next - get_bits_count(gb)); |
449 | |
450 | // FIXME check bits here |
451 | align_get_bits(gb); |
452 | } |
453 | |
454 | return master_ret; |
455 | } |
456 | |
457 | const char *ff_h264_sei_stereo_mode(const H264SEIFramePacking *h) |
458 | { |
459 | if (h->frame_packing_arrangement_cancel_flag == 0) { |
460 | switch (h->frame_packing_arrangement_type) { |
461 | case SEI_FPA_TYPE_CHECKERBOARD: |
462 | if (h->content_interpretation_type == 2) |
463 | return "checkerboard_rl"; |
464 | else |
465 | return "checkerboard_lr"; |
466 | case SEI_FPA_TYPE_INTERLEAVE_COLUMN: |
467 | if (h->content_interpretation_type == 2) |
468 | return "col_interleaved_rl"; |
469 | else |
470 | return "col_interleaved_lr"; |
471 | case SEI_FPA_TYPE_INTERLEAVE_ROW: |
472 | if (h->content_interpretation_type == 2) |
473 | return "row_interleaved_rl"; |
474 | else |
475 | return "row_interleaved_lr"; |
476 | case SEI_FPA_TYPE_SIDE_BY_SIDE: |
477 | if (h->content_interpretation_type == 2) |
478 | return "right_left"; |
479 | else |
480 | return "left_right"; |
481 | case SEI_FPA_TYPE_TOP_BOTTOM: |
482 | if (h->content_interpretation_type == 2) |
483 | return "bottom_top"; |
484 | else |
485 | return "top_bottom"; |
486 | case SEI_FPA_TYPE_INTERLEAVE_TEMPORAL: |
487 | if (h->content_interpretation_type == 2) |
488 | return "block_rl"; |
489 | else |
490 | return "block_lr"; |
491 | case SEI_FPA_TYPE_2D: |
492 | default: |
493 | return "mono"; |
494 | } |
495 | } else if (h->frame_packing_arrangement_cancel_flag == 1) { |
496 | return "mono"; |
497 | } else { |
498 | return NULL; |
499 | } |
500 | } |
501 |