summaryrefslogtreecommitdiff
path: root/libavcodec/videotoolboxenc.c (plain)
blob: f1c1670dd187707bb94b62e16fb91db96210c0f6
1/*
2 * copyright (c) 2015 Rick Kern <kernrj@gmail.com>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21#include <VideoToolbox/VideoToolbox.h>
22#include <CoreVideo/CoreVideo.h>
23#include <CoreMedia/CoreMedia.h>
24#include <TargetConditionals.h>
25#include <Availability.h>
26#include "avcodec.h"
27#include "libavutil/opt.h"
28#include "libavutil/avassert.h"
29#include "libavutil/avstring.h"
30#include "libavcodec/avcodec.h"
31#include "libavutil/pixdesc.h"
32#include "internal.h"
33#include <pthread.h>
34#include "h264.h"
35#include "h264_sei.h"
36#include <dlfcn.h>
37
38//These symbols may not be present
39static struct{
40 CFStringRef kCVImageBufferColorPrimaries_ITU_R_2020;
41 CFStringRef kCVImageBufferTransferFunction_ITU_R_2020;
42 CFStringRef kCVImageBufferYCbCrMatrix_ITU_R_2020;
43
44 CFStringRef kVTCompressionPropertyKey_H264EntropyMode;
45 CFStringRef kVTH264EntropyMode_CAVLC;
46 CFStringRef kVTH264EntropyMode_CABAC;
47
48 CFStringRef kVTProfileLevel_H264_Baseline_4_0;
49 CFStringRef kVTProfileLevel_H264_Baseline_4_2;
50 CFStringRef kVTProfileLevel_H264_Baseline_5_0;
51 CFStringRef kVTProfileLevel_H264_Baseline_5_1;
52 CFStringRef kVTProfileLevel_H264_Baseline_5_2;
53 CFStringRef kVTProfileLevel_H264_Baseline_AutoLevel;
54 CFStringRef kVTProfileLevel_H264_Main_4_2;
55 CFStringRef kVTProfileLevel_H264_Main_5_1;
56 CFStringRef kVTProfileLevel_H264_Main_5_2;
57 CFStringRef kVTProfileLevel_H264_Main_AutoLevel;
58 CFStringRef kVTProfileLevel_H264_High_3_0;
59 CFStringRef kVTProfileLevel_H264_High_3_1;
60 CFStringRef kVTProfileLevel_H264_High_3_2;
61 CFStringRef kVTProfileLevel_H264_High_4_0;
62 CFStringRef kVTProfileLevel_H264_High_4_1;
63 CFStringRef kVTProfileLevel_H264_High_4_2;
64 CFStringRef kVTProfileLevel_H264_High_5_1;
65 CFStringRef kVTProfileLevel_H264_High_5_2;
66 CFStringRef kVTProfileLevel_H264_High_AutoLevel;
67
68 CFStringRef kVTCompressionPropertyKey_RealTime;
69
70 CFStringRef kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder;
71 CFStringRef kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder;
72} compat_keys;
73
/*
 * Look up the CFStringRef constant named `symbol` at run time and store it
 * in compat_keys.symbol.
 *
 * dlsym() yields the ADDRESS of the constant, or NULL when the symbol is not
 * exported by the loaded frameworks. The address is therefore checked before
 * it is dereferenced; when it is NULL a CFSTR built from defaultVal is used
 * as the fallback value.
 */
#define GET_SYM(symbol, defaultVal)                                       \
do {                                                                      \
    CFStringRef *handle = (CFStringRef *)dlsym(RTLD_DEFAULT, #symbol);    \
    if (!handle)                                                          \
        compat_keys.symbol = CFSTR(defaultVal);                           \
    else                                                                  \
        compat_keys.symbol = *handle;                                     \
} while (0)
82
83static pthread_once_t once_ctrl = PTHREAD_ONCE_INIT;
84
85static void loadVTEncSymbols(){
86 GET_SYM(kCVImageBufferColorPrimaries_ITU_R_2020, "ITU_R_2020");
87 GET_SYM(kCVImageBufferTransferFunction_ITU_R_2020, "ITU_R_2020");
88 GET_SYM(kCVImageBufferYCbCrMatrix_ITU_R_2020, "ITU_R_2020");
89
90 GET_SYM(kVTCompressionPropertyKey_H264EntropyMode, "H264EntropyMode");
91 GET_SYM(kVTH264EntropyMode_CAVLC, "CAVLC");
92 GET_SYM(kVTH264EntropyMode_CABAC, "CABAC");
93
94 GET_SYM(kVTProfileLevel_H264_Baseline_4_0, "H264_Baseline_4_0");
95 GET_SYM(kVTProfileLevel_H264_Baseline_4_2, "H264_Baseline_4_2");
96 GET_SYM(kVTProfileLevel_H264_Baseline_5_0, "H264_Baseline_5_0");
97 GET_SYM(kVTProfileLevel_H264_Baseline_5_1, "H264_Baseline_5_1");
98 GET_SYM(kVTProfileLevel_H264_Baseline_5_2, "H264_Baseline_5_2");
99 GET_SYM(kVTProfileLevel_H264_Baseline_AutoLevel, "H264_Baseline_AutoLevel");
100 GET_SYM(kVTProfileLevel_H264_Main_4_2, "H264_Main_4_2");
101 GET_SYM(kVTProfileLevel_H264_Main_5_1, "H264_Main_5_1");
102 GET_SYM(kVTProfileLevel_H264_Main_5_2, "H264_Main_5_2");
103 GET_SYM(kVTProfileLevel_H264_Main_AutoLevel, "H264_Main_AutoLevel");
104 GET_SYM(kVTProfileLevel_H264_High_3_0, "H264_High_3_0");
105 GET_SYM(kVTProfileLevel_H264_High_3_1, "H264_High_3_1");
106 GET_SYM(kVTProfileLevel_H264_High_3_2, "H264_High_3_2");
107 GET_SYM(kVTProfileLevel_H264_High_4_0, "H264_High_4_0");
108 GET_SYM(kVTProfileLevel_H264_High_4_1, "H264_High_4_1");
109 GET_SYM(kVTProfileLevel_H264_High_4_2, "H264_High_4_2");
110 GET_SYM(kVTProfileLevel_H264_High_5_1, "H264_High_5_1");
111 GET_SYM(kVTProfileLevel_H264_High_5_2, "H264_High_5_2");
112 GET_SYM(kVTProfileLevel_H264_High_AutoLevel, "H264_High_AutoLevel");
113
114 GET_SYM(kVTCompressionPropertyKey_RealTime, "RealTime");
115
116 GET_SYM(kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder,
117 "EnableHardwareAcceleratedVideoEncoder");
118 GET_SYM(kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder,
119 "RequireHardwareAcceleratedVideoEncoder");
120}
121
/* H.264 profile selection; H264_PROF_AUTO leaves the choice open. */
typedef enum VT_H264Profile {
    H264_PROF_AUTO     = 0,
    H264_PROF_BASELINE = 1,
    H264_PROF_MAIN     = 2,
    H264_PROF_HIGH     = 3,
    H264_PROF_COUNT    = 4  /* number of profile values above */
} VT_H264Profile;
129
/* H.264 entropy coder selection; VT_ENTROPY_NOT_SET means no preference. */
typedef enum VTH264Entropy {
    VT_ENTROPY_NOT_SET = 0,
    VT_CAVLC           = 1,
    VT_CABAC           = 2
} VTH264Entropy;
135
/* Four-byte start code written in front of every copied parameter set. */
static const uint8_t start_code[4] = { 0x00, 0x00, 0x00, 0x01 };
137
/*
 * Side data attached to a queued frame. Both the struct and `data` are
 * released with av_free() by the queue code when nobody takes ownership.
 */
typedef struct ExtraSEI {
    void   *data;  /* payload bytes */
    size_t  size;  /* presumably the length of `data` in bytes - confirm against the producer */
} ExtraSEI;
142
143typedef struct BufNode {
144 CMSampleBufferRef cm_buffer;
145 ExtraSEI *sei;
146 struct BufNode* next;
147 int error;
148} BufNode;
149
150typedef struct VTEncContext {
151 AVClass *class;
152 VTCompressionSessionRef session;
153 CFStringRef ycbcr_matrix;
154 CFStringRef color_primaries;
155 CFStringRef transfer_function;
156
157 pthread_mutex_t lock;
158 pthread_cond_t cv_sample_sent;
159
160 int async_error;
161
162 BufNode *q_head;
163 BufNode *q_tail;
164
165 int64_t frame_ct_out;
166 int64_t frame_ct_in;
167
168 int64_t first_pts;
169 int64_t dts_delta;
170
171 int64_t profile;
172 int64_t level;
173 int64_t entropy;
174 int64_t realtime;
175 int64_t frames_before;
176 int64_t frames_after;
177
178 int64_t allow_sw;
179
180 bool flushing;
181 bool has_b_frames;
182 bool warned_color_range;
183 bool a53_cc;
184} VTEncContext;
185
186static int vtenc_populate_extradata(AVCodecContext *avctx,
187 CMVideoCodecType codec_type,
188 CFStringRef profile_level,
189 CFNumberRef gamma_level,
190 CFDictionaryRef enc_info,
191 CFDictionaryRef pixel_buffer_info);
192
193/**
194 * NULL-safe release of *refPtr, and sets value to NULL.
195 */
196static void vt_release_num(CFNumberRef* refPtr){
197 if (!*refPtr) {
198 return;
199 }
200
201 CFRelease(*refPtr);
202 *refPtr = NULL;
203}
204
205static void set_async_error(VTEncContext *vtctx, int err)
206{
207 BufNode *info;
208
209 pthread_mutex_lock(&vtctx->lock);
210
211 vtctx->async_error = err;
212
213 info = vtctx->q_head;
214 vtctx->q_head = vtctx->q_tail = NULL;
215
216 while (info) {
217 BufNode *next = info->next;
218 CFRelease(info->cm_buffer);
219 av_free(info);
220 info = next;
221 }
222
223 pthread_mutex_unlock(&vtctx->lock);
224}
225
226static void clear_frame_queue(VTEncContext *vtctx)
227{
228 set_async_error(vtctx, 0);
229}
230
231static int vtenc_q_pop(VTEncContext *vtctx, bool wait, CMSampleBufferRef *buf, ExtraSEI **sei)
232{
233 BufNode *info;
234
235 pthread_mutex_lock(&vtctx->lock);
236
237 if (vtctx->async_error) {
238 pthread_mutex_unlock(&vtctx->lock);
239 return vtctx->async_error;
240 }
241
242 if (vtctx->flushing && vtctx->frame_ct_in == vtctx->frame_ct_out) {
243 *buf = NULL;
244
245 pthread_mutex_unlock(&vtctx->lock);
246 return 0;
247 }
248
249 while (!vtctx->q_head && !vtctx->async_error && wait) {
250 pthread_cond_wait(&vtctx->cv_sample_sent, &vtctx->lock);
251 }
252
253 if (!vtctx->q_head) {
254 pthread_mutex_unlock(&vtctx->lock);
255 *buf = NULL;
256 return 0;
257 }
258
259 info = vtctx->q_head;
260 vtctx->q_head = vtctx->q_head->next;
261 if (!vtctx->q_head) {
262 vtctx->q_tail = NULL;
263 }
264
265 pthread_mutex_unlock(&vtctx->lock);
266
267 *buf = info->cm_buffer;
268 if (sei && *buf) {
269 *sei = info->sei;
270 } else if (info->sei) {
271 if (info->sei->data) av_free(info->sei->data);
272 av_free(info->sei);
273 }
274 av_free(info);
275
276 vtctx->frame_ct_out++;
277
278 return 0;
279}
280
281static void vtenc_q_push(VTEncContext *vtctx, CMSampleBufferRef buffer, ExtraSEI *sei)
282{
283 BufNode *info = av_malloc(sizeof(BufNode));
284 if (!info) {
285 set_async_error(vtctx, AVERROR(ENOMEM));
286 return;
287 }
288
289 CFRetain(buffer);
290 info->cm_buffer = buffer;
291 info->sei = sei;
292 info->next = NULL;
293
294 pthread_mutex_lock(&vtctx->lock);
295 pthread_cond_signal(&vtctx->cv_sample_sent);
296
297 if (!vtctx->q_head) {
298 vtctx->q_head = info;
299 } else {
300 vtctx->q_tail->next = info;
301 }
302
303 vtctx->q_tail = info;
304
305 pthread_mutex_unlock(&vtctx->lock);
306}
307
308static int count_nalus(size_t length_code_size,
309 CMSampleBufferRef sample_buffer,
310 int *count)
311{
312 size_t offset = 0;
313 int status;
314 int nalu_ct = 0;
315 uint8_t size_buf[4];
316 size_t src_size = CMSampleBufferGetTotalSampleSize(sample_buffer);
317 CMBlockBufferRef block = CMSampleBufferGetDataBuffer(sample_buffer);
318
319 if (length_code_size > 4)
320 return AVERROR_INVALIDDATA;
321
322 while (offset < src_size) {
323 size_t curr_src_len;
324 size_t box_len = 0;
325 size_t i;
326
327 status = CMBlockBufferCopyDataBytes(block,
328 offset,
329 length_code_size,
330 size_buf);
331
332 for (i = 0; i < length_code_size; i++) {
333 box_len <<= 8;
334 box_len |= size_buf[i];
335 }
336
337 curr_src_len = box_len + length_code_size;
338 offset += curr_src_len;
339
340 nalu_ct++;
341 }
342
343 *count = nalu_ct;
344 return 0;
345}
346
347static CMVideoCodecType get_cm_codec_type(enum AVCodecID id)
348{
349 switch (id) {
350 case AV_CODEC_ID_H264: return kCMVideoCodecType_H264;
351 default: return 0;
352 }
353}
354
355/**
356 * Get the parameter sets from a CMSampleBufferRef.
357 * @param dst If *dst isn't NULL, the parameters are copied into existing
358 * memory. *dst_size must be set accordingly when *dst != NULL.
359 * If *dst is NULL, it will be allocated.
360 * In all cases, *dst_size is set to the number of bytes used starting
361 * at *dst.
362 */
363static int get_params_size(
364 AVCodecContext *avctx,
365 CMVideoFormatDescriptionRef vid_fmt,
366 size_t *size)
367{
368 size_t total_size = 0;
369 size_t ps_count;
370 int is_count_bad = 0;
371 size_t i;
372 int status;
373 status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
374 0,
375 NULL,
376 NULL,
377 &ps_count,
378 NULL);
379 if (status) {
380 is_count_bad = 1;
381 ps_count = 0;
382 status = 0;
383 }
384
385 for (i = 0; i < ps_count || is_count_bad; i++) {
386 const uint8_t *ps;
387 size_t ps_size;
388 status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
389 i,
390 &ps,
391 &ps_size,
392 NULL,
393 NULL);
394 if (status) {
395 /*
396 * When ps_count is invalid, status != 0 ends the loop normally
397 * unless we didn't get any parameter sets.
398 */
399 if (i > 0 && is_count_bad) status = 0;
400
401 break;
402 }
403
404 total_size += ps_size + sizeof(start_code);
405 }
406
407 if (status) {
408 av_log(avctx, AV_LOG_ERROR, "Error getting parameter set sizes: %d\n", status);
409 return AVERROR_EXTERNAL;
410 }
411
412 *size = total_size;
413 return 0;
414}
415
416static int copy_param_sets(
417 AVCodecContext *avctx,
418 CMVideoFormatDescriptionRef vid_fmt,
419 uint8_t *dst,
420 size_t dst_size)
421{
422 size_t ps_count;
423 int is_count_bad = 0;
424 int status;
425 size_t offset = 0;
426 size_t i;
427
428 status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
429 0,
430 NULL,
431 NULL,
432 &ps_count,
433 NULL);
434 if (status) {
435 is_count_bad = 1;
436 ps_count = 0;
437 status = 0;
438 }
439
440
441 for (i = 0; i < ps_count || is_count_bad; i++) {
442 const uint8_t *ps;
443 size_t ps_size;
444 size_t next_offset;
445
446 status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
447 i,
448 &ps,
449 &ps_size,
450 NULL,
451 NULL);
452 if (status) {
453 if (i > 0 && is_count_bad) status = 0;
454
455 break;
456 }
457
458 next_offset = offset + sizeof(start_code) + ps_size;
459 if (dst_size < next_offset) {
460 av_log(avctx, AV_LOG_ERROR, "Error: buffer too small for parameter sets.\n");
461 return AVERROR_BUFFER_TOO_SMALL;
462 }
463
464 memcpy(dst + offset, start_code, sizeof(start_code));
465 offset += sizeof(start_code);
466
467 memcpy(dst + offset, ps, ps_size);
468 offset = next_offset;
469 }
470
471 if (status) {
472 av_log(avctx, AV_LOG_ERROR, "Error getting parameter set data: %d\n", status);
473 return AVERROR_EXTERNAL;
474 }
475
476 return 0;
477}
478
479static int set_extradata(AVCodecContext *avctx, CMSampleBufferRef sample_buffer)
480{
481 CMVideoFormatDescriptionRef vid_fmt;
482 size_t total_size;
483 int status;
484
485 vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
486 if (!vid_fmt) {
487 av_log(avctx, AV_LOG_ERROR, "No video format.\n");
488 return AVERROR_EXTERNAL;
489 }
490
491 status = get_params_size(avctx, vid_fmt, &total_size);
492 if (status) {
493 av_log(avctx, AV_LOG_ERROR, "Could not get parameter sets.\n");
494 return status;
495 }
496
497 avctx->extradata = av_mallocz(total_size + AV_INPUT_BUFFER_PADDING_SIZE);
498 if (!avctx->extradata) {
499 return AVERROR(ENOMEM);
500 }
501 avctx->extradata_size = total_size;
502
503 status = copy_param_sets(avctx, vid_fmt, avctx->extradata, total_size);
504
505 if (status) {
506 av_log(avctx, AV_LOG_ERROR, "Could not copy param sets.\n");
507 return status;
508 }
509
510 return 0;
511}
512
513static void vtenc_output_callback(
514 void *ctx,
515 void *sourceFrameCtx,
516 OSStatus status,
517 VTEncodeInfoFlags flags,
518 CMSampleBufferRef sample_buffer)
519{
520 AVCodecContext *avctx = ctx;
521 VTEncContext *vtctx = avctx->priv_data;
522 ExtraSEI *sei = sourceFrameCtx;
523
524 if (vtctx->async_error) {
525 if(sample_buffer) CFRelease(sample_buffer);
526 return;
527 }
528
529 if (status || !sample_buffer) {
530 av_log(avctx, AV_LOG_ERROR, "Error encoding frame: %d\n", (int)status);
531 set_async_error(vtctx, AVERROR_EXTERNAL);
532 return;
533 }
534
535 if (!avctx->extradata && (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER)) {
536 int set_status = set_extradata(avctx, sample_buffer);
537 if (set_status) {
538 set_async_error(vtctx, set_status);
539 return;
540 }
541 }
542
543 vtenc_q_push(vtctx, sample_buffer, sei);
544}
545
546static int get_length_code_size(
547 AVCodecContext *avctx,
548 CMSampleBufferRef sample_buffer,
549 size_t *size)
550{
551 CMVideoFormatDescriptionRef vid_fmt;
552 int isize;
553 int status;
554
555 vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
556 if (!vid_fmt) {
557 av_log(avctx, AV_LOG_ERROR, "Error getting buffer format description.\n");
558 return AVERROR_EXTERNAL;
559 }
560
561 status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
562 0,
563 NULL,
564 NULL,
565 NULL,
566 &isize);
567 if (status) {
568 av_log(avctx, AV_LOG_ERROR, "Error getting length code size: %d\n", status);
569 return AVERROR_EXTERNAL;
570 }
571
572 *size = isize;
573 return 0;
574}
575
576/*
577 * Returns true on success.
578 *
579 * If profile_level_val is NULL and this method returns true, don't specify the
580 * profile/level to the encoder.
581 */
582static bool get_vt_profile_level(AVCodecContext *avctx,
583 CFStringRef *profile_level_val)
584{
585 VTEncContext *vtctx = avctx->priv_data;
586 int64_t profile = vtctx->profile;
587
588 if (profile == H264_PROF_AUTO && vtctx->level) {
589 //Need to pick a profile if level is not auto-selected.
590 profile = vtctx->has_b_frames ? H264_PROF_MAIN : H264_PROF_BASELINE;
591 }
592
593 *profile_level_val = NULL;
594
595 switch (profile) {
596 case H264_PROF_AUTO:
597 return true;
598
599 case H264_PROF_BASELINE:
600 switch (vtctx->level) {
601 case 0: *profile_level_val =
602 compat_keys.kVTProfileLevel_H264_Baseline_AutoLevel; break;
603 case 13: *profile_level_val = kVTProfileLevel_H264_Baseline_1_3; break;
604 case 30: *profile_level_val = kVTProfileLevel_H264_Baseline_3_0; break;
605 case 31: *profile_level_val = kVTProfileLevel_H264_Baseline_3_1; break;
606 case 32: *profile_level_val = kVTProfileLevel_H264_Baseline_3_2; break;
607 case 40: *profile_level_val =
608 compat_keys.kVTProfileLevel_H264_Baseline_4_0; break;
609 case 41: *profile_level_val = kVTProfileLevel_H264_Baseline_4_1; break;
610 case 42: *profile_level_val =
611 compat_keys.kVTProfileLevel_H264_Baseline_4_2; break;
612 case 50: *profile_level_val =
613 compat_keys.kVTProfileLevel_H264_Baseline_5_0; break;
614 case 51: *profile_level_val =
615 compat_keys.kVTProfileLevel_H264_Baseline_5_1; break;
616 case 52: *profile_level_val =
617 compat_keys.kVTProfileLevel_H264_Baseline_5_2; break;
618 }
619 break;
620
621 case H264_PROF_MAIN:
622 switch (vtctx->level) {
623 case 0: *profile_level_val =
624 compat_keys.kVTProfileLevel_H264_Main_AutoLevel; break;
625 case 30: *profile_level_val = kVTProfileLevel_H264_Main_3_0; break;
626 case 31: *profile_level_val = kVTProfileLevel_H264_Main_3_1; break;
627 case 32: *profile_level_val = kVTProfileLevel_H264_Main_3_2; break;
628 case 40: *profile_level_val = kVTProfileLevel_H264_Main_4_0; break;
629 case 41: *profile_level_val = kVTProfileLevel_H264_Main_4_1; break;
630 case 42: *profile_level_val =
631 compat_keys.kVTProfileLevel_H264_Main_4_2; break;
632 case 50: *profile_level_val = kVTProfileLevel_H264_Main_5_0; break;
633 case 51: *profile_level_val =
634 compat_keys.kVTProfileLevel_H264_Main_5_1; break;
635 case 52: *profile_level_val =
636 compat_keys.kVTProfileLevel_H264_Main_5_2; break;
637 }
638 break;
639
640 case H264_PROF_HIGH:
641 switch (vtctx->level) {
642 case 0: *profile_level_val =
643 compat_keys.kVTProfileLevel_H264_High_AutoLevel; break;
644 case 30: *profile_level_val =
645 compat_keys.kVTProfileLevel_H264_High_3_0; break;
646 case 31: *profile_level_val =
647 compat_keys.kVTProfileLevel_H264_High_3_1; break;
648 case 32: *profile_level_val =
649 compat_keys.kVTProfileLevel_H264_High_3_2; break;
650 case 40: *profile_level_val =
651 compat_keys.kVTProfileLevel_H264_High_4_0; break;
652 case 41: *profile_level_val =
653 compat_keys.kVTProfileLevel_H264_High_4_1; break;
654 case 42: *profile_level_val =
655 compat_keys.kVTProfileLevel_H264_High_4_2; break;
656 case 50: *profile_level_val = kVTProfileLevel_H264_High_5_0; break;
657 case 51: *profile_level_val =
658 compat_keys.kVTProfileLevel_H264_High_5_1; break;
659 case 52: *profile_level_val =
660 compat_keys.kVTProfileLevel_H264_High_5_2; break;
661 }
662 break;
663 }
664
665 if (!*profile_level_val) {
666 av_log(avctx, AV_LOG_ERROR, "Invalid Profile/Level.\n");
667 return false;
668 }
669
670 return true;
671}
672
673static int get_cv_pixel_format(AVCodecContext* avctx,
674 enum AVPixelFormat fmt,
675 enum AVColorRange range,
676 int* av_pixel_format,
677 int* range_guessed)
678{
679 if (range_guessed) *range_guessed = range != AVCOL_RANGE_MPEG &&
680 range != AVCOL_RANGE_JPEG;
681
682 //MPEG range is used when no range is set
683 if (fmt == AV_PIX_FMT_NV12) {
684 *av_pixel_format = range == AVCOL_RANGE_JPEG ?
685 kCVPixelFormatType_420YpCbCr8BiPlanarFullRange :
686 kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange;
687 } else if (fmt == AV_PIX_FMT_YUV420P) {
688 *av_pixel_format = range == AVCOL_RANGE_JPEG ?
689 kCVPixelFormatType_420YpCbCr8PlanarFullRange :
690 kCVPixelFormatType_420YpCbCr8Planar;
691 } else {
692 return AVERROR(EINVAL);
693 }
694
695 return 0;
696}
697
698static void add_color_attr(AVCodecContext *avctx, CFMutableDictionaryRef dict) {
699 VTEncContext *vtctx = avctx->priv_data;
700
701 if (vtctx->color_primaries) {
702 CFDictionarySetValue(dict,
703 kCVImageBufferColorPrimariesKey,
704 vtctx->color_primaries);
705 }
706
707 if (vtctx->transfer_function) {
708 CFDictionarySetValue(dict,
709 kCVImageBufferTransferFunctionKey,
710 vtctx->transfer_function);
711 }
712
713 if (vtctx->ycbcr_matrix) {
714 CFDictionarySetValue(dict,
715 kCVImageBufferYCbCrMatrixKey,
716 vtctx->ycbcr_matrix);
717 }
718}
719
720static int create_cv_pixel_buffer_info(AVCodecContext* avctx,
721 CFMutableDictionaryRef* dict)
722{
723 CFNumberRef cv_color_format_num = NULL;
724 CFNumberRef width_num = NULL;
725 CFNumberRef height_num = NULL;
726 CFMutableDictionaryRef pixel_buffer_info = NULL;
727 int cv_color_format;
728 int status = get_cv_pixel_format(avctx,
729 avctx->pix_fmt,
730 avctx->color_range,
731 &cv_color_format,
732 NULL);
733 if (status) return status;
734
735 pixel_buffer_info = CFDictionaryCreateMutable(
736 kCFAllocatorDefault,
737 20,
738 &kCFCopyStringDictionaryKeyCallBacks,
739 &kCFTypeDictionaryValueCallBacks);
740
741 if (!pixel_buffer_info) goto pbinfo_nomem;
742
743 cv_color_format_num = CFNumberCreate(kCFAllocatorDefault,
744 kCFNumberSInt32Type,
745 &cv_color_format);
746 if (!cv_color_format_num) goto pbinfo_nomem;
747
748 CFDictionarySetValue(pixel_buffer_info,
749 kCVPixelBufferPixelFormatTypeKey,
750 cv_color_format_num);
751 vt_release_num(&cv_color_format_num);
752
753 width_num = CFNumberCreate(kCFAllocatorDefault,
754 kCFNumberSInt32Type,
755 &avctx->width);
756 if (!width_num) return AVERROR(ENOMEM);
757
758 CFDictionarySetValue(pixel_buffer_info,
759 kCVPixelBufferWidthKey,
760 width_num);
761 vt_release_num(&width_num);
762
763 height_num = CFNumberCreate(kCFAllocatorDefault,
764 kCFNumberSInt32Type,
765 &avctx->height);
766 if (!height_num) goto pbinfo_nomem;
767
768 CFDictionarySetValue(pixel_buffer_info,
769 kCVPixelBufferHeightKey,
770 height_num);
771 vt_release_num(&height_num);
772
773 add_color_attr(avctx, pixel_buffer_info);
774
775 *dict = pixel_buffer_info;
776 return 0;
777
778pbinfo_nomem:
779 vt_release_num(&cv_color_format_num);
780 vt_release_num(&width_num);
781 vt_release_num(&height_num);
782 if (pixel_buffer_info) CFRelease(pixel_buffer_info);
783
784 return AVERROR(ENOMEM);
785}
786
787static int get_cv_color_primaries(AVCodecContext *avctx,
788 CFStringRef *primaries)
789{
790 enum AVColorPrimaries pri = avctx->color_primaries;
791 switch (pri) {
792 case AVCOL_PRI_UNSPECIFIED:
793 *primaries = NULL;
794 break;
795
796 case AVCOL_PRI_BT709:
797 *primaries = kCVImageBufferColorPrimaries_ITU_R_709_2;
798 break;
799
800 case AVCOL_PRI_BT2020:
801 *primaries = compat_keys.kCVImageBufferColorPrimaries_ITU_R_2020;
802 break;
803
804 default:
805 av_log(avctx, AV_LOG_ERROR, "Color primaries %s is not supported.\n", av_color_primaries_name(pri));
806 *primaries = NULL;
807 return -1;
808 }
809
810 return 0;
811}
812
813static int get_cv_transfer_function(AVCodecContext *avctx,
814 CFStringRef *transfer_fnc,
815 CFNumberRef *gamma_level)
816{
817 enum AVColorTransferCharacteristic trc = avctx->color_trc;
818 Float32 gamma;
819 *gamma_level = NULL;
820
821 switch (trc) {
822 case AVCOL_TRC_UNSPECIFIED:
823 *transfer_fnc = NULL;
824 break;
825
826 case AVCOL_TRC_BT709:
827 *transfer_fnc = kCVImageBufferTransferFunction_ITU_R_709_2;
828 break;
829
830 case AVCOL_TRC_SMPTE240M:
831 *transfer_fnc = kCVImageBufferTransferFunction_SMPTE_240M_1995;
832 break;
833
834 case AVCOL_TRC_GAMMA22:
835 gamma = 2.2;
836 *transfer_fnc = kCVImageBufferTransferFunction_UseGamma;
837 *gamma_level = CFNumberCreate(NULL, kCFNumberFloat32Type, &gamma);
838 break;
839
840 case AVCOL_TRC_GAMMA28:
841 gamma = 2.8;
842 *transfer_fnc = kCVImageBufferTransferFunction_UseGamma;
843 *gamma_level = CFNumberCreate(NULL, kCFNumberFloat32Type, &gamma);
844 break;
845
846 case AVCOL_TRC_BT2020_10:
847 case AVCOL_TRC_BT2020_12:
848 *transfer_fnc = compat_keys.kCVImageBufferTransferFunction_ITU_R_2020;
849 break;
850
851 default:
852 av_log(avctx, AV_LOG_ERROR, "Transfer function %s is not supported.\n", av_color_transfer_name(trc));
853 return -1;
854 }
855
856 return 0;
857}
858
859static int get_cv_ycbcr_matrix(AVCodecContext *avctx, CFStringRef *matrix) {
860 switch(avctx->colorspace) {
861 case AVCOL_SPC_BT709:
862 *matrix = kCVImageBufferYCbCrMatrix_ITU_R_709_2;
863 break;
864
865 case AVCOL_SPC_UNSPECIFIED:
866 *matrix = NULL;
867 break;
868
869 case AVCOL_SPC_BT470BG:
870 case AVCOL_SPC_SMPTE170M:
871 *matrix = kCVImageBufferYCbCrMatrix_ITU_R_601_4;
872 break;
873
874 case AVCOL_SPC_SMPTE240M:
875 *matrix = kCVImageBufferYCbCrMatrix_SMPTE_240M_1995;
876 break;
877
878 case AVCOL_SPC_BT2020_NCL:
879 *matrix = compat_keys.kCVImageBufferYCbCrMatrix_ITU_R_2020;
880 break;
881
882 default:
883 av_log(avctx, AV_LOG_ERROR, "Color space %s is not supported.\n", av_color_space_name(avctx->colorspace));
884 return -1;
885 }
886
887 return 0;
888}
889
890static int vtenc_create_encoder(AVCodecContext *avctx,
891 CMVideoCodecType codec_type,
892 CFStringRef profile_level,
893 CFNumberRef gamma_level,
894 CFDictionaryRef enc_info,
895 CFDictionaryRef pixel_buffer_info,
896 VTCompressionSessionRef *session)
897{
898 VTEncContext *vtctx = avctx->priv_data;
899 SInt32 bit_rate = avctx->bit_rate;
900 SInt32 max_rate = avctx->rc_max_rate;
901 CFNumberRef bit_rate_num;
902 CFNumberRef bytes_per_second;
903 CFNumberRef one_second;
904 CFArrayRef data_rate_limits;
905 int64_t bytes_per_second_value = 0;
906 int64_t one_second_value = 0;
907 void *nums[2];
908
909 int status = VTCompressionSessionCreate(kCFAllocatorDefault,
910 avctx->width,
911 avctx->height,
912 codec_type,
913 enc_info,
914 pixel_buffer_info,
915 kCFAllocatorDefault,
916 vtenc_output_callback,
917 avctx,
918 session);
919
920 if (status || !vtctx->session) {
921 av_log(avctx, AV_LOG_ERROR, "Error: cannot create compression session: %d\n", status);
922
923#if !TARGET_OS_IPHONE
924 if (!vtctx->allow_sw) {
925 av_log(avctx, AV_LOG_ERROR, "Try -allow_sw 1. The hardware encoder may be busy, or not supported.\n");
926 }
927#endif
928
929 return AVERROR_EXTERNAL;
930 }
931
932 bit_rate_num = CFNumberCreate(kCFAllocatorDefault,
933 kCFNumberSInt32Type,
934 &bit_rate);
935 if (!bit_rate_num) return AVERROR(ENOMEM);
936
937 status = VTSessionSetProperty(vtctx->session,
938 kVTCompressionPropertyKey_AverageBitRate,
939 bit_rate_num);
940 CFRelease(bit_rate_num);
941
942 if (status) {
943 av_log(avctx, AV_LOG_ERROR, "Error setting bitrate property: %d\n", status);
944 return AVERROR_EXTERNAL;
945 }
946
947 bytes_per_second_value = max_rate >> 3;
948 bytes_per_second = CFNumberCreate(kCFAllocatorDefault,
949 kCFNumberSInt64Type,
950 &bytes_per_second_value);
951 if (!bytes_per_second) {
952 return AVERROR(ENOMEM);
953 }
954 one_second_value = 1;
955 one_second = CFNumberCreate(kCFAllocatorDefault,
956 kCFNumberSInt64Type,
957 &one_second_value);
958 if (!one_second) {
959 CFRelease(bytes_per_second);
960 return AVERROR(ENOMEM);
961 }
962 nums[0] = bytes_per_second;
963 nums[1] = one_second;
964 data_rate_limits = CFArrayCreate(kCFAllocatorDefault,
965 nums,
966 2,
967 &kCFTypeArrayCallBacks);
968
969 if (!data_rate_limits) {
970 CFRelease(bytes_per_second);
971 CFRelease(one_second);
972 return AVERROR(ENOMEM);
973 }
974 status = VTSessionSetProperty(vtctx->session,
975 kVTCompressionPropertyKey_DataRateLimits,
976 data_rate_limits);
977
978 CFRelease(bytes_per_second);
979 CFRelease(one_second);
980 CFRelease(data_rate_limits);
981
982 if (status) {
983 av_log(avctx, AV_LOG_ERROR, "Error setting max bitrate property: %d\n", status);
984 return AVERROR_EXTERNAL;
985 }
986
987 if (profile_level) {
988 status = VTSessionSetProperty(vtctx->session,
989 kVTCompressionPropertyKey_ProfileLevel,
990 profile_level);
991 if (status) {
992 av_log(avctx, AV_LOG_ERROR, "Error setting profile/level property: %d\n", status);
993 }
994 }
995
996 if (avctx->gop_size > 0) {
997 CFNumberRef interval = CFNumberCreate(kCFAllocatorDefault,
998 kCFNumberIntType,
999 &avctx->gop_size);
1000 if (!interval) {
1001 return AVERROR(ENOMEM);
1002 }
1003
1004 status = VTSessionSetProperty(vtctx->session,
1005 kVTCompressionPropertyKey_MaxKeyFrameInterval,
1006 interval);
1007 CFRelease(interval);
1008
1009 if (status) {
1010 av_log(avctx, AV_LOG_ERROR, "Error setting 'max key-frame interval' property: %d\n", status);
1011 return AVERROR_EXTERNAL;
1012 }
1013 }
1014
1015 if (vtctx->frames_before) {
1016 status = VTSessionSetProperty(vtctx->session,
1017 kVTCompressionPropertyKey_MoreFramesBeforeStart,
1018 kCFBooleanTrue);
1019
1020 if (status == kVTPropertyNotSupportedErr) {
1021 av_log(avctx, AV_LOG_WARNING, "frames_before property is not supported on this device. Ignoring.\n");
1022 } else if (status) {
1023 av_log(avctx, AV_LOG_ERROR, "Error setting frames_before property: %d\n", status);
1024 }
1025 }
1026
1027 if (vtctx->frames_after) {
1028 status = VTSessionSetProperty(vtctx->session,
1029 kVTCompressionPropertyKey_MoreFramesAfterEnd,
1030 kCFBooleanTrue);
1031
1032 if (status == kVTPropertyNotSupportedErr) {
1033 av_log(avctx, AV_LOG_WARNING, "frames_after property is not supported on this device. Ignoring.\n");
1034 } else if (status) {
1035 av_log(avctx, AV_LOG_ERROR, "Error setting frames_after property: %d\n", status);
1036 }
1037 }
1038
1039 if (avctx->sample_aspect_ratio.num != 0) {
1040 CFNumberRef num;
1041 CFNumberRef den;
1042 CFMutableDictionaryRef par;
1043 AVRational *avpar = &avctx->sample_aspect_ratio;
1044
1045 av_reduce(&avpar->num, &avpar->den,
1046 avpar->num, avpar->den,
1047 0xFFFFFFFF);
1048
1049 num = CFNumberCreate(kCFAllocatorDefault,
1050 kCFNumberIntType,
1051 &avpar->num);
1052
1053 den = CFNumberCreate(kCFAllocatorDefault,
1054 kCFNumberIntType,
1055 &avpar->den);
1056
1057
1058
1059 par = CFDictionaryCreateMutable(kCFAllocatorDefault,
1060 2,
1061 &kCFCopyStringDictionaryKeyCallBacks,
1062 &kCFTypeDictionaryValueCallBacks);
1063
1064 if (!par || !num || !den) {
1065 if (par) CFRelease(par);
1066 if (num) CFRelease(num);
1067 if (den) CFRelease(den);
1068
1069 return AVERROR(ENOMEM);
1070 }
1071
1072 CFDictionarySetValue(
1073 par,
1074 kCMFormatDescriptionKey_PixelAspectRatioHorizontalSpacing,
1075 num);
1076
1077 CFDictionarySetValue(
1078 par,
1079 kCMFormatDescriptionKey_PixelAspectRatioVerticalSpacing,
1080 den);
1081
1082 status = VTSessionSetProperty(vtctx->session,
1083 kVTCompressionPropertyKey_PixelAspectRatio,
1084 par);
1085
1086 CFRelease(par);
1087 CFRelease(num);
1088 CFRelease(den);
1089
1090 if (status) {
1091 av_log(avctx,
1092 AV_LOG_ERROR,
1093 "Error setting pixel aspect ratio to %d:%d: %d.\n",
1094 avctx->sample_aspect_ratio.num,
1095 avctx->sample_aspect_ratio.den,
1096 status);
1097
1098 return AVERROR_EXTERNAL;
1099 }
1100 }
1101
1102
1103 if (vtctx->transfer_function) {
1104 status = VTSessionSetProperty(vtctx->session,
1105 kVTCompressionPropertyKey_TransferFunction,
1106 vtctx->transfer_function);
1107
1108 if (status) {
1109 av_log(avctx, AV_LOG_WARNING, "Could not set transfer function: %d\n", status);
1110 }
1111 }
1112
1113
1114 if (vtctx->ycbcr_matrix) {
1115 status = VTSessionSetProperty(vtctx->session,
1116 kVTCompressionPropertyKey_YCbCrMatrix,
1117 vtctx->ycbcr_matrix);
1118
1119 if (status) {
1120 av_log(avctx, AV_LOG_WARNING, "Could not set ycbcr matrix: %d\n", status);
1121 }
1122 }
1123
1124
1125 if (vtctx->color_primaries) {
1126 status = VTSessionSetProperty(vtctx->session,
1127 kVTCompressionPropertyKey_ColorPrimaries,
1128 vtctx->color_primaries);
1129
1130 if (status) {
1131 av_log(avctx, AV_LOG_WARNING, "Could not set color primaries: %d\n", status);
1132 }
1133 }
1134
1135 if (gamma_level) {
1136 status = VTSessionSetProperty(vtctx->session,
1137 kCVImageBufferGammaLevelKey,
1138 gamma_level);
1139
1140 if (status) {
1141 av_log(avctx, AV_LOG_WARNING, "Could not set gamma level: %d\n", status);
1142 }
1143 }
1144
1145 if (!vtctx->has_b_frames) {
1146 status = VTSessionSetProperty(vtctx->session,
1147 kVTCompressionPropertyKey_AllowFrameReordering,
1148 kCFBooleanFalse);
1149
1150 if (status) {
1151 av_log(avctx, AV_LOG_ERROR, "Error setting 'allow frame reordering' property: %d\n", status);
1152 return AVERROR_EXTERNAL;
1153 }
1154 }
1155
1156 if (vtctx->entropy != VT_ENTROPY_NOT_SET) {
1157 CFStringRef entropy = vtctx->entropy == VT_CABAC ?
1158 compat_keys.kVTH264EntropyMode_CABAC:
1159 compat_keys.kVTH264EntropyMode_CAVLC;
1160
1161 status = VTSessionSetProperty(vtctx->session,
1162 compat_keys.kVTCompressionPropertyKey_H264EntropyMode,
1163 entropy);
1164
1165 if (status) {
1166 av_log(avctx, AV_LOG_ERROR, "Error setting entropy property: %d\n", status);
1167 }
1168 }
1169
1170 if (vtctx->realtime) {
1171 status = VTSessionSetProperty(vtctx->session,
1172 compat_keys.kVTCompressionPropertyKey_RealTime,
1173 kCFBooleanTrue);
1174
1175 if (status) {
1176 av_log(avctx, AV_LOG_ERROR, "Error setting realtime property: %d\n", status);
1177 }
1178 }
1179
1180 status = VTCompressionSessionPrepareToEncodeFrames(vtctx->session);
1181 if (status) {
1182 av_log(avctx, AV_LOG_ERROR, "Error: cannot prepare encoder: %d\n", status);
1183 return AVERROR_EXTERNAL;
1184 }
1185
1186 return 0;
1187}
1188
1189static av_cold int vtenc_init(AVCodecContext *avctx)
1190{
1191 CFMutableDictionaryRef enc_info;
1192 CFMutableDictionaryRef pixel_buffer_info;
1193 CMVideoCodecType codec_type;
1194 VTEncContext *vtctx = avctx->priv_data;
1195 CFStringRef profile_level;
1196 CFBooleanRef has_b_frames_cfbool;
1197 CFNumberRef gamma_level = NULL;
1198 int status;
1199
1200 pthread_once(&once_ctrl, loadVTEncSymbols);
1201
1202 codec_type = get_cm_codec_type(avctx->codec_id);
1203 if (!codec_type) {
1204 av_log(avctx, AV_LOG_ERROR, "Error: no mapping for AVCodecID %d\n", avctx->codec_id);
1205 return AVERROR(EINVAL);
1206 }
1207
1208 vtctx->has_b_frames = avctx->max_b_frames > 0;
1209 if(vtctx->has_b_frames && vtctx->profile == H264_PROF_BASELINE){
1210 av_log(avctx, AV_LOG_WARNING, "Cannot use B-frames with baseline profile. Output will not contain B-frames.\n");
1211 vtctx->has_b_frames = false;
1212 }
1213
1214 if (vtctx->entropy == VT_CABAC && vtctx->profile == H264_PROF_BASELINE) {
1215 av_log(avctx, AV_LOG_WARNING, "CABAC entropy requires 'main' or 'high' profile, but baseline was requested. Encode will not use CABAC entropy.\n");
1216 vtctx->entropy = VT_ENTROPY_NOT_SET;
1217 }
1218
1219 if (!get_vt_profile_level(avctx, &profile_level)) return AVERROR(EINVAL);
1220
1221 vtctx->session = NULL;
1222
1223 enc_info = CFDictionaryCreateMutable(
1224 kCFAllocatorDefault,
1225 20,
1226 &kCFCopyStringDictionaryKeyCallBacks,
1227 &kCFTypeDictionaryValueCallBacks
1228 );
1229
1230 if (!enc_info) return AVERROR(ENOMEM);
1231
1232#if !TARGET_OS_IPHONE
1233 if (!vtctx->allow_sw) {
1234 CFDictionarySetValue(enc_info,
1235 compat_keys.kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder,
1236 kCFBooleanTrue);
1237 } else {
1238 CFDictionarySetValue(enc_info,
1239 compat_keys.kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder,
1240 kCFBooleanTrue);
1241 }
1242#endif
1243
1244 if (avctx->pix_fmt != AV_PIX_FMT_VIDEOTOOLBOX) {
1245 status = create_cv_pixel_buffer_info(avctx, &pixel_buffer_info);
1246 if (status)
1247 goto init_cleanup;
1248 } else {
1249 pixel_buffer_info = NULL;
1250 }
1251
1252 pthread_mutex_init(&vtctx->lock, NULL);
1253 pthread_cond_init(&vtctx->cv_sample_sent, NULL);
1254 vtctx->dts_delta = vtctx->has_b_frames ? -1 : 0;
1255
1256 get_cv_transfer_function(avctx, &vtctx->transfer_function, &gamma_level);
1257 get_cv_ycbcr_matrix(avctx, &vtctx->ycbcr_matrix);
1258 get_cv_color_primaries(avctx, &vtctx->color_primaries);
1259
1260
1261 if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
1262 status = vtenc_populate_extradata(avctx,
1263 codec_type,
1264 profile_level,
1265 gamma_level,
1266 enc_info,
1267 pixel_buffer_info);
1268 if (status)
1269 goto init_cleanup;
1270 }
1271
1272 status = vtenc_create_encoder(avctx,
1273 codec_type,
1274 profile_level,
1275 gamma_level,
1276 enc_info,
1277 pixel_buffer_info,
1278 &vtctx->session);
1279
1280 if (status < 0)
1281 goto init_cleanup;
1282
1283 status = VTSessionCopyProperty(vtctx->session,
1284 kVTCompressionPropertyKey_AllowFrameReordering,
1285 kCFAllocatorDefault,
1286 &has_b_frames_cfbool);
1287
1288 if (!status) {
1289 //Some devices don't output B-frames for main profile, even if requested.
1290 vtctx->has_b_frames = CFBooleanGetValue(has_b_frames_cfbool);
1291 CFRelease(has_b_frames_cfbool);
1292 }
1293 avctx->has_b_frames = vtctx->has_b_frames;
1294
1295init_cleanup:
1296 if (gamma_level)
1297 CFRelease(gamma_level);
1298
1299 if (pixel_buffer_info)
1300 CFRelease(pixel_buffer_info);
1301
1302 CFRelease(enc_info);
1303
1304 return status;
1305}
1306
1307static void vtenc_get_frame_info(CMSampleBufferRef buffer, bool *is_key_frame)
1308{
1309 CFArrayRef attachments;
1310 CFDictionaryRef attachment;
1311 CFBooleanRef not_sync;
1312 CFIndex len;
1313
1314 attachments = CMSampleBufferGetSampleAttachmentsArray(buffer, false);
1315 len = !attachments ? 0 : CFArrayGetCount(attachments);
1316
1317 if (!len) {
1318 *is_key_frame = true;
1319 return;
1320 }
1321
1322 attachment = CFArrayGetValueAtIndex(attachments, 0);
1323
1324 if (CFDictionaryGetValueIfPresent(attachment,
1325 kCMSampleAttachmentKey_NotSync,
1326 (const void **)&not_sync))
1327 {
1328 *is_key_frame = !CFBooleanGetValue(not_sync);
1329 } else {
1330 *is_key_frame = true;
1331 }
1332}
1333
1334static int is_post_sei_nal_type(int nal_type){
1335 return nal_type != H264_NAL_SEI &&
1336 nal_type != H264_NAL_SPS &&
1337 nal_type != H264_NAL_PPS &&
1338 nal_type != H264_NAL_AUD;
1339}
1340
1341/*
1342 * Finds the sei message start/size of type find_sei_type.
1343 * If more than one of that type exists, the last one is returned.
1344 */
1345static int find_sei_end(AVCodecContext *avctx,
1346 uint8_t *nal_data,
1347 size_t nal_size,
1348 uint8_t **sei_end)
1349{
1350 int nal_type;
1351 size_t sei_payload_size = 0;
1352 int sei_payload_type = 0;
1353 *sei_end = NULL;
1354 uint8_t *nal_start = nal_data;
1355
1356 if (!nal_size)
1357 return 0;
1358
1359 nal_type = *nal_data & 0x1F;
1360 if (nal_type != H264_NAL_SEI)
1361 return 0;
1362
1363 nal_data++;
1364 nal_size--;
1365
1366 if (nal_data[nal_size - 1] == 0x80)
1367 nal_size--;
1368
1369 while (nal_size > 0 && *nal_data > 0) {
1370 do{
1371 sei_payload_type += *nal_data;
1372 nal_data++;
1373 nal_size--;
1374 } while (nal_size > 0 && *nal_data == 0xFF);
1375
1376 if (!nal_size) {
1377 av_log(avctx, AV_LOG_ERROR, "Unexpected end of SEI NAL Unit parsing type.\n");
1378 return AVERROR_INVALIDDATA;
1379 }
1380
1381 do{
1382 sei_payload_size += *nal_data;
1383 nal_data++;
1384 nal_size--;
1385 } while (nal_size > 0 && *nal_data == 0xFF);
1386
1387 if (nal_size < sei_payload_size) {
1388 av_log(avctx, AV_LOG_ERROR, "Unexpected end of SEI NAL Unit parsing size.\n");
1389 return AVERROR_INVALIDDATA;
1390 }
1391
1392 nal_data += sei_payload_size;
1393 nal_size -= sei_payload_size;
1394 }
1395
1396 *sei_end = nal_data;
1397
1398 return nal_data - nal_start + 1;
1399}
1400
/**
 * Copies the data inserting emulation prevention bytes as needed.
 * Existing data in the destination can be taken into account by providing
 * dst with a dst_offset > 0: up to two bytes preceding dst_offset are scanned
 * so that a run of zeros spanning old and new data is handled.
 *
 * Nothing is ever written at or beyond dst + dst_size. Passing dst == NULL
 * with dst_size == 0 only measures the space needed. The buffer is addressed
 * by index so that no pointer outside of it is ever formed.
 *
 * @param src        Bytes to copy.
 * @param src_size   Number of bytes in src.
 * @param dst        Start of the destination buffer (not of the new data).
 * @param dst_offset Offset in dst at which the copy starts; values below 0
 *                   are treated as 0.
 * @param dst_size   Total size of dst.
 *
 * @return The number of bytes copied on success. On failure, the negative of
 *         the number of bytes needed to copy src is returned.
 */
static int copy_emulation_prev(const uint8_t *src,
                               size_t         src_size,
                               uint8_t       *dst,
                               ssize_t        dst_offset,
                               size_t         dst_size)
{
    size_t out_start = dst_offset > 0 ? (size_t)dst_offset : 0;
    size_t out_pos   = out_start;
    size_t scan_from = out_start > 2 ? out_start - 2 : 0;
    size_t i;
    int zeros = 0;
    int wrote_bytes;

    /* Count the zeros that directly precede the insertion point. */
    for (i = scan_from; i < out_start && i < dst_size; i++) {
        if (!dst[i])
            zeros++;
        else
            zeros = 0;
    }

    for (i = 0; i < src_size; i++) {
        uint8_t byte = src[i];

        if (zeros == 2) {
            /* 00 00 followed by 00..03 needs an emulation prevention byte. */
            if (byte <= 3) {
                if (out_pos < dst_size)
                    dst[out_pos] = 3;
                out_pos++;
            }

            zeros = 0;
        }

        if (out_pos < dst_size)
            dst[out_pos] = byte;
        out_pos++;

        if (!byte)
            zeros++;
        else
            zeros = 0;
    }

    wrote_bytes = (int)(out_pos - out_start);

    /* Ran past the end: report the required size as a negative number. */
    if (out_pos > dst_size)
        return -wrote_bytes;

    return wrote_bytes;
}
1459
1460static int write_sei(const ExtraSEI *sei,
1461 int sei_type,
1462 uint8_t *dst,
1463 size_t dst_size)
1464{
1465 uint8_t *sei_start = dst;
1466 size_t remaining_sei_size = sei->size;
1467 size_t remaining_dst_size = dst_size;
1468 int header_bytes;
1469 int bytes_written;
1470 ssize_t offset;
1471
1472 if (!remaining_dst_size)
1473 return AVERROR_BUFFER_TOO_SMALL;
1474
1475 while (sei_type && remaining_dst_size != 0) {
1476 int sei_byte = sei_type > 255 ? 255 : sei_type;
1477 *dst = sei_byte;
1478
1479 sei_type -= sei_byte;
1480 dst++;
1481 remaining_dst_size--;
1482 }
1483
1484 if (!dst_size)
1485 return AVERROR_BUFFER_TOO_SMALL;
1486
1487 while (remaining_sei_size && remaining_dst_size != 0) {
1488 int size_byte = remaining_sei_size > 255 ? 255 : remaining_sei_size;
1489 *dst = size_byte;
1490
1491 remaining_sei_size -= size_byte;
1492 dst++;
1493 remaining_dst_size--;
1494 }
1495
1496 if (remaining_dst_size < sei->size)
1497 return AVERROR_BUFFER_TOO_SMALL;
1498
1499 header_bytes = dst - sei_start;
1500
1501 offset = header_bytes;
1502 bytes_written = copy_emulation_prev(sei->data,
1503 sei->size,
1504 sei_start,
1505 offset,
1506 dst_size);
1507 if (bytes_written < 0)
1508 return AVERROR_BUFFER_TOO_SMALL;
1509
1510 bytes_written += header_bytes;
1511 return bytes_written;
1512}
1513
1514/**
1515 * Copies NAL units and replaces length codes with
1516 * H.264 Annex B start codes. On failure, the contents of
1517 * dst_data may have been modified.
1518 *
1519 * @param length_code_size Byte length of each length code
1520 * @param sample_buffer NAL units prefixed with length codes.
1521 * @param sei Optional A53 closed captions SEI data.
1522 * @param dst_data Must be zeroed before calling this function.
1523 * Contains the copied NAL units prefixed with
1524 * start codes when the function returns
1525 * successfully.
1526 * @param dst_size Length of dst_data
1527 * @return 0 on success
1528 * AVERROR_INVALIDDATA if length_code_size is invalid
1529 * AVERROR_BUFFER_TOO_SMALL if dst_data is too small
1530 * or if a length_code in src_data specifies data beyond
1531 * the end of its buffer.
1532 */
1533static int copy_replace_length_codes(
1534 AVCodecContext *avctx,
1535 size_t length_code_size,
1536 CMSampleBufferRef sample_buffer,
1537 ExtraSEI *sei,
1538 uint8_t *dst_data,
1539 size_t dst_size)
1540{
1541 size_t src_size = CMSampleBufferGetTotalSampleSize(sample_buffer);
1542 size_t remaining_src_size = src_size;
1543 size_t remaining_dst_size = dst_size;
1544 size_t src_offset = 0;
1545 int wrote_sei = 0;
1546 int status;
1547 uint8_t size_buf[4];
1548 uint8_t nal_type;
1549 CMBlockBufferRef block = CMSampleBufferGetDataBuffer(sample_buffer);
1550
1551 if (length_code_size > 4) {
1552 return AVERROR_INVALIDDATA;
1553 }
1554
1555 while (remaining_src_size > 0) {
1556 size_t curr_src_len;
1557 size_t curr_dst_len;
1558 size_t box_len = 0;
1559 size_t i;
1560
1561 uint8_t *dst_box;
1562
1563 status = CMBlockBufferCopyDataBytes(block,
1564 src_offset,
1565 length_code_size,
1566 size_buf);
1567 if (status) {
1568 av_log(avctx, AV_LOG_ERROR, "Cannot copy length: %d\n", status);
1569 return AVERROR_EXTERNAL;
1570 }
1571
1572 status = CMBlockBufferCopyDataBytes(block,
1573 src_offset + length_code_size,
1574 1,
1575 &nal_type);
1576
1577 if (status) {
1578 av_log(avctx, AV_LOG_ERROR, "Cannot copy type: %d\n", status);
1579 return AVERROR_EXTERNAL;
1580 }
1581
1582 nal_type &= 0x1F;
1583
1584 for (i = 0; i < length_code_size; i++) {
1585 box_len <<= 8;
1586 box_len |= size_buf[i];
1587 }
1588
1589 if (sei && !wrote_sei && is_post_sei_nal_type(nal_type)) {
1590 //No SEI NAL unit - insert.
1591 int wrote_bytes;
1592
1593 memcpy(dst_data, start_code, sizeof(start_code));
1594 dst_data += sizeof(start_code);
1595 remaining_dst_size -= sizeof(start_code);
1596
1597 *dst_data = H264_NAL_SEI;
1598 dst_data++;
1599 remaining_dst_size--;
1600
1601 wrote_bytes = write_sei(sei,
1602 SEI_TYPE_USER_DATA_REGISTERED,
1603 dst_data,
1604 remaining_dst_size);
1605
1606 if (wrote_bytes < 0)
1607 return wrote_bytes;
1608
1609 remaining_dst_size -= wrote_bytes;
1610 dst_data += wrote_bytes;
1611
1612 if (remaining_dst_size <= 0)
1613 return AVERROR_BUFFER_TOO_SMALL;
1614
1615 *dst_data = 0x80;
1616
1617 dst_data++;
1618 remaining_dst_size--;
1619
1620 wrote_sei = 1;
1621 }
1622
1623 curr_src_len = box_len + length_code_size;
1624 curr_dst_len = box_len + sizeof(start_code);
1625
1626 if (remaining_src_size < curr_src_len) {
1627 return AVERROR_BUFFER_TOO_SMALL;
1628 }
1629
1630 if (remaining_dst_size < curr_dst_len) {
1631 return AVERROR_BUFFER_TOO_SMALL;
1632 }
1633
1634 dst_box = dst_data + sizeof(start_code);
1635
1636 memcpy(dst_data, start_code, sizeof(start_code));
1637 status = CMBlockBufferCopyDataBytes(block,
1638 src_offset + length_code_size,
1639 box_len,
1640 dst_box);
1641
1642 if (status) {
1643 av_log(avctx, AV_LOG_ERROR, "Cannot copy data: %d\n", status);
1644 return AVERROR_EXTERNAL;
1645 }
1646
1647 if (sei && !wrote_sei && nal_type == H264_NAL_SEI) {
1648 //Found SEI NAL unit - append.
1649 int wrote_bytes;
1650 int old_sei_length;
1651 int extra_bytes;
1652 uint8_t *new_sei;
1653 old_sei_length = find_sei_end(avctx, dst_box, box_len, &new_sei);
1654 if (old_sei_length < 0)
1655 return status;
1656
1657 wrote_bytes = write_sei(sei,
1658 SEI_TYPE_USER_DATA_REGISTERED,
1659 new_sei,
1660 remaining_dst_size - old_sei_length);
1661 if (wrote_bytes < 0)
1662 return wrote_bytes;
1663
1664 if (new_sei + wrote_bytes >= dst_data + remaining_dst_size)
1665 return AVERROR_BUFFER_TOO_SMALL;
1666
1667 new_sei[wrote_bytes++] = 0x80;
1668 extra_bytes = wrote_bytes - (dst_box + box_len - new_sei);
1669
1670 dst_data += extra_bytes;
1671 remaining_dst_size -= extra_bytes;
1672
1673 wrote_sei = 1;
1674 }
1675
1676 src_offset += curr_src_len;
1677 dst_data += curr_dst_len;
1678
1679 remaining_src_size -= curr_src_len;
1680 remaining_dst_size -= curr_dst_len;
1681 }
1682
1683 return 0;
1684}
1685
1686/**
1687 * Returns a sufficient number of bytes to contain the sei data.
1688 * It may be greater than the minimum required.
1689 */
1690static int get_sei_msg_bytes(const ExtraSEI* sei, int type){
1691 int copied_size;
1692 if (sei->size == 0)
1693 return 0;
1694
1695 copied_size = -copy_emulation_prev(sei->data,
1696 sei->size,
1697 NULL,
1698 0,
1699 0);
1700
1701 if ((sei->size % 255) == 0) //may result in an extra byte
1702 copied_size++;
1703
1704 return copied_size + sei->size / 255 + 1 + type / 255 + 1;
1705}
1706
1707static int vtenc_cm_to_avpacket(
1708 AVCodecContext *avctx,
1709 CMSampleBufferRef sample_buffer,
1710 AVPacket *pkt,
1711 ExtraSEI *sei)
1712{
1713 VTEncContext *vtctx = avctx->priv_data;
1714
1715 int status;
1716 bool is_key_frame;
1717 bool add_header;
1718 size_t length_code_size;
1719 size_t header_size = 0;
1720 size_t in_buf_size;
1721 size_t out_buf_size;
1722 size_t sei_nalu_size = 0;
1723 int64_t dts_delta;
1724 int64_t time_base_num;
1725 int nalu_count;
1726 CMTime pts;
1727 CMTime dts;
1728 CMVideoFormatDescriptionRef vid_fmt;
1729
1730
1731 vtenc_get_frame_info(sample_buffer, &is_key_frame);
1732 status = get_length_code_size(avctx, sample_buffer, &length_code_size);
1733 if (status) return status;
1734
1735 add_header = is_key_frame && !(avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER);
1736
1737 if (add_header) {
1738 vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
1739 if (!vid_fmt) {
1740 av_log(avctx, AV_LOG_ERROR, "Cannot get format description.\n");
1741 return AVERROR_EXTERNAL;
1742 }
1743
1744 int status = get_params_size(avctx, vid_fmt, &header_size);
1745 if (status) return status;
1746 }
1747
1748 status = count_nalus(length_code_size, sample_buffer, &nalu_count);
1749 if(status)
1750 return status;
1751
1752 if (sei) {
1753 size_t msg_size = get_sei_msg_bytes(sei,
1754 SEI_TYPE_USER_DATA_REGISTERED);
1755
1756 sei_nalu_size = sizeof(start_code) + 1 + msg_size + 1;
1757 }
1758
1759 in_buf_size = CMSampleBufferGetTotalSampleSize(sample_buffer);
1760 out_buf_size = header_size +
1761 in_buf_size +
1762 sei_nalu_size +
1763 nalu_count * ((int)sizeof(start_code) - (int)length_code_size);
1764
1765 status = ff_alloc_packet2(avctx, pkt, out_buf_size, out_buf_size);
1766 if (status < 0)
1767 return status;
1768
1769 if (add_header) {
1770 status = copy_param_sets(avctx, vid_fmt, pkt->data, out_buf_size);
1771 if(status) return status;
1772 }
1773
1774 status = copy_replace_length_codes(
1775 avctx,
1776 length_code_size,
1777 sample_buffer,
1778 sei,
1779 pkt->data + header_size,
1780 pkt->size - header_size
1781 );
1782
1783 if (status) {
1784 av_log(avctx, AV_LOG_ERROR, "Error copying packet data: %d\n", status);
1785 return status;
1786 }
1787
1788 if (is_key_frame) {
1789 pkt->flags |= AV_PKT_FLAG_KEY;
1790 }
1791
1792 pts = CMSampleBufferGetPresentationTimeStamp(sample_buffer);
1793 dts = CMSampleBufferGetDecodeTimeStamp (sample_buffer);
1794
1795 if (CMTIME_IS_INVALID(dts)) {
1796 if (!vtctx->has_b_frames) {
1797 dts = pts;
1798 } else {
1799 av_log(avctx, AV_LOG_ERROR, "DTS is invalid.\n");
1800 return AVERROR_EXTERNAL;
1801 }
1802 }
1803
1804 dts_delta = vtctx->dts_delta >= 0 ? vtctx->dts_delta : 0;
1805 time_base_num = avctx->time_base.num;
1806 pkt->pts = pts.value / time_base_num;
1807 pkt->dts = dts.value / time_base_num - dts_delta;
1808 pkt->size = out_buf_size;
1809
1810 return 0;
1811}
1812
1813/*
1814 * contiguous_buf_size is 0 if not contiguous, and the size of the buffer
1815 * containing all planes if so.
1816 */
1817static int get_cv_pixel_info(
1818 AVCodecContext *avctx,
1819 const AVFrame *frame,
1820 int *color,
1821 int *plane_count,
1822 size_t *widths,
1823 size_t *heights,
1824 size_t *strides,
1825 size_t *contiguous_buf_size)
1826{
1827 VTEncContext *vtctx = avctx->priv_data;
1828 int av_format = frame->format;
1829 int av_color_range = av_frame_get_color_range(frame);
1830 int i;
1831 int range_guessed;
1832 int status;
1833
1834 status = get_cv_pixel_format(avctx, av_format, av_color_range, color, &range_guessed);
1835 if (status) {
1836 av_log(avctx,
1837 AV_LOG_ERROR,
1838 "Could not get pixel format for color format '%s' range '%s'.\n",
1839 av_get_pix_fmt_name(av_format),
1840 av_color_range > AVCOL_RANGE_UNSPECIFIED &&
1841 av_color_range < AVCOL_RANGE_NB ?
1842 av_color_range_name(av_color_range) :
1843 "Unknown");
1844
1845 return AVERROR(EINVAL);
1846 }
1847
1848 if (range_guessed) {
1849 if (!vtctx->warned_color_range) {
1850 vtctx->warned_color_range = true;
1851 av_log(avctx,
1852 AV_LOG_WARNING,
1853 "Color range not set for %s. Using MPEG range.\n",
1854 av_get_pix_fmt_name(av_format));
1855 }
1856
1857 av_log(avctx, AV_LOG_WARNING, "");
1858 }
1859
1860 switch (av_format) {
1861 case AV_PIX_FMT_NV12:
1862 *plane_count = 2;
1863
1864 widths [0] = avctx->width;
1865 heights[0] = avctx->height;
1866 strides[0] = frame ? frame->linesize[0] : avctx->width;
1867
1868 widths [1] = (avctx->width + 1) / 2;
1869 heights[1] = (avctx->height + 1) / 2;
1870 strides[1] = frame ? frame->linesize[1] : (avctx->width + 1) & -2;
1871 break;
1872
1873 case AV_PIX_FMT_YUV420P:
1874 *plane_count = 3;
1875
1876 widths [0] = avctx->width;
1877 heights[0] = avctx->height;
1878 strides[0] = frame ? frame->linesize[0] : avctx->width;
1879
1880 widths [1] = (avctx->width + 1) / 2;
1881 heights[1] = (avctx->height + 1) / 2;
1882 strides[1] = frame ? frame->linesize[1] : (avctx->width + 1) / 2;
1883
1884 widths [2] = (avctx->width + 1) / 2;
1885 heights[2] = (avctx->height + 1) / 2;
1886 strides[2] = frame ? frame->linesize[2] : (avctx->width + 1) / 2;
1887 break;
1888
1889 default:
1890 av_log(
1891 avctx,
1892 AV_LOG_ERROR,
1893 "Could not get frame format info for color %d range %d.\n",
1894 av_format,
1895 av_color_range);
1896
1897 return AVERROR(EINVAL);
1898 }
1899
1900 *contiguous_buf_size = 0;
1901 for (i = 0; i < *plane_count; i++) {
1902 if (i < *plane_count - 1 &&
1903 frame->data[i] + strides[i] * heights[i] != frame->data[i + 1]) {
1904 *contiguous_buf_size = 0;
1905 break;
1906 }
1907
1908 *contiguous_buf_size += strides[i] * heights[i];
1909 }
1910
1911 return 0;
1912}
1913
1914#if !TARGET_OS_IPHONE
1915//Not used on iOS - frame is always copied.
1916static void free_avframe(
1917 void *release_ctx,
1918 const void *data,
1919 size_t size,
1920 size_t plane_count,
1921 const void *plane_addresses[])
1922{
1923 AVFrame *frame = release_ctx;
1924 av_frame_free(&frame);
1925}
1926#else
1927//Not used on OSX - frame is never copied.
1928static int copy_avframe_to_pixel_buffer(AVCodecContext *avctx,
1929 const AVFrame *frame,
1930 CVPixelBufferRef cv_img,
1931 const size_t *plane_strides,
1932 const size_t *plane_rows)
1933{
1934 int i, j;
1935 size_t plane_count;
1936 int status;
1937 int rows;
1938 int src_stride;
1939 int dst_stride;
1940 uint8_t *src_addr;
1941 uint8_t *dst_addr;
1942 size_t copy_bytes;
1943
1944 status = CVPixelBufferLockBaseAddress(cv_img, 0);
1945 if (status) {
1946 av_log(
1947 avctx,
1948 AV_LOG_ERROR,
1949 "Error: Could not lock base address of CVPixelBuffer: %d.\n",
1950 status
1951 );
1952 }
1953
1954 if (CVPixelBufferIsPlanar(cv_img)) {
1955 plane_count = CVPixelBufferGetPlaneCount(cv_img);
1956 for (i = 0; frame->data[i]; i++) {
1957 if (i == plane_count) {
1958 CVPixelBufferUnlockBaseAddress(cv_img, 0);
1959 av_log(avctx,
1960 AV_LOG_ERROR,
1961 "Error: different number of planes in AVFrame and CVPixelBuffer.\n"
1962 );
1963
1964 return AVERROR_EXTERNAL;
1965 }
1966
1967 dst_addr = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(cv_img, i);
1968 src_addr = (uint8_t*)frame->data[i];
1969 dst_stride = CVPixelBufferGetBytesPerRowOfPlane(cv_img, i);
1970 src_stride = plane_strides[i];
1971 rows = plane_rows[i];
1972
1973 if (dst_stride == src_stride) {
1974 memcpy(dst_addr, src_addr, src_stride * rows);
1975 } else {
1976 copy_bytes = dst_stride < src_stride ? dst_stride : src_stride;
1977
1978 for (j = 0; j < rows; j++) {
1979 memcpy(dst_addr + j * dst_stride, src_addr + j * src_stride, copy_bytes);
1980 }
1981 }
1982 }
1983 } else {
1984 if (frame->data[1]) {
1985 CVPixelBufferUnlockBaseAddress(cv_img, 0);
1986 av_log(avctx,
1987 AV_LOG_ERROR,
1988 "Error: different number of planes in AVFrame and non-planar CVPixelBuffer.\n"
1989 );
1990
1991 return AVERROR_EXTERNAL;
1992 }
1993
1994 dst_addr = (uint8_t*)CVPixelBufferGetBaseAddress(cv_img);
1995 src_addr = (uint8_t*)frame->data[0];
1996 dst_stride = CVPixelBufferGetBytesPerRow(cv_img);
1997 src_stride = plane_strides[0];
1998 rows = plane_rows[0];
1999
2000 if (dst_stride == src_stride) {
2001 memcpy(dst_addr, src_addr, src_stride * rows);
2002 } else {
2003 copy_bytes = dst_stride < src_stride ? dst_stride : src_stride;
2004
2005 for (j = 0; j < rows; j++) {
2006 memcpy(dst_addr + j * dst_stride, src_addr + j * src_stride, copy_bytes);
2007 }
2008 }
2009 }
2010
2011 status = CVPixelBufferUnlockBaseAddress(cv_img, 0);
2012 if (status) {
2013 av_log(avctx, AV_LOG_ERROR, "Error: Could not unlock CVPixelBuffer base address: %d.\n", status);
2014 return AVERROR_EXTERNAL;
2015 }
2016
2017 return 0;
2018}
2019#endif //!TARGET_OS_IPHONE
2020
2021static int create_cv_pixel_buffer(AVCodecContext *avctx,
2022 const AVFrame *frame,
2023 CVPixelBufferRef *cv_img)
2024{
2025 int plane_count;
2026 int color;
2027 size_t widths [AV_NUM_DATA_POINTERS];
2028 size_t heights[AV_NUM_DATA_POINTERS];
2029 size_t strides[AV_NUM_DATA_POINTERS];
2030 int status;
2031 size_t contiguous_buf_size;
2032#if TARGET_OS_IPHONE
2033 CVPixelBufferPoolRef pix_buf_pool;
2034 VTEncContext* vtctx = avctx->priv_data;
2035#else
2036 CFMutableDictionaryRef pix_buf_attachments = CFDictionaryCreateMutable(
2037 kCFAllocatorDefault,
2038 10,
2039 &kCFCopyStringDictionaryKeyCallBacks,
2040 &kCFTypeDictionaryValueCallBacks);
2041
2042 if (!pix_buf_attachments) return AVERROR(ENOMEM);
2043#endif
2044
2045 if (avctx->pix_fmt == AV_PIX_FMT_VIDEOTOOLBOX) {
2046 av_assert0(frame->format == AV_PIX_FMT_VIDEOTOOLBOX);
2047
2048 *cv_img = (CVPixelBufferRef)frame->data[3];
2049 av_assert0(*cv_img);
2050
2051 CFRetain(*cv_img);
2052 return 0;
2053 }
2054
2055 memset(widths, 0, sizeof(widths));
2056 memset(heights, 0, sizeof(heights));
2057 memset(strides, 0, sizeof(strides));
2058
2059 status = get_cv_pixel_info(
2060 avctx,
2061 frame,
2062 &color,
2063 &plane_count,
2064 widths,
2065 heights,
2066 strides,
2067 &contiguous_buf_size
2068 );
2069
2070 if (status) {
2071 av_log(
2072 avctx,
2073 AV_LOG_ERROR,
2074 "Error: Cannot convert format %d color_range %d: %d\n",
2075 frame->format,
2076 av_frame_get_color_range(frame),
2077 status
2078 );
2079
2080 return AVERROR_EXTERNAL;
2081 }
2082
2083#if TARGET_OS_IPHONE
2084 pix_buf_pool = VTCompressionSessionGetPixelBufferPool(vtctx->session);
2085 if (!pix_buf_pool) {
2086 av_log(avctx, AV_LOG_ERROR, "Could not get pixel buffer pool.\n");
2087 return AVERROR_EXTERNAL;
2088 }
2089
2090 status = CVPixelBufferPoolCreatePixelBuffer(NULL,
2091 pix_buf_pool,
2092 cv_img);
2093
2094
2095 if (status) {
2096 av_log(avctx, AV_LOG_ERROR, "Could not create pixel buffer from pool: %d.\n", status);
2097 return AVERROR_EXTERNAL;
2098 }
2099
2100 status = copy_avframe_to_pixel_buffer(avctx, frame, *cv_img, strides, heights);
2101 if (status) {
2102 CFRelease(*cv_img);
2103 *cv_img = NULL;
2104 return status;
2105 }
2106#else
2107 AVFrame *enc_frame = av_frame_alloc();
2108 if (!enc_frame) return AVERROR(ENOMEM);
2109
2110 status = av_frame_ref(enc_frame, frame);
2111 if (status) {
2112 av_frame_free(&enc_frame);
2113 return status;
2114 }
2115
2116 status = CVPixelBufferCreateWithPlanarBytes(
2117 kCFAllocatorDefault,
2118 enc_frame->width,
2119 enc_frame->height,
2120 color,
2121 NULL,
2122 contiguous_buf_size,
2123 plane_count,
2124 (void **)enc_frame->data,
2125 widths,
2126 heights,
2127 strides,
2128 free_avframe,
2129 enc_frame,
2130 NULL,
2131 cv_img
2132 );
2133
2134 add_color_attr(avctx, pix_buf_attachments);
2135 CVBufferSetAttachments(*cv_img, pix_buf_attachments, kCVAttachmentMode_ShouldPropagate);
2136 CFRelease(pix_buf_attachments);
2137
2138 if (status) {
2139 av_log(avctx, AV_LOG_ERROR, "Error: Could not create CVPixelBuffer: %d\n", status);
2140 return AVERROR_EXTERNAL;
2141 }
2142#endif
2143
2144 return 0;
2145}
2146
2147static int create_encoder_dict_h264(const AVFrame *frame,
2148 CFDictionaryRef* dict_out)
2149{
2150 CFDictionaryRef dict = NULL;
2151 if (frame->pict_type == AV_PICTURE_TYPE_I) {
2152 const void *keys[] = { kVTEncodeFrameOptionKey_ForceKeyFrame };
2153 const void *vals[] = { kCFBooleanTrue };
2154
2155 dict = CFDictionaryCreate(NULL, keys, vals, 1, NULL, NULL);
2156 if(!dict) return AVERROR(ENOMEM);
2157 }
2158
2159 *dict_out = dict;
2160 return 0;
2161}
2162
2163static int vtenc_send_frame(AVCodecContext *avctx,
2164 VTEncContext *vtctx,
2165 const AVFrame *frame)
2166{
2167 CMTime time;
2168 CFDictionaryRef frame_dict;
2169 CVPixelBufferRef cv_img = NULL;
2170 AVFrameSideData *side_data = NULL;
2171 ExtraSEI *sei = NULL;
2172 int status = create_cv_pixel_buffer(avctx, frame, &cv_img);
2173
2174 if (status) return status;
2175
2176 status = create_encoder_dict_h264(frame, &frame_dict);
2177 if (status) {
2178 CFRelease(cv_img);
2179 return status;
2180 }
2181
2182 side_data = av_frame_get_side_data(frame, AV_FRAME_DATA_A53_CC);
2183 if (vtctx->a53_cc && side_data && side_data->size) {
2184 sei = av_mallocz(sizeof(*sei));
2185 if (!sei) {
2186 av_log(avctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
2187 } else {
2188 int ret = ff_alloc_a53_sei(frame, 0, &sei->data, &sei->size);
2189 if (ret < 0) {
2190 av_log(avctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
2191 av_free(sei);
2192 sei = NULL;
2193 }
2194 }
2195 }
2196
2197 time = CMTimeMake(frame->pts * avctx->time_base.num, avctx->time_base.den);
2198 status = VTCompressionSessionEncodeFrame(
2199 vtctx->session,
2200 cv_img,
2201 time,
2202 kCMTimeInvalid,
2203 frame_dict,
2204 sei,
2205 NULL
2206 );
2207
2208 if (frame_dict) CFRelease(frame_dict);
2209 CFRelease(cv_img);
2210
2211 if (status) {
2212 av_log(avctx, AV_LOG_ERROR, "Error: cannot encode frame: %d\n", status);
2213 return AVERROR_EXTERNAL;
2214 }
2215
2216 return 0;
2217}
2218
2219static av_cold int vtenc_frame(
2220 AVCodecContext *avctx,
2221 AVPacket *pkt,
2222 const AVFrame *frame,
2223 int *got_packet)
2224{
2225 VTEncContext *vtctx = avctx->priv_data;
2226 bool get_frame;
2227 int status;
2228 CMSampleBufferRef buf = NULL;
2229 ExtraSEI *sei = NULL;
2230
2231 if (frame) {
2232 status = vtenc_send_frame(avctx, vtctx, frame);
2233
2234 if (status) {
2235 status = AVERROR_EXTERNAL;
2236 goto end_nopkt;
2237 }
2238
2239 if (vtctx->frame_ct_in == 0) {
2240 vtctx->first_pts = frame->pts;
2241 } else if(vtctx->frame_ct_in == 1 && vtctx->has_b_frames) {
2242 vtctx->dts_delta = frame->pts - vtctx->first_pts;
2243 }
2244
2245 vtctx->frame_ct_in++;
2246 } else if(!vtctx->flushing) {
2247 vtctx->flushing = true;
2248
2249 status = VTCompressionSessionCompleteFrames(vtctx->session,
2250 kCMTimeIndefinite);
2251
2252 if (status) {
2253 av_log(avctx, AV_LOG_ERROR, "Error flushing frames: %d\n", status);
2254 status = AVERROR_EXTERNAL;
2255 goto end_nopkt;
2256 }
2257 }
2258
2259 *got_packet = 0;
2260 get_frame = vtctx->dts_delta >= 0 || !frame;
2261 if (!get_frame) {
2262 status = 0;
2263 goto end_nopkt;
2264 }
2265
2266 status = vtenc_q_pop(vtctx, !frame, &buf, &sei);
2267 if (status) goto end_nopkt;
2268 if (!buf) goto end_nopkt;
2269
2270 status = vtenc_cm_to_avpacket(avctx, buf, pkt, sei);
2271 if (sei) {
2272 if (sei->data) av_free(sei->data);
2273 av_free(sei);
2274 }
2275 CFRelease(buf);
2276 if (status) goto end_nopkt;
2277
2278 *got_packet = 1;
2279 return 0;
2280
2281end_nopkt:
2282 av_packet_unref(pkt);
2283 return status;
2284}
2285
2286static int vtenc_populate_extradata(AVCodecContext *avctx,
2287 CMVideoCodecType codec_type,
2288 CFStringRef profile_level,
2289 CFNumberRef gamma_level,
2290 CFDictionaryRef enc_info,
2291 CFDictionaryRef pixel_buffer_info)
2292{
2293 VTEncContext *vtctx = avctx->priv_data;
2294 AVFrame *frame = av_frame_alloc();
2295 int y_size = avctx->width * avctx->height;
2296 int chroma_size = (avctx->width / 2) * (avctx->height / 2);
2297 CMSampleBufferRef buf = NULL;
2298 int status;
2299
2300 if (!frame)
2301 return AVERROR(ENOMEM);
2302
2303 frame->buf[0] = av_buffer_alloc(y_size + 2 * chroma_size);
2304
2305 if(!frame->buf[0]){
2306 status = AVERROR(ENOMEM);
2307 goto pe_cleanup;
2308 }
2309
2310 status = vtenc_create_encoder(avctx,
2311 codec_type,
2312 profile_level,
2313 gamma_level,
2314 enc_info,
2315 pixel_buffer_info,
2316 &vtctx->session);
2317 if (status)
2318 goto pe_cleanup;
2319
2320 frame->data[0] = frame->buf[0]->data;
2321 memset(frame->data[0], 0, y_size);
2322
2323 frame->data[1] = frame->buf[0]->data + y_size;
2324 memset(frame->data[1], 128, chroma_size);
2325
2326
2327 if (avctx->pix_fmt == AV_PIX_FMT_YUV420P) {
2328 frame->data[2] = frame->buf[0]->data + y_size + chroma_size;
2329 memset(frame->data[2], 128, chroma_size);
2330 }
2331
2332 frame->linesize[0] = avctx->width;
2333
2334 if (avctx->pix_fmt == AV_PIX_FMT_YUV420P) {
2335 frame->linesize[1] =
2336 frame->linesize[2] = (avctx->width + 1) / 2;
2337 } else {
2338 frame->linesize[1] = (avctx->width + 1) / 2;
2339 }
2340
2341 frame->format = avctx->pix_fmt;
2342 frame->width = avctx->width;
2343 frame->height = avctx->height;
2344 av_frame_set_colorspace(frame, avctx->colorspace);
2345 av_frame_set_color_range(frame, avctx->color_range);
2346 frame->color_trc = avctx->color_trc;
2347 frame->color_primaries = avctx->color_primaries;
2348
2349 frame->pts = 0;
2350 status = vtenc_send_frame(avctx, vtctx, frame);
2351 if (status) {
2352 av_log(avctx, AV_LOG_ERROR, "Error sending frame: %d\n", status);
2353 goto pe_cleanup;
2354 }
2355
2356 //Populates extradata - output frames are flushed and param sets are available.
2357 status = VTCompressionSessionCompleteFrames(vtctx->session,
2358 kCMTimeIndefinite);
2359
2360 if (status)
2361 goto pe_cleanup;
2362
2363 status = vtenc_q_pop(vtctx, 0, &buf, NULL);
2364 if (status) {
2365 av_log(avctx, AV_LOG_ERROR, "popping: %d\n", status);
2366 goto pe_cleanup;
2367 }
2368
2369 CFRelease(buf);
2370
2371
2372
2373pe_cleanup:
2374 if(vtctx->session)
2375 CFRelease(vtctx->session);
2376
2377 vtctx->session = NULL;
2378 vtctx->frame_ct_out = 0;
2379
2380 av_frame_unref(frame);
2381 av_frame_free(&frame);
2382
2383 av_assert0(status != 0 || (avctx->extradata && avctx->extradata_size > 0));
2384
2385 return status;
2386}
2387
2388static av_cold int vtenc_close(AVCodecContext *avctx)
2389{
2390 VTEncContext *vtctx = avctx->priv_data;
2391
2392 if(!vtctx->session) return 0;
2393
2394 VTCompressionSessionCompleteFrames(vtctx->session,
2395 kCMTimeIndefinite);
2396 clear_frame_queue(vtctx);
2397 pthread_cond_destroy(&vtctx->cv_sample_sent);
2398 pthread_mutex_destroy(&vtctx->lock);
2399 CFRelease(vtctx->session);
2400 vtctx->session = NULL;
2401
2402 if (vtctx->color_primaries) {
2403 CFRelease(vtctx->color_primaries);
2404 vtctx->color_primaries = NULL;
2405 }
2406
2407 if (vtctx->transfer_function) {
2408 CFRelease(vtctx->transfer_function);
2409 vtctx->transfer_function = NULL;
2410 }
2411
2412 if (vtctx->ycbcr_matrix) {
2413 CFRelease(vtctx->ycbcr_matrix);
2414 vtctx->ycbcr_matrix = NULL;
2415 }
2416
2417 return 0;
2418}
2419
2420static const enum AVPixelFormat pix_fmts[] = {
2421 AV_PIX_FMT_VIDEOTOOLBOX,
2422 AV_PIX_FMT_NV12,
2423 AV_PIX_FMT_YUV420P,
2424 AV_PIX_FMT_NONE
2425};
2426
/* Byte offset of a VTEncContext member, for the AVOption table. */
#define OFFSET(x) offsetof(VTEncContext, x)
/* Flags shared by every option: video + encoding parameter. Parenthesised so
 * the expansion stays a single operand wherever it is used. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
2429static const AVOption options[] = {
2430 { "profile", "Profile", OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = H264_PROF_AUTO }, H264_PROF_AUTO, H264_PROF_COUNT, VE, "profile" },
2431 { "baseline", "Baseline Profile", 0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_BASELINE }, INT_MIN, INT_MAX, VE, "profile" },
2432 { "main", "Main Profile", 0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_MAIN }, INT_MIN, INT_MAX, VE, "profile" },
2433 { "high", "High Profile", 0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_HIGH }, INT_MIN, INT_MAX, VE, "profile" },
2434
2435 { "level", "Level", OFFSET(level), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 52, VE, "level" },
2436 { "1.3", "Level 1.3, only available with Baseline Profile", 0, AV_OPT_TYPE_CONST, { .i64 = 13 }, INT_MIN, INT_MAX, VE, "level" },
2437 { "3.0", "Level 3.0", 0, AV_OPT_TYPE_CONST, { .i64 = 30 }, INT_MIN, INT_MAX, VE, "level" },
2438 { "3.1", "Level 3.1", 0, AV_OPT_TYPE_CONST, { .i64 = 31 }, INT_MIN, INT_MAX, VE, "level" },
2439 { "3.2", "Level 3.2", 0, AV_OPT_TYPE_CONST, { .i64 = 32 }, INT_MIN, INT_MAX, VE, "level" },
2440 { "4.0", "Level 4.0", 0, AV_OPT_TYPE_CONST, { .i64 = 40 }, INT_MIN, INT_MAX, VE, "level" },
2441 { "4.1", "Level 4.1", 0, AV_OPT_TYPE_CONST, { .i64 = 41 }, INT_MIN, INT_MAX, VE, "level" },
2442 { "4.2", "Level 4.2", 0, AV_OPT_TYPE_CONST, { .i64 = 42 }, INT_MIN, INT_MAX, VE, "level" },
2443 { "5.0", "Level 5.0", 0, AV_OPT_TYPE_CONST, { .i64 = 50 }, INT_MIN, INT_MAX, VE, "level" },
2444 { "5.1", "Level 5.1", 0, AV_OPT_TYPE_CONST, { .i64 = 51 }, INT_MIN, INT_MAX, VE, "level" },
2445 { "5.2", "Level 5.2", 0, AV_OPT_TYPE_CONST, { .i64 = 52 }, INT_MIN, INT_MAX, VE, "level" },
2446
2447 { "allow_sw", "Allow software encoding", OFFSET(allow_sw), AV_OPT_TYPE_BOOL,
2448 { .i64 = 0 }, 0, 1, VE },
2449
2450 { "coder", "Entropy coding", OFFSET(entropy), AV_OPT_TYPE_INT, { .i64 = VT_ENTROPY_NOT_SET }, VT_ENTROPY_NOT_SET, VT_CABAC, VE, "coder" },
2451 { "cavlc", "CAVLC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CAVLC }, INT_MIN, INT_MAX, VE, "coder" },
2452 { "vlc", "CAVLC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CAVLC }, INT_MIN, INT_MAX, VE, "coder" },
2453 { "cabac", "CABAC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CABAC }, INT_MIN, INT_MAX, VE, "coder" },
2454 { "ac", "CABAC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CABAC }, INT_MIN, INT_MAX, VE, "coder" },
2455
2456 { "realtime", "Hint that encoding should happen in real-time if not faster (e.g. capturing from camera).",
2457 OFFSET(realtime), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
2458
2459 { "frames_before", "Other frames will come before the frames in this session. This helps smooth concatenation issues.",
2460 OFFSET(frames_before), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
2461 { "frames_after", "Other frames will come after the frames in this session. This helps smooth concatenation issues.",
2462 OFFSET(frames_after), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
2463
2464 { "a53cc", "Use A53 Closed Captions (if available)", OFFSET(a53_cc), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, VE },
2465
2466 { NULL },
2467};
2468
2469static const AVClass h264_videotoolbox_class = {
2470 .class_name = "h264_videotoolbox",
2471 .item_name = av_default_item_name,
2472 .option = options,
2473 .version = LIBAVUTIL_VERSION_INT,
2474};
2475
2476AVCodec ff_h264_videotoolbox_encoder = {
2477 .name = "h264_videotoolbox",
2478 .long_name = NULL_IF_CONFIG_SMALL("VideoToolbox H.264 Encoder"),
2479 .type = AVMEDIA_TYPE_VIDEO,
2480 .id = AV_CODEC_ID_H264,
2481 .priv_data_size = sizeof(VTEncContext),
2482 .pix_fmts = pix_fmts,
2483 .init = vtenc_init,
2484 .encode2 = vtenc_frame,
2485 .close = vtenc_close,
2486 .capabilities = AV_CODEC_CAP_DELAY,
2487 .priv_class = &h264_videotoolbox_class,
2488 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE |
2489 FF_CODEC_CAP_INIT_CLEANUP,
2490};
2491