blob: cf054550c1312fb85875c3d71aa2400ccf1cde44
1 | /* |
2 | * H.264/HEVC hardware encoding using nvidia nvenc |
3 | * Copyright (c) 2016 Timo Rothenpieler <timo@rothenpieler.org> |
4 | * |
5 | * This file is part of FFmpeg. |
6 | * |
7 | * FFmpeg is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; either |
10 | * version 2.1 of the License, or (at your option) any later version. |
11 | * |
12 | * FFmpeg is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | * Lesser General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU Lesser General Public |
18 | * License along with FFmpeg; if not, write to the Free Software |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | */ |
21 | |
22 | #include "config.h" |
23 | |
24 | #include "nvenc.h" |
25 | |
26 | #include "libavutil/hwcontext_cuda.h" |
27 | #include "libavutil/hwcontext.h" |
28 | #include "libavutil/imgutils.h" |
29 | #include "libavutil/avassert.h" |
30 | #include "libavutil/mem.h" |
31 | #include "libavutil/pixdesc.h" |
32 | #include "internal.h" |
33 | |
/* Minimum CUDA compute capability required for NVENC, packed as
 * (major << 4) | minor, i.e. SM 3.0 (compared against the same packing
 * in nvenc_check_device()). */
#define NVENC_CAP 0x30
/* True for rate-control modes that behave as constant bitrate. */
#define IS_CBR(rc) (rc == NV_ENC_PARAMS_RC_CBR || \
                    rc == NV_ENC_PARAMS_RC_2_PASS_QUALITY || \
                    rc == NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP)

/* Pixel formats accepted by the nvenc encoders. AV_PIX_FMT_CUDA means the
 * frames already live in GPU memory (hw_frames_ctx path in
 * nvenc_setup_device()). */
const enum AVPixelFormat ff_nvenc_pix_fmts[] = {
    AV_PIX_FMT_YUV420P,
    AV_PIX_FMT_NV12,
    AV_PIX_FMT_P010,
    AV_PIX_FMT_YUV444P,
    AV_PIX_FMT_YUV444P16,
    AV_PIX_FMT_0RGB32,
    AV_PIX_FMT_0BGR32,
    AV_PIX_FMT_CUDA,
    AV_PIX_FMT_NONE
};

/* True for input formats with more than 8 bits per component. */
#define IS_10BIT(pix_fmt) (pix_fmt == AV_PIX_FMT_P010 || \
                           pix_fmt == AV_PIX_FMT_YUV444P16)

/* True for 4:4:4 chroma-subsampling input formats. */
#define IS_YUV444(pix_fmt) (pix_fmt == AV_PIX_FMT_YUV444P || \
                            pix_fmt == AV_PIX_FMT_YUV444P16)
56 | |
/* Mapping from NVENC API status codes to AVERROR codes plus a
 * human-readable description; searched linearly by nvenc_map_error(). */
static const struct {
    NVENCSTATUS nverr;  /* NVENC API status code */
    int averr;          /* corresponding AVERROR value */
    const char *desc;   /* message used for logging */
} nvenc_errors[] = {
    { NV_ENC_SUCCESS,                      0,                "success"                  },
    { NV_ENC_ERR_NO_ENCODE_DEVICE,         AVERROR(ENOENT),  "no encode device"         },
    { NV_ENC_ERR_UNSUPPORTED_DEVICE,       AVERROR(ENOSYS),  "unsupported device"       },
    { NV_ENC_ERR_INVALID_ENCODERDEVICE,    AVERROR(EINVAL),  "invalid encoder device"   },
    { NV_ENC_ERR_INVALID_DEVICE,           AVERROR(EINVAL),  "invalid device"           },
    { NV_ENC_ERR_DEVICE_NOT_EXIST,         AVERROR(EIO),     "device does not exist"    },
    { NV_ENC_ERR_INVALID_PTR,              AVERROR(EFAULT),  "invalid ptr"              },
    { NV_ENC_ERR_INVALID_EVENT,            AVERROR(EINVAL),  "invalid event"            },
    { NV_ENC_ERR_INVALID_PARAM,            AVERROR(EINVAL),  "invalid param"            },
    { NV_ENC_ERR_INVALID_CALL,             AVERROR(EINVAL),  "invalid call"             },
    { NV_ENC_ERR_OUT_OF_MEMORY,            AVERROR(ENOMEM),  "out of memory"            },
    { NV_ENC_ERR_ENCODER_NOT_INITIALIZED,  AVERROR(EINVAL),  "encoder not initialized"  },
    { NV_ENC_ERR_UNSUPPORTED_PARAM,        AVERROR(ENOSYS),  "unsupported param"        },
    { NV_ENC_ERR_LOCK_BUSY,                AVERROR(EAGAIN),  "lock busy"                },
    { NV_ENC_ERR_NOT_ENOUGH_BUFFER,        AVERROR_BUFFER_TOO_SMALL, "not enough buffer"},
    { NV_ENC_ERR_INVALID_VERSION,          AVERROR(EINVAL),  "invalid version"          },
    { NV_ENC_ERR_MAP_FAILED,               AVERROR(EIO),     "map failed"               },
    { NV_ENC_ERR_NEED_MORE_INPUT,          AVERROR(EAGAIN),  "need more input"          },
    { NV_ENC_ERR_ENCODER_BUSY,             AVERROR(EAGAIN),  "encoder busy"             },
    { NV_ENC_ERR_EVENT_NOT_REGISTERD,      AVERROR(EBADF),   "event not registered"     },
    { NV_ENC_ERR_GENERIC,                  AVERROR_UNKNOWN,  "generic error"            },
    { NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY,  AVERROR(EINVAL),  "incompatible client key"  },
    { NV_ENC_ERR_UNIMPLEMENTED,            AVERROR(ENOSYS),  "unimplemented"            },
    { NV_ENC_ERR_RESOURCE_REGISTER_FAILED, AVERROR(EIO),     "resource register failed" },
    { NV_ENC_ERR_RESOURCE_NOT_REGISTERED,  AVERROR(EBADF),   "resource not registered"  },
    { NV_ENC_ERR_RESOURCE_NOT_MAPPED,      AVERROR(EBADF),   "resource not mapped"      },
};
89 | |
90 | static int nvenc_map_error(NVENCSTATUS err, const char **desc) |
91 | { |
92 | int i; |
93 | for (i = 0; i < FF_ARRAY_ELEMS(nvenc_errors); i++) { |
94 | if (nvenc_errors[i].nverr == err) { |
95 | if (desc) |
96 | *desc = nvenc_errors[i].desc; |
97 | return nvenc_errors[i].averr; |
98 | } |
99 | } |
100 | if (desc) |
101 | *desc = "unknown error"; |
102 | return AVERROR_UNKNOWN; |
103 | } |
104 | |
105 | static int nvenc_print_error(void *log_ctx, NVENCSTATUS err, |
106 | const char *error_string) |
107 | { |
108 | const char *desc; |
109 | int ret; |
110 | ret = nvenc_map_error(err, &desc); |
111 | av_log(log_ctx, AV_LOG_ERROR, "%s: %s (%d)\n", error_string, desc, err); |
112 | return ret; |
113 | } |
114 | |
115 | static av_cold int nvenc_load_libraries(AVCodecContext *avctx) |
116 | { |
117 | NvencContext *ctx = avctx->priv_data; |
118 | NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs; |
119 | NVENCSTATUS err; |
120 | uint32_t nvenc_max_ver; |
121 | int ret; |
122 | |
123 | ret = cuda_load_functions(&dl_fn->cuda_dl); |
124 | if (ret < 0) |
125 | return ret; |
126 | |
127 | ret = nvenc_load_functions(&dl_fn->nvenc_dl); |
128 | if (ret < 0) |
129 | return ret; |
130 | |
131 | err = dl_fn->nvenc_dl->NvEncodeAPIGetMaxSupportedVersion(&nvenc_max_ver); |
132 | if (err != NV_ENC_SUCCESS) |
133 | return nvenc_print_error(avctx, err, "Failed to query nvenc max version"); |
134 | |
135 | av_log(avctx, AV_LOG_VERBOSE, "Loaded Nvenc version %d.%d\n", nvenc_max_ver >> 4, nvenc_max_ver & 0xf); |
136 | |
137 | if ((NVENCAPI_MAJOR_VERSION << 4 | NVENCAPI_MINOR_VERSION) > nvenc_max_ver) { |
138 | av_log(avctx, AV_LOG_ERROR, "Driver does not support the required nvenc API version. " |
139 | "Required: %d.%d Found: %d.%d\n", |
140 | NVENCAPI_MAJOR_VERSION, NVENCAPI_MINOR_VERSION, |
141 | nvenc_max_ver >> 4, nvenc_max_ver & 0xf); |
142 | return AVERROR(ENOSYS); |
143 | } |
144 | |
145 | dl_fn->nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER; |
146 | |
147 | err = dl_fn->nvenc_dl->NvEncodeAPICreateInstance(&dl_fn->nvenc_funcs); |
148 | if (err != NV_ENC_SUCCESS) |
149 | return nvenc_print_error(avctx, err, "Failed to create nvenc instance"); |
150 | |
151 | av_log(avctx, AV_LOG_VERBOSE, "Nvenc initialized successfully\n"); |
152 | |
153 | return 0; |
154 | } |
155 | |
156 | static av_cold int nvenc_open_session(AVCodecContext *avctx) |
157 | { |
158 | NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = { 0 }; |
159 | NvencContext *ctx = avctx->priv_data; |
160 | NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs; |
161 | NVENCSTATUS ret; |
162 | |
163 | params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER; |
164 | params.apiVersion = NVENCAPI_VERSION; |
165 | params.device = ctx->cu_context; |
166 | params.deviceType = NV_ENC_DEVICE_TYPE_CUDA; |
167 | |
168 | ret = p_nvenc->nvEncOpenEncodeSessionEx(¶ms, &ctx->nvencoder); |
169 | if (ret != NV_ENC_SUCCESS) { |
170 | ctx->nvencoder = NULL; |
171 | return nvenc_print_error(avctx, ret, "OpenEncodeSessionEx failed"); |
172 | } |
173 | |
174 | return 0; |
175 | } |
176 | |
177 | static int nvenc_check_codec_support(AVCodecContext *avctx) |
178 | { |
179 | NvencContext *ctx = avctx->priv_data; |
180 | NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs; |
181 | int i, ret, count = 0; |
182 | GUID *guids = NULL; |
183 | |
184 | ret = p_nvenc->nvEncGetEncodeGUIDCount(ctx->nvencoder, &count); |
185 | |
186 | if (ret != NV_ENC_SUCCESS || !count) |
187 | return AVERROR(ENOSYS); |
188 | |
189 | guids = av_malloc(count * sizeof(GUID)); |
190 | if (!guids) |
191 | return AVERROR(ENOMEM); |
192 | |
193 | ret = p_nvenc->nvEncGetEncodeGUIDs(ctx->nvencoder, guids, count, &count); |
194 | if (ret != NV_ENC_SUCCESS) { |
195 | ret = AVERROR(ENOSYS); |
196 | goto fail; |
197 | } |
198 | |
199 | ret = AVERROR(ENOSYS); |
200 | for (i = 0; i < count; i++) { |
201 | if (!memcmp(&guids[i], &ctx->init_encode_params.encodeGUID, sizeof(*guids))) { |
202 | ret = 0; |
203 | break; |
204 | } |
205 | } |
206 | |
207 | fail: |
208 | av_free(guids); |
209 | |
210 | return ret; |
211 | } |
212 | |
213 | static int nvenc_check_cap(AVCodecContext *avctx, NV_ENC_CAPS cap) |
214 | { |
215 | NvencContext *ctx = avctx->priv_data; |
216 | NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs; |
217 | NV_ENC_CAPS_PARAM params = { 0 }; |
218 | int ret, val = 0; |
219 | |
220 | params.version = NV_ENC_CAPS_PARAM_VER; |
221 | params.capsToQuery = cap; |
222 | |
223 | ret = p_nvenc->nvEncGetEncodeCaps(ctx->nvencoder, ctx->init_encode_params.encodeGUID, ¶ms, &val); |
224 | |
225 | if (ret == NV_ENC_SUCCESS) |
226 | return val; |
227 | return 0; |
228 | } |
229 | |
230 | static int nvenc_check_capabilities(AVCodecContext *avctx) |
231 | { |
232 | NvencContext *ctx = avctx->priv_data; |
233 | int ret; |
234 | |
235 | ret = nvenc_check_codec_support(avctx); |
236 | if (ret < 0) { |
237 | av_log(avctx, AV_LOG_VERBOSE, "Codec not supported\n"); |
238 | return ret; |
239 | } |
240 | |
241 | ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_YUV444_ENCODE); |
242 | if (IS_YUV444(ctx->data_pix_fmt) && ret <= 0) { |
243 | av_log(avctx, AV_LOG_VERBOSE, "YUV444P not supported\n"); |
244 | return AVERROR(ENOSYS); |
245 | } |
246 | |
247 | ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE); |
248 | if (ctx->preset >= PRESET_LOSSLESS_DEFAULT && ret <= 0) { |
249 | av_log(avctx, AV_LOG_VERBOSE, "Lossless encoding not supported\n"); |
250 | return AVERROR(ENOSYS); |
251 | } |
252 | |
253 | ret = nvenc_check_cap(avctx, NV_ENC_CAPS_WIDTH_MAX); |
254 | if (ret < avctx->width) { |
255 | av_log(avctx, AV_LOG_VERBOSE, "Width %d exceeds %d\n", |
256 | avctx->width, ret); |
257 | return AVERROR(ENOSYS); |
258 | } |
259 | |
260 | ret = nvenc_check_cap(avctx, NV_ENC_CAPS_HEIGHT_MAX); |
261 | if (ret < avctx->height) { |
262 | av_log(avctx, AV_LOG_VERBOSE, "Height %d exceeds %d\n", |
263 | avctx->height, ret); |
264 | return AVERROR(ENOSYS); |
265 | } |
266 | |
267 | ret = nvenc_check_cap(avctx, NV_ENC_CAPS_NUM_MAX_BFRAMES); |
268 | if (ret < avctx->max_b_frames) { |
269 | av_log(avctx, AV_LOG_VERBOSE, "Max B-frames %d exceed %d\n", |
270 | avctx->max_b_frames, ret); |
271 | |
272 | return AVERROR(ENOSYS); |
273 | } |
274 | |
275 | ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_FIELD_ENCODING); |
276 | if (ret < 1 && avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) { |
277 | av_log(avctx, AV_LOG_VERBOSE, |
278 | "Interlaced encoding is not supported. Supported level: %d\n", |
279 | ret); |
280 | return AVERROR(ENOSYS); |
281 | } |
282 | |
283 | ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_10BIT_ENCODE); |
284 | if (IS_10BIT(ctx->data_pix_fmt) && ret <= 0) { |
285 | av_log(avctx, AV_LOG_VERBOSE, "10 bit encode not supported\n"); |
286 | return AVERROR(ENOSYS); |
287 | } |
288 | |
289 | ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_LOOKAHEAD); |
290 | if (ctx->rc_lookahead > 0 && ret <= 0) { |
291 | av_log(avctx, AV_LOG_VERBOSE, "RC lookahead not supported\n"); |
292 | return AVERROR(ENOSYS); |
293 | } |
294 | |
295 | ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_TEMPORAL_AQ); |
296 | if (ctx->temporal_aq > 0 && ret <= 0) { |
297 | av_log(avctx, AV_LOG_VERBOSE, "Temporal AQ not supported\n"); |
298 | return AVERROR(ENOSYS); |
299 | } |
300 | |
301 | return 0; |
302 | } |
303 | |
/**
 * Probe CUDA device @p idx for NVENC support and, when it is the device
 * the user asked for (or ANY_DEVICE), create a CUDA context on it and
 * open the encode session.
 *
 * In LIST_DEVICES mode the probe results are logged at AV_LOG_INFO and
 * every device is torn down again after probing.
 *
 * @return 0 when the device was selected and fully initialized, negative
 *         otherwise (probe failure, capability mismatch, or device not
 *         selected).
 */
static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
    char name[128] = { 0};
    int major, minor, ret;
    CUresult cu_res;
    CUdevice cu_device;
    CUcontext dummy;
    int loglevel = AV_LOG_VERBOSE;

    if (ctx->device == LIST_DEVICES)
        loglevel = AV_LOG_INFO;

    /* NOTE(review): these early failures return a bare -1 instead of an
     * AVERROR code; callers only test for < 0, so this works but is
     * inconsistent with the rest of the file. */
    cu_res = dl_fn->cuda_dl->cuDeviceGet(&cu_device, idx);
    if (cu_res != CUDA_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR,
               "Cannot access the CUDA device %d\n",
               idx);
        return -1;
    }

    cu_res = dl_fn->cuda_dl->cuDeviceGetName(name, sizeof(name), cu_device);
    if (cu_res != CUDA_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "cuDeviceGetName failed on device %d\n", idx);
        return -1;
    }

    cu_res = dl_fn->cuda_dl->cuDeviceComputeCapability(&major, &minor, cu_device);
    if (cu_res != CUDA_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "cuDeviceComputeCapability failed on device %d\n", idx);
        return -1;
    }

    av_log(avctx, loglevel, "[ GPU #%d - < %s > has Compute SM %d.%d ]\n", idx, name, major, minor);
    /* Same (major << 4) | minor packing as NVENC_CAP (SM 3.0 minimum). */
    if (((major << 4) | minor) < NVENC_CAP) {
        av_log(avctx, loglevel, "does not support NVENC\n");
        goto fail;
    }

    /* Device is NVENC-capable but not the one requested: stop before
     * creating any context on it. */
    if (ctx->device != idx && ctx->device != ANY_DEVICE)
        return -1;

    cu_res = dl_fn->cuda_dl->cuCtxCreate(&ctx->cu_context_internal, 0, cu_device);
    if (cu_res != CUDA_SUCCESS) {
        av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for NVENC: 0x%x\n", (int)cu_res);
        goto fail;
    }

    ctx->cu_context = ctx->cu_context_internal;

    /* cuCtxCreate leaves the new context current; pop it so NVENC can
     * push/pop it itself. */
    cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
    if (cu_res != CUDA_SUCCESS) {
        av_log(avctx, AV_LOG_FATAL, "Failed popping CUDA context: 0x%x\n", (int)cu_res);
        goto fail2;
    }

    if ((ret = nvenc_open_session(avctx)) < 0)
        goto fail2;

    if ((ret = nvenc_check_capabilities(avctx)) < 0)
        goto fail3;

    av_log(avctx, loglevel, "supports NVENC\n");

    dl_fn->nvenc_device_count++;

    /* In LIST_DEVICES mode neither condition holds, so execution falls
     * into the cleanup labels below and the probed device is released. */
    if (ctx->device == idx || ctx->device == ANY_DEVICE)
        return 0;

    /* Cleanup labels deliberately fall through: fail3 -> fail2 -> fail. */
fail3:
    p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
    ctx->nvencoder = NULL;

fail2:
    dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
    ctx->cu_context_internal = NULL;

fail:
    return AVERROR(ENOSYS);
}
386 | |
/**
 * Select the codec GUID and set up the CUDA device/context used for
 * encoding.
 *
 * Two paths: with AV_PIX_FMT_CUDA input the CUDA context is taken from
 * the caller-supplied hw_frames_ctx; otherwise CUDA is initialized here
 * and every device is probed via nvenc_check_device() until one matches.
 *
 * @return 0 on success; AVERROR_EXIT in LIST_DEVICES mode after listing;
 *         a negative AVERROR code on failure
 */
static av_cold int nvenc_setup_device(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;

    switch (avctx->codec->id) {
    case AV_CODEC_ID_H264:
        ctx->init_encode_params.encodeGUID = NV_ENC_CODEC_H264_GUID;
        break;
    case AV_CODEC_ID_HEVC:
        ctx->init_encode_params.encodeGUID = NV_ENC_CODEC_HEVC_GUID;
        break;
    default:
        return AVERROR_BUG;
    }

    if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
        /* Frames are already on the GPU: reuse the caller's CUDA context
         * instead of creating our own. */
        AVHWFramesContext *frames_ctx;
        AVCUDADeviceContext *device_hwctx;
        int ret;

        if (!avctx->hw_frames_ctx)
            return AVERROR(EINVAL);

        frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
        device_hwctx = frames_ctx->device_ctx->hwctx;

        ctx->cu_context = device_hwctx->cuda_ctx;

        ret = nvenc_open_session(avctx);
        if (ret < 0)
            return ret;

        ret = nvenc_check_capabilities(avctx);
        if (ret < 0) {
            av_log(avctx, AV_LOG_FATAL, "Provided device doesn't support required NVENC features\n");
            return ret;
        }
    } else {
        int i, nb_devices = 0;

        if ((dl_fn->cuda_dl->cuInit(0)) != CUDA_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR,
                   "Cannot init CUDA\n");
            return AVERROR_UNKNOWN;
        }

        if ((dl_fn->cuda_dl->cuDeviceGetCount(&nb_devices)) != CUDA_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR,
                   "Cannot enumerate the CUDA devices\n");
            return AVERROR_UNKNOWN;
        }

        if (!nb_devices) {
            av_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n");
            return AVERROR_EXTERNAL;
        }

        av_log(avctx, AV_LOG_VERBOSE, "%d CUDA capable devices found\n", nb_devices);

        dl_fn->nvenc_device_count = 0;
        /* In LIST_DEVICES mode every device is probed (and logged) even
         * after a match; otherwise stop at the first usable device. */
        for (i = 0; i < nb_devices; ++i) {
            if ((nvenc_check_device(avctx, i)) >= 0 && ctx->device != LIST_DEVICES)
                return 0;
        }

        if (ctx->device == LIST_DEVICES)
            return AVERROR_EXIT;

        if (!dl_fn->nvenc_device_count) {
            av_log(avctx, AV_LOG_FATAL, "No NVENC capable devices found\n");
            return AVERROR_EXTERNAL;
        }

        /* NVENC devices exist but the specific GPU index requested via
         * ctx->device was not usable/selected. */
        av_log(avctx, AV_LOG_FATAL, "Requested GPU %d, but only %d GPUs are available!\n", ctx->device, nb_devices);
        return AVERROR(EINVAL);
    }

    return 0;
}
467 | |
/* Pairing of an NVENC preset GUID with the NVENC_* behavior flags the
 * preset implies (one/two pass, low latency, lossless). */
typedef struct GUIDTuple {
    const GUID guid;
    int flags;
} GUIDTuple;

/* Map a PRESET_<alias> enum value to the NV_ENC_PRESET_<name>_GUID entry,
 * with optional flags in the variadic part. */
#define PRESET_ALIAS(alias, name, ...) \
    [PRESET_ ## alias] = { NV_ENC_PRESET_ ## name ## _GUID, __VA_ARGS__ }

#define PRESET(name, ...) PRESET_ALIAS(name, name, __VA_ARGS__)

/**
 * Resolve ctx->preset into the NVENC preset GUID and the associated
 * NVENC_* flags, storing them in ctx->init_encode_params.presetGUID and
 * ctx->flags.
 */
static void nvenc_map_preset(NvencContext *ctx)
{
    /* Table is a local (not static const) because the NV_ENC_PRESET_*_GUID
     * values are const objects, not compile-time constants in C. Entries
     * without explicit flags get 0 from the designated initializer. */
    GUIDTuple presets[] = {
        PRESET(DEFAULT),
        PRESET(HP),
        PRESET(HQ),
        PRESET(BD),
        PRESET_ALIAS(SLOW,   HQ,    NVENC_TWO_PASSES),
        PRESET_ALIAS(MEDIUM, HQ,    NVENC_ONE_PASS),
        PRESET_ALIAS(FAST,   HP,    NVENC_ONE_PASS),
        PRESET(LOW_LATENCY_DEFAULT, NVENC_LOWLATENCY),
        PRESET(LOW_LATENCY_HP,      NVENC_LOWLATENCY),
        PRESET(LOW_LATENCY_HQ,      NVENC_LOWLATENCY),
        PRESET(LOSSLESS_DEFAULT,    NVENC_LOSSLESS),
        PRESET(LOSSLESS_HP,         NVENC_LOSSLESS),
    };

    /* ctx->preset is assumed to be range-checked by the option system
     * before this is called — no bounds check here. */
    GUIDTuple *t = &presets[ctx->preset];

    ctx->init_encode_params.presetGUID = t->guid;
    ctx->flags = t->flags;
}

#undef PRESET
#undef PRESET_ALIAS
503 | |
504 | static av_cold void set_constqp(AVCodecContext *avctx) |
505 | { |
506 | NvencContext *ctx = avctx->priv_data; |
507 | NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams; |
508 | |
509 | rc->rateControlMode = NV_ENC_PARAMS_RC_CONSTQP; |
510 | |
511 | if (ctx->init_qp_p >= 0) { |
512 | rc->constQP.qpInterP = ctx->init_qp_p; |
513 | if (ctx->init_qp_i >= 0 && ctx->init_qp_b >= 0) { |
514 | rc->constQP.qpIntra = ctx->init_qp_i; |
515 | rc->constQP.qpInterB = ctx->init_qp_b; |
516 | } else if (avctx->i_quant_factor != 0.0 && avctx->b_quant_factor != 0.0) { |
517 | rc->constQP.qpIntra = av_clip( |
518 | rc->constQP.qpInterP * fabs(avctx->i_quant_factor) + avctx->i_quant_offset + 0.5, 0, 51); |
519 | rc->constQP.qpInterB = av_clip( |
520 | rc->constQP.qpInterP * fabs(avctx->b_quant_factor) + avctx->b_quant_offset + 0.5, 0, 51); |
521 | } else { |
522 | rc->constQP.qpIntra = rc->constQP.qpInterP; |
523 | rc->constQP.qpInterB = rc->constQP.qpInterP; |
524 | } |
525 | } else if (ctx->cqp >= 0) { |
526 | rc->constQP.qpInterP = rc->constQP.qpInterB = rc->constQP.qpIntra = ctx->cqp; |
527 | if (avctx->b_quant_factor != 0.0) |
528 | rc->constQP.qpInterB = av_clip(ctx->cqp * fabs(avctx->b_quant_factor) + avctx->b_quant_offset + 0.5, 0, 51); |
529 | if (avctx->i_quant_factor != 0.0) |
530 | rc->constQP.qpIntra = av_clip(ctx->cqp * fabs(avctx->i_quant_factor) + avctx->i_quant_offset + 0.5, 0, 51); |
531 | } |
532 | |
533 | avctx->qmin = -1; |
534 | avctx->qmax = -1; |
535 | } |
536 | |
537 | static av_cold void set_vbr(AVCodecContext *avctx) |
538 | { |
539 | NvencContext *ctx = avctx->priv_data; |
540 | NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams; |
541 | int qp_inter_p; |
542 | |
543 | if (avctx->qmin >= 0 && avctx->qmax >= 0) { |
544 | rc->enableMinQP = 1; |
545 | rc->enableMaxQP = 1; |
546 | |
547 | rc->minQP.qpInterB = avctx->qmin; |
548 | rc->minQP.qpInterP = avctx->qmin; |
549 | rc->minQP.qpIntra = avctx->qmin; |
550 | |
551 | rc->maxQP.qpInterB = avctx->qmax; |
552 | rc->maxQP.qpInterP = avctx->qmax; |
553 | rc->maxQP.qpIntra = avctx->qmax; |
554 | |
555 | qp_inter_p = (avctx->qmax + 3 * avctx->qmin) / 4; // biased towards Qmin |
556 | } else if (avctx->qmin >= 0) { |
557 | rc->enableMinQP = 1; |
558 | |
559 | rc->minQP.qpInterB = avctx->qmin; |
560 | rc->minQP.qpInterP = avctx->qmin; |
561 | rc->minQP.qpIntra = avctx->qmin; |
562 | |
563 | qp_inter_p = avctx->qmin; |
564 | } else { |
565 | qp_inter_p = 26; // default to 26 |
566 | } |
567 | |
568 | rc->enableInitialRCQP = 1; |
569 | |
570 | if (ctx->init_qp_p < 0) { |
571 | rc->initialRCQP.qpInterP = qp_inter_p; |
572 | } else { |
573 | rc->initialRCQP.qpInterP = ctx->init_qp_p; |
574 | } |
575 | |
576 | if (ctx->init_qp_i < 0) { |
577 | if (avctx->i_quant_factor != 0.0 && avctx->b_quant_factor != 0.0) { |
578 | rc->initialRCQP.qpIntra = av_clip( |
579 | rc->initialRCQP.qpInterP * fabs(avctx->i_quant_factor) + avctx->i_quant_offset + 0.5, 0, 51); |
580 | } else { |
581 | rc->initialRCQP.qpIntra = rc->initialRCQP.qpInterP; |
582 | } |
583 | } else { |
584 | rc->initialRCQP.qpIntra = ctx->init_qp_i; |
585 | } |
586 | |
587 | if (ctx->init_qp_b < 0) { |
588 | if (avctx->i_quant_factor != 0.0 && avctx->b_quant_factor != 0.0) { |
589 | rc->initialRCQP.qpInterB = av_clip( |
590 | rc->initialRCQP.qpInterP * fabs(avctx->b_quant_factor) + avctx->b_quant_offset + 0.5, 0, 51); |
591 | } else { |
592 | rc->initialRCQP.qpInterB = rc->initialRCQP.qpInterP; |
593 | } |
594 | } else { |
595 | rc->initialRCQP.qpInterB = ctx->init_qp_b; |
596 | } |
597 | } |
598 | |
599 | static av_cold void set_lossless(AVCodecContext *avctx) |
600 | { |
601 | NvencContext *ctx = avctx->priv_data; |
602 | NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams; |
603 | |
604 | rc->rateControlMode = NV_ENC_PARAMS_RC_CONSTQP; |
605 | rc->constQP.qpInterB = 0; |
606 | rc->constQP.qpInterP = 0; |
607 | rc->constQP.qpIntra = 0; |
608 | |
609 | avctx->qmin = -1; |
610 | avctx->qmax = -1; |
611 | } |
612 | |
/**
 * Apply the rate-control mode explicitly requested via ctx->rc, filling
 * in the matching QP configuration (set_constqp()/set_vbr()); the mode
 * itself is written to rc->rateControlMode at the end for all modes that
 * do not return early.
 */
static void nvenc_override_rate_control(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;

    switch (ctx->rc) {
    case NV_ENC_PARAMS_RC_CONSTQP:
        set_constqp(avctx);
        return;
    case NV_ENC_PARAMS_RC_VBR_MINQP:
        /* VBR_MINQP needs qmin; without it, warn and return early after
         * set_vbr() — note rc->rateControlMode is then left unchanged. */
        if (avctx->qmin < 0) {
            av_log(avctx, AV_LOG_WARNING,
                   "The variable bitrate rate-control requires "
                   "the 'qmin' option set.\n");
            set_vbr(avctx);
            return;
        }
        /* fall through */
    case NV_ENC_PARAMS_RC_2_PASS_VBR:
    case NV_ENC_PARAMS_RC_VBR:
        set_vbr(avctx);
        break;
    case NV_ENC_PARAMS_RC_CBR:
    case NV_ENC_PARAMS_RC_2_PASS_QUALITY:
    case NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP:
        /* CBR-style modes need no extra QP setup here. */
        break;
    }

    rc->rateControlMode = ctx->rc;
}
643 | |
644 | static av_cold int nvenc_recalc_surfaces(AVCodecContext *avctx) |
645 | { |
646 | NvencContext *ctx = avctx->priv_data; |
647 | int nb_surfaces = 0; |
648 | |
649 | if (ctx->rc_lookahead > 0) { |
650 | nb_surfaces = ctx->rc_lookahead + ((ctx->encode_config.frameIntervalP > 0) ? ctx->encode_config.frameIntervalP : 0) + 1 + 4; |
651 | if (ctx->nb_surfaces < nb_surfaces) { |
652 | av_log(avctx, AV_LOG_WARNING, |
653 | "Defined rc_lookahead requires more surfaces, " |
654 | "increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces); |
655 | ctx->nb_surfaces = nb_surfaces; |
656 | } |
657 | } |
658 | |
659 | ctx->nb_surfaces = FFMAX(1, FFMIN(MAX_REGISTERED_FRAMES, ctx->nb_surfaces)); |
660 | ctx->async_depth = FFMIN(ctx->async_depth, ctx->nb_surfaces - 1); |
661 | |
662 | return 0; |
663 | } |
664 | |
/**
 * Derive the full NVENC rate-control configuration from the user options:
 * bitrates, rate-control mode selection (explicit rc, cbr/twopass/cqp
 * heuristics, or VBR fallback), VBV buffer, AQ, lookahead and the various
 * GOP/latency toggles.
 */
static av_cold void nvenc_setup_rate_control(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;

    if (avctx->global_quality > 0)
        av_log(avctx, AV_LOG_WARNING, "Using global_quality with nvenc is deprecated. Use qp instead.\n");

    /* Deprecated global_quality feeds cqp unless qp was set explicitly. */
    if (ctx->cqp < 0 && avctx->global_quality > 0)
        ctx->cqp = avctx->global_quality;

    if (avctx->bit_rate > 0) {
        ctx->encode_config.rcParams.averageBitRate = avctx->bit_rate;
    } else if (ctx->encode_config.rcParams.averageBitRate > 0) {
        /* No user bitrate: mirror the preset-provided average into
         * maxBitRate (may be overridden by rc_max_rate below). */
        ctx->encode_config.rcParams.maxBitRate = ctx->encode_config.rcParams.averageBitRate;
    }

    if (avctx->rc_max_rate > 0)
        ctx->encode_config.rcParams.maxBitRate = avctx->rc_max_rate;

    /* No explicit rc mode requested: infer one from preset flags and the
     * cbr/twopass/qp options. */
    if (ctx->rc < 0) {
        if (ctx->flags & NVENC_ONE_PASS)
            ctx->twopass = 0;
        if (ctx->flags & NVENC_TWO_PASSES)
            ctx->twopass = 1;

        /* Still unset: default two-pass on for low-latency presets. */
        if (ctx->twopass < 0)
            ctx->twopass = (ctx->flags & NVENC_LOWLATENCY) != 0;

        if (ctx->cbr) {
            if (ctx->twopass) {
                ctx->rc = NV_ENC_PARAMS_RC_2_PASS_QUALITY;
            } else {
                ctx->rc = NV_ENC_PARAMS_RC_CBR;
            }
        } else if (ctx->cqp >= 0) {
            ctx->rc = NV_ENC_PARAMS_RC_CONSTQP;
        } else if (ctx->twopass) {
            ctx->rc = NV_ENC_PARAMS_RC_2_PASS_VBR;
        } else if (avctx->qmin >= 0 && avctx->qmax >= 0) {
            ctx->rc = NV_ENC_PARAMS_RC_VBR_MINQP;
        }
    }

    /* Lossless preset wins over any rc mode; otherwise apply the chosen
     * mode, falling back to plain VBR when nothing was selected. */
    if (ctx->flags & NVENC_LOSSLESS) {
        set_lossless(avctx);
    } else if (ctx->rc >= 0) {
        nvenc_override_rate_control(avctx);
    } else {
        ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
        set_vbr(avctx);
    }

    if (avctx->rc_buffer_size > 0) {
        ctx->encode_config.rcParams.vbvBufferSize = avctx->rc_buffer_size;
    } else if (ctx->encode_config.rcParams.averageBitRate > 0) {
        /* Default VBV buffer: two seconds worth of average bitrate. */
        ctx->encode_config.rcParams.vbvBufferSize = 2 * ctx->encode_config.rcParams.averageBitRate;
    }

    if (ctx->aq) {
        ctx->encode_config.rcParams.enableAQ = 1;
        ctx->encode_config.rcParams.aqStrength = ctx->aq_strength;
        av_log(avctx, AV_LOG_VERBOSE, "AQ enabled.\n");
    }

    if (ctx->temporal_aq) {
        ctx->encode_config.rcParams.enableTemporalAQ = 1;
        av_log(avctx, AV_LOG_VERBOSE, "Temporal AQ enabled.\n");
    }

    if (ctx->rc_lookahead > 0) {
        /* Lookahead depth is limited by surfaces available after B-frame
         * reordering and in-flight buffers (mirrors nvenc_recalc_surfaces). */
        int lkd_bound = FFMIN(ctx->nb_surfaces, ctx->async_depth) -
                        ctx->encode_config.frameIntervalP - 4;

        if (lkd_bound < 0) {
            av_log(avctx, AV_LOG_WARNING,
                   "Lookahead not enabled. Increase buffer delay (-delay).\n");
        } else {
            ctx->encode_config.rcParams.enableLookahead = 1;
            ctx->encode_config.rcParams.lookaheadDepth  = av_clip(ctx->rc_lookahead, 0, lkd_bound);
            ctx->encode_config.rcParams.disableIadapt   = ctx->no_scenecut;
            ctx->encode_config.rcParams.disableBadapt   = !ctx->b_adapt;
            av_log(avctx, AV_LOG_VERBOSE,
                   "Lookahead enabled: depth %d, scenecut %s, B-adapt %s.\n",
                   ctx->encode_config.rcParams.lookaheadDepth,
                   ctx->encode_config.rcParams.disableIadapt ? "disabled" : "enabled",
                   ctx->encode_config.rcParams.disableBadapt ? "disabled" : "enabled");
        }
    }

    if (ctx->strict_gop) {
        ctx->encode_config.rcParams.strictGOPTarget = 1;
        av_log(avctx, AV_LOG_VERBOSE, "Strict GOP target enabled.\n");
    }

    if (ctx->nonref_p)
        ctx->encode_config.rcParams.enableNonRefP = 1;

    if (ctx->zerolatency)
        ctx->encode_config.rcParams.zeroReorderDelay = 1;

    if (ctx->quality)
        ctx->encode_config.rcParams.targetQuality = ctx->quality;
}
768 | |
/**
 * Fill the H.264-specific encoder configuration: VUI color signalling,
 * slice/SPS-PPS/AUD output, reference and GOP settings, CBR SEI messages,
 * and the profile GUID (forced to High 4:4:4 for YUV444 input).
 *
 * @return 0 (always succeeds)
 */
static av_cold int nvenc_setup_h264_config(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENC_CONFIG *cc = &ctx->encode_config;
    NV_ENC_CONFIG_H264 *h264 = &cc->encodeCodecConfig.h264Config;
    NV_ENC_CONFIG_H264_VUI_PARAMETERS *vui = &h264->h264VUIParameters;

    vui->colourMatrix = avctx->colorspace;
    vui->colourPrimaries = avctx->color_primaries;
    vui->transferCharacteristics = avctx->color_trc;
    /* Full range for JPEG color range or the deprecated YUVJ formats. */
    vui->videoFullRangeFlag = (avctx->color_range == AVCOL_RANGE_JPEG
        || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ420P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ422P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ444P);

    /* 2 is the "unspecified" value for all three color properties. */
    vui->colourDescriptionPresentFlag =
        (avctx->colorspace != 2 || avctx->color_primaries != 2 || avctx->color_trc != 2);

    vui->videoSignalTypePresentFlag =
        (vui->colourDescriptionPresentFlag
        || vui->videoFormat != 5
        || vui->videoFullRangeFlag != 0);

    /* sliceMode 3 = slice count; one slice per frame. */
    h264->sliceMode = 3;
    h264->sliceModeData = 1;

    /* With global headers the SPS/PPS go into extradata instead of the
     * bitstream; otherwise repeat them on every IDR. */
    h264->disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
    h264->repeatSPSPPS  = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
    h264->outputAUD     = ctx->aud;

    if (avctx->refs >= 0) {
        /* 0 means "let the hardware decide" */
        h264->maxNumRefFrames = avctx->refs;
    }
    if (avctx->gop_size >= 0) {
        /* cc->gopLength is assumed to have been set from gop_size by the
         * caller before this runs — TODO confirm against setup order. */
        h264->idrPeriod = cc->gopLength;
    }

    if (IS_CBR(cc->rcParams.rateControlMode)) {
        h264->outputBufferingPeriodSEI = 1;
        h264->outputPictureTimingSEI   = 1;
    }

    if (cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_2_PASS_QUALITY ||
        cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP ||
        cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_2_PASS_VBR) {
        h264->adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
        h264->fmoMode = NV_ENC_H264_FMO_DISABLE;
    }

    if (ctx->flags & NVENC_LOSSLESS) {
        h264->qpPrimeYZeroTransformBypassFlag = 1;
    } else {
        switch(ctx->profile) {
        case NV_ENC_H264_PROFILE_BASELINE:
            cc->profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
            avctx->profile = FF_PROFILE_H264_BASELINE;
            break;
        case NV_ENC_H264_PROFILE_MAIN:
            cc->profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
            avctx->profile = FF_PROFILE_H264_MAIN;
            break;
        case NV_ENC_H264_PROFILE_HIGH:
            cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
            avctx->profile = FF_PROFILE_H264_HIGH;
            break;
        case NV_ENC_H264_PROFILE_HIGH_444P:
            cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
            avctx->profile = FF_PROFILE_H264_HIGH_444_PREDICTIVE;
            break;
        }
    }

    // force setting profile as high444p if input is AV_PIX_FMT_YUV444P
    if (ctx->data_pix_fmt == AV_PIX_FMT_YUV444P) {
        cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
        avctx->profile = FF_PROFILE_H264_HIGH_444_PREDICTIVE;
    }

    /* chroma_format_idc: 3 = 4:4:4, 1 = 4:2:0. */
    h264->chromaFormatIDC = avctx->profile == FF_PROFILE_H264_HIGH_444_PREDICTIVE ? 3 : 1;

    h264->level = ctx->level;

    return 0;
}
852 | |
/**
 * Fill in the NVENC HEVC codec configuration from the AVCodecContext:
 * VUI colour signalling, slice mode, SPS/PPS emission policy, reference
 * and GOP settings, profile, level and tier.
 *
 * Always returns 0.
 */
static av_cold int nvenc_setup_hevc_config(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENC_CONFIG *cc = &ctx->encode_config;
    NV_ENC_CONFIG_HEVC *hevc = &cc->encodeCodecConfig.hevcConfig;
    NV_ENC_CONFIG_HEVC_VUI_PARAMETERS *vui = &hevc->hevcVUIParameters;

    /* Pass the colour metadata straight through into the bitstream VUI. */
    vui->colourMatrix = avctx->colorspace;
    vui->colourPrimaries = avctx->color_primaries;
    vui->transferCharacteristics = avctx->color_trc;
    /* The deprecated YUVJ pixel formats imply full-range video even when
     * color_range is not explicitly set to JPEG range. */
    vui->videoFullRangeFlag = (avctx->color_range == AVCOL_RANGE_JPEG
        || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ420P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ422P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ444P);

    /* 2 is the "unspecified" value for colorspace, primaries and transfer;
     * only write the colour description when at least one is meaningful. */
    vui->colourDescriptionPresentFlag =
        (avctx->colorspace != 2 || avctx->color_primaries != 2 || avctx->color_trc != 2);

    /* videoFormat 5 is "unspecified"; emit the video signal type only when
     * something other than the defaults needs to be conveyed. */
    vui->videoSignalTypePresentFlag =
        (vui->colourDescriptionPresentFlag
        || vui->videoFormat != 5
        || vui->videoFullRangeFlag != 0);

    /* sliceMode 3: slices are specified as a count per picture. */
    hevc->sliceMode = 3;
    hevc->sliceModeData = 1;

    /* With global headers the parameter sets live in extradata, so do not
     * emit them in-band; otherwise repeat them for stream robustness. */
    hevc->disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
    hevc->repeatSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
    hevc->outputAUD = ctx->aud;

    if (avctx->refs >= 0) {
        /* 0 means "let the hardware decide" */
        hevc->maxNumRefFramesInDPB = avctx->refs;
    }
    if (avctx->gop_size >= 0) {
        /* gopLength was derived from gop_size earlier in setup. */
        hevc->idrPeriod = cc->gopLength;
    }

    /* CBR-style rate control wants HRD timing SEI in the stream. */
    if (IS_CBR(cc->rcParams.rateControlMode)) {
        hevc->outputBufferingPeriodSEI = 1;
        hevc->outputPictureTimingSEI = 1;
    }

    /* Map the user-selected profile onto the NVENC GUID and the FFmpeg
     * profile constant reported back on the context. */
    switch (ctx->profile) {
    case NV_ENC_HEVC_PROFILE_MAIN:
        cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
        avctx->profile = FF_PROFILE_HEVC_MAIN;
        break;
    case NV_ENC_HEVC_PROFILE_MAIN_10:
        cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
        avctx->profile = FF_PROFILE_HEVC_MAIN_10;
        break;
    case NV_ENC_HEVC_PROFILE_REXT:
        cc->profileGUID = NV_ENC_HEVC_PROFILE_FREXT_GUID;
        avctx->profile = FF_PROFILE_HEVC_REXT;
        break;
    }

    // force setting profile as main10 if input is 10 bit
    if (IS_10BIT(ctx->data_pix_fmt)) {
        cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
        avctx->profile = FF_PROFILE_HEVC_MAIN_10;
    }

    // force setting profile as rext if input is yuv444
    if (IS_YUV444(ctx->data_pix_fmt)) {
        cc->profileGUID = NV_ENC_HEVC_PROFILE_FREXT_GUID;
        avctx->profile = FF_PROFILE_HEVC_REXT;
    }

    /* chroma_format_idc: 3 = 4:4:4, 1 = 4:2:0. */
    hevc->chromaFormatIDC = IS_YUV444(ctx->data_pix_fmt) ? 3 : 1;

    /* 10-bit input -> bit depth 10 (minus8 == 2). */
    hevc->pixelBitDepthMinus8 = IS_10BIT(ctx->data_pix_fmt) ? 2 : 0;

    hevc->level = ctx->level;

    hevc->tier = ctx->tier;

    return 0;
}
931 | |
932 | static av_cold int nvenc_setup_codec_config(AVCodecContext *avctx) |
933 | { |
934 | switch (avctx->codec->id) { |
935 | case AV_CODEC_ID_H264: |
936 | return nvenc_setup_h264_config(avctx); |
937 | case AV_CODEC_ID_HEVC: |
938 | return nvenc_setup_hevc_config(avctx); |
939 | /* Earlier switch/case will return if unknown codec is passed. */ |
940 | } |
941 | |
942 | return 0; |
943 | } |
944 | |
/**
 * Configure and initialize the NVENC encoder session.
 *
 * Loads the preset configuration as a baseline, then applies display
 * aspect ratio, frame rate, Blu-ray compatibility limits, GOP structure,
 * rate control and codec-specific settings before calling
 * nvEncInitializeEncoder(). Finally publishes bitrate/CPB properties
 * back onto the AVCodecContext.
 *
 * Returns 0 on success, a negative AVERROR code on failure.
 */
static av_cold int nvenc_setup_encoder(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    NV_ENC_PRESET_CONFIG preset_config = { 0 };
    NVENCSTATUS nv_status = NV_ENC_SUCCESS;
    AVCPBProperties *cpb_props;
    int res = 0;
    int dw, dh;

    ctx->encode_config.version = NV_ENC_CONFIG_VER;
    ctx->init_encode_params.version = NV_ENC_INITIALIZE_PARAMS_VER;

    ctx->init_encode_params.encodeHeight = avctx->height;
    ctx->init_encode_params.encodeWidth = avctx->width;

    ctx->init_encode_params.encodeConfig = &ctx->encode_config;

    /* Resolve the user's preset option into encode/preset GUIDs. */
    nvenc_map_preset(ctx);

    preset_config.version = NV_ENC_PRESET_CONFIG_VER;
    preset_config.presetCfg.version = NV_ENC_CONFIG_VER;

    nv_status = p_nvenc->nvEncGetEncodePresetConfig(ctx->nvencoder,
                                                    ctx->init_encode_params.encodeGUID,
                                                    ctx->init_encode_params.presetGUID,
                                                    &preset_config);
    if (nv_status != NV_ENC_SUCCESS)
        return nvenc_print_error(avctx, nv_status, "Cannot get the preset configuration");

    /* Start from the preset defaults and override selectively below. */
    memcpy(&ctx->encode_config, &preset_config.presetCfg, sizeof(ctx->encode_config));

    ctx->encode_config.version = NV_ENC_CONFIG_VER;

    /* Derive the display aspect ratio from the picture size and SAR,
     * reduced so the terms stay within the API's expected range. */
    dw = avctx->width;
    dh = avctx->height;
    if (avctx->sample_aspect_ratio.num > 0 && avctx->sample_aspect_ratio.den > 0) {
        dw*= avctx->sample_aspect_ratio.num;
        dh*= avctx->sample_aspect_ratio.den;
    }
    av_reduce(&dw, &dh, dw, dh, 1024 * 1024);
    ctx->init_encode_params.darHeight = dh;
    ctx->init_encode_params.darWidth = dw;

    ctx->init_encode_params.frameRateNum = avctx->time_base.den;
    ctx->init_encode_params.frameRateDen = avctx->time_base.num * avctx->ticks_per_frame;

    /* Synchronous operation with picture type decision left to NVENC. */
    ctx->init_encode_params.enableEncodeAsync = 0;
    ctx->init_encode_params.enablePTD = 1;

    if (ctx->bluray_compat) {
        /* Blu-ray requires AUDs and caps reference/B-frame counts. */
        ctx->aud = 1;
        avctx->refs = FFMIN(FFMAX(avctx->refs, 0), 6);
        avctx->max_b_frames = FFMIN(avctx->max_b_frames, 3);
        switch (avctx->codec->id) {
        case AV_CODEC_ID_H264:
            /* maximum level depends on used resolution */
            break;
        case AV_CODEC_ID_HEVC:
            ctx->level = NV_ENC_LEVEL_HEVC_51;
            ctx->tier = NV_ENC_TIER_HEVC_HIGH;
            break;
        }
    }

    if (avctx->gop_size > 0) {
        if (avctx->max_b_frames >= 0) {
            /* 0 is intra-only, 1 is I/P only, 2 is one B-Frame, 3 two B-frames, and so on. */
            ctx->encode_config.frameIntervalP = avctx->max_b_frames + 1;
        }

        ctx->encode_config.gopLength = avctx->gop_size;
    } else if (avctx->gop_size == 0) {
        /* gop_size == 0 means intra-only encoding. */
        ctx->encode_config.frameIntervalP = 0;
        ctx->encode_config.gopLength = 1;
    }

    /* Sentinels until the first two input pts values are observed. */
    ctx->initial_pts[0] = AV_NOPTS_VALUE;
    ctx->initial_pts[1] = AV_NOPTS_VALUE;

    nvenc_recalc_surfaces(avctx);

    nvenc_setup_rate_control(avctx);

    if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
        ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD;
    } else {
        ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME;
    }

    res = nvenc_setup_codec_config(avctx);
    if (res)
        return res;

    nv_status = p_nvenc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->init_encode_params);
    if (nv_status != NV_ENC_SUCCESS) {
        return nvenc_print_error(avctx, nv_status, "InitializeEncoder failed");
    }

    /* frameIntervalP > 1 implies B-frames; 2 is the reorder delay. */
    if (ctx->encode_config.frameIntervalP > 1)
        avctx->has_b_frames = 2;

    if (ctx->encode_config.rcParams.averageBitRate > 0)
        avctx->bit_rate = ctx->encode_config.rcParams.averageBitRate;

    /* Export the coded picture buffer properties as packet side data. */
    cpb_props = ff_add_cpb_side_data(avctx);
    if (!cpb_props)
        return AVERROR(ENOMEM);
    cpb_props->max_bitrate = ctx->encode_config.rcParams.maxBitRate;
    cpb_props->avg_bitrate = avctx->bit_rate;
    cpb_props->buffer_size = ctx->encode_config.rcParams.vbvBufferSize;

    return 0;
}
1061 | |
1062 | static NV_ENC_BUFFER_FORMAT nvenc_map_buffer_format(enum AVPixelFormat pix_fmt) |
1063 | { |
1064 | switch (pix_fmt) { |
1065 | case AV_PIX_FMT_YUV420P: |
1066 | return NV_ENC_BUFFER_FORMAT_YV12_PL; |
1067 | case AV_PIX_FMT_NV12: |
1068 | return NV_ENC_BUFFER_FORMAT_NV12_PL; |
1069 | case AV_PIX_FMT_P010: |
1070 | return NV_ENC_BUFFER_FORMAT_YUV420_10BIT; |
1071 | case AV_PIX_FMT_YUV444P: |
1072 | return NV_ENC_BUFFER_FORMAT_YUV444_PL; |
1073 | case AV_PIX_FMT_YUV444P16: |
1074 | return NV_ENC_BUFFER_FORMAT_YUV444_10BIT; |
1075 | case AV_PIX_FMT_0RGB32: |
1076 | return NV_ENC_BUFFER_FORMAT_ARGB; |
1077 | case AV_PIX_FMT_0BGR32: |
1078 | return NV_ENC_BUFFER_FORMAT_ABGR; |
1079 | default: |
1080 | return NV_ENC_BUFFER_FORMAT_UNDEFINED; |
1081 | } |
1082 | } |
1083 | |
/**
 * Allocate one entry of the surface pool: an input side (either an AVFrame
 * reference holder for CUDA frames, or an NVENC-allocated input buffer for
 * system-memory frames) plus an output bitstream buffer.
 *
 * Returns 0 on success, a negative AVERROR code on failure.
 */
static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    NVENCSTATUS nv_status;
    NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
    allocOut.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;

    if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
        /* GPU input: the frame is mapped at encode time; only a holder
         * for the frame reference is needed here. */
        ctx->surfaces[idx].in_ref = av_frame_alloc();
        if (!ctx->surfaces[idx].in_ref)
            return AVERROR(ENOMEM);
    } else {
        NV_ENC_CREATE_INPUT_BUFFER allocSurf = { 0 };

        ctx->surfaces[idx].format = nvenc_map_buffer_format(ctx->data_pix_fmt);
        if (ctx->surfaces[idx].format == NV_ENC_BUFFER_FORMAT_UNDEFINED) {
            av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format: %s\n",
                   av_get_pix_fmt_name(ctx->data_pix_fmt));
            return AVERROR(EINVAL);
        }

        allocSurf.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
        /* Round dimensions up to a multiple of 32. */
        allocSurf.width = (avctx->width + 31) & ~31;
        allocSurf.height = (avctx->height + 31) & ~31;
        allocSurf.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
        allocSurf.bufferFmt = ctx->surfaces[idx].format;

        nv_status = p_nvenc->nvEncCreateInputBuffer(ctx->nvencoder, &allocSurf);
        if (nv_status != NV_ENC_SUCCESS) {
            return nvenc_print_error(avctx, nv_status, "CreateInputBuffer failed");
        }

        ctx->surfaces[idx].input_surface = allocSurf.inputBuffer;
        ctx->surfaces[idx].width = allocSurf.width;
        ctx->surfaces[idx].height = allocSurf.height;
    }

    ctx->surfaces[idx].lockCount = 0;

    /* 1MB is large enough to hold most output frames.
     * NVENC increases this automaticaly if it is not enough. */
    allocOut.size = 1024 * 1024;

    allocOut.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;

    nv_status = p_nvenc->nvEncCreateBitstreamBuffer(ctx->nvencoder, &allocOut);
    if (nv_status != NV_ENC_SUCCESS) {
        /* Roll back whichever input side was allocated above; in_ref is
         * NULL in the non-CUDA path, so av_frame_free() is a no-op there. */
        int err = nvenc_print_error(avctx, nv_status, "CreateBitstreamBuffer failed");
        if (avctx->pix_fmt != AV_PIX_FMT_CUDA)
            p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->surfaces[idx].input_surface);
        av_frame_free(&ctx->surfaces[idx].in_ref);
        return err;
    }

    ctx->surfaces[idx].output_surface = allocOut.bitstreamBuffer;
    ctx->surfaces[idx].size = allocOut.size;

    return 0;
}
1146 | |
1147 | static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx) |
1148 | { |
1149 | NvencContext *ctx = avctx->priv_data; |
1150 | int i, res; |
1151 | |
1152 | ctx->surfaces = av_mallocz_array(ctx->nb_surfaces, sizeof(*ctx->surfaces)); |
1153 | if (!ctx->surfaces) |
1154 | return AVERROR(ENOMEM); |
1155 | |
1156 | ctx->timestamp_list = av_fifo_alloc(ctx->nb_surfaces * sizeof(int64_t)); |
1157 | if (!ctx->timestamp_list) |
1158 | return AVERROR(ENOMEM); |
1159 | ctx->output_surface_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*)); |
1160 | if (!ctx->output_surface_queue) |
1161 | return AVERROR(ENOMEM); |
1162 | ctx->output_surface_ready_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*)); |
1163 | if (!ctx->output_surface_ready_queue) |
1164 | return AVERROR(ENOMEM); |
1165 | |
1166 | for (i = 0; i < ctx->nb_surfaces; i++) { |
1167 | if ((res = nvenc_alloc_surface(avctx, i)) < 0) |
1168 | return res; |
1169 | } |
1170 | |
1171 | return 0; |
1172 | } |
1173 | |
/**
 * Fetch the SPS/PPS (and VPS for HEVC) headers from NVENC and store them
 * as avctx->extradata, for use with AV_CODEC_FLAG_GLOBAL_HEADER.
 *
 * Returns 0 on success, a negative AVERROR code on failure.
 */
static av_cold int nvenc_setup_extradata(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    NVENCSTATUS nv_status;
    uint32_t outSize = 0;
    /* NOTE(review): a fixed 256-byte buffer is assumed sufficient for the
     * parameter sets — verify against the NVENC API whether larger headers
     * are possible and how truncation is reported. */
    char tmpHeader[256];
    NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { 0 };
    payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;

    payload.spsppsBuffer = tmpHeader;
    payload.inBufferSize = sizeof(tmpHeader);
    payload.outSPSPPSPayloadSize = &outSize;

    nv_status = p_nvenc->nvEncGetSequenceParams(ctx->nvencoder, &payload);
    if (nv_status != NV_ENC_SUCCESS) {
        return nvenc_print_error(avctx, nv_status, "GetSequenceParams failed");
    }

    avctx->extradata_size = outSize;
    /* Padded and zeroed allocation as required for extradata consumers. */
    avctx->extradata = av_mallocz(outSize + AV_INPUT_BUFFER_PADDING_SIZE);

    if (!avctx->extradata) {
        return AVERROR(ENOMEM);
    }

    memcpy(avctx->extradata, tmpHeader, outSize);

    return 0;
}
1206 | |
/**
 * Tear down the encoder: flush NVENC, free the tracking FIFOs, unmap and
 * unregister CUDA resources, destroy input/output buffers, close the
 * encoder session, release the CUDA context and unload the libraries.
 *
 * Safe to call on a partially initialized context; every step checks
 * whether its resource exists. Always returns 0.
 */
av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
    int i;

    /* the encoder has to be flushed before it can be closed */
    if (ctx->nvencoder) {
        NV_ENC_PIC_PARAMS params = { .version = NV_ENC_PIC_PARAMS_VER,
                                     .encodePicFlags = NV_ENC_PIC_FLAG_EOS };

        p_nvenc->nvEncEncodePicture(ctx->nvencoder, &params);
    }

    av_fifo_freep(&ctx->timestamp_list);
    av_fifo_freep(&ctx->output_surface_ready_queue);
    av_fifo_freep(&ctx->output_surface_queue);

    /* CUDA input path: unmap any still-mapped resources and unregister
     * all registered frames before the buffers are destroyed below. */
    if (ctx->surfaces && avctx->pix_fmt == AV_PIX_FMT_CUDA) {
        for (i = 0; i < ctx->nb_surfaces; ++i) {
            if (ctx->surfaces[i].input_surface) {
                p_nvenc->nvEncUnmapInputResource(ctx->nvencoder, ctx->surfaces[i].in_map.mappedResource);
            }
        }
        for (i = 0; i < ctx->nb_registered_frames; i++) {
            if (ctx->registered_frames[i].regptr)
                p_nvenc->nvEncUnregisterResource(ctx->nvencoder, ctx->registered_frames[i].regptr);
        }
        ctx->nb_registered_frames = 0;
    }

    /* Destroy the per-surface input (system memory path only) and output
     * buffers; in_ref is only non-NULL for the CUDA path. */
    if (ctx->surfaces) {
        for (i = 0; i < ctx->nb_surfaces; ++i) {
            if (avctx->pix_fmt != AV_PIX_FMT_CUDA)
                p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->surfaces[i].input_surface);
            av_frame_free(&ctx->surfaces[i].in_ref);
            p_nvenc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->surfaces[i].output_surface);
        }
    }
    av_freep(&ctx->surfaces);
    ctx->nb_surfaces = 0;

    if (ctx->nvencoder)
        p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
    ctx->nvencoder = NULL;

    /* Only destroy the CUDA context if we created it ourselves. */
    if (ctx->cu_context_internal)
        dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
    ctx->cu_context = ctx->cu_context_internal = NULL;

    nvenc_free_functions(&dl_fn->nvenc_dl);
    cuda_free_functions(&dl_fn->cuda_dl);

    dl_fn->nvenc_device_count = 0;

    av_log(avctx, AV_LOG_VERBOSE, "Nvenc unloaded\n");

    return 0;
}
1267 | |
1268 | av_cold int ff_nvenc_encode_init(AVCodecContext *avctx) |
1269 | { |
1270 | NvencContext *ctx = avctx->priv_data; |
1271 | int ret; |
1272 | |
1273 | if (avctx->pix_fmt == AV_PIX_FMT_CUDA) { |
1274 | AVHWFramesContext *frames_ctx; |
1275 | if (!avctx->hw_frames_ctx) { |
1276 | av_log(avctx, AV_LOG_ERROR, |
1277 | "hw_frames_ctx must be set when using GPU frames as input\n"); |
1278 | return AVERROR(EINVAL); |
1279 | } |
1280 | frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data; |
1281 | ctx->data_pix_fmt = frames_ctx->sw_format; |
1282 | } else { |
1283 | ctx->data_pix_fmt = avctx->pix_fmt; |
1284 | } |
1285 | |
1286 | if ((ret = nvenc_load_libraries(avctx)) < 0) |
1287 | return ret; |
1288 | |
1289 | if ((ret = nvenc_setup_device(avctx)) < 0) |
1290 | return ret; |
1291 | |
1292 | if ((ret = nvenc_setup_encoder(avctx)) < 0) |
1293 | return ret; |
1294 | |
1295 | if ((ret = nvenc_setup_surfaces(avctx)) < 0) |
1296 | return ret; |
1297 | |
1298 | if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) { |
1299 | if ((ret = nvenc_setup_extradata(avctx)) < 0) |
1300 | return ret; |
1301 | } |
1302 | |
1303 | return 0; |
1304 | } |
1305 | |
1306 | static NvencSurface *get_free_frame(NvencContext *ctx) |
1307 | { |
1308 | int i; |
1309 | |
1310 | for (i = 0; i < ctx->nb_surfaces; i++) { |
1311 | if (!ctx->surfaces[i].lockCount) { |
1312 | ctx->surfaces[i].lockCount = 1; |
1313 | return &ctx->surfaces[i]; |
1314 | } |
1315 | } |
1316 | |
1317 | return NULL; |
1318 | } |
1319 | |
/**
 * Copy a system-memory frame into a locked NVENC input buffer.
 *
 * The destination pitch is the same for all planes as reported by the
 * lock call; for YUV420P input the chroma planes use half that pitch,
 * and the U/V plane pointers are swapped because the buffer was created
 * as YV12 (see nvenc_map_buffer_format), which stores V before U.
 *
 * Returns 0 on success, a negative AVERROR code on failure.
 */
static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *nv_surface,
            NV_ENC_LOCK_INPUT_BUFFER *lock_buffer_params, const AVFrame *frame)
{
    int dst_linesize[4] = {
        lock_buffer_params->pitch,
        lock_buffer_params->pitch,
        lock_buffer_params->pitch,
        lock_buffer_params->pitch
    };
    uint8_t *dst_data[4];
    int ret;

    /* Chroma planes of 4:2:0 data are half-width. */
    if (frame->format == AV_PIX_FMT_YUV420P)
        dst_linesize[1] = dst_linesize[2] >>= 1;

    ret = av_image_fill_pointers(dst_data, frame->format, nv_surface->height,
                                 lock_buffer_params->bufferDataPtr, dst_linesize);
    if (ret < 0)
        return ret;

    /* YV12 destination: V plane precedes U, so swap the chroma pointers. */
    if (frame->format == AV_PIX_FMT_YUV420P)
        FFSWAP(uint8_t*, dst_data[1], dst_data[2]);

    av_image_copy(dst_data, dst_linesize,
                  (const uint8_t**)frame->data, frame->linesize, frame->format,
                  avctx->width, avctx->height);

    return 0;
}
1349 | |
1350 | static int nvenc_find_free_reg_resource(AVCodecContext *avctx) |
1351 | { |
1352 | NvencContext *ctx = avctx->priv_data; |
1353 | NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs; |
1354 | NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs; |
1355 | |
1356 | int i; |
1357 | |
1358 | if (ctx->nb_registered_frames == FF_ARRAY_ELEMS(ctx->registered_frames)) { |
1359 | for (i = 0; i < ctx->nb_registered_frames; i++) { |
1360 | if (!ctx->registered_frames[i].mapped) { |
1361 | if (ctx->registered_frames[i].regptr) { |
1362 | p_nvenc->nvEncUnregisterResource(ctx->nvencoder, |
1363 | ctx->registered_frames[i].regptr); |
1364 | ctx->registered_frames[i].regptr = NULL; |
1365 | } |
1366 | return i; |
1367 | } |
1368 | } |
1369 | } else { |
1370 | return ctx->nb_registered_frames++; |
1371 | } |
1372 | |
1373 | av_log(avctx, AV_LOG_ERROR, "Too many registered CUDA frames\n"); |
1374 | return AVERROR(ENOMEM); |
1375 | } |
1376 | |
1377 | static int nvenc_register_frame(AVCodecContext *avctx, const AVFrame *frame) |
1378 | { |
1379 | NvencContext *ctx = avctx->priv_data; |
1380 | NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs; |
1381 | NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs; |
1382 | |
1383 | AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data; |
1384 | NV_ENC_REGISTER_RESOURCE reg; |
1385 | int i, idx, ret; |
1386 | |
1387 | for (i = 0; i < ctx->nb_registered_frames; i++) { |
1388 | if (ctx->registered_frames[i].ptr == (CUdeviceptr)frame->data[0]) |
1389 | return i; |
1390 | } |
1391 | |
1392 | idx = nvenc_find_free_reg_resource(avctx); |
1393 | if (idx < 0) |
1394 | return idx; |
1395 | |
1396 | reg.version = NV_ENC_REGISTER_RESOURCE_VER; |
1397 | reg.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR; |
1398 | reg.width = frames_ctx->width; |
1399 | reg.height = frames_ctx->height; |
1400 | reg.pitch = frame->linesize[0]; |
1401 | reg.resourceToRegister = frame->data[0]; |
1402 | |
1403 | reg.bufferFormat = nvenc_map_buffer_format(frames_ctx->sw_format); |
1404 | if (reg.bufferFormat == NV_ENC_BUFFER_FORMAT_UNDEFINED) { |
1405 | av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format: %s\n", |
1406 | av_get_pix_fmt_name(frames_ctx->sw_format)); |
1407 | return AVERROR(EINVAL); |
1408 | } |
1409 | |
1410 | ret = p_nvenc->nvEncRegisterResource(ctx->nvencoder, ®); |
1411 | if (ret != NV_ENC_SUCCESS) { |
1412 | nvenc_print_error(avctx, ret, "Error registering an input resource"); |
1413 | return AVERROR_UNKNOWN; |
1414 | } |
1415 | |
1416 | ctx->registered_frames[idx].ptr = (CUdeviceptr)frame->data[0]; |
1417 | ctx->registered_frames[idx].regptr = reg.registeredResource; |
1418 | return idx; |
1419 | } |
1420 | |
/**
 * Make the input frame available to NVENC.
 *
 * For CUDA frames: register (or reuse) the device memory, keep a
 * reference to the frame, and map the registered resource as the input
 * surface. For system-memory frames: lock the pre-allocated NVENC input
 * buffer, copy the picture into it, and unlock.
 *
 * Returns 0 on success, a negative AVERROR code on failure.
 */
static int nvenc_upload_frame(AVCodecContext *avctx, const AVFrame *frame,
                                            NvencSurface *nvenc_frame)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    int res;
    NVENCSTATUS nv_status;

    if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
        int reg_idx = nvenc_register_frame(avctx, frame);
        if (reg_idx < 0) {
            av_log(avctx, AV_LOG_ERROR, "Could not register an input CUDA frame\n");
            return reg_idx;
        }

        /* Hold a reference so the device memory stays valid until the
         * encoded output for this frame has been retrieved. */
        res = av_frame_ref(nvenc_frame->in_ref, frame);
        if (res < 0)
            return res;

        nvenc_frame->in_map.version = NV_ENC_MAP_INPUT_RESOURCE_VER;
        nvenc_frame->in_map.registeredResource = ctx->registered_frames[reg_idx].regptr;
        nv_status = p_nvenc->nvEncMapInputResource(ctx->nvencoder, &nvenc_frame->in_map);
        if (nv_status != NV_ENC_SUCCESS) {
            /* Drop the reference taken above before reporting failure. */
            av_frame_unref(nvenc_frame->in_ref);
            return nvenc_print_error(avctx, nv_status, "Error mapping an input resource");
        }

        ctx->registered_frames[reg_idx].mapped = 1;
        nvenc_frame->reg_idx = reg_idx;
        nvenc_frame->input_surface = nvenc_frame->in_map.mappedResource;
        nvenc_frame->format = nvenc_frame->in_map.mappedBufferFmt;
        nvenc_frame->pitch = frame->linesize[0];
        return 0;
    } else {
        NV_ENC_LOCK_INPUT_BUFFER lockBufferParams = { 0 };

        lockBufferParams.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
        lockBufferParams.inputBuffer = nvenc_frame->input_surface;

        nv_status = p_nvenc->nvEncLockInputBuffer(ctx->nvencoder, &lockBufferParams);
        if (nv_status != NV_ENC_SUCCESS) {
            return nvenc_print_error(avctx, nv_status, "Failed locking nvenc input buffer");
        }

        nvenc_frame->pitch = lockBufferParams.pitch;
        /* Even if the copy fails, the buffer must be unlocked before we
         * report the error. */
        res = nvenc_copy_frame(avctx, nvenc_frame, &lockBufferParams, frame);

        nv_status = p_nvenc->nvEncUnlockInputBuffer(ctx->nvencoder, nvenc_frame->input_surface);
        if (nv_status != NV_ENC_SUCCESS) {
            return nvenc_print_error(avctx, nv_status, "Failed unlocking input buffer!");
        }

        return res;
    }
}
1478 | |
1479 | static void nvenc_codec_specific_pic_params(AVCodecContext *avctx, |
1480 | NV_ENC_PIC_PARAMS *params) |
1481 | { |
1482 | NvencContext *ctx = avctx->priv_data; |
1483 | |
1484 | switch (avctx->codec->id) { |
1485 | case AV_CODEC_ID_H264: |
1486 | params->codecPicParams.h264PicParams.sliceMode = |
1487 | ctx->encode_config.encodeCodecConfig.h264Config.sliceMode; |
1488 | params->codecPicParams.h264PicParams.sliceModeData = |
1489 | ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData; |
1490 | break; |
1491 | case AV_CODEC_ID_HEVC: |
1492 | params->codecPicParams.hevcPicParams.sliceMode = |
1493 | ctx->encode_config.encodeCodecConfig.hevcConfig.sliceMode; |
1494 | params->codecPicParams.hevcPicParams.sliceModeData = |
1495 | ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData; |
1496 | break; |
1497 | } |
1498 | } |
1499 | |
/* Push one input timestamp onto the FIFO used for dts generation. */
static inline void timestamp_queue_enqueue(AVFifoBuffer* queue, int64_t timestamp)
{
    av_fifo_generic_write(queue, &timestamp, sizeof(timestamp), NULL);
}
1504 | |
1505 | static inline int64_t timestamp_queue_dequeue(AVFifoBuffer* queue) |
1506 | { |
1507 | int64_t timestamp = AV_NOPTS_VALUE; |
1508 | if (av_fifo_size(queue) > 0) |
1509 | av_fifo_generic_read(queue, ×tamp, sizeof(timestamp), NULL); |
1510 | |
1511 | return timestamp; |
1512 | } |
1513 | |
/**
 * Assign pts and dts to an output packet.
 *
 * pts comes straight from NVENC. For the very first packet of a stream
 * that uses B-frames, dts is extrapolated backwards from the first two
 * input pts values (dts = pts0 - (pts1 - pts0)); afterwards dts values
 * are taken from the input-timestamp FIFO in arrival order.
 *
 * Returns 0 on success, AVERROR(ERANGE) if the extrapolation would
 * overflow int64_t.
 */
static int nvenc_set_timestamp(AVCodecContext *avctx,
                               NV_ENC_LOCK_BITSTREAM *params,
                               AVPacket *pkt)
{
    NvencContext *ctx = avctx->priv_data;

    pkt->pts = params->outputTimeStamp;

    /* generate the first dts by linearly extrapolating the
     * first two pts values to the past */
    if (avctx->max_b_frames > 0 && !ctx->first_packet_output &&
        ctx->initial_pts[1] != AV_NOPTS_VALUE) {
        int64_t ts0 = ctx->initial_pts[0], ts1 = ctx->initial_pts[1];
        int64_t delta;

        /* Guard ts1 - ts0 against signed overflow before computing it. */
        if ((ts0 < 0 && ts1 > INT64_MAX + ts0) ||
            (ts0 > 0 && ts1 < INT64_MIN + ts0))
            return AVERROR(ERANGE);
        delta = ts1 - ts0;

        /* Guard ts0 - delta against signed overflow as well. */
        if ((delta < 0 && ts0 > INT64_MAX + delta) ||
            (delta > 0 && ts0 < INT64_MIN + delta))
            return AVERROR(ERANGE);
        pkt->dts = ts0 - delta;

        ctx->first_packet_output = 1;
        return 0;
    }

    pkt->dts = timestamp_queue_dequeue(ctx->timestamp_list);

    return 0;
}
1547 | |
1548 | static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, NvencSurface *tmpoutsurf) |
1549 | { |
1550 | NvencContext *ctx = avctx->priv_data; |
1551 | NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs; |
1552 | NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs; |
1553 | |
1554 | uint32_t slice_mode_data; |
1555 | uint32_t *slice_offsets = NULL; |
1556 | NV_ENC_LOCK_BITSTREAM lock_params = { 0 }; |
1557 | NVENCSTATUS nv_status; |
1558 | int res = 0; |
1559 | |
1560 | enum AVPictureType pict_type; |
1561 | |
1562 | switch (avctx->codec->id) { |
1563 | case AV_CODEC_ID_H264: |
1564 | slice_mode_data = ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData; |
1565 | break; |
1566 | case AV_CODEC_ID_H265: |
1567 | slice_mode_data = ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData; |
1568 | break; |
1569 | default: |
1570 | av_log(avctx, AV_LOG_ERROR, "Unknown codec name\n"); |
1571 | res = AVERROR(EINVAL); |
1572 | goto error; |
1573 | } |
1574 | slice_offsets = av_mallocz(slice_mode_data * sizeof(*slice_offsets)); |
1575 | |
1576 | if (!slice_offsets) |
1577 | goto error; |
1578 | |
1579 | lock_params.version = NV_ENC_LOCK_BITSTREAM_VER; |
1580 | |
1581 | lock_params.doNotWait = 0; |
1582 | lock_params.outputBitstream = tmpoutsurf->output_surface; |
1583 | lock_params.sliceOffsets = slice_offsets; |
1584 | |
1585 | nv_status = p_nvenc->nvEncLockBitstream(ctx->nvencoder, &lock_params); |
1586 | if (nv_status != NV_ENC_SUCCESS) { |
1587 | res = nvenc_print_error(avctx, nv_status, "Failed locking bitstream buffer"); |
1588 | goto error; |
1589 | } |
1590 | |
1591 | if (res = ff_alloc_packet2(avctx, pkt, lock_params.bitstreamSizeInBytes,0)) { |
1592 | p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface); |
1593 | goto error; |
1594 | } |
1595 | |
1596 | memcpy(pkt->data, lock_params.bitstreamBufferPtr, lock_params.bitstreamSizeInBytes); |
1597 | |
1598 | nv_status = p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface); |
1599 | if (nv_status != NV_ENC_SUCCESS) |
1600 | nvenc_print_error(avctx, nv_status, "Failed unlocking bitstream buffer, expect the gates of mordor to open"); |
1601 | |
1602 | |
1603 | if (avctx->pix_fmt == AV_PIX_FMT_CUDA) { |
1604 | p_nvenc->nvEncUnmapInputResource(ctx->nvencoder, tmpoutsurf->in_map.mappedResource); |
1605 | av_frame_unref(tmpoutsurf->in_ref); |
1606 | ctx->registered_frames[tmpoutsurf->reg_idx].mapped = 0; |
1607 | |
1608 | tmpoutsurf->input_surface = NULL; |
1609 | } |
1610 | |
1611 | switch (lock_params.pictureType) { |
1612 | case NV_ENC_PIC_TYPE_IDR: |
1613 | pkt->flags |= AV_PKT_FLAG_KEY; |
1614 | case NV_ENC_PIC_TYPE_I: |
1615 | pict_type = AV_PICTURE_TYPE_I; |
1616 | break; |
1617 | case NV_ENC_PIC_TYPE_P: |
1618 | pict_type = AV_PICTURE_TYPE_P; |
1619 | break; |
1620 | case NV_ENC_PIC_TYPE_B: |
1621 | pict_type = AV_PICTURE_TYPE_B; |
1622 | break; |
1623 | case NV_ENC_PIC_TYPE_BI: |
1624 | pict_type = AV_PICTURE_TYPE_BI; |
1625 | break; |
1626 | default: |
1627 | av_log(avctx, AV_LOG_ERROR, "Unknown picture type encountered, expect the output to be broken.\n"); |
1628 | av_log(avctx, AV_LOG_ERROR, "Please report this error and include as much information on how to reproduce it as possible.\n"); |
1629 | res = AVERROR_EXTERNAL; |
1630 | goto error; |
1631 | } |
1632 | |
1633 | #if FF_API_CODED_FRAME |
1634 | FF_DISABLE_DEPRECATION_WARNINGS |
1635 | avctx->coded_frame->pict_type = pict_type; |
1636 | FF_ENABLE_DEPRECATION_WARNINGS |
1637 | #endif |
1638 | |
1639 | ff_side_data_set_encoder_stats(pkt, |
1640 | (lock_params.frameAvgQP - 1) * FF_QP2LAMBDA, NULL, 0, pict_type); |
1641 | |
1642 | res = nvenc_set_timestamp(avctx, &lock_params, pkt); |
1643 | if (res < 0) |
1644 | goto error2; |
1645 | |
1646 | av_free(slice_offsets); |
1647 | |
1648 | return 0; |
1649 | |
1650 | error: |
1651 | timestamp_queue_dequeue(ctx->timestamp_list); |
1652 | |
1653 | error2: |
1654 | av_free(slice_offsets); |
1655 | |
1656 | return res; |
1657 | } |
1658 | |
1659 | static int output_ready(AVCodecContext *avctx, int flush) |
1660 | { |
1661 | NvencContext *ctx = avctx->priv_data; |
1662 | int nb_ready, nb_pending; |
1663 | |
1664 | /* when B-frames are enabled, we wait for two initial timestamps to |
1665 | * calculate the first dts */ |
1666 | if (!flush && avctx->max_b_frames > 0 && |
1667 | (ctx->initial_pts[0] == AV_NOPTS_VALUE || ctx->initial_pts[1] == AV_NOPTS_VALUE)) |
1668 | return 0; |
1669 | |
1670 | nb_ready = av_fifo_size(ctx->output_surface_ready_queue) / sizeof(NvencSurface*); |
1671 | nb_pending = av_fifo_size(ctx->output_surface_queue) / sizeof(NvencSurface*); |
1672 | if (flush) |
1673 | return nb_ready > 0; |
1674 | return (nb_ready > 0) && (nb_ready + nb_pending >= ctx->async_depth); |
1675 | } |
1676 | |
1677 | int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt, |
1678 | const AVFrame *frame, int *got_packet) |
1679 | { |
1680 | NVENCSTATUS nv_status; |
1681 | CUresult cu_res; |
1682 | CUcontext dummy; |
1683 | NvencSurface *tmpoutsurf, *inSurf; |
1684 | int res; |
1685 | |
1686 | NvencContext *ctx = avctx->priv_data; |
1687 | NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs; |
1688 | NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs; |
1689 | |
1690 | NV_ENC_PIC_PARAMS pic_params = { 0 }; |
1691 | pic_params.version = NV_ENC_PIC_PARAMS_VER; |
1692 | |
1693 | if (frame) { |
1694 | inSurf = get_free_frame(ctx); |
1695 | if (!inSurf) { |
1696 | av_log(avctx, AV_LOG_ERROR, "No free surfaces\n"); |
1697 | return AVERROR_BUG; |
1698 | } |
1699 | |
1700 | cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context); |
1701 | if (cu_res != CUDA_SUCCESS) { |
1702 | av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n"); |
1703 | return AVERROR_EXTERNAL; |
1704 | } |
1705 | |
1706 | res = nvenc_upload_frame(avctx, frame, inSurf); |
1707 | |
1708 | cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy); |
1709 | if (cu_res != CUDA_SUCCESS) { |
1710 | av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n"); |
1711 | return AVERROR_EXTERNAL; |
1712 | } |
1713 | |
1714 | if (res) { |
1715 | inSurf->lockCount = 0; |
1716 | return res; |
1717 | } |
1718 | |
1719 | pic_params.inputBuffer = inSurf->input_surface; |
1720 | pic_params.bufferFmt = inSurf->format; |
1721 | pic_params.inputWidth = avctx->width; |
1722 | pic_params.inputHeight = avctx->height; |
1723 | pic_params.inputPitch = inSurf->pitch; |
1724 | pic_params.outputBitstream = inSurf->output_surface; |
1725 | |
1726 | if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) { |
1727 | if (frame->top_field_first) |
1728 | pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM; |
1729 | else |
1730 | pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP; |
1731 | } else { |
1732 | pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME; |
1733 | } |
1734 | |
1735 | if (ctx->forced_idr >= 0 && frame->pict_type == AV_PICTURE_TYPE_I) { |
1736 | pic_params.encodePicFlags = |
1737 | ctx->forced_idr ? NV_ENC_PIC_FLAG_FORCEIDR : NV_ENC_PIC_FLAG_FORCEINTRA; |
1738 | } else { |
1739 | pic_params.encodePicFlags = 0; |
1740 | } |
1741 | |
1742 | pic_params.inputTimeStamp = frame->pts; |
1743 | |
1744 | nvenc_codec_specific_pic_params(avctx, &pic_params); |
1745 | } else { |
1746 | pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS; |
1747 | } |
1748 | |
1749 | cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context); |
1750 | if (cu_res != CUDA_SUCCESS) { |
1751 | av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n"); |
1752 | return AVERROR_EXTERNAL; |
1753 | } |
1754 | |
1755 | nv_status = p_nvenc->nvEncEncodePicture(ctx->nvencoder, &pic_params); |
1756 | |
1757 | cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy); |
1758 | if (cu_res != CUDA_SUCCESS) { |
1759 | av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n"); |
1760 | return AVERROR_EXTERNAL; |
1761 | } |
1762 | |
1763 | if (nv_status != NV_ENC_SUCCESS && |
1764 | nv_status != NV_ENC_ERR_NEED_MORE_INPUT) |
1765 | return nvenc_print_error(avctx, nv_status, "EncodePicture failed!"); |
1766 | |
1767 | if (frame) { |
1768 | av_fifo_generic_write(ctx->output_surface_queue, &inSurf, sizeof(inSurf), NULL); |
1769 | timestamp_queue_enqueue(ctx->timestamp_list, frame->pts); |
1770 | |
1771 | if (ctx->initial_pts[0] == AV_NOPTS_VALUE) |
1772 | ctx->initial_pts[0] = frame->pts; |
1773 | else if (ctx->initial_pts[1] == AV_NOPTS_VALUE) |
1774 | ctx->initial_pts[1] = frame->pts; |
1775 | } |
1776 | |
1777 | /* all the pending buffers are now ready for output */ |
1778 | if (nv_status == NV_ENC_SUCCESS) { |
1779 | while (av_fifo_size(ctx->output_surface_queue) > 0) { |
1780 | av_fifo_generic_read(ctx->output_surface_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL); |
1781 | av_fifo_generic_write(ctx->output_surface_ready_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL); |
1782 | } |
1783 | } |
1784 | |
1785 | if (output_ready(avctx, !frame)) { |
1786 | av_fifo_generic_read(ctx->output_surface_ready_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL); |
1787 | |
1788 | res = process_output_surface(avctx, pkt, tmpoutsurf); |
1789 | |
1790 | if (res) |
1791 | return res; |
1792 | |
1793 | av_assert0(tmpoutsurf->lockCount); |
1794 | tmpoutsurf->lockCount--; |
1795 | |
1796 | *got_packet = 1; |
1797 | } else { |
1798 | *got_packet = 0; |
1799 | } |
1800 | |
1801 | return 0; |
1802 | } |
1803 |