summaryrefslogtreecommitdiff
path: root/libavcodec/libopusenc.c (plain)
blob: c40fcde7ba044e12c4e5b6bef1bbc1d600124403
1/*
2 * Opus encoder using libopus
3 * Copyright (c) 2012 Nathan Caldwell
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22#include <opus.h>
23#include <opus_multistream.h>
24
25#include "libavutil/opt.h"
26#include "avcodec.h"
27#include "bytestream.h"
28#include "internal.h"
29#include "libopus.h"
30#include "vorbis.h"
31#include "audio_frame_queue.h"
32
/* Encoder settings; most fields are filled in through the AVOption table. */
typedef struct LibopusEncOpts {
    int   vbr;            /* 0 = constant bit rate, 1 = VBR, 2 = constrained VBR */
    int   application;    /* one of the OPUS_APPLICATION_* constants */
    int   packet_loss;    /* expected packet loss percentage */
    int   complexity;     /* taken from avctx->compression_level during init */
    float frame_duration; /* duration of a frame in milliseconds */
    int   packet_size;    /* samples per channel passed to the encoder per call */
    int   max_bandwidth;  /* OPUS_BANDWIDTH_* value selected from avctx->cutoff */
    int   mapping_family; /* channel mapping family; -1 is the option default */
} LibopusEncOpts;
43
44typedef struct LibopusEncContext {
45 AVClass *class;
46 OpusMSEncoder *enc;
47 int stream_count;
48 uint8_t *samples;
49 LibopusEncOpts opts;
50 AudioFrameQueue afq;
51 const uint8_t *encoder_channel_map;
52} LibopusEncContext;
53
/* Number of coupled (stereo) streams, indexed by channel count - 1. */
static const uint8_t opus_coupled_streams[8] = {
    0, /* 1 channel  */
    1, /* 2 channels */
    1, /* 3 channels */
    2, /* 4 channels */
    2, /* 5 channels */
    2, /* 6 channels */
    2, /* 7 channels */
    3, /* 8 channels */
};
57
/*
 * Opus internal to Vorbis channel order mapping written in the header.
 * The outer index is the channel count minus one.
 */
static const uint8_t opus_vorbis_channel_map[8][8] = {
    { 0 },                      /* 1 channel  */
    { 0, 1 },                   /* 2 channels */
    { 0, 2, 1 },                /* 3 channels */
    { 0, 1, 2, 3 },             /* 4 channels */
    { 0, 4, 1, 2, 3 },          /* 5 channels */
    { 0, 4, 1, 2, 3, 5 },       /* 6 channels */
    { 0, 4, 1, 2, 3, 5, 6 },    /* 7 channels */
    { 0, 6, 1, 2, 3, 4, 5, 7 }, /* 8 channels */
};
69
/*
 * libavcodec to libopus channel order mapping, passed to libopus.
 * The outer index is the channel count minus one.
 */
static const uint8_t libavcodec_libopus_channel_map[8][8] = {
    { 0 },                      /* 1 channel  */
    { 0, 1 },                   /* 2 channels */
    { 0, 1, 2 },                /* 3 channels */
    { 0, 1, 2, 3 },             /* 4 channels */
    { 0, 1, 3, 4, 2 },          /* 5 channels */
    { 0, 1, 4, 5, 2, 3 },       /* 6 channels */
    { 0, 1, 5, 6, 2, 4, 3 },    /* 7 channels */
    { 0, 1, 6, 7, 4, 5, 2, 3 }, /* 8 channels */
};
81
82static void libopus_write_header(AVCodecContext *avctx, int stream_count,
83 int coupled_stream_count,
84 int mapping_family,
85 const uint8_t *channel_mapping)
86{
87 uint8_t *p = avctx->extradata;
88 int channels = avctx->channels;
89
90 bytestream_put_buffer(&p, "OpusHead", 8);
91 bytestream_put_byte(&p, 1); /* Version */
92 bytestream_put_byte(&p, channels);
93 bytestream_put_le16(&p, avctx->initial_padding); /* Lookahead samples at 48kHz */
94 bytestream_put_le32(&p, avctx->sample_rate); /* Original sample rate */
95 bytestream_put_le16(&p, 0); /* Gain of 0dB is recommended. */
96
97 /* Channel mapping */
98 bytestream_put_byte(&p, mapping_family);
99 if (mapping_family != 0) {
100 bytestream_put_byte(&p, stream_count);
101 bytestream_put_byte(&p, coupled_stream_count);
102 bytestream_put_buffer(&p, channel_mapping, channels);
103 }
104}
105
106static int libopus_configure_encoder(AVCodecContext *avctx, OpusMSEncoder *enc,
107 LibopusEncOpts *opts)
108{
109 int ret;
110
111 if (avctx->global_quality) {
112 av_log(avctx, AV_LOG_ERROR,
113 "Quality-based encoding not supported, "
114 "please specify a bitrate and VBR setting.\n");
115 return AVERROR(EINVAL);
116 }
117
118 ret = opus_multistream_encoder_ctl(enc, OPUS_SET_BITRATE(avctx->bit_rate));
119 if (ret != OPUS_OK) {
120 av_log(avctx, AV_LOG_ERROR,
121 "Failed to set bitrate: %s\n", opus_strerror(ret));
122 return ret;
123 }
124
125 ret = opus_multistream_encoder_ctl(enc,
126 OPUS_SET_COMPLEXITY(opts->complexity));
127 if (ret != OPUS_OK)
128 av_log(avctx, AV_LOG_WARNING,
129 "Unable to set complexity: %s\n", opus_strerror(ret));
130
131 ret = opus_multistream_encoder_ctl(enc, OPUS_SET_VBR(!!opts->vbr));
132 if (ret != OPUS_OK)
133 av_log(avctx, AV_LOG_WARNING,
134 "Unable to set VBR: %s\n", opus_strerror(ret));
135
136 ret = opus_multistream_encoder_ctl(enc,
137 OPUS_SET_VBR_CONSTRAINT(opts->vbr == 2));
138 if (ret != OPUS_OK)
139 av_log(avctx, AV_LOG_WARNING,
140 "Unable to set constrained VBR: %s\n", opus_strerror(ret));
141
142 ret = opus_multistream_encoder_ctl(enc,
143 OPUS_SET_PACKET_LOSS_PERC(opts->packet_loss));
144 if (ret != OPUS_OK)
145 av_log(avctx, AV_LOG_WARNING,
146 "Unable to set expected packet loss percentage: %s\n",
147 opus_strerror(ret));
148
149 if (avctx->cutoff) {
150 ret = opus_multistream_encoder_ctl(enc,
151 OPUS_SET_MAX_BANDWIDTH(opts->max_bandwidth));
152 if (ret != OPUS_OK)
153 av_log(avctx, AV_LOG_WARNING,
154 "Unable to set maximum bandwidth: %s\n", opus_strerror(ret));
155 }
156
157 return OPUS_OK;
158}
159
160static int libopus_check_max_channels(AVCodecContext *avctx,
161 int max_channels) {
162 if (avctx->channels > max_channels) {
163 av_log(avctx, AV_LOG_ERROR, "Opus mapping family undefined for %d channels.\n",
164 avctx->channels);
165 return AVERROR(EINVAL);
166 }
167
168 return 0;
169}
170
171static int libopus_check_vorbis_layout(AVCodecContext *avctx, int mapping_family) {
172 av_assert2(avctx->channels < FF_ARRAY_ELEMS(ff_vorbis_channel_layouts));
173
174 if (!avctx->channel_layout) {
175 av_log(avctx, AV_LOG_WARNING,
176 "No channel layout specified. Opus encoder will use Vorbis "
177 "channel layout for %d channels.\n", avctx->channels);
178 } else if (avctx->channel_layout != ff_vorbis_channel_layouts[avctx->channels - 1]) {
179 char name[32];
180 av_get_channel_layout_string(name, sizeof(name), avctx->channels,
181 avctx->channel_layout);
182 av_log(avctx, AV_LOG_ERROR,
183 "Invalid channel layout %s for specified mapping family %d.\n",
184 name, mapping_family);
185
186 return AVERROR(EINVAL);
187 }
188
189 return 0;
190}
191
192static int libopus_validate_layout_and_get_channel_map(
193 AVCodecContext *avctx,
194 int mapping_family,
195 const uint8_t ** channel_map_result)
196{
197 const uint8_t * channel_map = NULL;
198 int ret;
199
200 switch (mapping_family) {
201 case -1:
202 ret = libopus_check_max_channels(avctx, 8);
203 if (ret == 0) {
204 ret = libopus_check_vorbis_layout(avctx, mapping_family);
205 /* Channels do not need to be reordered. */
206 }
207
208 break;
209 case 0:
210 ret = libopus_check_max_channels(avctx, 2);
211 if (ret == 0) {
212 ret = libopus_check_vorbis_layout(avctx, mapping_family);
213 }
214 break;
215 case 1:
216 /* Opus expects channels to be in Vorbis order. */
217 ret = libopus_check_max_channels(avctx, 8);
218 if (ret == 0) {
219 ret = libopus_check_vorbis_layout(avctx, mapping_family);
220 channel_map = ff_vorbis_channel_layout_offsets[avctx->channels - 1];
221 }
222 break;
223 case 255:
224 ret = libopus_check_max_channels(avctx, 254);
225 break;
226 default:
227 av_log(avctx, AV_LOG_WARNING,
228 "Unknown channel mapping family %d. Output channel layout may be invalid.\n",
229 mapping_family);
230 ret = 0;
231 }
232
233 *channel_map_result = channel_map;
234 return ret;
235}
236
237static av_cold int libopus_encode_init(AVCodecContext *avctx)
238{
239 LibopusEncContext *opus = avctx->priv_data;
240 OpusMSEncoder *enc;
241 uint8_t libopus_channel_mapping[255];
242 int ret = OPUS_OK;
243 int av_ret;
244 int coupled_stream_count, header_size, frame_size;
245 int mapping_family;
246
247 frame_size = opus->opts.frame_duration * 48000 / 1000;
248 switch (frame_size) {
249 case 120:
250 case 240:
251 if (opus->opts.application != OPUS_APPLICATION_RESTRICTED_LOWDELAY)
252 av_log(avctx, AV_LOG_WARNING,
253 "LPC mode cannot be used with a frame duration of less "
254 "than 10ms. Enabling restricted low-delay mode.\n"
255 "Use a longer frame duration if this is not what you want.\n");
256 /* Frame sizes less than 10 ms can only use MDCT mode, so switching to
257 * RESTRICTED_LOWDELAY avoids an unnecessary extra 2.5ms lookahead. */
258 opus->opts.application = OPUS_APPLICATION_RESTRICTED_LOWDELAY;
259 case 480:
260 case 960:
261 case 1920:
262 case 2880:
263 opus->opts.packet_size =
264 avctx->frame_size = frame_size * avctx->sample_rate / 48000;
265 break;
266 default:
267 av_log(avctx, AV_LOG_ERROR, "Invalid frame duration: %g.\n"
268 "Frame duration must be exactly one of: 2.5, 5, 10, 20, 40 or 60.\n",
269 opus->opts.frame_duration);
270 return AVERROR(EINVAL);
271 }
272
273 if (avctx->compression_level < 0 || avctx->compression_level > 10) {
274 av_log(avctx, AV_LOG_WARNING,
275 "Compression level must be in the range 0 to 10. "
276 "Defaulting to 10.\n");
277 opus->opts.complexity = 10;
278 } else {
279 opus->opts.complexity = avctx->compression_level;
280 }
281
282 if (avctx->cutoff) {
283 switch (avctx->cutoff) {
284 case 4000:
285 opus->opts.max_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
286 break;
287 case 6000:
288 opus->opts.max_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
289 break;
290 case 8000:
291 opus->opts.max_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
292 break;
293 case 12000:
294 opus->opts.max_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
295 break;
296 case 20000:
297 opus->opts.max_bandwidth = OPUS_BANDWIDTH_FULLBAND;
298 break;
299 default:
300 av_log(avctx, AV_LOG_WARNING,
301 "Invalid frequency cutoff: %d. Using default maximum bandwidth.\n"
302 "Cutoff frequency must be exactly one of: 4000, 6000, 8000, 12000 or 20000.\n",
303 avctx->cutoff);
304 avctx->cutoff = 0;
305 }
306 }
307
308 /* Channels may need to be reordered to match opus mapping. */
309 av_ret = libopus_validate_layout_and_get_channel_map(avctx, opus->opts.mapping_family,
310 &opus->encoder_channel_map);
311 if (av_ret) {
312 return av_ret;
313 }
314
315 if (opus->opts.mapping_family == -1) {
316 /* By default, use mapping family 1 for the header but use the older
317 * libopus multistream API to avoid surround masking. */
318
319 /* Set the mapping family so that the value is correct in the header */
320 mapping_family = avctx->channels > 2 ? 1 : 0;
321 coupled_stream_count = opus_coupled_streams[avctx->channels - 1];
322 opus->stream_count = avctx->channels - coupled_stream_count;
323 memcpy(libopus_channel_mapping,
324 opus_vorbis_channel_map[avctx->channels - 1],
325 avctx->channels * sizeof(*libopus_channel_mapping));
326
327 enc = opus_multistream_encoder_create(
328 avctx->sample_rate, avctx->channels, opus->stream_count,
329 coupled_stream_count,
330 libavcodec_libopus_channel_map[avctx->channels - 1],
331 opus->opts.application, &ret);
332 } else {
333 /* Use the newer multistream API. The encoder will set the channel
334 * mapping and coupled stream counts to its internal defaults and will
335 * use surround masking analysis to save bits. */
336 mapping_family = opus->opts.mapping_family;
337 enc = opus_multistream_surround_encoder_create(
338 avctx->sample_rate, avctx->channels, mapping_family,
339 &opus->stream_count, &coupled_stream_count, libopus_channel_mapping,
340 opus->opts.application, &ret);
341 }
342
343 if (ret != OPUS_OK) {
344 av_log(avctx, AV_LOG_ERROR,
345 "Failed to create encoder: %s\n", opus_strerror(ret));
346 return ff_opus_error_to_averror(ret);
347 }
348
349 if (!avctx->bit_rate) {
350 /* Sane default copied from opusenc */
351 avctx->bit_rate = 64000 * opus->stream_count +
352 32000 * coupled_stream_count;
353 av_log(avctx, AV_LOG_WARNING,
354 "No bit rate set. Defaulting to %"PRId64" bps.\n", (int64_t)avctx->bit_rate);
355 }
356
357 if (avctx->bit_rate < 500 || avctx->bit_rate > 256000 * avctx->channels) {
358 av_log(avctx, AV_LOG_ERROR, "The bit rate %"PRId64" bps is unsupported. "
359 "Please choose a value between 500 and %d.\n", (int64_t)avctx->bit_rate,
360 256000 * avctx->channels);
361 ret = AVERROR(EINVAL);
362 goto fail;
363 }
364
365 ret = libopus_configure_encoder(avctx, enc, &opus->opts);
366 if (ret != OPUS_OK) {
367 ret = ff_opus_error_to_averror(ret);
368 goto fail;
369 }
370
371 /* Header includes channel mapping table if and only if mapping family is 0 */
372 header_size = 19 + (mapping_family == 0 ? 0 : 2 + avctx->channels);
373 avctx->extradata = av_malloc(header_size + AV_INPUT_BUFFER_PADDING_SIZE);
374 if (!avctx->extradata) {
375 av_log(avctx, AV_LOG_ERROR, "Failed to allocate extradata.\n");
376 ret = AVERROR(ENOMEM);
377 goto fail;
378 }
379 avctx->extradata_size = header_size;
380
381 opus->samples = av_mallocz_array(frame_size, avctx->channels *
382 av_get_bytes_per_sample(avctx->sample_fmt));
383 if (!opus->samples) {
384 av_log(avctx, AV_LOG_ERROR, "Failed to allocate samples buffer.\n");
385 ret = AVERROR(ENOMEM);
386 goto fail;
387 }
388
389 ret = opus_multistream_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&avctx->initial_padding));
390 if (ret != OPUS_OK)
391 av_log(avctx, AV_LOG_WARNING,
392 "Unable to get number of lookahead samples: %s\n",
393 opus_strerror(ret));
394
395 libopus_write_header(avctx, opus->stream_count, coupled_stream_count,
396 mapping_family, libopus_channel_mapping);
397
398 ff_af_queue_init(avctx, &opus->afq);
399
400 opus->enc = enc;
401
402 return 0;
403
404fail:
405 opus_multistream_encoder_destroy(enc);
406 av_freep(&avctx->extradata);
407 return ret;
408}
409
/**
 * Copy interleaved samples from src to dst while permuting the channels.
 *
 * Within each sample frame, source channel c is written to destination
 * channel channel_map[c]. src and dst must not overlap.
 *
 * @param dst              destination buffer, same size as the source data
 * @param src              interleaved source samples
 * @param channel_map      nb_channels entries giving each channel's new slot
 * @param nb_channels      number of interleaved channels
 * @param nb_samples       number of sample frames to copy
 * @param bytes_per_sample size of a single sample in bytes
 */
static void libopus_copy_samples_with_channel_map(
        uint8_t *dst, const uint8_t *src, const uint8_t *channel_map,
        int nb_channels, int nb_samples, int bytes_per_sample)
{
    int s, ch;

    for (s = 0; s < nb_samples; s++) {
        /* Index of the first sample belonging to this sample frame. */
        const int frame_base = nb_channels * s;

        for (ch = 0; ch < nb_channels; ch++) {
            const size_t from = bytes_per_sample * (frame_base + ch);
            const size_t to   = bytes_per_sample * (frame_base + channel_map[ch]);

            memcpy(dst + to, src + from, bytes_per_sample);
        }
    }
}
423
424static int libopus_encode(AVCodecContext *avctx, AVPacket *avpkt,
425 const AVFrame *frame, int *got_packet_ptr)
426{
427 LibopusEncContext *opus = avctx->priv_data;
428 const int bytes_per_sample = av_get_bytes_per_sample(avctx->sample_fmt);
429 const int sample_size = avctx->channels * bytes_per_sample;
430 uint8_t *audio;
431 int ret;
432 int discard_padding;
433
434 if (frame) {
435 ret = ff_af_queue_add(&opus->afq, frame);
436 if (ret < 0)
437 return ret;
438 if (opus->encoder_channel_map != NULL) {
439 audio = opus->samples;
440 libopus_copy_samples_with_channel_map(
441 audio, frame->data[0], opus->encoder_channel_map,
442 avctx->channels, frame->nb_samples, bytes_per_sample);
443 } else if (frame->nb_samples < opus->opts.packet_size) {
444 audio = opus->samples;
445 memcpy(audio, frame->data[0], frame->nb_samples * sample_size);
446 } else
447 audio = frame->data[0];
448 } else {
449 if (!opus->afq.remaining_samples || (!opus->afq.frame_alloc && !opus->afq.frame_count))
450 return 0;
451 audio = opus->samples;
452 memset(audio, 0, opus->opts.packet_size * sample_size);
453 }
454
455 /* Maximum packet size taken from opusenc in opus-tools. 60ms packets
456 * consist of 3 frames in one packet. The maximum frame size is 1275
457 * bytes along with the largest possible packet header of 7 bytes. */
458 if ((ret = ff_alloc_packet2(avctx, avpkt, (1275 * 3 + 7) * opus->stream_count, 0)) < 0)
459 return ret;
460
461 if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT)
462 ret = opus_multistream_encode_float(opus->enc, (float *)audio,
463 opus->opts.packet_size,
464 avpkt->data, avpkt->size);
465 else
466 ret = opus_multistream_encode(opus->enc, (opus_int16 *)audio,
467 opus->opts.packet_size,
468 avpkt->data, avpkt->size);
469
470 if (ret < 0) {
471 av_log(avctx, AV_LOG_ERROR,
472 "Error encoding frame: %s\n", opus_strerror(ret));
473 return ff_opus_error_to_averror(ret);
474 }
475
476 av_shrink_packet(avpkt, ret);
477
478 ff_af_queue_remove(&opus->afq, opus->opts.packet_size,
479 &avpkt->pts, &avpkt->duration);
480
481 discard_padding = opus->opts.packet_size - avpkt->duration;
482 // Check if subtraction resulted in an overflow
483 if ((discard_padding < opus->opts.packet_size) != (avpkt->duration > 0)) {
484 av_packet_unref(avpkt);
485 av_free(avpkt);
486 return AVERROR(EINVAL);
487 }
488 if (discard_padding > 0) {
489 uint8_t* side_data = av_packet_new_side_data(avpkt,
490 AV_PKT_DATA_SKIP_SAMPLES,
491 10);
492 if(!side_data) {
493 av_packet_unref(avpkt);
494 av_free(avpkt);
495 return AVERROR(ENOMEM);
496 }
497 AV_WL32(side_data + 4, discard_padding);
498 }
499
500 *got_packet_ptr = 1;
501
502 return 0;
503}
504
505static av_cold int libopus_encode_close(AVCodecContext *avctx)
506{
507 LibopusEncContext *opus = avctx->priv_data;
508
509 opus_multistream_encoder_destroy(opus->enc);
510
511 ff_af_queue_close(&opus->afq);
512
513 av_freep(&opus->samples);
514 av_freep(&avctx->extradata);
515
516 return 0;
517}
518
519#define OFFSET(x) offsetof(LibopusEncContext, opts.x)
520#define FLAGS AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
521static const AVOption libopus_options[] = {
522 { "application", "Intended application type", OFFSET(application), AV_OPT_TYPE_INT, { .i64 = OPUS_APPLICATION_AUDIO }, OPUS_APPLICATION_VOIP, OPUS_APPLICATION_RESTRICTED_LOWDELAY, FLAGS, "application" },
523 { "voip", "Favor improved speech intelligibility", 0, AV_OPT_TYPE_CONST, { .i64 = OPUS_APPLICATION_VOIP }, 0, 0, FLAGS, "application" },
524 { "audio", "Favor faithfulness to the input", 0, AV_OPT_TYPE_CONST, { .i64 = OPUS_APPLICATION_AUDIO }, 0, 0, FLAGS, "application" },
525 { "lowdelay", "Restrict to only the lowest delay modes", 0, AV_OPT_TYPE_CONST, { .i64 = OPUS_APPLICATION_RESTRICTED_LOWDELAY }, 0, 0, FLAGS, "application" },
526 { "frame_duration", "Duration of a frame in milliseconds", OFFSET(frame_duration), AV_OPT_TYPE_FLOAT, { .dbl = 20.0 }, 2.5, 60.0, FLAGS },
527 { "packet_loss", "Expected packet loss percentage", OFFSET(packet_loss), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 100, FLAGS },
528 { "vbr", "Variable bit rate mode", OFFSET(vbr), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 2, FLAGS, "vbr" },
529 { "off", "Use constant bit rate", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "vbr" },
530 { "on", "Use variable bit rate", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "vbr" },
531 { "constrained", "Use constrained VBR", 0, AV_OPT_TYPE_CONST, { .i64 = 2 }, 0, 0, FLAGS, "vbr" },
532 { "mapping_family", "Channel Mapping Family", OFFSET(mapping_family), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 255, FLAGS, "mapping_family" },
533 { NULL },
534};
535
536static const AVClass libopus_class = {
537 .class_name = "libopus",
538 .item_name = av_default_item_name,
539 .option = libopus_options,
540 .version = LIBAVUTIL_VERSION_INT,
541};
542
543static const AVCodecDefault libopus_defaults[] = {
544 { "b", "0" },
545 { "compression_level", "10" },
546 { NULL },
547};
548
/* Input sample rates accepted by the encoder; the list ends with 0. */
static const int libopus_sample_rates[] = {
    48000,
    24000,
    16000,
    12000,
    8000,
    0,
};
552
553AVCodec ff_libopus_encoder = {
554 .name = "libopus",
555 .long_name = NULL_IF_CONFIG_SMALL("libopus Opus"),
556 .type = AVMEDIA_TYPE_AUDIO,
557 .id = AV_CODEC_ID_OPUS,
558 .priv_data_size = sizeof(LibopusEncContext),
559 .init = libopus_encode_init,
560 .encode2 = libopus_encode,
561 .close = libopus_encode_close,
562 .capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_SMALL_LAST_FRAME,
563 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
564 AV_SAMPLE_FMT_FLT,
565 AV_SAMPLE_FMT_NONE },
566 .supported_samplerates = libopus_sample_rates,
567 .priv_class = &libopus_class,
568 .defaults = libopus_defaults,
569};
570