blob: cecc8f22ab3ba96ecb198b87a1caa62d86a4296a
1 | /* |
2 | * Opus encoder |
3 | * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com> |
4 | * |
5 | * This file is part of FFmpeg. |
6 | * |
7 | * FFmpeg is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; either |
10 | * version 2.1 of the License, or (at your option) any later version. |
11 | * |
12 | * FFmpeg is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | * Lesser General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU Lesser General Public |
18 | * License along with FFmpeg; if not, write to the Free Software |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | */ |
21 | |
22 | #include "opus_celt.h" |
23 | #include "opus_pvq.h" |
24 | #include "opustab.h" |
25 | |
26 | #include "libavutil/float_dsp.h" |
27 | #include "libavutil/opt.h" |
28 | #include "internal.h" |
29 | #include "bytestream.h" |
30 | #include "audio_frame_queue.h" |
31 | |
32 | /* Determines the maximum delay the psychoacoustic system will use for lookahead */ |
33 | #define FF_BUFQUEUE_SIZE 145 |
34 | #include "libavfilter/bufferqueue.h" |
35 | |
/* Largest lookahead, in milliseconds, that the buffer queue can represent.
 * NOTE(review): presumably each queued entry is one 2.5 ms frame - confirm. */
#define OPUS_MAX_LOOKAHEAD ((FF_BUFQUEUE_SIZE - 1)*2.5f)

/* First dimension of the per-channel state kept in OpusEncContext */
#define OPUS_MAX_CHANNELS 2

/* 120 ms / 2.5 ms = 48 frames (extremely improbable, but the encoder'll work) */
#define OPUS_MAX_FRAMES_PER_PACKET 48

/* Block-size index -> samples per block, i.e. 120 << x
 * (0 -> 120, 1 -> 240, 2 -> 480, 3 -> 960) */
#define OPUS_BLOCK_SIZE(x) (2 * 15 * (1 << ((x) + 2)))

/* Sample count -> block-size index; the integer division and ff_log2()
 * make this the inverse of OPUS_BLOCK_SIZE() for exact block sizes */
#define OPUS_SAMPLES_TO_BLOCK_SIZE(x) (ff_log2((x) / (2 * 15)) - 2)
46 | |
/* Encoder options stored inside OpusEncContext */
typedef struct OpusEncOptions {
    /* Delay budget in milliseconds; ff_opus_psy_process() converts it to
     * samples and derives the packet frame size from it */
    float max_delay_ms;
} OpusEncOptions;
50 | |
/* Private state of the Opus encoder */
typedef struct OpusEncContext {
    AVClass *av_class;
    OpusEncOptions options;
    AVCodecContext *avctx;
    AudioFrameQueue afq;
    AVFloatDSPContext *dsp;
    /* One MDCT per block size; celt_frame_mdct() indexes it with the frame's
     * size index, or with 0 for transient frames */
    MDCT15Context *mdct[CELT_BLOCK_NB];
    /* Queued input frames, consumed by celt_frame_setup_input() */
    struct FFBufQueue bufqueue;

    enum OpusMode mode;
    enum OpusBandwidth bandwidth;
    /* Block-size index of every frame in the packet (see OPUS_BLOCK_SIZE()) */
    int pkt_framesize;
    /* Number of frames in the packet being built */
    int pkt_frames;

    int channels;

    /* Per-frame state, indexed by frame number within the packet */
    CeltFrame *frame;
    OpusRangeCoder *rc;

    /* Actual energy the decoder will have */
    float last_quantized_energy[OPUS_MAX_CHANNELS][CELT_MAX_BANDS];

    /* Window buffer handed to the MDCT by celt_frame_mdct() */
    DECLARE_ALIGNED(32, float, scratch)[2048];
} OpusEncContext;
75 | |
76 | static void opus_write_extradata(AVCodecContext *avctx) |
77 | { |
78 | uint8_t *bs = avctx->extradata; |
79 | |
80 | bytestream_put_buffer(&bs, "OpusHead", 8); |
81 | bytestream_put_byte (&bs, 0x1); |
82 | bytestream_put_byte (&bs, avctx->channels); |
83 | bytestream_put_le16 (&bs, avctx->initial_padding); |
84 | bytestream_put_le32 (&bs, avctx->sample_rate); |
85 | bytestream_put_le16 (&bs, 0x0); |
86 | bytestream_put_byte (&bs, 0x0); /* Default layout */ |
87 | } |
88 | |
89 | static int opus_gen_toc(OpusEncContext *s, uint8_t *toc, int *size, int *fsize_needed) |
90 | { |
91 | int i, tmp = 0x0, extended_toc = 0; |
92 | static const int toc_cfg[][OPUS_MODE_NB][OPUS_BANDWITH_NB] = { |
93 | /* Silk Hybrid Celt Layer */ |
94 | /* NB MB WB SWB FB NB MB WB SWB FB NB MB WB SWB FB Bandwidth */ |
95 | { { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 }, { 17, 0, 21, 25, 29 } }, /* 2.5 ms */ |
96 | { { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 }, { 18, 0, 22, 26, 30 } }, /* 5 ms */ |
97 | { { 1, 5, 9, 0, 0 }, { 0, 0, 0, 13, 15 }, { 19, 0, 23, 27, 31 } }, /* 10 ms */ |
98 | { { 2, 6, 10, 0, 0 }, { 0, 0, 0, 14, 16 }, { 20, 0, 24, 28, 32 } }, /* 20 ms */ |
99 | { { 3, 7, 11, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 } }, /* 40 ms */ |
100 | { { 4, 8, 12, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 } }, /* 60 ms */ |
101 | }; |
102 | int cfg = toc_cfg[s->pkt_framesize][s->mode][s->bandwidth]; |
103 | *fsize_needed = 0; |
104 | if (!cfg) |
105 | return 1; |
106 | if (s->pkt_frames == 2) { /* 2 packets */ |
107 | if (s->frame[0].framebits == s->frame[1].framebits) { /* same size */ |
108 | tmp = 0x1; |
109 | } else { /* different size */ |
110 | tmp = 0x2; |
111 | *fsize_needed = 1; /* put frame sizes in the packet */ |
112 | } |
113 | } else if (s->pkt_frames > 2) { |
114 | tmp = 0x3; |
115 | extended_toc = 1; |
116 | } |
117 | tmp |= (s->channels > 1) << 2; /* Stereo or mono */ |
118 | tmp |= (cfg - 1) << 3; /* codec configuration */ |
119 | *toc++ = tmp; |
120 | if (extended_toc) { |
121 | for (i = 0; i < (s->pkt_frames - 1); i++) |
122 | *fsize_needed |= (s->frame[i].framebits != s->frame[i + 1].framebits); |
123 | tmp = (*fsize_needed) << 7; /* vbr flag */ |
124 | tmp |= s->pkt_frames; /* frame number - can be 0 as well */ |
125 | *toc++ = tmp; |
126 | } |
127 | *size = 1 + extended_toc; |
128 | return 0; |
129 | } |
130 | |
131 | static void celt_frame_setup_input(OpusEncContext *s, CeltFrame *f) |
132 | { |
133 | int sf, ch; |
134 | AVFrame *cur = NULL; |
135 | const int subframesize = s->avctx->frame_size; |
136 | int subframes = OPUS_BLOCK_SIZE(s->pkt_framesize) / subframesize; |
137 | |
138 | cur = ff_bufqueue_get(&s->bufqueue); |
139 | |
140 | for (ch = 0; ch < f->channels; ch++) { |
141 | CeltBlock *b = &f->block[ch]; |
142 | const void *input = cur->extended_data[ch]; |
143 | size_t bps = av_get_bytes_per_sample(cur->format); |
144 | memcpy(b->overlap, input, bps*cur->nb_samples); |
145 | } |
146 | |
147 | av_frame_free(&cur); |
148 | |
149 | for (sf = 0; sf < subframes; sf++) { |
150 | if (sf != (subframes - 1)) |
151 | cur = ff_bufqueue_get(&s->bufqueue); |
152 | else |
153 | cur = ff_bufqueue_peek(&s->bufqueue, 0); |
154 | |
155 | for (ch = 0; ch < f->channels; ch++) { |
156 | CeltBlock *b = &f->block[ch]; |
157 | const void *input = cur->extended_data[ch]; |
158 | const size_t bps = av_get_bytes_per_sample(cur->format); |
159 | const size_t left = (subframesize - cur->nb_samples)*bps; |
160 | const size_t len = FFMIN(subframesize, cur->nb_samples)*bps; |
161 | memcpy(&b->samples[sf*subframesize], input, len); |
162 | memset(&b->samples[cur->nb_samples], 0, left); |
163 | } |
164 | |
165 | /* Last frame isn't popped off and freed yet - we need it for overlap */ |
166 | if (sf != (subframes - 1)) |
167 | av_frame_free(&cur); |
168 | } |
169 | } |
170 | |
171 | /* Apply the pre emphasis filter */ |
172 | static void celt_apply_preemph_filter(OpusEncContext *s, CeltFrame *f) |
173 | { |
174 | int i, sf, ch; |
175 | const int subframesize = s->avctx->frame_size; |
176 | const int subframes = OPUS_BLOCK_SIZE(s->pkt_framesize) / subframesize; |
177 | |
178 | /* Filter overlap */ |
179 | for (ch = 0; ch < f->channels; ch++) { |
180 | CeltBlock *b = &f->block[ch]; |
181 | float m = b->emph_coeff; |
182 | for (i = 0; i < CELT_OVERLAP; i++) { |
183 | float sample = b->overlap[i]; |
184 | b->overlap[i] = sample - m; |
185 | m = sample * CELT_EMPH_COEFF; |
186 | } |
187 | b->emph_coeff = m; |
188 | } |
189 | |
190 | /* Filter the samples but do not update the last subframe's coeff - overlap ^^^ */ |
191 | for (sf = 0; sf < subframes; sf++) { |
192 | for (ch = 0; ch < f->channels; ch++) { |
193 | CeltBlock *b = &f->block[ch]; |
194 | float m = b->emph_coeff; |
195 | for (i = 0; i < subframesize; i++) { |
196 | float sample = b->samples[sf*subframesize + i]; |
197 | b->samples[sf*subframesize + i] = sample - m; |
198 | m = sample * CELT_EMPH_COEFF; |
199 | } |
200 | if (sf != (subframes - 1)) |
201 | b->emph_coeff = m; |
202 | } |
203 | } |
204 | } |
205 | |
206 | /* Create the window and do the mdct */ |
207 | static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f) |
208 | { |
209 | int i, t, ch; |
210 | float *win = s->scratch; |
211 | |
212 | /* I think I can use s->dsp->vector_fmul_window for transients at least */ |
213 | if (f->transient) { |
214 | for (ch = 0; ch < f->channels; ch++) { |
215 | CeltBlock *b = &f->block[ch]; |
216 | float *src1 = b->overlap; |
217 | for (t = 0; t < f->blocks; t++) { |
218 | float *src2 = &b->samples[CELT_OVERLAP*t]; |
219 | for (i = 0; i < CELT_OVERLAP; i++) { |
220 | win[ i] = src1[i]*ff_celt_window[i]; |
221 | win[CELT_OVERLAP + i] = src2[i]*ff_celt_window[CELT_OVERLAP - i - 1]; |
222 | } |
223 | src1 = src2; |
224 | s->mdct[0]->mdct(s->mdct[0], b->coeffs + t, win, f->blocks); |
225 | } |
226 | } |
227 | } else { |
228 | int blk_len = OPUS_BLOCK_SIZE(f->size), wlen = OPUS_BLOCK_SIZE(f->size + 1); |
229 | int rwin = blk_len - CELT_OVERLAP, lap_dst = (wlen - blk_len - CELT_OVERLAP) >> 1; |
230 | for (ch = 0; ch < f->channels; ch++) { |
231 | CeltBlock *b = &f->block[ch]; |
232 | |
233 | memset(win, 0, wlen*sizeof(float)); |
234 | |
235 | memcpy(&win[lap_dst + CELT_OVERLAP], b->samples, rwin*sizeof(float)); |
236 | |
237 | /* Alignment fucks me over */ |
238 | //s->dsp->vector_fmul(&dst[lap_dst], b->overlap, ff_celt_window, CELT_OVERLAP); |
239 | //s->dsp->vector_fmul_reverse(&dst[lap_dst + blk_len - CELT_OVERLAP], b->samples, ff_celt_window, CELT_OVERLAP); |
240 | |
241 | for (i = 0; i < CELT_OVERLAP; i++) { |
242 | win[lap_dst + i] = b->overlap[i] *ff_celt_window[i]; |
243 | win[lap_dst + blk_len + i] = b->samples[rwin + i]*ff_celt_window[CELT_OVERLAP - i - 1]; |
244 | } |
245 | |
246 | s->mdct[f->size]->mdct(s->mdct[f->size], b->coeffs, win, 1); |
247 | } |
248 | } |
249 | } |
250 | |
251 | /* Fills the bands and normalizes them */ |
252 | static int celt_frame_map_norm_bands(OpusEncContext *s, CeltFrame *f) |
253 | { |
254 | int i, j, ch, noise = 0; |
255 | |
256 | for (ch = 0; ch < f->channels; ch++) { |
257 | CeltBlock *block = &f->block[ch]; |
258 | float *start = block->coeffs; |
259 | for (i = 0; i < CELT_MAX_BANDS; i++) { |
260 | float ener = 0.0f; |
261 | |
262 | /* Calculate band bins */ |
263 | block->band_bins[i] = ff_celt_freq_range[i] << f->size; |
264 | block->band_coeffs[i] = start; |
265 | start += block->band_bins[i]; |
266 | |
267 | /* Normalize band energy */ |
268 | for (j = 0; j < block->band_bins[i]; j++) |
269 | ener += block->band_coeffs[i][j]*block->band_coeffs[i][j]; |
270 | |
271 | block->lin_energy[i] = sqrtf(ener) + FLT_EPSILON; |
272 | ener = 1.0f/block->lin_energy[i]; |
273 | |
274 | for (j = 0; j < block->band_bins[i]; j++) |
275 | block->band_coeffs[i][j] *= ener; |
276 | |
277 | block->energy[i] = log2f(block->lin_energy[i]) - ff_celt_mean_energy[i]; |
278 | |
279 | /* CELT_ENERGY_SILENCE is what the decoder uses and its not -infinity */ |
280 | block->energy[i] = FFMAX(block->energy[i], CELT_ENERGY_SILENCE); |
281 | noise |= block->energy[i] > CELT_ENERGY_SILENCE; |
282 | } |
283 | } |
284 | return !noise; |
285 | } |
286 | |
287 | static void celt_enc_tf(OpusEncContext *s, OpusRangeCoder *rc, CeltFrame *f) |
288 | { |
289 | int i, tf_select = 0, diff = 0, tf_changed = 0, tf_select_needed; |
290 | int bits = f->transient ? 2 : 4; |
291 | |
292 | tf_select_needed = ((f->size && (opus_rc_tell(rc) + bits + 1) <= f->framebits)); |
293 | |
294 | for (i = f->start_band; i < f->end_band; i++) { |
295 | if ((opus_rc_tell(rc) + bits + tf_select_needed) <= f->framebits) { |
296 | const int tbit = (diff ^ 1) == f->tf_change[i]; |
297 | ff_opus_rc_enc_log(rc, tbit, bits); |
298 | diff ^= tbit; |
299 | tf_changed |= diff; |
300 | } |
301 | bits = f->transient ? 4 : 5; |
302 | } |
303 | |
304 | if (tf_select_needed && ff_celt_tf_select[f->size][f->transient][0][tf_changed] != |
305 | ff_celt_tf_select[f->size][f->transient][1][tf_changed]) { |
306 | ff_opus_rc_enc_log(rc, f->tf_select, 1); |
307 | tf_select = f->tf_select; |
308 | } |
309 | |
310 | for (i = f->start_band; i < f->end_band; i++) |
311 | f->tf_change[i] = ff_celt_tf_select[f->size][f->transient][tf_select][f->tf_change[i]]; |
312 | } |
313 | |
/*
 * Write the allocation side information (spread, band boosts, allocation
 * trim, band skip flags, stereo flags) to the range coder and derive the
 * per-band budgets of the frame.
 *
 * Inputs read from f: spread, alloc_boost[], alloc_trim, skip_band_floor,
 * intensity_stereo, dual_stereo, transient, size, channels, framebits and
 * the band range.
 * Outputs written to f: caps[], anticollapse_needed, coded_bands, pulses[],
 * fine_bits[], fine_priority[] and remaining. intensity_stereo may be
 * clamped to coded_bands.
 *
 * Quantities built from "framebits << 3" (tbits_8ths, boost[], pulses[], ...)
 * are in eighths of a bit.
 * NOTE(review): opus_rc_tell_frac() is assumed to report in the same unit.
 */
static void celt_bitalloc(OpusEncContext *s, OpusRangeCoder *rc, CeltFrame *f)
{
    int i, j, low, high, total, done, bandbits, remaining, tbits_8ths;
    int skip_startband = f->start_band;
    int skip_bit = 0;
    int intensitystereo_bit = 0;
    int dualstereo_bit = 0;
    int dynalloc = 6;
    int extrabits = 0;

    int *cap = f->caps;
    int boost[CELT_MAX_BANDS];
    int trim_offset[CELT_MAX_BANDS];
    int threshold[CELT_MAX_BANDS];
    int bits1[CELT_MAX_BANDS];
    int bits2[CELT_MAX_BANDS];

    /* Tell the spread to the decoder */
    if (opus_rc_tell(rc) + 4 <= f->framebits)
        ff_opus_rc_enc_cdf(rc, f->spread, ff_celt_model_spread);

    /* Generate static allocation caps */
    for (i = 0; i < CELT_MAX_BANDS; i++) {
        cap[i] = (ff_celt_static_caps[f->size][f->channels - 1][i] + 64)
                 * ff_celt_freq_range[i] << (f->channels - 1) << f->size >> 2;
    }

    /* Band boosts */
    tbits_8ths = f->framebits << 3;
    for (i = f->start_band; i < f->end_band; i++) {
        int quanta, b_dynalloc, boost_amount = f->alloc_boost[i];

        boost[i] = 0;

        /* Size of one boost step for this band */
        quanta = ff_celt_freq_range[i] << (f->channels - 1) << f->size;
        quanta = FFMIN(quanta << 3, FFMAX(6 << 3, quanta));
        b_dynalloc = dynalloc;

        /* One flag per boost step, terminated by a zero flag or by running
         * out of budget / hitting the band's cap */
        while (opus_rc_tell_frac(rc) + (b_dynalloc << 3) < tbits_8ths && boost[i] < cap[i]) {
            int is_boost = boost_amount--;

            ff_opus_rc_enc_log(rc, is_boost, b_dynalloc);
            if (!is_boost)
                break;

            boost[i] += quanta;
            tbits_8ths -= quanta;

            /* Flags after the first one in a band are coded with 1 bit */
            b_dynalloc = 1;
        }

        /* Every boosted band makes the first flag of later bands cheaper,
         * down to a minimum of 2 */
        if (boost[i])
            dynalloc = FFMAX(2, dynalloc - 1);
    }

    /* Put allocation trim */
    if (opus_rc_tell_frac(rc) + (6 << 3) <= tbits_8ths)
        ff_opus_rc_enc_cdf(rc, f->alloc_trim, ff_celt_model_alloc_trim);

    /* Anti-collapse bit reservation */
    tbits_8ths = (f->framebits << 3) - opus_rc_tell_frac(rc) - 1;
    f->anticollapse_needed = 0;
    if (f->transient && f->size >= 2 && tbits_8ths >= ((f->size + 2) << 3))
        f->anticollapse_needed = 1 << 3;
    tbits_8ths -= f->anticollapse_needed;

    /* Band skip bit reservation */
    if (tbits_8ths >= 1 << 3)
        skip_bit = 1 << 3;
    tbits_8ths -= skip_bit;

    /* Intensity/dual stereo bit reservation */
    if (f->channels == 2) {
        intensitystereo_bit = ff_celt_log2_frac[f->end_band - f->start_band];
        if (intensitystereo_bit <= tbits_8ths) {
            tbits_8ths -= intensitystereo_bit;
            if (tbits_8ths >= 1 << 3) {
                dualstereo_bit = 1 << 3;
                tbits_8ths -= 1 << 3;
            }
        } else {
            intensitystereo_bit = 0;
        }
    }

    /* Trim offsets */
    for (i = f->start_band; i < f->end_band; i++) {
        int trim = f->alloc_trim - 5 - f->size;
        int band = ff_celt_freq_range[i] * (f->end_band - i - 1);
        int duration = f->size + 3;
        int scale = duration + f->channels - 1;

        /* PVQ minimum allocation threshold, below this value the band is
         * skipped */
        threshold[i] = FFMAX(3 * ff_celt_freq_range[i] << duration >> 4,
                             f->channels << 3);

        trim_offset[i] = trim * (band << scale) >> 6;

        if (ff_celt_freq_range[i] << f->size == 1)
            trim_offset[i] -= f->channels << 3;
    }

    /* Coarse bisection: find the static allocation vector with the highest
     * index whose total still fits in the budget */
    low = 1;
    high = CELT_VECTORS - 1;
    while (low <= high) {
        int center = (low + high) >> 1;
        done = total = 0;

        /* Walk from the top band down; once one band reaches its threshold,
         * all lower bands are counted in full */
        for (i = f->end_band - 1; i >= f->start_band; i--) {
            bandbits = ff_celt_freq_range[i] * ff_celt_static_alloc[center][i]
                       << (f->channels - 1) << f->size >> 2;

            if (bandbits)
                bandbits = FFMAX(0, bandbits + trim_offset[i]);
            bandbits += boost[i];

            if (bandbits >= threshold[i] || done) {
                done = 1;
                total += FFMIN(bandbits, cap[i]);
            } else if (bandbits >= f->channels << 3)
                total += f->channels << 3;
        }

        if (total > tbits_8ths)
            high = center - 1;
        else
            low = center + 1;
    }
    /* low is now the fitting vector, high the next one above it */
    high = low--;

    /* Per band: bits1 is the allocation of the lower vector, bits2 the
     * extra amount needed to reach the upper vector (or the cap) */
    for (i = f->start_band; i < f->end_band; i++) {
        bits1[i] = ff_celt_freq_range[i] * ff_celt_static_alloc[low][i]
                   << (f->channels - 1) << f->size >> 2;
        bits2[i] = high >= CELT_VECTORS ? cap[i] :
                   ff_celt_freq_range[i] * ff_celt_static_alloc[high][i]
                   << (f->channels - 1) << f->size >> 2;

        if (bits1[i])
            bits1[i] = FFMAX(0, bits1[i] + trim_offset[i]);
        if (bits2[i])
            bits2[i] = FFMAX(0, bits2[i] + trim_offset[i]);
        if (low)
            bits1[i] += boost[i];
        bits2[i] += boost[i];

        /* Bands at or below the last boosted band are never skipped */
        if (boost[i])
            skip_startband = i;
        bits2[i] = FFMAX(0, bits2[i] - bits1[i]);
    }

    /* Fine bisection: find the interpolation factor between the two
     * vectors, in steps of 1 / (1 << CELT_ALLOC_STEPS) */
    low = 0;
    high = 1 << CELT_ALLOC_STEPS;
    for (i = 0; i < CELT_ALLOC_STEPS; i++) {
        int center = (low + high) >> 1;
        done = total = 0;

        for (j = f->end_band - 1; j >= f->start_band; j--) {
            bandbits = bits1[j] + (center * bits2[j] >> CELT_ALLOC_STEPS);

            if (bandbits >= threshold[j] || done) {
                done = 1;
                total += FFMIN(bandbits, cap[j]);
            } else if (bandbits >= f->channels << 3)
                total += f->channels << 3;
        }
        if (total > tbits_8ths)
            high = center;
        else
            low = center;
    }

    /* Apply the chosen interpolation factor to get the initial per-band
     * allocation */
    done = total = 0;
    for (i = f->end_band - 1; i >= f->start_band; i--) {
        bandbits = bits1[i] + (low * bits2[i] >> CELT_ALLOC_STEPS);

        if (bandbits >= threshold[i] || done)
            done = 1;
        else
            bandbits = (bandbits >= f->channels << 3) ?
                       f->channels << 3 : 0;

        bandbits = FFMIN(bandbits, cap[i]);
        f->pulses[i] = bandbits;
        total += bandbits;
    }

    /* Band skipping */
    for (f->coded_bands = f->end_band; ; f->coded_bands--) {
        int allocation;
        j = f->coded_bands - 1;

        if (j == skip_startband) {
            /* all remaining bands are not skipped */
            tbits_8ths += skip_bit;
            break;
        }

        /* determine the number of bits available for coding "do not skip" markers */
        remaining = tbits_8ths - total;
        bandbits = remaining / (ff_celt_freq_bands[j+1] - ff_celt_freq_bands[f->start_band]);
        remaining -= bandbits * (ff_celt_freq_bands[j+1] - ff_celt_freq_bands[f->start_band]);
        allocation = f->pulses[j] + bandbits * ff_celt_freq_range[j]
                     + FFMAX(0, remaining - (ff_celt_freq_bands[j] - ff_celt_freq_bands[f->start_band]));

        /* a "do not skip" marker is only coded if the allocation is
           above the chosen threshold */
        if (allocation >= FFMAX(threshold[j], (f->channels + 1) << 3)) {
            const int do_not_skip = f->coded_bands <= f->skip_band_floor;
            ff_opus_rc_enc_log(rc, do_not_skip, 1);
            if (do_not_skip)
                break;

            /* account for the marker bit that was just spent */
            total += 1 << 3;
            allocation -= 1 << 3;
        }

        /* the band is skipped, so reclaim its bits */
        total -= f->pulses[j];
        if (intensitystereo_bit) {
            /* the intensity stereo reservation shrinks with the band count */
            total -= intensitystereo_bit;
            intensitystereo_bit = ff_celt_log2_frac[j - f->start_band];
            total += intensitystereo_bit;
        }

        /* a skipped band keeps at most one bit per channel */
        total += f->pulses[j] = (allocation >= f->channels << 3) ? f->channels << 3 : 0;
    }

    /* Encode stereo flags */
    if (intensitystereo_bit) {
        f->intensity_stereo = FFMIN(f->intensity_stereo, f->coded_bands);
        ff_opus_rc_enc_uint(rc, f->intensity_stereo, f->coded_bands + 1 - f->start_band);
    }
    if (f->intensity_stereo <= f->start_band)
        tbits_8ths += dualstereo_bit; /* no intensity stereo means no dual stereo */
    else if (dualstereo_bit)
        ff_opus_rc_enc_log(rc, f->dual_stereo, 1);

    /* Supply the remaining bits in this frame to lower bands */
    remaining = tbits_8ths - total;
    bandbits = remaining / (ff_celt_freq_bands[f->coded_bands] - ff_celt_freq_bands[f->start_band]);
    remaining -= bandbits * (ff_celt_freq_bands[f->coded_bands] - ff_celt_freq_bands[f->start_band]);
    for (i = f->start_band; i < f->coded_bands; i++) {
        int bits = FFMIN(remaining, ff_celt_freq_range[i]);

        f->pulses[i] += bits + bandbits * ff_celt_freq_range[i];
        remaining -= bits;
    }

    /* Finally determine the allocation */
    for (i = f->start_band; i < f->coded_bands; i++) {
        int N = ff_celt_freq_range[i] << f->size;
        int prev_extra = extrabits;
        /* bits the previous band could not use are carried over */
        f->pulses[i] += extrabits;

        if (N > 1) {
            int dof;        // degrees of freedom
            int temp;       // dof * channels * log(dof)
            int offset;     // fine energy quantization offset, i.e.
                            // extra bits assigned over the standard
                            // totalbits/dof
            int fine_bits, max_bits;

            extrabits = FFMAX(0, f->pulses[i] - cap[i]);
            f->pulses[i] -= extrabits;

            /* intensity stereo makes use of an extra degree of freedom */
            dof = N * f->channels + (f->channels == 2 && N > 2 && !f->dual_stereo && i < f->intensity_stereo);
            temp = dof * (ff_celt_log_freq_range[i] + (f->size << 3));
            offset = (temp >> 1) - dof * CELT_FINE_OFFSET;
            if (N == 2) /* dof=2 is the only case that doesn't fit the model */
                offset += dof << 1;

            /* grant an additional bias for the first and second pulses */
            if (f->pulses[i] + offset < 2 * (dof << 3))
                offset += temp >> 2;
            else if (f->pulses[i] + offset < 3 * (dof << 3))
                offset += temp >> 3;

            fine_bits = (f->pulses[i] + offset + (dof << 2)) / (dof << 3);
            max_bits = FFMIN((f->pulses[i] >> 3) >> (f->channels - 1), CELT_MAX_FINE_BITS);

            max_bits = FFMAX(max_bits, 0);

            f->fine_bits[i] = av_clip(fine_bits, 0, max_bits);

            /* if fine_bits was rounded down or capped,
               give priority for the final fine energy pass */
            f->fine_priority[i] = (f->fine_bits[i] * (dof << 3) >= f->pulses[i] + offset);

            /* the remaining bits are assigned to PVQ */
            f->pulses[i] -= f->fine_bits[i] << (f->channels - 1) << 3;
        } else {
            /* all bits go to fine energy except for the sign bit */
            extrabits = FFMAX(0, f->pulses[i] - (f->channels << 3));
            f->pulses[i] -= extrabits;
            f->fine_bits[i] = 0;
            f->fine_priority[i] = 1;
        }

        /* hand back a limited number of extra fine energy bits to this band */
        if (extrabits > 0) {
            int fineextra = FFMIN(extrabits >> (f->channels + 2),
                                  CELT_MAX_FINE_BITS - f->fine_bits[i]);
            f->fine_bits[i] += fineextra;

            fineextra <<= f->channels + 2;
            f->fine_priority[i] = (fineextra >= extrabits - prev_extra);
            extrabits -= fineextra;
        }
    }
    /* whatever is still unused is stored as the starting balance read by
     * celt_quant_bands() */
    f->remaining = extrabits;

    /* skipped bands dedicate all of their bits for fine energy */
    for (; i < f->end_band; i++) {
        f->fine_bits[i] = f->pulses[i] >> (f->channels - 1) >> 3;
        f->pulses[i] = 0;
        f->fine_priority[i] = f->fine_bits[i] < 1;
    }
}
638 | |
639 | static void celt_quant_coarse(OpusEncContext *s, OpusRangeCoder *rc, CeltFrame *f) |
640 | { |
641 | int i, ch; |
642 | float alpha, beta, prev[2] = { 0, 0 }; |
643 | const uint8_t *pmod = ff_celt_coarse_energy_dist[f->size][f->intra]; |
644 | |
645 | /* Inter is really just differential coding */ |
646 | if (opus_rc_tell(rc) + 3 <= f->framebits) |
647 | ff_opus_rc_enc_log(rc, f->intra, 3); |
648 | else |
649 | f->intra = 0; |
650 | |
651 | if (f->intra) { |
652 | alpha = 0.0f; |
653 | beta = 1.0f - 4915.0f/32768.0f; |
654 | } else { |
655 | alpha = ff_celt_alpha_coef[f->size]; |
656 | beta = 1.0f - ff_celt_beta_coef[f->size]; |
657 | } |
658 | |
659 | for (i = f->start_band; i < f->end_band; i++) { |
660 | for (ch = 0; ch < f->channels; ch++) { |
661 | CeltBlock *block = &f->block[ch]; |
662 | const int left = f->framebits - opus_rc_tell(rc); |
663 | const float last = FFMAX(-9.0f, s->last_quantized_energy[ch][i]); |
664 | float diff = block->energy[i] - prev[ch] - last*alpha; |
665 | int q_en = lrintf(diff); |
666 | if (left >= 15) { |
667 | ff_opus_rc_enc_laplace(rc, &q_en, pmod[i << 1] << 7, pmod[(i << 1) + 1] << 6); |
668 | } else if (left >= 2) { |
669 | q_en = av_clip(q_en, -1, 1); |
670 | ff_opus_rc_enc_cdf(rc, 2*q_en + 3*(q_en < 0), ff_celt_model_energy_small); |
671 | } else if (left >= 1) { |
672 | q_en = av_clip(q_en, -1, 0); |
673 | ff_opus_rc_enc_log(rc, (q_en & 1), 1); |
674 | } else q_en = -1; |
675 | |
676 | block->error_energy[i] = q_en - diff; |
677 | prev[ch] += beta * q_en; |
678 | } |
679 | } |
680 | } |
681 | |
682 | static void celt_quant_fine(OpusEncContext *s, OpusRangeCoder *rc, CeltFrame *f) |
683 | { |
684 | int i, ch; |
685 | for (i = f->start_band; i < f->end_band; i++) { |
686 | if (!f->fine_bits[i]) |
687 | continue; |
688 | for (ch = 0; ch < f->channels; ch++) { |
689 | CeltBlock *block = &f->block[ch]; |
690 | int quant, lim = (1 << f->fine_bits[i]); |
691 | float offset, diff = 0.5f - block->error_energy[i]; |
692 | quant = av_clip(floor(diff*lim), 0, lim - 1); |
693 | ff_opus_rc_put_raw(rc, quant, f->fine_bits[i]); |
694 | offset = 0.5f - ((quant + 0.5f) * (1 << (14 - f->fine_bits[i])) / 16384.0f); |
695 | block->error_energy[i] -= offset; |
696 | } |
697 | } |
698 | } |
699 | |
700 | static void celt_quant_final(OpusEncContext *s, OpusRangeCoder *rc, CeltFrame *f) |
701 | { |
702 | int i, ch, priority; |
703 | for (priority = 0; priority < 2; priority++) { |
704 | for (i = f->start_band; i < f->end_band && (f->framebits - opus_rc_tell(rc)) >= f->channels; i++) { |
705 | if (f->fine_priority[i] != priority || f->fine_bits[i] >= CELT_MAX_FINE_BITS) |
706 | continue; |
707 | for (ch = 0; ch < f->channels; ch++) { |
708 | CeltBlock *block = &f->block[ch]; |
709 | const float err = block->error_energy[i]; |
710 | const float offset = 0.5f * (1 << (14 - f->fine_bits[i] - 1)) / 16384.0f; |
711 | const int sign = FFABS(err + offset) < FFABS(err - offset); |
712 | ff_opus_rc_put_raw(rc, sign, 1); |
713 | block->error_energy[i] -= offset*(1 - 2*sign); |
714 | } |
715 | } |
716 | } |
717 | } |
718 | |
/*
 * Encode the normalized coefficients of every band with
 * ff_celt_encode_band(), handing each band its bit budget and the folding
 * source (lowband) it may use.
 *
 * Reads the allocation produced by celt_bitalloc() (pulses[], coded_bands,
 * remaining, anticollapse_needed) and updates f->remaining, f->remaining2
 * and the per-band collapse masks. f->dual_stereo is cleared once the
 * intensity stereo band is reached.
 */
static void celt_quant_bands(OpusEncContext *s, OpusRangeCoder *rc, CeltFrame *f)
{
    float lowband_scratch[8 * 22];
    /* Handed to ff_celt_encode_band() at the band's offset and as the
     * folding source of later bands; the second half (norm2) is used for
     * the second channel in dual stereo */
    float norm[2 * 8 * 100];

    /* Total budget in eighths of a bit, minus the anti-collapse reservation */
    int totalbits = (f->framebits << 3) - f->anticollapse_needed;

    int update_lowband = 1;
    int lowband_offset = 0;

    int i, j;

    for (i = f->start_band; i < f->end_band; i++) {
        int band_offset = ff_celt_freq_bands[i] << f->size;
        int band_size = ff_celt_freq_range[i] << f->size;
        float *X = f->block[0].coeffs + band_offset;
        float *Y = (f->channels == 2) ? f->block[1].coeffs + band_offset : NULL;

        int consumed = opus_rc_tell_frac(rc);
        float *norm2 = norm + 8 * 100;
        int effective_lowband = -1;
        unsigned int cm[2];
        int b;

        /* Compute how many bits we want to allocate to this band */
        if (i != f->start_band)
            f->remaining -= consumed;
        f->remaining2 = totalbits - consumed - 1;
        if (i <= f->coded_bands - 1) {
            /* Spread the running balance over up to the next three bands */
            int curr_balance = f->remaining / FFMIN(3, f->coded_bands-i);
            b = av_clip_uintp2(FFMIN(f->remaining2 + 1, f->pulses[i] + curr_balance), 14);
        } else
            b = 0;

        /* Advance the folding source to this band once the band is far
         * enough above the start band to have one band's width of lower
         * content to fold from */
        if (ff_celt_freq_bands[i] - ff_celt_freq_range[i] >= ff_celt_freq_bands[f->start_band] &&
            (update_lowband || lowband_offset == 0))
            lowband_offset = i;

        /* Get a conservative estimate of the collapse_mask's for the bands we're
           going to be folding from. */
        if (lowband_offset != 0 && (f->spread != CELT_SPREAD_AGGRESSIVE ||
                                    f->blocks > 1 || f->tf_change[i] < 0)) {
            int foldstart, foldend;

            /* This ensures we never repeat spectral content within one band */
            effective_lowband = FFMAX(ff_celt_freq_bands[f->start_band],
                                      ff_celt_freq_bands[lowband_offset] - ff_celt_freq_range[i]);
            /* Find the range of bands [foldstart, foldend) that overlaps the
             * folding source */
            foldstart = lowband_offset;
            while (ff_celt_freq_bands[--foldstart] > effective_lowband);
            foldend = lowband_offset - 1;
            while (ff_celt_freq_bands[++foldend] < effective_lowband + ff_celt_freq_range[i]);

            cm[0] = cm[1] = 0;
            for (j = foldstart; j < foldend; j++) {
                cm[0] |= f->block[0].collapse_masks[j];
                cm[1] |= f->block[f->channels - 1].collapse_masks[j];
            }
        } else
            /* Otherwise, we'll be using the LCG to fold, so all blocks will (almost
               always) be non-zero.*/
            cm[0] = cm[1] = (1 << f->blocks) - 1;

        if (f->dual_stereo && i == f->intensity_stereo) {
            /* Switch off dual stereo to do intensity */
            f->dual_stereo = 0;
            /* Merge both channels' folding history into a single one */
            for (j = ff_celt_freq_bands[f->start_band] << f->size; j < band_offset; j++)
                norm[j] = (norm[j] + norm2[j]) / 2;
        }

        if (f->dual_stereo) {
            /* Each channel is coded on its own with half of the budget */
            cm[0] = ff_celt_encode_band(f, rc, i, X, NULL, band_size, b / 2, f->blocks,
                                        effective_lowband != -1 ? norm + (effective_lowband << f->size) : NULL, f->size,
                                        norm + band_offset, 0, 1.0f, lowband_scratch, cm[0]);

            cm[1] = ff_celt_encode_band(f, rc, i, Y, NULL, band_size, b/2, f->blocks,
                                        effective_lowband != -1 ? norm2 + (effective_lowband << f->size) : NULL, f->size,
                                        norm2 + band_offset, 0, 1.0f, lowband_scratch, cm[1]);
        } else {
            /* Mono, or both channels coded jointly (Y is NULL for mono) */
            cm[0] = ff_celt_encode_band(f, rc, i, X, Y, band_size, b, f->blocks,
                                        effective_lowband != -1 ? norm + (effective_lowband << f->size) : NULL, f->size,
                                        norm + band_offset, 0, 1.0f, lowband_scratch, cm[0]|cm[1]);
            cm[1] = cm[0];
        }

        f->block[0].collapse_masks[i] = (uint8_t)cm[0];
        f->block[f->channels - 1].collapse_masks[i] = (uint8_t)cm[1];
        /* Add the allocation back along with the coder position sampled
         * before encoding; the next iteration subtracts the new position,
         * which leaves the unused part of the allocation in the balance */
        f->remaining += f->pulses[i] + consumed;

        /* Update the folding position only as long as we have 1 bit/sample depth */
        update_lowband = (b > band_size << 3);
    }
}
811 | |
812 | static void celt_encode_frame(OpusEncContext *s, OpusRangeCoder *rc, CeltFrame *f) |
813 | { |
814 | int i, ch; |
815 | |
816 | celt_frame_setup_input(s, f); |
817 | celt_apply_preemph_filter(s, f); |
818 | if (f->pfilter) { |
819 | /* Not implemented */ |
820 | } |
821 | celt_frame_mdct(s, f); |
822 | f->silence = celt_frame_map_norm_bands(s, f); |
823 | if (f->silence) { |
824 | f->framebits = 1; |
825 | return; |
826 | } |
827 | |
828 | ff_opus_rc_enc_log(rc, f->silence, 15); |
829 | |
830 | if (!f->start_band && opus_rc_tell(rc) + 16 <= f->framebits) |
831 | ff_opus_rc_enc_log(rc, f->pfilter, 1); |
832 | |
833 | if (f->pfilter) { |
834 | /* Not implemented */ |
835 | } |
836 | |
837 | if (f->size && opus_rc_tell(rc) + 3 <= f->framebits) |
838 | ff_opus_rc_enc_log(rc, f->transient, 3); |
839 | |
840 | celt_quant_coarse (s, rc, f); |
841 | celt_enc_tf (s, rc, f); |
842 | celt_bitalloc (s, rc, f); |
843 | celt_quant_fine (s, rc, f); |
844 | celt_quant_bands (s, rc, f); |
845 | |
846 | if (f->anticollapse_needed) |
847 | ff_opus_rc_put_raw(rc, f->anticollapse, 1); |
848 | |
849 | celt_quant_final(s, rc, f); |
850 | |
851 | for (ch = 0; ch < f->channels; ch++) { |
852 | CeltBlock *block = &f->block[ch]; |
853 | for (i = 0; i < CELT_MAX_BANDS; i++) |
854 | s->last_quantized_energy[ch][i] = block->energy[i] + block->error_energy[i]; |
855 | } |
856 | } |
857 | |
858 | static void ff_opus_psy_process(OpusEncContext *s, int end, int *need_more) |
859 | { |
860 | int max_delay_samples = (s->options.max_delay_ms*s->avctx->sample_rate)/1000; |
861 | int max_bsize = FFMIN(OPUS_SAMPLES_TO_BLOCK_SIZE(max_delay_samples), CELT_BLOCK_960); |
862 | |
863 | s->pkt_frames = 1; |
864 | s->pkt_framesize = max_bsize; |
865 | s->mode = OPUS_MODE_CELT; |
866 | s->bandwidth = OPUS_BANDWIDTH_FULLBAND; |
867 | |
868 | *need_more = s->bufqueue.available*s->avctx->frame_size < (max_delay_samples + CELT_OVERLAP); |
869 | /* Don't request more if we start being flushed with NULL frames */ |
870 | *need_more = !end && *need_more; |
871 | } |
872 | |
873 | static void ff_opus_psy_celt_frame_setup(OpusEncContext *s, CeltFrame *f, int index) |
874 | { |
875 | int frame_size = OPUS_BLOCK_SIZE(s->pkt_framesize); |
876 | |
877 | f->avctx = s->avctx; |
878 | f->dsp = s->dsp; |
879 | f->start_band = (s->mode == OPUS_MODE_HYBRID) ? 17 : 0; |
880 | f->end_band = ff_celt_band_end[s->bandwidth]; |
881 | f->channels = s->channels; |
882 | f->size = s->pkt_framesize; |
883 | |
884 | /* Decisions */ |
885 | f->silence = 0; |
886 | f->pfilter = 0; |
887 | f->transient = 0; |
888 | f->intra = 1; |
889 | f->tf_select = 0; |
890 | f->anticollapse = 0; |
891 | f->alloc_trim = 5; |
892 | f->skip_band_floor = f->end_band; |
893 | f->intensity_stereo = f->end_band; |
894 | f->dual_stereo = 0; |
895 | f->spread = CELT_SPREAD_NORMAL; |
896 | memset(f->tf_change, 0, sizeof(int)*CELT_MAX_BANDS); |
897 | memset(f->alloc_boost, 0, sizeof(int)*CELT_MAX_BANDS); |
898 | |
899 | f->blocks = f->transient ? frame_size/CELT_OVERLAP : 1; |
900 | f->framebits = FFALIGN(lrintf((double)s->avctx->bit_rate/(s->avctx->sample_rate/frame_size)), 8); |
901 | } |
902 | |
903 | static void opus_packet_assembler(OpusEncContext *s, AVPacket *avpkt) |
904 | { |
905 | int i, offset, fsize_needed; |
906 | |
907 | /* Write toc */ |
908 | opus_gen_toc(s, avpkt->data, &offset, &fsize_needed); |
909 | |
910 | for (i = 0; i < s->pkt_frames; i++) { |
911 | ff_opus_rc_enc_end(&s->rc[i], avpkt->data + offset, s->frame[i].framebits >> 3); |
912 | offset += s->frame[i].framebits >> 3; |
913 | } |
914 | |
915 | avpkt->size = offset; |
916 | } |
917 | |
918 | /* Used as overlap for the first frame and padding for the last encoded packet */ |
919 | static AVFrame *spawn_empty_frame(OpusEncContext *s) |
920 | { |
921 | int i; |
922 | AVFrame *f = av_frame_alloc(); |
923 | if (!f) |
924 | return NULL; |
925 | f->format = s->avctx->sample_fmt; |
926 | f->nb_samples = s->avctx->frame_size; |
927 | f->channel_layout = s->avctx->channel_layout; |
928 | if (av_frame_get_buffer(f, 4)) { |
929 | av_frame_free(&f); |
930 | return NULL; |
931 | } |
932 | for (i = 0; i < s->channels; i++) { |
933 | size_t bps = av_get_bytes_per_sample(f->format); |
934 | memset(f->extended_data[i], 0, bps*f->nb_samples); |
935 | } |
936 | return f; |
937 | } |
938 | |
939 | static int opus_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, |
940 | const AVFrame *frame, int *got_packet_ptr) |
941 | { |
942 | OpusEncContext *s = avctx->priv_data; |
943 | int i, ret, frame_size, need_more, alloc_size = 0; |
944 | |
945 | if (frame) { /* Add new frame to queue */ |
946 | if ((ret = ff_af_queue_add(&s->afq, frame)) < 0) |
947 | return ret; |
948 | ff_bufqueue_add(avctx, &s->bufqueue, av_frame_clone(frame)); |
949 | } else { |
950 | if (!s->afq.remaining_samples) |
951 | return 0; /* We've been flushed and there's nothing left to encode */ |
952 | } |
953 | |
954 | /* Run the psychoacoustic system */ |
955 | ff_opus_psy_process(s, !frame, &need_more); |
956 | |
957 | /* Get more samples for lookahead/encoding */ |
958 | if (need_more) |
959 | return 0; |
960 | |
961 | frame_size = OPUS_BLOCK_SIZE(s->pkt_framesize); |
962 | |
963 | if (!frame) { |
964 | /* This can go negative, that's not a problem, we only pad if positive */ |
965 | int pad_empty = s->pkt_frames*(frame_size/s->avctx->frame_size) - s->bufqueue.available + 1; |
966 | /* Pad with empty 2.5 ms frames to whatever framesize was decided, |
967 | * this should only happen at the very last flush frame. The frames |
968 | * allocated here will be freed (because they have no other references) |
969 | * after they get used by celt_frame_setup_input() */ |
970 | for (i = 0; i < pad_empty; i++) { |
971 | AVFrame *empty = spawn_empty_frame(s); |
972 | if (!empty) |
973 | return AVERROR(ENOMEM); |
974 | ff_bufqueue_add(avctx, &s->bufqueue, empty); |
975 | } |
976 | } |
977 | |
978 | for (i = 0; i < s->pkt_frames; i++) { |
979 | ff_opus_rc_enc_init(&s->rc[i]); |
980 | ff_opus_psy_celt_frame_setup(s, &s->frame[i], i); |
981 | celt_encode_frame(s, &s->rc[i], &s->frame[i]); |
982 | alloc_size += s->frame[i].framebits >> 3; |
983 | } |
984 | |
985 | /* Worst case toc + the frame lengths if needed */ |
986 | alloc_size += 2 + s->pkt_frames*2; |
987 | |
988 | if ((ret = ff_alloc_packet2(avctx, avpkt, alloc_size, 0)) < 0) |
989 | return ret; |
990 | |
991 | /* Assemble packet */ |
992 | opus_packet_assembler(s, avpkt); |
993 | |
994 | /* Remove samples from queue and skip if needed */ |
995 | ff_af_queue_remove(&s->afq, s->pkt_frames*frame_size, &avpkt->pts, &avpkt->duration); |
996 | if (s->pkt_frames*frame_size > avpkt->duration) { |
997 | uint8_t *side = av_packet_new_side_data(avpkt, AV_PKT_DATA_SKIP_SAMPLES, 10); |
998 | if (!side) |
999 | return AVERROR(ENOMEM); |
1000 | AV_WL32(&side[4], s->pkt_frames*frame_size - avpkt->duration + 120); |
1001 | } |
1002 | |
1003 | *got_packet_ptr = 1; |
1004 | |
1005 | return 0; |
1006 | } |
1007 | |
1008 | static av_cold int opus_encode_end(AVCodecContext *avctx) |
1009 | { |
1010 | int i; |
1011 | OpusEncContext *s = avctx->priv_data; |
1012 | |
1013 | for (i = 0; i < CELT_BLOCK_NB; i++) |
1014 | ff_mdct15_uninit(&s->mdct[i]); |
1015 | |
1016 | av_freep(&s->dsp); |
1017 | av_freep(&s->frame); |
1018 | av_freep(&s->rc); |
1019 | ff_af_queue_close(&s->afq); |
1020 | ff_bufqueue_discard_all(&s->bufqueue); |
1021 | av_freep(&avctx->extradata); |
1022 | |
1023 | return 0; |
1024 | } |
1025 | |
1026 | static av_cold int opus_encode_init(AVCodecContext *avctx) |
1027 | { |
1028 | int i, ch, ret; |
1029 | OpusEncContext *s = avctx->priv_data; |
1030 | |
1031 | s->avctx = avctx; |
1032 | s->channels = avctx->channels; |
1033 | |
1034 | /* Opus allows us to change the framesize on each packet (and each packet may |
1035 | * have multiple frames in it) but we can't change the codec's frame size on |
1036 | * runtime, so fix it to the lowest possible number of samples and use a queue |
1037 | * to accumulate AVFrames until we have enough to encode whatever the encoder |
1038 | * decides is the best */ |
1039 | avctx->frame_size = 120; |
1040 | /* Initial padding will change if SILK is ever supported */ |
1041 | avctx->initial_padding = 120; |
1042 | |
1043 | avctx->cutoff = !avctx->cutoff ? 20000 : avctx->cutoff; |
1044 | |
1045 | if (!avctx->bit_rate) { |
1046 | int coupled = ff_opus_default_coupled_streams[s->channels - 1]; |
1047 | avctx->bit_rate = coupled*(96000) + (s->channels - coupled*2)*(48000); |
1048 | } else if (avctx->bit_rate < 6000 || avctx->bit_rate > 255000 * s->channels) { |
1049 | int64_t clipped_rate = av_clip(avctx->bit_rate, 6000, 255000 * s->channels); |
1050 | av_log(avctx, AV_LOG_ERROR, "Unsupported bitrate %"PRId64" kbps, clipping to %"PRId64" kbps\n", |
1051 | avctx->bit_rate/1000, clipped_rate/1000); |
1052 | avctx->bit_rate = clipped_rate; |
1053 | } |
1054 | |
1055 | /* Frame structs and range coder buffers */ |
1056 | s->frame = av_malloc(OPUS_MAX_FRAMES_PER_PACKET*sizeof(CeltFrame)); |
1057 | if (!s->frame) |
1058 | return AVERROR(ENOMEM); |
1059 | s->rc = av_malloc(OPUS_MAX_FRAMES_PER_PACKET*sizeof(OpusRangeCoder)); |
1060 | if (!s->rc) |
1061 | return AVERROR(ENOMEM); |
1062 | |
1063 | /* Extradata */ |
1064 | avctx->extradata_size = 19; |
1065 | avctx->extradata = av_malloc(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE); |
1066 | if (!avctx->extradata) |
1067 | return AVERROR(ENOMEM); |
1068 | opus_write_extradata(avctx); |
1069 | |
1070 | ff_af_queue_init(avctx, &s->afq); |
1071 | |
1072 | if (!(s->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT))) |
1073 | return AVERROR(ENOMEM); |
1074 | |
1075 | /* I have no idea why a base scaling factor of 68 works, could be the twiddles */ |
1076 | for (i = 0; i < CELT_BLOCK_NB; i++) |
1077 | if ((ret = ff_mdct15_init(&s->mdct[i], 0, i + 3, 68 << (CELT_BLOCK_NB - 1 - i)))) |
1078 | return AVERROR(ENOMEM); |
1079 | |
1080 | for (i = 0; i < OPUS_MAX_FRAMES_PER_PACKET; i++) |
1081 | s->frame[i].block[0].emph_coeff = s->frame[i].block[1].emph_coeff = 0.0f; |
1082 | |
1083 | /* Zero out previous energy (matters for inter first frame) */ |
1084 | for (ch = 0; ch < s->channels; ch++) |
1085 | for (i = 0; i < CELT_MAX_BANDS; i++) |
1086 | s->last_quantized_energy[ch][i] = 0.0f; |
1087 | |
1088 | /* Allocate an empty frame to use as overlap for the first frame of audio */ |
1089 | ff_bufqueue_add(avctx, &s->bufqueue, spawn_empty_frame(s)); |
1090 | if (!ff_bufqueue_peek(&s->bufqueue, 0)) |
1091 | return AVERROR(ENOMEM); |
1092 | |
1093 | return 0; |
1094 | } |
1095 | |
1096 | #define OPUSENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM |
1097 | static const AVOption opusenc_options[] = { |
1098 | { "opus_delay", "Maximum delay (and lookahead) in milliseconds", offsetof(OpusEncContext, options.max_delay_ms), AV_OPT_TYPE_FLOAT, { .dbl = OPUS_MAX_LOOKAHEAD }, 2.5f, OPUS_MAX_LOOKAHEAD, OPUSENC_FLAGS }, |
1099 | { NULL }, |
1100 | }; |
1101 | |
1102 | static const AVClass opusenc_class = { |
1103 | .class_name = "Opus encoder", |
1104 | .item_name = av_default_item_name, |
1105 | .option = opusenc_options, |
1106 | .version = LIBAVUTIL_VERSION_INT, |
1107 | }; |
1108 | |
1109 | static const AVCodecDefault opusenc_defaults[] = { |
1110 | { "b", "0" }, |
1111 | { "compression_level", "10" }, |
1112 | { NULL }, |
1113 | }; |
1114 | |
1115 | AVCodec ff_opus_encoder = { |
1116 | .name = "opus", |
1117 | .long_name = NULL_IF_CONFIG_SMALL("Opus"), |
1118 | .type = AVMEDIA_TYPE_AUDIO, |
1119 | .id = AV_CODEC_ID_OPUS, |
1120 | .defaults = opusenc_defaults, |
1121 | .priv_class = &opusenc_class, |
1122 | .priv_data_size = sizeof(OpusEncContext), |
1123 | .init = opus_encode_init, |
1124 | .encode2 = opus_encode_frame, |
1125 | .close = opus_encode_end, |
1126 | .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP, |
1127 | .capabilities = AV_CODEC_CAP_EXPERIMENTAL | AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY, |
1128 | .supported_samplerates = (const int []){ 48000, 0 }, |
1129 | .channel_layouts = (const uint64_t []){ AV_CH_LAYOUT_MONO, |
1130 | AV_CH_LAYOUT_STEREO, 0 }, |
1131 | .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP, |
1132 | AV_SAMPLE_FMT_NONE }, |
1133 | }; |
1134 |