summaryrefslogtreecommitdiff
path: root/libavcodec/nellymoserenc.c (plain)
blob: 9d22ac8cca20eb387547f4f2e53cd4f79e19124d
1/*
2 * Nellymoser encoder
3 * This code is developed as part of Google Summer of Code 2008 Program.
4 *
5 * Copyright (c) 2008 Bartlomiej Wolowiec
6 *
7 * This file is part of FFmpeg.
8 *
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24/**
25 * @file
26 * Nellymoser encoder
27 * by Bartlomiej Wolowiec
28 *
29 * Generic codec information: libavcodec/nellymoserdec.c
30 *
31 * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32 * (Copyright Joseph Artsimovich and UAB "DKD")
33 *
34 * for more information about nellymoser format, visit:
35 * http://wiki.multimedia.cx/index.php?title=Nellymoser
36 */
37
38#include "libavutil/common.h"
39#include "libavutil/float_dsp.h"
40#include "libavutil/mathematics.h"
41
42#include "audio_frame_queue.h"
43#include "avcodec.h"
44#include "fft.h"
45#include "internal.h"
46#include "nellymoser.h"
47#include "sinewin.h"
48
49#define BITSTREAM_WRITER_LE
50#include "put_bits.h"
51
/** Number of entries in pow_table; encode_block() indexes it with power_idx & 0x7FF. */
#define POW_TABLE_SIZE   (1 << 11)
/** Constant added to the shift count when encode_block() derives the band scale. */
#define POW_TABLE_OFFSET 3
/** Row length of the opt[] / path[] tables used by the trellis exponent search. */
#define OPT_SIZE         ((1 << 15) + 3000)
55
56typedef struct NellyMoserEncodeContext {
57 AVCodecContext *avctx;
58 int last_frame;
59 AVFloatDSPContext *fdsp;
60 FFTContext mdct_ctx;
61 AudioFrameQueue afq;
62 DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
63 DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
64 DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN]; ///< sample buffer
65 float (*opt )[OPT_SIZE];
66 uint8_t (*path)[OPT_SIZE];
67} NellyMoserEncodeContext;
68
69static float pow_table[POW_TABLE_SIZE]; ///< pow(2, -i / 2048.0 - 3.0);
70
/**
 * First-guess index into ff_nelly_init_table for the base exponent.
 * get_exponent_greedy() looks it up through find_best() with an index of
 * clip((lrintf(val) >> 8) - 20, 0, 95); find_best() then checks whether the
 * next table entry is closer.
 */
static const uint8_t sf_lut[96] = {
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
     5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
    15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
    27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
    41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
    54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
};
79
/**
 * First-guess index into ff_nelly_delta_table for the per-band exponent delta.
 * get_exponent_greedy() looks it up through find_best() with an index of
 * clip((lrintf(val) >> 8) + 37, 0, 77).
 */
static const uint8_t sf_delta_lut[78] = {
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
     4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
    13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
    23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
    28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
};
87
/**
 * First-guess quantizer index for a scaled coefficient, one segment per
 * bit count. The segment for n bits occupies
 * [quant_lut_offset[n], quant_lut_offset[n + 1]); encode_block() clips its
 * lookup position to that range.
 */
static const uint8_t quant_lut[230] = {
    /* 1 bit: 1 entry */
     0,

    /* 2 bits: 3 entries */
     0,  1,  2,

    /* 3 bits: 7 entries */
     0,  1,  2,  3,  4,  5,  6,

    /* 4 bits: 21 entries */
     0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
    12, 13, 13, 13, 14,

    /* 5 bits: 49 entries */
     0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
     8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
    22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
    30,

    /* 6 bits: 149 entries */
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
     4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
    10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
    15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
    21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
    46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
    53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
    58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
    61, 61, 61, 61, 62,
};
114
/*
 * Per-bit-count parameters for the quant_lut lookup in encode_block():
 * position = coeff * quant_lut_mul[bits] + quant_lut_add[bits], clipped to
 * [quant_lut_offset[bits], quant_lut_offset[bits + 1] - 1].
 * Entry 0 is never used because only bits > 0 are quantized.
 */
static const float   quant_lut_mul[7]    = { 0.0, 0.0, 2.0, 2.0,  5.0, 12.0,  36.6 };
static const float   quant_lut_add[7]    = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 };
static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
118
119static void apply_mdct(NellyMoserEncodeContext *s)
120{
121 float *in0 = s->buf;
122 float *in1 = s->buf + NELLY_BUF_LEN;
123 float *in2 = s->buf + 2 * NELLY_BUF_LEN;
124
125 s->fdsp->vector_fmul (s->in_buff, in0, ff_sine_128, NELLY_BUF_LEN);
126 s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
127 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
128
129 s->fdsp->vector_fmul (s->in_buff, in1, ff_sine_128, NELLY_BUF_LEN);
130 s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
131 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff);
132}
133
134static av_cold int encode_end(AVCodecContext *avctx)
135{
136 NellyMoserEncodeContext *s = avctx->priv_data;
137
138 ff_mdct_end(&s->mdct_ctx);
139
140 if (s->avctx->trellis) {
141 av_freep(&s->opt);
142 av_freep(&s->path);
143 }
144 ff_af_queue_close(&s->afq);
145 av_freep(&s->fdsp);
146
147 return 0;
148}
149
150static av_cold int encode_init(AVCodecContext *avctx)
151{
152 NellyMoserEncodeContext *s = avctx->priv_data;
153 int i, ret;
154
155 if (avctx->channels != 1) {
156 av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
157 return AVERROR(EINVAL);
158 }
159
160 if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
161 avctx->sample_rate != 11025 &&
162 avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
163 avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
164 av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
165 return AVERROR(EINVAL);
166 }
167
168 avctx->frame_size = NELLY_SAMPLES;
169 avctx->initial_padding = NELLY_BUF_LEN;
170 ff_af_queue_init(avctx, &s->afq);
171 s->avctx = avctx;
172 if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
173 goto error;
174 s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
175 if (!s->fdsp) {
176 ret = AVERROR(ENOMEM);
177 goto error;
178 }
179
180 /* Generate overlap window */
181 ff_init_ff_sine_windows(7);
182 /* faster way of doing
183 for (i = 0; i < POW_TABLE_SIZE; i++)
184 pow_table[i] = 2^(-i / 2048.0 - 3.0 + POW_TABLE_OFFSET); */
185 pow_table[0] = 1;
186 pow_table[1024] = M_SQRT1_2;
187 for (i = 1; i < 513; i++) {
188 double tmp = exp2(-i / 2048.0);
189 pow_table[i] = tmp;
190 pow_table[1024-i] = M_SQRT1_2 / tmp;
191 pow_table[1024+i] = tmp * M_SQRT1_2;
192 pow_table[2048-i] = 0.5 / tmp;
193 }
194
195 if (s->avctx->trellis) {
196 s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));
197 s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
198 if (!s->opt || !s->path) {
199 ret = AVERROR(ENOMEM);
200 goto error;
201 }
202 }
203
204 return 0;
205error:
206 encode_end(avctx);
207 return ret;
208}
209
/**
 * Pick the entry of @p table closest to @p val and store its index in the
 * caller's local variable best_idx.
 *
 * LUT supplies a first guess, indexed by (lrintf(val) >> 8) + LUT_add clipped
 * to [0, LUT_size - 1]; the guess is then bumped by one if the following
 * table entry is closer to val. The caller must make sure table has an entry
 * after every index LUT can return.
 *
 * Wrapped in do { } while (0) so it behaves as a single statement, and every
 * argument is parenthesized. val is evaluated more than once, so it must be
 * free of side effects.
 */
#define find_best(val, table, LUT, LUT_add, LUT_size)                          \
    do {                                                                       \
        best_idx =                                                             \
            (LUT)[av_clip((lrintf(val) >> 8) + (LUT_add), 0, (LUT_size) - 1)]; \
        if (fabs((val) - (table)[best_idx]) >                                  \
            fabs((val) - (table)[best_idx + 1]))                               \
            best_idx++;                                                        \
    } while (0)
215
216static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
217{
218 int band, best_idx, power_idx = 0;
219 float power_candidate;
220
221 //base exponent
222 find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
223 idx_table[0] = best_idx;
224 power_idx = ff_nelly_init_table[best_idx];
225
226 for (band = 1; band < NELLY_BANDS; band++) {
227 power_candidate = cand[band] - power_idx;
228 find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
229 idx_table[band] = best_idx;
230 power_idx += ff_nelly_delta_table[best_idx];
231 }
232}
233
/**
 * Squared difference between two exponent values.
 *
 * @param x    first value
 * @param y    second value
 * @param band band number; currently ignored
 * @return (x - y)^2
 */
static inline float distance(float x, float y, int band)
{
    const float diff = x - y;

    return diff * diff;
}
240
241static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
242{
243 int i, j, band, best_idx;
244 float power_candidate, best_val;
245
246 float (*opt )[OPT_SIZE] = s->opt ;
247 uint8_t(*path)[OPT_SIZE] = s->path;
248
249 for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
250 opt[0][i] = INFINITY;
251 }
252
253 for (i = 0; i < 64; i++) {
254 opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
255 path[0][ff_nelly_init_table[i]] = i;
256 }
257
258 for (band = 1; band < NELLY_BANDS; band++) {
259 int q, c = 0;
260 float tmp;
261 int idx_min, idx_max, idx;
262 power_candidate = cand[band];
263 for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
264 idx_min = FFMAX(0, cand[band] - q);
265 idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
266 for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
267 if ( isinf(opt[band - 1][i]) )
268 continue;
269 for (j = 0; j < 32; j++) {
270 idx = i + ff_nelly_delta_table[j];
271 if (idx > idx_max)
272 break;
273 if (idx >= idx_min) {
274 tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
275 if (opt[band][idx] > tmp) {
276 opt[band][idx] = tmp;
277 path[band][idx] = j;
278 c = 1;
279 }
280 }
281 }
282 }
283 }
284 av_assert1(c); //FIXME
285 }
286
287 best_val = INFINITY;
288 best_idx = -1;
289 band = NELLY_BANDS - 1;
290 for (i = 0; i < OPT_SIZE; i++) {
291 if (best_val > opt[band][i]) {
292 best_val = opt[band][i];
293 best_idx = i;
294 }
295 }
296 for (band = NELLY_BANDS - 1; band >= 0; band--) {
297 idx_table[band] = path[band][best_idx];
298 if (band) {
299 best_idx -= ff_nelly_delta_table[path[band][best_idx]];
300 }
301 }
302}
303
304/**
305 * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
306 * @param s encoder context
307 * @param output output buffer
308 * @param output_size size of output buffer
309 */
310static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
311{
312 PutBitContext pb;
313 int i, j, band, block, best_idx, power_idx = 0;
314 float power_val, coeff, coeff_sum;
315 float pows[NELLY_FILL_LEN];
316 int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
317 float cand[NELLY_BANDS];
318
319 apply_mdct(s);
320
321 init_put_bits(&pb, output, output_size);
322
323 i = 0;
324 for (band = 0; band < NELLY_BANDS; band++) {
325 coeff_sum = 0;
326 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
327 coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
328 + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
329 }
330 cand[band] =
331 log2(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0;
332 }
333
334 if (s->avctx->trellis) {
335 get_exponent_dynamic(s, cand, idx_table);
336 } else {
337 get_exponent_greedy(s, cand, idx_table);
338 }
339
340 i = 0;
341 for (band = 0; band < NELLY_BANDS; band++) {
342 if (band) {
343 power_idx += ff_nelly_delta_table[idx_table[band]];
344 put_bits(&pb, 5, idx_table[band]);
345 } else {
346 power_idx = ff_nelly_init_table[idx_table[0]];
347 put_bits(&pb, 6, idx_table[0]);
348 }
349 power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
350 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
351 s->mdct_out[i] *= power_val;
352 s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
353 pows[i] = power_idx;
354 }
355 }
356
357 ff_nelly_get_sample_bits(pows, bits);
358
359 for (block = 0; block < 2; block++) {
360 for (i = 0; i < NELLY_FILL_LEN; i++) {
361 if (bits[i] > 0) {
362 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
363 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
364 best_idx =
365 quant_lut[av_clip (
366 coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
367 quant_lut_offset[bits[i]],
368 quant_lut_offset[bits[i]+1] - 1
369 )];
370 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
371 best_idx++;
372
373 put_bits(&pb, bits[i], best_idx);
374 }
375 }
376 if (!block)
377 put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
378 }
379
380 flush_put_bits(&pb);
381 memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
382}
383
384static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
385 const AVFrame *frame, int *got_packet_ptr)
386{
387 NellyMoserEncodeContext *s = avctx->priv_data;
388 int ret;
389
390 if (s->last_frame)
391 return 0;
392
393 memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
394 if (frame) {
395 memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
396 frame->nb_samples * sizeof(*s->buf));
397 if (frame->nb_samples < NELLY_SAMPLES) {
398 memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
399 (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
400 if (frame->nb_samples >= NELLY_BUF_LEN)
401 s->last_frame = 1;
402 }
403 if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
404 return ret;
405 } else {
406 memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
407 s->last_frame = 1;
408 }
409
410 if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN, 0)) < 0)
411 return ret;
412 encode_block(s, avpkt->data, avpkt->size);
413
414 /* Get the next frame pts/duration */
415 ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
416 &avpkt->duration);
417
418 *got_packet_ptr = 1;
419 return 0;
420}
421
422AVCodec ff_nellymoser_encoder = {
423 .name = "nellymoser",
424 .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
425 .type = AVMEDIA_TYPE_AUDIO,
426 .id = AV_CODEC_ID_NELLYMOSER,
427 .priv_data_size = sizeof(NellyMoserEncodeContext),
428 .init = encode_init,
429 .encode2 = encode_frame,
430 .close = encode_end,
431 .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
432 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
433 AV_SAMPLE_FMT_NONE },
434};
435