platform/external/ffmpeg.git - Unnamed repository; edit this file 'description' to name the repository.

1 /*
2  * Nellymoser encoder
3  * This code is developed as part of Google Summer of Code 2008 Program.
4  *
5  * Copyright (c) 2008 Bartlomiej Wolowiec
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23
24 /**
25  * @file
26  * Nellymoser encoder
27  * by Bartlomiej Wolowiec
28  *
29  * Generic codec information: libavcodec/nellymoserdec.c
30  *
31  * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32  *                             (Copyright Joseph Artsimovich and UAB "DKD")
33  *
34  * for more information about nellymoser format, visit:
35  * http://wiki.multimedia.cx/index.php?title=Nellymoser
36  */
37
38 #include "libavutil/common.h"
39 #include "libavutil/float_dsp.h"
40 #include "libavutil/mathematics.h"
41
42 #include "audio_frame_queue.h"
43 #include "avcodec.h"
44 #include "fft.h"
45 #include "internal.h"
46 #include "nellymoser.h"
47 #include "sinewin.h"
48
49 #define BITSTREAM_WRITER_LE
50 #include "put_bits.h"
51
/* Number of entries in pow_table[]; encode_block() indexes it with (power_idx & 0x7FF). */
52 #define POW_TABLE_SIZE (1<<11)
/* Constant added to (power_idx >> 11) when encode_block() builds the per-band divisor. */
53 #define POW_TABLE_OFFSET 3
/* Row length of the trellis tables opt[][] and path[][] used by get_exponent_dynamic(). */
54 #define OPT_SIZE ((1<<15) + 3000)
55
/**
 * Private encoder state, reached through AVCodecContext.priv_data.
 */
56 typedef struct NellyMoserEncodeContext {
57     AVCodecContext  *avctx;      ///< owning codec context, stored by encode_init()
58     int             last_frame;  ///< set by encode_frame() once the final packet was produced
59     AVFloatDSPContext *fdsp;     ///< float DSP helpers allocated in encode_init()
60     FFTContext      mdct_ctx;    ///< MDCT context set up by ff_mdct_init()
61     AudioFrameQueue afq;         ///< queue used to derive packet pts/duration
62     DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];   ///< output of the two MDCTs run by apply_mdct()
63     DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];    ///< windowed MDCT input scratch
64     DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN];     ///< sample buffer
65     float           (*opt )[OPT_SIZE];   ///< trellis cost table, allocated only when trellis is enabled
66     uint8_t         (*path)[OPT_SIZE];   ///< trellis back-pointers (table index chosen per band)
67 } NellyMoserEncodeContext;
68
69 static float pow_table[POW_TABLE_SIZE];     ///< pow(2, -i / 2048.0), filled by encode_init()
70
/**
 * Coarse lookup for the first band's exponent.
 * find_best() indexes it with clip((lrintf(val) >> 8) - 20, 0, 95) to get a
 * starting index into ff_nelly_init_table, then also tries the next entry.
 */
71 static const uint8_t sf_lut[96] = {
72      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
73      5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
74     15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
75     27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
76     41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
77     54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
78 };
79
/**
 * Coarse lookup for the exponent deltas of bands 1 and up.
 * find_best() indexes it with clip((lrintf(val) >> 8) + 37, 0, 77) to get a
 * starting index into ff_nelly_delta_table, then also tries the next entry.
 */
80 static const uint8_t sf_delta_lut[78] = {
81      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
82      4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
83     13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
84     23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
85     28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
86 };
87
/**
 * Coarse coefficient quantizer lookup, one section per bit count.
 * The sections for 1..6 bits hold 1, 3, 7, 21, 49 and 149 entries and start at
 * quant_lut_offset[bits]. encode_block() uses the looked-up value as a
 * candidate index into the dequantization table and also tries the next entry.
 */
88 static const uint8_t quant_lut[230] = {
89      0,
90
91      0,  1,  2,
92
93      0,  1,  2,  3,  4,  5,  6,
94
95      0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
96     12, 13, 13, 13, 14,
97
98      0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
99      8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
100     22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
101     30,
102
103      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
104      4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
105     10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
106     15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
107     21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
108     33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
109     46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
110     53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
111     58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
112     61, 61, 61, 61, 62,
113 };
114
/* Indexed by bit count: encode_block() forms the quant_lut index as
 * coeff * quant_lut_mul[bits] + quant_lut_add[bits]. */
115 static const float quant_lut_mul[7] = { 0.0,  0.0,  2.0,  2.0,  5.0, 12.0,  36.6 };
116 static const float quant_lut_add[7] = { 0.0,  0.0,  2.0,  7.0, 21.0, 56.0, 157.0 };
/* Section bounds in quant_lut: the index for `bits` is clipped to
 * [quant_lut_offset[bits], quant_lut_offset[bits + 1] - 1]. */
117 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
118
119 static void apply_mdct(NellyMoserEncodeContext *s)
120 {
121     float *in0 = s->buf;
122     float *in1 = s->buf + NELLY_BUF_LEN;
123     float *in2 = s->buf + 2 * NELLY_BUF_LEN;
124
125     s->fdsp->vector_fmul        (s->in_buff,                 in0, ff_sine_128, NELLY_BUF_LEN);
126     s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
127     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
128
129     s->fdsp->vector_fmul        (s->in_buff,                 in1, ff_sine_128, NELLY_BUF_LEN);
130     s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
131     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff);
132 }
133
134 static av_cold int encode_end(AVCodecContext *avctx)
135 {
136     NellyMoserEncodeContext *s = avctx->priv_data;
137
138     ff_mdct_end(&s->mdct_ctx);
139
140     if (s->avctx->trellis) {
141         av_freep(&s->opt);
142         av_freep(&s->path);
143     }
144     ff_af_queue_close(&s->afq);
145     av_freep(&s->fdsp);
146
147     return 0;
148 }
149
150 static av_cold int encode_init(AVCodecContext *avctx)
151 {
152     NellyMoserEncodeContext *s = avctx->priv_data;
153     int i, ret;
154
155     if (avctx->channels != 1) {
156         av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
157         return AVERROR(EINVAL);
158     }
159
160     if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
161         avctx->sample_rate != 11025 &&
162         avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
163         avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
164         av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
165         return AVERROR(EINVAL);
166     }
167
168     avctx->frame_size = NELLY_SAMPLES;
169     avctx->initial_padding = NELLY_BUF_LEN;
170     ff_af_queue_init(avctx, &s->afq);
171     s->avctx = avctx;
172     if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
173         goto error;
174     s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
175     if (!s->fdsp) {
176         ret = AVERROR(ENOMEM);
177         goto error;
178     }
179
180     /* Generate overlap window */
181     ff_init_ff_sine_windows(7);
182     /* faster way of doing
183     for (i = 0; i < POW_TABLE_SIZE; i++)
184        pow_table[i] = 2^(-i / 2048.0 - 3.0 + POW_TABLE_OFFSET); */
185     pow_table[0] = 1;
186     pow_table[1024] = M_SQRT1_2;
187     for (i = 1; i < 513; i++) {
188         double tmp = exp2(-i / 2048.0);
189         pow_table[i] = tmp;
190         pow_table[1024-i] = M_SQRT1_2 / tmp;
191         pow_table[1024+i] = tmp * M_SQRT1_2;
192         pow_table[2048-i] = 0.5 / tmp;
193     }
194
195     if (s->avctx->trellis) {
196         s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
197         s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
198         if (!s->opt || !s->path) {
199             ret = AVERROR(ENOMEM);
200             goto error;
201         }
202     }
203
204     return 0;
205 error:
206     encode_end(avctx);
207     return ret;
208 }
209
/*
 * Pick the entry of `table` closest to `val` and store its index in the
 * caller's local `best_idx`.
 *
 * LUT maps clip((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1) to a starting
 * index; the macro then moves to the next entry if that one is nearer.
 * Wrapped in do { } while (0) so it behaves as a single statement, with every
 * argument parenthesised. `val` is evaluated more than once.
 */
#define find_best(val, table, LUT, LUT_add, LUT_size) \
    do { \
        best_idx = \
            (LUT)[av_clip((lrintf(val) >> 8) + (LUT_add), 0, (LUT_size) - 1)]; \
        if (fabs((val) - (table)[best_idx]) > fabs((val) - (table)[best_idx + 1])) \
            best_idx++; \
    } while (0)
215
216 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
217 {
218     int band, best_idx, power_idx = 0;
219     float power_candidate;
220
221     //base exponent
222     find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
223     idx_table[0] = best_idx;
224     power_idx = ff_nelly_init_table[best_idx];
225
226     for (band = 1; band < NELLY_BANDS; band++) {
227         power_candidate = cand[band] - power_idx;
228         find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
229         idx_table[band] = best_idx;
230         power_idx += ff_nelly_delta_table[best_idx];
231     }
232 }
233
/**
 * Squared difference between two values.
 *
 * @param x    first value
 * @param y    second value
 * @param band band number (currently ignored)
 * @return (x - y) squared
 */
static inline float distance(float x, float y, int band)
{
    const float diff = x - y;

    (void)band;
    return diff * diff;
}
240
241 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
242 {
243     int i, j, band, best_idx;
244     float power_candidate, best_val;
245
246     float  (*opt )[OPT_SIZE] = s->opt ;
247     uint8_t(*path)[OPT_SIZE] = s->path;
248
249     for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
250         opt[0][i] = INFINITY;
251     }
252
253     for (i = 0; i < 64; i++) {
254         opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
255         path[0][ff_nelly_init_table[i]] = i;
256     }
257
258     for (band = 1; band < NELLY_BANDS; band++) {
259         int q, c = 0;
260         float tmp;
261         int idx_min, idx_max, idx;
262         power_candidate = cand[band];
263         for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
264             idx_min = FFMAX(0, cand[band] - q);
265             idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
266             for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
267                 if ( isinf(opt[band - 1][i]) )
268                     continue;
269                 for (j = 0; j < 32; j++) {
270                     idx = i + ff_nelly_delta_table[j];
271                     if (idx > idx_max)
272                         break;
273                     if (idx >= idx_min) {
274                         tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
275                         if (opt[band][idx] > tmp) {
276                             opt[band][idx] = tmp;
277                             path[band][idx] = j;
278                             c = 1;
279                         }
280                     }
281                 }
282             }
283         }
284         av_assert1(c); //FIXME
285     }
286
287     best_val = INFINITY;
288     best_idx = -1;
289     band = NELLY_BANDS - 1;
290     for (i = 0; i < OPT_SIZE; i++) {
291         if (best_val > opt[band][i]) {
292             best_val = opt[band][i];
293             best_idx = i;
294         }
295     }
296     for (band = NELLY_BANDS - 1; band >= 0; band--) {
297         idx_table[band] = path[band][best_idx];
298         if (band) {
299             best_idx -= ff_nelly_delta_table[path[band][best_idx]];
300         }
301     }
302 }
303
304 /**
305  * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
306  *  @param s               encoder context
307  *  @param output          output buffer
308  *  @param output_size     size of output buffer
309  */
310 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
311 {
312     PutBitContext pb;
313     int i, j, band, block, best_idx, power_idx = 0;
314     float power_val, coeff, coeff_sum;
315     float pows[NELLY_FILL_LEN];
316     int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
317     float cand[NELLY_BANDS];
318
319     apply_mdct(s);
320
321     init_put_bits(&pb, output, output_size);
322
323     i = 0;
324     for (band = 0; band < NELLY_BANDS; band++) {
325         coeff_sum = 0;
326         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
327             coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
328                        + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
329         }
330         cand[band] =
331             log2(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0;
332     }
333
334     if (s->avctx->trellis) {
335         get_exponent_dynamic(s, cand, idx_table);
336     } else {
337         get_exponent_greedy(s, cand, idx_table);
338     }
339
340     i = 0;
341     for (band = 0; band < NELLY_BANDS; band++) {
342         if (band) {
343             power_idx += ff_nelly_delta_table[idx_table[band]];
344             put_bits(&pb, 5, idx_table[band]);
345         } else {
346             power_idx = ff_nelly_init_table[idx_table[0]];
347             put_bits(&pb, 6, idx_table[0]);
348         }
349         power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
350         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
351             s->mdct_out[i] *= power_val;
352             s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
353             pows[i] = power_idx;
354         }
355     }
356
357     ff_nelly_get_sample_bits(pows, bits);
358
359     for (block = 0; block < 2; block++) {
360         for (i = 0; i < NELLY_FILL_LEN; i++) {
361             if (bits[i] > 0) {
362                 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
363                 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
364                 best_idx =
365                     quant_lut[av_clip (
366                             coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
367                             quant_lut_offset[bits[i]],
368                             quant_lut_offset[bits[i]+1] - 1
369                             )];
370                 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
371                     best_idx++;
372
373                 put_bits(&pb, bits[i], best_idx);
374             }
375         }
376         if (!block)
377             put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
378     }
379
380     flush_put_bits(&pb);
381     memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
382 }
383
384 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
385                         const AVFrame *frame, int *got_packet_ptr)
386 {
387     NellyMoserEncodeContext *s = avctx->priv_data;
388     int ret;
389
390     if (s->last_frame)
391         return 0;
392
393     memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
394     if (frame) {
395         memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
396                frame->nb_samples * sizeof(*s->buf));
397         if (frame->nb_samples < NELLY_SAMPLES) {
398             memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
399                    (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
400             if (frame->nb_samples >= NELLY_BUF_LEN)
401                 s->last_frame = 1;
402         }
403         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
404             return ret;
405     } else {
406         memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
407         s->last_frame = 1;
408     }
409
410     if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN, 0)) < 0)
411         return ret;
412     encode_block(s, avpkt->data, avpkt->size);
413
414     /* Get the next frame pts/duration */
415     ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
416                        &avpkt->duration);
417
418     *got_packet_ptr = 1;
419     return 0;
420 }
421
422 AVCodec ff_nellymoser_encoder = {
423     .name           = "nellymoser",
424     .long_name      = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
425     .type           = AVMEDIA_TYPE_AUDIO,
426     .id             = AV_CODEC_ID_NELLYMOSER,
427     .priv_data_size = sizeof(NellyMoserEncodeContext),
428     .init           = encode_init,
429     .encode2        = encode_frame,
430     .close          = encode_end,
431     .capabilities   = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
432     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
433                                                      AV_SAMPLE_FMT_NONE },
434 };
435
1	/*
2	* Nellymoser encoder
3	* This code is developed as part of Google Summer of Code 2008 Program.
4	*
5	* Copyright (c) 2008 Bartlomiej Wolowiec
6	*
7	* This file is part of FFmpeg.
8	*
9	* FFmpeg is free software; you can redistribute it and/or
10	* modify it under the terms of the GNU Lesser General Public
11	* License as published by the Free Software Foundation; either
12	* version 2.1 of the License, or (at your option) any later version.
13	*
14	* FFmpeg is distributed in the hope that it will be useful,
15	* but WITHOUT ANY WARRANTY; without even the implied warranty of
16	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17	* Lesser General Public License for more details.
18	*
19	* You should have received a copy of the GNU Lesser General Public
20	* License along with FFmpeg; if not, write to the Free Software
21	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22	*/
23
24	/**
25	* @file
26	* Nellymoser encoder
27	* by Bartlomiej Wolowiec
28	*
29	* Generic codec information: libavcodec/nellymoserdec.c
30	*
31	* Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32	* (Copyright Joseph Artsimovich and UAB "DKD")
33	*
34	* for more information about nellymoser format, visit:
35	* http://wiki.multimedia.cx/index.php?title=Nellymoser
36	*/
37
38	#include "libavutil/common.h"
39	#include "libavutil/float_dsp.h"
40	#include "libavutil/mathematics.h"
41
42	#include "audio_frame_queue.h"
43	#include "avcodec.h"
44	#include "fft.h"
45	#include "internal.h"
46	#include "nellymoser.h"
47	#include "sinewin.h"
48
49	#define BITSTREAM_WRITER_LE
50	#include "put_bits.h"
51
/* Number of entries in pow_table[]; encode_block() indexes it with (power_idx & 0x7FF). */
52	#define POW_TABLE_SIZE (1<<11)
/* Constant added to (power_idx >> 11) when encode_block() builds the per-band divisor. */
53	#define POW_TABLE_OFFSET 3
/* Row length of the trellis tables opt[][] and path[][] used by get_exponent_dynamic(). */
54	#define OPT_SIZE ((1<<15) + 3000)
55
/**
 * Private encoder state, reached through AVCodecContext.priv_data.
 * avctx/fdsp/mdct_ctx/afq are set up in encode_init(); opt and path are the
 * trellis tables, allocated only when trellis is enabled.
 */
56	typedef struct NellyMoserEncodeContext {
57	AVCodecContext *avctx;
58	int last_frame;
59	AVFloatDSPContext *fdsp;
60	FFTContext mdct_ctx;
61	AudioFrameQueue afq;
62	DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
63	DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
64	DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN]; ///< sample buffer
65	float (*opt )[OPT_SIZE];
66	uint8_t (*path)[OPT_SIZE];
67	} NellyMoserEncodeContext;
68
69	static float pow_table[POW_TABLE_SIZE]; ///< pow(2, -i / 2048.0), filled by encode_init()
70
/**
 * Coarse lookup for the first band's exponent.
 * find_best() indexes it with clip((lrintf(val) >> 8) - 20, 0, 95) to get a
 * starting index into ff_nelly_init_table, then also tries the next entry.
 */
71	static const uint8_t sf_lut[96] = {
72	0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
73	5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14,
74	15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
75	27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
76	41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
77	54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
78	};
79
/**
 * Coarse lookup for the exponent deltas of bands 1 and up.
 * find_best() indexes it with clip((lrintf(val) >> 8) + 37, 0, 77) to get a
 * starting index into ff_nelly_delta_table, then also tries the next entry.
 */
80	static const uint8_t sf_delta_lut[78] = {
81	0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
82	4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12,
83	13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
84	23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
85	28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
86	};
87
/**
 * Coarse coefficient quantizer lookup, one section per bit count.
 * The sections for 1..6 bits hold 1, 3, 7, 21, 49 and 149 entries and start at
 * quant_lut_offset[bits]. encode_block() uses the looked-up value as a
 * candidate index into the dequantization table and also tries the next entry.
 */
88	static const uint8_t quant_lut[230] = {
89	0,
90
91	0, 1, 2,
92
93	0, 1, 2, 3, 4, 5, 6,
94
95	0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11,
96	12, 13, 13, 13, 14,
97
98	0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8,
99	8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
100	22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
101	30,
102
103	0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3,
104	4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9,
105	10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
106	15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
107	21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
108	33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
109	46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
110	53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
111	58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
112	61, 61, 61, 61, 62,
113	};
114
/* Indexed by bit count: encode_block() forms the quant_lut index as
 * coeff * quant_lut_mul[bits] + quant_lut_add[bits]. */
115	static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 };
116	static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 };
/* Section bounds in quant_lut: the index for `bits` is clipped to
 * [quant_lut_offset[bits], quant_lut_offset[bits + 1] - 1]. */
117	static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
118
119	static void apply_mdct(NellyMoserEncodeContext *s)
120	{
121	float *in0 = s->buf;
122	float *in1 = s->buf + NELLY_BUF_LEN;
123	float in2 = s->buf + 2 NELLY_BUF_LEN;
124
125	s->fdsp->vector_fmul (s->in_buff, in0, ff_sine_128, NELLY_BUF_LEN);
126	s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
127	s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
128
129	s->fdsp->vector_fmul (s->in_buff, in1, ff_sine_128, NELLY_BUF_LEN);
130	s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
131	s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff);
132	}
133
134	static av_cold int encode_end(AVCodecContext *avctx)
135	{
136	NellyMoserEncodeContext *s = avctx->priv_data;
137
138	ff_mdct_end(&s->mdct_ctx);
139
140	if (s->avctx->trellis) {
141	av_freep(&s->opt);
142	av_freep(&s->path);
143	}
144	ff_af_queue_close(&s->afq);
145	av_freep(&s->fdsp);
146
147	return 0;
148	}
149
150	static av_cold int encode_init(AVCodecContext *avctx)
151	{
152	NellyMoserEncodeContext *s = avctx->priv_data;
153	int i, ret;
154
155	if (avctx->channels != 1) {
156	av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
157	return AVERROR(EINVAL);
158	}
159
160	if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
161	avctx->sample_rate != 11025 &&
162	avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
163	avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
164	av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
165	return AVERROR(EINVAL);
166	}
167
168	avctx->frame_size = NELLY_SAMPLES;
169	avctx->initial_padding = NELLY_BUF_LEN;
170	ff_af_queue_init(avctx, &s->afq);
171	s->avctx = avctx;
172	if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
173	goto error;
174	s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
175	if (!s->fdsp) {
176	ret = AVERROR(ENOMEM);
177	goto error;
178	}
179
180	/* Generate overlap window */
181	ff_init_ff_sine_windows(7);
182	/* faster way of doing
183	for (i = 0; i < POW_TABLE_SIZE; i++)
184	pow_table[i] = 2^(-i / 2048.0 - 3.0 + POW_TABLE_OFFSET); */
185	pow_table[0] = 1;
186	pow_table[1024] = M_SQRT1_2;
187	for (i = 1; i < 513; i++) {
188	double tmp = exp2(-i / 2048.0);
189	pow_table[i] = tmp;
190	pow_table[1024-i] = M_SQRT1_2 / tmp;
191	pow_table[1024+i] = tmp * M_SQRT1_2;
192	pow_table[2048-i] = 0.5 / tmp;
193	}
194
195	if (s->avctx->trellis) {
196	s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));
197	s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
198	if (!s->opt \|\| !s->path) {
199	ret = AVERROR(ENOMEM);
200	goto error;
201	}
202	}
203
204	return 0;
205	error:
206	encode_end(avctx);
207	return ret;
208	}
209
/*
 * Pick the entry of `table` closest to `val` and store its index in the
 * caller's local `best_idx`.
 *
 * LUT maps clip((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1) to a starting
 * index; the macro then moves to the next entry if that one is nearer.
 * Wrapped in do { } while (0) so it behaves as a single statement, with every
 * argument parenthesised. `val` is evaluated more than once.
 */
#define find_best(val, table, LUT, LUT_add, LUT_size) \
    do { \
        best_idx = \
            (LUT)[av_clip((lrintf(val) >> 8) + (LUT_add), 0, (LUT_size) - 1)]; \
        if (fabs((val) - (table)[best_idx]) > fabs((val) - (table)[best_idx + 1])) \
            best_idx++; \
    } while (0)
215
216	static void get_exponent_greedy(NellyMoserEncodeContext s, float cand, int *idx_table)
217	{
218	int band, best_idx, power_idx = 0;
219	float power_candidate;
220
221	//base exponent
222	find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
223	idx_table[0] = best_idx;
224	power_idx = ff_nelly_init_table[best_idx];
225
226	for (band = 1; band < NELLY_BANDS; band++) {
227	power_candidate = cand[band] - power_idx;
228	find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
229	idx_table[band] = best_idx;
230	power_idx += ff_nelly_delta_table[best_idx];
231	}
232	}
233
/**
 * Squared difference between two values.
 *
 * @param x    first value
 * @param y    second value
 * @param band band number (currently ignored)
 * @return (x - y) squared
 */
static inline float distance(float x, float y, int band)
{
    const float diff = x - y;

    (void)band;
    return diff * diff;
}
240
241	static void get_exponent_dynamic(NellyMoserEncodeContext s, float cand, int *idx_table)
242	{
243	int i, j, band, best_idx;
244	float power_candidate, best_val;
245
246	float (*opt )[OPT_SIZE] = s->opt ;
247	uint8_t(*path)[OPT_SIZE] = s->path;
248
249	for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
250	opt[0][i] = INFINITY;
251	}
252
253	for (i = 0; i < 64; i++) {
254	opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
255	path[0][ff_nelly_init_table[i]] = i;
256	}
257
258	for (band = 1; band < NELLY_BANDS; band++) {
259	int q, c = 0;
260	float tmp;
261	int idx_min, idx_max, idx;
262	power_candidate = cand[band];
263	for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
264	idx_min = FFMAX(0, cand[band] - q);
265	idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
266	for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
267	if ( isinf(opt[band - 1][i]) )
268	continue;
269	for (j = 0; j < 32; j++) {
270	idx = i + ff_nelly_delta_table[j];
271	if (idx > idx_max)
272	break;
273	if (idx >= idx_min) {
274	tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
275	if (opt[band][idx] > tmp) {
276	opt[band][idx] = tmp;
277	path[band][idx] = j;
278	c = 1;
279	}
280	}
281	}
282	}
283	}
284	av_assert1(c); //FIXME
285	}
286
287	best_val = INFINITY;
288	best_idx = -1;
289	band = NELLY_BANDS - 1;
290	for (i = 0; i < OPT_SIZE; i++) {
291	if (best_val > opt[band][i]) {
292	best_val = opt[band][i];
293	best_idx = i;
294	}
295	}
296	for (band = NELLY_BANDS - 1; band >= 0; band--) {
297	idx_table[band] = path[band][best_idx];
298	if (band) {
299	best_idx -= ff_nelly_delta_table[path[band][best_idx]];
300	}
301	}
302	}
303
304	/**
305	* Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
306	* @param s encoder context
307	* @param output output buffer
308	* @param output_size size of output buffer
309	*/
310	static void encode_block(NellyMoserEncodeContext s, unsigned char output, int output_size)
311	{
312	PutBitContext pb;
313	int i, j, band, block, best_idx, power_idx = 0;
314	float power_val, coeff, coeff_sum;
315	float pows[NELLY_FILL_LEN];
316	int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
317	float cand[NELLY_BANDS];
318
319	apply_mdct(s);
320
321	init_put_bits(&pb, output, output_size);
322
323	i = 0;
324	for (band = 0; band < NELLY_BANDS; band++) {
325	coeff_sum = 0;
326	for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
327	coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
328	+ s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
329	}
330	cand[band] =
331	log2(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0;
332	}
333
334	if (s->avctx->trellis) {
335	get_exponent_dynamic(s, cand, idx_table);
336	} else {
337	get_exponent_greedy(s, cand, idx_table);
338	}
339
340	i = 0;
341	for (band = 0; band < NELLY_BANDS; band++) {
342	if (band) {
343	power_idx += ff_nelly_delta_table[idx_table[band]];
344	put_bits(&pb, 5, idx_table[band]);
345	} else {
346	power_idx = ff_nelly_init_table[idx_table[0]];
347	put_bits(&pb, 6, idx_table[0]);
348	}
349	power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
350	for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
351	s->mdct_out[i] *= power_val;
352	s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
353	pows[i] = power_idx;
354	}
355	}
356
357	ff_nelly_get_sample_bits(pows, bits);
358
359	for (block = 0; block < 2; block++) {
360	for (i = 0; i < NELLY_FILL_LEN; i++) {
361	if (bits[i] > 0) {
362	const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
363	coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
364	best_idx =
365	quant_lut[av_clip (
366	coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
367	quant_lut_offset[bits[i]],
368	quant_lut_offset[bits[i]+1] - 1
369	)];
370	if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
371	best_idx++;
372
373	put_bits(&pb, bits[i], best_idx);
374	}
375	}
376	if (!block)
377	put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
378	}
379
380	flush_put_bits(&pb);
381	memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
382	}
383
384	static int encode_frame(AVCodecContext avctx, AVPacket avpkt,
385	const AVFrame frame, int got_packet_ptr)
386	{
387	NellyMoserEncodeContext *s = avctx->priv_data;
388	int ret;
389
390	if (s->last_frame)
391	return 0;
392
393	memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
394	if (frame) {
395	memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
396	frame->nb_samples * sizeof(*s->buf));
397	if (frame->nb_samples < NELLY_SAMPLES) {
398	memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
399	(NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
400	if (frame->nb_samples >= NELLY_BUF_LEN)
401	s->last_frame = 1;
402	}
403	if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
404	return ret;
405	} else {
406	memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
407	s->last_frame = 1;
408	}
409
410	if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN, 0)) < 0)
411	return ret;
412	encode_block(s, avpkt->data, avpkt->size);
413
414	/* Get the next frame pts/duration */
415	ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
416	&avpkt->duration);
417
418	*got_packet_ptr = 1;
419	return 0;
420	}
421
422	AVCodec ff_nellymoser_encoder = {
423	.name = "nellymoser",
424	.long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
425	.type = AVMEDIA_TYPE_AUDIO,
426	.id = AV_CODEC_ID_NELLYMOSER,
427	.priv_data_size = sizeof(NellyMoserEncodeContext),
428	.init = encode_init,
429	.encode2 = encode_frame,
430	.close = encode_end,
431	.capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME \| AV_CODEC_CAP_DELAY,
432	.sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
433	AV_SAMPLE_FMT_NONE },
434	};
435