blob: c68babd549355a0a474c1c5c627fcc54d275d1be
/*
 * WMA compatible encoder
 * Copyright (c) 2007 Michael Niedermayer
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21 | |
22 | #include "libavutil/attributes.h" |
23 | #include "libavutil/ffmath.h" |
24 | |
25 | #include "avcodec.h" |
26 | #include "internal.h" |
27 | #include "wma.h" |
28 | #include "libavutil/avassert.h" |
29 | |
30 | |
31 | static av_cold int encode_init(AVCodecContext *avctx) |
32 | { |
33 | WMACodecContext *s = avctx->priv_data; |
34 | int i, flags1, flags2, block_align; |
35 | uint8_t *extradata; |
36 | int ret; |
37 | |
38 | s->avctx = avctx; |
39 | |
40 | if (avctx->channels > MAX_CHANNELS) { |
41 | av_log(avctx, AV_LOG_ERROR, |
42 | "too many channels: got %i, need %i or fewer\n", |
43 | avctx->channels, MAX_CHANNELS); |
44 | return AVERROR(EINVAL); |
45 | } |
46 | |
47 | if (avctx->sample_rate > 48000) { |
48 | av_log(avctx, AV_LOG_ERROR, "sample rate is too high: %d > 48kHz\n", |
49 | avctx->sample_rate); |
50 | return AVERROR(EINVAL); |
51 | } |
52 | |
53 | if (avctx->bit_rate < 24 * 1000) { |
54 | av_log(avctx, AV_LOG_ERROR, |
55 | "bitrate too low: got %"PRId64", need 24000 or higher\n", |
56 | (int64_t)avctx->bit_rate); |
57 | return AVERROR(EINVAL); |
58 | } |
59 | |
60 | /* extract flag info */ |
61 | flags1 = 0; |
62 | flags2 = 1; |
63 | if (avctx->codec->id == AV_CODEC_ID_WMAV1) { |
64 | extradata = av_malloc(4); |
65 | if (!extradata) |
66 | return AVERROR(ENOMEM); |
67 | avctx->extradata_size = 4; |
68 | AV_WL16(extradata, flags1); |
69 | AV_WL16(extradata + 2, flags2); |
70 | } else if (avctx->codec->id == AV_CODEC_ID_WMAV2) { |
71 | extradata = av_mallocz(10); |
72 | if (!extradata) |
73 | return AVERROR(ENOMEM); |
74 | avctx->extradata_size = 10; |
75 | AV_WL32(extradata, flags1); |
76 | AV_WL16(extradata + 4, flags2); |
77 | } else { |
78 | av_assert0(0); |
79 | } |
80 | avctx->extradata = extradata; |
81 | s->use_exp_vlc = flags2 & 0x0001; |
82 | s->use_bit_reservoir = flags2 & 0x0002; |
83 | s->use_variable_block_len = flags2 & 0x0004; |
84 | if (avctx->channels == 2) |
85 | s->ms_stereo = 1; |
86 | |
87 | if ((ret = ff_wma_init(avctx, flags2)) < 0) |
88 | return ret; |
89 | |
90 | /* init MDCT */ |
91 | for (i = 0; i < s->nb_block_sizes; i++) |
92 | ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 0, 1.0); |
93 | |
94 | block_align = avctx->bit_rate * (int64_t) s->frame_len / |
95 | (avctx->sample_rate * 8); |
96 | block_align = FFMIN(block_align, MAX_CODED_SUPERFRAME_SIZE); |
97 | avctx->block_align = block_align; |
98 | avctx->frame_size = avctx->initial_padding = s->frame_len; |
99 | |
100 | return 0; |
101 | } |
102 | |
103 | static int apply_window_and_mdct(AVCodecContext *avctx, const AVFrame *frame) |
104 | { |
105 | WMACodecContext *s = avctx->priv_data; |
106 | float **audio = (float **) frame->extended_data; |
107 | int len = frame->nb_samples; |
108 | int window_index = s->frame_len_bits - s->block_len_bits; |
109 | FFTContext *mdct = &s->mdct_ctx[window_index]; |
110 | int ch; |
111 | const float *win = s->windows[window_index]; |
112 | int window_len = 1 << s->block_len_bits; |
113 | float n = 2.0 * 32768.0 / window_len; |
114 | |
115 | for (ch = 0; ch < avctx->channels; ch++) { |
116 | memcpy(s->output, s->frame_out[ch], window_len * sizeof(*s->output)); |
117 | s->fdsp->vector_fmul_scalar(s->frame_out[ch], audio[ch], n, len); |
118 | s->fdsp->vector_fmul_reverse(&s->output[window_len], s->frame_out[ch], |
119 | win, len); |
120 | s->fdsp->vector_fmul(s->frame_out[ch], s->frame_out[ch], win, len); |
121 | mdct->mdct_calc(mdct, s->coefs[ch], s->output); |
122 | if (!isfinite(s->coefs[ch][0])) { |
123 | av_log(avctx, AV_LOG_ERROR, "Input contains NaN/+-Inf\n"); |
124 | return AVERROR(EINVAL); |
125 | } |
126 | } |
127 | |
128 | return 0; |
129 | } |
130 | |
131 | // FIXME use for decoding too |
132 | static void init_exp(WMACodecContext *s, int ch, const int *exp_param) |
133 | { |
134 | int n; |
135 | const uint16_t *ptr; |
136 | float v, *q, max_scale, *q_end; |
137 | |
138 | ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits]; |
139 | q = s->exponents[ch]; |
140 | q_end = q + s->block_len; |
141 | max_scale = 0; |
142 | while (q < q_end) { |
143 | /* XXX: use a table */ |
144 | v = ff_exp10(*exp_param++ *(1.0 / 16.0)); |
145 | max_scale = FFMAX(max_scale, v); |
146 | n = *ptr++; |
147 | do { |
148 | *q++ = v; |
149 | } while (--n); |
150 | } |
151 | s->max_exponent[ch] = max_scale; |
152 | } |
153 | |
154 | static void encode_exp_vlc(WMACodecContext *s, int ch, const int *exp_param) |
155 | { |
156 | int last_exp; |
157 | const uint16_t *ptr; |
158 | float *q, *q_end; |
159 | |
160 | ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits]; |
161 | q = s->exponents[ch]; |
162 | q_end = q + s->block_len; |
163 | if (s->version == 1) { |
164 | last_exp = *exp_param++; |
165 | av_assert0(last_exp - 10 >= 0 && last_exp - 10 < 32); |
166 | put_bits(&s->pb, 5, last_exp - 10); |
167 | q += *ptr++; |
168 | } else |
169 | last_exp = 36; |
170 | while (q < q_end) { |
171 | int exp = *exp_param++; |
172 | int code = exp - last_exp + 60; |
173 | av_assert1(code >= 0 && code < 120); |
174 | put_bits(&s->pb, ff_aac_scalefactor_bits[code], |
175 | ff_aac_scalefactor_code[code]); |
176 | /* XXX: use a table */ |
177 | q += *ptr++; |
178 | last_exp = exp; |
179 | } |
180 | } |
181 | |
182 | static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE], |
183 | int total_gain) |
184 | { |
185 | int v, bsize, ch, coef_nb_bits, parse_exponents; |
186 | float mdct_norm; |
187 | int nb_coefs[MAX_CHANNELS]; |
188 | static const int fixed_exp[25] = { |
189 | 20, 20, 20, 20, 20, |
190 | 20, 20, 20, 20, 20, |
191 | 20, 20, 20, 20, 20, |
192 | 20, 20, 20, 20, 20, |
193 | 20, 20, 20, 20, 20 |
194 | }; |
195 | |
196 | // FIXME remove duplication relative to decoder |
197 | if (s->use_variable_block_len) { |
198 | av_assert0(0); // FIXME not implemented |
199 | } else { |
200 | /* fixed block len */ |
201 | s->next_block_len_bits = s->frame_len_bits; |
202 | s->prev_block_len_bits = s->frame_len_bits; |
203 | s->block_len_bits = s->frame_len_bits; |
204 | } |
205 | |
206 | s->block_len = 1 << s->block_len_bits; |
207 | // av_assert0((s->block_pos + s->block_len) <= s->frame_len); |
208 | bsize = s->frame_len_bits - s->block_len_bits; |
209 | |
210 | // FIXME factor |
211 | v = s->coefs_end[bsize] - s->coefs_start; |
212 | for (ch = 0; ch < s->avctx->channels; ch++) |
213 | nb_coefs[ch] = v; |
214 | { |
215 | int n4 = s->block_len / 2; |
216 | mdct_norm = 1.0 / (float) n4; |
217 | if (s->version == 1) |
218 | mdct_norm *= sqrt(n4); |
219 | } |
220 | |
221 | if (s->avctx->channels == 2) |
222 | put_bits(&s->pb, 1, !!s->ms_stereo); |
223 | |
224 | for (ch = 0; ch < s->avctx->channels; ch++) { |
225 | // FIXME only set channel_coded when needed, instead of always |
226 | s->channel_coded[ch] = 1; |
227 | if (s->channel_coded[ch]) |
228 | init_exp(s, ch, fixed_exp); |
229 | } |
230 | |
231 | for (ch = 0; ch < s->avctx->channels; ch++) { |
232 | if (s->channel_coded[ch]) { |
233 | WMACoef *coefs1; |
234 | float *coefs, *exponents, mult; |
235 | int i, n; |
236 | |
237 | coefs1 = s->coefs1[ch]; |
238 | exponents = s->exponents[ch]; |
239 | mult = ff_exp10(total_gain * 0.05) / s->max_exponent[ch]; |
240 | mult *= mdct_norm; |
241 | coefs = src_coefs[ch]; |
242 | if (s->use_noise_coding && 0) { |
243 | av_assert0(0); // FIXME not implemented |
244 | } else { |
245 | coefs += s->coefs_start; |
246 | n = nb_coefs[ch]; |
247 | for (i = 0; i < n; i++) { |
248 | double t = *coefs++ / (exponents[i] * mult); |
249 | if (t < -32768 || t > 32767) |
250 | return -1; |
251 | |
252 | coefs1[i] = lrint(t); |
253 | } |
254 | } |
255 | } |
256 | } |
257 | |
258 | v = 0; |
259 | for (ch = 0; ch < s->avctx->channels; ch++) { |
260 | int a = s->channel_coded[ch]; |
261 | put_bits(&s->pb, 1, a); |
262 | v |= a; |
263 | } |
264 | |
265 | if (!v) |
266 | return 1; |
267 | |
268 | for (v = total_gain - 1; v >= 127; v -= 127) |
269 | put_bits(&s->pb, 7, 127); |
270 | put_bits(&s->pb, 7, v); |
271 | |
272 | coef_nb_bits = ff_wma_total_gain_to_bits(total_gain); |
273 | |
274 | if (s->use_noise_coding) { |
275 | for (ch = 0; ch < s->avctx->channels; ch++) { |
276 | if (s->channel_coded[ch]) { |
277 | int i, n; |
278 | n = s->exponent_high_sizes[bsize]; |
279 | for (i = 0; i < n; i++) { |
280 | put_bits(&s->pb, 1, s->high_band_coded[ch][i] = 0); |
281 | if (0) |
282 | nb_coefs[ch] -= s->exponent_high_bands[bsize][i]; |
283 | } |
284 | } |
285 | } |
286 | } |
287 | |
288 | parse_exponents = 1; |
289 | if (s->block_len_bits != s->frame_len_bits) |
290 | put_bits(&s->pb, 1, parse_exponents); |
291 | |
292 | if (parse_exponents) { |
293 | for (ch = 0; ch < s->avctx->channels; ch++) { |
294 | if (s->channel_coded[ch]) { |
295 | if (s->use_exp_vlc) { |
296 | encode_exp_vlc(s, ch, fixed_exp); |
297 | } else { |
298 | av_assert0(0); // FIXME not implemented |
299 | // encode_exp_lsp(s, ch); |
300 | } |
301 | } |
302 | } |
303 | } else |
304 | av_assert0(0); // FIXME not implemented |
305 | |
306 | for (ch = 0; ch < s->avctx->channels; ch++) { |
307 | if (s->channel_coded[ch]) { |
308 | int run, tindex; |
309 | WMACoef *ptr, *eptr; |
310 | tindex = (ch == 1 && s->ms_stereo); |
311 | ptr = &s->coefs1[ch][0]; |
312 | eptr = ptr + nb_coefs[ch]; |
313 | |
314 | run = 0; |
315 | for (; ptr < eptr; ptr++) { |
316 | if (*ptr) { |
317 | int level = *ptr; |
318 | int abs_level = FFABS(level); |
319 | int code = 0; |
320 | if (abs_level <= s->coef_vlcs[tindex]->max_level) |
321 | if (run < s->coef_vlcs[tindex]->levels[abs_level - 1]) |
322 | code = run + s->int_table[tindex][abs_level - 1]; |
323 | |
324 | av_assert2(code < s->coef_vlcs[tindex]->n); |
325 | put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[code], |
326 | s->coef_vlcs[tindex]->huffcodes[code]); |
327 | |
328 | if (code == 0) { |
329 | if (1 << coef_nb_bits <= abs_level) |
330 | return -1; |
331 | |
332 | put_bits(&s->pb, coef_nb_bits, abs_level); |
333 | put_bits(&s->pb, s->frame_len_bits, run); |
334 | } |
335 | // FIXME the sign is flipped somewhere |
336 | put_bits(&s->pb, 1, level < 0); |
337 | run = 0; |
338 | } else |
339 | run++; |
340 | } |
341 | if (run) |
342 | put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[1], |
343 | s->coef_vlcs[tindex]->huffcodes[1]); |
344 | } |
345 | if (s->version == 1 && s->avctx->channels >= 2) |
346 | avpriv_align_put_bits(&s->pb); |
347 | } |
348 | return 0; |
349 | } |
350 | |
351 | static int encode_frame(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE], |
352 | uint8_t *buf, int buf_size, int total_gain) |
353 | { |
354 | init_put_bits(&s->pb, buf, buf_size); |
355 | |
356 | if (s->use_bit_reservoir) |
357 | av_assert0(0); // FIXME not implemented |
358 | else if (encode_block(s, src_coefs, total_gain) < 0) |
359 | return INT_MAX; |
360 | |
361 | avpriv_align_put_bits(&s->pb); |
362 | |
363 | return put_bits_count(&s->pb) / 8 - s->avctx->block_align; |
364 | } |
365 | |
366 | static int encode_superframe(AVCodecContext *avctx, AVPacket *avpkt, |
367 | const AVFrame *frame, int *got_packet_ptr) |
368 | { |
369 | WMACodecContext *s = avctx->priv_data; |
370 | int i, total_gain, ret, error; |
371 | |
372 | s->block_len_bits = s->frame_len_bits; // required by non variable block len |
373 | s->block_len = 1 << s->block_len_bits; |
374 | |
375 | ret = apply_window_and_mdct(avctx, frame); |
376 | |
377 | if (ret < 0) |
378 | return ret; |
379 | |
380 | if (s->ms_stereo) { |
381 | float a, b; |
382 | int i; |
383 | |
384 | for (i = 0; i < s->block_len; i++) { |
385 | a = s->coefs[0][i] * 0.5; |
386 | b = s->coefs[1][i] * 0.5; |
387 | s->coefs[0][i] = a + b; |
388 | s->coefs[1][i] = a - b; |
389 | } |
390 | } |
391 | |
392 | if ((ret = ff_alloc_packet2(avctx, avpkt, 2 * MAX_CODED_SUPERFRAME_SIZE, 0)) < 0) |
393 | return ret; |
394 | |
395 | total_gain = 128; |
396 | for (i = 64; i; i >>= 1) { |
397 | error = encode_frame(s, s->coefs, avpkt->data, avpkt->size, |
398 | total_gain - i); |
399 | if (error <= 0) |
400 | total_gain -= i; |
401 | } |
402 | |
403 | while(total_gain <= 128 && error > 0) |
404 | error = encode_frame(s, s->coefs, avpkt->data, avpkt->size, total_gain++); |
405 | if (error > 0) { |
406 | av_log(avctx, AV_LOG_ERROR, "Invalid input data or requested bitrate too low, cannot encode\n"); |
407 | avpkt->size = 0; |
408 | return AVERROR(EINVAL); |
409 | } |
410 | av_assert0((put_bits_count(&s->pb) & 7) == 0); |
411 | i= avctx->block_align - (put_bits_count(&s->pb)+7)/8; |
412 | av_assert0(i>=0); |
413 | while(i--) |
414 | put_bits(&s->pb, 8, 'N'); |
415 | |
416 | flush_put_bits(&s->pb); |
417 | av_assert0(put_bits_ptr(&s->pb) - s->pb.buf == avctx->block_align); |
418 | |
419 | if (frame->pts != AV_NOPTS_VALUE) |
420 | avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding); |
421 | |
422 | avpkt->size = avctx->block_align; |
423 | *got_packet_ptr = 1; |
424 | return 0; |
425 | } |
426 | |
#if CONFIG_WMAV1_ENCODER
/* Codec descriptor for the "wmav1" encoder; it shares its init, encode and
 * close callbacks with the "wmav2" encoder and accepts planar float input. */
AVCodec ff_wmav1_encoder = {
    .name           = "wmav1",
    .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Audio 1"),
    .type           = AVMEDIA_TYPE_AUDIO,
    .id             = AV_CODEC_ID_WMAV1,
    .priv_data_size = sizeof(WMACodecContext),
    .init           = encode_init,
    .encode2        = encode_superframe,
    .close          = ff_wma_end,
    .sample_fmts    = (const enum AVSampleFormat[]) {
        AV_SAMPLE_FMT_FLTP,
        AV_SAMPLE_FMT_NONE
    },
};
#endif
#if CONFIG_WMAV2_ENCODER
/* Codec descriptor for the "wmav2" encoder; it shares its init, encode and
 * close callbacks with the "wmav1" encoder and accepts planar float input. */
AVCodec ff_wmav2_encoder = {
    .name           = "wmav2",
    .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Audio 2"),
    .type           = AVMEDIA_TYPE_AUDIO,
    .id             = AV_CODEC_ID_WMAV2,
    .priv_data_size = sizeof(WMACodecContext),
    .init           = encode_init,
    .encode2        = encode_superframe,
    .close          = ff_wma_end,
    .sample_fmts    = (const enum AVSampleFormat[]) {
        AV_SAMPLE_FMT_FLTP,
        AV_SAMPLE_FMT_NONE
    },
};
#endif
455 |