blob: 2e3ca79fdd1c8185a7c15ef56496b57363265f18
1 | /* |
2 | * Simple free lossless/lossy audio codec |
3 | * Copyright (c) 2004 Alex Beregszaszi |
4 | * |
5 | * This file is part of FFmpeg. |
6 | * |
7 | * FFmpeg is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; either |
10 | * version 2.1 of the License, or (at your option) any later version. |
11 | * |
12 | * FFmpeg is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | * Lesser General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU Lesser General Public |
18 | * License along with FFmpeg; if not, write to the Free Software |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | */ |
#include <limits.h>

#include "avcodec.h"
#include "get_bits.h"
#include "golomb.h"
#include "internal.h"
#include "rangecoder.h"
26 | |
27 | |
28 | /** |
29 | * @file |
30 | * Simple free lossless/lossy audio codec |
31 | * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk) |
32 | * Written and designed by Alex Beregszaszi |
33 | * |
34 | * TODO: |
35 | * - CABAC put/get_symbol |
36 | * - independent quantizer for channels |
37 | * - >2 channels support |
38 | * - more decorrelation types |
39 | * - more tap_quant tests |
40 | * - selectable intlist writers/readers (bonk-style, golomb, cabac) |
41 | */ |
42 | |
/* Upper bound on the channel count; sizes the per-channel arrays in SonicContext. */
#define MAX_CHANNELS 2

/* Inter-channel decorrelation modes (written to / read from the 2-bit header field). */
#define MID_SIDE     0
#define LEFT_SIDE    1
#define RIGHT_SIDE   2
48 | |
49 | typedef struct SonicContext { |
50 | int version; |
51 | int minor_version; |
52 | int lossless, decorrelation; |
53 | |
54 | int num_taps, downsampling; |
55 | double quantization; |
56 | |
57 | int channels, samplerate, block_align, frame_size; |
58 | |
59 | int *tap_quant; |
60 | int *int_samples; |
61 | int *coded_samples[MAX_CHANNELS]; |
62 | |
63 | // for encoding |
64 | int *tail; |
65 | int tail_size; |
66 | int *window; |
67 | int window_size; |
68 | |
69 | // for decoding |
70 | int *predictor_k; |
71 | int *predictor_state[MAX_CHANNELS]; |
72 | } SonicContext; |
73 | |
/* Fixed-point precision (in bits) of the predictor coefficients. */
#define LATTICE_SHIFT   10
/* Extra precision (in bits) given to samples in lossy mode. */
#define SAMPLE_SHIFT    4
#define LATTICE_FACTOR  (1 << LATTICE_SHIFT)
#define SAMPLE_FACTOR   (1 << SAMPLE_SHIFT)

/* Constants used by the encoder's rate control. */
#define BASE_QUANT      0.6
#define RATE_VARIATION  3.0
81 | |
/**
 * Divide a by 2^b, rounding to nearest (halves round towards +infinity).
 *
 * The rounding constant is added in unsigned arithmetic so that values of
 * a close to INT_MAX wrap instead of triggering signed-overflow undefined
 * behaviour.
 *
 * @param a value to scale down
 * @param b shift amount, expected to be >= 1
 * @return (a + 2^(b-1)) >> b
 */
static inline int shift(int a,int b)
{
    return (int)(a + (1U << (b - 1))) >> b;
}
86 | |
/**
 * Shift a right by b bits and add one to the result when a is negative.
 *
 * @param a value to scale down
 * @param b shift amount
 * @return (a >> b), incremented by 1 if a < 0
 */
static inline int shift_down(int a,int b)
{
    int shifted = a >> b;

    if (a < 0)
        shifted++;

    return shifted;
}
91 | |
92 | static av_always_inline av_flatten void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed, uint64_t rc_stat[256][2], uint64_t rc_stat2[32][2]){ |
93 | int i; |
94 | |
95 | #define put_rac(C,S,B) \ |
96 | do{\ |
97 | if(rc_stat){\ |
98 | rc_stat[*(S)][B]++;\ |
99 | rc_stat2[(S)-state][B]++;\ |
100 | }\ |
101 | put_rac(C,S,B);\ |
102 | }while(0) |
103 | |
104 | if(v){ |
105 | const int a= FFABS(v); |
106 | const int e= av_log2(a); |
107 | put_rac(c, state+0, 0); |
108 | if(e<=9){ |
109 | for(i=0; i<e; i++){ |
110 | put_rac(c, state+1+i, 1); //1..10 |
111 | } |
112 | put_rac(c, state+1+i, 0); |
113 | |
114 | for(i=e-1; i>=0; i--){ |
115 | put_rac(c, state+22+i, (a>>i)&1); //22..31 |
116 | } |
117 | |
118 | if(is_signed) |
119 | put_rac(c, state+11 + e, v < 0); //11..21 |
120 | }else{ |
121 | for(i=0; i<e; i++){ |
122 | put_rac(c, state+1+FFMIN(i,9), 1); //1..10 |
123 | } |
124 | put_rac(c, state+1+9, 0); |
125 | |
126 | for(i=e-1; i>=0; i--){ |
127 | put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31 |
128 | } |
129 | |
130 | if(is_signed) |
131 | put_rac(c, state+11 + 10, v < 0); //11..21 |
132 | } |
133 | }else{ |
134 | put_rac(c, state+0, 1); |
135 | } |
136 | #undef put_rac |
137 | } |
138 | |
139 | static inline av_flatten int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){ |
140 | if(get_rac(c, state+0)) |
141 | return 0; |
142 | else{ |
143 | int i, e, a; |
144 | e= 0; |
145 | while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10 |
146 | e++; |
147 | } |
148 | |
149 | a= 1; |
150 | for(i=e-1; i>=0; i--){ |
151 | a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31 |
152 | } |
153 | |
154 | e= -(is_signed && get_rac(c, state+11 + FFMIN(e, 10))); //11..21 |
155 | return (a^e)-e; |
156 | } |
157 | } |
158 | |
159 | #if 1 |
160 | static inline int intlist_write(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part) |
161 | { |
162 | int i; |
163 | |
164 | for (i = 0; i < entries; i++) |
165 | put_symbol(c, state, buf[i], 1, NULL, NULL); |
166 | |
167 | return 1; |
168 | } |
169 | |
170 | static inline int intlist_read(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part) |
171 | { |
172 | int i; |
173 | |
174 | for (i = 0; i < entries; i++) |
175 | buf[i] = get_symbol(c, state, 1); |
176 | |
177 | return 1; |
178 | } |
179 | #elif 1 |
180 | static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part) |
181 | { |
182 | int i; |
183 | |
184 | for (i = 0; i < entries; i++) |
185 | set_se_golomb(pb, buf[i]); |
186 | |
187 | return 1; |
188 | } |
189 | |
190 | static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part) |
191 | { |
192 | int i; |
193 | |
194 | for (i = 0; i < entries; i++) |
195 | buf[i] = get_se_golomb(gb); |
196 | |
197 | return 1; |
198 | } |
199 | |
200 | #else |
201 | |
/* Divisor controlling how fast the run-length step adapts. */
#define ADAPT_LEVEL 8

/**
 * Count the bits needed to represent x.
 *
 * @param x value to measure
 * @return position of the highest set bit plus one, or 0 when x is 0
 */
static int bits_to_store(uint64_t x)
{
    int width;

    for (width = 0; x != 0; x >>= 1)
        width++;

    return width;
}
215 | |
216 | static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max) |
217 | { |
218 | int i, bits; |
219 | |
220 | if (!max) |
221 | return; |
222 | |
223 | bits = bits_to_store(max); |
224 | |
225 | for (i = 0; i < bits-1; i++) |
226 | put_bits(pb, 1, value & (1 << i)); |
227 | |
228 | if ( (value | (1 << (bits-1))) <= max) |
229 | put_bits(pb, 1, value & (1 << (bits-1))); |
230 | } |
231 | |
232 | static unsigned int read_uint_max(GetBitContext *gb, int max) |
233 | { |
234 | int i, bits, value = 0; |
235 | |
236 | if (!max) |
237 | return 0; |
238 | |
239 | bits = bits_to_store(max); |
240 | |
241 | for (i = 0; i < bits-1; i++) |
242 | if (get_bits1(gb)) |
243 | value += 1 << i; |
244 | |
245 | if ( (value | (1<<(bits-1))) <= max) |
246 | if (get_bits1(gb)) |
247 | value += 1 << (bits-1); |
248 | |
249 | return value; |
250 | } |
251 | |
252 | static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part) |
253 | { |
254 | int i, j, x = 0, low_bits = 0, max = 0; |
255 | int step = 256, pos = 0, dominant = 0, any = 0; |
256 | int *copy, *bits; |
257 | |
258 | copy = av_calloc(entries, sizeof(*copy)); |
259 | if (!copy) |
260 | return AVERROR(ENOMEM); |
261 | |
262 | if (base_2_part) |
263 | { |
264 | int energy = 0; |
265 | |
266 | for (i = 0; i < entries; i++) |
267 | energy += abs(buf[i]); |
268 | |
269 | low_bits = bits_to_store(energy / (entries * 2)); |
270 | if (low_bits > 15) |
271 | low_bits = 15; |
272 | |
273 | put_bits(pb, 4, low_bits); |
274 | } |
275 | |
276 | for (i = 0; i < entries; i++) |
277 | { |
278 | put_bits(pb, low_bits, abs(buf[i])); |
279 | copy[i] = abs(buf[i]) >> low_bits; |
280 | if (copy[i] > max) |
281 | max = abs(copy[i]); |
282 | } |
283 | |
284 | bits = av_calloc(entries*max, sizeof(*bits)); |
285 | if (!bits) |
286 | { |
287 | av_free(copy); |
288 | return AVERROR(ENOMEM); |
289 | } |
290 | |
291 | for (i = 0; i <= max; i++) |
292 | { |
293 | for (j = 0; j < entries; j++) |
294 | if (copy[j] >= i) |
295 | bits[x++] = copy[j] > i; |
296 | } |
297 | |
298 | // store bitstream |
299 | while (pos < x) |
300 | { |
301 | int steplet = step >> 8; |
302 | |
303 | if (pos + steplet > x) |
304 | steplet = x - pos; |
305 | |
306 | for (i = 0; i < steplet; i++) |
307 | if (bits[i+pos] != dominant) |
308 | any = 1; |
309 | |
310 | put_bits(pb, 1, any); |
311 | |
312 | if (!any) |
313 | { |
314 | pos += steplet; |
315 | step += step / ADAPT_LEVEL; |
316 | } |
317 | else |
318 | { |
319 | int interloper = 0; |
320 | |
321 | while (((pos + interloper) < x) && (bits[pos + interloper] == dominant)) |
322 | interloper++; |
323 | |
324 | // note change |
325 | write_uint_max(pb, interloper, (step >> 8) - 1); |
326 | |
327 | pos += interloper + 1; |
328 | step -= step / ADAPT_LEVEL; |
329 | } |
330 | |
331 | if (step < 256) |
332 | { |
333 | step = 65536 / step; |
334 | dominant = !dominant; |
335 | } |
336 | } |
337 | |
338 | // store signs |
339 | for (i = 0; i < entries; i++) |
340 | if (buf[i]) |
341 | put_bits(pb, 1, buf[i] < 0); |
342 | |
343 | av_free(bits); |
344 | av_free(copy); |
345 | |
346 | return 0; |
347 | } |
348 | |
349 | static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part) |
350 | { |
351 | int i, low_bits = 0, x = 0; |
352 | int n_zeros = 0, step = 256, dominant = 0; |
353 | int pos = 0, level = 0; |
354 | int *bits = av_calloc(entries, sizeof(*bits)); |
355 | |
356 | if (!bits) |
357 | return AVERROR(ENOMEM); |
358 | |
359 | if (base_2_part) |
360 | { |
361 | low_bits = get_bits(gb, 4); |
362 | |
363 | if (low_bits) |
364 | for (i = 0; i < entries; i++) |
365 | buf[i] = get_bits(gb, low_bits); |
366 | } |
367 | |
368 | // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits); |
369 | |
370 | while (n_zeros < entries) |
371 | { |
372 | int steplet = step >> 8; |
373 | |
374 | if (!get_bits1(gb)) |
375 | { |
376 | for (i = 0; i < steplet; i++) |
377 | bits[x++] = dominant; |
378 | |
379 | if (!dominant) |
380 | n_zeros += steplet; |
381 | |
382 | step += step / ADAPT_LEVEL; |
383 | } |
384 | else |
385 | { |
386 | int actual_run = read_uint_max(gb, steplet-1); |
387 | |
388 | // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run); |
389 | |
390 | for (i = 0; i < actual_run; i++) |
391 | bits[x++] = dominant; |
392 | |
393 | bits[x++] = !dominant; |
394 | |
395 | if (!dominant) |
396 | n_zeros += actual_run; |
397 | else |
398 | n_zeros++; |
399 | |
400 | step -= step / ADAPT_LEVEL; |
401 | } |
402 | |
403 | if (step < 256) |
404 | { |
405 | step = 65536 / step; |
406 | dominant = !dominant; |
407 | } |
408 | } |
409 | |
410 | // reconstruct unsigned values |
411 | n_zeros = 0; |
412 | for (i = 0; n_zeros < entries; i++) |
413 | { |
414 | while(1) |
415 | { |
416 | if (pos >= entries) |
417 | { |
418 | pos = 0; |
419 | level += 1 << low_bits; |
420 | } |
421 | |
422 | if (buf[pos] >= level) |
423 | break; |
424 | |
425 | pos++; |
426 | } |
427 | |
428 | if (bits[i]) |
429 | buf[pos] += 1 << low_bits; |
430 | else |
431 | n_zeros++; |
432 | |
433 | pos++; |
434 | } |
435 | av_free(bits); |
436 | |
437 | // read signs |
438 | for (i = 0; i < entries; i++) |
439 | if (buf[i] && get_bits1(gb)) |
440 | buf[i] = -buf[i]; |
441 | |
442 | // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos); |
443 | |
444 | return 0; |
445 | } |
446 | #endif |
447 | |
448 | static void predictor_init_state(int *k, int *state, int order) |
449 | { |
450 | int i; |
451 | |
452 | for (i = order-2; i >= 0; i--) |
453 | { |
454 | int j, p, x = state[i]; |
455 | |
456 | for (j = 0, p = i+1; p < order; j++,p++) |
457 | { |
458 | int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT); |
459 | state[p] += shift_down(k[j]*x, LATTICE_SHIFT); |
460 | x = tmp; |
461 | } |
462 | } |
463 | } |
464 | |
465 | static int predictor_calc_error(int *k, int *state, int order, int error) |
466 | { |
467 | int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT); |
468 | |
469 | #if 1 |
470 | int *k_ptr = &(k[order-2]), |
471 | *state_ptr = &(state[order-2]); |
472 | for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--) |
473 | { |
474 | int k_value = *k_ptr, state_value = *state_ptr; |
475 | x -= shift_down(k_value * state_value, LATTICE_SHIFT); |
476 | state_ptr[1] = state_value + shift_down(k_value * x, LATTICE_SHIFT); |
477 | } |
478 | #else |
479 | for (i = order-2; i >= 0; i--) |
480 | { |
481 | x -= shift_down(k[i] * state[i], LATTICE_SHIFT); |
482 | state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT); |
483 | } |
484 | #endif |
485 | |
486 | // don't drift too far, to avoid overflows |
487 | if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16); |
488 | if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16); |
489 | |
490 | state[0] = x; |
491 | |
492 | return x; |
493 | } |
494 | |
495 | #if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER |
496 | // Heavily modified Levinson-Durbin algorithm which |
497 | // copes better with quantization, and calculates the |
498 | // actual whitened result as it goes. |
499 | |
500 | static int modified_levinson_durbin(int *window, int window_entries, |
501 | int *out, int out_entries, int channels, int *tap_quant) |
502 | { |
503 | int i; |
504 | int *state = av_calloc(window_entries, sizeof(*state)); |
505 | |
506 | if (!state) |
507 | return AVERROR(ENOMEM); |
508 | |
509 | memcpy(state, window, 4* window_entries); |
510 | |
511 | for (i = 0; i < out_entries; i++) |
512 | { |
513 | int step = (i+1)*channels, k, j; |
514 | double xx = 0.0, xy = 0.0; |
515 | #if 1 |
516 | int *x_ptr = &(window[step]); |
517 | int *state_ptr = &(state[0]); |
518 | j = window_entries - step; |
519 | for (;j>0;j--,x_ptr++,state_ptr++) |
520 | { |
521 | double x_value = *x_ptr; |
522 | double state_value = *state_ptr; |
523 | xx += state_value*state_value; |
524 | xy += x_value*state_value; |
525 | } |
526 | #else |
527 | for (j = 0; j <= (window_entries - step); j++); |
528 | { |
529 | double stepval = window[step+j]; |
530 | double stateval = window[j]; |
531 | // xx += (double)window[j]*(double)window[j]; |
532 | // xy += (double)window[step+j]*(double)window[j]; |
533 | xx += stateval*stateval; |
534 | xy += stepval*stateval; |
535 | } |
536 | #endif |
537 | if (xx == 0.0) |
538 | k = 0; |
539 | else |
540 | k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5)); |
541 | |
542 | if (k > (LATTICE_FACTOR/tap_quant[i])) |
543 | k = LATTICE_FACTOR/tap_quant[i]; |
544 | if (-k > (LATTICE_FACTOR/tap_quant[i])) |
545 | k = -(LATTICE_FACTOR/tap_quant[i]); |
546 | |
547 | out[i] = k; |
548 | k *= tap_quant[i]; |
549 | |
550 | #if 1 |
551 | x_ptr = &(window[step]); |
552 | state_ptr = &(state[0]); |
553 | j = window_entries - step; |
554 | for (;j>0;j--,x_ptr++,state_ptr++) |
555 | { |
556 | int x_value = *x_ptr; |
557 | int state_value = *state_ptr; |
558 | *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT); |
559 | *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT); |
560 | } |
561 | #else |
562 | for (j=0; j <= (window_entries - step); j++) |
563 | { |
564 | int stepval = window[step+j]; |
565 | int stateval=state[j]; |
566 | window[step+j] += shift_down(k * stateval, LATTICE_SHIFT); |
567 | state[j] += shift_down(k * stepval, LATTICE_SHIFT); |
568 | } |
569 | #endif |
570 | } |
571 | |
572 | av_free(state); |
573 | return 0; |
574 | } |
575 | |
/**
 * Map a sample rate to the index stored in the stream header.
 *
 * @param samplerate sample rate in Hz
 * @return index 0..8, or AVERROR(EINVAL) for a rate without a code
 */
static inline int code_samplerate(int samplerate)
{
    static const int known_rates[] = {
        44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000
    };
    int idx;

    for (idx = 0; idx < (int)(sizeof(known_rates) / sizeof(known_rates[0])); idx++) {
        if (known_rates[idx] == samplerate)
            return idx;
    }

    return AVERROR(EINVAL);
}
592 | |
593 | static av_cold int sonic_encode_init(AVCodecContext *avctx) |
594 | { |
595 | SonicContext *s = avctx->priv_data; |
596 | PutBitContext pb; |
597 | int i; |
598 | |
599 | s->version = 2; |
600 | |
601 | if (avctx->channels > MAX_CHANNELS) |
602 | { |
603 | av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n"); |
604 | return AVERROR(EINVAL); /* only stereo or mono for now */ |
605 | } |
606 | |
607 | if (avctx->channels == 2) |
608 | s->decorrelation = MID_SIDE; |
609 | else |
610 | s->decorrelation = 3; |
611 | |
612 | if (avctx->codec->id == AV_CODEC_ID_SONIC_LS) |
613 | { |
614 | s->lossless = 1; |
615 | s->num_taps = 32; |
616 | s->downsampling = 1; |
617 | s->quantization = 0.0; |
618 | } |
619 | else |
620 | { |
621 | s->num_taps = 128; |
622 | s->downsampling = 2; |
623 | s->quantization = 1.0; |
624 | } |
625 | |
626 | // max tap 2048 |
627 | if (s->num_taps < 32 || s->num_taps > 1024 || s->num_taps % 32) { |
628 | av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n"); |
629 | return AVERROR_INVALIDDATA; |
630 | } |
631 | |
632 | // generate taps |
633 | s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant)); |
634 | if (!s->tap_quant) |
635 | return AVERROR(ENOMEM); |
636 | |
637 | for (i = 0; i < s->num_taps; i++) |
638 | s->tap_quant[i] = ff_sqrt(i+1); |
639 | |
640 | s->channels = avctx->channels; |
641 | s->samplerate = avctx->sample_rate; |
642 | |
643 | s->block_align = 2048LL*s->samplerate/(44100*s->downsampling); |
644 | s->frame_size = s->channels*s->block_align*s->downsampling; |
645 | |
646 | s->tail_size = s->num_taps*s->channels; |
647 | s->tail = av_calloc(s->tail_size, sizeof(*s->tail)); |
648 | if (!s->tail) |
649 | return AVERROR(ENOMEM); |
650 | |
651 | s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k) ); |
652 | if (!s->predictor_k) |
653 | return AVERROR(ENOMEM); |
654 | |
655 | for (i = 0; i < s->channels; i++) |
656 | { |
657 | s->coded_samples[i] = av_calloc(s->block_align, sizeof(**s->coded_samples)); |
658 | if (!s->coded_samples[i]) |
659 | return AVERROR(ENOMEM); |
660 | } |
661 | |
662 | s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples)); |
663 | |
664 | s->window_size = ((2*s->tail_size)+s->frame_size); |
665 | s->window = av_calloc(s->window_size, sizeof(*s->window)); |
666 | if (!s->window || !s->int_samples) |
667 | return AVERROR(ENOMEM); |
668 | |
669 | avctx->extradata = av_mallocz(16); |
670 | if (!avctx->extradata) |
671 | return AVERROR(ENOMEM); |
672 | init_put_bits(&pb, avctx->extradata, 16*8); |
673 | |
674 | put_bits(&pb, 2, s->version); // version |
675 | if (s->version >= 1) |
676 | { |
677 | if (s->version >= 2) { |
678 | put_bits(&pb, 8, s->version); |
679 | put_bits(&pb, 8, s->minor_version); |
680 | } |
681 | put_bits(&pb, 2, s->channels); |
682 | put_bits(&pb, 4, code_samplerate(s->samplerate)); |
683 | } |
684 | put_bits(&pb, 1, s->lossless); |
685 | if (!s->lossless) |
686 | put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision |
687 | put_bits(&pb, 2, s->decorrelation); |
688 | put_bits(&pb, 2, s->downsampling); |
689 | put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024 |
690 | put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table |
691 | |
692 | flush_put_bits(&pb); |
693 | avctx->extradata_size = put_bits_count(&pb)/8; |
694 | |
695 | av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n", |
696 | s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling); |
697 | |
698 | avctx->frame_size = s->block_align*s->downsampling; |
699 | |
700 | return 0; |
701 | } |
702 | |
703 | static av_cold int sonic_encode_close(AVCodecContext *avctx) |
704 | { |
705 | SonicContext *s = avctx->priv_data; |
706 | int i; |
707 | |
708 | for (i = 0; i < s->channels; i++) |
709 | av_freep(&s->coded_samples[i]); |
710 | |
711 | av_freep(&s->predictor_k); |
712 | av_freep(&s->tail); |
713 | av_freep(&s->tap_quant); |
714 | av_freep(&s->window); |
715 | av_freep(&s->int_samples); |
716 | |
717 | return 0; |
718 | } |
719 | |
720 | static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, |
721 | const AVFrame *frame, int *got_packet_ptr) |
722 | { |
723 | SonicContext *s = avctx->priv_data; |
724 | RangeCoder c; |
725 | int i, j, ch, quant = 0, x = 0; |
726 | int ret; |
727 | const short *samples = (const int16_t*)frame->data[0]; |
728 | uint8_t state[32]; |
729 | |
730 | if ((ret = ff_alloc_packet2(avctx, avpkt, s->frame_size * 5 + 1000, 0)) < 0) |
731 | return ret; |
732 | |
733 | ff_init_range_encoder(&c, avpkt->data, avpkt->size); |
734 | ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8); |
735 | memset(state, 128, sizeof(state)); |
736 | |
737 | // short -> internal |
738 | for (i = 0; i < s->frame_size; i++) |
739 | s->int_samples[i] = samples[i]; |
740 | |
741 | if (!s->lossless) |
742 | for (i = 0; i < s->frame_size; i++) |
743 | s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT; |
744 | |
745 | switch(s->decorrelation) |
746 | { |
747 | case MID_SIDE: |
748 | for (i = 0; i < s->frame_size; i += s->channels) |
749 | { |
750 | s->int_samples[i] += s->int_samples[i+1]; |
751 | s->int_samples[i+1] -= shift(s->int_samples[i], 1); |
752 | } |
753 | break; |
754 | case LEFT_SIDE: |
755 | for (i = 0; i < s->frame_size; i += s->channels) |
756 | s->int_samples[i+1] -= s->int_samples[i]; |
757 | break; |
758 | case RIGHT_SIDE: |
759 | for (i = 0; i < s->frame_size; i += s->channels) |
760 | s->int_samples[i] -= s->int_samples[i+1]; |
761 | break; |
762 | } |
763 | |
764 | memset(s->window, 0, 4* s->window_size); |
765 | |
766 | for (i = 0; i < s->tail_size; i++) |
767 | s->window[x++] = s->tail[i]; |
768 | |
769 | for (i = 0; i < s->frame_size; i++) |
770 | s->window[x++] = s->int_samples[i]; |
771 | |
772 | for (i = 0; i < s->tail_size; i++) |
773 | s->window[x++] = 0; |
774 | |
775 | for (i = 0; i < s->tail_size; i++) |
776 | s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i]; |
777 | |
778 | // generate taps |
779 | ret = modified_levinson_durbin(s->window, s->window_size, |
780 | s->predictor_k, s->num_taps, s->channels, s->tap_quant); |
781 | if (ret < 0) |
782 | return ret; |
783 | |
784 | if ((ret = intlist_write(&c, state, s->predictor_k, s->num_taps, 0)) < 0) |
785 | return ret; |
786 | |
787 | for (ch = 0; ch < s->channels; ch++) |
788 | { |
789 | x = s->tail_size+ch; |
790 | for (i = 0; i < s->block_align; i++) |
791 | { |
792 | int sum = 0; |
793 | for (j = 0; j < s->downsampling; j++, x += s->channels) |
794 | sum += s->window[x]; |
795 | s->coded_samples[ch][i] = sum; |
796 | } |
797 | } |
798 | |
799 | // simple rate control code |
800 | if (!s->lossless) |
801 | { |
802 | double energy1 = 0.0, energy2 = 0.0; |
803 | for (ch = 0; ch < s->channels; ch++) |
804 | { |
805 | for (i = 0; i < s->block_align; i++) |
806 | { |
807 | double sample = s->coded_samples[ch][i]; |
808 | energy2 += sample*sample; |
809 | energy1 += fabs(sample); |
810 | } |
811 | } |
812 | |
813 | energy2 = sqrt(energy2/(s->channels*s->block_align)); |
814 | energy1 = M_SQRT2*energy1/(s->channels*s->block_align); |
815 | |
816 | // increase bitrate when samples are like a gaussian distribution |
817 | // reduce bitrate when samples are like a two-tailed exponential distribution |
818 | |
819 | if (energy2 > energy1) |
820 | energy2 += (energy2-energy1)*RATE_VARIATION; |
821 | |
822 | quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR); |
823 | // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2); |
824 | |
825 | quant = av_clip(quant, 1, 65534); |
826 | |
827 | put_symbol(&c, state, quant, 0, NULL, NULL); |
828 | |
829 | quant *= SAMPLE_FACTOR; |
830 | } |
831 | |
832 | // write out coded samples |
833 | for (ch = 0; ch < s->channels; ch++) |
834 | { |
835 | if (!s->lossless) |
836 | for (i = 0; i < s->block_align; i++) |
837 | s->coded_samples[ch][i] = ROUNDED_DIV(s->coded_samples[ch][i], quant); |
838 | |
839 | if ((ret = intlist_write(&c, state, s->coded_samples[ch], s->block_align, 1)) < 0) |
840 | return ret; |
841 | } |
842 | |
843 | // av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8); |
844 | |
845 | avpkt->size = ff_rac_terminate(&c); |
846 | *got_packet_ptr = 1; |
847 | return 0; |
848 | |
849 | } |
850 | #endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */ |
851 | |
852 | #if CONFIG_SONIC_DECODER |
/* Sample rates in Hz, indexed by the 4-bit code stored in the stream header. */
static const int samplerate_table[] = {
    44100, 22050, 11025,
    96000, 48000, 32000,
    24000, 16000,  8000,
};
855 | |
856 | static av_cold int sonic_decode_init(AVCodecContext *avctx) |
857 | { |
858 | SonicContext *s = avctx->priv_data; |
859 | GetBitContext gb; |
860 | int i; |
861 | int ret; |
862 | |
863 | s->channels = avctx->channels; |
864 | s->samplerate = avctx->sample_rate; |
865 | |
866 | if (!avctx->extradata) |
867 | { |
868 | av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n"); |
869 | return AVERROR_INVALIDDATA; |
870 | } |
871 | |
872 | ret = init_get_bits8(&gb, avctx->extradata, avctx->extradata_size); |
873 | if (ret < 0) |
874 | return ret; |
875 | |
876 | s->version = get_bits(&gb, 2); |
877 | if (s->version >= 2) { |
878 | s->version = get_bits(&gb, 8); |
879 | s->minor_version = get_bits(&gb, 8); |
880 | } |
881 | if (s->version != 2) |
882 | { |
883 | av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n"); |
884 | return AVERROR_INVALIDDATA; |
885 | } |
886 | |
887 | if (s->version >= 1) |
888 | { |
889 | int sample_rate_index; |
890 | s->channels = get_bits(&gb, 2); |
891 | sample_rate_index = get_bits(&gb, 4); |
892 | if (sample_rate_index >= FF_ARRAY_ELEMS(samplerate_table)) { |
893 | av_log(avctx, AV_LOG_ERROR, "Invalid sample_rate_index %d\n", sample_rate_index); |
894 | return AVERROR_INVALIDDATA; |
895 | } |
896 | s->samplerate = samplerate_table[sample_rate_index]; |
897 | av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n", |
898 | s->channels, s->samplerate); |
899 | } |
900 | |
901 | if (s->channels > MAX_CHANNELS || s->channels < 1) |
902 | { |
903 | av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n"); |
904 | return AVERROR_INVALIDDATA; |
905 | } |
906 | avctx->channels = s->channels; |
907 | |
908 | s->lossless = get_bits1(&gb); |
909 | if (!s->lossless) |
910 | skip_bits(&gb, 3); // XXX FIXME |
911 | s->decorrelation = get_bits(&gb, 2); |
912 | if (s->decorrelation != 3 && s->channels != 2) { |
913 | av_log(avctx, AV_LOG_ERROR, "invalid decorrelation %d\n", s->decorrelation); |
914 | return AVERROR_INVALIDDATA; |
915 | } |
916 | |
917 | s->downsampling = get_bits(&gb, 2); |
918 | if (!s->downsampling) { |
919 | av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n"); |
920 | return AVERROR_INVALIDDATA; |
921 | } |
922 | |
923 | s->num_taps = (get_bits(&gb, 5)+1)<<5; |
924 | if (get_bits1(&gb)) // XXX FIXME |
925 | av_log(avctx, AV_LOG_INFO, "Custom quant table\n"); |
926 | |
927 | s->block_align = 2048LL*s->samplerate/(44100*s->downsampling); |
928 | s->frame_size = s->channels*s->block_align*s->downsampling; |
929 | // avctx->frame_size = s->block_align; |
930 | |
931 | if (s->num_taps * s->channels > s->frame_size) { |
932 | av_log(avctx, AV_LOG_ERROR, |
933 | "number of taps times channels (%d * %d) larger than frame size %d\n", |
934 | s->num_taps, s->channels, s->frame_size); |
935 | return AVERROR_INVALIDDATA; |
936 | } |
937 | |
938 | av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n", |
939 | s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling); |
940 | |
941 | // generate taps |
942 | s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant)); |
943 | if (!s->tap_quant) |
944 | return AVERROR(ENOMEM); |
945 | |
946 | for (i = 0; i < s->num_taps; i++) |
947 | s->tap_quant[i] = ff_sqrt(i+1); |
948 | |
949 | s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k)); |
950 | |
951 | for (i = 0; i < s->channels; i++) |
952 | { |
953 | s->predictor_state[i] = av_calloc(s->num_taps, sizeof(**s->predictor_state)); |
954 | if (!s->predictor_state[i]) |
955 | return AVERROR(ENOMEM); |
956 | } |
957 | |
958 | for (i = 0; i < s->channels; i++) |
959 | { |
960 | s->coded_samples[i] = av_calloc(s->block_align, sizeof(**s->coded_samples)); |
961 | if (!s->coded_samples[i]) |
962 | return AVERROR(ENOMEM); |
963 | } |
964 | s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples)); |
965 | if (!s->int_samples) |
966 | return AVERROR(ENOMEM); |
967 | |
968 | avctx->sample_fmt = AV_SAMPLE_FMT_S16; |
969 | return 0; |
970 | } |
971 | |
972 | static av_cold int sonic_decode_close(AVCodecContext *avctx) |
973 | { |
974 | SonicContext *s = avctx->priv_data; |
975 | int i; |
976 | |
977 | av_freep(&s->int_samples); |
978 | av_freep(&s->tap_quant); |
979 | av_freep(&s->predictor_k); |
980 | |
981 | for (i = 0; i < s->channels; i++) |
982 | { |
983 | av_freep(&s->predictor_state[i]); |
984 | av_freep(&s->coded_samples[i]); |
985 | } |
986 | |
987 | return 0; |
988 | } |
989 | |
990 | static int sonic_decode_frame(AVCodecContext *avctx, |
991 | void *data, int *got_frame_ptr, |
992 | AVPacket *avpkt) |
993 | { |
994 | const uint8_t *buf = avpkt->data; |
995 | int buf_size = avpkt->size; |
996 | SonicContext *s = avctx->priv_data; |
997 | RangeCoder c; |
998 | uint8_t state[32]; |
999 | int i, quant, ch, j, ret; |
1000 | int16_t *samples; |
1001 | AVFrame *frame = data; |
1002 | |
1003 | if (buf_size == 0) return 0; |
1004 | |
1005 | frame->nb_samples = s->frame_size / avctx->channels; |
1006 | if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) |
1007 | return ret; |
1008 | samples = (int16_t *)frame->data[0]; |
1009 | |
1010 | // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size); |
1011 | |
1012 | memset(state, 128, sizeof(state)); |
1013 | ff_init_range_decoder(&c, buf, buf_size); |
1014 | ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8); |
1015 | |
1016 | intlist_read(&c, state, s->predictor_k, s->num_taps, 0); |
1017 | |
1018 | // dequantize |
1019 | for (i = 0; i < s->num_taps; i++) |
1020 | s->predictor_k[i] *= s->tap_quant[i]; |
1021 | |
1022 | if (s->lossless) |
1023 | quant = 1; |
1024 | else |
1025 | quant = get_symbol(&c, state, 0) * SAMPLE_FACTOR; |
1026 | |
1027 | // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant); |
1028 | |
1029 | for (ch = 0; ch < s->channels; ch++) |
1030 | { |
1031 | int x = ch; |
1032 | |
1033 | predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps); |
1034 | |
1035 | intlist_read(&c, state, s->coded_samples[ch], s->block_align, 1); |
1036 | |
1037 | for (i = 0; i < s->block_align; i++) |
1038 | { |
1039 | for (j = 0; j < s->downsampling - 1; j++) |
1040 | { |
1041 | s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0); |
1042 | x += s->channels; |
1043 | } |
1044 | |
1045 | s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant); |
1046 | x += s->channels; |
1047 | } |
1048 | |
1049 | for (i = 0; i < s->num_taps; i++) |
1050 | s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels]; |
1051 | } |
1052 | |
1053 | switch(s->decorrelation) |
1054 | { |
1055 | case MID_SIDE: |
1056 | for (i = 0; i < s->frame_size; i += s->channels) |
1057 | { |
1058 | s->int_samples[i+1] += shift(s->int_samples[i], 1); |
1059 | s->int_samples[i] -= s->int_samples[i+1]; |
1060 | } |
1061 | break; |
1062 | case LEFT_SIDE: |
1063 | for (i = 0; i < s->frame_size; i += s->channels) |
1064 | s->int_samples[i+1] += s->int_samples[i]; |
1065 | break; |
1066 | case RIGHT_SIDE: |
1067 | for (i = 0; i < s->frame_size; i += s->channels) |
1068 | s->int_samples[i] += s->int_samples[i+1]; |
1069 | break; |
1070 | } |
1071 | |
1072 | if (!s->lossless) |
1073 | for (i = 0; i < s->frame_size; i++) |
1074 | s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT); |
1075 | |
1076 | // internal -> short |
1077 | for (i = 0; i < s->frame_size; i++) |
1078 | samples[i] = av_clip_int16(s->int_samples[i]); |
1079 | |
1080 | *got_frame_ptr = 1; |
1081 | |
1082 | return buf_size; |
1083 | } |
1084 | |
1085 | AVCodec ff_sonic_decoder = { |
1086 | .name = "sonic", |
1087 | .long_name = NULL_IF_CONFIG_SMALL("Sonic"), |
1088 | .type = AVMEDIA_TYPE_AUDIO, |
1089 | .id = AV_CODEC_ID_SONIC, |
1090 | .priv_data_size = sizeof(SonicContext), |
1091 | .init = sonic_decode_init, |
1092 | .close = sonic_decode_close, |
1093 | .decode = sonic_decode_frame, |
1094 | .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_EXPERIMENTAL, |
1095 | }; |
1096 | #endif /* CONFIG_SONIC_DECODER */ |
1097 | |
1098 | #if CONFIG_SONIC_ENCODER |
1099 | AVCodec ff_sonic_encoder = { |
1100 | .name = "sonic", |
1101 | .long_name = NULL_IF_CONFIG_SMALL("Sonic"), |
1102 | .type = AVMEDIA_TYPE_AUDIO, |
1103 | .id = AV_CODEC_ID_SONIC, |
1104 | .priv_data_size = sizeof(SonicContext), |
1105 | .init = sonic_encode_init, |
1106 | .encode2 = sonic_encode_frame, |
1107 | .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE }, |
1108 | .capabilities = AV_CODEC_CAP_EXPERIMENTAL, |
1109 | .close = sonic_encode_close, |
1110 | }; |
1111 | #endif |
1112 | |
1113 | #if CONFIG_SONIC_LS_ENCODER |
1114 | AVCodec ff_sonic_ls_encoder = { |
1115 | .name = "sonicls", |
1116 | .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"), |
1117 | .type = AVMEDIA_TYPE_AUDIO, |
1118 | .id = AV_CODEC_ID_SONIC_LS, |
1119 | .priv_data_size = sizeof(SonicContext), |
1120 | .init = sonic_encode_init, |
1121 | .encode2 = sonic_encode_frame, |
1122 | .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE }, |
1123 | .capabilities = AV_CODEC_CAP_EXPERIMENTAL, |
1124 | .close = sonic_encode_close, |
1125 | }; |
1126 | #endif |
1127 |