blob: 8728c02a3b10a138fe19cf6bd238f17cbdb73304
1 | /* |
2 | * Enhanced Variable Rate Codec, Service Option 3 decoder |
3 | * Copyright (c) 2013 Paul B Mahol |
4 | * |
5 | * This file is part of FFmpeg. |
6 | * |
7 | * FFmpeg is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; either |
10 | * version 2.1 of the License, or (at your option) any later version. |
11 | * |
12 | * FFmpeg is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | * Lesser General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU Lesser General Public |
18 | * License along with FFmpeg; if not, write to the Free Software |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | */ |
21 | |
22 | /** |
23 | * @file |
24 | * Enhanced Variable Rate Codec, Service Option 3 decoder |
25 | * @author Paul B Mahol |
26 | */ |
27 | |
28 | #include "libavutil/mathematics.h" |
29 | #include "libavutil/opt.h" |
30 | #include "avcodec.h" |
31 | #include "internal.h" |
32 | #include "get_bits.h" |
33 | #include "evrcdata.h" |
34 | #include "acelp_vectors.h" |
35 | #include "lsp.h" |
36 | |
/* Minimum separation required between the two LSP frequencies on either
 * side of a codebook split (see decode_lspf()). */
#define MIN_LSP_SEP   (0.05 / (2.0 * M_PI))

/* Bounds applied to the decoded pitch delay; the transmitted delay code is
 * offset by MIN_DELAY. */
#define MIN_DELAY      20
#define MAX_DELAY     120

/* Number of subframes processed for every frame. */
#define NB_SUBFRAMES    3
/* Capacity, in samples, of the per-subframe work buffers. */
#define SUBFRAME_SIZE  54
/* Number of LSP frequencies / LPC coefficients per filter. */
#define FILTER_ORDER   10
/* Number of past samples kept in front of the current subframe in the
 * excitation and postfilter residual buffers. */
#define ACB_SIZE      128
44 | |
/**
 * Packet rate of an EVRC frame.
 *
 * The numeric values matter: they are compared against the rate byte found
 * at the start of a packet and are used to index per-rate tables. The sizes
 * quoted below are the packet sizes recognised by buf_size2bitrate().
 */
typedef enum {
    RATE_ERRS    = -1, ///< the rate could not be determined
    SILENCE      =  0, ///< 1 byte packet
    RATE_QUANT   =  1, ///< 3 byte packet
    RATE_QUARTER =  2, ///< 6 byte packet
    RATE_HALF    =  3, ///< 11 byte packet
    RATE_FULL    =  4, ///< 23 byte packet
} evrc_packet_rate;
53 | |
/**
 * EVRC-A unpacked data frame.
 *
 * Holds the raw transmission codes exactly as read from the bitstream by
 * unpack_frame(); fields that a given rate does not transmit stay zero.
 */
typedef struct EVRCAFrame {
    /** spectral change indicator */
    uint8_t  lpc_flag;
    /** index into LSP codebook */
    uint16_t lsp[4];
    /** pitch delay for entire frame */
    uint8_t  pitch_delay;
    /** delay difference for entire frame */
    uint8_t  delay_diff;
    /** adaptive codebook gain, one per subframe */
    uint8_t  acb_gain[3];
    /** fixed codebook shape, up to four codes per subframe */
    uint16_t fcb_shape[3][4];
    /** fixed codebook gain index, one per subframe */
    uint8_t  fcb_gain[3];
    /** frame energy gain index */
    uint8_t  energy_gain;
    /** tty baud rate bit */
    uint8_t  tty;
} EVRCAFrame;
68 | |
69 | typedef struct EVRCContext { |
70 | AVClass *class; |
71 | |
72 | int postfilter; |
73 | |
74 | GetBitContext gb; |
75 | evrc_packet_rate bitrate; |
76 | evrc_packet_rate last_valid_bitrate; |
77 | EVRCAFrame frame; |
78 | |
79 | float lspf[FILTER_ORDER]; |
80 | float prev_lspf[FILTER_ORDER]; |
81 | float synthesis[FILTER_ORDER]; |
82 | float postfilter_fir[FILTER_ORDER]; |
83 | float postfilter_iir[FILTER_ORDER]; |
84 | float postfilter_residual[ACB_SIZE + SUBFRAME_SIZE]; |
85 | float pitch_delay; |
86 | float prev_pitch_delay; |
87 | float avg_acb_gain; ///< average adaptive codebook gain |
88 | float avg_fcb_gain; ///< average fixed codebook gain |
89 | float pitch[ACB_SIZE + FILTER_ORDER + SUBFRAME_SIZE]; |
90 | float pitch_back[ACB_SIZE]; |
91 | float interpolation_coeffs[136]; |
92 | float energy_vector[NB_SUBFRAMES]; |
93 | float fade_scale; |
94 | float last; |
95 | |
96 | uint8_t prev_energy_gain; |
97 | uint8_t prev_error_flag; |
98 | uint8_t warned_buf_mismatch_bitrate; |
99 | } EVRCContext; |
100 | |
101 | /** |
102 | * Frame unpacking for RATE_FULL, RATE_HALF and RATE_QUANT |
103 | * |
104 | * @param e the context |
105 | * |
106 | * TIA/IS-127 Table 4.21-1 |
107 | */ |
108 | static void unpack_frame(EVRCContext *e) |
109 | { |
110 | EVRCAFrame *frame = &e->frame; |
111 | GetBitContext *gb = &e->gb; |
112 | |
113 | switch (e->bitrate) { |
114 | case RATE_FULL: |
115 | frame->lpc_flag = get_bits1(gb); |
116 | frame->lsp[0] = get_bits(gb, 6); |
117 | frame->lsp[1] = get_bits(gb, 6); |
118 | frame->lsp[2] = get_bits(gb, 9); |
119 | frame->lsp[3] = get_bits(gb, 7); |
120 | frame->pitch_delay = get_bits(gb, 7); |
121 | frame->delay_diff = get_bits(gb, 5); |
122 | frame->acb_gain[0] = get_bits(gb, 3); |
123 | frame->fcb_shape[0][0] = get_bits(gb, 8); |
124 | frame->fcb_shape[0][1] = get_bits(gb, 8); |
125 | frame->fcb_shape[0][2] = get_bits(gb, 8); |
126 | frame->fcb_shape[0][3] = get_bits(gb, 11); |
127 | frame->fcb_gain[0] = get_bits(gb, 5); |
128 | frame->acb_gain[1] = get_bits(gb, 3); |
129 | frame->fcb_shape[1][0] = get_bits(gb, 8); |
130 | frame->fcb_shape[1][1] = get_bits(gb, 8); |
131 | frame->fcb_shape[1][2] = get_bits(gb, 8); |
132 | frame->fcb_shape[1][3] = get_bits(gb, 11); |
133 | frame->fcb_gain [1] = get_bits(gb, 5); |
134 | frame->acb_gain [2] = get_bits(gb, 3); |
135 | frame->fcb_shape[2][0] = get_bits(gb, 8); |
136 | frame->fcb_shape[2][1] = get_bits(gb, 8); |
137 | frame->fcb_shape[2][2] = get_bits(gb, 8); |
138 | frame->fcb_shape[2][3] = get_bits(gb, 11); |
139 | frame->fcb_gain [2] = get_bits(gb, 5); |
140 | frame->tty = get_bits1(gb); |
141 | break; |
142 | case RATE_HALF: |
143 | frame->lsp [0] = get_bits(gb, 7); |
144 | frame->lsp [1] = get_bits(gb, 7); |
145 | frame->lsp [2] = get_bits(gb, 8); |
146 | frame->pitch_delay = get_bits(gb, 7); |
147 | frame->acb_gain [0] = get_bits(gb, 3); |
148 | frame->fcb_shape[0][0] = get_bits(gb, 10); |
149 | frame->fcb_gain [0] = get_bits(gb, 4); |
150 | frame->acb_gain [1] = get_bits(gb, 3); |
151 | frame->fcb_shape[1][0] = get_bits(gb, 10); |
152 | frame->fcb_gain [1] = get_bits(gb, 4); |
153 | frame->acb_gain [2] = get_bits(gb, 3); |
154 | frame->fcb_shape[2][0] = get_bits(gb, 10); |
155 | frame->fcb_gain [2] = get_bits(gb, 4); |
156 | break; |
157 | case RATE_QUANT: |
158 | frame->lsp [0] = get_bits(gb, 4); |
159 | frame->lsp [1] = get_bits(gb, 4); |
160 | frame->energy_gain = get_bits(gb, 8); |
161 | break; |
162 | } |
163 | } |
164 | |
165 | static evrc_packet_rate buf_size2bitrate(const int buf_size) |
166 | { |
167 | switch (buf_size) { |
168 | case 23: return RATE_FULL; |
169 | case 11: return RATE_HALF; |
170 | case 6: return RATE_QUARTER; |
171 | case 3: return RATE_QUANT; |
172 | case 1: return SILENCE; |
173 | } |
174 | |
175 | return RATE_ERRS; |
176 | } |
177 | |
178 | /** |
179 | * Determine the bitrate from the frame size and/or the first byte of the frame. |
180 | * |
181 | * @param avctx the AV codec context |
182 | * @param buf_size length of the buffer |
183 | * @param buf the bufffer |
184 | * |
185 | * @return the bitrate on success, |
186 | * RATE_ERRS if the bitrate cannot be satisfactorily determined |
187 | */ |
188 | static evrc_packet_rate determine_bitrate(AVCodecContext *avctx, |
189 | int *buf_size, |
190 | const uint8_t **buf) |
191 | { |
192 | evrc_packet_rate bitrate; |
193 | |
194 | if ((bitrate = buf_size2bitrate(*buf_size)) >= 0) { |
195 | if (bitrate > **buf) { |
196 | EVRCContext *e = avctx->priv_data; |
197 | if (!e->warned_buf_mismatch_bitrate) { |
198 | av_log(avctx, AV_LOG_WARNING, |
199 | "Claimed bitrate and buffer size mismatch.\n"); |
200 | e->warned_buf_mismatch_bitrate = 1; |
201 | } |
202 | bitrate = **buf; |
203 | } else if (bitrate < **buf) { |
204 | av_log(avctx, AV_LOG_ERROR, |
205 | "Buffer is too small for the claimed bitrate.\n"); |
206 | return RATE_ERRS; |
207 | } |
208 | (*buf)++; |
209 | *buf_size -= 1; |
210 | } else if ((bitrate = buf_size2bitrate(*buf_size + 1)) >= 0) { |
211 | av_log(avctx, AV_LOG_DEBUG, |
212 | "Bitrate byte is missing, guessing the bitrate from packet size.\n"); |
213 | } else |
214 | return RATE_ERRS; |
215 | |
216 | return bitrate; |
217 | } |
218 | |
219 | static void warn_insufficient_frame_quality(AVCodecContext *avctx, |
220 | const char *message) |
221 | { |
222 | av_log(avctx, AV_LOG_WARNING, "Frame #%d, %s\n", |
223 | avctx->frame_number, message); |
224 | } |
225 | |
226 | /** |
227 | * Initialize the speech codec according to the specification. |
228 | * |
229 | * TIA/IS-127 5.2 |
230 | */ |
231 | static av_cold int evrc_decode_init(AVCodecContext *avctx) |
232 | { |
233 | EVRCContext *e = avctx->priv_data; |
234 | int i, n, idx = 0; |
235 | float denom = 2.0 / (2.0 * 8.0 + 1.0); |
236 | |
237 | avctx->channels = 1; |
238 | avctx->channel_layout = AV_CH_LAYOUT_MONO; |
239 | avctx->sample_fmt = AV_SAMPLE_FMT_FLT; |
240 | |
241 | for (i = 0; i < FILTER_ORDER; i++) { |
242 | e->prev_lspf[i] = (i + 1) * 0.048; |
243 | e->synthesis[i] = 0.0; |
244 | } |
245 | |
246 | for (i = 0; i < ACB_SIZE; i++) |
247 | e->pitch[i] = e->pitch_back[i] = 0.0; |
248 | |
249 | e->last_valid_bitrate = RATE_QUANT; |
250 | e->prev_pitch_delay = 40.0; |
251 | e->fade_scale = 1.0; |
252 | e->prev_error_flag = 0; |
253 | e->avg_acb_gain = e->avg_fcb_gain = 0.0; |
254 | |
255 | for (i = 0; i < 8; i++) { |
256 | float tt = ((float)i - 8.0 / 2.0) / 8.0; |
257 | |
258 | for (n = -8; n <= 8; n++, idx++) { |
259 | float arg1 = M_PI * 0.9 * (tt - n); |
260 | float arg2 = M_PI * (tt - n); |
261 | |
262 | e->interpolation_coeffs[idx] = 0.9; |
263 | if (arg1) |
264 | e->interpolation_coeffs[idx] *= (0.54 + 0.46 * cos(arg2 * denom)) * |
265 | sin(arg1) / arg1; |
266 | } |
267 | } |
268 | |
269 | return 0; |
270 | } |
271 | |
272 | /** |
273 | * Decode the 10 vector quantized line spectral pair frequencies from the LSP |
274 | * transmission codes of any bitrate and check for badly received packets. |
275 | * |
276 | * @param e the context |
277 | * |
278 | * @return 0 on success, -1 if the packet is badly received |
279 | * |
280 | * TIA/IS-127 5.2.1, 5.7.1 |
281 | */ |
282 | static int decode_lspf(EVRCContext *e) |
283 | { |
284 | const float * const *codebooks = evrc_lspq_codebooks[e->bitrate]; |
285 | int i, j, k = 0; |
286 | |
287 | for (i = 0; i < evrc_lspq_nb_codebooks[e->bitrate]; i++) { |
288 | int row_size = evrc_lspq_codebooks_row_sizes[e->bitrate][i]; |
289 | const float *codebook = codebooks[i]; |
290 | |
291 | for (j = 0; j < row_size; j++) |
292 | e->lspf[k++] = codebook[e->frame.lsp[i] * row_size + j]; |
293 | } |
294 | |
295 | // check for monotonic LSPs |
296 | for (i = 1; i < FILTER_ORDER; i++) |
297 | if (e->lspf[i] <= e->lspf[i - 1]) |
298 | return -1; |
299 | |
300 | // check for minimum separation of LSPs at the splits |
301 | for (i = 0, k = 0; i < evrc_lspq_nb_codebooks[e->bitrate] - 1; i++) { |
302 | k += evrc_lspq_codebooks_row_sizes[e->bitrate][i]; |
303 | if (e->lspf[k] - e->lspf[k - 1] <= MIN_LSP_SEP) |
304 | return -1; |
305 | } |
306 | |
307 | return 0; |
308 | } |
309 | |
310 | /* |
311 | * Interpolation of LSP parameters. |
312 | * |
313 | * TIA/IS-127 5.2.3.1, 5.7.3.2 |
314 | */ |
315 | static void interpolate_lsp(float *ilsp, const float *lsp, |
316 | const float *prev, int index) |
317 | { |
318 | static const float lsp_interpolation_factors[] = { 0.1667, 0.5, 0.8333 }; |
319 | ff_weighted_vector_sumf(ilsp, prev, lsp, |
320 | 1.0 - lsp_interpolation_factors[index], |
321 | lsp_interpolation_factors[index], FILTER_ORDER); |
322 | } |
323 | |
/*
 * Reconstruction of the delay contour.
 *
 * Fills dst[0..2] with three consecutive points on the line between the
 * previous and the current delay, starting at the point selected by the
 * subframe index (0..2).
 *
 * TIA/IS-127 5.2.2.3.2
 */
static void interpolate_delay(float *dst, float current, float prev, int index)
{
    static const float d_interpolation_factors[] = { 0, 0.3313, 0.6625, 1, 1 };
    int k;

    for (k = 0; k < 3; k++) {
        const float w = d_interpolation_factors[index + k];

        dst[k] = (1.0 - w) * prev + w * current;
    }
}
339 | |
340 | /* |
341 | * Convert the quantized, interpolated line spectral frequencies, |
342 | * to prediction coefficients. |
343 | * |
344 | * TIA/IS-127 5.2.3.2, 4.7.2.2 |
345 | */ |
346 | static void decode_predictor_coeffs(const float *ilspf, float *ilpc) |
347 | { |
348 | double lsp[FILTER_ORDER]; |
349 | float a[FILTER_ORDER / 2 + 1], b[FILTER_ORDER / 2 + 1]; |
350 | float a1[FILTER_ORDER / 2] = { 0 }; |
351 | float a2[FILTER_ORDER / 2] = { 0 }; |
352 | float b1[FILTER_ORDER / 2] = { 0 }; |
353 | float b2[FILTER_ORDER / 2] = { 0 }; |
354 | int i, k; |
355 | |
356 | ff_acelp_lsf2lspd(lsp, ilspf, FILTER_ORDER); |
357 | |
358 | for (k = 0; k <= FILTER_ORDER; k++) { |
359 | a[0] = k < 2 ? 0.25 : 0; |
360 | b[0] = k < 2 ? k < 1 ? 0.25 : -0.25 : 0; |
361 | |
362 | for (i = 0; i < FILTER_ORDER / 2; i++) { |
363 | a[i + 1] = a[i] - 2 * lsp[i * 2 ] * a1[i] + a2[i]; |
364 | b[i + 1] = b[i] - 2 * lsp[i * 2 + 1] * b1[i] + b2[i]; |
365 | a2[i] = a1[i]; |
366 | a1[i] = a[i]; |
367 | b2[i] = b1[i]; |
368 | b1[i] = b[i]; |
369 | } |
370 | |
371 | if (k) |
372 | ilpc[k - 1] = 2.0 * (a[FILTER_ORDER / 2] + b[FILTER_ORDER / 2]); |
373 | } |
374 | } |
375 | |
376 | static void bl_intrp(EVRCContext *e, float *ex, float delay) |
377 | { |
378 | float *f; |
379 | int offset, i, coef_idx; |
380 | int16_t t; |
381 | |
382 | offset = lrintf(delay); |
383 | |
384 | t = (offset - delay + 0.5) * 8.0 + 0.5; |
385 | if (t == 8) { |
386 | t = 0; |
387 | offset--; |
388 | } |
389 | |
390 | f = ex - offset - 8; |
391 | |
392 | coef_idx = t * (2 * 8 + 1); |
393 | |
394 | ex[0] = 0.0; |
395 | for (i = 0; i < 2 * 8 + 1; i++) |
396 | ex[0] += e->interpolation_coeffs[coef_idx + i] * f[i]; |
397 | } |
398 | |
399 | /* |
400 | * Adaptive codebook excitation. |
401 | * |
402 | * TIA/IS-127 5.2.2.3.3, 4.12.5.2 |
403 | */ |
404 | static void acb_excitation(EVRCContext *e, float *excitation, float gain, |
405 | const float delay[3], int length) |
406 | { |
407 | float denom, locdelay, dpr, invl; |
408 | int i; |
409 | |
410 | invl = 1.0 / ((float) length); |
411 | dpr = length; |
412 | |
413 | /* first at-most extra samples */ |
414 | denom = (delay[1] - delay[0]) * invl; |
415 | for (i = 0; i < dpr; i++) { |
416 | locdelay = delay[0] + i * denom; |
417 | bl_intrp(e, excitation + i, locdelay); |
418 | } |
419 | |
420 | denom = (delay[2] - delay[1]) * invl; |
421 | /* interpolation */ |
422 | for (i = dpr; i < dpr + 10; i++) { |
423 | locdelay = delay[1] + (i - dpr) * denom; |
424 | bl_intrp(e, excitation + i, locdelay); |
425 | } |
426 | |
427 | for (i = 0; i < length; i++) |
428 | excitation[i] *= gain; |
429 | } |
430 | |
/*
 * Place the pulses described by four fixed codebook codes into cod.
 *
 * Each of the first three codes positions two pulses on one track and
 * carries one sign bit (0x80). The fourth code positions two pulses on two
 * different tracks with separate sign bits (0x100, 0x80) and also carries
 * the track offset in its bits 9-10. Pulse positions are decoded from the
 * low 7 bits as (quotient, remainder) of a division by 11, times 5, plus
 * the track number.
 */
static void decode_8_pulses_35bits(const uint16_t *fixed_index, float *cod)
{
    const int last   = fixed_index[3];
    const int offset = (last >> 9) & 3;
    const int code3  = last & 0x7f;
    int k;

    for (k = 0; k < 3; k++) {
        const int   code   = fixed_index[k] & 0x7f;
        const int   track  = (k + offset) % 5;
        const int   first  = (code / 11) * 5 + track;
        const int   second = (code % 11) * 5 + track;
        const float amp    = (fixed_index[k] & 0x80) ? -1.0 : 1.0;

        cod[first] = amp;

        /* the order of the two positions decides the second pulse's sign */
        if (second < first)
            cod[second]  = -amp;
        else
            cod[second] +=  amp;
    }

    cod[(code3 / 11) * 5 + (3 + offset) % 5] = (last & 0x100) ? -1.0 : 1.0;
    cod[(code3 % 11) * 5 + (4 + offset) % 5] = (last & 0x80 ) ? -1.0 : 1.0;
}
455 | |
/*
 * Add the three pulses described by one 10 bit fixed codebook code to cod.
 *
 * Bit 9 is the common sign. Each 3 bit field selects a position in steps of
 * 7 samples, with start offsets 4, 2 and 0; the middle pulse has the
 * opposite sign.
 */
static void decode_3_pulses_10bits(uint16_t fixed_index, float *cod)
{
    const float sign = (fixed_index & 0x200) ? -1.0 : 1.0;

    cod[( fixed_index       & 0x7) * 7 + 4] += sign;
    cod[((fixed_index >> 3) & 0x7) * 7 + 2] -= sign;
    cod[((fixed_index >> 6) & 0x7) * 7    ] += sign;
}
470 | |
471 | /* |
472 | * Reconstruction of ACELP fixed codebook excitation for full and half rate. |
473 | * |
474 | * TIA/IS-127 5.2.3.7 |
475 | */ |
476 | static void fcb_excitation(EVRCContext *e, const uint16_t *codebook, |
477 | float *excitation, float pitch_gain, |
478 | int pitch_lag, int subframe_size) |
479 | { |
480 | int i; |
481 | |
482 | if (e->bitrate == RATE_FULL) |
483 | decode_8_pulses_35bits(codebook, excitation); |
484 | else |
485 | decode_3_pulses_10bits(*codebook, excitation); |
486 | |
487 | pitch_gain = av_clipf(pitch_gain, 0.2, 0.9); |
488 | |
489 | for (i = pitch_lag; i < subframe_size; i++) |
490 | excitation[i] += pitch_gain * excitation[i - pitch_lag]; |
491 | } |
492 | |
493 | /** |
494 | * Synthesis of the decoder output signal. |
495 | * |
496 | * param[in] in input signal |
497 | * param[in] filter_coeffs LPC coefficients |
498 | * param[in/out] memory synthesis filter memory |
499 | * param buffer_length amount of data to process |
500 | * param[out] samples output samples |
501 | * |
502 | * TIA/IS-127 5.2.3.15, 5.7.3.4 |
503 | */ |
504 | static void synthesis_filter(const float *in, const float *filter_coeffs, |
505 | float *memory, int buffer_length, float *samples) |
506 | { |
507 | int i, j; |
508 | |
509 | for (i = 0; i < buffer_length; i++) { |
510 | samples[i] = in[i]; |
511 | for (j = FILTER_ORDER - 1; j > 0; j--) { |
512 | samples[i] -= filter_coeffs[j] * memory[j]; |
513 | memory[j] = memory[j - 1]; |
514 | } |
515 | samples[i] -= filter_coeffs[0] * memory[0]; |
516 | memory[0] = samples[i]; |
517 | } |
518 | } |
519 | |
520 | static void bandwidth_expansion(float *coeff, const float *inbuf, float gamma) |
521 | { |
522 | double fac = gamma; |
523 | int i; |
524 | |
525 | for (i = 0; i < FILTER_ORDER; i++) { |
526 | coeff[i] = inbuf[i] * fac; |
527 | fac *= gamma; |
528 | } |
529 | } |
530 | |
531 | static void residual_filter(float *output, const float *input, |
532 | const float *coef, float *memory, int length) |
533 | { |
534 | float sum; |
535 | int i, j; |
536 | |
537 | for (i = 0; i < length; i++) { |
538 | sum = input[i]; |
539 | |
540 | for (j = FILTER_ORDER - 1; j > 0; j--) { |
541 | sum += coef[j] * memory[j]; |
542 | memory[j] = memory[j - 1]; |
543 | } |
544 | sum += coef[0] * memory[0]; |
545 | memory[0] = input[i]; |
546 | output[i] = sum; |
547 | } |
548 | } |
549 | |
/*
 * Postfilter parameters, one row per packet rate value (0..4); callers
 * select a row with the rate of the frame being decoded.
 *
 * TIA/IS-127 Table 5.9.1-1.
 */
static const struct PfCoeff {
    float tilt;   /* weight of the previous sample in the tilt compensation */
    float ltgain; /* weight of the delayed residual in the long term filter */
    float p1;     /* bandwidth expansion factor of the residual filter */
    float p2;     /* bandwidth expansion factor of the synthesis filter */
} postfilter_coeffs[5] = {
    [0] = { .tilt = 0.0,  .ltgain = 0.0,  .p1 = 0.0,  .p2 = 0.0  },
    [1] = { .tilt = 0.0,  .ltgain = 0.0,  .p1 = 0.57, .p2 = 0.57 },
    [2] = { .tilt = 0.0,  .ltgain = 0.0,  .p1 = 0.0,  .p2 = 0.0  },
    [3] = { .tilt = 0.35, .ltgain = 0.50, .p1 = 0.50, .p2 = 0.75 },
    [4] = { .tilt = 0.20, .ltgain = 0.50, .p1 = 0.57, .p2 = 0.75 },
};
565 | |
566 | /* |
567 | * Adaptive postfilter. |
568 | * |
569 | * TIA/IS-127 5.9 |
570 | */ |
571 | static void postfilter(EVRCContext *e, float *in, const float *coeff, |
572 | float *out, int idx, const struct PfCoeff *pfc, |
573 | int length) |
574 | { |
575 | float wcoef1[FILTER_ORDER], wcoef2[FILTER_ORDER], |
576 | scratch[SUBFRAME_SIZE], temp[SUBFRAME_SIZE], |
577 | mem[SUBFRAME_SIZE]; |
578 | float sum1 = 0.0, sum2 = 0.0, gamma, gain; |
579 | float tilt = pfc->tilt; |
580 | int i, n, best; |
581 | |
582 | bandwidth_expansion(wcoef1, coeff, pfc->p1); |
583 | bandwidth_expansion(wcoef2, coeff, pfc->p2); |
584 | |
585 | /* Tilt compensation filter, TIA/IS-127 5.9.1 */ |
586 | for (i = 0; i < length - 1; i++) |
587 | sum2 += in[i] * in[i + 1]; |
588 | if (sum2 < 0.0) |
589 | tilt = 0.0; |
590 | |
591 | for (i = 0; i < length; i++) { |
592 | scratch[i] = in[i] - tilt * e->last; |
593 | e->last = in[i]; |
594 | } |
595 | |
596 | /* Short term residual filter, TIA/IS-127 5.9.2 */ |
597 | residual_filter(&e->postfilter_residual[ACB_SIZE], scratch, wcoef1, e->postfilter_fir, length); |
598 | |
599 | /* Long term postfilter */ |
600 | best = idx; |
601 | for (i = FFMIN(MIN_DELAY, idx - 3); i <= FFMAX(MAX_DELAY, idx + 3); i++) { |
602 | for (n = ACB_SIZE, sum2 = 0; n < ACB_SIZE + length; n++) |
603 | sum2 += e->postfilter_residual[n] * e->postfilter_residual[n - i]; |
604 | if (sum2 > sum1) { |
605 | sum1 = sum2; |
606 | best = i; |
607 | } |
608 | } |
609 | |
610 | for (i = ACB_SIZE, sum1 = 0; i < ACB_SIZE + length; i++) |
611 | sum1 += e->postfilter_residual[i - best] * e->postfilter_residual[i - best]; |
612 | for (i = ACB_SIZE, sum2 = 0; i < ACB_SIZE + length; i++) |
613 | sum2 += e->postfilter_residual[i] * e->postfilter_residual[i - best]; |
614 | |
615 | if (sum2 * sum1 == 0 || e->bitrate == RATE_QUANT) { |
616 | memcpy(temp, e->postfilter_residual + ACB_SIZE, length * sizeof(float)); |
617 | } else { |
618 | gamma = sum2 / sum1; |
619 | if (gamma < 0.5) |
620 | memcpy(temp, e->postfilter_residual + ACB_SIZE, length * sizeof(float)); |
621 | else { |
622 | gamma = FFMIN(gamma, 1.0); |
623 | |
624 | for (i = 0; i < length; i++) { |
625 | temp[i] = e->postfilter_residual[ACB_SIZE + i] + gamma * |
626 | pfc->ltgain * e->postfilter_residual[ACB_SIZE + i - best]; |
627 | } |
628 | } |
629 | } |
630 | |
631 | memcpy(scratch, temp, length * sizeof(float)); |
632 | memcpy(mem, e->postfilter_iir, FILTER_ORDER * sizeof(float)); |
633 | synthesis_filter(scratch, wcoef2, mem, length, scratch); |
634 | |
635 | /* Gain computation, TIA/IS-127 5.9.4-2 */ |
636 | for (i = 0, sum1 = 0, sum2 = 0; i < length; i++) { |
637 | sum1 += in[i] * in[i]; |
638 | sum2 += scratch[i] * scratch[i]; |
639 | } |
640 | gain = sum2 ? sqrt(sum1 / sum2) : 1.0; |
641 | |
642 | for (i = 0; i < length; i++) |
643 | temp[i] *= gain; |
644 | |
645 | /* Short term postfilter */ |
646 | synthesis_filter(temp, wcoef2, e->postfilter_iir, length, out); |
647 | |
648 | memmove(e->postfilter_residual, |
649 | e->postfilter_residual + length, ACB_SIZE * sizeof(float)); |
650 | } |
651 | |
652 | static void frame_erasure(EVRCContext *e, float *samples) |
653 | { |
654 | float ilspf[FILTER_ORDER], ilpc[FILTER_ORDER], idelay[NB_SUBFRAMES], |
655 | tmp[SUBFRAME_SIZE + 6], f; |
656 | int i, j; |
657 | |
658 | for (i = 0; i < FILTER_ORDER; i++) { |
659 | if (e->bitrate != RATE_QUANT) |
660 | e->lspf[i] = e->prev_lspf[i] * 0.875 + 0.125 * (i + 1) * 0.048; |
661 | else |
662 | e->lspf[i] = e->prev_lspf[i]; |
663 | } |
664 | |
665 | if (e->prev_error_flag) |
666 | e->avg_acb_gain *= 0.75; |
667 | if (e->bitrate == RATE_FULL) |
668 | memcpy(e->pitch_back, e->pitch, ACB_SIZE * sizeof(float)); |
669 | if (e->last_valid_bitrate == RATE_QUANT) |
670 | e->bitrate = RATE_QUANT; |
671 | else |
672 | e->bitrate = RATE_FULL; |
673 | |
674 | if (e->bitrate == RATE_FULL || e->bitrate == RATE_HALF) { |
675 | e->pitch_delay = e->prev_pitch_delay; |
676 | } else { |
677 | float sum = 0; |
678 | |
679 | idelay[0] = idelay[1] = idelay[2] = MIN_DELAY; |
680 | |
681 | for (i = 0; i < NB_SUBFRAMES; i++) |
682 | sum += evrc_energy_quant[e->prev_energy_gain][i]; |
683 | sum /= (float) NB_SUBFRAMES; |
684 | sum = pow(10, sum); |
685 | for (i = 0; i < NB_SUBFRAMES; i++) |
686 | e->energy_vector[i] = sum; |
687 | } |
688 | |
689 | if (fabs(e->pitch_delay - e->prev_pitch_delay) > 15) |
690 | e->prev_pitch_delay = e->pitch_delay; |
691 | |
692 | for (i = 0; i < NB_SUBFRAMES; i++) { |
693 | int subframe_size = subframe_sizes[i]; |
694 | int pitch_lag; |
695 | |
696 | interpolate_lsp(ilspf, e->lspf, e->prev_lspf, i); |
697 | |
698 | if (e->bitrate != RATE_QUANT) { |
699 | if (e->avg_acb_gain < 0.3) { |
700 | idelay[0] = estimation_delay[i]; |
701 | idelay[1] = estimation_delay[i + 1]; |
702 | idelay[2] = estimation_delay[i + 2]; |
703 | } else { |
704 | interpolate_delay(idelay, e->pitch_delay, e->prev_pitch_delay, i); |
705 | } |
706 | } |
707 | |
708 | pitch_lag = lrintf((idelay[1] + idelay[0]) / 2.0); |
709 | decode_predictor_coeffs(ilspf, ilpc); |
710 | |
711 | if (e->bitrate != RATE_QUANT) { |
712 | acb_excitation(e, e->pitch + ACB_SIZE, |
713 | e->avg_acb_gain, idelay, subframe_size); |
714 | for (j = 0; j < subframe_size; j++) |
715 | e->pitch[ACB_SIZE + j] *= e->fade_scale; |
716 | e->fade_scale = FFMAX(e->fade_scale - 0.05, 0.0); |
717 | } else { |
718 | for (j = 0; j < subframe_size; j++) |
719 | e->pitch[ACB_SIZE + j] = e->energy_vector[i]; |
720 | } |
721 | |
722 | memmove(e->pitch, e->pitch + subframe_size, ACB_SIZE * sizeof(float)); |
723 | |
724 | if (e->bitrate != RATE_QUANT && e->avg_acb_gain < 0.4) { |
725 | f = 0.1 * e->avg_fcb_gain; |
726 | for (j = 0; j < subframe_size; j++) |
727 | e->pitch[ACB_SIZE + j] += f; |
728 | } else if (e->bitrate == RATE_QUANT) { |
729 | for (j = 0; j < subframe_size; j++) |
730 | e->pitch[ACB_SIZE + j] = e->energy_vector[i]; |
731 | } |
732 | |
733 | synthesis_filter(e->pitch + ACB_SIZE, ilpc, |
734 | e->synthesis, subframe_size, tmp); |
735 | postfilter(e, tmp, ilpc, samples, pitch_lag, |
736 | &postfilter_coeffs[e->bitrate], subframe_size); |
737 | |
738 | samples += subframe_size; |
739 | } |
740 | } |
741 | |
742 | static int evrc_decode_frame(AVCodecContext *avctx, void *data, |
743 | int *got_frame_ptr, AVPacket *avpkt) |
744 | { |
745 | const uint8_t *buf = avpkt->data; |
746 | AVFrame *frame = data; |
747 | EVRCContext *e = avctx->priv_data; |
748 | int buf_size = avpkt->size; |
749 | float ilspf[FILTER_ORDER], ilpc[FILTER_ORDER], idelay[NB_SUBFRAMES]; |
750 | float *samples; |
751 | int i, j, ret, error_flag = 0; |
752 | |
753 | frame->nb_samples = 160; |
754 | if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) |
755 | return ret; |
756 | samples = (float *)frame->data[0]; |
757 | |
758 | if ((e->bitrate = determine_bitrate(avctx, &buf_size, &buf)) == RATE_ERRS) { |
759 | warn_insufficient_frame_quality(avctx, "bitrate cannot be determined."); |
760 | goto erasure; |
761 | } |
762 | if (e->bitrate <= SILENCE || e->bitrate == RATE_QUARTER) |
763 | goto erasure; |
764 | if (e->bitrate == RATE_QUANT && e->last_valid_bitrate == RATE_FULL |
765 | && !e->prev_error_flag) |
766 | goto erasure; |
767 | |
768 | if ((ret = init_get_bits8(&e->gb, buf, buf_size)) < 0) |
769 | return ret; |
770 | memset(&e->frame, 0, sizeof(EVRCAFrame)); |
771 | |
772 | unpack_frame(e); |
773 | |
774 | if (e->bitrate != RATE_QUANT) { |
775 | uint8_t *p = (uint8_t *) &e->frame; |
776 | for (i = 0; i < sizeof(EVRCAFrame); i++) { |
777 | if (p[i]) |
778 | break; |
779 | } |
780 | if (i == sizeof(EVRCAFrame)) |
781 | goto erasure; |
782 | } else if (e->frame.lsp[0] == 0xf && |
783 | e->frame.lsp[1] == 0xf && |
784 | e->frame.energy_gain == 0xff) { |
785 | goto erasure; |
786 | } |
787 | |
788 | if (decode_lspf(e) < 0) |
789 | goto erasure; |
790 | |
791 | if (e->bitrate == RATE_FULL || e->bitrate == RATE_HALF) { |
792 | /* Pitch delay parameter checking as per TIA/IS-127 5.1.5.1 */ |
793 | if (e->frame.pitch_delay > MAX_DELAY - MIN_DELAY) |
794 | goto erasure; |
795 | |
796 | e->pitch_delay = e->frame.pitch_delay + MIN_DELAY; |
797 | |
798 | /* Delay diff parameter checking as per TIA/IS-127 5.1.5.2 */ |
799 | if (e->frame.delay_diff) { |
800 | int p = e->pitch_delay - e->frame.delay_diff + 16; |
801 | if (p < MIN_DELAY || p > MAX_DELAY) |
802 | goto erasure; |
803 | } |
804 | |
805 | /* Delay contour reconstruction as per TIA/IS-127 5.2.2.2 */ |
806 | if (e->frame.delay_diff && |
807 | e->bitrate == RATE_FULL && e->prev_error_flag) { |
808 | float delay; |
809 | |
810 | memcpy(e->pitch, e->pitch_back, ACB_SIZE * sizeof(float)); |
811 | |
812 | delay = e->prev_pitch_delay; |
813 | e->prev_pitch_delay = delay - e->frame.delay_diff + 16.0; |
814 | |
815 | if (fabs(e->pitch_delay - delay) > 15) |
816 | delay = e->pitch_delay; |
817 | |
818 | for (i = 0; i < NB_SUBFRAMES; i++) { |
819 | int subframe_size = subframe_sizes[i]; |
820 | |
821 | interpolate_delay(idelay, delay, e->prev_pitch_delay, i); |
822 | acb_excitation(e, e->pitch + ACB_SIZE, e->avg_acb_gain, idelay, subframe_size); |
823 | memmove(e->pitch, e->pitch + subframe_size, ACB_SIZE * sizeof(float)); |
824 | } |
825 | } |
826 | |
827 | /* Smoothing of the decoded delay as per TIA/IS-127 5.2.2.5 */ |
828 | if (fabs(e->pitch_delay - e->prev_pitch_delay) > 15) |
829 | e->prev_pitch_delay = e->pitch_delay; |
830 | |
831 | e->avg_acb_gain = e->avg_fcb_gain = 0.0; |
832 | } else { |
833 | idelay[0] = idelay[1] = idelay[2] = MIN_DELAY; |
834 | |
835 | /* Decode frame energy vectors as per TIA/IS-127 5.7.2 */ |
836 | for (i = 0; i < NB_SUBFRAMES; i++) |
837 | e->energy_vector[i] = pow(10, evrc_energy_quant[e->frame.energy_gain][i]); |
838 | e->prev_energy_gain = e->frame.energy_gain; |
839 | } |
840 | |
841 | for (i = 0; i < NB_SUBFRAMES; i++) { |
842 | float tmp[SUBFRAME_SIZE + 6] = { 0 }; |
843 | int subframe_size = subframe_sizes[i]; |
844 | int pitch_lag; |
845 | |
846 | interpolate_lsp(ilspf, e->lspf, e->prev_lspf, i); |
847 | |
848 | if (e->bitrate != RATE_QUANT) |
849 | interpolate_delay(idelay, e->pitch_delay, e->prev_pitch_delay, i); |
850 | |
851 | pitch_lag = lrintf((idelay[1] + idelay[0]) / 2.0); |
852 | decode_predictor_coeffs(ilspf, ilpc); |
853 | |
854 | /* Bandwidth expansion as per TIA/IS-127 5.2.3.3 */ |
855 | if (e->frame.lpc_flag && e->prev_error_flag) |
856 | bandwidth_expansion(ilpc, ilpc, 0.75); |
857 | |
858 | if (e->bitrate != RATE_QUANT) { |
859 | float acb_sum, f; |
860 | |
861 | f = exp((e->bitrate == RATE_HALF ? 0.5 : 0.25) |
862 | * (e->frame.fcb_gain[i] + 1)); |
863 | acb_sum = pitch_gain_vq[e->frame.acb_gain[i]]; |
864 | e->avg_acb_gain += acb_sum / NB_SUBFRAMES; |
865 | e->avg_fcb_gain += f / NB_SUBFRAMES; |
866 | |
867 | acb_excitation(e, e->pitch + ACB_SIZE, |
868 | acb_sum, idelay, subframe_size); |
869 | fcb_excitation(e, e->frame.fcb_shape[i], tmp, |
870 | acb_sum, pitch_lag, subframe_size); |
871 | |
872 | /* Total excitation generation as per TIA/IS-127 5.2.3.9 */ |
873 | for (j = 0; j < subframe_size; j++) |
874 | e->pitch[ACB_SIZE + j] += f * tmp[j]; |
875 | e->fade_scale = FFMIN(e->fade_scale + 0.2, 1.0); |
876 | } else { |
877 | for (j = 0; j < subframe_size; j++) |
878 | e->pitch[ACB_SIZE + j] = e->energy_vector[i]; |
879 | } |
880 | |
881 | memmove(e->pitch, e->pitch + subframe_size, ACB_SIZE * sizeof(float)); |
882 | |
883 | synthesis_filter(e->pitch + ACB_SIZE, ilpc, |
884 | e->synthesis, subframe_size, |
885 | e->postfilter ? tmp : samples); |
886 | if (e->postfilter) |
887 | postfilter(e, tmp, ilpc, samples, pitch_lag, |
888 | &postfilter_coeffs[e->bitrate], subframe_size); |
889 | |
890 | samples += subframe_size; |
891 | } |
892 | |
893 | if (error_flag) { |
894 | erasure: |
895 | error_flag = 1; |
896 | av_log(avctx, AV_LOG_WARNING, "frame erasure\n"); |
897 | frame_erasure(e, samples); |
898 | } |
899 | |
900 | memcpy(e->prev_lspf, e->lspf, sizeof(e->prev_lspf)); |
901 | e->prev_error_flag = error_flag; |
902 | e->last_valid_bitrate = e->bitrate; |
903 | |
904 | if (e->bitrate != RATE_QUANT) |
905 | e->prev_pitch_delay = e->pitch_delay; |
906 | |
907 | samples = (float *)frame->data[0]; |
908 | for (i = 0; i < 160; i++) |
909 | samples[i] /= 32768; |
910 | |
911 | *got_frame_ptr = 1; |
912 | |
913 | return avpkt->size; |
914 | } |
915 | |
916 | #define OFFSET(x) offsetof(EVRCContext, x) |
917 | #define AD AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_DECODING_PARAM |
918 | |
919 | static const AVOption options[] = { |
920 | { "postfilter", "enable postfilter", OFFSET(postfilter), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, AD }, |
921 | { NULL } |
922 | }; |
923 | |
924 | static const AVClass evrcdec_class = { |
925 | .class_name = "evrc", |
926 | .item_name = av_default_item_name, |
927 | .option = options, |
928 | .version = LIBAVUTIL_VERSION_INT, |
929 | }; |
930 | |
931 | AVCodec ff_evrc_decoder = { |
932 | .name = "evrc", |
933 | .long_name = NULL_IF_CONFIG_SMALL("EVRC (Enhanced Variable Rate Codec)"), |
934 | .type = AVMEDIA_TYPE_AUDIO, |
935 | .id = AV_CODEC_ID_EVRC, |
936 | .init = evrc_decode_init, |
937 | .decode = evrc_decode_frame, |
938 | .capabilities = AV_CODEC_CAP_DR1, |
939 | .priv_data_size = sizeof(EVRCContext), |
940 | .priv_class = &evrcdec_class, |
941 | }; |
942 |