blob: bfaf1d7d00be6c8cf6645cd6475cd0b9f739f7c6
1 | /* |
2 | * Copyright (c) 2012 Andrew D'Addesio |
3 | * Copyright (c) 2013-2014 Mozilla Corporation |
4 | * |
5 | * This file is part of FFmpeg. |
6 | * |
7 | * FFmpeg is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; either |
10 | * version 2.1 of the License, or (at your option) any later version. |
11 | * |
12 | * FFmpeg is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | * Lesser General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU Lesser General Public |
18 | * License along with FFmpeg; if not, write to the Free Software |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | */ |
21 | |
22 | /** |
23 | * @file |
24 | * Opus SILK decoder |
25 | */ |
26 | |
27 | #include <stdint.h> |
28 | |
29 | #include "opus.h" |
30 | #include "opustab.h" |
31 | |
32 | typedef struct SilkFrame { |
33 | int coded; |
34 | int log_gain; |
35 | int16_t nlsf[16]; |
36 | float lpc[16]; |
37 | |
38 | float output [2 * SILK_HISTORY]; |
39 | float lpc_history[2 * SILK_HISTORY]; |
40 | int primarylag; |
41 | |
42 | int prev_voiced; |
43 | } SilkFrame; |
44 | |
45 | struct SilkContext { |
46 | AVCodecContext *avctx; |
47 | int output_channels; |
48 | |
49 | int midonly; |
50 | int subframes; |
51 | int sflength; |
52 | int flength; |
53 | int nlsf_interp_factor; |
54 | |
55 | enum OpusBandwidth bandwidth; |
56 | int wb; |
57 | |
58 | SilkFrame frame[2]; |
59 | float prev_stereo_weights[2]; |
60 | float stereo_weights[2]; |
61 | |
62 | int prev_coded_channels; |
63 | }; |
64 | |
/**
 * Enforce the minimum spacing between neighbouring NLSF coefficients.
 *
 * Up to 20 passes each locate the most violated spacing constraint and move
 * the one or two coefficients involved apart. If violations remain after
 * that, a fall-back sorts the coefficients and pushes them apart with one
 * forward and one backward sweep.
 *
 * @param nlsf      coefficients, modified in place; only the first @p order
 *                  entries are used
 * @param order     number of coefficients
 * @param min_delta order + 1 minimum distances: min_delta[0] between 0 and
 *                  nlsf[0], min_delta[i] between nlsf[i-1] and nlsf[i], and
 *                  min_delta[order] between nlsf[order-1] and 32768
 */
static inline void silk_stabilize_lsf(int16_t nlsf[16], int order, const uint16_t min_delta[17])
{
    int pass, i;

    for (pass = 0; pass < 20; pass++) {
        int k = 0, min_diff = 0;

        /* find the most violated constraint; the lowest index wins ties */
        for (i = 0; i < order + 1; i++) {
            int low  = i != 0     ? nlsf[i - 1] : 0;
            int high = i != order ? nlsf[i]     : 32768;
            int diff = (high - low) - min_delta[i];

            if (diff < min_diff) {
                min_diff = diff;
                k        = i;
            }
        }
        if (min_diff == 0) /* no issues; stabilized */
            return;

        /* wiggle one or two LSFs */
        if (k == 0) {
            /* repel away from lower bound */
            nlsf[0] = min_delta[0];
        } else if (k == order) {
            /* repel away from higher bound */
            nlsf[order - 1] = 32768 - min_delta[order];
        } else {
            /* repel away from current position */
            int min_center = 0, max_center = 32768, center_val;

            /* lower extent */
            for (i = 0; i < k; i++)
                min_center += min_delta[i];
            min_center += min_delta[k] >> 1;

            /* upper extent */
            for (i = order; i > k; i--)
                max_center -= min_delta[i];
            max_center -= min_delta[k] >> 1;

            /* move apart */
            center_val = nlsf[k - 1] + nlsf[k];
            center_val = (center_val >> 1) + (center_val & 1); // rounded divide by 2
            /* raise to the lower extent first, then cap at the upper one */
            if (center_val < min_center)
                center_val = min_center;
            if (center_val > max_center)
                center_val = max_center;

            nlsf[k - 1] = center_val - (min_delta[k] >> 1);
            nlsf[k]     = nlsf[k - 1] + min_delta[k];
        }
    }

    /* resort to the fall-back method, the standard method for LSF stabilization */

    /* sort; as the LSFs should be nearly sorted, use insertion sort */
    for (i = 1; i < order; i++) {
        int j, value = nlsf[i];
        for (j = i - 1; j >= 0 && nlsf[j] > value; j--)
            nlsf[j + 1] = nlsf[j];
        nlsf[j + 1] = value;
    }

    /* push forwards to increase distance */
    if (nlsf[0] < min_delta[0])
        nlsf[0] = min_delta[0];
    for (i = 1; i < order; i++) {
        /* saturate the target: an unbounded sum does not fit in an int16_t
           and would wrap around to a negative coefficient */
        int target = nlsf[i - 1] + min_delta[i];
        if (target > 32767)
            target = 32767;
        if (nlsf[i] < target)
            nlsf[i] = target;
    }

    /* push backwards to increase distance */
    if (nlsf[order - 1] > 32768 - min_delta[order])
        nlsf[order - 1] = 32768 - min_delta[order];
    for (i = order - 2; i >= 0; i--)
        if (nlsf[i] > nlsf[i + 1] - min_delta[i + 1])
            nlsf[i] = nlsf[i + 1] - min_delta[i + 1];
}
143 | |
/**
 * Check whether a set of Q12 LPC coefficients is acceptable as a stable
 * synthesis filter.
 *
 * The coefficients are rejected when their sum reaches 4096, when any
 * coefficient visited by the recursion exceeds the magnitude limit, when an
 * intermediate value of the recursion does not fit in 32 bits, or when the
 * accumulated inverse gain ends up below 107374 (Q30).
 *
 * @param lpc   Q12 LPC coefficients
 * @param order number of coefficients in use
 * @return 1 if the filter is considered stable, 0 otherwise
 */
static inline int silk_is_lpc_stable(const int16_t lpc[16], int order)
{
    int k, j, DC_resp = 0;
    int32_t lpc32[2][16];       // Q24
    int totalinvgain = 1 << 30; // 1.0 in Q30
    int32_t *row = lpc32[0], *prevrow;

    /* initialize the first row for the Levinson recursion */
    for (k = 0; k < order; k++) {
        DC_resp += lpc[k];
        row[k] = lpc[k] * 4096;
    }

    if (DC_resp >= 4096)
        return 0;

    /* check if prediction gain pushes any coefficients too far */
    for (k = order - 1; 1; k--) {
        int rc;      // Q31; reflection coefficient
        int gaindiv; // Q30; inverse of the gain (the divisor)
        int gain;    // gain for this reflection coefficient
        int fbits;   // fractional bits used for the gain
        int error;   // Q29; estimate of the error of our partial estimate of 1/gaindiv

        if (FFABS(row[k]) > 16773022)
            return 0;

        rc      = -(row[k] * 128);
        gaindiv = (1 << 30) - MULH(rc, rc);

        totalinvgain = MULH(totalinvgain, gaindiv) << 2;
        if (k == 0)
            return (totalinvgain >= 107374);

        /* approximate 1.0/gaindiv */
        fbits = opus_ilog(gaindiv);
        gain  = ((1 << 29) - 1) / (gaindiv >> (fbits + 1 - 16)); // Q<fbits-16>
        error = (1 << 29) - MULL(gaindiv << (15 + 16 - fbits), gain, 16);
        gain  = ((gain << 16) + (error * gain >> 13));

        /* switch to the next row of the LPC coefficients */
        prevrow = row;
        row = lpc32[k & 1];

        for (j = 0; j < k; j++) {
            /* do the subtraction in 64 bits and saturate it to 32 bits
               rather than letting it wrap around */
            int64_t diff = (int64_t)prevrow[j] - ROUND_MULL(prevrow[k - j - 1], rc, 31);
            int64_t scaled;
            int32_t x;

            if (diff > INT32_MAX)
                x = INT32_MAX;
            else if (diff < INT32_MIN)
                x = INT32_MIN;
            else
                x = (int32_t)diff;

            /* per RFC 8251 section 6, if this calculation overflows, the
               filter is considered unstable */
            scaled = ROUND_MULL(x, gain, fbits);
            if (scaled < INT32_MIN || scaled > INT32_MAX)
                return 0;

            row[j] = (int32_t)scaled;
        }
    }
}
194 | |
/**
 * Expand every second LSP value into the coefficients of a polynomial.
 *
 * @param lsp        LSP values; entries 0, 2, 4, ... up to index
 *                   2 * (half_order - 1) are read
 * @param pol        receives half_order + 1 coefficients in Q16
 * @param half_order number of LSP values consumed
 */
static void silk_lsp2poly(const int32_t lsp[16], int32_t pol[16], int half_order)
{
    int n, m;

    pol[0] = 65536; // 1.0 in Q16
    pol[1] = -lsp[0];

    for (n = 1; n < half_order; n++) {
        const int32_t c = lsp[2 * n];

        /* the new top coefficient is formed first, then the existing ones
           are updated from the highest index downwards */
        pol[n + 1] = pol[n - 1] * 2 - ROUND_MULL(c, pol[n], 16);
        for (m = n; m >= 2; m--)
            pol[m] += pol[m - 2] - ROUND_MULL(c, pol[m - 1], 16);

        pol[1] -= c;
    }
}
210 | |
211 | static void silk_lsf2lpc(const int16_t nlsf[16], float lpcf[16], int order) |
212 | { |
213 | int i, k; |
214 | int32_t lsp[16]; // Q17; 2*cos(LSF) |
215 | int32_t p[9], q[9]; // Q16 |
216 | int32_t lpc32[16]; // Q17 |
217 | int16_t lpc[16]; // Q12 |
218 | |
219 | /* convert the LSFs to LSPs, i.e. 2*cos(LSF) */ |
220 | for (k = 0; k < order; k++) { |
221 | int index = nlsf[k] >> 8; |
222 | int offset = nlsf[k] & 255; |
223 | int k2 = (order == 10) ? ff_silk_lsf_ordering_nbmb[k] : ff_silk_lsf_ordering_wb[k]; |
224 | |
225 | /* interpolate and round */ |
226 | lsp[k2] = ff_silk_cosine[index] * 256; |
227 | lsp[k2] += (ff_silk_cosine[index + 1] - ff_silk_cosine[index]) * offset; |
228 | lsp[k2] = (lsp[k2] + 4) >> 3; |
229 | } |
230 | |
231 | silk_lsp2poly(lsp , p, order >> 1); |
232 | silk_lsp2poly(lsp + 1, q, order >> 1); |
233 | |
234 | /* reconstruct A(z) */ |
235 | for (k = 0; k < order>>1; k++) { |
236 | lpc32[k] = -p[k + 1] - p[k] - q[k + 1] + q[k]; |
237 | lpc32[order-k-1] = -p[k + 1] - p[k] + q[k + 1] - q[k]; |
238 | } |
239 | |
240 | /* limit the range of the LPC coefficients to each fit within an int16_t */ |
241 | for (i = 0; i < 10; i++) { |
242 | int j; |
243 | unsigned int maxabs = 0; |
244 | for (j = 0, k = 0; j < order; j++) { |
245 | unsigned int x = FFABS(lpc32[k]); |
246 | if (x > maxabs) { |
247 | maxabs = x; // Q17 |
248 | k = j; |
249 | } |
250 | } |
251 | |
252 | maxabs = (maxabs + 16) >> 5; // convert to Q12 |
253 | |
254 | if (maxabs > 32767) { |
255 | /* perform bandwidth expansion */ |
256 | unsigned int chirp, chirp_base; // Q16 |
257 | maxabs = FFMIN(maxabs, 163838); // anything above this overflows chirp's numerator |
258 | chirp_base = chirp = 65470 - ((maxabs - 32767) << 14) / ((maxabs * (k+1)) >> 2); |
259 | |
260 | for (k = 0; k < order; k++) { |
261 | lpc32[k] = ROUND_MULL(lpc32[k], chirp, 16); |
262 | chirp = (chirp_base * chirp + 32768) >> 16; |
263 | } |
264 | } else break; |
265 | } |
266 | |
267 | if (i == 10) { |
268 | /* time's up: just clamp */ |
269 | for (k = 0; k < order; k++) { |
270 | int x = (lpc32[k] + 16) >> 5; |
271 | lpc[k] = av_clip_int16(x); |
272 | lpc32[k] = lpc[k] << 5; // shortcut mandated by the spec; drops lower 5 bits |
273 | } |
274 | } else { |
275 | for (k = 0; k < order; k++) |
276 | lpc[k] = (lpc32[k] + 16) >> 5; |
277 | } |
278 | |
279 | /* if the prediction gain causes the LPC filter to become unstable, |
280 | apply further bandwidth expansion on the Q17 coefficients */ |
281 | for (i = 1; i <= 16 && !silk_is_lpc_stable(lpc, order); i++) { |
282 | unsigned int chirp, chirp_base; |
283 | chirp_base = chirp = 65536 - (1 << i); |
284 | |
285 | for (k = 0; k < order; k++) { |
286 | lpc32[k] = ROUND_MULL(lpc32[k], chirp, 16); |
287 | lpc[k] = (lpc32[k] + 16) >> 5; |
288 | chirp = (chirp_base * chirp + 32768) >> 16; |
289 | } |
290 | } |
291 | |
292 | for (i = 0; i < order; i++) |
293 | lpcf[i] = lpc[i] / 4096.0f; |
294 | } |
295 | |
296 | static inline void silk_decode_lpc(SilkContext *s, SilkFrame *frame, |
297 | OpusRangeCoder *rc, |
298 | float lpc_leadin[16], float lpc[16], |
299 | int *lpc_order, int *has_lpc_leadin, int voiced) |
300 | { |
301 | int i; |
302 | int order; // order of the LP polynomial; 10 for NB/MB and 16 for WB |
303 | int8_t lsf_i1, lsf_i2[16]; // stage-1 and stage-2 codebook indices |
304 | int16_t lsf_res[16]; // residual as a Q10 value |
305 | int16_t nlsf[16]; // Q15 |
306 | |
307 | *lpc_order = order = s->wb ? 16 : 10; |
308 | |
309 | /* obtain LSF stage-1 and stage-2 indices */ |
310 | lsf_i1 = ff_opus_rc_dec_cdf(rc, ff_silk_model_lsf_s1[s->wb][voiced]); |
311 | for (i = 0; i < order; i++) { |
312 | int index = s->wb ? ff_silk_lsf_s2_model_sel_wb [lsf_i1][i] : |
313 | ff_silk_lsf_s2_model_sel_nbmb[lsf_i1][i]; |
314 | lsf_i2[i] = ff_opus_rc_dec_cdf(rc, ff_silk_model_lsf_s2[index]) - 4; |
315 | if (lsf_i2[i] == -4) |
316 | lsf_i2[i] -= ff_opus_rc_dec_cdf(rc, ff_silk_model_lsf_s2_ext); |
317 | else if (lsf_i2[i] == 4) |
318 | lsf_i2[i] += ff_opus_rc_dec_cdf(rc, ff_silk_model_lsf_s2_ext); |
319 | } |
320 | |
321 | /* reverse the backwards-prediction step */ |
322 | for (i = order - 1; i >= 0; i--) { |
323 | int qstep = s->wb ? 9830 : 11796; |
324 | |
325 | lsf_res[i] = lsf_i2[i] * 1024; |
326 | if (lsf_i2[i] < 0) lsf_res[i] += 102; |
327 | else if (lsf_i2[i] > 0) lsf_res[i] -= 102; |
328 | lsf_res[i] = (lsf_res[i] * qstep) >> 16; |
329 | |
330 | if (i + 1 < order) { |
331 | int weight = s->wb ? ff_silk_lsf_pred_weights_wb [ff_silk_lsf_weight_sel_wb [lsf_i1][i]][i] : |
332 | ff_silk_lsf_pred_weights_nbmb[ff_silk_lsf_weight_sel_nbmb[lsf_i1][i]][i]; |
333 | lsf_res[i] += (lsf_res[i+1] * weight) >> 8; |
334 | } |
335 | } |
336 | |
337 | /* reconstruct the NLSF coefficients from the supplied indices */ |
338 | for (i = 0; i < order; i++) { |
339 | const uint8_t * codebook = s->wb ? ff_silk_lsf_codebook_wb [lsf_i1] : |
340 | ff_silk_lsf_codebook_nbmb[lsf_i1]; |
341 | int cur, prev, next, weight_sq, weight, ipart, fpart, y, value; |
342 | |
343 | /* find the weight of the residual */ |
344 | /* TODO: precompute */ |
345 | cur = codebook[i]; |
346 | prev = i ? codebook[i - 1] : 0; |
347 | next = i + 1 < order ? codebook[i + 1] : 256; |
348 | weight_sq = (1024 / (cur - prev) + 1024 / (next - cur)) << 16; |
349 | |
350 | /* approximate square-root with mandated fixed-point arithmetic */ |
351 | ipart = opus_ilog(weight_sq); |
352 | fpart = (weight_sq >> (ipart-8)) & 127; |
353 | y = ((ipart & 1) ? 32768 : 46214) >> ((32 - ipart)>>1); |
354 | weight = y + ((213 * fpart * y) >> 16); |
355 | |
356 | value = cur * 128 + (lsf_res[i] * 16384) / weight; |
357 | nlsf[i] = av_clip_uintp2(value, 15); |
358 | } |
359 | |
360 | /* stabilize the NLSF coefficients */ |
361 | silk_stabilize_lsf(nlsf, order, s->wb ? ff_silk_lsf_min_spacing_wb : |
362 | ff_silk_lsf_min_spacing_nbmb); |
363 | |
364 | /* produce an interpolation for the first 2 subframes, */ |
365 | /* and then convert both sets of NLSFs to LPC coefficients */ |
366 | *has_lpc_leadin = 0; |
367 | if (s->subframes == 4) { |
368 | int offset = ff_opus_rc_dec_cdf(rc, ff_silk_model_lsf_interpolation_offset); |
369 | if (offset != 4 && frame->coded) { |
370 | *has_lpc_leadin = 1; |
371 | if (offset != 0) { |
372 | int16_t nlsf_leadin[16]; |
373 | for (i = 0; i < order; i++) |
374 | nlsf_leadin[i] = frame->nlsf[i] + |
375 | ((nlsf[i] - frame->nlsf[i]) * offset >> 2); |
376 | silk_lsf2lpc(nlsf_leadin, lpc_leadin, order); |
377 | } else /* avoid re-computation for a (roughly) 1-in-4 occurrence */ |
378 | memcpy(lpc_leadin, frame->lpc, 16 * sizeof(float)); |
379 | } else |
380 | offset = 4; |
381 | s->nlsf_interp_factor = offset; |
382 | |
383 | silk_lsf2lpc(nlsf, lpc, order); |
384 | } else { |
385 | s->nlsf_interp_factor = 4; |
386 | silk_lsf2lpc(nlsf, lpc, order); |
387 | } |
388 | |
389 | memcpy(frame->nlsf, nlsf, order * sizeof(nlsf[0])); |
390 | memcpy(frame->lpc, lpc, order * sizeof(lpc[0])); |
391 | } |
392 | |
393 | static inline void silk_count_children(OpusRangeCoder *rc, int model, int32_t total, |
394 | int32_t child[2]) |
395 | { |
396 | if (total != 0) { |
397 | child[0] = ff_opus_rc_dec_cdf(rc, |
398 | ff_silk_model_pulse_location[model] + (((total - 1 + 5) * (total - 1)) >> 1)); |
399 | child[1] = total - child[0]; |
400 | } else { |
401 | child[0] = 0; |
402 | child[1] = 0; |
403 | } |
404 | } |
405 | |
406 | static inline void silk_decode_excitation(SilkContext *s, OpusRangeCoder *rc, |
407 | float* excitationf, |
408 | int qoffset_high, int active, int voiced) |
409 | { |
410 | int i; |
411 | uint32_t seed; |
412 | int shellblocks; |
413 | int ratelevel; |
414 | uint8_t pulsecount[20]; // total pulses in each shell block |
415 | uint8_t lsbcount[20] = {0}; // raw lsbits defined for each pulse in each shell block |
416 | int32_t excitation[320]; // Q23 |
417 | |
418 | /* excitation parameters */ |
419 | seed = ff_opus_rc_dec_cdf(rc, ff_silk_model_lcg_seed); |
420 | shellblocks = ff_silk_shell_blocks[s->bandwidth][s->subframes >> 2]; |
421 | ratelevel = ff_opus_rc_dec_cdf(rc, ff_silk_model_exc_rate[voiced]); |
422 | |
423 | for (i = 0; i < shellblocks; i++) { |
424 | pulsecount[i] = ff_opus_rc_dec_cdf(rc, ff_silk_model_pulse_count[ratelevel]); |
425 | if (pulsecount[i] == 17) { |
426 | while (pulsecount[i] == 17 && ++lsbcount[i] != 10) |
427 | pulsecount[i] = ff_opus_rc_dec_cdf(rc, ff_silk_model_pulse_count[9]); |
428 | if (lsbcount[i] == 10) |
429 | pulsecount[i] = ff_opus_rc_dec_cdf(rc, ff_silk_model_pulse_count[10]); |
430 | } |
431 | } |
432 | |
433 | /* decode pulse locations using PVQ */ |
434 | for (i = 0; i < shellblocks; i++) { |
435 | if (pulsecount[i] != 0) { |
436 | int a, b, c, d; |
437 | int32_t * location = excitation + 16*i; |
438 | int32_t branch[4][2]; |
439 | branch[0][0] = pulsecount[i]; |
440 | |
441 | /* unrolled tail recursion */ |
442 | for (a = 0; a < 1; a++) { |
443 | silk_count_children(rc, 0, branch[0][a], branch[1]); |
444 | for (b = 0; b < 2; b++) { |
445 | silk_count_children(rc, 1, branch[1][b], branch[2]); |
446 | for (c = 0; c < 2; c++) { |
447 | silk_count_children(rc, 2, branch[2][c], branch[3]); |
448 | for (d = 0; d < 2; d++) { |
449 | silk_count_children(rc, 3, branch[3][d], location); |
450 | location += 2; |
451 | } |
452 | } |
453 | } |
454 | } |
455 | } else |
456 | memset(excitation + 16*i, 0, 16*sizeof(int32_t)); |
457 | } |
458 | |
459 | /* decode least significant bits */ |
460 | for (i = 0; i < shellblocks << 4; i++) { |
461 | int bit; |
462 | for (bit = 0; bit < lsbcount[i >> 4]; bit++) |
463 | excitation[i] = (excitation[i] << 1) | |
464 | ff_opus_rc_dec_cdf(rc, ff_silk_model_excitation_lsb); |
465 | } |
466 | |
467 | /* decode signs */ |
468 | for (i = 0; i < shellblocks << 4; i++) { |
469 | if (excitation[i] != 0) { |
470 | int sign = ff_opus_rc_dec_cdf(rc, ff_silk_model_excitation_sign[active + |
471 | voiced][qoffset_high][FFMIN(pulsecount[i >> 4], 6)]); |
472 | if (sign == 0) |
473 | excitation[i] *= -1; |
474 | } |
475 | } |
476 | |
477 | /* assemble the excitation */ |
478 | for (i = 0; i < shellblocks << 4; i++) { |
479 | int value = excitation[i]; |
480 | excitation[i] = value * 256 | ff_silk_quant_offset[voiced][qoffset_high]; |
481 | if (value < 0) excitation[i] += 20; |
482 | else if (value > 0) excitation[i] -= 20; |
483 | |
484 | /* invert samples pseudorandomly */ |
485 | seed = 196314165 * seed + 907633515; |
486 | if (seed & 0x80000000) |
487 | excitation[i] *= -1; |
488 | seed += value; |
489 | |
490 | excitationf[i] = excitation[i] / 8388608.0f; |
491 | } |
492 | } |
493 | |
/** Order of the LTP filter */
#define LTP_ORDER 5

/** Maximum residual history according to 4.2.7.6.1 */
#define SILK_MAX_LAG  (288 + LTP_ORDER / 2)
499 | |
500 | static void silk_decode_frame(SilkContext *s, OpusRangeCoder *rc, |
501 | int frame_num, int channel, int coded_channels, int active, int active1) |
502 | { |
503 | /* per frame */ |
504 | int voiced; // combines with active to indicate inactive, active, or active+voiced |
505 | int qoffset_high; |
506 | int order; // order of the LPC coefficients |
507 | float lpc_leadin[16], lpc_body[16], residual[SILK_MAX_LAG + SILK_HISTORY]; |
508 | int has_lpc_leadin; |
509 | float ltpscale; |
510 | |
511 | /* per subframe */ |
512 | struct { |
513 | float gain; |
514 | int pitchlag; |
515 | float ltptaps[5]; |
516 | } sf[4]; |
517 | |
518 | SilkFrame * const frame = s->frame + channel; |
519 | |
520 | int i; |
521 | |
522 | /* obtain stereo weights */ |
523 | if (coded_channels == 2 && channel == 0) { |
524 | int n, wi[2], ws[2], w[2]; |
525 | n = ff_opus_rc_dec_cdf(rc, ff_silk_model_stereo_s1); |
526 | wi[0] = ff_opus_rc_dec_cdf(rc, ff_silk_model_stereo_s2) + 3 * (n / 5); |
527 | ws[0] = ff_opus_rc_dec_cdf(rc, ff_silk_model_stereo_s3); |
528 | wi[1] = ff_opus_rc_dec_cdf(rc, ff_silk_model_stereo_s2) + 3 * (n % 5); |
529 | ws[1] = ff_opus_rc_dec_cdf(rc, ff_silk_model_stereo_s3); |
530 | |
531 | for (i = 0; i < 2; i++) |
532 | w[i] = ff_silk_stereo_weights[wi[i]] + |
533 | (((ff_silk_stereo_weights[wi[i] + 1] - ff_silk_stereo_weights[wi[i]]) * 6554) >> 16) |
534 | * (ws[i]*2 + 1); |
535 | |
536 | s->stereo_weights[0] = (w[0] - w[1]) / 8192.0; |
537 | s->stereo_weights[1] = w[1] / 8192.0; |
538 | |
539 | /* and read the mid-only flag */ |
540 | s->midonly = active1 ? 0 : ff_opus_rc_dec_cdf(rc, ff_silk_model_mid_only); |
541 | } |
542 | |
543 | /* obtain frame type */ |
544 | if (!active) { |
545 | qoffset_high = ff_opus_rc_dec_cdf(rc, ff_silk_model_frame_type_inactive); |
546 | voiced = 0; |
547 | } else { |
548 | int type = ff_opus_rc_dec_cdf(rc, ff_silk_model_frame_type_active); |
549 | qoffset_high = type & 1; |
550 | voiced = type >> 1; |
551 | } |
552 | |
553 | /* obtain subframe quantization gains */ |
554 | for (i = 0; i < s->subframes; i++) { |
555 | int log_gain; //Q7 |
556 | int ipart, fpart, lingain; |
557 | |
558 | if (i == 0 && (frame_num == 0 || !frame->coded)) { |
559 | /* gain is coded absolute */ |
560 | int x = ff_opus_rc_dec_cdf(rc, ff_silk_model_gain_highbits[active + voiced]); |
561 | log_gain = (x<<3) | ff_opus_rc_dec_cdf(rc, ff_silk_model_gain_lowbits); |
562 | |
563 | if (frame->coded) |
564 | log_gain = FFMAX(log_gain, frame->log_gain - 16); |
565 | } else { |
566 | /* gain is coded relative */ |
567 | int delta_gain = ff_opus_rc_dec_cdf(rc, ff_silk_model_gain_delta); |
568 | log_gain = av_clip_uintp2(FFMAX((delta_gain<<1) - 16, |
569 | frame->log_gain + delta_gain - 4), 6); |
570 | } |
571 | |
572 | frame->log_gain = log_gain; |
573 | |
574 | /* approximate 2**(x/128) with a Q7 (i.e. non-integer) input */ |
575 | log_gain = (log_gain * 0x1D1C71 >> 16) + 2090; |
576 | ipart = log_gain >> 7; |
577 | fpart = log_gain & 127; |
578 | lingain = (1 << ipart) + ((-174 * fpart * (128-fpart) >>16) + fpart) * ((1<<ipart) >> 7); |
579 | sf[i].gain = lingain / 65536.0f; |
580 | } |
581 | |
582 | /* obtain LPC filter coefficients */ |
583 | silk_decode_lpc(s, frame, rc, lpc_leadin, lpc_body, &order, &has_lpc_leadin, voiced); |
584 | |
585 | /* obtain pitch lags, if this is a voiced frame */ |
586 | if (voiced) { |
587 | int lag_absolute = (!frame_num || !frame->prev_voiced); |
588 | int primarylag; // primary pitch lag for the entire SILK frame |
589 | int ltpfilter; |
590 | const int8_t * offsets; |
591 | |
592 | if (!lag_absolute) { |
593 | int delta = ff_opus_rc_dec_cdf(rc, ff_silk_model_pitch_delta); |
594 | if (delta) |
595 | primarylag = frame->primarylag + delta - 9; |
596 | else |
597 | lag_absolute = 1; |
598 | } |
599 | |
600 | if (lag_absolute) { |
601 | /* primary lag is coded absolute */ |
602 | int highbits, lowbits; |
603 | static const uint16_t *model[] = { |
604 | ff_silk_model_pitch_lowbits_nb, ff_silk_model_pitch_lowbits_mb, |
605 | ff_silk_model_pitch_lowbits_wb |
606 | }; |
607 | highbits = ff_opus_rc_dec_cdf(rc, ff_silk_model_pitch_highbits); |
608 | lowbits = ff_opus_rc_dec_cdf(rc, model[s->bandwidth]); |
609 | |
610 | primarylag = ff_silk_pitch_min_lag[s->bandwidth] + |
611 | highbits*ff_silk_pitch_scale[s->bandwidth] + lowbits; |
612 | } |
613 | frame->primarylag = primarylag; |
614 | |
615 | if (s->subframes == 2) |
616 | offsets = (s->bandwidth == OPUS_BANDWIDTH_NARROWBAND) |
617 | ? ff_silk_pitch_offset_nb10ms[ff_opus_rc_dec_cdf(rc, |
618 | ff_silk_model_pitch_contour_nb10ms)] |
619 | : ff_silk_pitch_offset_mbwb10ms[ff_opus_rc_dec_cdf(rc, |
620 | ff_silk_model_pitch_contour_mbwb10ms)]; |
621 | else |
622 | offsets = (s->bandwidth == OPUS_BANDWIDTH_NARROWBAND) |
623 | ? ff_silk_pitch_offset_nb20ms[ff_opus_rc_dec_cdf(rc, |
624 | ff_silk_model_pitch_contour_nb20ms)] |
625 | : ff_silk_pitch_offset_mbwb20ms[ff_opus_rc_dec_cdf(rc, |
626 | ff_silk_model_pitch_contour_mbwb20ms)]; |
627 | |
628 | for (i = 0; i < s->subframes; i++) |
629 | sf[i].pitchlag = av_clip(primarylag + offsets[i], |
630 | ff_silk_pitch_min_lag[s->bandwidth], |
631 | ff_silk_pitch_max_lag[s->bandwidth]); |
632 | |
633 | /* obtain LTP filter coefficients */ |
634 | ltpfilter = ff_opus_rc_dec_cdf(rc, ff_silk_model_ltp_filter); |
635 | for (i = 0; i < s->subframes; i++) { |
636 | int index, j; |
637 | static const uint16_t *filter_sel[] = { |
638 | ff_silk_model_ltp_filter0_sel, ff_silk_model_ltp_filter1_sel, |
639 | ff_silk_model_ltp_filter2_sel |
640 | }; |
641 | static const int8_t (*filter_taps[])[5] = { |
642 | ff_silk_ltp_filter0_taps, ff_silk_ltp_filter1_taps, ff_silk_ltp_filter2_taps |
643 | }; |
644 | index = ff_opus_rc_dec_cdf(rc, filter_sel[ltpfilter]); |
645 | for (j = 0; j < 5; j++) |
646 | sf[i].ltptaps[j] = filter_taps[ltpfilter][index][j] / 128.0f; |
647 | } |
648 | } |
649 | |
650 | /* obtain LTP scale factor */ |
651 | if (voiced && frame_num == 0) |
652 | ltpscale = ff_silk_ltp_scale_factor[ff_opus_rc_dec_cdf(rc, |
653 | ff_silk_model_ltp_scale_index)] / 16384.0f; |
654 | else ltpscale = 15565.0f/16384.0f; |
655 | |
656 | /* generate the excitation signal for the entire frame */ |
657 | silk_decode_excitation(s, rc, residual + SILK_MAX_LAG, qoffset_high, |
658 | active, voiced); |
659 | |
660 | /* skip synthesising the side channel if we want mono-only */ |
661 | if (s->output_channels == channel) |
662 | return; |
663 | |
664 | /* generate the output signal */ |
665 | for (i = 0; i < s->subframes; i++) { |
666 | const float * lpc_coeff = (i < 2 && has_lpc_leadin) ? lpc_leadin : lpc_body; |
667 | float *dst = frame->output + SILK_HISTORY + i * s->sflength; |
668 | float *resptr = residual + SILK_MAX_LAG + i * s->sflength; |
669 | float *lpc = frame->lpc_history + SILK_HISTORY + i * s->sflength; |
670 | float sum; |
671 | int j, k; |
672 | |
673 | if (voiced) { |
674 | int out_end; |
675 | float scale; |
676 | |
677 | if (i < 2 || s->nlsf_interp_factor == 4) { |
678 | out_end = -i * s->sflength; |
679 | scale = ltpscale; |
680 | } else { |
681 | out_end = -(i - 2) * s->sflength; |
682 | scale = 1.0f; |
683 | } |
684 | |
685 | /* when the LPC coefficients change, a re-whitening filter is used */ |
686 | /* to produce a residual that accounts for the change */ |
687 | for (j = - sf[i].pitchlag - LTP_ORDER/2; j < out_end; j++) { |
688 | sum = dst[j]; |
689 | for (k = 0; k < order; k++) |
690 | sum -= lpc_coeff[k] * dst[j - k - 1]; |
691 | resptr[j] = av_clipf(sum, -1.0f, 1.0f) * scale / sf[i].gain; |
692 | } |
693 | |
694 | if (out_end) { |
695 | float rescale = sf[i-1].gain / sf[i].gain; |
696 | for (j = out_end; j < 0; j++) |
697 | resptr[j] *= rescale; |
698 | } |
699 | |
700 | /* LTP synthesis */ |
701 | for (j = 0; j < s->sflength; j++) { |
702 | sum = resptr[j]; |
703 | for (k = 0; k < LTP_ORDER; k++) |
704 | sum += sf[i].ltptaps[k] * resptr[j - sf[i].pitchlag + LTP_ORDER/2 - k]; |
705 | resptr[j] = sum; |
706 | } |
707 | } |
708 | |
709 | /* LPC synthesis */ |
710 | for (j = 0; j < s->sflength; j++) { |
711 | sum = resptr[j] * sf[i].gain; |
712 | for (k = 1; k <= order; k++) |
713 | sum += lpc_coeff[k - 1] * lpc[j - k]; |
714 | |
715 | lpc[j] = sum; |
716 | dst[j] = av_clipf(sum, -1.0f, 1.0f); |
717 | } |
718 | } |
719 | |
720 | frame->prev_voiced = voiced; |
721 | memmove(frame->lpc_history, frame->lpc_history + s->flength, SILK_HISTORY * sizeof(float)); |
722 | memmove(frame->output, frame->output + s->flength, SILK_HISTORY * sizeof(float)); |
723 | |
724 | frame->coded = 1; |
725 | } |
726 | |
727 | static void silk_unmix_ms(SilkContext *s, float *l, float *r) |
728 | { |
729 | float *mid = s->frame[0].output + SILK_HISTORY - s->flength; |
730 | float *side = s->frame[1].output + SILK_HISTORY - s->flength; |
731 | float w0_prev = s->prev_stereo_weights[0]; |
732 | float w1_prev = s->prev_stereo_weights[1]; |
733 | float w0 = s->stereo_weights[0]; |
734 | float w1 = s->stereo_weights[1]; |
735 | int n1 = ff_silk_stereo_interp_len[s->bandwidth]; |
736 | int i; |
737 | |
738 | for (i = 0; i < n1; i++) { |
739 | float interp0 = w0_prev + i * (w0 - w0_prev) / n1; |
740 | float interp1 = w1_prev + i * (w1 - w1_prev) / n1; |
741 | float p0 = 0.25 * (mid[i - 2] + 2 * mid[i - 1] + mid[i]); |
742 | |
743 | l[i] = av_clipf((1 + interp1) * mid[i - 1] + side[i - 1] + interp0 * p0, -1.0, 1.0); |
744 | r[i] = av_clipf((1 - interp1) * mid[i - 1] - side[i - 1] - interp0 * p0, -1.0, 1.0); |
745 | } |
746 | |
747 | for (; i < s->flength; i++) { |
748 | float p0 = 0.25 * (mid[i - 2] + 2 * mid[i - 1] + mid[i]); |
749 | |
750 | l[i] = av_clipf((1 + w1) * mid[i - 1] + side[i - 1] + w0 * p0, -1.0, 1.0); |
751 | r[i] = av_clipf((1 - w1) * mid[i - 1] - side[i - 1] - w0 * p0, -1.0, 1.0); |
752 | } |
753 | |
754 | memcpy(s->prev_stereo_weights, s->stereo_weights, sizeof(s->stereo_weights)); |
755 | } |
756 | |
757 | static void silk_flush_frame(SilkFrame *frame) |
758 | { |
759 | if (!frame->coded) |
760 | return; |
761 | |
762 | memset(frame->output, 0, sizeof(frame->output)); |
763 | memset(frame->lpc_history, 0, sizeof(frame->lpc_history)); |
764 | |
765 | memset(frame->lpc, 0, sizeof(frame->lpc)); |
766 | memset(frame->nlsf, 0, sizeof(frame->nlsf)); |
767 | |
768 | frame->log_gain = 0; |
769 | |
770 | frame->primarylag = 0; |
771 | frame->prev_voiced = 0; |
772 | frame->coded = 0; |
773 | } |
774 | |
775 | int ff_silk_decode_superframe(SilkContext *s, OpusRangeCoder *rc, |
776 | float *output[2], |
777 | enum OpusBandwidth bandwidth, |
778 | int coded_channels, |
779 | int duration_ms) |
780 | { |
781 | int active[2][6], redundancy[2]; |
782 | int nb_frames, i, j; |
783 | |
784 | if (bandwidth > OPUS_BANDWIDTH_WIDEBAND || |
785 | coded_channels > 2 || duration_ms > 60) { |
786 | av_log(s->avctx, AV_LOG_ERROR, "Invalid parameters passed " |
787 | "to the SILK decoder.\n"); |
788 | return AVERROR(EINVAL); |
789 | } |
790 | |
791 | nb_frames = 1 + (duration_ms > 20) + (duration_ms > 40); |
792 | s->subframes = duration_ms / nb_frames / 5; // 5ms subframes |
793 | s->sflength = 20 * (bandwidth + 2); |
794 | s->flength = s->sflength * s->subframes; |
795 | s->bandwidth = bandwidth; |
796 | s->wb = bandwidth == OPUS_BANDWIDTH_WIDEBAND; |
797 | |
798 | /* make sure to flush the side channel when switching from mono to stereo */ |
799 | if (coded_channels > s->prev_coded_channels) |
800 | silk_flush_frame(&s->frame[1]); |
801 | s->prev_coded_channels = coded_channels; |
802 | |
803 | /* read the LP-layer header bits */ |
804 | for (i = 0; i < coded_channels; i++) { |
805 | for (j = 0; j < nb_frames; j++) |
806 | active[i][j] = ff_opus_rc_dec_log(rc, 1); |
807 | |
808 | redundancy[i] = ff_opus_rc_dec_log(rc, 1); |
809 | if (redundancy[i]) { |
810 | avpriv_report_missing_feature(s->avctx, "LBRR frames"); |
811 | return AVERROR_PATCHWELCOME; |
812 | } |
813 | } |
814 | |
815 | for (i = 0; i < nb_frames; i++) { |
816 | for (j = 0; j < coded_channels && !s->midonly; j++) |
817 | silk_decode_frame(s, rc, i, j, coded_channels, active[j][i], active[1][i]); |
818 | |
819 | /* reset the side channel if it is not coded */ |
820 | if (s->midonly && s->frame[1].coded) |
821 | silk_flush_frame(&s->frame[1]); |
822 | |
823 | if (coded_channels == 1 || s->output_channels == 1) { |
824 | for (j = 0; j < s->output_channels; j++) { |
825 | memcpy(output[j] + i * s->flength, |
826 | s->frame[0].output + SILK_HISTORY - s->flength - 2, |
827 | s->flength * sizeof(float)); |
828 | } |
829 | } else { |
830 | silk_unmix_ms(s, output[0] + i * s->flength, output[1] + i * s->flength); |
831 | } |
832 | |
833 | s->midonly = 0; |
834 | } |
835 | |
836 | return nb_frames * s->flength; |
837 | } |
838 | |
839 | void ff_silk_free(SilkContext **ps) |
840 | { |
841 | av_freep(ps); |
842 | } |
843 | |
844 | void ff_silk_flush(SilkContext *s) |
845 | { |
846 | silk_flush_frame(&s->frame[0]); |
847 | silk_flush_frame(&s->frame[1]); |
848 | |
849 | memset(s->prev_stereo_weights, 0, sizeof(s->prev_stereo_weights)); |
850 | } |
851 | |
852 | int ff_silk_init(AVCodecContext *avctx, SilkContext **ps, int output_channels) |
853 | { |
854 | SilkContext *s; |
855 | |
856 | if (output_channels != 1 && output_channels != 2) { |
857 | av_log(avctx, AV_LOG_ERROR, "Invalid number of output channels: %d\n", |
858 | output_channels); |
859 | return AVERROR(EINVAL); |
860 | } |
861 | |
862 | s = av_mallocz(sizeof(*s)); |
863 | if (!s) |
864 | return AVERROR(ENOMEM); |
865 | |
866 | s->avctx = avctx; |
867 | s->output_channels = output_channels; |
868 | |
869 | ff_silk_flush(s); |
870 | |
871 | *ps = s; |
872 | |
873 | return 0; |
874 | } |
875 |