blob: e0382091ba941216cbeb1ef5f3587b94d2458a52
1 | /* |
2 | * Copyright (c) 2013-2014 Mozilla Corporation |
3 | * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com> |
4 | * |
5 | * This file is part of FFmpeg. |
6 | * |
7 | * FFmpeg is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; either |
10 | * version 2.1 of the License, or (at your option) any later version. |
11 | * |
12 | * FFmpeg is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | * Lesser General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU Lesser General Public |
18 | * License along with FFmpeg; if not, write to the Free Software |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | */ |
21 | |
22 | /** |
23 | * @file |
24 | * Celt non-power of 2 iMDCT |
25 | */ |
26 | |
27 | #include <float.h> |
28 | #include <math.h> |
29 | #include <stddef.h> |
30 | |
31 | #include "config.h" |
32 | |
33 | #include "libavutil/attributes.h" |
34 | #include "libavutil/common.h" |
35 | |
36 | #include "avfft.h" |
37 | #include "mdct15.h" |
38 | |
39 | // complex c = a * b |
40 | #define CMUL3(cre, cim, are, aim, bre, bim) \ |
41 | do { \ |
42 | cre = are * bre - aim * bim; \ |
43 | cim = are * bim + aim * bre; \ |
44 | } while (0) |
45 | |
46 | #define CMUL(c, a, b) CMUL3((c).re, (c).im, (a).re, (a).im, (b).re, (b).im) |
47 | |
48 | av_cold void ff_mdct15_uninit(MDCT15Context **ps) |
49 | { |
50 | MDCT15Context *s = *ps; |
51 | |
52 | if (!s) |
53 | return; |
54 | |
55 | ff_fft_end(&s->ptwo_fft); |
56 | |
57 | av_freep(&s->pfa_prereindex); |
58 | av_freep(&s->pfa_postreindex); |
59 | av_freep(&s->twiddle_exptab); |
60 | av_freep(&s->tmp); |
61 | |
62 | av_freep(ps); |
63 | } |
64 | |
65 | static void mdct15(MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride); |
66 | |
67 | static void imdct15_half(MDCT15Context *s, float *dst, const float *src, |
68 | ptrdiff_t stride, float scale); |
69 | |
70 | static inline int init_pfa_reindex_tabs(MDCT15Context *s) |
71 | { |
72 | int i, j; |
73 | const int b_ptwo = s->ptwo_fft.nbits; /* Bits for the power of two FFTs */ |
74 | const int l_ptwo = 1 << b_ptwo; /* Total length for the power of two FFTs */ |
75 | const int inv_1 = l_ptwo << ((4 - b_ptwo) & 3); /* (2^b_ptwo)^-1 mod 15 */ |
76 | const int inv_2 = 0xeeeeeeef & ((1U << b_ptwo) - 1); /* 15^-1 mod 2^b_ptwo */ |
77 | |
78 | s->pfa_prereindex = av_malloc(15 * l_ptwo * sizeof(*s->pfa_prereindex)); |
79 | if (!s->pfa_prereindex) |
80 | return 1; |
81 | |
82 | s->pfa_postreindex = av_malloc(15 * l_ptwo * sizeof(*s->pfa_postreindex)); |
83 | if (!s->pfa_postreindex) |
84 | return 1; |
85 | |
86 | /* Pre/Post-reindex */ |
87 | for (i = 0; i < l_ptwo; i++) { |
88 | for (j = 0; j < 15; j++) { |
89 | const int q_pre = ((l_ptwo * j)/15 + i) >> b_ptwo; |
90 | const int q_post = (((j*inv_1)/15) + (i*inv_2)) >> b_ptwo; |
91 | const int k_pre = 15*i + (j - q_pre*15)*(1 << b_ptwo); |
92 | const int k_post = i*inv_2*15 + j*inv_1 - 15*q_post*l_ptwo; |
93 | s->pfa_prereindex[i*15 + j] = k_pre; |
94 | s->pfa_postreindex[k_post] = l_ptwo*j + i; |
95 | } |
96 | } |
97 | |
98 | return 0; |
99 | } |
100 | |
101 | av_cold int ff_mdct15_init(MDCT15Context **ps, int inverse, int N, double scale) |
102 | { |
103 | MDCT15Context *s; |
104 | double alpha, theta; |
105 | int len2 = 15 * (1 << N); |
106 | int len = 2 * len2; |
107 | int i; |
108 | |
109 | /* Tested and verified to work on everything in between */ |
110 | if ((N < 2) || (N > 13)) |
111 | return AVERROR(EINVAL); |
112 | |
113 | s = av_mallocz(sizeof(*s)); |
114 | if (!s) |
115 | return AVERROR(ENOMEM); |
116 | |
117 | s->fft_n = N - 1; |
118 | s->len4 = len2 / 2; |
119 | s->len2 = len2; |
120 | s->inverse = inverse; |
121 | s->mdct = mdct15; |
122 | s->imdct_half = imdct15_half; |
123 | |
124 | if (ff_fft_init(&s->ptwo_fft, N - 1, s->inverse) < 0) |
125 | goto fail; |
126 | |
127 | if (init_pfa_reindex_tabs(s)) |
128 | goto fail; |
129 | |
130 | s->tmp = av_malloc_array(len, 2 * sizeof(*s->tmp)); |
131 | if (!s->tmp) |
132 | goto fail; |
133 | |
134 | s->twiddle_exptab = av_malloc_array(s->len4, sizeof(*s->twiddle_exptab)); |
135 | if (!s->twiddle_exptab) |
136 | goto fail; |
137 | |
138 | theta = 0.125f + (scale < 0 ? s->len4 : 0); |
139 | scale = sqrt(fabs(scale)); |
140 | for (i = 0; i < s->len4; i++) { |
141 | alpha = 2 * M_PI * (i + theta) / len; |
142 | s->twiddle_exptab[i].re = cos(alpha) * scale; |
143 | s->twiddle_exptab[i].im = sin(alpha) * scale; |
144 | } |
145 | |
146 | /* 15-point FFT exptab */ |
147 | for (i = 0; i < 19; i++) { |
148 | if (i < 15) { |
149 | double theta = (2.0f * M_PI * i) / 15.0f; |
150 | if (!s->inverse) |
151 | theta *= -1; |
152 | s->exptab[i].re = cos(theta); |
153 | s->exptab[i].im = sin(theta); |
154 | } else { /* Wrap around to simplify fft15 */ |
155 | s->exptab[i] = s->exptab[i - 15]; |
156 | } |
157 | } |
158 | |
159 | /* 5-point FFT exptab */ |
160 | s->exptab[19].re = cos(2.0f * M_PI / 5.0f); |
161 | s->exptab[19].im = sin(2.0f * M_PI / 5.0f); |
162 | s->exptab[20].re = cos(1.0f * M_PI / 5.0f); |
163 | s->exptab[20].im = sin(1.0f * M_PI / 5.0f); |
164 | |
165 | /* Invert the phase for an inverse transform, do nothing for a forward transform */ |
166 | if (s->inverse) { |
167 | s->exptab[19].im *= -1; |
168 | s->exptab[20].im *= -1; |
169 | } |
170 | |
171 | *ps = s; |
172 | |
173 | return 0; |
174 | |
175 | fail: |
176 | ff_mdct15_uninit(&s); |
177 | return AVERROR(ENOMEM); |
178 | } |
179 | |
180 | /* Stride is hardcoded to 3 */ |
181 | static inline void fft5(const FFTComplex exptab[2], FFTComplex *out, |
182 | const FFTComplex *in) |
183 | { |
184 | FFTComplex z0[4], t[6]; |
185 | |
186 | t[0].re = in[3].re + in[12].re; |
187 | t[0].im = in[3].im + in[12].im; |
188 | t[1].im = in[3].re - in[12].re; |
189 | t[1].re = in[3].im - in[12].im; |
190 | t[2].re = in[6].re + in[ 9].re; |
191 | t[2].im = in[6].im + in[ 9].im; |
192 | t[3].im = in[6].re - in[ 9].re; |
193 | t[3].re = in[6].im - in[ 9].im; |
194 | |
195 | out[0].re = in[0].re + in[3].re + in[6].re + in[9].re + in[12].re; |
196 | out[0].im = in[0].im + in[3].im + in[6].im + in[9].im + in[12].im; |
197 | |
198 | t[4].re = exptab[0].re * t[2].re - exptab[1].re * t[0].re; |
199 | t[4].im = exptab[0].re * t[2].im - exptab[1].re * t[0].im; |
200 | t[0].re = exptab[0].re * t[0].re - exptab[1].re * t[2].re; |
201 | t[0].im = exptab[0].re * t[0].im - exptab[1].re * t[2].im; |
202 | t[5].re = exptab[0].im * t[3].re - exptab[1].im * t[1].re; |
203 | t[5].im = exptab[0].im * t[3].im - exptab[1].im * t[1].im; |
204 | t[1].re = exptab[0].im * t[1].re + exptab[1].im * t[3].re; |
205 | t[1].im = exptab[0].im * t[1].im + exptab[1].im * t[3].im; |
206 | |
207 | z0[0].re = t[0].re - t[1].re; |
208 | z0[0].im = t[0].im - t[1].im; |
209 | z0[1].re = t[4].re + t[5].re; |
210 | z0[1].im = t[4].im + t[5].im; |
211 | |
212 | z0[2].re = t[4].re - t[5].re; |
213 | z0[2].im = t[4].im - t[5].im; |
214 | z0[3].re = t[0].re + t[1].re; |
215 | z0[3].im = t[0].im + t[1].im; |
216 | |
217 | out[1].re = in[0].re + z0[3].re; |
218 | out[1].im = in[0].im + z0[0].im; |
219 | out[2].re = in[0].re + z0[2].re; |
220 | out[2].im = in[0].im + z0[1].im; |
221 | out[3].re = in[0].re + z0[1].re; |
222 | out[3].im = in[0].im + z0[2].im; |
223 | out[4].re = in[0].re + z0[0].re; |
224 | out[4].im = in[0].im + z0[3].im; |
225 | } |
226 | |
227 | static void fft15(const FFTComplex exptab[22], FFTComplex *out, const FFTComplex *in, size_t stride) |
228 | { |
229 | int k; |
230 | FFTComplex tmp1[5], tmp2[5], tmp3[5]; |
231 | |
232 | fft5(exptab + 19, tmp1, in + 0); |
233 | fft5(exptab + 19, tmp2, in + 1); |
234 | fft5(exptab + 19, tmp3, in + 2); |
235 | |
236 | for (k = 0; k < 5; k++) { |
237 | FFTComplex t[2]; |
238 | |
239 | CMUL(t[0], tmp2[k], exptab[k]); |
240 | CMUL(t[1], tmp3[k], exptab[2 * k]); |
241 | out[stride*k].re = tmp1[k].re + t[0].re + t[1].re; |
242 | out[stride*k].im = tmp1[k].im + t[0].im + t[1].im; |
243 | |
244 | CMUL(t[0], tmp2[k], exptab[k + 5]); |
245 | CMUL(t[1], tmp3[k], exptab[2 * (k + 5)]); |
246 | out[stride*(k + 5)].re = tmp1[k].re + t[0].re + t[1].re; |
247 | out[stride*(k + 5)].im = tmp1[k].im + t[0].im + t[1].im; |
248 | |
249 | CMUL(t[0], tmp2[k], exptab[k + 10]); |
250 | CMUL(t[1], tmp3[k], exptab[2 * k + 5]); |
251 | out[stride*(k + 10)].re = tmp1[k].re + t[0].re + t[1].re; |
252 | out[stride*(k + 10)].im = tmp1[k].im + t[0].im + t[1].im; |
253 | } |
254 | } |
255 | |
256 | static void mdct15(MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride) |
257 | { |
258 | int i, j; |
259 | const int len4 = s->len4, len3 = len4 * 3, len8 = len4 >> 1; |
260 | const int l_ptwo = 1 << s->ptwo_fft.nbits; |
261 | FFTComplex fft15in[15]; |
262 | |
263 | /* Folding and pre-reindexing */ |
264 | for (i = 0; i < l_ptwo; i++) { |
265 | for (j = 0; j < 15; j++) { |
266 | float re, im; |
267 | const int k = s->pfa_prereindex[i*15 + j]; |
268 | if (k < len8) { |
269 | re = -src[2*k+len3] - src[len3-1-2*k]; |
270 | im = -src[len4+2*k] + src[len4-1-2*k]; |
271 | } else { |
272 | re = src[2*k-len4] - src[1*len3-1-2*k]; |
273 | im = -src[2*k+len4] - src[5*len4-1-2*k]; |
274 | } |
275 | CMUL3(fft15in[j].re, fft15in[j].im, re, im, s->twiddle_exptab[k].re, -s->twiddle_exptab[k].im); |
276 | } |
277 | fft15(s->exptab, s->tmp + s->ptwo_fft.revtab[i], fft15in, l_ptwo); |
278 | } |
279 | |
280 | /* Then a 15xN FFT (where N is a power of two) */ |
281 | for (i = 0; i < 15; i++) |
282 | s->ptwo_fft.fft_calc(&s->ptwo_fft, s->tmp + l_ptwo*i); |
283 | |
284 | /* Reindex again, apply twiddles and output */ |
285 | for (i = 0; i < len8; i++) { |
286 | float re0, im0, re1, im1; |
287 | const int i0 = len8 + i, i1 = len8 - i - 1; |
288 | const int s0 = s->pfa_postreindex[i0], s1 = s->pfa_postreindex[i1]; |
289 | |
290 | CMUL3(im1, re0, s->tmp[s1].re, s->tmp[s1].im, s->twiddle_exptab[i1].im, s->twiddle_exptab[i1].re); |
291 | CMUL3(im0, re1, s->tmp[s0].re, s->tmp[s0].im, s->twiddle_exptab[i0].im, s->twiddle_exptab[i0].re); |
292 | |
293 | dst[2*i1*stride ] = re0; |
294 | dst[2*i1*stride + stride] = im0; |
295 | dst[2*i0*stride ] = re1; |
296 | dst[2*i0*stride + stride] = im1; |
297 | } |
298 | } |
299 | |
300 | static void imdct15_half(MDCT15Context *s, float *dst, const float *src, |
301 | ptrdiff_t stride, float scale) |
302 | { |
303 | FFTComplex fft15in[15]; |
304 | FFTComplex *z = (FFTComplex *)dst; |
305 | int i, j, len8 = s->len4 >> 1, l_ptwo = 1 << s->ptwo_fft.nbits; |
306 | const float *in1 = src, *in2 = src + (s->len2 - 1) * stride; |
307 | |
308 | /* Reindex input, putting it into a buffer and doing an Nx15 FFT */ |
309 | for (i = 0; i < l_ptwo; i++) { |
310 | for (j = 0; j < 15; j++) { |
311 | const int k = s->pfa_prereindex[i*15 + j]; |
312 | FFTComplex tmp = { *(in2 - 2*k*stride), *(in1 + 2*k*stride) }; |
313 | CMUL(fft15in[j], tmp, s->twiddle_exptab[k]); |
314 | } |
315 | fft15(s->exptab, s->tmp + s->ptwo_fft.revtab[i], fft15in, l_ptwo); |
316 | } |
317 | |
318 | /* Then a 15xN FFT (where N is a power of two) */ |
319 | for (i = 0; i < 15; i++) |
320 | s->ptwo_fft.fft_calc(&s->ptwo_fft, s->tmp + l_ptwo*i); |
321 | |
322 | /* Reindex again, apply twiddles and output */ |
323 | for (i = 0; i < len8; i++) { |
324 | float re0, im0, re1, im1; |
325 | const int i0 = len8 + i, i1 = len8 - i - 1; |
326 | const int s0 = s->pfa_postreindex[i0], s1 = s->pfa_postreindex[i1]; |
327 | |
328 | CMUL3(re0, im1, s->tmp[s1].im, s->tmp[s1].re, s->twiddle_exptab[i1].im, s->twiddle_exptab[i1].re); |
329 | CMUL3(re1, im0, s->tmp[s0].im, s->tmp[s0].re, s->twiddle_exptab[i0].im, s->twiddle_exptab[i0].re); |
330 | z[i1].re = scale * re0; |
331 | z[i1].im = scale * im0; |
332 | z[i0].re = scale * re1; |
333 | z[i0].im = scale * im1; |
334 | } |
335 | } |
336 |