blob: 1c5dab5c5abb033677774872523c38dd6e590b80
1 | /* |
2 | * copyright (c) 2008 Michael Niedermayer <michaelni@gmx.at> |
3 | * Copyright (C) 2016 foo86 |
4 | * |
5 | * This file is part of FFmpeg. |
6 | * |
7 | * FFmpeg is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; either |
10 | * version 2.1 of the License, or (at your option) any later version. |
11 | * |
12 | * FFmpeg is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | * Lesser General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU Lesser General Public |
18 | * License along with FFmpeg; if not, write to the Free Software |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | */ |
21 | |
22 | #include "fft.h" |
23 | #include "dcadct.h" |
24 | #include "dcamath.h" |
25 | #include "synth_filter.h" |
26 | |
27 | static void synth_filter_float(FFTContext *imdct, |
28 | float *synth_buf_ptr, int *synth_buf_offset, |
29 | float synth_buf2[32], const float window[512], |
30 | float out[32], const float in[32], float scale) |
31 | { |
32 | float *synth_buf = synth_buf_ptr + *synth_buf_offset; |
33 | int i, j; |
34 | |
35 | imdct->imdct_half(imdct, synth_buf, in); |
36 | |
37 | for (i = 0; i < 16; i++) { |
38 | float a = synth_buf2[i ]; |
39 | float b = synth_buf2[i + 16]; |
40 | float c = 0; |
41 | float d = 0; |
42 | for (j = 0; j < 512 - *synth_buf_offset; j += 64) { |
43 | a += window[i + j ] * (-synth_buf[15 - i + j ]); |
44 | b += window[i + j + 16] * ( synth_buf[ i + j ]); |
45 | c += window[i + j + 32] * ( synth_buf[16 + i + j ]); |
46 | d += window[i + j + 48] * ( synth_buf[31 - i + j ]); |
47 | } |
48 | for ( ; j < 512; j += 64) { |
49 | a += window[i + j ] * (-synth_buf[15 - i + j - 512]); |
50 | b += window[i + j + 16] * ( synth_buf[ i + j - 512]); |
51 | c += window[i + j + 32] * ( synth_buf[16 + i + j - 512]); |
52 | d += window[i + j + 48] * ( synth_buf[31 - i + j - 512]); |
53 | } |
54 | out[i ] = a * scale; |
55 | out[i + 16] = b * scale; |
56 | synth_buf2[i ] = c; |
57 | synth_buf2[i + 16] = d; |
58 | } |
59 | |
60 | *synth_buf_offset = (*synth_buf_offset - 32) & 511; |
61 | } |
62 | |
63 | static void synth_filter_float_64(FFTContext *imdct, |
64 | float *synth_buf_ptr, int *synth_buf_offset, |
65 | float synth_buf2[64], const float window[1024], |
66 | float out[64], const float in[64], float scale) |
67 | { |
68 | float *synth_buf = synth_buf_ptr + *synth_buf_offset; |
69 | int i, j; |
70 | |
71 | imdct->imdct_half(imdct, synth_buf, in); |
72 | |
73 | for (i = 0; i < 32; i++) { |
74 | float a = synth_buf2[i ]; |
75 | float b = synth_buf2[i + 32]; |
76 | float c = 0; |
77 | float d = 0; |
78 | for (j = 0; j < 1024 - *synth_buf_offset; j += 128) { |
79 | a += window[i + j ] * (-synth_buf[31 - i + j ]); |
80 | b += window[i + j + 32] * ( synth_buf[ i + j ]); |
81 | c += window[i + j + 64] * ( synth_buf[32 + i + j ]); |
82 | d += window[i + j + 96] * ( synth_buf[63 - i + j ]); |
83 | } |
84 | for ( ; j < 1024; j += 128) { |
85 | a += window[i + j ] * (-synth_buf[31 - i + j - 1024]); |
86 | b += window[i + j + 32] * ( synth_buf[ i + j - 1024]); |
87 | c += window[i + j + 64] * ( synth_buf[32 + i + j - 1024]); |
88 | d += window[i + j + 96] * ( synth_buf[63 - i + j - 1024]); |
89 | } |
90 | out[i ] = a * scale; |
91 | out[i + 32] = b * scale; |
92 | synth_buf2[i ] = c; |
93 | synth_buf2[i + 32] = d; |
94 | } |
95 | |
96 | *synth_buf_offset = (*synth_buf_offset - 64) & 1023; |
97 | } |
98 | |
99 | static void synth_filter_fixed(DCADCTContext *imdct, |
100 | int32_t *synth_buf_ptr, int *synth_buf_offset, |
101 | int32_t synth_buf2[32], const int32_t window[512], |
102 | int32_t out[32], const int32_t in[32]) |
103 | { |
104 | int32_t *synth_buf = synth_buf_ptr + *synth_buf_offset; |
105 | int i, j; |
106 | |
107 | imdct->imdct_half[0](synth_buf, in); |
108 | |
109 | for (i = 0; i < 16; i++) { |
110 | int64_t a = synth_buf2[i ] * (INT64_C(1) << 21); |
111 | int64_t b = synth_buf2[i + 16] * (INT64_C(1) << 21); |
112 | int64_t c = 0; |
113 | int64_t d = 0; |
114 | for (j = 0; j < 512 - *synth_buf_offset; j += 64) { |
115 | a += (int64_t)window[i + j ] * synth_buf[ i + j ]; |
116 | b += (int64_t)window[i + j + 16] * synth_buf[15 - i + j ]; |
117 | c += (int64_t)window[i + j + 32] * synth_buf[16 + i + j ]; |
118 | d += (int64_t)window[i + j + 48] * synth_buf[31 - i + j ]; |
119 | } |
120 | for ( ; j < 512; j += 64) { |
121 | a += (int64_t)window[i + j ] * synth_buf[ i + j - 512]; |
122 | b += (int64_t)window[i + j + 16] * synth_buf[15 - i + j - 512]; |
123 | c += (int64_t)window[i + j + 32] * synth_buf[16 + i + j - 512]; |
124 | d += (int64_t)window[i + j + 48] * synth_buf[31 - i + j - 512]; |
125 | } |
126 | out[i ] = clip23(norm21(a)); |
127 | out[i + 16] = clip23(norm21(b)); |
128 | synth_buf2[i ] = norm21(c); |
129 | synth_buf2[i + 16] = norm21(d); |
130 | } |
131 | |
132 | *synth_buf_offset = (*synth_buf_offset - 32) & 511; |
133 | } |
134 | |
135 | static void synth_filter_fixed_64(DCADCTContext *imdct, |
136 | int32_t *synth_buf_ptr, int *synth_buf_offset, |
137 | int32_t synth_buf2[64], const int32_t window[1024], |
138 | int32_t out[64], const int32_t in[64]) |
139 | { |
140 | int32_t *synth_buf = synth_buf_ptr + *synth_buf_offset; |
141 | int i, j; |
142 | |
143 | imdct->imdct_half[1](synth_buf, in); |
144 | |
145 | for (i = 0; i < 32; i++) { |
146 | int64_t a = synth_buf2[i ] * (INT64_C(1) << 20); |
147 | int64_t b = synth_buf2[i + 32] * (INT64_C(1) << 20); |
148 | int64_t c = 0; |
149 | int64_t d = 0; |
150 | for (j = 0; j < 1024 - *synth_buf_offset; j += 128) { |
151 | a += (int64_t)window[i + j ] * synth_buf[ i + j ]; |
152 | b += (int64_t)window[i + j + 32] * synth_buf[31 - i + j ]; |
153 | c += (int64_t)window[i + j + 64] * synth_buf[32 + i + j ]; |
154 | d += (int64_t)window[i + j + 96] * synth_buf[63 - i + j ]; |
155 | } |
156 | for ( ; j < 1024; j += 128) { |
157 | a += (int64_t)window[i + j ] * synth_buf[ i + j - 1024]; |
158 | b += (int64_t)window[i + j + 32] * synth_buf[31 - i + j - 1024]; |
159 | c += (int64_t)window[i + j + 64] * synth_buf[32 + i + j - 1024]; |
160 | d += (int64_t)window[i + j + 96] * synth_buf[63 - i + j - 1024]; |
161 | } |
162 | out[i ] = clip23(norm20(a)); |
163 | out[i + 32] = clip23(norm20(b)); |
164 | synth_buf2[i ] = norm20(c); |
165 | synth_buf2[i + 32] = norm20(d); |
166 | } |
167 | |
168 | *synth_buf_offset = (*synth_buf_offset - 64) & 1023; |
169 | } |
170 | |
171 | av_cold void ff_synth_filter_init(SynthFilterContext *c) |
172 | { |
173 | c->synth_filter_float = synth_filter_float; |
174 | c->synth_filter_float_64 = synth_filter_float_64; |
175 | c->synth_filter_fixed = synth_filter_fixed; |
176 | c->synth_filter_fixed_64 = synth_filter_fixed_64; |
177 | |
178 | if (ARCH_AARCH64) |
179 | ff_synth_filter_init_aarch64(c); |
180 | if (ARCH_ARM) |
181 | ff_synth_filter_init_arm(c); |
182 | if (ARCH_X86) |
183 | ff_synth_filter_init_x86(c); |
184 | } |
185 |