blob: 3049ce8b79e76393731a119b5e2d0978543f9aff
1 | /* |
2 | * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com> |
3 | * |
4 | * This file is part of FFmpeg. |
5 | * |
6 | * FFmpeg is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Lesser General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2.1 of the License, or (at your option) any later version. |
10 | * |
11 | * FFmpeg is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Lesser General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Lesser General Public |
17 | * License along with FFmpeg; if not, write to the Free Software |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | * |
20 | * Note: Rounding-to-nearest used unless otherwise stated |
21 | * |
22 | */ |
23 | #include <stdint.h> |
24 | |
25 | #include "config.h" |
26 | #include "libavutil/attributes.h" |
27 | #include "aacpsdsp.h" |
28 | |
29 | static void ps_add_squares_c(INTFLOAT *dst, const INTFLOAT (*src)[2], int n) |
30 | { |
31 | int i; |
32 | for (i = 0; i < n; i++) |
33 | dst[i] += AAC_MADD28(src[i][0], src[i][0], src[i][1], src[i][1]); |
34 | } |
35 | |
36 | static void ps_mul_pair_single_c(INTFLOAT (*dst)[2], INTFLOAT (*src0)[2], INTFLOAT *src1, |
37 | int n) |
38 | { |
39 | int i; |
40 | for (i = 0; i < n; i++) { |
41 | dst[i][0] = AAC_MUL16(src0[i][0], src1[i]); |
42 | dst[i][1] = AAC_MUL16(src0[i][1], src1[i]); |
43 | } |
44 | } |
45 | |
46 | static void ps_hybrid_analysis_c(INTFLOAT (*out)[2], INTFLOAT (*in)[2], |
47 | const INTFLOAT (*filter)[8][2], |
48 | int stride, int n) |
49 | { |
50 | int i, j; |
51 | |
52 | for (i = 0; i < n; i++) { |
53 | INT64FLOAT sum_re = (INT64FLOAT)filter[i][6][0] * in[6][0]; |
54 | INT64FLOAT sum_im = (INT64FLOAT)filter[i][6][0] * in[6][1]; |
55 | |
56 | for (j = 0; j < 6; j++) { |
57 | INTFLOAT in0_re = in[j][0]; |
58 | INTFLOAT in0_im = in[j][1]; |
59 | INTFLOAT in1_re = in[12-j][0]; |
60 | INTFLOAT in1_im = in[12-j][1]; |
61 | sum_re += (INT64FLOAT)filter[i][j][0] * (in0_re + in1_re) - |
62 | (INT64FLOAT)filter[i][j][1] * (in0_im - in1_im); |
63 | sum_im += (INT64FLOAT)filter[i][j][0] * (in0_im + in1_im) + |
64 | (INT64FLOAT)filter[i][j][1] * (in0_re - in1_re); |
65 | } |
66 | #if USE_FIXED |
67 | out[i * stride][0] = (int)((sum_re + 0x40000000) >> 31); |
68 | out[i * stride][1] = (int)((sum_im + 0x40000000) >> 31); |
69 | #else |
70 | out[i * stride][0] = sum_re; |
71 | out[i * stride][1] = sum_im; |
72 | #endif /* USE_FIXED */ |
73 | } |
74 | } |
75 | static void ps_hybrid_analysis_ileave_c(INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64], |
76 | int i, int len) |
77 | { |
78 | int j; |
79 | |
80 | for (; i < 64; i++) { |
81 | for (j = 0; j < len; j++) { |
82 | out[i][j][0] = L[0][j][i]; |
83 | out[i][j][1] = L[1][j][i]; |
84 | } |
85 | } |
86 | } |
87 | |
88 | static void ps_hybrid_synthesis_deint_c(INTFLOAT out[2][38][64], |
89 | INTFLOAT (*in)[32][2], |
90 | int i, int len) |
91 | { |
92 | int n; |
93 | |
94 | for (; i < 64; i++) { |
95 | for (n = 0; n < len; n++) { |
96 | out[0][n][i] = in[i][n][0]; |
97 | out[1][n][i] = in[i][n][1]; |
98 | } |
99 | } |
100 | } |
101 | |
102 | static void ps_decorrelate_c(INTFLOAT (*out)[2], INTFLOAT (*delay)[2], |
103 | INTFLOAT (*ap_delay)[PS_QMF_TIME_SLOTS + PS_MAX_AP_DELAY][2], |
104 | const INTFLOAT phi_fract[2], const INTFLOAT (*Q_fract)[2], |
105 | const INTFLOAT *transient_gain, |
106 | INTFLOAT g_decay_slope, |
107 | int len) |
108 | { |
109 | static const INTFLOAT a[] = { Q31(0.65143905753106f), |
110 | Q31(0.56471812200776f), |
111 | Q31(0.48954165955695f) }; |
112 | INTFLOAT ag[PS_AP_LINKS]; |
113 | int m, n; |
114 | |
115 | for (m = 0; m < PS_AP_LINKS; m++) |
116 | ag[m] = AAC_MUL30(a[m], g_decay_slope); |
117 | |
118 | for (n = 0; n < len; n++) { |
119 | INTFLOAT in_re = AAC_MSUB30(delay[n][0], phi_fract[0], delay[n][1], phi_fract[1]); |
120 | INTFLOAT in_im = AAC_MADD30(delay[n][0], phi_fract[1], delay[n][1], phi_fract[0]); |
121 | for (m = 0; m < PS_AP_LINKS; m++) { |
122 | INTFLOAT a_re = AAC_MUL31(ag[m], in_re); |
123 | INTFLOAT a_im = AAC_MUL31(ag[m], in_im); |
124 | INTFLOAT link_delay_re = ap_delay[m][n+2-m][0]; |
125 | INTFLOAT link_delay_im = ap_delay[m][n+2-m][1]; |
126 | INTFLOAT fractional_delay_re = Q_fract[m][0]; |
127 | INTFLOAT fractional_delay_im = Q_fract[m][1]; |
128 | INTFLOAT apd_re = in_re; |
129 | INTFLOAT apd_im = in_im; |
130 | in_re = AAC_MSUB30(link_delay_re, fractional_delay_re, |
131 | link_delay_im, fractional_delay_im); |
132 | in_re -= a_re; |
133 | in_im = AAC_MADD30(link_delay_re, fractional_delay_im, |
134 | link_delay_im, fractional_delay_re); |
135 | in_im -= a_im; |
136 | ap_delay[m][n+5][0] = apd_re + AAC_MUL31(ag[m], in_re); |
137 | ap_delay[m][n+5][1] = apd_im + AAC_MUL31(ag[m], in_im); |
138 | } |
139 | out[n][0] = AAC_MUL16(transient_gain[n], in_re); |
140 | out[n][1] = AAC_MUL16(transient_gain[n], in_im); |
141 | } |
142 | } |
143 | |
144 | static void ps_stereo_interpolate_c(INTFLOAT (*l)[2], INTFLOAT (*r)[2], |
145 | INTFLOAT h[2][4], INTFLOAT h_step[2][4], |
146 | int len) |
147 | { |
148 | INTFLOAT h0 = h[0][0]; |
149 | INTFLOAT h1 = h[0][1]; |
150 | INTFLOAT h2 = h[0][2]; |
151 | INTFLOAT h3 = h[0][3]; |
152 | INTFLOAT hs0 = h_step[0][0]; |
153 | INTFLOAT hs1 = h_step[0][1]; |
154 | INTFLOAT hs2 = h_step[0][2]; |
155 | INTFLOAT hs3 = h_step[0][3]; |
156 | int n; |
157 | |
158 | for (n = 0; n < len; n++) { |
159 | //l is s, r is d |
160 | INTFLOAT l_re = l[n][0]; |
161 | INTFLOAT l_im = l[n][1]; |
162 | INTFLOAT r_re = r[n][0]; |
163 | INTFLOAT r_im = r[n][1]; |
164 | h0 += hs0; |
165 | h1 += hs1; |
166 | h2 += hs2; |
167 | h3 += hs3; |
168 | l[n][0] = AAC_MADD30(h0, l_re, h2, r_re); |
169 | l[n][1] = AAC_MADD30(h0, l_im, h2, r_im); |
170 | r[n][0] = AAC_MADD30(h1, l_re, h3, r_re); |
171 | r[n][1] = AAC_MADD30(h1, l_im, h3, r_im); |
172 | } |
173 | } |
174 | |
175 | static void ps_stereo_interpolate_ipdopd_c(INTFLOAT (*l)[2], INTFLOAT (*r)[2], |
176 | INTFLOAT h[2][4], INTFLOAT h_step[2][4], |
177 | int len) |
178 | { |
179 | INTFLOAT h00 = h[0][0], h10 = h[1][0]; |
180 | INTFLOAT h01 = h[0][1], h11 = h[1][1]; |
181 | INTFLOAT h02 = h[0][2], h12 = h[1][2]; |
182 | INTFLOAT h03 = h[0][3], h13 = h[1][3]; |
183 | INTFLOAT hs00 = h_step[0][0], hs10 = h_step[1][0]; |
184 | INTFLOAT hs01 = h_step[0][1], hs11 = h_step[1][1]; |
185 | INTFLOAT hs02 = h_step[0][2], hs12 = h_step[1][2]; |
186 | INTFLOAT hs03 = h_step[0][3], hs13 = h_step[1][3]; |
187 | int n; |
188 | |
189 | for (n = 0; n < len; n++) { |
190 | //l is s, r is d |
191 | INTFLOAT l_re = l[n][0]; |
192 | INTFLOAT l_im = l[n][1]; |
193 | INTFLOAT r_re = r[n][0]; |
194 | INTFLOAT r_im = r[n][1]; |
195 | h00 += hs00; |
196 | h01 += hs01; |
197 | h02 += hs02; |
198 | h03 += hs03; |
199 | h10 += hs10; |
200 | h11 += hs11; |
201 | h12 += hs12; |
202 | h13 += hs13; |
203 | |
204 | l[n][0] = AAC_MSUB30_V8(h00, l_re, h02, r_re, h10, l_im, h12, r_im); |
205 | l[n][1] = AAC_MADD30_V8(h00, l_im, h02, r_im, h10, l_re, h12, r_re); |
206 | r[n][0] = AAC_MSUB30_V8(h01, l_re, h03, r_re, h11, l_im, h13, r_im); |
207 | r[n][1] = AAC_MADD30_V8(h01, l_im, h03, r_im, h11, l_re, h13, r_re); |
208 | } |
209 | } |
210 | |
211 | av_cold void AAC_RENAME(ff_psdsp_init)(PSDSPContext *s) |
212 | { |
213 | s->add_squares = ps_add_squares_c; |
214 | s->mul_pair_single = ps_mul_pair_single_c; |
215 | s->hybrid_analysis = ps_hybrid_analysis_c; |
216 | s->hybrid_analysis_ileave = ps_hybrid_analysis_ileave_c; |
217 | s->hybrid_synthesis_deint = ps_hybrid_synthesis_deint_c; |
218 | s->decorrelate = ps_decorrelate_c; |
219 | s->stereo_interpolate[0] = ps_stereo_interpolate_c; |
220 | s->stereo_interpolate[1] = ps_stereo_interpolate_ipdopd_c; |
221 | |
222 | #if !USE_FIXED |
223 | if (ARCH_ARM) |
224 | ff_psdsp_init_arm(s); |
225 | if (ARCH_MIPS) |
226 | ff_psdsp_init_mips(s); |
227 | if (ARCH_X86) |
228 | ff_psdsp_init_x86(s); |
229 | #endif /* !USE_FIXED */ |
230 | } |
231 |