blob: d1be38f947d42cf9e5c0ecfcb394b86619587983
1 | /* |
2 | * This file is part of FFmpeg. |
3 | * |
4 | * FFmpeg is free software; you can redistribute it and/or |
5 | * modify it under the terms of the GNU Lesser General Public |
6 | * License as published by the Free Software Foundation; either |
7 | * version 2.1 of the License, or (at your option) any later version. |
8 | * |
9 | * FFmpeg is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | * Lesser General Public License for more details. |
13 | * |
14 | * You should have received a copy of the GNU Lesser General Public |
15 | * License along with FFmpeg; if not, write to the Free Software |
16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ |
18 | |
19 | #ifndef AVUTIL_FLOAT_DSP_H |
20 | #define AVUTIL_FLOAT_DSP_H |
21 | |
22 | #include "config.h" |
23 | |
24 | typedef struct AVFloatDSPContext { /* table of function pointers, one per float/double vector operation */ |
25 | /** |
26 | * Calculate the entry-wise product of two vectors of floats and store the result in |
27 | * a vector of floats: dst[i] = src0[i] * src1[i]. |
28 | * |
29 | * @param dst output vector |
30 | * constraints: 32-byte aligned |
31 | * @param src0 first input vector |
32 | * constraints: 32-byte aligned |
33 | * @param src1 second input vector |
34 | * constraints: 32-byte aligned |
35 | * @param len number of elements in the input |
36 | * constraints: multiple of 16 |
37 | */ |
38 | void (*vector_fmul)(float *dst, const float *src0, const float *src1, |
39 | int len); |
40 | |
41 | /** |
42 | * Multiply a vector of floats by a scalar float and add to |
43 | * destination vector: dst[i] += src[i] * mul. Source and destination |
44 | * vectors must overlap exactly or not at all. |
45 | * |
46 | * @param dst result vector |
47 | * constraints: 32-byte aligned |
48 | * @param src input vector |
49 | * constraints: 32-byte aligned |
50 | * @param mul scalar value |
51 | * @param len length of vector |
52 | * constraints: multiple of 16 |
53 | */ |
54 | void (*vector_fmac_scalar)(float *dst, const float *src, float mul, |
55 | int len); |
56 | |
57 | /** |
58 | * Multiply a vector of floats by a scalar float: dst[i] = src[i] * mul. |
59 | * Source and destination vectors must overlap exactly or not at all. |
60 | * |
61 | * @param dst result vector |
62 | * constraints: 16-byte aligned |
63 | * @param src input vector |
64 | * constraints: 16-byte aligned |
65 | * @param mul scalar value |
66 | * @param len length of vector |
67 | * constraints: multiple of 4 |
68 | */ |
69 | void (*vector_fmul_scalar)(float *dst, const float *src, float mul, |
70 | int len); |
71 | |
72 | /** |
73 | * Multiply a vector of doubles by a scalar double: dst[i] = src[i] * mul. |
74 | * Source and destination vectors must overlap exactly or not at all. |
75 | * |
76 | * @param dst result vector |
77 | * constraints: 32-byte aligned |
78 | * @param src input vector |
79 | * constraints: 32-byte aligned |
80 | * @param mul scalar value |
81 | * @param len length of vector |
82 | * constraints: multiple of 8 |
83 | */ |
84 | void (*vector_dmul_scalar)(double *dst, const double *src, double mul, |
85 | int len); |
86 | |
87 | /** |
88 | * Overlap/add with window function. |
89 | * Used primarily by MDCT-based audio codecs. |
90 | * Source and destination vectors must overlap exactly or not at all. |
91 | * |
92 | * @param dst result vector |
93 | * constraints: 16-byte aligned |
94 | * @param src0 first source vector |
95 | * constraints: 16-byte aligned |
96 | * @param src1 second source vector |
97 | * constraints: 16-byte aligned |
98 | * @param win half-window vector |
99 | * constraints: 16-byte aligned |
100 | * @param len length of vector (NOTE(review): which vector this counts is not stated here - confirm) |
101 | * constraints: multiple of 4 |
102 | */ |
103 | void (*vector_fmul_window)(float *dst, const float *src0, |
104 | const float *src1, const float *win, int len); |
105 | |
106 | /** |
107 | * Calculate the entry-wise product of two vectors of floats, add a third vector of |
108 | * floats and store the result in a vector of floats: dst[i] = src0[i] * src1[i] + src2[i]. |
109 | * |
110 | * @param dst output vector |
111 | * constraints: 32-byte aligned |
112 | * @param src0 first input vector |
113 | * constraints: 32-byte aligned |
114 | * @param src1 second input vector |
115 | * constraints: 32-byte aligned |
116 | * @param src2 third input vector |
117 | * constraints: 32-byte aligned |
118 | * @param len number of elements in the input |
119 | * constraints: multiple of 16 |
120 | */ |
121 | void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, |
122 | const float *src2, int len); |
123 | |
124 | /** |
125 | * Calculate the entry-wise product of two vectors of floats, and store the result |
126 | * in a vector of floats. The second vector of floats is iterated over |
127 | * in reverse order: dst[i] = src0[i] * src1[len - 1 - i]. |
128 | * |
129 | * @param dst output vector |
130 | * constraints: 32-byte aligned |
131 | * @param src0 first input vector |
132 | * constraints: 32-byte aligned |
133 | * @param src1 second input vector |
134 | * constraints: 32-byte aligned |
135 | * @param len number of elements in the input |
136 | * constraints: multiple of 16 |
137 | */ |
138 | void (*vector_fmul_reverse)(float *dst, const float *src0, |
139 | const float *src1, int len); |
140 | |
141 | /** |
142 | * Calculate the sum and difference of two vectors of floats, in place. |
143 | * |
144 | * @param v1 first input vector, overwritten with the element-wise sum, 16-byte aligned |
145 | * @param v2 second input vector, overwritten with the element-wise difference (sign convention not stated here - TODO confirm), 16-byte aligned |
146 | * @param len length of vectors, multiple of 4 |
147 | */ |
148 | void (*butterflies_float)(float *av_restrict v1, float *av_restrict v2, int len); |
149 | |
150 | /** |
151 | * Calculate the scalar product of two vectors of floats. |
152 | * |
153 | * @param v1 first vector, 16-byte aligned |
154 | * @param v2 second vector, 16-byte aligned |
155 | * @param len length of vectors, multiple of 4 |
156 | * |
157 | * @return sum of the element-wise products v1[i] * v2[i] |
158 | */ |
159 | float (*scalarproduct_float)(const float *v1, const float *v2, int len); |
160 | } AVFloatDSPContext; |
161 | |
162 | /** |
163 | * Return the scalar product of two vectors. |
164 | * |
165 | * @param v1 first input vector |
166 | * @param v2 second input vector |
167 | * @param len number of elements |
168 | * |
169 | * @return sum of elementwise products |
170 | */ |
171 | float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len); |
172 | |
173 | void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp); /* NOTE(review): per-architecture init hooks; presumably each one installs optimized routines into *fdsp - confirm in the arch sources */ |
174 | void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp); |
175 | void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict); /* only variant taking 'strict'; presumably same meaning as in avpriv_float_dsp_alloc() - TODO confirm */ |
176 | void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp); |
177 | void ff_float_dsp_init_mips(AVFloatDSPContext *fdsp); |
178 | |
179 | /** |
180 | * Allocate a float DSP context. |
181 | * @param strict setting to non-zero avoids using functions which may not be IEEE-754 compliant |
182 | * @return pointer to the newly allocated context (NOTE(review): failure value and how the caller releases it are not shown in this header - confirm) |
183 | */ |
184 | AVFloatDSPContext *avpriv_float_dsp_alloc(int strict); |
185 | |
186 | #endif /* AVUTIL_FLOAT_DSP_H */ |
187 |