blob: 62763bf3490542732c187aee0aca5ca9f4b93457
1 | /* |
2 | * Copyright (c) 2013-2014 Clément Bœsch |
3 | * |
4 | * This file is part of FFmpeg. |
5 | * |
6 | * FFmpeg is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Lesser General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2.1 of the License, or (at your option) any later version. |
10 | * |
11 | * FFmpeg is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Lesser General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Lesser General Public |
17 | * License along with FFmpeg; if not, write to the Free Software |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | */ |
20 | |
21 | /** |
22 | * A simple, relatively efficient and slow DCT image denoiser. |
23 | * |
24 | * @see http://www.ipol.im/pub/art/2011/ys-dct/ |
25 | * |
26 | * The DCT factorization used is based on "Fast and numerically stable |
27 | * algorithms for discrete cosine transforms" from Gerlind Plonkaa & Manfred |
28 | * Tasche (DOI: 10.1016/j.laa.2004.07.015). |
29 | */ |
30 | |
31 | #include "libavutil/avassert.h" |
32 | #include "libavutil/eval.h" |
33 | #include "libavutil/opt.h" |
34 | #include "internal.h" |
35 | |
36 | static const char *const var_names[] = { "c", NULL }; |
37 | enum { VAR_C, VAR_VARS_NB }; |
38 | |
39 | #define MAX_THREADS 8 |
40 | |
41 | typedef struct DCTdnoizContext { |
42 | const AVClass *class; |
43 | |
44 | /* coefficient factor expression */ |
45 | char *expr_str; |
46 | AVExpr *expr[MAX_THREADS]; |
47 | double var_values[MAX_THREADS][VAR_VARS_NB]; |
48 | |
49 | int nb_threads; |
50 | int pr_width, pr_height; // width and height to process |
51 | float sigma; // used when no expression are st |
52 | float th; // threshold (3*sigma) |
53 | float *cbuf[2][3]; // two planar rgb color buffers |
54 | float *slices[MAX_THREADS]; // slices buffers (1 slice buffer per thread) |
55 | float *weights; // dct coeff are cumulated with overlapping; these values are used for averaging |
56 | int p_linesize; // line sizes for color and weights |
57 | int overlap; // number of block overlapping pixels |
58 | int step; // block step increment (blocksize - overlap) |
59 | int n; // 1<<n is the block size |
60 | int bsize; // block size, 1<<n |
61 | void (*filter_freq_func)(struct DCTdnoizContext *s, |
62 | const float *src, int src_linesize, |
63 | float *dst, int dst_linesize, |
64 | int thread_id); |
65 | void (*color_decorrelation)(float **dst, int dst_linesize, |
66 | const uint8_t *src, int src_linesize, |
67 | int w, int h); |
68 | void (*color_correlation)(uint8_t *dst, int dst_linesize, |
69 | float **src, int src_linesize, |
70 | int w, int h); |
71 | } DCTdnoizContext; |
72 | |
73 | #define MIN_NBITS 3 /* blocksize = 1<<3 = 8 */ |
74 | #define MAX_NBITS 4 /* blocksize = 1<<4 = 16 */ |
75 | #define DEFAULT_NBITS 3 |
76 | |
77 | #define OFFSET(x) offsetof(DCTdnoizContext, x) |
78 | #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM |
79 | static const AVOption dctdnoiz_options[] = { |
80 | { "sigma", "set noise sigma constant", OFFSET(sigma), AV_OPT_TYPE_FLOAT, {.dbl=0}, 0, 999, .flags = FLAGS }, |
81 | { "s", "set noise sigma constant", OFFSET(sigma), AV_OPT_TYPE_FLOAT, {.dbl=0}, 0, 999, .flags = FLAGS }, |
82 | { "overlap", "set number of block overlapping pixels", OFFSET(overlap), AV_OPT_TYPE_INT, {.i64=-1}, -1, (1<<MAX_NBITS)-1, .flags = FLAGS }, |
83 | { "expr", "set coefficient factor expression", OFFSET(expr_str), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS }, |
84 | { "e", "set coefficient factor expression", OFFSET(expr_str), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS }, |
85 | { "n", "set the block size, expressed in bits", OFFSET(n), AV_OPT_TYPE_INT, {.i64=DEFAULT_NBITS}, MIN_NBITS, MAX_NBITS, .flags = FLAGS }, |
86 | { NULL } |
87 | }; |
88 | |
89 | AVFILTER_DEFINE_CLASS(dctdnoiz); |
90 | |
91 | static void av_always_inline fdct8_1d(float *dst, const float *src, |
92 | int dst_stridea, int dst_strideb, |
93 | int src_stridea, int src_strideb) |
94 | { |
95 | int i; |
96 | |
97 | for (i = 0; i < 8; i++) { |
98 | const float x00 = src[0*src_stridea] + src[7*src_stridea]; |
99 | const float x01 = src[1*src_stridea] + src[6*src_stridea]; |
100 | const float x02 = src[2*src_stridea] + src[5*src_stridea]; |
101 | const float x03 = src[3*src_stridea] + src[4*src_stridea]; |
102 | const float x04 = src[0*src_stridea] - src[7*src_stridea]; |
103 | const float x05 = src[1*src_stridea] - src[6*src_stridea]; |
104 | const float x06 = src[2*src_stridea] - src[5*src_stridea]; |
105 | const float x07 = src[3*src_stridea] - src[4*src_stridea]; |
106 | const float x08 = x00 + x03; |
107 | const float x09 = x01 + x02; |
108 | const float x0a = x00 - x03; |
109 | const float x0b = x01 - x02; |
110 | const float x0c = 1.38703984532215f*x04 + 0.275899379282943f*x07; |
111 | const float x0d = 1.17587560241936f*x05 + 0.785694958387102f*x06; |
112 | const float x0e = -0.785694958387102f*x05 + 1.17587560241936f*x06; |
113 | const float x0f = 0.275899379282943f*x04 - 1.38703984532215f*x07; |
114 | const float x10 = 0.353553390593274f * (x0c - x0d); |
115 | const float x11 = 0.353553390593274f * (x0e - x0f); |
116 | dst[0*dst_stridea] = 0.353553390593274f * (x08 + x09); |
117 | dst[1*dst_stridea] = 0.353553390593274f * (x0c + x0d); |
118 | dst[2*dst_stridea] = 0.461939766255643f*x0a + 0.191341716182545f*x0b; |
119 | dst[3*dst_stridea] = 0.707106781186547f * (x10 - x11); |
120 | dst[4*dst_stridea] = 0.353553390593274f * (x08 - x09); |
121 | dst[5*dst_stridea] = 0.707106781186547f * (x10 + x11); |
122 | dst[6*dst_stridea] = 0.191341716182545f*x0a - 0.461939766255643f*x0b; |
123 | dst[7*dst_stridea] = 0.353553390593274f * (x0e + x0f); |
124 | dst += dst_strideb; |
125 | src += src_strideb; |
126 | } |
127 | } |
128 | |
129 | static void av_always_inline idct8_1d(float *dst, const float *src, |
130 | int dst_stridea, int dst_strideb, |
131 | int src_stridea, int src_strideb, |
132 | int add) |
133 | { |
134 | int i; |
135 | |
136 | for (i = 0; i < 8; i++) { |
137 | const float x00 = 1.4142135623731f *src[0*src_stridea]; |
138 | const float x01 = 1.38703984532215f *src[1*src_stridea] + 0.275899379282943f*src[7*src_stridea]; |
139 | const float x02 = 1.30656296487638f *src[2*src_stridea] + 0.541196100146197f*src[6*src_stridea]; |
140 | const float x03 = 1.17587560241936f *src[3*src_stridea] + 0.785694958387102f*src[5*src_stridea]; |
141 | const float x04 = 1.4142135623731f *src[4*src_stridea]; |
142 | const float x05 = -0.785694958387102f*src[3*src_stridea] + 1.17587560241936f*src[5*src_stridea]; |
143 | const float x06 = 0.541196100146197f*src[2*src_stridea] - 1.30656296487638f*src[6*src_stridea]; |
144 | const float x07 = -0.275899379282943f*src[1*src_stridea] + 1.38703984532215f*src[7*src_stridea]; |
145 | const float x09 = x00 + x04; |
146 | const float x0a = x01 + x03; |
147 | const float x0b = 1.4142135623731f*x02; |
148 | const float x0c = x00 - x04; |
149 | const float x0d = x01 - x03; |
150 | const float x0e = 0.353553390593274f * (x09 - x0b); |
151 | const float x0f = 0.353553390593274f * (x0c + x0d); |
152 | const float x10 = 0.353553390593274f * (x0c - x0d); |
153 | const float x11 = 1.4142135623731f*x06; |
154 | const float x12 = x05 + x07; |
155 | const float x13 = x05 - x07; |
156 | const float x14 = 0.353553390593274f * (x11 + x12); |
157 | const float x15 = 0.353553390593274f * (x11 - x12); |
158 | const float x16 = 0.5f*x13; |
159 | dst[0*dst_stridea] = (add ? dst[ 0*dst_stridea] : 0) + 0.25f * (x09 + x0b) + 0.353553390593274f*x0a; |
160 | dst[1*dst_stridea] = (add ? dst[ 1*dst_stridea] : 0) + 0.707106781186547f * (x0f + x15); |
161 | dst[2*dst_stridea] = (add ? dst[ 2*dst_stridea] : 0) + 0.707106781186547f * (x0f - x15); |
162 | dst[3*dst_stridea] = (add ? dst[ 3*dst_stridea] : 0) + 0.707106781186547f * (x0e + x16); |
163 | dst[4*dst_stridea] = (add ? dst[ 4*dst_stridea] : 0) + 0.707106781186547f * (x0e - x16); |
164 | dst[5*dst_stridea] = (add ? dst[ 5*dst_stridea] : 0) + 0.707106781186547f * (x10 - x14); |
165 | dst[6*dst_stridea] = (add ? dst[ 6*dst_stridea] : 0) + 0.707106781186547f * (x10 + x14); |
166 | dst[7*dst_stridea] = (add ? dst[ 7*dst_stridea] : 0) + 0.25f * (x09 + x0b) - 0.353553390593274f*x0a; |
167 | dst += dst_strideb; |
168 | src += src_strideb; |
169 | } |
170 | } |
171 | |
172 | |
173 | static void av_always_inline fdct16_1d(float *dst, const float *src, |
174 | int dst_stridea, int dst_strideb, |
175 | int src_stridea, int src_strideb) |
176 | { |
177 | int i; |
178 | |
179 | for (i = 0; i < 16; i++) { |
180 | const float x00 = src[ 0*src_stridea] + src[15*src_stridea]; |
181 | const float x01 = src[ 1*src_stridea] + src[14*src_stridea]; |
182 | const float x02 = src[ 2*src_stridea] + src[13*src_stridea]; |
183 | const float x03 = src[ 3*src_stridea] + src[12*src_stridea]; |
184 | const float x04 = src[ 4*src_stridea] + src[11*src_stridea]; |
185 | const float x05 = src[ 5*src_stridea] + src[10*src_stridea]; |
186 | const float x06 = src[ 6*src_stridea] + src[ 9*src_stridea]; |
187 | const float x07 = src[ 7*src_stridea] + src[ 8*src_stridea]; |
188 | const float x08 = src[ 0*src_stridea] - src[15*src_stridea]; |
189 | const float x09 = src[ 1*src_stridea] - src[14*src_stridea]; |
190 | const float x0a = src[ 2*src_stridea] - src[13*src_stridea]; |
191 | const float x0b = src[ 3*src_stridea] - src[12*src_stridea]; |
192 | const float x0c = src[ 4*src_stridea] - src[11*src_stridea]; |
193 | const float x0d = src[ 5*src_stridea] - src[10*src_stridea]; |
194 | const float x0e = src[ 6*src_stridea] - src[ 9*src_stridea]; |
195 | const float x0f = src[ 7*src_stridea] - src[ 8*src_stridea]; |
196 | const float x10 = x00 + x07; |
197 | const float x11 = x01 + x06; |
198 | const float x12 = x02 + x05; |
199 | const float x13 = x03 + x04; |
200 | const float x14 = x00 - x07; |
201 | const float x15 = x01 - x06; |
202 | const float x16 = x02 - x05; |
203 | const float x17 = x03 - x04; |
204 | const float x18 = x10 + x13; |
205 | const float x19 = x11 + x12; |
206 | const float x1a = x10 - x13; |
207 | const float x1b = x11 - x12; |
208 | const float x1c = 1.38703984532215f*x14 + 0.275899379282943f*x17; |
209 | const float x1d = 1.17587560241936f*x15 + 0.785694958387102f*x16; |
210 | const float x1e = -0.785694958387102f*x15 + 1.17587560241936f *x16; |
211 | const float x1f = 0.275899379282943f*x14 - 1.38703984532215f *x17; |
212 | const float x20 = 0.25f * (x1c - x1d); |
213 | const float x21 = 0.25f * (x1e - x1f); |
214 | const float x22 = 1.40740373752638f *x08 + 0.138617169199091f*x0f; |
215 | const float x23 = 1.35331800117435f *x09 + 0.410524527522357f*x0e; |
216 | const float x24 = 1.24722501298667f *x0a + 0.666655658477747f*x0d; |
217 | const float x25 = 1.09320186700176f *x0b + 0.897167586342636f*x0c; |
218 | const float x26 = -0.897167586342636f*x0b + 1.09320186700176f *x0c; |
219 | const float x27 = 0.666655658477747f*x0a - 1.24722501298667f *x0d; |
220 | const float x28 = -0.410524527522357f*x09 + 1.35331800117435f *x0e; |
221 | const float x29 = 0.138617169199091f*x08 - 1.40740373752638f *x0f; |
222 | const float x2a = x22 + x25; |
223 | const float x2b = x23 + x24; |
224 | const float x2c = x22 - x25; |
225 | const float x2d = x23 - x24; |
226 | const float x2e = 0.25f * (x2a - x2b); |
227 | const float x2f = 0.326640741219094f*x2c + 0.135299025036549f*x2d; |
228 | const float x30 = 0.135299025036549f*x2c - 0.326640741219094f*x2d; |
229 | const float x31 = x26 + x29; |
230 | const float x32 = x27 + x28; |
231 | const float x33 = x26 - x29; |
232 | const float x34 = x27 - x28; |
233 | const float x35 = 0.25f * (x31 - x32); |
234 | const float x36 = 0.326640741219094f*x33 + 0.135299025036549f*x34; |
235 | const float x37 = 0.135299025036549f*x33 - 0.326640741219094f*x34; |
236 | dst[ 0*dst_stridea] = 0.25f * (x18 + x19); |
237 | dst[ 1*dst_stridea] = 0.25f * (x2a + x2b); |
238 | dst[ 2*dst_stridea] = 0.25f * (x1c + x1d); |
239 | dst[ 3*dst_stridea] = 0.707106781186547f * (x2f - x37); |
240 | dst[ 4*dst_stridea] = 0.326640741219094f*x1a + 0.135299025036549f*x1b; |
241 | dst[ 5*dst_stridea] = 0.707106781186547f * (x2f + x37); |
242 | dst[ 6*dst_stridea] = 0.707106781186547f * (x20 - x21); |
243 | dst[ 7*dst_stridea] = 0.707106781186547f * (x2e + x35); |
244 | dst[ 8*dst_stridea] = 0.25f * (x18 - x19); |
245 | dst[ 9*dst_stridea] = 0.707106781186547f * (x2e - x35); |
246 | dst[10*dst_stridea] = 0.707106781186547f * (x20 + x21); |
247 | dst[11*dst_stridea] = 0.707106781186547f * (x30 - x36); |
248 | dst[12*dst_stridea] = 0.135299025036549f*x1a - 0.326640741219094f*x1b; |
249 | dst[13*dst_stridea] = 0.707106781186547f * (x30 + x36); |
250 | dst[14*dst_stridea] = 0.25f * (x1e + x1f); |
251 | dst[15*dst_stridea] = 0.25f * (x31 + x32); |
252 | dst += dst_strideb; |
253 | src += src_strideb; |
254 | } |
255 | } |
256 | |
257 | static void av_always_inline idct16_1d(float *dst, const float *src, |
258 | int dst_stridea, int dst_strideb, |
259 | int src_stridea, int src_strideb, |
260 | int add) |
261 | { |
262 | int i; |
263 | |
264 | for (i = 0; i < 16; i++) { |
265 | const float x00 = 1.4142135623731f *src[ 0*src_stridea]; |
266 | const float x01 = 1.40740373752638f *src[ 1*src_stridea] + 0.138617169199091f*src[15*src_stridea]; |
267 | const float x02 = 1.38703984532215f *src[ 2*src_stridea] + 0.275899379282943f*src[14*src_stridea]; |
268 | const float x03 = 1.35331800117435f *src[ 3*src_stridea] + 0.410524527522357f*src[13*src_stridea]; |
269 | const float x04 = 1.30656296487638f *src[ 4*src_stridea] + 0.541196100146197f*src[12*src_stridea]; |
270 | const float x05 = 1.24722501298667f *src[ 5*src_stridea] + 0.666655658477747f*src[11*src_stridea]; |
271 | const float x06 = 1.17587560241936f *src[ 6*src_stridea] + 0.785694958387102f*src[10*src_stridea]; |
272 | const float x07 = 1.09320186700176f *src[ 7*src_stridea] + 0.897167586342636f*src[ 9*src_stridea]; |
273 | const float x08 = 1.4142135623731f *src[ 8*src_stridea]; |
274 | const float x09 = -0.897167586342636f*src[ 7*src_stridea] + 1.09320186700176f*src[ 9*src_stridea]; |
275 | const float x0a = 0.785694958387102f*src[ 6*src_stridea] - 1.17587560241936f*src[10*src_stridea]; |
276 | const float x0b = -0.666655658477747f*src[ 5*src_stridea] + 1.24722501298667f*src[11*src_stridea]; |
277 | const float x0c = 0.541196100146197f*src[ 4*src_stridea] - 1.30656296487638f*src[12*src_stridea]; |
278 | const float x0d = -0.410524527522357f*src[ 3*src_stridea] + 1.35331800117435f*src[13*src_stridea]; |
279 | const float x0e = 0.275899379282943f*src[ 2*src_stridea] - 1.38703984532215f*src[14*src_stridea]; |
280 | const float x0f = -0.138617169199091f*src[ 1*src_stridea] + 1.40740373752638f*src[15*src_stridea]; |
281 | const float x12 = x00 + x08; |
282 | const float x13 = x01 + x07; |
283 | const float x14 = x02 + x06; |
284 | const float x15 = x03 + x05; |
285 | const float x16 = 1.4142135623731f*x04; |
286 | const float x17 = x00 - x08; |
287 | const float x18 = x01 - x07; |
288 | const float x19 = x02 - x06; |
289 | const float x1a = x03 - x05; |
290 | const float x1d = x12 + x16; |
291 | const float x1e = x13 + x15; |
292 | const float x1f = 1.4142135623731f*x14; |
293 | const float x20 = x12 - x16; |
294 | const float x21 = x13 - x15; |
295 | const float x22 = 0.25f * (x1d - x1f); |
296 | const float x23 = 0.25f * (x20 + x21); |
297 | const float x24 = 0.25f * (x20 - x21); |
298 | const float x25 = 1.4142135623731f*x17; |
299 | const float x26 = 1.30656296487638f*x18 + 0.541196100146197f*x1a; |
300 | const float x27 = 1.4142135623731f*x19; |
301 | const float x28 = -0.541196100146197f*x18 + 1.30656296487638f*x1a; |
302 | const float x29 = 0.176776695296637f * (x25 + x27) + 0.25f*x26; |
303 | const float x2a = 0.25f * (x25 - x27); |
304 | const float x2b = 0.176776695296637f * (x25 + x27) - 0.25f*x26; |
305 | const float x2c = 0.353553390593274f*x28; |
306 | const float x1b = 0.707106781186547f * (x2a - x2c); |
307 | const float x1c = 0.707106781186547f * (x2a + x2c); |
308 | const float x2d = 1.4142135623731f*x0c; |
309 | const float x2e = x0b + x0d; |
310 | const float x2f = x0a + x0e; |
311 | const float x30 = x09 + x0f; |
312 | const float x31 = x09 - x0f; |
313 | const float x32 = x0a - x0e; |
314 | const float x33 = x0b - x0d; |
315 | const float x37 = 1.4142135623731f*x2d; |
316 | const float x38 = 1.30656296487638f*x2e + 0.541196100146197f*x30; |
317 | const float x39 = 1.4142135623731f*x2f; |
318 | const float x3a = -0.541196100146197f*x2e + 1.30656296487638f*x30; |
319 | const float x3b = 0.176776695296637f * (x37 + x39) + 0.25f*x38; |
320 | const float x3c = 0.25f * (x37 - x39); |
321 | const float x3d = 0.176776695296637f * (x37 + x39) - 0.25f*x38; |
322 | const float x3e = 0.353553390593274f*x3a; |
323 | const float x34 = 0.707106781186547f * (x3c - x3e); |
324 | const float x35 = 0.707106781186547f * (x3c + x3e); |
325 | const float x3f = 1.4142135623731f*x32; |
326 | const float x40 = x31 + x33; |
327 | const float x41 = x31 - x33; |
328 | const float x42 = 0.25f * (x3f + x40); |
329 | const float x43 = 0.25f * (x3f - x40); |
330 | const float x44 = 0.353553390593274f*x41; |
331 | dst[ 0*dst_stridea] = (add ? dst[ 0*dst_stridea] : 0) + 0.176776695296637f * (x1d + x1f) + 0.25f*x1e; |
332 | dst[ 1*dst_stridea] = (add ? dst[ 1*dst_stridea] : 0) + 0.707106781186547f * (x29 + x3d); |
333 | dst[ 2*dst_stridea] = (add ? dst[ 2*dst_stridea] : 0) + 0.707106781186547f * (x29 - x3d); |
334 | dst[ 3*dst_stridea] = (add ? dst[ 3*dst_stridea] : 0) + 0.707106781186547f * (x23 - x43); |
335 | dst[ 4*dst_stridea] = (add ? dst[ 4*dst_stridea] : 0) + 0.707106781186547f * (x23 + x43); |
336 | dst[ 5*dst_stridea] = (add ? dst[ 5*dst_stridea] : 0) + 0.707106781186547f * (x1b - x35); |
337 | dst[ 6*dst_stridea] = (add ? dst[ 6*dst_stridea] : 0) + 0.707106781186547f * (x1b + x35); |
338 | dst[ 7*dst_stridea] = (add ? dst[ 7*dst_stridea] : 0) + 0.707106781186547f * (x22 + x44); |
339 | dst[ 8*dst_stridea] = (add ? dst[ 8*dst_stridea] : 0) + 0.707106781186547f * (x22 - x44); |
340 | dst[ 9*dst_stridea] = (add ? dst[ 9*dst_stridea] : 0) + 0.707106781186547f * (x1c + x34); |
341 | dst[10*dst_stridea] = (add ? dst[10*dst_stridea] : 0) + 0.707106781186547f * (x1c - x34); |
342 | dst[11*dst_stridea] = (add ? dst[11*dst_stridea] : 0) + 0.707106781186547f * (x24 + x42); |
343 | dst[12*dst_stridea] = (add ? dst[12*dst_stridea] : 0) + 0.707106781186547f * (x24 - x42); |
344 | dst[13*dst_stridea] = (add ? dst[13*dst_stridea] : 0) + 0.707106781186547f * (x2b - x3b); |
345 | dst[14*dst_stridea] = (add ? dst[14*dst_stridea] : 0) + 0.707106781186547f * (x2b + x3b); |
346 | dst[15*dst_stridea] = (add ? dst[15*dst_stridea] : 0) + 0.176776695296637f * (x1d + x1f) - 0.25f*x1e; |
347 | dst += dst_strideb; |
348 | src += src_strideb; |
349 | } |
350 | } |
351 | |
352 | #define DEF_FILTER_FREQ_FUNCS(bsize) \ |
353 | static av_always_inline void filter_freq_##bsize(const float *src, int src_linesize, \ |
354 | float *dst, int dst_linesize, \ |
355 | AVExpr *expr, double *var_values, \ |
356 | int sigma_th) \ |
357 | { \ |
358 | unsigned i; \ |
359 | DECLARE_ALIGNED(32, float, tmp_block1)[bsize * bsize]; \ |
360 | DECLARE_ALIGNED(32, float, tmp_block2)[bsize * bsize]; \ |
361 | \ |
362 | /* forward DCT */ \ |
363 | fdct##bsize##_1d(tmp_block1, src, 1, bsize, 1, src_linesize); \ |
364 | fdct##bsize##_1d(tmp_block2, tmp_block1, bsize, 1, bsize, 1); \ |
365 | \ |
366 | for (i = 0; i < bsize*bsize; i++) { \ |
367 | float *b = &tmp_block2[i]; \ |
368 | /* frequency filtering */ \ |
369 | if (expr) { \ |
370 | var_values[VAR_C] = fabsf(*b); \ |
371 | *b *= av_expr_eval(expr, var_values, NULL); \ |
372 | } else { \ |
373 | if (fabsf(*b) < sigma_th) \ |
374 | *b = 0; \ |
375 | } \ |
376 | } \ |
377 | \ |
378 | /* inverse DCT */ \ |
379 | idct##bsize##_1d(tmp_block1, tmp_block2, 1, bsize, 1, bsize, 0); \ |
380 | idct##bsize##_1d(dst, tmp_block1, dst_linesize, 1, bsize, 1, 1); \ |
381 | } \ |
382 | \ |
383 | static void filter_freq_sigma_##bsize(DCTdnoizContext *s, \ |
384 | const float *src, int src_linesize, \ |
385 | float *dst, int dst_linesize, int thread_id) \ |
386 | { \ |
387 | filter_freq_##bsize(src, src_linesize, dst, dst_linesize, NULL, NULL, s->th); \ |
388 | } \ |
389 | \ |
390 | static void filter_freq_expr_##bsize(DCTdnoizContext *s, \ |
391 | const float *src, int src_linesize, \ |
392 | float *dst, int dst_linesize, int thread_id) \ |
393 | { \ |
394 | filter_freq_##bsize(src, src_linesize, dst, dst_linesize, \ |
395 | s->expr[thread_id], s->var_values[thread_id], 0); \ |
396 | } |
397 | |
398 | DEF_FILTER_FREQ_FUNCS(8) |
399 | DEF_FILTER_FREQ_FUNCS(16) |
400 | |
401 | #define DCT3X3_0_0 0.5773502691896258f /* 1/sqrt(3) */ |
402 | #define DCT3X3_0_1 0.5773502691896258f /* 1/sqrt(3) */ |
403 | #define DCT3X3_0_2 0.5773502691896258f /* 1/sqrt(3) */ |
404 | #define DCT3X3_1_0 0.7071067811865475f /* 1/sqrt(2) */ |
405 | #define DCT3X3_1_2 -0.7071067811865475f /* -1/sqrt(2) */ |
406 | #define DCT3X3_2_0 0.4082482904638631f /* 1/sqrt(6) */ |
407 | #define DCT3X3_2_1 -0.8164965809277261f /* -2/sqrt(6) */ |
408 | #define DCT3X3_2_2 0.4082482904638631f /* 1/sqrt(6) */ |
409 | |
410 | static av_always_inline void color_decorrelation(float **dst, int dst_linesize, |
411 | const uint8_t *src, int src_linesize, |
412 | int w, int h, |
413 | int r, int g, int b) |
414 | { |
415 | int x, y; |
416 | float *dstp_r = dst[0]; |
417 | float *dstp_g = dst[1]; |
418 | float *dstp_b = dst[2]; |
419 | |
420 | for (y = 0; y < h; y++) { |
421 | const uint8_t *srcp = src; |
422 | |
423 | for (x = 0; x < w; x++) { |
424 | dstp_r[x] = srcp[r] * DCT3X3_0_0 + srcp[g] * DCT3X3_0_1 + srcp[b] * DCT3X3_0_2; |
425 | dstp_g[x] = srcp[r] * DCT3X3_1_0 + srcp[b] * DCT3X3_1_2; |
426 | dstp_b[x] = srcp[r] * DCT3X3_2_0 + srcp[g] * DCT3X3_2_1 + srcp[b] * DCT3X3_2_2; |
427 | srcp += 3; |
428 | } |
429 | src += src_linesize; |
430 | dstp_r += dst_linesize; |
431 | dstp_g += dst_linesize; |
432 | dstp_b += dst_linesize; |
433 | } |
434 | } |
435 | |
436 | static av_always_inline void color_correlation(uint8_t *dst, int dst_linesize, |
437 | float **src, int src_linesize, |
438 | int w, int h, |
439 | int r, int g, int b) |
440 | { |
441 | int x, y; |
442 | const float *src_r = src[0]; |
443 | const float *src_g = src[1]; |
444 | const float *src_b = src[2]; |
445 | |
446 | for (y = 0; y < h; y++) { |
447 | uint8_t *dstp = dst; |
448 | |
449 | for (x = 0; x < w; x++) { |
450 | dstp[r] = av_clip_uint8(src_r[x] * DCT3X3_0_0 + src_g[x] * DCT3X3_1_0 + src_b[x] * DCT3X3_2_0); |
451 | dstp[g] = av_clip_uint8(src_r[x] * DCT3X3_0_1 + src_b[x] * DCT3X3_2_1); |
452 | dstp[b] = av_clip_uint8(src_r[x] * DCT3X3_0_2 + src_g[x] * DCT3X3_1_2 + src_b[x] * DCT3X3_2_2); |
453 | dstp += 3; |
454 | } |
455 | dst += dst_linesize; |
456 | src_r += src_linesize; |
457 | src_g += src_linesize; |
458 | src_b += src_linesize; |
459 | } |
460 | } |
461 | |
462 | #define DECLARE_COLOR_FUNCS(name, r, g, b) \ |
463 | static void color_decorrelation_##name(float **dst, int dst_linesize, \ |
464 | const uint8_t *src, int src_linesize, \ |
465 | int w, int h) \ |
466 | { \ |
467 | color_decorrelation(dst, dst_linesize, src, src_linesize, w, h, r, g, b); \ |
468 | } \ |
469 | \ |
470 | static void color_correlation_##name(uint8_t *dst, int dst_linesize, \ |
471 | float **src, int src_linesize, \ |
472 | int w, int h) \ |
473 | { \ |
474 | color_correlation(dst, dst_linesize, src, src_linesize, w, h, r, g, b); \ |
475 | } |
476 | |
477 | DECLARE_COLOR_FUNCS(rgb, 0, 1, 2) |
478 | DECLARE_COLOR_FUNCS(bgr, 2, 1, 0) |
479 | |
480 | static int config_input(AVFilterLink *inlink) |
481 | { |
482 | AVFilterContext *ctx = inlink->dst; |
483 | DCTdnoizContext *s = ctx->priv; |
484 | int i, x, y, bx, by, linesize, *iweights, max_slice_h, slice_h; |
485 | const int bsize = 1 << s->n; |
486 | |
487 | switch (inlink->format) { |
488 | case AV_PIX_FMT_BGR24: |
489 | s->color_decorrelation = color_decorrelation_bgr; |
490 | s->color_correlation = color_correlation_bgr; |
491 | break; |
492 | case AV_PIX_FMT_RGB24: |
493 | s->color_decorrelation = color_decorrelation_rgb; |
494 | s->color_correlation = color_correlation_rgb; |
495 | break; |
496 | default: |
497 | av_assert0(0); |
498 | } |
499 | |
500 | s->pr_width = inlink->w - (inlink->w - bsize) % s->step; |
501 | s->pr_height = inlink->h - (inlink->h - bsize) % s->step; |
502 | if (s->pr_width != inlink->w) |
503 | av_log(ctx, AV_LOG_WARNING, "The last %d horizontal pixels won't be denoised\n", |
504 | inlink->w - s->pr_width); |
505 | if (s->pr_height != inlink->h) |
506 | av_log(ctx, AV_LOG_WARNING, "The last %d vertical pixels won't be denoised\n", |
507 | inlink->h - s->pr_height); |
508 | |
509 | max_slice_h = s->pr_height / ((s->bsize - 1) * 2); |
510 | s->nb_threads = FFMIN3(MAX_THREADS, ff_filter_get_nb_threads(ctx), max_slice_h); |
511 | av_log(ctx, AV_LOG_DEBUG, "threads: [max=%d hmax=%d user=%d] => %d\n", |
512 | MAX_THREADS, max_slice_h, ff_filter_get_nb_threads(ctx), s->nb_threads); |
513 | |
514 | s->p_linesize = linesize = FFALIGN(s->pr_width, 32); |
515 | for (i = 0; i < 2; i++) { |
516 | s->cbuf[i][0] = av_malloc_array(linesize * s->pr_height, sizeof(*s->cbuf[i][0])); |
517 | s->cbuf[i][1] = av_malloc_array(linesize * s->pr_height, sizeof(*s->cbuf[i][1])); |
518 | s->cbuf[i][2] = av_malloc_array(linesize * s->pr_height, sizeof(*s->cbuf[i][2])); |
519 | if (!s->cbuf[i][0] || !s->cbuf[i][1] || !s->cbuf[i][2]) |
520 | return AVERROR(ENOMEM); |
521 | } |
522 | |
523 | /* eval expressions are probably not thread safe when the eval internal |
524 | * state can be changed (typically through load & store operations) */ |
525 | if (s->expr_str) { |
526 | for (i = 0; i < s->nb_threads; i++) { |
527 | int ret = av_expr_parse(&s->expr[i], s->expr_str, var_names, |
528 | NULL, NULL, NULL, NULL, 0, ctx); |
529 | if (ret < 0) |
530 | return ret; |
531 | } |
532 | } |
533 | |
534 | /* each slice will need to (pre & re)process the top and bottom block of |
535 | * the previous one in in addition to its processing area. This is because |
536 | * each pixel is averaged by all the surrounding blocks */ |
537 | slice_h = (int)ceilf(s->pr_height / (float)s->nb_threads) + (s->bsize - 1) * 2; |
538 | for (i = 0; i < s->nb_threads; i++) { |
539 | s->slices[i] = av_malloc_array(linesize, slice_h * sizeof(*s->slices[i])); |
540 | if (!s->slices[i]) |
541 | return AVERROR(ENOMEM); |
542 | } |
543 | |
544 | s->weights = av_malloc(s->pr_height * linesize * sizeof(*s->weights)); |
545 | if (!s->weights) |
546 | return AVERROR(ENOMEM); |
547 | iweights = av_calloc(s->pr_height, linesize * sizeof(*iweights)); |
548 | if (!iweights) |
549 | return AVERROR(ENOMEM); |
550 | for (y = 0; y < s->pr_height - bsize + 1; y += s->step) |
551 | for (x = 0; x < s->pr_width - bsize + 1; x += s->step) |
552 | for (by = 0; by < bsize; by++) |
553 | for (bx = 0; bx < bsize; bx++) |
554 | iweights[(y + by)*linesize + x + bx]++; |
555 | for (y = 0; y < s->pr_height; y++) |
556 | for (x = 0; x < s->pr_width; x++) |
557 | s->weights[y*linesize + x] = 1. / iweights[y*linesize + x]; |
558 | av_free(iweights); |
559 | |
560 | return 0; |
561 | } |
562 | |
563 | static av_cold int init(AVFilterContext *ctx) |
564 | { |
565 | DCTdnoizContext *s = ctx->priv; |
566 | |
567 | s->bsize = 1 << s->n; |
568 | if (s->overlap == -1) |
569 | s->overlap = s->bsize - 1; |
570 | |
571 | if (s->overlap > s->bsize - 1) { |
572 | av_log(s, AV_LOG_ERROR, "Overlap value can not except %d " |
573 | "with a block size of %dx%d\n", |
574 | s->bsize - 1, s->bsize, s->bsize); |
575 | return AVERROR(EINVAL); |
576 | } |
577 | |
578 | if (s->expr_str) { |
579 | switch (s->n) { |
580 | case 3: s->filter_freq_func = filter_freq_expr_8; break; |
581 | case 4: s->filter_freq_func = filter_freq_expr_16; break; |
582 | default: av_assert0(0); |
583 | } |
584 | } else { |
585 | switch (s->n) { |
586 | case 3: s->filter_freq_func = filter_freq_sigma_8; break; |
587 | case 4: s->filter_freq_func = filter_freq_sigma_16; break; |
588 | default: av_assert0(0); |
589 | } |
590 | } |
591 | |
592 | s->th = s->sigma * 3.; |
593 | s->step = s->bsize - s->overlap; |
594 | return 0; |
595 | } |
596 | |
597 | static int query_formats(AVFilterContext *ctx) |
598 | { |
599 | static const enum AVPixelFormat pix_fmts[] = { |
600 | AV_PIX_FMT_BGR24, AV_PIX_FMT_RGB24, |
601 | AV_PIX_FMT_NONE |
602 | }; |
603 | AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts); |
604 | if (!fmts_list) |
605 | return AVERROR(ENOMEM); |
606 | return ff_set_common_formats(ctx, fmts_list); |
607 | } |
608 | |
609 | typedef struct ThreadData { |
610 | float *src, *dst; |
611 | } ThreadData; |
612 | |
613 | static int filter_slice(AVFilterContext *ctx, |
614 | void *arg, int jobnr, int nb_jobs) |
615 | { |
616 | int x, y; |
617 | DCTdnoizContext *s = ctx->priv; |
618 | const ThreadData *td = arg; |
619 | const int w = s->pr_width; |
620 | const int h = s->pr_height; |
621 | const int slice_start = (h * jobnr ) / nb_jobs; |
622 | const int slice_end = (h * (jobnr+1)) / nb_jobs; |
623 | const int slice_start_ctx = FFMAX(slice_start - s->bsize + 1, 0); |
624 | const int slice_end_ctx = FFMIN(slice_end, h - s->bsize + 1); |
625 | const int slice_h = slice_end_ctx - slice_start_ctx; |
626 | const int src_linesize = s->p_linesize; |
627 | const int dst_linesize = s->p_linesize; |
628 | const int slice_linesize = s->p_linesize; |
629 | float *dst; |
630 | const float *src = td->src + slice_start_ctx * src_linesize; |
631 | const float *weights = s->weights + slice_start * dst_linesize; |
632 | float *slice = s->slices[jobnr]; |
633 | |
634 | // reset block sums |
635 | memset(slice, 0, (slice_h + s->bsize - 1) * dst_linesize * sizeof(*slice)); |
636 | |
637 | // block dct sums |
638 | for (y = 0; y < slice_h; y += s->step) { |
639 | for (x = 0; x < w - s->bsize + 1; x += s->step) |
640 | s->filter_freq_func(s, src + x, src_linesize, |
641 | slice + x, slice_linesize, |
642 | jobnr); |
643 | src += s->step * src_linesize; |
644 | slice += s->step * slice_linesize; |
645 | } |
646 | |
647 | // average blocks |
648 | slice = s->slices[jobnr] + (slice_start - slice_start_ctx) * slice_linesize; |
649 | dst = td->dst + slice_start * dst_linesize; |
650 | for (y = slice_start; y < slice_end; y++) { |
651 | for (x = 0; x < w; x++) |
652 | dst[x] = slice[x] * weights[x]; |
653 | slice += slice_linesize; |
654 | dst += dst_linesize; |
655 | weights += dst_linesize; |
656 | } |
657 | |
658 | return 0; |
659 | } |
660 | |
661 | static int filter_frame(AVFilterLink *inlink, AVFrame *in) |
662 | { |
663 | AVFilterContext *ctx = inlink->dst; |
664 | DCTdnoizContext *s = ctx->priv; |
665 | AVFilterLink *outlink = inlink->dst->outputs[0]; |
666 | int direct, plane; |
667 | AVFrame *out; |
668 | |
669 | if (av_frame_is_writable(in)) { |
670 | direct = 1; |
671 | out = in; |
672 | } else { |
673 | direct = 0; |
674 | out = ff_get_video_buffer(outlink, outlink->w, outlink->h); |
675 | if (!out) { |
676 | av_frame_free(&in); |
677 | return AVERROR(ENOMEM); |
678 | } |
679 | av_frame_copy_props(out, in); |
680 | } |
681 | |
682 | s->color_decorrelation(s->cbuf[0], s->p_linesize, |
683 | in->data[0], in->linesize[0], |
684 | s->pr_width, s->pr_height); |
685 | for (plane = 0; plane < 3; plane++) { |
686 | ThreadData td = { |
687 | .src = s->cbuf[0][plane], |
688 | .dst = s->cbuf[1][plane], |
689 | }; |
690 | ctx->internal->execute(ctx, filter_slice, &td, NULL, s->nb_threads); |
691 | } |
692 | s->color_correlation(out->data[0], out->linesize[0], |
693 | s->cbuf[1], s->p_linesize, |
694 | s->pr_width, s->pr_height); |
695 | |
696 | if (!direct) { |
697 | int y; |
698 | uint8_t *dst = out->data[0]; |
699 | const uint8_t *src = in->data[0]; |
700 | const int dst_linesize = out->linesize[0]; |
701 | const int src_linesize = in->linesize[0]; |
702 | const int hpad = (inlink->w - s->pr_width) * 3; |
703 | const int vpad = (inlink->h - s->pr_height); |
704 | |
705 | if (hpad) { |
706 | uint8_t *dstp = dst + s->pr_width * 3; |
707 | const uint8_t *srcp = src + s->pr_width * 3; |
708 | |
709 | for (y = 0; y < s->pr_height; y++) { |
710 | memcpy(dstp, srcp, hpad); |
711 | dstp += dst_linesize; |
712 | srcp += src_linesize; |
713 | } |
714 | } |
715 | if (vpad) { |
716 | uint8_t *dstp = dst + s->pr_height * dst_linesize; |
717 | const uint8_t *srcp = src + s->pr_height * src_linesize; |
718 | |
719 | for (y = 0; y < vpad; y++) { |
720 | memcpy(dstp, srcp, inlink->w * 3); |
721 | dstp += dst_linesize; |
722 | srcp += src_linesize; |
723 | } |
724 | } |
725 | |
726 | av_frame_free(&in); |
727 | } |
728 | |
729 | return ff_filter_frame(outlink, out); |
730 | } |
731 | |
732 | static av_cold void uninit(AVFilterContext *ctx) |
733 | { |
734 | int i; |
735 | DCTdnoizContext *s = ctx->priv; |
736 | |
737 | av_freep(&s->weights); |
738 | for (i = 0; i < 2; i++) { |
739 | av_freep(&s->cbuf[i][0]); |
740 | av_freep(&s->cbuf[i][1]); |
741 | av_freep(&s->cbuf[i][2]); |
742 | } |
743 | for (i = 0; i < s->nb_threads; i++) { |
744 | av_freep(&s->slices[i]); |
745 | av_expr_free(s->expr[i]); |
746 | } |
747 | } |
748 | |
749 | static const AVFilterPad dctdnoiz_inputs[] = { |
750 | { |
751 | .name = "default", |
752 | .type = AVMEDIA_TYPE_VIDEO, |
753 | .filter_frame = filter_frame, |
754 | .config_props = config_input, |
755 | }, |
756 | { NULL } |
757 | }; |
758 | |
759 | static const AVFilterPad dctdnoiz_outputs[] = { |
760 | { |
761 | .name = "default", |
762 | .type = AVMEDIA_TYPE_VIDEO, |
763 | }, |
764 | { NULL } |
765 | }; |
766 | |
767 | AVFilter ff_vf_dctdnoiz = { |
768 | .name = "dctdnoiz", |
769 | .description = NULL_IF_CONFIG_SMALL("Denoise frames using 2D DCT."), |
770 | .priv_size = sizeof(DCTdnoizContext), |
771 | .init = init, |
772 | .uninit = uninit, |
773 | .query_formats = query_formats, |
774 | .inputs = dctdnoiz_inputs, |
775 | .outputs = dctdnoiz_outputs, |
776 | .priv_class = &dctdnoiz_class, |
777 | .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS, |
778 | }; |
779 |