summaryrefslogtreecommitdiff
path: root/libavfilter/vf_fspp.c (plain)
blob: c6989046c4ead0df04e3f0925c96001645ac7d4b
1/*
2 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3 * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
4 * Copyright (c) 2014 Arwa Arif <arwaarif1994@gmail.com>
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23/**
24 * @file
25 * Fast Simple Post-processing filter
26 * This implementation is based on an algorithm described in
27 * "Aria Nosratinia Embedded Post-Processing for
28 * Enhancement of Compressed Images (1999)"
29 * (http://www.utdallas.edu/~aria/papers/vlsisp99.pdf)
30 * Further, with splitting (I)DCT into horizontal/vertical passes, one of
31 * them can be performed once per block, not per pixel. This allows for much
32 * higher speed.
33 *
34 * Originally written by Michael Niedermayer and Nikolaj for the MPlayer
35 * project, and ported by Arwa Arif for FFmpeg.
36 */
37
38#include "libavutil/avassert.h"
39#include "libavutil/imgutils.h"
40#include "libavutil/opt.h"
41#include "libavutil/pixdesc.h"
42#include "internal.h"
43#include "vf_fspp.h"
44
/* OFFSET(x): byte offset of field x inside FSPPContext; binds each option below to its storage.
 * FLAGS: the flag set shared by every option in the table. */
45#define OFFSET(x) offsetof(FSPPContext, x)
46#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
/* User-settable options of the fspp filter.
 * NOTE(review): the positional fields after the type look like default, minimum, maximum
 * and flags, as usual for AVOption tables -- the struct layout is not visible in this file. */
47static const AVOption fspp_options[] = {
 /* Stored in log2_count; filter() derives its row step as 6 - log2_count. */
48 { "quality", "set quality", OFFSET(log2_count), AV_OPT_TYPE_INT, {.i64 = 4}, 4, MAX_LEVEL, FLAGS },
 /* A value of 0 makes filter_frame() fetch the QP table from the frame instead of forcing one. */
49 { "qp", "force a constant quantizer parameter", OFFSET(qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 64, FLAGS },
 /* Added to a base of 16 in filter_frame() to form the bias that scales custom_threshold[]. */
50 { "strength", "set filter strength", OFFSET(strength), AV_OPT_TYPE_INT, {.i64 = 0}, -15, 32, FLAGS },
 /* When 0, filter_frame() saves the QP table of non-B frames and filters with that saved copy. */
51 { "use_bframe_qp", "use B-frames' QP", OFFSET(use_bframe_qp), AV_OPT_TYPE_BOOL,{.i64 = 0}, 0, 1, FLAGS },
52 { NULL }
53};
54
/* Presumably defines the fspp_class object that ff_vf_fspp references as its priv_class
 * (the macro itself is defined outside this file -- confirm). */
55AVFILTER_DEFINE_CLASS(fspp);
56
/* 8x8 table of per-position offsets; every value from 0 to 63 appears exactly once.
 * store_slice_c() and store_slice2_c() index it as dither[row][column within an 8-pixel
 * group] and add the entry, right-shifted by log2_scale, to the accumulated sum before
 * the final down-shift to 8 bits. */
57DECLARE_ALIGNED(32, static const uint8_t, dither)[8][8] = {
58 { 0, 48, 12, 60, 3, 51, 15, 63, },
59 { 32, 16, 44, 28, 35, 19, 47, 31, },
60 { 8, 56, 4, 52, 11, 59, 7, 55, },
61 { 40, 24, 36, 20, 43, 27, 39, 23, },
62 { 2, 50, 14, 62, 1, 49, 13, 61, },
63 { 34, 18, 46, 30, 33, 17, 45, 29, },
64 { 10, 58, 6, 54, 9, 57, 5, 53, },
65 { 42, 26, 38, 22, 41, 25, 37, 21, },
66};
67
/* 64 base threshold values, laid out as 8 rows of 8.
 * filter_frame() multiplies each entry by (16 + strength) / 71.0, rounds it, and packs
 * the results into FSPPContext.threshold_mtx_noq in a permuted order. */
68static const short custom_threshold[64] = {
69// values (296) can't be too high
70// -it causes too big quant dependence
71// or maybe overflow(check), which results in some flashing
72 71, 296, 295, 237, 71, 40, 38, 19,
73 245, 193, 185, 121, 102, 73, 53, 27,
74 158, 129, 141, 107, 97, 73, 50, 26,
75 102, 116, 109, 98, 82, 66, 45, 23,
76 71, 94, 95, 81, 70, 56, 38, 20,
77 56, 77, 74, 66, 56, 44, 30, 15,
78 38, 53, 50, 45, 38, 30, 21, 11,
79 20, 27, 26, 23, 20, 15, 11, 5
80};
81
82//This func reads from 1 slice, 1 and clears 0 & 1
83static void store_slice_c(uint8_t *dst, int16_t *src,
84 ptrdiff_t dst_stride, ptrdiff_t src_stride,
85 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
86{
87 int y, x;
88#define STORE(pos) \
89 temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
90 src[x + pos] = src[x + pos - 8 * src_stride] = 0; \
91 if (temp & 0x100) temp = ~(temp >> 31); \
92 dst[x + pos] = temp;
93
94 for (y = 0; y < height; y++) {
95 const uint8_t *d = dither[y];
96 for (x = 0; x < width; x += 8) {
97 int temp;
98 STORE(0);
99 STORE(1);
100 STORE(2);
101 STORE(3);
102 STORE(4);
103 STORE(5);
104 STORE(6);
105 STORE(7);
106 }
107 src += src_stride;
108 dst += dst_stride;
109 }
110}
111
112//This func reads from 2 slices, 0 & 2 and clears 2-nd
113static void store_slice2_c(uint8_t *dst, int16_t *src,
114 ptrdiff_t dst_stride, ptrdiff_t src_stride,
115 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
116{
117 int y, x;
118#define STORE2(pos) \
119 temp = (src[x + pos] + src[x + pos + 16 * src_stride] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
120 src[x + pos + 16 * src_stride] = 0; \
121 if (temp & 0x100) temp = ~(temp >> 31); \
122 dst[x + pos] = temp;
123
124 for (y = 0; y < height; y++) {
125 const uint8_t *d = dither[y];
126 for (x = 0; x < width; x += 8) {
127 int temp;
128 STORE2(0);
129 STORE2(1);
130 STORE2(2);
131 STORE2(3);
132 STORE2(4);
133 STORE2(5);
134 STORE2(6);
135 STORE2(7);
136 }
137 src += src_stride;
138 dst += dst_stride;
139 }
140}
141
/*
 * Scale the 64-entry base threshold matrix by the quantizer q.
 *
 * thr_adr_noq is the unscaled input, thr_adr receives q * input for each of
 * the 64 entries (the product is truncated to int16_t on store).
 */
static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
{
    const int16_t *in = thr_adr_noq;
    const int16_t *const end = thr_adr_noq + 64;
    int16_t *out = thr_adr;

    while (in < end)
        *out++ = q * *in++;
}
148
/**
 * Run the post-processing filter on a single plane.
 *
 * The plane is copied into p->src with an 8-pixel mirrored border on every
 * side, transformed through p->row_fdct / p->column_fidct / p->row_idct while
 * the results accumulate in p->temp, and finished 8-row slices are written to
 * dst through p->store_slice / p->store_slice2.
 *
 * @param p          filter context (scratch buffers, DSP function pointers, options)
 * @param dst        destination plane; the function returns early if NULL
 * @param src        source plane; the function returns early if NULL
 * @param dst_stride distance between destination rows, in bytes
 * @param src_stride distance between source rows, in bytes
 * @param width      plane width in pixels
 * @param height     plane height in pixels
 * @param qp_store   per-block quantizer table; only read when p->qp is 0
 * @param qp_stride  row stride of qp_store
 * @param is_luma    non-zero for the luma plane: selects p->temp_stride as scratch
 *                   stride and leaves the QP coordinate shifts at 4
 */
149static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src,
150 int dst_stride, int src_stride,
151 int width, int height,
152 uint8_t *qp_store, int qp_stride, int is_luma)
153{
154 int x, x0, y, es, qy, t;
155
 /* Row stride of the scratch buffers p->src and p->temp for this plane. */
156 const int stride = is_luma ? p->temp_stride : (width + 16);
 /* Vertical advance per iteration of the main loop. */
157 const int step = 6 - p->log2_count;
 /* Right shifts that map a pixel coordinate to a qp_store index; reduced by the
  * chroma subsampling shifts for chroma planes. */
158 const int qpsh = 4 - p->hsub * !is_luma;
159 const int qpsv = 4 - p->vsub * !is_luma;
160
 /* One aligned array split into two halves, both accessed as int16_t:
  * block receives row_fdct output, block3 receives column_fidct output. */
161 DECLARE_ALIGNED(32, int32_t, block_align)[4 * 8 * BLOCKSZ + 4 * 8 * BLOCKSZ];
162 int16_t *block = (int16_t *)block_align;
163 int16_t *block3 = (int16_t *)(block_align + 4 * 8 * BLOCKSZ);
164
 /* Zeroes the first 4 * 8 * BLOCKSZ bytes of block3 (a byte count, not an element count).
  * NOTE(review): block3 is larger than that; presumably only the leading part has to start
  * at zero because column_fidct assigns (rather than accumulates into) its rows 6 and 7 --
  * confirm against BLOCKSZ. */
165 memset(block3, 0, 4 * 8 * BLOCKSZ);
166
 /* Nothing to do for a plane that is absent in either frame. */
167 if (!src || !dst) return;
168
 /* Copy the plane into p->src at an offset of 8 rows and 8 columns, mirroring
  * 8 pixels outwards at the left and right edge of every row. */
169 for (y = 0; y < height; y++) {
170 int index = 8 + 8 * stride + y * stride;
171 memcpy(p->src + index, src + y * src_stride, width);
172 for (x = 0; x < 8; x++) {
173 p->src[index - x - 1] = p->src[index + x ];
174 p->src[index + width + x ] = p->src[index + width - x - 1];
175 }
176 }
177
 /* Mirror 8 full rows above the top and below the bottom of the padded plane. */
178 for (y = 0; y < 8; y++) {
179 memcpy(p->src + ( 7 - y ) * stride, p->src + ( y + 8 ) * stride, stride);
180 memcpy(p->src + (height + 8 + y) * stride, p->src + (height - y + 7) * stride, stride);
181 }
182 //FIXME (try edge emu)
183
 /* Clear rows 8..23 of the accumulator; row_idct and the store functions keep the
  * remaining rows up to date as the main loop advances. */
184 for (y = 8; y < 24; y++)
185 memset(p->temp + 8 + y * stride, 0, width * sizeof(int16_t));
186
187 for (y = step; y < height + 8; y += step) { //step= 1,2
188 const int y1 = y - 8 + step; //l5-7 l4-6;
 /* Row of qp_store to use for this y: clamp y - 4 to the plane, then scale down. */
189 qy = y - 4;
190
191 if (qy > height - 1) qy = height - 1;
192 if (qy < 0) qy = 0;
193
194 qy = (qy >> qpsv) * qp_stride;
 /* Prime the first 8 columns of block for this row. */
195 p->row_fdct(block, p->src + y * stride + 2 - (y&1), stride, 2);
196
 /* Process the row in chunks of 8 * (BLOCKSZ - 1) columns. */
197 for (x0 = 0; x0 < width + 8 - 8 * (BLOCKSZ - 1); x0 += 8 * (BLOCKSZ - 1)) {
198 p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y&1), stride, 2 * (BLOCKSZ - 1));
199
 /* Forced QP: the threshold matrix was prepared once in filter_frame(), so the
  * whole chunk is processed in one call. */
200 if (p->qp)
201 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + 0 * 8, block3 + 0 * 8, 8 * (BLOCKSZ - 1)); //yes, this is a HOTSPOT
202 else
 /* Per-block QP: look up the quantizer for every 8 columns and rebuild the
  * threshold matrix only when it differs from the previous one. */
203 for (x = 0; x < 8 * (BLOCKSZ - 1); x += 8) {
204 t = x + x0 - 2; //correct t=x+x0-2-(y&1), but its the same
205
206 if (t < 0) t = 0; //t always < width-2
207
208 t = qp_store[qy + (t >> qpsh)];
209 t = ff_norm_qscale(t, p->qscale_type);
210
211 if (t != p->prev_q) p->prev_q = t, p->mul_thrmat((int16_t *)(&p->threshold_mtx_noq[0]), (int16_t *)(&p->threshold_mtx[0]), t);
212 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + x * 8, block3 + x * 8, 8); //yes, this is a HOTSPOT
213 }
 /* Accumulate the chunk into p->temp; the accumulator row is selected by y & 15. */
214 p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, 2 * (BLOCKSZ - 1));
 /* Move the tail of both work buffers to the front so the next chunk continues from it. */
215 memmove(block, block + (BLOCKSZ - 1) * 64, 8 * 8 * sizeof(int16_t)); //cycling
216 memmove(block3, block3 + (BLOCKSZ - 1) * 64, 6 * 8 * sizeof(int16_t));
217 }
218
 /* Columns left over after the last full chunk. Note that this tail always uses the
  * current threshold matrix, without a new QP lookup. */
219 es = width + 8 - x0; // 8, ...
220 if (es > 8)
221 p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y & 1), stride, (es - 4) >> 2);
222
223 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block, block3, es&(~1));
224 if (es > 3)
225 p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, es >> 2);
226
 /* Every time y1 reaches a non-zero multiple of 8, write out 8 finished rows;
  * bit 3 of y1 decides which of the two store functions (and accumulator halves) is used. */
227 if (!(y1 & 7) && y1) {
228 if (y1 & 8)
229 p->store_slice(dst + (y1 - 8) * dst_stride, p->temp + 8 + 8 * stride,
230 dst_stride, stride, width, 8, 5 - p->log2_count);
231 else
232 p->store_slice2(dst + (y1 - 8) * dst_stride, p->temp + 8 + 0 * stride,
233 dst_stride, stride, width, 8, 5 - p->log2_count);
234 }
235 }
236
 /* Write the remaining y & 7 rows that did not form a complete 8-row slice. */
237 if (y & 7) { // height % 8 != 0
238 if (y & 8)
239 p->store_slice(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 8 * stride,
240 dst_stride, stride, width, y&7, 5 - p->log2_count);
241 else
242 p->store_slice2(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 0 * stride,
243 dst_stride, stride, width, y&7, 5 - p->log2_count);
244 }
245}
246
/**
 * Combined forward transform, thresholding and inverse transform along columns.
 *
 * For each column the 8 input values (spaced DCTSIZE apart) go through a
 * fixed-point forward butterfly, every resulting coefficient is passed through
 * the THRESHOLD macro together with its entry of the threshold matrix, and the
 * thresholded coefficients are transformed back. THRESHOLD, MULTIPLY16H and the
 * FIX_* constants are defined outside this file.
 *
 * @param thr_adr threshold matrix; entry k * 8 + column is used for coefficient k
 * @param data    input values
 * @param output  result buffer: rows 0..5 are accumulated into (+=),
 *                rows 6 and 7 are overwritten (=)
 * @param cnt     amount of work; the outer loop runs while cnt > 0 and
 *                subtracts 2 per pass, each pass handling DCTSIZE columns
 */
247static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
248{
249 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
250 int_simd16_t tmp10, tmp11, tmp12, tmp13;
251 int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
252 int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;
253
254 int16_t *dataptr;
255 int16_t *wsptr;
256 int16_t *threshold;
257 int ctr;
258
259 dataptr = data;
260 wsptr = output;
261
262 for (; cnt > 0; cnt -= 2) { //start positions
 /* Restart at the first column of the threshold matrix for every pass. */
263 threshold = (int16_t *)thr_adr;//threshold_mtx
264 for (ctr = DCTSIZE; ctr > 0; ctr--) {
265 // Process columns from input, add to output.
 /* First butterfly stage: sums and differences of mirrored inputs. */
266 tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
267 tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
268
269 tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
270 tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
271
272 tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
273 tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
274
275 tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
276 tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
277
278 // Even part of FDCT
279
280 tmp10 = tmp0 + tmp3;
281 tmp13 = tmp0 - tmp3;
282 tmp11 = tmp1 + tmp2;
283 tmp12 = tmp1 - tmp2;
284
285 d0 = tmp10 + tmp11;
286 d4 = tmp10 - tmp11;
287
288 z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
289 d2 = tmp13 + z1;
290 d6 = tmp13 - z1;
291
292 // Even part of IDCT
293
 /* Threshold the even coefficients d0, d2, d4, d6 into tmp0..tmp3. */
294 THRESHOLD(tmp0, d0, threshold[0 * 8]);
295 THRESHOLD(tmp1, d2, threshold[2 * 8]);
296 THRESHOLD(tmp2, d4, threshold[4 * 8]);
297 THRESHOLD(tmp3, d6, threshold[6 * 8]);
 /* Rounding offset applied to the DC term before the >> 2 below. */
298 tmp0 += 2;
299 tmp10 = (tmp0 + tmp2) >> 2;
300 tmp11 = (tmp0 - tmp2) >> 2;
301
302 tmp13 = (tmp1 + tmp3) >>2; //+2 ! (psnr decides)
303 tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2
304
305 tmp0 = tmp10 + tmp13; //->temps
306 tmp3 = tmp10 - tmp13; //->temps
307 tmp1 = tmp11 + tmp12; //->temps
308 tmp2 = tmp11 - tmp12; //->temps
309
310 // Odd part of FDCT
311
312 tmp10 = tmp4 + tmp5;
313 tmp11 = tmp5 + tmp6;
314 tmp12 = tmp6 + tmp7;
315
316 z5 = MULTIPLY16H((tmp10 - tmp12) << 2, FIX_0_382683433);
317 z2 = MULTIPLY16H(tmp10 << 2, FIX_0_541196100) + z5;
318 z4 = MULTIPLY16H(tmp12 << 2, FIX_1_306562965) + z5;
319 z3 = MULTIPLY16H(tmp11 << 2, FIX_0_707106781);
320
321 z11 = tmp7 + z3;
322 z13 = tmp7 - z3;
323
324 d5 = z13 + z2;
325 d3 = z13 - z2;
326 d1 = z11 + z4;
327 d7 = z11 - z4;
328
329 // Odd part of IDCT
330
 /* Threshold the odd coefficients d1, d3, d5, d7 into tmp4..tmp7. */
331 THRESHOLD(tmp4, d1, threshold[1 * 8]);
332 THRESHOLD(tmp5, d3, threshold[3 * 8]);
333 THRESHOLD(tmp6, d5, threshold[5 * 8]);
334 THRESHOLD(tmp7, d7, threshold[7 * 8]);
335
336 //Simd version uses here a shortcut for the tmp5,tmp6,tmp7 == 0
337 z13 = tmp6 + tmp5;
338 z10 = (tmp6 - tmp5) << 1;
339 z11 = tmp4 + tmp7;
340 z12 = (tmp4 - tmp7) << 1;
341
342 tmp7 = (z11 + z13) >> 2; //+2 !
343 tmp11 = MULTIPLY16H((z11 - z13) << 1, FIX_1_414213562);
344 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
345 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
346 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !!
347
348 tmp6 = tmp12 - tmp7;
349 tmp5 = tmp11 - tmp6;
350 tmp4 = tmp10 + tmp5;
351
 /* Rows 0..5 accumulate onto what is already in the output buffer; rows 6 and 7
  * are plain stores, which initialises them before later passes add to them. */
352 wsptr[DCTSIZE * 0] += (tmp0 + tmp7);
353 wsptr[DCTSIZE * 1] += (tmp1 + tmp6);
354 wsptr[DCTSIZE * 2] += (tmp2 + tmp5);
355 wsptr[DCTSIZE * 3] += (tmp3 - tmp4);
356 wsptr[DCTSIZE * 4] += (tmp3 + tmp4);
357 wsptr[DCTSIZE * 5] += (tmp2 - tmp5);
358 wsptr[DCTSIZE * 6] = (tmp1 - tmp6);
359 wsptr[DCTSIZE * 7] = (tmp0 - tmp7);
360 //
361 dataptr++; //next column
362 wsptr++;
363 threshold++;
364 }
 /* The inner loop advanced both pointers by 8; skipping 8 more makes the next pass
  * start 16 entries after this one, so consecutive passes overlap in the buffers. */
365 dataptr += 8; //skip each second start pos
366 wsptr += 8;
367 }
368}
369
/**
 * Inverse transform of coefficient groups, accumulated into the output buffer.
 *
 * Each group of DCTSIZE consecutive coefficients in workspace is turned into
 * 8 values that are added to one output column: output_adr[k * output_stride]
 * for k = 0..7. The output pointer then moves one column to the right.
 *
 * The coefficients are expected in the order written by row_fdct_c():
 * indices 2, 3, 0, 1 feed the even part and indices 4..7 the odd part.
 *
 * @param workspace     input coefficients, DCTSIZE per group
 * @param output_adr    first output column; values are added, not stored
 * @param output_stride distance between output rows, in int16_t elements
 * @param cnt           number of groups divided by 4 (4 * cnt groups are processed)
 */
370static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
371{
372 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
373 int_simd16_t tmp10, tmp11, tmp12, tmp13;
374 int_simd16_t z5, z10, z11, z12, z13;
375 int16_t *outptr;
376 int16_t *wsptr;
377
378 cnt *= 4;
379 wsptr = workspace;
380 outptr = output_adr;
381 for (; cnt > 0; cnt--) {
382 // Even part
383 //Simd version reads 4x4 block and transposes it
384 tmp10 = wsptr[2] + wsptr[3];
385 tmp11 = wsptr[2] - wsptr[3];
386
387 tmp13 = wsptr[0] + wsptr[1];
388 tmp12 = (MULTIPLY16H(wsptr[0] - wsptr[1], FIX_1_414213562_A) << 2) - tmp13;//this shift order to avoid overflow
389
390 tmp0 = tmp10 + tmp13; //->temps
391 tmp3 = tmp10 - tmp13; //->temps
392 tmp1 = tmp11 + tmp12;
393 tmp2 = tmp11 - tmp12;
394
395 // Odd part
396 //Also transpose, with previous:
397 // ---- ---- ||||
398 // ---- ---- idct ||||
399 // ---- ---- ---> ||||
400 // ---- ---- ||||
401 z13 = wsptr[4] + wsptr[5];
402 z10 = wsptr[4] - wsptr[5];
403 z11 = wsptr[6] + wsptr[7];
404 z12 = wsptr[6] - wsptr[7];
405
406 tmp7 = z11 + z13;
407 tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);
408
409 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
410 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
411 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_
412
 /* The products are scaled up by 8 here; DESCALE(..., 3) below scales the sums back. */
413 tmp6 = (tmp12 << 3) - tmp7;
414 tmp5 = (tmp11 << 3) - tmp6;
415 tmp4 = (tmp10 << 3) + tmp5;
416
417 // Final output stage: descale and write column
418 outptr[0 * output_stride] += DESCALE(tmp0 + tmp7, 3);
419 outptr[1 * output_stride] += DESCALE(tmp1 + tmp6, 3);
420 outptr[2 * output_stride] += DESCALE(tmp2 + tmp5, 3);
421 outptr[3 * output_stride] += DESCALE(tmp3 - tmp4, 3);
422 outptr[4 * output_stride] += DESCALE(tmp3 + tmp4, 3);
423 outptr[5 * output_stride] += DESCALE(tmp2 - tmp5, 3);
424 outptr[6 * output_stride] += DESCALE(tmp1 - tmp6, 3); //no += ?
425 outptr[7 * output_stride] += DESCALE(tmp0 - tmp7, 3); //no += ?
426 outptr++;
427
428 wsptr += DCTSIZE; // advance pointer to next row
429 }
430}
431
/**
 * Forward transform of pixel columns into coefficient groups.
 *
 * For each of 4 * cnt consecutive pixel columns, the 8 pixels spaced line_size
 * apart are transformed and the 8 results are written as one group of DCTSIZE
 * entries in data. Within a group the even-part results are stored at indices
 * 2, 3, 0, 1 and the odd-part results at indices 4..7; row_idct_c() reads them
 * back in the same order.
 *
 * @param data      output coefficients, DCTSIZE per pixel column
 * @param pixels    first pixel of the first column
 * @param line_size distance between the 8 pixels of a column, in bytes
 * @param cnt       number of columns divided by 4 (4 * cnt columns are processed)
 */
432static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
433{
434 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
435 int_simd16_t tmp10, tmp11, tmp12, tmp13;
436 int_simd16_t z1, z2, z3, z4, z5, z11, z13;
437 int16_t *dataptr;
438
439 cnt *= 4;
440 // Pass 1: process rows.
441
442 dataptr = data;
443 for (; cnt > 0; cnt--) {
 /* First butterfly stage: sums and differences of mirrored pixels. */
444 tmp0 = pixels[line_size * 0] + pixels[line_size * 7];
445 tmp7 = pixels[line_size * 0] - pixels[line_size * 7];
446 tmp1 = pixels[line_size * 1] + pixels[line_size * 6];
447 tmp6 = pixels[line_size * 1] - pixels[line_size * 6];
448 tmp2 = pixels[line_size * 2] + pixels[line_size * 5];
449 tmp5 = pixels[line_size * 2] - pixels[line_size * 5];
450 tmp3 = pixels[line_size * 3] + pixels[line_size * 4];
451 tmp4 = pixels[line_size * 3] - pixels[line_size * 4];
452
453 // Even part
454
455 tmp10 = tmp0 + tmp3;
456 tmp13 = tmp0 - tmp3;
457 tmp11 = tmp1 + tmp2;
458 tmp12 = tmp1 - tmp2;
459 //Even columns are written first, this leads to different order of columns
460 //in column_fidct(), but they are processed independently, so all ok.
461 //Later, row_idct() reads the columns in the same order.
462 dataptr[2] = tmp10 + tmp11;
463 dataptr[3] = tmp10 - tmp11;
464
465 z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
466 dataptr[0] = tmp13 + z1;
467 dataptr[1] = tmp13 - z1;
468
469 // Odd part
470
471 tmp10 = (tmp4 + tmp5) << 2;
472 tmp11 = (tmp5 + tmp6) << 2;
473 tmp12 = (tmp6 + tmp7) << 2;
474
475 z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433);
476 z2 = MULTIPLY16H(tmp10, FIX_0_541196100) + z5;
477 z4 = MULTIPLY16H(tmp12, FIX_1_306562965) + z5;
478 z3 = MULTIPLY16H(tmp11, FIX_0_707106781);
479
480 z11 = tmp7 + z3;
481 z13 = tmp7 - z3;
482
483 dataptr[4] = z13 + z2;
484 dataptr[5] = z13 - z2;
485 dataptr[6] = z11 + z4;
486 dataptr[7] = z11 - z4;
487
488 pixels++; // advance pointer to next column
489 dataptr += DCTSIZE;
490 }
491}
492
493static int query_formats(AVFilterContext *ctx)
494{
495 static const enum AVPixelFormat pix_fmts[] = {
496 AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P,
497 AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV411P,
498 AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV440P,
499 AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P,
500 AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ440P,
501 AV_PIX_FMT_GBRP, AV_PIX_FMT_GRAY8,
502 AV_PIX_FMT_NONE
503 };
504
505 AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
506 if (!fmts_list)
507 return AVERROR(ENOMEM);
508 return ff_set_common_formats(ctx, fmts_list);
509}
510
511static int config_input(AVFilterLink *inlink)
512{
513 AVFilterContext *ctx = inlink->dst;
514 FSPPContext *fspp = ctx->priv;
515 const int h = FFALIGN(inlink->h + 16, 16);
516 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
517
518 fspp->hsub = desc->log2_chroma_w;
519 fspp->vsub = desc->log2_chroma_h;
520
521 fspp->temp_stride = FFALIGN(inlink->w + 16, 16);
522 fspp->temp = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->temp));
523 fspp->src = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->src));
524
525 if (!fspp->temp || !fspp->src)
526 return AVERROR(ENOMEM);
527
528 if (!fspp->use_bframe_qp && !fspp->qp) {
529 fspp->non_b_qp_alloc_size = AV_CEIL_RSHIFT(inlink->w, 4) * AV_CEIL_RSHIFT(inlink->h, 4);
530 fspp->non_b_qp_table = av_calloc(fspp->non_b_qp_alloc_size, sizeof(*fspp->non_b_qp_table));
531 if (!fspp->non_b_qp_table)
532 return AVERROR(ENOMEM);
533 }
534
535 fspp->store_slice = store_slice_c;
536 fspp->store_slice2 = store_slice2_c;
537 fspp->mul_thrmat = mul_thrmat_c;
538 fspp->column_fidct = column_fidct_c;
539 fspp->row_idct = row_idct_c;
540 fspp->row_fdct = row_fdct_c;
541
542 if (ARCH_X86)
543 ff_fspp_init_x86(fspp);
544
545 return 0;
546}
547
548static int filter_frame(AVFilterLink *inlink, AVFrame *in)
549{
550 AVFilterContext *ctx = inlink->dst;
551 FSPPContext *fspp = ctx->priv;
552 AVFilterLink *outlink = ctx->outputs[0];
553 AVFrame *out = in;
554
555 int qp_stride = 0;
556 uint8_t *qp_table = NULL;
557 int i, bias;
558 int custom_threshold_m[64];
559
560 bias = (1 << 4) + fspp->strength;
561
562 for (i = 0; i < 64; i++) //FIXME: tune custom_threshold[] and remove this !
563 custom_threshold_m[i] = (int)(custom_threshold[i] * (bias / 71.0) + 0.5);
564
565 for (i = 0; i < 8; i++) {
566 fspp->threshold_mtx_noq[2 * i] = (uint64_t)custom_threshold_m[i * 8 + 2]
567 |(((uint64_t)custom_threshold_m[i * 8 + 6]) << 16)
568 |(((uint64_t)custom_threshold_m[i * 8 + 0]) << 32)
569 |(((uint64_t)custom_threshold_m[i * 8 + 4]) << 48);
570
571 fspp->threshold_mtx_noq[2 * i + 1] = (uint64_t)custom_threshold_m[i * 8 + 5]
572 |(((uint64_t)custom_threshold_m[i * 8 + 3]) << 16)
573 |(((uint64_t)custom_threshold_m[i * 8 + 1]) << 32)
574 |(((uint64_t)custom_threshold_m[i * 8 + 7]) << 48);
575 }
576
577 if (fspp->qp)
578 fspp->prev_q = fspp->qp, fspp->mul_thrmat((int16_t *)(&fspp->threshold_mtx_noq[0]), (int16_t *)(&fspp->threshold_mtx[0]), fspp->qp);
579
580 /* if we are not in a constant user quantizer mode and we don't want to use
581 * the quantizers from the B-frames (B-frames often have a higher QP), we
582 * need to save the qp table from the last non B-frame; this is what the
583 * following code block does */
584 if (!fspp->qp) {
585 qp_table = av_frame_get_qp_table(in, &qp_stride, &fspp->qscale_type);
586
587 if (qp_table && !fspp->use_bframe_qp && in->pict_type != AV_PICTURE_TYPE_B) {
588 int w, h;
589
590 /* if the qp stride is not set, it means the QP are only defined on
591 * a line basis */
592 if (!qp_stride) {
593 w = AV_CEIL_RSHIFT(inlink->w, 4);
594 h = 1;
595 } else {
596 w = qp_stride;
597 h = AV_CEIL_RSHIFT(inlink->h, 4);
598 }
599 if (w * h > fspp->non_b_qp_alloc_size) {
600 int ret = av_reallocp_array(&fspp->non_b_qp_table, w, h);
601 if (ret < 0) {
602 fspp->non_b_qp_alloc_size = 0;
603 return ret;
604 }
605 fspp->non_b_qp_alloc_size = w * h;
606 }
607
608 av_assert0(w * h <= fspp->non_b_qp_alloc_size);
609 memcpy(fspp->non_b_qp_table, qp_table, w * h);
610 }
611 }
612
613 if (fspp->log2_count && !ctx->is_disabled) {
614 if (!fspp->use_bframe_qp && fspp->non_b_qp_table)
615 qp_table = fspp->non_b_qp_table;
616
617 if (qp_table || fspp->qp) {
618 const int cw = AV_CEIL_RSHIFT(inlink->w, fspp->hsub);
619 const int ch = AV_CEIL_RSHIFT(inlink->h, fspp->vsub);
620
621 /* get a new frame if in-place is not possible or if the dimensions
622 * are not multiple of 8 */
623 if (!av_frame_is_writable(in) || (inlink->w & 7) || (inlink->h & 7)) {
624 const int aligned_w = FFALIGN(inlink->w, 8);
625 const int aligned_h = FFALIGN(inlink->h, 8);
626
627 out = ff_get_video_buffer(outlink, aligned_w, aligned_h);
628 if (!out) {
629 av_frame_free(&in);
630 return AVERROR(ENOMEM);
631 }
632 av_frame_copy_props(out, in);
633 out->width = in->width;
634 out->height = in->height;
635 }
636
637 filter(fspp, out->data[0], in->data[0], out->linesize[0], in->linesize[0],
638 inlink->w, inlink->h, qp_table, qp_stride, 1);
639 filter(fspp, out->data[1], in->data[1], out->linesize[1], in->linesize[1],
640 cw, ch, qp_table, qp_stride, 0);
641 filter(fspp, out->data[2], in->data[2], out->linesize[2], in->linesize[2],
642 cw, ch, qp_table, qp_stride, 0);
643 emms_c();
644 }
645 }
646
647 if (in != out) {
648 if (in->data[3])
649 av_image_copy_plane(out->data[3], out->linesize[3],
650 in ->data[3], in ->linesize[3],
651 inlink->w, inlink->h);
652 av_frame_free(&in);
653 }
654 return ff_filter_frame(outlink, out);
655}
656
657static av_cold void uninit(AVFilterContext *ctx)
658{
659 FSPPContext *fspp = ctx->priv;
660 av_freep(&fspp->temp);
661 av_freep(&fspp->src);
662 av_freep(&fspp->non_b_qp_table);
663}
664
/* Single video input pad: config_input() runs when the link is configured and
 * filter_frame() receives every incoming frame. The list ends with a NULL entry. */
665static const AVFilterPad fspp_inputs[] = {
666 {
667 .name = "default",
668 .type = AVMEDIA_TYPE_VIDEO,
669 .config_props = config_input,
670 .filter_frame = filter_frame,
671 },
672 { NULL }
673};
674
/* Single video output pad with no callbacks of its own. The list ends with a NULL entry. */
675static const AVFilterPad fspp_outputs[] = {
676 {
677 .name = "default",
678 .type = AVMEDIA_TYPE_VIDEO,
679 },
680 { NULL }
681};
682
/* Filter definition for "fspp": ties together the private context size, the
 * uninit and format-negotiation callbacks, the pad lists and the option class.
 * The timeline flag is the "internal" variant; filter_frame() checks
 * ctx->is_disabled itself and passes frames through unfiltered when it is set. */
683AVFilter ff_vf_fspp = {
684 .name = "fspp",
685 .description = NULL_IF_CONFIG_SMALL("Apply Fast Simple Post-processing filter."),
686 .priv_size = sizeof(FSPPContext),
687 .uninit = uninit,
688 .query_formats = query_formats,
689 .inputs = fspp_inputs,
690 .outputs = fspp_outputs,
691 .priv_class = &fspp_class,
692 .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
693};
694