summaryrefslogtreecommitdiff
path: root/libpostproc/postprocess.c (plain)
blob: 1dc719cf93985d5504acb8af20441f5f3d5f9fe8
1/*
2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3 *
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23/**
24 * @file
25 * postprocessing.
26 */
27
28/*
29 C MMX MMX2 3DNow AltiVec
30isVertDC Ec Ec Ec
31isVertMinMaxOk Ec Ec Ec
32doVertLowPass E e e Ec
33doVertDefFilter Ec Ec e e Ec
34isHorizDC Ec Ec Ec
35isHorizMinMaxOk a E Ec
36doHorizLowPass E e e Ec
37doHorizDefFilter Ec Ec e e Ec
38do_a_deblock Ec E Ec E
39deRing E e e* Ecp
40Vertical RKAlgo1 E a a
41Horizontal RKAlgo1 a a
42Vertical X1# a E E
43Horizontal X1# a E E
44LinIpolDeinterlace e E E*
45CubicIpolDeinterlace a e e*
46LinBlendDeinterlace e E E*
47MedianDeinterlace# E Ec Ec
48TempDeNoiser# E e e Ec
49
50* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51# more or less selfinvented filters so the exactness is not too meaningful
52E = Exact implementation
53e = almost exact implementation (slightly different rounding,...)
54a = alternative / approximate impl
55c = checked against the other implementations (-vo md5)
56p = partially optimized, still some work to do
57*/
58
59/*
60TODO:
61reduce the time wasted on the mem transfer
62unroll stuff if instructions depend too much on the prior one
63move YScale thing to the end instead of fixing QP
64write a faster and higher quality deblocking filter :)
65make the mainloop more flexible (variable number of blocks at once
66 (the if/else stuff per block is slowing things down)
67compare the quality & speed of all filters
68split this huge file
69optimize c versions
70try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71...
72*/
73
74//Changelog: use git log
75
76#include "config.h"
77#include "libavutil/avutil.h"
78#include "libavutil/avassert.h"
79#include "libavutil/intreadwrite.h"
80#include <inttypes.h>
81#include <stdio.h>
82#include <stdlib.h>
83#include <string.h>
84//#undef HAVE_MMXEXT_INLINE
85//#define HAVE_AMD3DNOW_INLINE
86//#undef HAVE_MMX_INLINE
87//#undef ARCH_X86
88//#define DEBUG_BRIGHTNESS
89#include "postprocess.h"
90#include "postprocess_internal.h"
91#include "libavutil/avstring.h"
92
93#include "libavutil/ffversion.h"
94const char postproc_ffversion[] = "FFmpeg version " FFMPEG_VERSION;
95
96unsigned postproc_version(void)
97{
98 av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
99 return LIBPOSTPROC_VERSION_INT;
100}
101
102const char *postproc_configuration(void)
103{
104 return FFMPEG_CONFIGURATION;
105}
106
107const char *postproc_license(void)
108{
109#define LICENSE_PREFIX "libpostproc license: "
110 return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
111}
112
113#if HAVE_ALTIVEC_H
114#include <altivec.h>
115#endif
116
/* Size in bytes of the scratch buffer pp_get_mode_by_name_and_quality()
 * copies the filter string into before tokenizing it. */
#define GET_MODE_BUFFER_SIZE 500
/* Capacity of the per-filter option pointer array, NULL terminator included. */
#define OPTIONS_ARRAY_SIZE   10
/* Number of lines the C filters in this file process per block. */
#define BLOCK_SIZE           8
/* Not referenced in the part of the file visible here. */
#define TEMP_STRIDE          8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
122
123#if ARCH_X86 && HAVE_INLINE_ASM
124DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
125DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
126DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
127DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
128DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
129DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
130DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
131DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
132#endif
133
134DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
135
136
137static const struct PPFilter filters[]=
138{
139 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
140 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
141/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
142 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
143 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
144 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
145 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
146 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
147 {"dr", "dering", 1, 5, 6, DERING},
148 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
149 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
150 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
151 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
152 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
153 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
154 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
155 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
156 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
157 {"be", "bitexact", 1, 0, 0, BITEXACT},
158 {"vi", "visualize", 1, 0, 0, VISUALIZE},
159 {NULL, NULL,0,0,0,0} //End Marker
160};
161
/*
 * Alias table used while parsing a mode string: entries come in pairs of
 * (alias name, filter string the alias is replaced with). A single NULL in
 * an alias-name slot terminates the table.
 */
static const char * const replaceTable[] = {
    "default", "hb:a,vb:a,dr:a",
    "de",      "hb:a,vb:a,dr:a",
    "fast",    "h1:a,v1:a,dr:a",
    "fa",      "h1:a,v1:a,dr:a",
    "ac",      "ha:a:128:7,va:a,dr:a",
    NULL /* end marker */
};
171
172/* The horizontal functions exist only in C because the MMX
173 * code is faster with vertical filters and transposing. */
174
175/**
176 * Check if the given 8x8 Block is mostly "flat"
177 */
178static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
179{
180 int numEq= 0;
181 int y;
182 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
183 const int dcThreshold= dcOffset*2 + 1;
184
185 for(y=0; y<BLOCK_SIZE; y++){
186 numEq += ((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold;
187 numEq += ((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold;
188 numEq += ((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold;
189 numEq += ((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold;
190 numEq += ((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold;
191 numEq += ((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold;
192 numEq += ((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold;
193 src+= stride;
194 }
195 return numEq > c->ppMode.flatnessThreshold;
196}
197
198/**
199 * Check if the middle 8x8 Block in the given 8x16 block is flat
200 */
201static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
202{
203 int numEq= 0;
204 int y;
205 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
206 const int dcThreshold= dcOffset*2 + 1;
207
208 src+= stride*4; // src points to begin of the 8x8 Block
209 for(y=0; y<BLOCK_SIZE-1; y++){
210 numEq += ((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold;
211 numEq += ((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold;
212 numEq += ((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold;
213 numEq += ((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold;
214 numEq += ((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold;
215 numEq += ((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold;
216 numEq += ((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold;
217 numEq += ((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold;
218 src+= stride;
219 }
220 return numEq > c->ppMode.flatnessThreshold;
221}
222
/**
 * Sampled range check over eight lines.
 *
 * On every line one pair of pixels five columns apart (wrapping modulo 8) is
 * compared: (0,5), (2,7), (4,1), (6,3), then the same four pairs again for
 * lines 4..7. The check fails as soon as one pair differs by more than 2*QP
 * in either direction.
 *
 * @param src    first pixel of the first line
 * @param stride distance between two lines
 * @param QP     quantizer that scales the allowed difference
 * @return 1 if all sampled pairs are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
    static const uint8_t firstCol[4]  = { 0, 2, 4, 6 };
    static const uint8_t secondCol[4] = { 5, 7, 1, 3 };
    int line;

    for (line = 0; line < 8; line++) {
        const int k = line & 3;

        if ((unsigned)(src[firstCol[k]] - src[secondCol[k]] + 2*QP) > 4*QP)
            return 0;
        src += stride;
    }

    return 1;
}
238
239static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
240{
241 int x;
242 src+= stride*4;
243 for(x=0; x<BLOCK_SIZE; x+=4){
244 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
245 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
246 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
247 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
248 }
249 return 1;
250}
251
252static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
253{
254 if( isHorizDC_C(src, stride, c) ){
255 return isHorizMinMaxOk_C(src, stride, c->QP);
256 }else{
257 return 2;
258 }
259}
260
261static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
262{
263 if( isVertDC_C(src, stride, c) ){
264 return isVertMinMaxOk_C(src, stride, c->QP);
265 }else{
266 return 2;
267 }
268}
269
270static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
271{
272 int y;
273 for(y=0; y<BLOCK_SIZE; y++){
274 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
275
276 if(FFABS(middleEnergy) < 8*c->QP){
277 const int q=(dst[3] - dst[4])/2;
278 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
279 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
280
281 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
282 d= FFMAX(d, 0);
283
284 d= (5*d + 32) >> 6;
285 d*= FFSIGN(-middleEnergy);
286
287 if(q>0)
288 {
289 d = FFMAX(d, 0);
290 d = FFMIN(d, q);
291 }
292 else
293 {
294 d = FFMIN(d, 0);
295 d = FFMAX(d, q);
296 }
297
298 dst[3]-= d;
299 dst[4]+= d;
300 }
301 dst+= stride;
302 }
303}
304
305/**
306 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
307 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
308 */
309static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
310{
311 int y;
312 for(y=0; y<BLOCK_SIZE; y++){
313 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
314 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
315
316 int sums[10];
317 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
318 sums[1] = sums[0] - first + dst[3];
319 sums[2] = sums[1] - first + dst[4];
320 sums[3] = sums[2] - first + dst[5];
321 sums[4] = sums[3] - first + dst[6];
322 sums[5] = sums[4] - dst[0] + dst[7];
323 sums[6] = sums[5] - dst[1] + last;
324 sums[7] = sums[6] - dst[2] + last;
325 sums[8] = sums[7] - dst[3] + last;
326 sums[9] = sums[8] - dst[4] + last;
327
328 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
329 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
330 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
331 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
332 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
333 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
334 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
335 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
336
337 dst+= stride;
338 }
339}
340
341/**
342 * Experimental Filter 1 (Horizontal)
343 * will not damage linear gradients
344 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
345 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
346 * MMX2 version does correct clipping C version does not
347 * not identical with the vertical one
348 */
349static inline void horizX1Filter(uint8_t *src, int stride, int QP)
350{
351 int y;
352 static uint64_t lut[256];
353 if(!lut[255])
354 {
355 int i;
356 for(i=0; i<256; i++)
357 {
358 int v= i < 128 ? 2*i : 2*(i-256);
359/*
360//Simulate 112242211 9-Tap filter
361 uint64_t a= (v/16) & 0xFF;
362 uint64_t b= (v/8) & 0xFF;
363 uint64_t c= (v/4) & 0xFF;
364 uint64_t d= (3*v/8) & 0xFF;
365*/
366//Simulate piecewise linear interpolation
367 uint64_t a= (v/16) & 0xFF;
368 uint64_t b= (v*3/16) & 0xFF;
369 uint64_t c= (v*5/16) & 0xFF;
370 uint64_t d= (7*v/16) & 0xFF;
371 uint64_t A= (0x100 - a)&0xFF;
372 uint64_t B= (0x100 - b)&0xFF;
373 uint64_t C= (0x100 - c)&0xFF;
374 uint64_t D= (0x100 - c)&0xFF;
375
376 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
377 (D<<24) | (C<<16) | (B<<8) | (A);
378 //lut[i] = (v<<32) | (v<<24);
379 }
380 }
381
382 for(y=0; y<BLOCK_SIZE; y++){
383 int a= src[1] - src[2];
384 int b= src[3] - src[4];
385 int c= src[5] - src[6];
386
387 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
388
389 if(d < QP){
390 int v = d * FFSIGN(-b);
391
392 src[1] +=v/8;
393 src[2] +=v/4;
394 src[3] +=3*v/8;
395 src[4] -=3*v/8;
396 src[5] -=v/4;
397 src[6] -=v/8;
398 }
399 src+=stride;
400 }
401}
402
403/**
404 * accurate deblock filter
405 */
406static av_always_inline void do_a_deblock_C(uint8_t *src, int step,
407 int stride, const PPContext *c, int mode)
408{
409 int y;
410 const int QP= c->QP;
411 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
412 const int dcThreshold= dcOffset*2 + 1;
413//START_TIMER
414 src+= step*4; // src points to begin of the 8x8 Block
415 for(y=0; y<8; y++){
416 int numEq= 0;
417
418 numEq += ((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold;
419 numEq += ((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold;
420 numEq += ((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold;
421 numEq += ((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold;
422 numEq += ((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold;
423 numEq += ((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold;
424 numEq += ((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold;
425 numEq += ((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold;
426 numEq += ((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold;
427 if(numEq > c->ppMode.flatnessThreshold){
428 int min, max, x;
429
430 if(src[0] > src[step]){
431 max= src[0];
432 min= src[step];
433 }else{
434 max= src[step];
435 min= src[0];
436 }
437 for(x=2; x<8; x+=2){
438 if(src[x*step] > src[(x+1)*step]){
439 if(src[x *step] > max) max= src[ x *step];
440 if(src[(x+1)*step] < min) min= src[(x+1)*step];
441 }else{
442 if(src[(x+1)*step] > max) max= src[(x+1)*step];
443 if(src[ x *step] < min) min= src[ x *step];
444 }
445 }
446 if(max-min < 2*QP){
447 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
448 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
449
450 int sums[10];
451 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
452 sums[1] = sums[0] - first + src[3*step];
453 sums[2] = sums[1] - first + src[4*step];
454 sums[3] = sums[2] - first + src[5*step];
455 sums[4] = sums[3] - first + src[6*step];
456 sums[5] = sums[4] - src[0*step] + src[7*step];
457 sums[6] = sums[5] - src[1*step] + last;
458 sums[7] = sums[6] - src[2*step] + last;
459 sums[8] = sums[7] - src[3*step] + last;
460 sums[9] = sums[8] - src[4*step] + last;
461
462 if (mode & VISUALIZE) {
463 src[0*step] =
464 src[1*step] =
465 src[2*step] =
466 src[3*step] =
467 src[4*step] =
468 src[5*step] =
469 src[6*step] =
470 src[7*step] = 128;
471 }
472 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
473 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
474 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
475 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
476 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
477 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
478 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
479 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
480 }
481 }else{
482 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
483
484 if(FFABS(middleEnergy) < 8*QP){
485 const int q=(src[3*step] - src[4*step])/2;
486 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
487 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
488
489 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
490 d= FFMAX(d, 0);
491
492 d= (5*d + 32) >> 6;
493 d*= FFSIGN(-middleEnergy);
494
495 if(q>0){
496 d = FFMAX(d, 0);
497 d = FFMIN(d, q);
498 }else{
499 d = FFMIN(d, 0);
500 d = FFMAX(d, q);
501 }
502
503 if ((mode & VISUALIZE) && d) {
504 d= (d < 0) ? 32 : -32;
505 src[3*step]= av_clip_uint8(src[3*step] - d);
506 src[4*step]= av_clip_uint8(src[4*step] + d);
507 d = 0;
508 }
509
510 src[3*step]-= d;
511 src[4*step]+= d;
512 }
513 }
514
515 src += stride;
516 }
517/*if(step==16){
518 STOP_TIMER("step16")
519}else{
520 STOP_TIMER("stepX")
521}*/
522}
523
524//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
525//Plain C versions
526//we always compile C for testing which needs bitexactness
527#define TEMPLATE_PP_C 1
528#include "postprocess_template.c"
529
530#if HAVE_ALTIVEC
531# define TEMPLATE_PP_ALTIVEC 1
532# include "postprocess_altivec_template.c"
533# include "postprocess_template.c"
534#endif
535
536#if ARCH_X86 && HAVE_INLINE_ASM
537# if CONFIG_RUNTIME_CPUDETECT
538# define TEMPLATE_PP_MMX 1
539# include "postprocess_template.c"
540# define TEMPLATE_PP_MMXEXT 1
541# include "postprocess_template.c"
542# define TEMPLATE_PP_3DNOW 1
543# include "postprocess_template.c"
544# define TEMPLATE_PP_SSE2 1
545# include "postprocess_template.c"
546# else
547# if HAVE_SSE2_INLINE
548# define TEMPLATE_PP_SSE2 1
549# include "postprocess_template.c"
550# elif HAVE_MMXEXT_INLINE
551# define TEMPLATE_PP_MMXEXT 1
552# include "postprocess_template.c"
553# elif HAVE_AMD3DNOW_INLINE
554# define TEMPLATE_PP_3DNOW 1
555# include "postprocess_template.c"
556# elif HAVE_MMX_INLINE
557# define TEMPLATE_PP_MMX 1
558# include "postprocess_template.c"
559# endif
560# endif
561#endif
562
563typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
564 const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);
565
566static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
567 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
568{
569 pp_fn pp = postProcess_C;
570 PPContext *c= (PPContext *)vc;
571 PPMode *ppMode= (PPMode *)vm;
572 c->ppMode= *ppMode; //FIXME
573
574 if (!(ppMode->lumMode & BITEXACT)) {
575#if CONFIG_RUNTIME_CPUDETECT
576#if ARCH_X86 && HAVE_INLINE_ASM
577 // ordered per speed fastest first
578 if (c->cpuCaps & AV_CPU_FLAG_SSE2) pp = postProcess_SSE2;
579 else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = postProcess_MMX2;
580 else if (c->cpuCaps & AV_CPU_FLAG_3DNOW) pp = postProcess_3DNow;
581 else if (c->cpuCaps & AV_CPU_FLAG_MMX) pp = postProcess_MMX;
582#elif HAVE_ALTIVEC
583 if (c->cpuCaps & AV_CPU_FLAG_ALTIVEC) pp = postProcess_altivec;
584#endif
585#else /* CONFIG_RUNTIME_CPUDETECT */
586#if HAVE_SSE2_INLINE
587 pp = postProcess_SSE2;
588#elif HAVE_MMXEXT_INLINE
589 pp = postProcess_MMX2;
590#elif HAVE_AMD3DNOW_INLINE
591 pp = postProcess_3DNow;
592#elif HAVE_MMX_INLINE
593 pp = postProcess_MMX;
594#elif HAVE_ALTIVEC
595 pp = postProcess_altivec;
596#endif
597#endif /* !CONFIG_RUNTIME_CPUDETECT */
598 }
599
600 pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
601}
602
/* -pp Command line Help
 *
 * The filter names printed here must match the shortName/longName strings of
 * the filters table, because the mode parser compares them with a
 * case-sensitive strcmp().
 */
const char pp_help[] =
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
648
649pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
650{
651 char temp[GET_MODE_BUFFER_SIZE];
652 char *p= temp;
653 static const char filterDelimiters[] = ",/";
654 static const char optionDelimiters[] = ":|";
655 struct PPMode *ppMode;
656 char *filterToken;
657
658 if (!name) {
659 av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
660 return NULL;
661 }
662
663 if (!strcmp(name, "help")) {
664 const char *p;
665 for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
666 av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
667 av_log(NULL, AV_LOG_INFO, "%s", temp);
668 }
669 return NULL;
670 }
671
672 ppMode= av_malloc(sizeof(PPMode));
673 if (!ppMode)
674 return NULL;
675
676 ppMode->lumMode= 0;
677 ppMode->chromMode= 0;
678 ppMode->maxTmpNoise[0]= 700;
679 ppMode->maxTmpNoise[1]= 1500;
680 ppMode->maxTmpNoise[2]= 3000;
681 ppMode->maxAllowedY= 234;
682 ppMode->minAllowedY= 16;
683 ppMode->baseDcDiff= 256/8;
684 ppMode->flatnessThreshold= 56-16-1;
685 ppMode->maxClippedThreshold= (AVRational){1,100};
686 ppMode->error=0;
687
688 memset(temp, 0, GET_MODE_BUFFER_SIZE);
689 av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
690
691 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
692
693 for(;;){
694 const char *filterName;
695 int q= 1000000; //PP_QUALITY_MAX;
696 int chrom=-1;
697 int luma=-1;
698 const char *option;
699 const char *options[OPTIONS_ARRAY_SIZE];
700 int i;
701 int filterNameOk=0;
702 int numOfUnknownOptions=0;
703 int enable=1; //does the user want us to enabled or disabled the filter
704 char *tokstate;
705
706 filterToken= av_strtok(p, filterDelimiters, &tokstate);
707 if(!filterToken) break;
708 p+= strlen(filterToken) + 1; // p points to next filterToken
709 filterName= av_strtok(filterToken, optionDelimiters, &tokstate);
710 if (!filterName) {
711 ppMode->error++;
712 break;
713 }
714 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
715
716 if(*filterName == '-'){
717 enable=0;
718 filterName++;
719 }
720
721 for(;;){ //for all options
722 option= av_strtok(NULL, optionDelimiters, &tokstate);
723 if(!option) break;
724
725 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
726 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
727 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
728 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
729 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
730 else{
731 options[numOfUnknownOptions] = option;
732 numOfUnknownOptions++;
733 }
734 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
735 }
736 options[numOfUnknownOptions] = NULL;
737
738 /* replace stuff from the replace Table */
739 for(i=0; replaceTable[2*i]; i++){
740 if(!strcmp(replaceTable[2*i], filterName)){
741 size_t newlen = strlen(replaceTable[2*i + 1]);
742 int plen;
743 int spaceLeft;
744
745 p--, *p=',';
746
747 plen= strlen(p);
748 spaceLeft= p - temp + plen;
749 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
750 ppMode->error++;
751 break;
752 }
753 memmove(p + newlen, p, plen+1);
754 memcpy(p, replaceTable[2*i + 1], newlen);
755 filterNameOk=1;
756 }
757 }
758
759 for(i=0; filters[i].shortName; i++){
760 if( !strcmp(filters[i].longName, filterName)
761 || !strcmp(filters[i].shortName, filterName)){
762 ppMode->lumMode &= ~filters[i].mask;
763 ppMode->chromMode &= ~filters[i].mask;
764
765 filterNameOk=1;
766 if(!enable) break; // user wants to disable it
767
768 if(q >= filters[i].minLumQuality && luma)
769 ppMode->lumMode|= filters[i].mask;
770 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
771 if(q >= filters[i].minChromQuality)
772 ppMode->chromMode|= filters[i].mask;
773
774 if(filters[i].mask == LEVEL_FIX){
775 int o;
776 ppMode->minAllowedY= 16;
777 ppMode->maxAllowedY= 234;
778 for(o=0; options[o]; o++){
779 if( !strcmp(options[o],"fullyrange")
780 ||!strcmp(options[o],"f")){
781 ppMode->minAllowedY= 0;
782 ppMode->maxAllowedY= 255;
783 numOfUnknownOptions--;
784 }
785 }
786 }
787 else if(filters[i].mask == TEMP_NOISE_FILTER)
788 {
789 int o;
790 int numOfNoises=0;
791
792 for(o=0; options[o]; o++){
793 char *tail;
794 ppMode->maxTmpNoise[numOfNoises]=
795 strtol(options[o], &tail, 0);
796 if(tail!=options[o]){
797 numOfNoises++;
798 numOfUnknownOptions--;
799 if(numOfNoises >= 3) break;
800 }
801 }
802 }
803 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
804 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
805 int o;
806
807 for(o=0; options[o] && o<2; o++){
808 char *tail;
809 int val= strtol(options[o], &tail, 0);
810 if(tail==options[o]) break;
811
812 numOfUnknownOptions--;
813 if(o==0) ppMode->baseDcDiff= val;
814 else ppMode->flatnessThreshold= val;
815 }
816 }
817 else if(filters[i].mask == FORCE_QUANT){
818 int o;
819 ppMode->forcedQuant= 15;
820
821 for(o=0; options[o] && o<1; o++){
822 char *tail;
823 int val= strtol(options[o], &tail, 0);
824 if(tail==options[o]) break;
825
826 numOfUnknownOptions--;
827 ppMode->forcedQuant= val;
828 }
829 }
830 }
831 }
832 if(!filterNameOk) ppMode->error++;
833 ppMode->error += numOfUnknownOptions;
834 }
835
836 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
837 if(ppMode->error){
838 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
839 av_free(ppMode);
840 return NULL;
841 }
842 return ppMode;
843}
844
845void pp_free_mode(pp_mode *mode){
846 av_free(mode);
847}
848
/**
 * Replace the buffer *p with a fresh zero-filled one of the given size.
 * The old contents are discarded, not copied. *p is set to whatever
 * av_mallocz() returns; a failed allocation is not reported to the caller.
 */
static void reallocAlign(void **p, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p    = fresh;
}
853
854static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
855 int mbWidth = (width+15)>>4;
856 int mbHeight= (height+15)>>4;
857 int i;
858
859 c->stride= stride;
860 c->qpStride= qpStride;
861
862 reallocAlign((void **)&c->tempDst, stride*24+32);
863 reallocAlign((void **)&c->tempSrc, stride*24);
864 reallocAlign((void **)&c->tempBlocks, 2*16*8);
865 reallocAlign((void **)&c->yHistogram, 256*sizeof(uint64_t));
866 for(i=0; i<256; i++)
867 c->yHistogram[i]= width*height/64*15/256;
868
869 for(i=0; i<3; i++){
870 //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
871 reallocAlign((void **)&c->tempBlurred[i], stride*mbHeight*16 + 17*1024);
872 reallocAlign((void **)&c->tempBlurredPast[i], 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
873 }
874
875 reallocAlign((void **)&c->deintTemp, 2*width+32);
876 reallocAlign((void **)&c->nonBQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
877 reallocAlign((void **)&c->stdQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
878 reallocAlign((void **)&c->forcedQPTable, mbWidth*sizeof(QP_STORE_T));
879}
880
/* Name callback stored in av_codec_context_class: the same fixed string is
 * returned for every context, the argument is not examined. */
static const char *context_to_name(void *ptr)
{
    static const char contextName[] = "postproc";

    (void)ptr;
    return contextName;
}
884
885static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
886
887av_cold pp_context *pp_get_context(int width, int height, int cpuCaps){
888 PPContext *c= av_mallocz(sizeof(PPContext));
889 int stride= FFALIGN(width, 16); //assumed / will realloc if needed
890 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
891
892 if (!c)
893 return NULL;
894
895 c->av_class = &av_codec_context_class;
896 if(cpuCaps&PP_FORMAT){
897 c->hChromaSubSample= cpuCaps&0x3;
898 c->vChromaSubSample= (cpuCaps>>4)&0x3;
899 }else{
900 c->hChromaSubSample= 1;
901 c->vChromaSubSample= 1;
902 }
903 if (cpuCaps & PP_CPU_CAPS_AUTO) {
904 c->cpuCaps = av_get_cpu_flags();
905 } else {
906 c->cpuCaps = 0;
907 if (cpuCaps & PP_CPU_CAPS_MMX) c->cpuCaps |= AV_CPU_FLAG_MMX;
908 if (cpuCaps & PP_CPU_CAPS_MMX2) c->cpuCaps |= AV_CPU_FLAG_MMXEXT;
909 if (cpuCaps & PP_CPU_CAPS_3DNOW) c->cpuCaps |= AV_CPU_FLAG_3DNOW;
910 if (cpuCaps & PP_CPU_CAPS_ALTIVEC) c->cpuCaps |= AV_CPU_FLAG_ALTIVEC;
911 }
912
913 reallocBuffers(c, width, height, stride, qpStride);
914
915 c->frameNum=-1;
916
917 return c;
918}
919
920av_cold void pp_free_context(void *vc){
921 PPContext *c = (PPContext*)vc;
922 int i;
923
924 for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurred); i++)
925 av_free(c->tempBlurred[i]);
926 for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurredPast); i++)
927 av_free(c->tempBlurredPast[i]);
928
929 av_free(c->tempBlocks);
930 av_free(c->yHistogram);
931 av_free(c->tempDst);
932 av_free(c->tempSrc);
933 av_free(c->deintTemp);
934 av_free(c->stdQPTable);
935 av_free(c->nonBQPTable);
936 av_free(c->forcedQPTable);
937
938 memset(c, 0, sizeof(PPContext));
939
940 av_free(c);
941}
942
943void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
944 uint8_t * dst[3], const int dstStride[3],
945 int width, int height,
946 const QP_STORE_T *QP_store, int QPStride,
947 pp_mode *vm, void *vc, int pict_type)
948{
949 int mbWidth = (width+15)>>4;
950 int mbHeight= (height+15)>>4;
951 PPMode *mode = vm;
952 PPContext *c = vc;
953 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
954 int absQPStride = FFABS(QPStride);
955
956 // c->stride and c->QPStride are always positive
957 if(c->stride < minStride || c->qpStride < absQPStride)
958 reallocBuffers(c, width, height,
959 FFMAX(minStride, c->stride),
960 FFMAX(c->qpStride, absQPStride));
961
962 if(!QP_store || (mode->lumMode & FORCE_QUANT)){
963 int i;
964 QP_store= c->forcedQPTable;
965 absQPStride = QPStride = 0;
966 if(mode->lumMode & FORCE_QUANT)
967 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
968 else
969 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
970 }
971
972 if(pict_type & PP_PICT_TYPE_QP2){
973 int i;
974 const int count= FFMAX(mbHeight * absQPStride, mbWidth);
975 for(i=0; i<(count>>2); i++){
976 AV_WN32(c->stdQPTable + (i<<2), AV_RN32(QP_store + (i<<2)) >> 1 & 0x7F7F7F7F);
977 }
978 for(i<<=2; i<count; i++){
979 c->stdQPTable[i] = QP_store[i]>>1;
980 }
981 QP_store= c->stdQPTable;
982 QPStride= absQPStride;
983 }
984
985 if(0){
986 int x,y;
987 for(y=0; y<mbHeight; y++){
988 for(x=0; x<mbWidth; x++){
989 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
990 }
991 av_log(c, AV_LOG_INFO, "\n");
992 }
993 av_log(c, AV_LOG_INFO, "\n");
994 }
995
996 if((pict_type&7)!=3){
997 if (QPStride >= 0){
998 int i;
999 const int count= FFMAX(mbHeight * QPStride, mbWidth);
1000 for(i=0; i<(count>>2); i++){
1001 AV_WN32(c->nonBQPTable + (i<<2), AV_RN32(QP_store + (i<<2)) & 0x3F3F3F3F);
1002 }
1003 for(i<<=2; i<count; i++){
1004 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1005 }
1006 } else {
1007 int i,j;
1008 for(i=0; i<mbHeight; i++) {
1009 for(j=0; j<absQPStride; j++) {
1010 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1011 }
1012 }
1013 }
1014 }
1015
1016 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1017 mode->lumMode, mode->chromMode);
1018
1019 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1020 width, height, QP_store, QPStride, 0, mode, c);
1021
1022 if (!(src[1] && src[2] && dst[1] && dst[2]))
1023 return;
1024
1025 width = (width )>>c->hChromaSubSample;
1026 height = (height)>>c->vChromaSubSample;
1027
1028 if(mode->chromMode){
1029 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1030 width, height, QP_store, QPStride, 1, mode, c);
1031 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1032 width, height, QP_store, QPStride, 2, mode, c);
1033 }
1034 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1035 linecpy(dst[1], src[1], height, srcStride[1]);
1036 linecpy(dst[2], src[2], height, srcStride[2]);
1037 }else{
1038 int y;
1039 for(y=0; y<height; y++){
1040 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1041 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1042 }
1043 }
1044}
1045