summaryrefslogtreecommitdiff
author Derek Buitenhuis <derek.buitenhuis@gmail.com>2013-10-09 12:54:59 (GMT)
committer Derek Buitenhuis <derek.buitenhuis@gmail.com>2013-10-14 11:39:29 (GMT)
commit 206895708ea2b464755d340e44501daf9a07c310 (patch)
tree adef04fec470970a6e01ed4752c44d9159b56ab3
parent c108ba0175d4fc3a3253a8b0f782fbfb96ba5098 (diff)
downloadffmpeg-206895708ea2b464755d340e44501daf9a07c310.zip
ffmpeg-206895708ea2b464755d340e44501daf9a07c310.tar.gz
ffmpeg-206895708ea2b464755d340e44501daf9a07c310.tar.bz2
x86inc: Remove our FMA4 support
This is so we can sync to x264's version of FMA4 support. This partially reverts commit 79687079a97a039c325ab79d7a95920d800b791f. Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
Diffstat
-rw-r--r-- libavresample/x86/audio_mix.asm 18
-rw-r--r-- libavutil/x86/x86inc.asm 16
-rw-r--r-- libavutil/x86/x86util.asm 11
3 files changed, 25 insertions, 20 deletions
diff --git a/libavresample/x86/audio_mix.asm b/libavresample/x86/audio_mix.asm
index 4710bb1..2c657b5 100644
--- a/libavresample/x86/audio_mix.asm
+++ b/libavresample/x86/audio_mix.asm
@@ -384,10 +384,10 @@ cglobal mix_%1_to_%2_%3_flt, 3,in_channels+2,needed_mmregs+matrix_elements_mm, n
S16_TO_S32_SX 4, 5
cvtdq2ps m4, m4
cvtdq2ps m5, m5
- fmaddps m2, m4, mx_1_ %+ %%i, m2, m6
- fmaddps m3, m5, mx_1_ %+ %%i, m3, m6
- fmaddps m0, m4, mx_0_ %+ %%i, m0, m4
- fmaddps m1, m5, mx_0_ %+ %%i, m1, m5
+ FMULADD_PS m2, m4, mx_1_ %+ %%i, m2, m6
+ FMULADD_PS m3, m5, mx_1_ %+ %%i, m3, m6
+ FMULADD_PS m0, m4, mx_0_ %+ %%i, m0, m4
+ FMULADD_PS m1, m5, mx_0_ %+ %%i, m1, m5
%else
%if copy_src_from_stack
mov src_ptr, src %+ %%i %+ m
@@ -396,8 +396,8 @@ cglobal mix_%1_to_%2_%3_flt, 3,in_channels+2,needed_mmregs+matrix_elements_mm, n
S16_TO_S32_SX 2, 3
cvtdq2ps m2, m2
cvtdq2ps m3, m3
- fmaddps m0, m2, mx_0_ %+ %%i, m0, m4
- fmaddps m1, m3, mx_0_ %+ %%i, m1, m4
+ FMULADD_PS m0, m2, mx_0_ %+ %%i, m0, m4
+ FMULADD_PS m1, m3, mx_0_ %+ %%i, m1, m4
%endif
%assign %%i %%i+1
%endrep
@@ -437,12 +437,12 @@ cglobal mix_%1_to_%2_%3_flt, 3,in_channels+2,needed_mmregs+matrix_elements_mm, n
mova m2, [src_ptr+lenq]
%endif
%if stereo
- fmaddps m1, m2, mx_1_ %+ %%i, m1, m3
+ FMULADD_PS m1, m2, mx_1_ %+ %%i, m1, m3
%endif
%if stereo || mx_stack_0_ %+ %%i
- fmaddps m0, m2, mx_0_ %+ %%i, m0, m2
+ FMULADD_PS m0, m2, mx_0_ %+ %%i, m0, m2
%else
- fmaddps m0, mx_0_ %+ %%i, [src_ptr+lenq], m0, m1
+ FMULADD_PS m0, mx_0_ %+ %%i, [src_ptr+lenq], m0, m1
%endif
%assign %%i %%i+1
%endrep
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index b2b67c5..051ac58 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -1400,22 +1400,16 @@ AVX_INSTR pfmul, 1, 0, 1
%undef j
%macro FMA_INSTR 3
- %macro %1 5-8 %1, %2, %3
- %if cpuflag(xop) || cpuflag(fma4)
- v%6 %1, %2, %3, %4
+ %macro %1 4-7 %1, %2, %3
+ %if cpuflag(xop)
+ v%5 %1, %2, %3, %4
%else
- %ifidn %1, %4
- %7 %5, %2, %3
- %8 %1, %4, %5
- %else
- %7 %1, %2, %3
- %8 %1, %4
- %endif
+ %6 %1, %2, %3
+ %7 %1, %4
%endif
%endmacro
%endmacro
-FMA_INSTR fmaddps, mulps, addps
FMA_INSTR pmacsdd, pmulld, paddd
FMA_INSTR pmacsww, pmullw, paddw
FMA_INSTR pmadcswd, pmaddwd, paddd
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index 79a023f..bfe7a23 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -665,3 +665,14 @@
psrad %1, 16
%endif
%endmacro
+
+; Wrapper for non-FMA version of fmaddps
+%macro FMULADD_PS 5
+ %ifidn %1, %4
+ mulps %5, %2, %3
+ addps %1, %4, %5
+ %else
+ mulps %1, %2, %3
+ addps %1, %4
+ %endif
+%endmacro