about | summary | refs | log | tree | commit | diff | stats
path: root/main/musl/0005-math-fix-x86_64-and-x32-asm-not-to-use-sahf-instruct.patch
diff options
context:
space:
mode:
Diffstat (limited to 'main/musl/0005-math-fix-x86_64-and-x32-asm-not-to-use-sahf-instruct.patch')
-rw-r--r-- main/musl/0005-math-fix-x86_64-and-x32-asm-not-to-use-sahf-instruct.patch 159
1 files changed, 159 insertions, 0 deletions
diff --git a/main/musl/0005-math-fix-x86_64-and-x32-asm-not-to-use-sahf-instruct.patch b/main/musl/0005-math-fix-x86_64-and-x32-asm-not-to-use-sahf-instruct.patch
new file mode 100644
index 0000000000..807536be46
--- /dev/null
+++ b/main/musl/0005-math-fix-x86_64-and-x32-asm-not-to-use-sahf-instruct.patch
@@ -0,0 +1,159 @@
+From a732e80d33b4fd6f510f7cec4f5573ef5d89bc4e Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <nsz@port70.net>
+Date: Wed, 5 Nov 2014 21:40:29 +0100
+Subject: [PATCH] math: fix x86_64 and x32 asm not to use sahf instruction
+
+Some early x86_64 cpus (released before 2006) did not support sahf/lahf
+instructions so they should be avoided (intel manual says they are only
+supported if CPUID.80000001H:ECX.LAHF-SAHF[bit 0] = 1).
+
+The workaround simplifies exp2l and expm1l because fucomip can be
+used instead of the fucomp;fnstsw;sahf sequence copied from i386.
+
+In fmodl and remainderl sahf is replaced by a simple bit test.
+---
+ src/math/x32/exp2l.s | 13 +++----------
+ src/math/x32/fmodl.s | 4 ++--
+ src/math/x32/remainderl.s | 4 ++--
+ src/math/x86_64/exp2l.s | 13 +++----------
+ src/math/x86_64/fmodl.s | 4 ++--
+ src/math/x86_64/remainderl.s | 4 ++--
+ 6 files changed, 14 insertions(+), 28 deletions(-)
+
+diff --git a/src/math/x32/exp2l.s b/src/math/x32/exp2l.s
+index d9f4d6e..dfb2bc7 100644
+--- a/src/math/x32/exp2l.s
++++ b/src/math/x32/exp2l.s
+@@ -6,9 +6,7 @@ expm1l:
+ fmulp
+ movl $0xc2820000,-4(%esp)
+ flds -4(%esp)
+- fucomp %st(1)
+- fnstsw %ax
+- sahf
++ fucomip %st(1)
+ fld1
+ jb 1f
+ # x*log2e <= -65, return -1 without underflow
+@@ -17,11 +15,8 @@ expm1l:
+ ret
+ 1: fld %st(1)
+ fabs
+- fucom %st(1)
+- fnstsw %ax
++ fucomip %st(1)
+ fstp %st(0)
+- fstp %st(0)
+- sahf
+ ja 1f
+ f2xm1
+ ret
+@@ -53,9 +48,7 @@ exp2l:
+ fld %st(1)
+ fsub %st(1)
+ faddp
+- fucomp %st(1)
+- fnstsw
+- sahf
++ fucomip %st(1)
+ je 2f # x - 0x1p63 + 0x1p63 == x
+ movl $1,(%esp)
+ flds (%esp) # 0x1p-149
+diff --git a/src/math/x32/fmodl.s b/src/math/x32/fmodl.s
+index 9e4378a..b951320 100644
+--- a/src/math/x32/fmodl.s
++++ b/src/math/x32/fmodl.s
+@@ -5,7 +5,7 @@ fmodl:
+ fldt 8(%esp)
+ 1: fprem
+ fstsw %ax
+- sahf
+- jp 1b
++ testb $4,%ah
++ jnz 1b
+ fstp %st(1)
+ ret
+diff --git a/src/math/x32/remainderl.s b/src/math/x32/remainderl.s
+index c97f68a..79bf4fe 100644
+--- a/src/math/x32/remainderl.s
++++ b/src/math/x32/remainderl.s
+@@ -5,7 +5,7 @@ remainderl:
+ fldt 8(%esp)
+ 1: fprem1
+ fstsw %ax
+- sahf
+- jp 1b
++ testb $4,%ah
++ jnz 1b
+ fstp %st(1)
+ ret
+diff --git a/src/math/x86_64/exp2l.s b/src/math/x86_64/exp2l.s
+index 0d6cd56..0e9bdf9 100644
+--- a/src/math/x86_64/exp2l.s
++++ b/src/math/x86_64/exp2l.s
+@@ -6,9 +6,7 @@ expm1l:
+ fmulp
+ movl $0xc2820000,-4(%rsp)
+ flds -4(%rsp)
+- fucomp %st(1)
+- fnstsw %ax
+- sahf
++ fucomip %st(1)
+ fld1
+ jb 1f
+ # x*log2e <= -65, return -1 without underflow
+@@ -17,11 +15,8 @@ expm1l:
+ ret
+ 1: fld %st(1)
+ fabs
+- fucom %st(1)
+- fnstsw %ax
++ fucomip %st(1)
+ fstp %st(0)
+- fstp %st(0)
+- sahf
+ ja 1f
+ f2xm1
+ ret
+@@ -53,9 +48,7 @@ exp2l:
+ fld %st(1)
+ fsub %st(1)
+ faddp
+- fucomp %st(1)
+- fnstsw
+- sahf
++ fucomip %st(1)
+ je 2f # x - 0x1p63 + 0x1p63 == x
+ movl $1,(%rsp)
+ flds (%rsp) # 0x1p-149
+diff --git a/src/math/x86_64/fmodl.s b/src/math/x86_64/fmodl.s
+index ca81e60..cd8d2b7 100644
+--- a/src/math/x86_64/fmodl.s
++++ b/src/math/x86_64/fmodl.s
+@@ -5,7 +5,7 @@ fmodl:
+ fldt 8(%rsp)
+ 1: fprem
+ fstsw %ax
+- sahf
+- jp 1b
++ testb $4,%ah
++ jnz 1b
+ fstp %st(1)
+ ret
+diff --git a/src/math/x86_64/remainderl.s b/src/math/x86_64/remainderl.s
+index 75c1237..2c337cf 100644
+--- a/src/math/x86_64/remainderl.s
++++ b/src/math/x86_64/remainderl.s
+@@ -5,7 +5,7 @@ remainderl:
+ fldt 8(%rsp)
+ 1: fprem1
+ fstsw %ax
+- sahf
+- jp 1b
++ testb $4,%ah
++ jnz 1b
+ fstp %st(1)
+ ret
+--
+2.2.0
+