diff options
Diffstat (limited to 'main/musl/0005-math-fix-x86_64-and-x32-asm-not-to-use-sahf-instruct.patch')
-rw-r--r-- | main/musl/0005-math-fix-x86_64-and-x32-asm-not-to-use-sahf-instruct.patch | 159 |
1 files changed, 159 insertions, 0 deletions
diff --git a/main/musl/0005-math-fix-x86_64-and-x32-asm-not-to-use-sahf-instruct.patch b/main/musl/0005-math-fix-x86_64-and-x32-asm-not-to-use-sahf-instruct.patch new file mode 100644 index 0000000000..807536be46 --- /dev/null +++ b/main/musl/0005-math-fix-x86_64-and-x32-asm-not-to-use-sahf-instruct.patch @@ -0,0 +1,159 @@ +From a732e80d33b4fd6f510f7cec4f5573ef5d89bc4e Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <nsz@port70.net> +Date: Wed, 5 Nov 2014 21:40:29 +0100 +Subject: [PATCH] math: fix x86_64 and x32 asm not to use sahf instruction + +Some early x86_64 cpus (released before 2006) did not support sahf/lahf +instructions so they should be avoided (intel manual says they are only +supported if CPUID.80000001H:ECX.LAHF-SAHF[bit 0] = 1). + +The workaround simplifies exp2l and expm1l because fucomip can be +used instead of the fucomp;fnstsw;sahf sequence copied from i386. + +In fmodl and remainderl sahf is replaced by a simple bit test. +--- + src/math/x32/exp2l.s | 13 +++---------- + src/math/x32/fmodl.s | 4 ++-- + src/math/x32/remainderl.s | 4 ++-- + src/math/x86_64/exp2l.s | 13 +++---------- + src/math/x86_64/fmodl.s | 4 ++-- + src/math/x86_64/remainderl.s | 4 ++-- + 6 files changed, 14 insertions(+), 28 deletions(-) + +diff --git a/src/math/x32/exp2l.s b/src/math/x32/exp2l.s +index d9f4d6e..dfb2bc7 100644 +--- a/src/math/x32/exp2l.s ++++ b/src/math/x32/exp2l.s +@@ -6,9 +6,7 @@ expm1l: + fmulp + movl $0xc2820000,-4(%esp) + flds -4(%esp) +- fucomp %st(1) +- fnstsw %ax +- sahf ++ fucomip %st(1) + fld1 + jb 1f + # x*log2e <= -65, return -1 without underflow +@@ -17,11 +15,8 @@ expm1l: + ret + 1: fld %st(1) + fabs +- fucom %st(1) +- fnstsw %ax ++ fucomip %st(1) + fstp %st(0) +- fstp %st(0) +- sahf + ja 1f + f2xm1 + ret +@@ -53,9 +48,7 @@ exp2l: + fld %st(1) + fsub %st(1) + faddp +- fucomp %st(1) +- fnstsw +- sahf ++ fucomip %st(1) + je 2f # x - 0x1p63 + 0x1p63 == x + movl $1,(%esp) + flds (%esp) # 0x1p-149 +diff --git a/src/math/x32/fmodl.s b/src/math/x32/fmodl.s +index 9e4378a..b951320 100644 +--- a/src/math/x32/fmodl.s ++++ b/src/math/x32/fmodl.s +@@ -5,7 +5,7 @@ fmodl: + fldt 8(%esp) + 1: fprem + fstsw %ax +- sahf +- jp 1b ++ testb $4,%ah ++ jnz 1b + fstp %st(1) + ret +diff --git a/src/math/x32/remainderl.s b/src/math/x32/remainderl.s +index c97f68a..79bf4fe 100644 +--- a/src/math/x32/remainderl.s ++++ b/src/math/x32/remainderl.s +@@ -5,7 +5,7 @@ remainderl: + fldt 8(%esp) + 1: fprem1 + fstsw %ax +- sahf +- jp 1b ++ testb $4,%ah ++ jnz 1b + fstp %st(1) + ret +diff --git a/src/math/x86_64/exp2l.s b/src/math/x86_64/exp2l.s +index 0d6cd56..0e9bdf9 100644 +--- a/src/math/x86_64/exp2l.s ++++ b/src/math/x86_64/exp2l.s +@@ -6,9 +6,7 @@ expm1l: + fmulp + movl $0xc2820000,-4(%rsp) + flds -4(%rsp) +- fucomp %st(1) +- fnstsw %ax +- sahf ++ fucomip %st(1) + fld1 + jb 1f + # x*log2e <= -65, return -1 without underflow +@@ -17,11 +15,8 @@ expm1l: + ret + 1: fld %st(1) + fabs +- fucom %st(1) +- fnstsw %ax ++ fucomip %st(1) + fstp %st(0) +- fstp %st(0) +- sahf + ja 1f + f2xm1 + ret +@@ -53,9 +48,7 @@ exp2l: + fld %st(1) + fsub %st(1) + faddp +- fucomp %st(1) +- fnstsw +- sahf ++ fucomip %st(1) + je 2f # x - 0x1p63 + 0x1p63 == x + movl $1,(%rsp) + flds (%rsp) # 0x1p-149 +diff --git a/src/math/x86_64/fmodl.s b/src/math/x86_64/fmodl.s +index ca81e60..cd8d2b7 100644 +--- a/src/math/x86_64/fmodl.s ++++ b/src/math/x86_64/fmodl.s +@@ -5,7 +5,7 @@ fmodl: + fldt 8(%rsp) + 1: fprem + fstsw %ax +- sahf +- jp 1b ++ testb $4,%ah ++ jnz 1b + fstp %st(1) + ret +diff --git a/src/math/x86_64/remainderl.s b/src/math/x86_64/remainderl.s +index 75c1237..2c337cf 100644 +--- a/src/math/x86_64/remainderl.s ++++ b/src/math/x86_64/remainderl.s +@@ -5,7 +5,7 @@ remainderl: + fldt 8(%rsp) + 1: fprem1 + fstsw %ax +- sahf +- jp 1b ++ testb $4,%ah ++ jnz 1b + fstp %st(1) + ret +-- +2.2.0 + |