summaryrefslogtreecommitdiffstats
path: root/main/musl/0005-math-fix-x86_64-and-x32-asm-not-to-use-sahf-instruct.patch
blob: 807536be46c51e16a53cc5317e1538d99ab6c74f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
From a732e80d33b4fd6f510f7cec4f5573ef5d89bc4e Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Wed, 5 Nov 2014 21:40:29 +0100
Subject: [PATCH] math: fix x86_64 and x32 asm not to use sahf instruction

Some early x86_64 CPUs (released before 2006) did not support the
sahf/lahf instructions in 64-bit mode, so they should be avoided (the
Intel manual says they are only supported if
CPUID.80000001H:ECX.LAHF-SAHF[bit 0] = 1).

The workaround simplifies exp2l and expm1l because fucomip, which sets
EFLAGS directly, can be used instead of the fucom(p);fnstsw;sahf
sequence copied from i386.

In fmodl and remainderl, sahf is replaced by a simple bit test: after
fstsw %ax the C2 flag is bit 2 of %ah, so "sahf; jp" becomes
"testb $4,%ah; jnz".
---
 src/math/x32/exp2l.s         | 13 +++----------
 src/math/x32/fmodl.s         |  4 ++--
 src/math/x32/remainderl.s    |  4 ++--
 src/math/x86_64/exp2l.s      | 13 +++----------
 src/math/x86_64/fmodl.s      |  4 ++--
 src/math/x86_64/remainderl.s |  4 ++--
 6 files changed, 14 insertions(+), 28 deletions(-)

diff --git a/src/math/x32/exp2l.s b/src/math/x32/exp2l.s
index d9f4d6e..dfb2bc7 100644
--- a/src/math/x32/exp2l.s
+++ b/src/math/x32/exp2l.s
@@ -6,9 +6,7 @@ expm1l:
 	fmulp
 	movl $0xc2820000,-4(%esp)
 	flds -4(%esp)
-	fucomp %st(1)
-	fnstsw %ax
-	sahf
+	fucomip %st(1)
 	fld1
 	jb 1f
 		# x*log2e <= -65, return -1 without underflow
@@ -17,11 +15,8 @@ expm1l:
 	ret
 1:	fld %st(1)
 	fabs
-	fucom %st(1)
-	fnstsw %ax
+	fucomip %st(1)
 	fstp %st(0)
-	fstp %st(0)
-	sahf
 	ja 1f
 	f2xm1
 	ret
@@ -53,9 +48,7 @@ exp2l:
 	fld %st(1)
 	fsub %st(1)
 	faddp
-	fucomp %st(1)
-	fnstsw
-	sahf
+	fucomip %st(1)
 	je 2f             # x - 0x1p63 + 0x1p63 == x
 	movl $1,(%esp)
 	flds (%esp)       # 0x1p-149
diff --git a/src/math/x32/fmodl.s b/src/math/x32/fmodl.s
index 9e4378a..b951320 100644
--- a/src/math/x32/fmodl.s
+++ b/src/math/x32/fmodl.s
@@ -5,7 +5,7 @@ fmodl:
 	fldt 8(%esp)
 1:	fprem
 	fstsw %ax
-	sahf
-	jp 1b
+	testb $4,%ah
+	jnz 1b
 	fstp %st(1)
 	ret
diff --git a/src/math/x32/remainderl.s b/src/math/x32/remainderl.s
index c97f68a..79bf4fe 100644
--- a/src/math/x32/remainderl.s
+++ b/src/math/x32/remainderl.s
@@ -5,7 +5,7 @@ remainderl:
 	fldt 8(%esp)
 1:	fprem1
 	fstsw %ax
-	sahf
-	jp 1b
+	testb $4,%ah
+	jnz 1b
 	fstp %st(1)
 	ret
diff --git a/src/math/x86_64/exp2l.s b/src/math/x86_64/exp2l.s
index 0d6cd56..0e9bdf9 100644
--- a/src/math/x86_64/exp2l.s
+++ b/src/math/x86_64/exp2l.s
@@ -6,9 +6,7 @@ expm1l:
 	fmulp
 	movl $0xc2820000,-4(%rsp)
 	flds -4(%rsp)
-	fucomp %st(1)
-	fnstsw %ax
-	sahf
+	fucomip %st(1)
 	fld1
 	jb 1f
 		# x*log2e <= -65, return -1 without underflow
@@ -17,11 +15,8 @@ expm1l:
 	ret
 1:	fld %st(1)
 	fabs
-	fucom %st(1)
-	fnstsw %ax
+	fucomip %st(1)
 	fstp %st(0)
-	fstp %st(0)
-	sahf
 	ja 1f
 	f2xm1
 	ret
@@ -53,9 +48,7 @@ exp2l:
 	fld %st(1)
 	fsub %st(1)
 	faddp
-	fucomp %st(1)
-	fnstsw
-	sahf
+	fucomip %st(1)
 	je 2f             # x - 0x1p63 + 0x1p63 == x
 	movl $1,(%rsp)
 	flds (%rsp)       # 0x1p-149
diff --git a/src/math/x86_64/fmodl.s b/src/math/x86_64/fmodl.s
index ca81e60..cd8d2b7 100644
--- a/src/math/x86_64/fmodl.s
+++ b/src/math/x86_64/fmodl.s
@@ -5,7 +5,7 @@ fmodl:
 	fldt 8(%rsp)
 1:	fprem
 	fstsw %ax
-	sahf
-	jp 1b
+	testb $4,%ah
+	jnz 1b
 	fstp %st(1)
 	ret
diff --git a/src/math/x86_64/remainderl.s b/src/math/x86_64/remainderl.s
index 75c1237..2c337cf 100644
--- a/src/math/x86_64/remainderl.s
+++ b/src/math/x86_64/remainderl.s
@@ -5,7 +5,7 @@ remainderl:
 	fldt 8(%rsp)
 1:	fprem1
 	fstsw %ax
-	sahf
-	jp 1b
+	testb $4,%ah
+	jnz 1b
 	fstp %st(1)
 	ret
-- 
2.2.0