summary refs log tree commit diff
path: root/test
diff options
context:
space:
mode:
authorMichael Forney <mforney@mforney.org>2021-08-22 12:55:02 -0700
committerQuentin Carbonneaux <quentin@c9x.me>2021-08-29 22:33:04 +0200
commit7ac88f5d4874f03d62f48055eded26e9a08e54ac (patch)
tree21469dfbb3dfbb1a198fe47061019d43e88e5159 /test
parent804921a3ab463848aa0ffbe495ca542b3789c841 (diff)
downloadroux-7ac88f5d4874f03d62f48055eded26e9a08e54ac.tar.gz
amd64/isel: fix floating point == and != result with NaN
On x86_64, ucomis[sd] sets ZF=1, PF=0, CF=0 for equal arguments.
However, if the arguments are unordered it sets ZF=1, PF=1, CF=1,
and there is no jump/flag instruction for ZF=1 & PF=0 or ZF=1 & CF=0.

So, in order to correctly implement ceq[sd] on x86_64, we need to
be a bit more creative. There are several options available, depending
on whether the result of ceq[sd] is used with jnz, or with other
instructions, or both.

If the result is used for a conditional jump, both gcc and clang
use a combination of jp and jnz:

	ucomisd %xmm1, %xmm0
	jp .Lfalse
	jnz .Lfalse
	...
	.Lfalse:

If the result is used in other instructions or return, gcc does the
following for x == y:

	ucomisd %xmm1, %xmm0
	setnp %al
	movzbl %al, %eax
	movl $0, %edx
	cmovne %edx, %eax

This sets EAX to PF=0, then uses cmovne to clear it if ZF=0. It
also takes care to avoid clobbering the flags register in case the
result is also used for a conditional jump. Implementing this
approach in QBE would require adding an architecture-specific
instruction for cmovne.

In contrast, clang does an additional compare, this time using
cmpeqsd instead of ucomisd:

	cmpeqsd %xmm1, %xmm0
	movq %xmm0, %rax
	andl $1, %rax

The cmpeqsd instruction does a floating point equality test, setting
XMM0 to all 1s if they are equal and all 0s if they are not. However,
we need the result in a non-XMM register, so it moves the result
back then masks off all but the first bit.

Both of these approaches are a bit awkward to implement in QBE, so
instead, this commit does the following:

	ucomisd %xmm1, %xmm0
	setz %al
	movzbl %al, %eax
	setnp %cl
	movzbl %cl, %ecx
	andl %ecx, %eax

This sets the result by anding the two flags, but has a side effect
of clobbering the flags register. This was a problem in one of my
earlier patches to fix this issue[0], in addition to being more
complex than I'd hoped.

Instead, this commit always leaves the ceq[sd] instruction in the
block, even if the result is only used to control a jump, so that
the above instruction sequence is always used. Then, since we now
have ZF=!(ZF=1 & PF=0) for x == y, or ZF=!(ZF=0 | PF=1) for x != y,
we can use jnz for the jump instruction.

[0] https://git.sr.ht/~sircmpwn/qbe/commit/64833841b18c074a23b4a1254625315e05b86658
Diffstat (limited to 'test')
-rw-r--r--test/isel2.ssa14
1 files changed, 6 insertions, 8 deletions
diff --git a/test/isel2.ssa b/test/isel2.ssa
index 280ceb2..8ca4a24 100644
--- a/test/isel2.ssa
+++ b/test/isel2.ssa
@@ -1,7 +1,5 @@
 # tests that NaN is handled properly by
 # floating point comparisons
-#
-# TODO: fix eq[123](NAN, NAN) on amd64
 
 export function w $lt(d %x, d %y) {
 @start
@@ -97,12 +95,12 @@ export function w $ne3(d %x, d %y) {
 # 	     + !le(0, 1)  + !le(0, 0)  + le(1, 0)    + le(NAN, NAN)
 # 	     + gt(0, 1)   + gt(0, 0)   + !gt(1, 0)   + gt(NAN, NAN)
 # 	     + ge(0, 1)   + !ge(0, 0)  + !ge(1, 0)   + ge(NAN, NAN)
-# 	     + eq1(0, 1)  + !eq1(0, 0) + eq1(1, 0)   /*+ eq1(NAN, NAN)*/
-# 	     + eq2(0, 1)  + !eq2(0, 0) + eq2(1, 0)   /*+ eq2(NAN, NAN)*/
-# 	     + eq3(0, 1)  + !eq3(0, 0) + eq3(1, 0)   /*+ eq3(NAN, NAN)*/
-# 	     + !ne1(0, 1) + ne1(0, 0)  + !ne1(1, 0)  /*+ !ne1(NAN, NAN)*/
-# 	     + !ne2(0, 1) + ne2(0, 0)  + !ne2(1, 0)  /*+ !ne2(NAN, NAN)*/
-# 	     + !ne3(0, 1) + ne3(0, 0)  + !ne3(1, 0)  /*+ !ne3(NAN, NAN)*/
+# 	     + eq1(0, 1)  + !eq1(0, 0) + eq1(1, 0)   + eq1(NAN, NAN)
+# 	     + eq2(0, 1)  + !eq2(0, 0) + eq2(1, 0)   + eq2(NAN, NAN)
+# 	     + eq3(0, 1)  + !eq3(0, 0) + eq3(1, 0)   + eq3(NAN, NAN)
+# 	     + !ne1(0, 1) + ne1(0, 0)  + !ne1(1, 0)  + !ne1(NAN, NAN)
+# 	     + !ne2(0, 1) + ne2(0, 0)  + !ne2(1, 0)  + !ne2(NAN, NAN)
+# 	     + !ne3(0, 1) + ne3(0, 0)  + !ne3(1, 0)  + !ne3(NAN, NAN)
 # 	     ;
 # }
 # <<<