From 7ac88f5d4874f03d62f48055eded26e9a08e54ac Mon Sep 17 00:00:00 2001 From: Michael Forney Date: Sun, 22 Aug 2021 12:55:02 -0700 Subject: amd64/isel: fix floating point == and != result with NaN On x86_64, ucomis[sd] sets ZF=1, PF=0, CF=0 for equal arguments. However, if the arguments are unordered it sets ZF=1, PF=1, CF=1, and there is no jump/flag instruction for ZF=1 & PF=0 or ZF=1 & CF=0. So, in order to correctly implement ceq[sd] on x86_64, we need to be a bit more creative. There are several options available, depending on whether the result of ceq[sd] is used with jnz, or with other instructions, or both. If the result is used for a conditional jump, both gcc and clang use a combination of jp and jnz: ucomisd %xmm1, %xmm0 jp .Lfalse jnz .Lfalse ... .Lfalse: If the result is used in other instructions or return, gcc does the following for x == y: ucomisd %xmm1, %xmm0 setnp %al movzbl %al, %eax movl $0, %edx cmovne %edx, %eax This sets EAX to PF=0, then uses cmovne to clear it if ZF=0. It also takes care to avoid clobbering the flags register in case the result is also used for a conditional jump. Implementing this approach in QBE would require adding an architecture-specific instruction for cmovne. In contrast, clang does an additional compare, this time using cmpeqsd instead of ucomisd: cmpeqsd %xmm1, %xmm0 movq %xmm0, %rax andl $1, %rax The cmpeqsd instruction doas a floating point equality test, setting XMM0 to all 1s if they are equal and all 0s if they are not. However, we need the result in a non-XMM register, so it moves the result back then masks off all but the first bit. Both of these approaches are a bit awkward to implement in QBE, so instead, this commit does the following: ucomisd %xmm1, %xmm0 setz %al movzbl %al, %eax setnp %cl movzbl %cl, %ecx andl %ecx, %eax This sets the result by anding the two flags, but has a side effect of clobbering the flags register. This was a problem in one of my earlier patches to fix this issue[0], in addition to being more complex than I'd hoped. Instead, this commit always leaves the ceq[sd] instruction in the block, even if the result is only used to control a jump, so that the above instruction sequence is always used. Then, since we now have ZF=!(ZF=1 & PF=0) for x == y, or ZF=!(ZF=0 | PF=1) for x != y, we can use jnz for the jump instruction. [0] https://git.sr.ht/~sircmpwn/qbe/commit/64833841b18c074a23b4a1254625315e05b86658 --- test/isel2.ssa | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'test/isel2.ssa') diff --git a/test/isel2.ssa b/test/isel2.ssa index 280ceb2..8ca4a24 100644 --- a/test/isel2.ssa +++ b/test/isel2.ssa @@ -1,7 +1,5 @@ # tests that NaN is handled properly by # floating point comparisons -# -# TODO: fix eq[123](NAN, NAN) on amd64 export function w $lt(d %x, d %y) { @start @@ -97,12 +95,12 @@ export function w $ne3(d %x, d %y) { # + !le(0, 1) + !le(0, 0) + le(1, 0) + le(NAN, NAN) # + gt(0, 1) + gt(0, 0) + !gt(1, 0) + gt(NAN, NAN) # + ge(0, 1) + !ge(0, 0) + !ge(1, 0) + ge(NAN, NAN) -# + eq1(0, 1) + !eq1(0, 0) + eq1(1, 0) /*+ eq1(NAN, NAN)*/ -# + eq2(0, 1) + !eq2(0, 0) + eq2(1, 0) /*+ eq2(NAN, NAN)*/ -# + eq3(0, 1) + !eq3(0, 0) + eq3(1, 0) /*+ eq3(NAN, NAN)*/ -# + !ne1(0, 1) + ne1(0, 0) + !ne1(1, 0) /*+ !ne1(NAN, NAN)*/ -# + !ne2(0, 1) + ne2(0, 0) + !ne2(1, 0) /*+ !ne2(NAN, NAN)*/ -# + !ne3(0, 1) + ne3(0, 0) + !ne3(1, 0) /*+ !ne3(NAN, NAN)*/ +# + eq1(0, 1) + !eq1(0, 0) + eq1(1, 0) + eq1(NAN, NAN) +# + eq2(0, 1) + !eq2(0, 0) + eq2(1, 0) + eq2(NAN, NAN) +# + eq3(0, 1) + !eq3(0, 0) + eq3(1, 0) + eq3(NAN, NAN) +# + !ne1(0, 1) + ne1(0, 0) + !ne1(1, 0) + !ne1(NAN, NAN) +# + !ne2(0, 1) + ne2(0, 0) + !ne2(1, 0) + !ne2(NAN, NAN) +# + !ne3(0, 1) + ne3(0, 0) + !ne3(1, 0) + !ne3(NAN, NAN) # ; # } # <<< -- cgit 1.4.1