amd64/isel: fix floating < and <= result with NaN

When the two operands are Unordered (for instance, if one of them is NaN), ucomisd sets ZF=1, PF=1, and CF=1. When the result is LessThan, it sets ZF=0, PF=0, and CF=1. However, jb[e]/setb[e] only check that CF=1 [or ZF=1], which causes the result to be true for unordered operands. To fix this, change the operand swap condition for these two floating point comparison types: always rewrite x < y as y > x (and x <= y as y >= x), and never rewrite x > y as y < x (or x >= y as y <= x). The resulting ja[e]/seta[e] conditions require CF=0, which is never the case for unordered operands, so they correctly yield false. Add a test to check the result of cltd, cled, cgtd, cged, ceqd, and cned with arguments that are LessThan, Equal, GreaterThan, and Unordered. Additionally, check three different implementations for equality testing: one that uses the result of ceqd directly, one that uses the result to control a conditional jump, and one that uses the result both as a value and for a conditional jump. For now, unordered equality tests are still broken, so they are disabled.
author: Michael Forney <mforney@mforney.org> 2021-08-17 13:14:54 -0700
committer: Quentin Carbonneaux <quentin@c9x.me> 2021-08-27 10:45:13 +0200
commit: 804921a3ab463848aa0ffbe495ca542b3789c841 (patch)
tree: 17419e724fe59ab12eb16c3404d211ceb523b89c
parent: 3cbad4d9c465d3f298cbe19c46f7c16f6a9b9f0f (diff)
download: roux-804921a3ab463848aa0ffbe495ca542b3789c841.tar.gz
2 files changed, 135 insertions, 13 deletions
diff --git a/amd64/isel.c b/amd64/isel.c
index 0b0a2df..07e6142 100644
--- a/amd64/isel.c
+++ b/amd64/isel.c
@@ -165,13 +165,25 @@ seladdr(Ref *r, ANum *an, Fn *fn)
 }
 
 static int
-selcmp(Ref arg[2], int k, Fn *fn)
+cmpswap(Ref arg[2], int op)
+{
+	switch (op) {
+	case NCmpI+Cflt:
+	case NCmpI+Cfle:
+		return 1;
+	case NCmpI+Cfgt:
+	case NCmpI+Cfge:
+		return 0;
+	}
+	return rtype(arg[0]) == RCon;
+}
+
+static void
+selcmp(Ref arg[2], int k, int swap, Fn *fn)
 {
-	int swap;
 	Ref r;
 	Ins *icmp;
 
-	swap = rtype(arg[0]) == RCon;
 	if (swap) {
 		r = arg[1];
 		arg[1] = arg[0];
@@ -180,20 +192,20 @@ selcmp(Ref arg[2], int k, Fn *fn)
 	emit(Oxcmp, k, R, arg[1], arg[0]);
 	icmp = curi;
 	if (rtype(arg[0]) == RCon) {
-		assert(k == Kl);
+		assert(k != Kw);
 		icmp->arg[1] = newtmp("isel", k, fn);
 		emit(Ocopy, k, icmp->arg[1], arg[0], R);
+		fixarg(&curi->arg[0], k, curi, fn);
 	}
 	fixarg(&icmp->arg[0], k, icmp, fn);
 	fixarg(&icmp->arg[1], k, icmp, fn);
-	return swap;
 }
 
 static void
 sel(Ins i, ANum *an, Fn *fn)
 {
 	Ref r0, r1;
-	int x, k, kc;
+	int x, k, kc, swap;
 	int64_t sz;
 	Ins *i0, *i1;
 
@@ -332,10 +344,11 @@ Emit:
 		if (isload(i.op))
 			goto case_Oload;
 		if (iscmp(i.op, &kc, &x)) {
+			swap = cmpswap(i.arg, x);
+			if (swap)
+				x = cmpop(x);
 			emit(Oflag+x, k, i.to, R, R);
-			i1 = curi;
-			if (selcmp(i.arg, kc, fn))
-				i1->op = Oflag + cmpop(x);
+			selcmp(i.arg, kc, swap, fn);
 			break;
 		}
 		die("unknown instruction %s", optab[i.op].name);
@@ -365,7 +378,7 @@ static void
 seljmp(Blk *b, Fn *fn)
 {
 	Ref r;
-	int c, k;
+	int c, k, swap;
 	Ins *fi;
 	Tmp *t;
 
@@ -384,14 +397,15 @@ seljmp(Blk *b, Fn *fn)
 	}
 	fi = flagi(b->ins, &b->ins[b->nins]);
 	if (!fi || !req(fi->to, r)) {
-		selcmp((Ref[2]){r, CON_Z}, Kw, fn); /* todo, long jnz */
+		selcmp((Ref[2]){r, CON_Z}, Kw, 0, fn); /* todo, long jnz */
 		b->jmp.type = Jjf + Cine;
 	}
 	else if (iscmp(fi->op, &k, &c)) {
-		if (rtype(fi->arg[0]) == RCon)
+		swap = cmpswap(fi->arg, c);
+		if (swap)
 			c = cmpop(c);
 		if (t->nuse == 1) {
-			selcmp(fi->arg, k, fn);
+			selcmp(fi->arg, k, swap, fn);
 			*fi = (Ins){.op = Onop};
 		}
 		b->jmp.type = Jjf + c;
diff --git a/test/isel2.ssa b/test/isel2.ssa
new file mode 100644
index 0000000..280ceb2
--- /dev/null
+++ b/test/isel2.ssa
@@ -0,0 +1,108 @@
+# tests that NaN is handled properly by
+# floating point comparisons
+#
+# TODO: fix eq[123](NAN, NAN) on amd64
+
+export function w $lt(d %x, d %y) {
+@start
+	%r =w cltd %x, %y
+	ret %r
+}
+
+export function w $le(d %x, d %y) {
+@start
+	%r =w cled %x, %y
+	ret %r
+}
+
+export function w $gt(d %x, d %y) {
+@start
+	%r =w cgtd %x, %y
+	ret %r
+}
+
+export function w $ge(d %x, d %y) {
+@start
+	%r =w cged %x, %y
+	ret %r
+}
+
+export function w $eq1(d %x, d %y) {
+@start
+	%r =w ceqd %x, %y
+	ret %r
+}
+
+export function w $eq2(d %x, d %y) {
+@start
+	%r =w ceqd %x, %y
+	jnz %r, @true, @false
+@true
+	ret 1
+@false
+	ret 0
+}
+
+export function w $eq3(d %x, d %y) {
+@start
+	%r =w ceqd %x, %y
+	jnz %r, @true, @false
+@true
+	ret %r
+@false
+	ret 0
+}
+
+export function w $ne1(d %x, d %y) {
+@start
+	%r =w cned %x, %y
+	ret %r
+}
+
+export function w $ne2(d %x, d %y) {
+@start
+	%r =w cned %x, %y
+	jnz %r, @true, @false
+@true
+	ret 1
+@false
+	ret 0
+}
+
+export function w $ne3(d %x, d %y) {
+@start
+	%r =w cned %x, %y
+	jnz %r, @true, @false
+@true
+	ret %r
+@false
+	ret 0
+}
+
+# >>> driver
+# #include <math.h>
+# extern int lt(double, double);
+# extern int le(double, double);
+# extern int gt(double, double);
+# extern int ge(double, double);
+# extern int eq1(double, double);
+# extern int eq2(double, double);
+# extern int eq3(double, double);
+# extern int ne1(double, double);
+# extern int ne2(double, double);
+# extern int ne3(double, double);
+# int main(void) {
+# 	/*     LessThan     Equal        GreaterThan   Unordered */
+# 	return !lt(0, 1)  + lt(0, 0)   + lt(1, 0)    + lt(NAN, NAN)
+# 	     + !le(0, 1)  + !le(0, 0)  + le(1, 0)    + le(NAN, NAN)
+# 	     + gt(0, 1)   + gt(0, 0)   + !gt(1, 0)   + gt(NAN, NAN)
+# 	     + ge(0, 1)   + !ge(0, 0)  + !ge(1, 0)   + ge(NAN, NAN)
+# 	     + eq1(0, 1)  + !eq1(0, 0) + eq1(1, 0)   /*+ eq1(NAN, NAN)*/
+# 	     + eq2(0, 1)  + !eq2(0, 0) + eq2(1, 0)   /*+ eq2(NAN, NAN)*/
+# 	     + eq3(0, 1)  + !eq3(0, 0) + eq3(1, 0)   /*+ eq3(NAN, NAN)*/
+# 	     + !ne1(0, 1) + ne1(0, 0)  + !ne1(1, 0)  /*+ !ne1(NAN, NAN)*/
+# 	     + !ne2(0, 1) + ne2(0, 0)  + !ne2(1, 0)  /*+ !ne2(NAN, NAN)*/
+# 	     + !ne3(0, 1) + ne3(0, 0)  + !ne3(1, 0)  /*+ !ne3(NAN, NAN)*/
+# 	     ;
+# }
+# <<<
author	Michael Forney <mforney@mforney.org>	2021-08-17 13:14:54 -0700
committer	Quentin Carbonneaux <quentin@c9x.me>	2021-08-27 10:45:13 +0200
commit	804921a3ab463848aa0ffbe495ca542b3789c841 (patch)
tree	17419e724fe59ab12eb16c3404d211ceb523b89c
parent	3cbad4d9c465d3f298cbe19c46f7c16f6a9b9f0f (diff)
download	roux-804921a3ab463848aa0ffbe495ca542b3789c841.tar.gz