implement unsigned -> float casts

amd64 lacks an instruction for this, so it has to be implemented with signed -> float casts:
- Word casting is done by zero-extending the word to a long and then doing a regular signed cast.
- Long casting is done by dividing by two with correct rounding if the highest bit is set, casting that to float with a signed cast, and then adding 1 to the exponent with integer addition, which undoes the division by two.
author: Bor Grošelj Simić <bor.groseljsimic@telemach.net> 2022-01-28 02:06:17 +0100
committer: Quentin Carbonneaux <quentin@c9x.me> 2022-01-28 09:24:15 +0100
commit: 74d022f975f22fda20c0d1fe09a3f6fc7680f64f (patch)
tree: a8b7e9e1e822d8eee5eb694984f7d45d8a7f43f0
parent: b0d27d8a019811d6a4e0c0cb7ec804ab27fcec80 (diff)
download: roux-74d022f975f22fda20c0d1fe09a3f6fc7680f64f.tar.gz
6 files changed, 107 insertions, 5 deletions
diff --git a/amd64/isel.c b/amd64/isel.c
index 404b714..17ab86d 100644
--- a/amd64/isel.c
+++ b/amd64/isel.c
@@ -201,8 +201,8 @@ selcmp(Ref arg[2], int k, int swap, Fn *fn)
 static void
 sel(Ins i, ANum *an, Fn *fn)
 {
-	Ref r0, r1;
-	int x, k, kc, swap;
+	Ref r0, r1, tmp[7];
+	int x, j, k, kc, swap;
 	int64_t sz;
 	Ins *i0, *i1;
 
@@ -266,6 +266,47 @@ sel(Ins i, ANum *an, Fn *fn)
 		emit(Ocopy, Kw, TMP(RCX), r0, R);
 		fixarg(&i1->arg[0], argcls(&i, 0), i1, fn);
 		break;
+	case Ouwtof:
+		r0 = newtmp("utof", Kl, fn);
+		emit(Osltof, k, i.to, r0, R);
+		emit(Oextuw, Kl, r0, i.arg[0], R);
+		fixarg(&curi->arg[0], k, curi, fn);
+		break;
+	case Oultof:
+		/*
+		%mask =l and %arg.0, 1
+		%isbig =l shr %arg.0, 63
+		%divided =l shr %arg.0, %isbig
+		%or =l or %mask, %divided
+		%float =d sltof %or
+		%cast =l cast %float
+		%addend =l shl %isbig, 52
+		%sum =l add %cast, %addend
+		%result =d cast %sum
+		*/
+		r0 = newtmp("utof", k, fn);
+		if (k == Ks)
+			kc = Kw;
+		else
+			kc = Kl;
+		for (j=0; j<4; j++)
+			tmp[j] = newtmp("utof", Kl, fn);
+		for (; j<7; j++)
+			tmp[j] = newtmp("utof", kc, fn);
+		emit(Ocast, k, i.to, tmp[6], R);
+		emit(Oadd, kc, tmp[6], tmp[4], tmp[5]);
+		emit(Oshl, kc, tmp[5], tmp[1], getcon(k == Ks ? 23 : 52, fn));
+		emit(Ocast, kc, tmp[4], r0, R);
+
+		emit(Osltof, k, r0, tmp[3], R);
+		emit(Oor, Kl, tmp[3], tmp[0], tmp[2]);
+		emit(Oshr, Kl, tmp[2], i.arg[0], tmp[1]);
+		sel(*curi++, an, fn);
+		emit(Oshr, Kl, tmp[1], i.arg[0], getcon(63, fn));
+		fixarg(&curi->arg[0], Kl, curi, fn);
+		emit(Oand, Kl, tmp[0], i.arg[0], getcon(1, fn));
+		fixarg(&curi->arg[0], Kl, curi, fn);
+		break;
 	case Onop:
 		break;
 	case Ostored:
diff --git a/arm64/emit.c b/arm64/emit.c
index de1859b..7cebcab 100644
--- a/arm64/emit.c
+++ b/arm64/emit.c
@@ -91,7 +91,9 @@ static struct {
 	{ Ostosi,  Ka, "fcvtzs %=, %S0" },
 	{ Odtosi,  Ka, "fcvtzs %=, %D0" },
 	{ Oswtof,  Ka, "scvtf %=, %W0" },
+	{ Ouwtof,  Ka, "ucvtf %=, %W0" },
 	{ Osltof,  Ka, "scvtf %=, %L0" },
+	{ Oultof,  Ka, "ucvtf %=, %L0" },
 	{ Ocall,   Kw, "blr %L0" },
 
 	{ Oacmp,   Ki, "cmp %0, %1" },
diff --git a/doc/il.txt b/doc/il.txt
index 48ecb23..818f0a4 100644
--- a/doc/il.txt
+++ b/doc/il.txt
@@ -698,7 +698,9 @@ or convert a floating point into an integer and vice versa.
   * `stosi` -- `I(ss)`
   * `dtosi` -- `I(dd)`
   * `swtof` -- `F(ww)`
+  * `uwtof` -- `F(ww)`
   * `sltof` -- `F(ll)`
+  * `ultof` -- `F(ll)`
 
 Extending the precision of a temporary is done using the
 `ext` family of instructions.  Because QBE types do not
@@ -717,9 +719,9 @@ zero.
 Converting between signed integers and floating points is
 done using `stosi` (single to signed integer), `dtosi`
 (double to signed integer), `swtof` (signed word to float),
-and `sltof` (signed long to float).  These instructions
-only handle signed integers, conversion to and from
-unsigned types are not yet supported.
+`uwtof` (unsigned word to float), `sltof` (signed long
+to float) and `ultof` (unsigned long to float).  Conversion
+from floating point to unsigned types is not yet supported.
 
 Because of <@ Subtyping >, there is no need to have an
 instruction to lower the precision of an integer temporary.
@@ -990,8 +992,10 @@ instructions unless you know exactly what you are doing.
       * `extuh`
       * `extuw`
       * `sltof`
+      * `ultof`
       * `stosi`
       * `swtof`
+      * `uwtof`
       * `truncd`
 
   * <@ Cast and Copy > :
diff --git a/fold.c b/fold.c
index 9923f75..30e21d2 100644
--- a/fold.c
+++ b/fold.c
@@ -469,7 +469,9 @@ foldflt(Con *res, int op, int w, Con *cl, Con *cr)
 		case Odiv: xd = ld / rd; break;
 		case Omul: xd = ld * rd; break;
 		case Oswtof: xd = (int32_t)cl->bits.i; break;
+		case Ouwtof: xd = (uint32_t)cl->bits.i; break;
 		case Osltof: xd = (int64_t)cl->bits.i; break;
+		case Oultof: xd = (uint64_t)cl->bits.i; break;
 		case Oexts: xd = cl->bits.s; break;
 		case Ocast: xd = ld; break;
 		default: die("unreachable");
@@ -486,7 +488,9 @@ foldflt(Con *res, int op, int w, Con *cl, Con *cr)
 		case Odiv: xs = ls / rs; break;
 		case Omul: xs = ls * rs; break;
 		case Oswtof: xs = (int32_t)cl->bits.i; break;
+		case Ouwtof: xs = (uint32_t)cl->bits.i; break;
 		case Osltof: xs = (int64_t)cl->bits.i; break;
+		case Oultof: xs = (uint64_t)cl->bits.i; break;
 		case Otruncd: xs = cl->bits.d; break;
 		case Ocast: xs = ls; break;
 		default: die("unreachable");
diff --git a/ops.h b/ops.h
index 0729d46..04b0cf8 100644
--- a/ops.h
+++ b/ops.h
@@ -98,7 +98,9 @@ O(truncd,  T(e,e,d,e, e,e,x,e), 1) X(0, 0, 1)
 O(stosi,   T(s,s,e,e, x,x,e,e), 1) X(0, 0, 1)
 O(dtosi,   T(d,d,e,e, x,x,e,e), 1) X(0, 0, 1)
 O(swtof,   T(e,e,w,w, e,e,x,x), 1) X(0, 0, 1)
+O(uwtof,   T(e,e,w,w, e,e,x,x), 1) X(0, 0, 1)
 O(sltof,   T(e,e,l,l, e,e,x,x), 1) X(0, 0, 1)
+O(ultof,   T(e,e,l,l, e,e,x,x), 1) X(0, 0, 1)
 O(cast,    T(s,d,w,l, x,x,x,x), 1) X(0, 0, 1)
 
 /* Stack Allocation */
diff --git a/test/fpcnv.ssa b/test/fpcnv.ssa
index d9851d8..4dac489 100644
--- a/test/fpcnv.ssa
+++ b/test/fpcnv.ssa
@@ -17,13 +17,62 @@ function d $ftrunc(d %f) {
 	ret %rt
 }
 
+export
+function s $wtos(w %w) {
+@start
+	%rt =s uwtof %w
+	ret %rt
+}
+export
+function d $wtod(w %w) {
+@start
+	%rt =d uwtof %w
+	ret %rt
+}
+
+export
+function s $ltos(l %l) {
+@start
+	%rt =s ultof %l
+	ret %rt
+}
+export
+function d $ltod(l %l) {
+@start
+	%rt =d ultof %l
+	ret %rt
+}
+
 # >>> driver
 # extern float fneg(float);
 # extern double ftrunc(double);
+#
+# extern float wtos(unsigned int);
+# extern double wtod(unsigned int);
+# extern float ltos(long long unsigned int);
+# extern double ltod(long long unsigned int);
+#
+# unsigned long long iin[] = { 0, 1, 16, 234987, 427386245, 0x7fff0000,
+# 	0xffff0000, 23602938196141, 72259248152500195, 9589010795705032704ull,
+# 	0xdcf5fbe299d0148aull, 0xffffffff00000000ull, -1 };
+#
 # int main() {
+# 	int i;
+#
 # 	if (fneg(1.23f) != -1.23f)  return 1;
 # 	if (ftrunc(3.1415) != 3.0)  return 2;
 # 	if (ftrunc(-1.234) != -1.0) return 3;
+#
+# 	for (i=0; i<sizeof(iin)/sizeof(iin[0]); i++) {
+# 		if (wtos(iin[i]) != (float) (unsigned int)iin[i])
+# 			return 4;
+# 		if (wtod(iin[i]) != (double)(unsigned int)iin[i])
+# 			return 5;
+# 		if (ltos(iin[i]) != (float) iin[i])
+# 			return 6;
+# 		if (ltod(iin[i]) != (double)iin[i])
+# 			return 7;
+# 	}
 # 	return 0;
 # }
 # <<<
author	Bor Grošelj Simić <bor.groseljsimic@telemach.net>	2022-01-28 02:06:17 +0100
committer	Quentin Carbonneaux <quentin@c9x.me>	2022-01-28 09:24:15 +0100
commit	74d022f975f22fda20c0d1fe09a3f6fc7680f64f (patch)
tree	a8b7e9e1e822d8eee5eb694984f7d45d8a7f43f0
parent	b0d27d8a019811d6a4e0c0cb7ec804ab27fcec80 (diff)
download	roux-74d022f975f22fda20c0d1fe09a3f6fc7680f64f.tar.gz