summary refs log tree commit diff
diff options
context:
space:
mode:
authorQuentin Carbonneaux <quentin@c9x.me>2017-04-08 21:06:33 -0400
committerQuentin Carbonneaux <quentin@c9x.me>2017-04-08 21:56:20 -0400
commit49a4593c335126ba279f47328824abfef379725e (patch)
tree2f4cb5e9884ec958ea32a494da302a9aae8ca420
parent9d1c38d69547d835f7228651e71e8a7d744c456d (diff)
downloadroux-49a4593c335126ba279f47328824abfef379725e.tar.gz
prepare for multi-target
This big diff does multiple changes to allow
the addition of new targets to qbe.  The
changes are listed below in decreasing order
of impact.

1. Add a new Target structure.

To add support for a given target, one has to
implement all the members of the Target
structure.  All the source files were changed
to use this interface where needed.

2. Single out amd64-specific code.

In this commit, the amd64 target T_amd64_sysv
is the only target available; it is implemented
in the amd64/ directory.  All the non-static
items in this directory are prefixed with either
amd64_ or amd64_sysv (for items that are
specific to the System V ABI).

3. Centralize Ops information.

There is now a file 'ops.h' that must be used to
store all the available operations together with
their metadata.  The various targets will only
select what they need; but it is beneficial that
there is only *one* place to change to add a new
instruction.

One good side effect of this change is that any
operation 'xyz' in the IL now has a corresponding
'Oxyz' in the code.

4. Misc fixes.

One notable change is that instruction selection
now generates generic comparison operations and
the lowering to the target's comparisons is done
in the emitter.

GAS directives for data are the same for many
targets, so data emission was extracted in a
file 'gas.c'.

5. Modularize the Makefile.

The Makefile now has a list of C files that
are target-independent (SRC), and one list
of C files per target.  Each target can also
use its own 'all.h' header (for example to
define registers).
-rw-r--r--Makefile28
-rw-r--r--all.h334
-rw-r--r--amd64/all.h70
-rw-r--r--amd64/emit.c (renamed from emit.c)225
-rw-r--r--amd64/isel.c (renamed from isel.c)78
-rw-r--r--amd64/sysv.c (renamed from sysv.c)47
-rw-r--r--amd64/targ.c30
-rw-r--r--cfg.c4
-rw-r--r--fold.c62
-rw-r--r--gas.c122
-rw-r--r--live.c30
-rw-r--r--main.c68
-rw-r--r--mem.c4
-rw-r--r--ops.h167
-rw-r--r--parse.c144
-rw-r--r--rega.c38
-rw-r--r--spill.c32
-rw-r--r--util.c90
18 files changed, 852 insertions, 721 deletions
diff --git a/Makefile b/Makefile
index f8e3da0..2433e25 100644
--- a/Makefile
+++ b/Makefile
@@ -1,11 +1,15 @@
 BIN = qbe
-ABI = sysv
 
 V = @
 OBJDIR = obj
 
-SRC = main.c util.c parse.c cfg.c mem.c ssa.c alias.c load.c copy.c fold.c live.c $(ABI).c isel.c spill.c rega.c emit.c
-OBJ = $(SRC:%.c=$(OBJDIR)/%.o)
+SRC      = main.c util.c parse.c cfg.c mem.c ssa.c alias.c load.c copy.c \
+           fold.c live.c spill.c rega.c gas.c
+AMD64SRC = amd64/targ.c amd64/sysv.c amd64/isel.c amd64/emit.c
+SRCALL   = $(SRC) $(AMD64SRC)
+
+AMD64OBJ = $(AMD64SRC:%.c=$(OBJDIR)/%.o)
+OBJ      = $(SRC:%.c=$(OBJDIR)/%.o) $(AMD64OBJ)
 
 CFLAGS += -Wall -Wextra -std=c99 -g -pedantic
 
@@ -19,15 +23,23 @@ $(OBJDIR)/%.o: %.c $(OBJDIR)/timestamp
 
 $(OBJDIR)/timestamp:
 	@mkdir -p $(OBJDIR)
+	@mkdir -p $(OBJDIR)/amd64
 	@touch $@
 
-$(OBJ): all.h
+$(OBJ): all.h ops.h
+$(AMD64OBJ): amd64/all.h
 obj/main.o: config.h
 
 config.h:
-	@case `uname` in                                 \
-	*Darwin*)  echo "#define Defaultasm Gasmacho" ;; \
-	*)         echo "#define Defaultasm Gaself" ;;   \
+	@case `uname` in                               \
+	*Darwin*)                                      \
+		echo "#define Defasm Gasmacho";        \
+		echo "#define Deftgt T_amd64_sysv";    \
+		;;                                     \
+	*)                                             \
+		echo "#define Defasm Gaself";          \
+		echo "#define Deftgt T_amd64_sysv";    \
+		;;                                     \
 	esac > $@
 
 install: $(OBJDIR)/$(BIN)
@@ -47,7 +59,7 @@ check: $(OBJDIR)/$(BIN)
 	tools/unit.sh all
 
 80:
-	@for F in $(SRC);                          \
+	@for F in $(SRCALL);                       \
 	do                                         \
 		awk "{                             \
 			gsub(/\\t/, \"        \"); \
diff --git a/all.h b/all.h
index 124a8d2..c0e08fe 100644
--- a/all.h
+++ b/all.h
@@ -8,13 +8,14 @@
 #define MAKESURE(what, x) typedef char make_sure_##what[(x)?1:-1]
 #define die(...) die_(__FILE__, __VA_ARGS__)
 
+typedef unsigned char uchar;
 typedef unsigned int uint;
 typedef unsigned long ulong;
 typedef unsigned long long bits;
 
 typedef struct BSet BSet;
 typedef struct Ref Ref;
-typedef struct OpDesc OpDesc;
+typedef struct Op Op;
 typedef struct Ins Ins;
 typedef struct Phi Phi;
 typedef struct Blk Blk;
@@ -27,6 +28,7 @@ typedef struct Fn Fn;
 typedef struct Typ Typ;
 typedef struct Seg Seg;
 typedef struct Dat Dat;
+typedef struct Target Target;
 
 enum {
 	NString = 32,
@@ -38,61 +40,29 @@ enum {
 	NBit    = CHAR_BIT * sizeof(bits),
 };
 
-#define BIT(n) ((bits)1 << (n))
-
-enum Reg {
-	RXX,
-
-	RAX, /* caller-save */
-	RCX,
-	RDX,
-	RSI,
-	RDI,
-	R8,
-	R9,
-	R10,
-	R11,
-
-	RBX, /* callee-save */
-	R12,
-	R13,
-	R14,
-	R15,
-
-	RBP, /* globally live */
-	RSP,
-#define RGLOB (BIT(RBP)|BIT(RSP))
-
-	XMM0, /* sse */
-	XMM1,
-	XMM2,
-	XMM3,
-	XMM4,
-	XMM5,
-	XMM6,
-	XMM7,
-	XMM8,
-	XMM9,
-	XMM10,
-	XMM11,
-	XMM12,
-	XMM13,
-	XMM14,
-	XMM15,
-
-	Tmp0, /* first non-reg temporary */
-
-	NRGlob = 2,
-	NIReg = R15 - RAX + 1 + NRGlob,
-	NFReg = XMM14 - XMM0 + 1, /* XMM15 is reserved */
-	NISave = R11 - RAX + 1,
-	NFSave = NFReg,
-	NRSave = NISave + NFSave,
-	NRClob = R15 - RBX + 1,
+struct Target { /* register layout and backend entry points of one target */
+	int gpr0;   /* first general purpose reg */
+	int ngpr;   /* number of general purpose regs */
+	int fpr0;   /* first floating point reg */
+	int nfpr;   /* number of floating point regs */
+	bits rglob; /* globally live regs (e.g., sp, fp) */
+	int nrglob; /* number of regs set in rglob */
+	int *rsave; /* caller-save regs; amd64 lists gprs first, then fprs */
+	int nrsave[2]; /* caller-save counts: {gprs, fprs} */
+	bits (*retregs)(Ref, int[2]); /* reg bit set for a return; see target */
+	bits (*argregs)(Ref, int[2]); /* reg bit set decoded from a call Ref */
+	int (*memargs)(int); /* per-op lookup (amd64: Amd64Op.nmem) */
+	void (*abi)(Fn *); /* ABI pass over a function */
+	void (*isel)(Fn *); /* instruction selection pass */
+	void (*emitfn)(Fn *, FILE *); /* prints the function's assembly */
+};
 
-MAKESURE(NBit_is_enough, NBit >= (int)Tmp0);
+#define BIT(n) ((bits)1 << (n))
 
+enum {
+	RXX = 0,
+	Tmp0 = NBit, /* first non-reg temporary */
+};
 
 struct BSet {
 	uint nt;
@@ -139,51 +109,81 @@ static inline int isreg(Ref r)
 	return rtype(r) == RTmp && r.val < Tmp0;
 }
 
-enum ICmp {
-#define ICMPS(X) \
-	X(ule)   \
-	X(ult)   \
-	X(sle)   \
-	X(slt)   \
-	X(sgt)   \
-	X(sge)   \
-	X(ugt)   \
-	X(uge)   \
-	X(eq)    \
-	X(ne) /* make sure icmpop() below works! */
-
-#define X(c) IC##c,
-	ICMPS(X)
-#undef X
-	NICmp,
+enum CmpI {
+	Cieq,
+	Cine,
+	Cisge,
+	Cisgt,
+	Cisle,
+	Cislt,
+	Ciuge,
+	Ciugt,
+	Ciule,
+	Ciult,
+	NCmpI,
+};
 
-	ICxnp = NICmp, /* x64 specific */
-	ICxp,
-	NXICmp
+enum CmpF {
+	Cfeq,
+	Cfge,
+	Cfgt,
+	Cfle,
+	Cflt,
+	Cfne,
+	Cfo,
+	Cfuo,
+	NCmpF,
+	NCmp = NCmpI + NCmpF,
 };
 
-static inline int icmpop(int c)
-{
-	return c >= ICeq ? c : ICuge - c;
-}
+enum O {
+	Oxxx,
+#define O(op, x, y) O##op,
+	#include "ops.h"
+	NOp,
+};
 
-enum FCmp {
-#define FCMPS(X) \
-	X(le)    \
-	X(lt)    \
-	X(gt)    \
-	X(ge)    \
-	X(ne)    \
-	X(eq)    \
-	X(o)     \
-	X(uo)
-
-#define X(c) FC##c,
-	FCMPS(X)
+enum J {
+	Jxxx,
+#define JMPS(X)                                 \
+	X(ret0)   X(retw)   X(retl)   X(rets)   \
+	X(retd)   X(retc)   X(jmp)    X(jnz)    \
+	X(jfieq)  X(jfine)  X(jfisge) X(jfisgt) \
+	X(jfisle) X(jfislt) X(jfiuge) X(jfiugt) \
+	X(jfiule) X(jfiult) X(jffeq)  X(jffge)  \
+	X(jffgt)  X(jffle)  X(jfflt)  X(jffne)  \
+	X(jffo)   X(jffuo)
+#define X(j) J##j,
+	JMPS(X)
 #undef X
-	NFCmp
+	NJmp
+};
+
+enum {
+	Ocmpw = Oceqw,
+	Ocmpw1 = Ocultw,
+	Ocmpl = Oceql,
+	Ocmpl1 = Ocultl,
+	Ocmps = Oceqs,
+	Ocmps1 = Ocuos,
+	Ocmpd = Oceqd,
+	Ocmpd1 = Ocuod,
+	Oalloc = Oalloc4,
+	Oalloc1 = Oalloc16,
+	Oflag = Oflagieq,
+	Oflag1 = Oflagfuo,
+	NPubOp = Onop,
+	Jjf = Jjfieq,
+	Jjf1 = Jjffuo,
 };
 
+#define isstore(o) (Ostoreb <= o && o <= Ostored)
+#define isload(o) (Oloadsb <= o && o <= Oload)
+#define isext(o) (Oextsb <= o && o <= Oextuw)
+#define ispar(o) (Opar <= o && o <= Opare)
+#define isarg(o) (Oarg <= o && o <= Oarge)
+#define isret(j) (Jret0 <= j && j <= Jretc)
+
 enum Class {
 	Kx = -1, /* "top" class (see usecheck() and clsmerge()) */
 	Kw,
@@ -195,124 +195,10 @@ enum Class {
 #define KWIDE(k) ((k)&1)
 #define KBASE(k) ((k)>>1)
 
-enum Op {
-	Oxxx,
-
-	/* public instructions */
-	Oadd,
-	Osub,
-	Odiv,
-	Orem,
-	Oudiv,
-	Ourem,
-	Omul,
-	Oand,
-	Oor,
-	Oxor,
-	Osar,
-	Oshr,
-	Oshl,
-	Ocmpw,
-	Ocmpw1 = Ocmpw + NICmp-1,
-	Ocmpl,
-	Ocmpl1 = Ocmpl + NICmp-1,
-	Ocmps,
-	Ocmps1 = Ocmps + NFCmp-1,
-	Ocmpd,
-	Ocmpd1 = Ocmpd + NFCmp-1,
-
-	Ostoreb,
-	Ostoreh,
-	Ostorew,
-	Ostorel,
-	Ostores,
-	Ostored,
-#define isstore(o) (Ostoreb <= o && o <= Ostored)
-	Oloadsb,  /* must match Oext and Tmp.width */
-	Oloadub,
-	Oloadsh,
-	Oloaduh,
-	Oloadsw,
-	Oloaduw,
-	Oload,
-#define isload(o) (Oloadsb <= o && o <= Oload)
-	Oextsb,
-	Oextub,
-	Oextsh,
-	Oextuh,
-	Oextsw,
-	Oextuw,
-#define isext(o) (Oextsb <= o && o <= Oextuw)
-
-	Oexts,
-	Otruncd,
-	Ostosi,
-	Odtosi,
-	Oswtof,
-	Osltof,
-	Ocast,
-
-	Oalloc,
-	Oalloc1 = Oalloc + NAlign-1,
-
-	Ovastart,
-	Ovaarg,
-
-	Ocopy,
-	NPubOp,
-
-	/* function instructions */
-	Opar = NPubOp,
-	Oparc,
-	Opare,
-#define ispar(o) (Opar <= o && o <= Opare)
-	Oarg,
-	Oargc,
-	Oarge,
-#define isarg(o) (Oarg <= o && o <= Oarge)
-	Ocall,
-	Ovacall,
-
-	/* reserved instructions */
-	Onop,
-	Oaddr,
-	Oswap,
-	Osign,
-	Osalloc,
-	Oxidiv,
-	Oxdiv,
-	Oxcmp,
-	Oxset,
-	Oxsetnp = Oxset + ICxnp,
-	Oxsetp  = Oxset + ICxp,
-	Oxtest,
-	NOp
-};
-
-enum Jmp {
-	Jxxx,
-	Jret0,
-	Jretw,
-	Jretl,
-	Jrets,
-	Jretd,
-	Jretc,
-#define isret(j) (Jret0 <= j && j <= Jretc)
-	Jjmp,
-	Jjnz,
-	Jxjc,
-	Jxjnp = Jxjc + ICxnp,
-	Jxjp  = Jxjc + ICxp,
-	NJmp
-};
-
-struct OpDesc {
+struct Op {
 	char *name;
-	int nmem;
 	short argcls[2][4];
-	uint sflag:1; /* sets the zero flag */
-	uint lflag:1; /* leaves flags */
-	uint cfold:1; /* can fold */
+	int canfold;
 };
 
 struct Ins {
@@ -437,7 +323,7 @@ struct Con {
 
 typedef struct Addr Addr;
 
-struct Addr { /* x64 addressing */
+struct Addr { /* amd64 addressing */
 	Con offset;
 	Ref base;
 	Ref index;
@@ -508,8 +394,8 @@ struct Dat {
 	char export;
 };
 
-
 /* main.c */
+extern Target T;
 extern char debug['Z'+1];
 
 /* util.c */
@@ -524,6 +410,8 @@ void die_(char *, char *, ...) __attribute__((noreturn));
 void *emalloc(size_t);
 void *alloc(size_t);
 void freeall(void);
+int argcls(Ins *, int);
+int iscmp(int, int *, int *);
 void emit(int, int, Ref, Ref, Ref);
 void emiti(Ins);
 void idup(Ins **, Ins *, ulong);
@@ -531,12 +419,15 @@ Ins *icpy(Ins *, Ins *, ulong);
 void *vnew(ulong, size_t, Pool);
 void vfree(void *);
 void vgrow(void *, ulong);
+int cmpop(int);
+int cmpneg(int);
 int clsmerge(short *, short);
 int phicls(int, Tmp *);
 Ref newtmp(char *, int, Fn *);
 void chuse(Ref, int, Fn *);
 Ref getcon(int64_t, Fn *);
 void addcon(Con *, Con *);
+void blit(Ref, uint, Ref, uint, Fn *);
 void dumpts(BSet *, Tmp *, FILE *);
 
 void bsinit(BSet *, uint);
@@ -559,7 +450,7 @@ bshas(BSet *bs, uint elt)
 }
 
 /* parse.c */
-extern OpDesc opdesc[NOp];
+extern Op optab[NOp];
 void parse(FILE *, char *, void (Dat *), void (Fn *));
 void printfn(Fn *, FILE *);
 void printref(Ref, Fn *, FILE *);
@@ -611,16 +502,6 @@ void fold(Fn *);
 void liveon(BSet *, Blk *, Blk *);
 void filllive(Fn *);
 
-/* abi: sysv.c */
-extern int rsave[/* NRSave */];
-extern int rclob[/* NRClob */];
-bits retregs(Ref, int[2]);
-bits argregs(Ref, int[2]);
-void abi(Fn *);
-
-/* isel.c */
-void isel(Fn *);
-
 /* spill.c */
 void fillcost(Fn *);
 void spill(Fn *);
@@ -628,10 +509,9 @@ void spill(Fn *);
 /* rega.c */
 void rega(Fn *);
 
-/* emit.c */
-extern char *locprefix;
-extern char *symprefix;
-void emitfn(Fn *, FILE *);
-void emitdat(Dat *, FILE *);
-int stashfp(int64_t, int);
-void emitfin(FILE *);
+/* gas.c */
+extern char *gasloc;
+extern char *gassym;
+void gasemitdat(Dat *, FILE *);
+int gasstashfp(int64_t, int);
+void gasemitfin(FILE *);
diff --git a/amd64/all.h b/amd64/all.h
new file mode 100644
index 0000000..3a2db0e
--- /dev/null
+++ b/amd64/all.h
@@ -0,0 +1,70 @@
+#include "../all.h"
+
+typedef struct Amd64Op Amd64Op;
+
+enum Amd64Reg {
+	RAX = RXX+1, /* caller-save */
+	RCX,
+	RDX,
+	RSI,
+	RDI,
+	R8,
+	R9,
+	R10,
+	R11,
+
+	RBX, /* callee-save */
+	R12,
+	R13,
+	R14,
+	R15,
+
+	RBP, /* globally live */
+	RSP,
+
+	XMM0, /* sse */
+	XMM1,
+	XMM2,
+	XMM3,
+	XMM4,
+	XMM5,
+	XMM6,
+	XMM7,
+	XMM8,
+	XMM9,
+	XMM10,
+	XMM11,
+	XMM12,
+	XMM13,
+	XMM14,
+	XMM15,
+
+	NFPR = XMM14 - XMM0 + 1, /* reserve XMM15 */
+	NGPR = RSP - RAX + 1,
+	NGPS = R11 - RAX + 1,
+	NFPS = NFPR,
+	NCLR = R15 - RBX + 1,
+};
+MAKESURE(reg_not_tmp, XMM15 < (int)Tmp0);
+
+struct Amd64Op { /* amd64-only metadata, one entry per op in amd64_op[] */
+	char nmem;  /* value returned by Target.memargs for this op */
+	char zflag; /* sets the zero flag (was OpDesc.sflag) */
+	char lflag; /* leaves flags */
+};
+
+/* targ.c */
+extern Amd64Op amd64_op[];
+
+/* sysv.c (abi) */
+extern int amd64_sysv_rsave[];
+extern int amd64_sysv_rclob[];
+bits amd64_sysv_retregs(Ref, int[2]);
+bits amd64_sysv_argregs(Ref, int[2]);
+void amd64_sysv_abi(Fn *);
+
+/* isel.c */
+void amd64_isel(Fn *);
+
+/* emit.c */
+void amd64_emitfn(Fn *, FILE *);
diff --git a/emit.c b/amd64/emit.c
index 138bc1d..eccbd02 100644
--- a/emit.c
+++ b/amd64/emit.c
@@ -1,6 +1,25 @@
 #include "all.h"
 
-char *locprefix, *symprefix;
+
+#define CMP(X) \
+	X(Ciule,      "be") \
+	X(Ciult,      "b")  \
+	X(Cisle,      "le") \
+	X(Cislt,      "l")  \
+	X(Cisgt,      "g")  \
+	X(Cisge,      "ge") \
+	X(Ciugt,      "a")  \
+	X(Ciuge,      "ae") \
+	X(Cieq,       "z")  \
+	X(Cine,       "nz") \
+	X(NCmpI+Cfle, "be") \
+	X(NCmpI+Cflt, "b")  \
+	X(NCmpI+Cfgt, "a")  \
+	X(NCmpI+Cfge, "ae") \
+	X(NCmpI+Cfeq, "z")  \
+	X(NCmpI+Cfne, "nz") \
+	X(NCmpI+Cfo,  "np") \
+	X(NCmpI+Cfuo, "p")
 
 enum {
 	SLong = 0,
@@ -95,18 +114,10 @@ static struct {
 	{ Oxcmp,   Kd, "comisd %D0, %D1" },
 	{ Oxcmp,   Ki, "cmp%k %0, %1" },
 	{ Oxtest,  Ki, "test%k %0, %1" },
-	{ Oxset+ICule, Ki, "setbe %B=\n\tmovzb%k %B=, %=" },
-	{ Oxset+ICult, Ki, "setb %B=\n\tmovzb%k %B=, %=" },
-	{ Oxset+ICsle, Ki, "setle %B=\n\tmovzb%k %B=, %=" },
-	{ Oxset+ICslt, Ki, "setl %B=\n\tmovzb%k %B=, %=" },
-	{ Oxset+ICsgt, Ki, "setg %B=\n\tmovzb%k %B=, %=" },
-	{ Oxset+ICsge, Ki, "setge %B=\n\tmovzb%k %B=, %=" },
-	{ Oxset+ICugt, Ki, "seta %B=\n\tmovzb%k %B=, %=" },
-	{ Oxset+ICuge, Ki, "setae %B=\n\tmovzb%k %B=, %=" },
-	{ Oxset+ICeq,  Ki, "setz %B=\n\tmovzb%k %B=, %=" },
-	{ Oxset+ICne,  Ki, "setnz %B=\n\tmovzb%k %B=, %=" },
-	{ Oxset+ICxnp, Ki, "setnp %B=\n\tmovsb%k %B=, %=" },
-	{ Oxset+ICxp,  Ki, "setp %B=\n\tmovsb%k %B=, %=" },
+#define X(c, s) \
+	{ Oflag+c, Ki, "set" s " %B=\n\tmovzb%k %B=, %=" },
+	CMP(X)
+#undef X
 	{ NOp, 0, 0 }
 };
 
@@ -153,9 +164,9 @@ emitcon(Con *con, FILE *f)
 	switch (con->type) {
 	case CAddr:
 		if (con->local)
-			fprintf(f, "%s%s", locprefix, con->label);
+			fprintf(f, "%s%s", gasloc, con->label);
 		else
-			fprintf(f, "%s%s", symprefix, con->label);
+			fprintf(f, "%s%s", gassym, con->label);
 		if (con->bits.i)
 			fprintf(f, "%+"PRId64, con->bits.i);
 		break;
@@ -356,7 +367,8 @@ emitins(Ins i, Fn *fn, FILE *f)
 			/* this linear search should really be a binary
 			 * search */
 			if (omap[o].op == NOp)
-				die("no match for %s(%d)", opdesc[i.op].name, i.cls);
+				die("no match for %s(%c)", /* class letter */
+					optab[i.op].name, "wlsd"[i.cls]);
 			if (omap[o].op == i.op)
 			if (omap[o].cls == i.cls
 			|| (omap[o].cls == Ki && KBASE(i.cls) == 0)
@@ -453,54 +465,25 @@ emitins(Ins i, Fn *fn, FILE *f)
 }
 
 static int
-cneg(int cmp)
-{
-	switch (cmp) {
-	default:    die("invalid int comparison %d", cmp);
-	case ICule: return ICugt;
-	case ICult: return ICuge;
-	case ICsle: return ICsgt;
-	case ICslt: return ICsge;
-	case ICsgt: return ICsle;
-	case ICsge: return ICslt;
-	case ICugt: return ICule;
-	case ICuge: return ICult;
-	case ICeq:  return ICne;
-	case ICne:  return ICeq;
-	case ICxnp: return ICxp;
-	case ICxp:  return ICxnp;
-	}
-}
-
-static int
 framesz(Fn *fn)
 {
 	int i, o, f;
 
 	/* specific to NAlign == 3 */
-	for (i=0, o=0; i<NRClob; i++)
-		o ^= 1 & (fn->reg >> rclob[i]);
+	for (i=0, o=0; i<NCLR; i++)
+		o ^= 1 & (fn->reg >> amd64_sysv_rclob[i]);
 	f = fn->slot;
 	f = (f + 3) & -4;
 	return 4*f + 8*o + 176*fn->vararg;
 }
 
 void
-emitfn(Fn *fn, FILE *f)
+amd64_emitfn(Fn *fn, FILE *f)
 {
 	static char *ctoa[] = {
-		[ICeq]  = "z",
-		[ICule] = "be",
-		[ICult] = "b",
-		[ICsle] = "le",
-		[ICslt] = "l",
-		[ICsgt] = "g",
-		[ICsge] = "ge",
-		[ICugt] = "a",
-		[ICuge] = "ae",
-		[ICne]  = "nz",
-		[ICxnp] = "np",
-		[ICxp]  = "p"
+	#define X(c, s) [c] = s,
+		CMP(X)
+	#undef X
 	};
 	static int id0;
 	Blk *b, *s;
@@ -509,24 +492,24 @@ emitfn(Fn *fn, FILE *f)
 
 	fprintf(f, ".text\n");
 	if (fn->export)
-		fprintf(f, ".globl %s%s\n", symprefix, fn->name);
+		fprintf(f, ".globl %s%s\n", gassym, fn->name);
 	fprintf(f,
 		"%s%s:\n"
 		"\tpushq %%rbp\n"
 		"\tmovq %%rsp, %%rbp\n",
-		symprefix, fn->name
+		gassym, fn->name
 	);
 	fs = framesz(fn);
 	if (fs)
 		fprintf(f, "\tsub $%d, %%rsp\n", fs);
 	if (fn->vararg) {
 		o = -176;
-		for (r=rsave; r-rsave<6; ++r, o+=8)
+		for (r=amd64_sysv_rsave; r<&amd64_sysv_rsave[6]; r++, o+=8)
 			fprintf(f, "\tmovq %%%s, %d(%%rbp)\n", rname[*r][0], o);
 		for (n=0; n<8; ++n, o+=16)
 			fprintf(f, "\tmovaps %%xmm%d, %d(%%rbp)\n", n, o);
 	}
-	for (r=rclob; r-rclob < NRClob; r++)
+	for (r=amd64_sysv_rclob; r<&amd64_sysv_rclob[NCLR]; r++)
 		if (fn->reg & BIT(*r)) {
 			itmp.arg[0] = TMP(*r);
 			emitf("pushq %L0", &itmp, fn, f);
@@ -534,13 +517,13 @@ emitfn(Fn *fn, FILE *f)
 
 	for (lbl=0, b=fn->start; b; b=b->link) {
 		if (lbl || b->npred > 1)
-			fprintf(f, "%sbb%d:\n", locprefix, id0+b->id);
+			fprintf(f, "%sbb%d:\n", gasloc, id0+b->id);
 		for (i=b->ins; i!=&b->ins[b->nins]; i++)
 			emitins(*i, fn, f);
 		lbl = 1;
 		switch (b->jmp.type) {
 		case Jret0:
-			for (r=&rclob[NRClob]; r>rclob;)
+			for (r=&amd64_sysv_rclob[NCLR]; r>amd64_sysv_rclob;)
 				if (fn->reg & BIT(*--r)) {
 					itmp.arg[0] = TMP(*r);
 					emitf("popq %L0", &itmp, fn, f);
@@ -554,21 +537,21 @@ emitfn(Fn *fn, FILE *f)
 		Jmp:
 			if (b->s1 != b->link)
 				fprintf(f, "\tjmp %sbb%d\n",
-					locprefix, id0+b->s1->id);
+					gasloc, id0+b->s1->id);
 			else
 				lbl = 0;
 			break;
 		default:
-			c = b->jmp.type - Jxjc;
-			if (0 <= c && c <= NXICmp) {
+			c = b->jmp.type - Jjf;
+			if (0 <= c && c < NCmp) { /* ctoa has NCmp entries */
 				if (b->link == b->s2) {
 					s = b->s1;
 					b->s1 = b->s2;
 					b->s2 = s;
 				} else
-					c = cneg(c);
+					c = cmpneg(c);
 				fprintf(f, "\tj%s %sbb%d\n", ctoa[c],
-					locprefix, id0+b->s2->id);
+					gasloc, id0+b->s2->id);
 				goto Jmp;
 			}
 			die("unhandled jump %d", b->jmp.type);
@@ -576,121 +559,3 @@ emitfn(Fn *fn, FILE *f)
 	}
 	id0 += fn->nblk;
 }
-
-void
-emitdat(Dat *d, FILE *f)
-{
-	static int align;
-	static char *dtoa[] = {
-		[DAlign] = ".align",
-		[DB] = "\t.byte",
-		[DH] = "\t.value",
-		[DW] = "\t.long",
-		[DL] = "\t.quad"
-	};
-
-	switch (d->type) {
-	case DStart:
-		align = 0;
-		fprintf(f, ".data\n");
-		break;
-	case DEnd:
-		break;
-	case DName:
-		if (!align)
-			fprintf(f, ".align 8\n");
-		if (d->export)
-			fprintf(f, ".globl %s%s\n", symprefix, d->u.str);
-		fprintf(f, "%s%s:\n", symprefix, d->u.str);
-		break;
-	case DZ:
-		fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num);
-		break;
-	default:
-		if (d->type == DAlign)
-			align = 1;
-
-		if (d->isstr) {
-			if (d->type != DB)
-				err("strings only supported for 'b' currently");
-			fprintf(f, "\t.ascii \"%s\"\n", d->u.str);
-		}
-		else if (d->isref) {
-			fprintf(f, "%s %s%+"PRId64"\n",
-				dtoa[d->type], d->u.ref.nam,
-				d->u.ref.off);
-		}
-		else {
-			fprintf(f, "%s %"PRId64"\n",
-				dtoa[d->type], d->u.num);
-		}
-		break;
-	}
-}
-
-typedef struct FBits FBits;
-
-struct FBits {
-	union {
-		int64_t n;
-		float f;
-		double d;
-	} bits;
-	int wide;
-	FBits *link;
-};
-
-static FBits *stash;
-
-int
-stashfp(int64_t n, int w)
-{
-	FBits **pb, *b;
-	int i;
-
-	/* does a dumb de-dup of fp constants
-	 * this should be the linker's job */
-	for (pb=&stash, i=0; (b=*pb); pb=&b->link, i++)
-		if (n == b->bits.n && w == b->wide)
-			return i;
-	b = emalloc(sizeof *b);
-	b->bits.n = n;
-	b->wide = w;
-	b->link = 0;
-	*pb = b;
-	return i;
-}
-
-void
-emitfin(FILE *f)
-{
-	FBits *b;
-	int i;
-
-	if (!stash)
-		return;
-	fprintf(f, "/* floating point constants */\n");
-	fprintf(f, ".data\n.align 8\n");
-	for (b=stash, i=0; b; b=b->link, i++)
-		if (b->wide)
-			fprintf(f,
-				"%sfp%d:\n"
-				"\t.quad %"PRId64
-				" /* %f */\n",
-				locprefix, i, b->bits.n,
-				b->bits.d
-			);
-	for (b=stash, i=0; b; b=b->link, i++)
-		if (!b->wide)
-			fprintf(f,
-				"%sfp%d:\n"
-				"\t.long %"PRId64
-				" /* %lf */\n",
-				locprefix, i, b->bits.n & 0xffffffff,
-				b->bits.f
-			);
-	while ((b=stash)) {
-		stash = b->link;
-		free(b);
-	}
-}
diff --git a/isel.c b/amd64/isel.c
index 6ce5dd0..1623b9b 100644
--- a/isel.c
+++ b/amd64/isel.c
@@ -28,46 +28,6 @@ struct ANum {
 static void amatch(Addr *, Ref, ANum *, Fn *, int);
 
 static int
-fcmptoi(int fc)
-{
-	switch (fc) {
-	default:   die("invalid fp comparison %d", fc);
-	case FCle: return ICule;
-	case FClt: return ICult;
-	case FCgt: return ICugt;
-	case FCge: return ICuge;
-	case FCne: return ICne;
-	case FCeq: return ICeq;
-	case FCo:  return ICxnp;
-	case FCuo: return ICxp;
-	}
-}
-
-static int
-iscmp(int op, int *pk, int *pc)
-{
-	if (Ocmpw <= op && op <= Ocmpw1) {
-		*pc = op - Ocmpw;
-		*pk = Kw;
-	}
-	else if (Ocmpl <= op && op <= Ocmpl1) {
-		*pc = op - Ocmpl;
-		*pk = Kl;
-	}
-	else if (Ocmps <= op && op <= Ocmps1) {
-		*pc = fcmptoi(op - Ocmps);
-		*pk = Ks;
-	}
-	else if (Ocmpd <= op && op <= Ocmpd1) {
-		*pc = fcmptoi(op - Ocmpd);
-		*pk = Kd;
-	}
-	else
-		return 0;
-	return 1;
-}
-
-static int
 noimm(Ref r, Fn *fn)
 {
 	int64_t val;
@@ -98,14 +58,8 @@ rslot(Ref r, Fn *fn)
 	return fn->tmp[r.val].slot;
 }
 
-static int
-argcls(Ins *i, int n)
-{
-	return opdesc[i->op].argcls[n][i->cls];
-}
-
 static void
-fixarg(Ref *r, int k, int phi, Fn *fn)
+fixarg(Ref *r, int k, int cpy, Fn *fn)
 {
 	Addr a, *m;
 	Ref r0, r1;
@@ -123,11 +77,11 @@ fixarg(Ref *r, int k, int phi, Fn *fn)
 		memset(&a, 0, sizeof a);
 		a.offset.type = CAddr;
 		a.offset.local = 1;
-		n = stashfp(fn->con[r0.val].bits.i, KWIDE(k));
+		n = gasstashfp(fn->con[r0.val].bits.i, KWIDE(k));
 		sprintf(a.offset.label, "fp%d", n);
 		fn->mem[fn->nmem-1] = a;
 	}
-	else if (!phi && k == Kl && noimm(r0, fn)) {
+	else if (!cpy && k == Kl && noimm(r0, fn)) {
 		/* load constants that do not fit in
 		 * a 32bit signed integer into a
 		 * long temporary
@@ -251,7 +205,7 @@ sel(Ins i, ANum *an, Fn *fn)
 			r0 = i.arg[1];
 		if (fn->tmp[r0.val].slot != -1)
 			err("unlikely argument %%%s in %s",
-				fn->tmp[r0.val].name, opdesc[i.op].name);
+				fn->tmp[r0.val].name, optab[i.op].name);
 		if (i.op == Odiv || i.op == Orem) {
 			emit(Oxidiv, k, R, r0, R);
 			emit(Osign, k, TMP(RDX), TMP(RAX), R);
@@ -340,7 +294,7 @@ Emit:
 			emit(Oadd, Kl, r1, i.arg[0], getcon(15, fn));
 			if (fn->tmp[i.arg[0].val].slot != -1)
 				err("unlikely argument %%%s in %s",
-					fn->tmp[i.arg[0].val].name, opdesc[i.op].name);
+					fn->tmp[i.arg[0].val].name, optab[i.op].name);
 		}
 		break;
 	default:
@@ -349,13 +303,13 @@ Emit:
 		if (isload(i.op))
 			goto case_Oload;
 		if (iscmp(i.op, &kc, &x)) {
-			emit(Oxset+x, k, i.to, R, R);
+			emit(Oflag+x, k, i.to, R, R);
 			i1 = curi;
 			if (selcmp(i.arg, kc, fn))
-				i1->op = Oxset + icmpop(x);
+				i1->op = Oflag + cmpop(x);
 			break;
 		}
-		die("unknown instruction %s", opdesc[i.op].name);
+		die("unknown instruction %s", optab[i.op].name);
 	}
 
 	while (i0 > curi && --i0) {
@@ -369,9 +323,9 @@ flagi(Ins *i0, Ins *i)
 {
 	while (i>i0) {
 		i--;
-		if (opdesc[i->op].sflag)
+		if (amd64_op[i->op].zflag)
 			return i;
-		if (opdesc[i->op].lflag)
+		if (amd64_op[i->op].lflag)
 			continue;
 		return 0;
 	}
@@ -402,22 +356,22 @@ seljmp(Blk *b, Fn *fn)
 	fi = flagi(b->ins, &b->ins[b->nins]);
 	if (!fi || !req(fi->to, r)) {
 		selcmp((Ref[2]){r, CON_Z}, Kw, fn); /* todo, long jnz */
-		b->jmp.type = Jxjc + ICne;
+		b->jmp.type = Jjf + Cine;
 	}
 	else if (iscmp(fi->op, &k, &c)) {
 		if (t->nuse == 1) {
 			if (selcmp(fi->arg, k, fn))
-				c = icmpop(c);
+				c = cmpop(c);
 			*fi = (Ins){.op = Onop};
 		}
-		b->jmp.type = Jxjc + c;
+		b->jmp.type = Jjf + c;
 	}
 	else if (fi->op == Oand && t->nuse == 1
 	     && (rtype(fi->arg[0]) == RTmp ||
 	         rtype(fi->arg[1]) == RTmp)) {
 		fi->op = Oxtest;
 		fi->to = R;
-		b->jmp.type = Jxjc + ICne;
+		b->jmp.type = Jjf + Cine;
 		if (rtype(fi->arg[1]) == RCon) {
 			r = fi->arg[1];
 			fi->arg[1] = fi->arg[0];
@@ -431,7 +385,7 @@ seljmp(Blk *b, Fn *fn)
 		 */
 		if (t->nuse == 1)
 			emit(Ocopy, Kw, R, r, R);
-		b->jmp.type = Jxjc + ICne;
+		b->jmp.type = Jjf + Cine;
 	}
 }
 
@@ -593,7 +547,7 @@ amatch(Addr *a, Ref r, ANum *ai, Fn *fn, int top)
  * requires use counts (as given by parsing)
  */
 void
-isel(Fn *fn)
+amd64_isel(Fn *fn)
 {
 	Blk *b, **sb;
 	Ins *i;
diff --git a/sysv.c b/amd64/sysv.c
index a88b044..dcaa812 100644
--- a/sysv.c
+++ b/amd64/sysv.c
@@ -90,25 +90,6 @@ typclass(AClass *a, Typ *t)
 	classify(a, t, &n, &e);
 }
 
-static void
-blit(Ref rstk, uint soff, Ref rsrc, uint sz, Fn *fn)
-{
-	Ref r, r1;
-	uint boff;
-
-	/* it's an impolite blit, we might go across the end
-	 * of the source object a little bit... */
-	for (boff=0; sz>0; sz-=8, soff+=8, boff+=8) {
-		r = newtmp("abi", Kl, fn);
-		r1 = newtmp("abi", Kl, fn);
-		emit(Ostorel, 0, R, r, r1);
-		emit(Oadd, Kl, r1, rstk, getcon(soff, fn));
-		r1 = newtmp("abi", Kl, fn);
-		emit(Oload, Kl, r, r1, R);
-		emit(Oadd, Kl, r1, rsrc, getcon(boff, fn));
-	}
-}
-
 static int
 retr(Ref reg[2], AClass *aret)
 {
@@ -226,15 +207,17 @@ argsclass(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret, Ref *env)
 	return ((6-nint) << 4) | ((8-nsse) << 8);
 }
 
-int rsave[] = {
+int amd64_sysv_rsave[] = {
 	RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX,
 	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-	XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14
+	XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, -1
 };
-int rclob[] = {RBX, R12, R13, R14, R15};
+int amd64_sysv_rclob[] = {RBX, R12, R13, R14, R15, -1};
 
-MAKESURE(rsave_has_correct_size, sizeof rsave == NRSave * sizeof(int));
-MAKESURE(rclob_has_correct_size, sizeof rclob == NRClob * sizeof(int));
+MAKESURE(sysv_arrays_ok,
+	sizeof amd64_sysv_rsave == (NGPS+NFPS+1) * sizeof(int) &&
+	sizeof amd64_sysv_rclob == (NCLR+1) * sizeof(int)
+);
 
 /* layout of call's second argument (RCall)
  *
@@ -248,7 +231,7 @@ MAKESURE(rclob_has_correct_size, sizeof rclob == NRClob * sizeof(int));
  */
 
 bits
-retregs(Ref r, int p[2])
+amd64_sysv_retregs(Ref r, int p[2])
 {
 	bits b;
 	int ni, nf;
@@ -273,7 +256,7 @@ retregs(Ref r, int p[2])
 }
 
 bits
-argregs(Ref r, int p[2])
+amd64_sysv_argregs(Ref r, int p[2])
 {
 	bits b;
 	int j, ni, nf, ra;
@@ -284,7 +267,7 @@ argregs(Ref r, int p[2])
 	nf = (r.val >> 8) & 15;
 	ra = (r.val >> 12) & 1;
 	for (j=0; j<ni; j++)
-		b |= BIT(rsave[j]);
+		b |= BIT(amd64_sysv_rsave[j]);
 	for (j=0; j<nf; j++)
 		b |= BIT(XMM0+j);
 	if (p) {
@@ -298,7 +281,7 @@ static Ref
 rarg(int ty, int *ni, int *ns)
 {
 	if (KBASE(ty) == 0)
-		return TMP(rsave[(*ni)++]);
+		return TMP(amd64_sysv_rsave[(*ni)++]);
 	else
 		return TMP(XMM0 + (*ns)++);
 }
@@ -531,7 +514,7 @@ chpred(Blk *b, Blk *bp, Blk *bp1)
 	}
 }
 
-void
+static void
 selvaarg(Fn *fn, Blk *b, Ins *i)
 {
 	Ref loc, lreg, lstk, nr, r0, r1, c4, c8, c16, c, ap;
@@ -618,12 +601,12 @@ selvaarg(Fn *fn, Blk *b, Ins *i)
 	b->s1 = breg;
 	b->s2 = bstk;
 	c = getcon(isint ? 48 : 176, fn);
-	emit(Ocmpw+ICult, Kw, r1, nr, c);
+	emit(Ocmpw+Ciult, Kw, r1, nr, c);
 	emit(Oloadsw, Kl, nr, r0, R);
 	emit(Oadd, Kl, r0, ap, isint ? CON_Z : c4);
 }
 
-void
+static void
 selvastart(Fn *fn, int fa, Ref ap)
 {
 	Ref r0, r1;
@@ -649,7 +632,7 @@ selvastart(Fn *fn, int fa, Ref ap)
 }
 
 void
-abi(Fn *fn)
+amd64_sysv_abi(Fn *fn)
 {
 	Blk *b;
 	Ins *i, *i0, *ip;
diff --git a/amd64/targ.c b/amd64/targ.c
new file mode 100644
index 0000000..e227574
--- /dev/null
+++ b/amd64/targ.c
@@ -0,0 +1,30 @@
+#include "all.h"
+
+Amd64Op amd64_op[NOp] = {
+#define O(op, t, x) [O##op] =
+#define X(nm, zf, lf) { nm, zf, lf, },
+	#include "../ops.h"
+};
+
+static int
+amd64_memargs(int op)
+{
+	return amd64_op[op].nmem;
+}
+
+Target T_amd64_sysv = {
+	.gpr0 = RAX,
+	.ngpr = NGPR,
+	.fpr0 = XMM0,
+	.nfpr = NFPR,
+	.rglob = BIT(RBP) | BIT(RSP),
+	.nrglob = 2,
+	.rsave = amd64_sysv_rsave,
+	.nrsave = {NGPS, NFPS},
+	.retregs = amd64_sysv_retregs,
+	.argregs = amd64_sysv_argregs,
+	.memargs = amd64_memargs,
+	.abi = amd64_sysv_abi,
+	.isel = amd64_isel,
+	.emitfn = amd64_emitfn,
+};
diff --git a/cfg.c b/cfg.c
index dff0765..ea1ae12 100644
--- a/cfg.c
+++ b/cfg.c
@@ -312,8 +312,8 @@ simpljmp(Fn *fn)
 			uffind(&b->s1, uf);
 		if (b->s2)
 			uffind(&b->s2, uf);
-		c = b->jmp.type - Jxjc;
-		if (0 <= c && c <= NXICmp)
+		c = b->jmp.type - Jjf;
+		if (0 <= c && c < NCmp) /* only Jjf..Jjf1 are flag jumps */
 		if (b->s1 == b->s2) {
 			b->jmp.type = Jjmp;
 			b->s2 = 0;
diff --git a/fold.c b/fold.c
index 6129421..55672dd 100644
--- a/fold.c
+++ b/fold.c
@@ -100,7 +100,7 @@ visitins(Ins *i, Fn *fn)
 
 	if (rtype(i->to) != RTmp)
 		return;
-	if (opdesc[i->op].cfold) {
+	if (optab[i->op].canfold) {
 		l = latval(i->arg[0]);
 		if (!req(i->arg[1], R))
 			r = latval(i->arg[1]);
@@ -114,7 +114,7 @@ visitins(Ins *i, Fn *fn)
 			v = opfold(i->op, i->cls, &fn->con[l], &fn->con[r], fn);
 	} else
 		v = Bot;
-	/* fprintf(stderr, "\nvisiting %s (%p)", opdesc[i->op].name, (void *)i); */
+	/* fprintf(stderr, "\nvisiting %s (%p)", optab[i->op].name, (void *)i); */
 	update(i->to.val, v, fn);
 }
 
@@ -360,7 +360,7 @@ foldint(Con *res, int op, int w, Con *cl, Con *cr)
 	else if (cl->type == CAddr || cr->type == CAddr) {
 		if (Ocmpl <= op && op <= Ocmpl1)
 			return 1;
-		err("invalid address operand for '%s'", opdesc[op].name);
+		err("invalid address operand for '%s'", optab[op].name);
 	}
 	switch (op) {
 	case Oadd:  x = l.u + r.u; break;
@@ -397,42 +397,42 @@ foldint(Con *res, int op, int w, Con *cl, Con *cr)
 			} else
 				op -= Ocmpl - Ocmpw;
 			switch (op - Ocmpw) {
-			case ICule: x = l.u <= r.u; break;
-			case ICult: x = l.u < r.u;  break;
-			case ICsle: x = l.s <= r.s; break;
-			case ICslt: x = l.s < r.s;  break;
-			case ICsgt: x = l.s > r.s;  break;
-			case ICsge: x = l.s >= r.s; break;
-			case ICugt: x = l.u > r.u;  break;
-			case ICuge: x = l.u >= r.u; break;
-			case ICeq:  x = l.u == r.u; break;
-			case ICne:  x = l.u != r.u; break;
+			case Ciule: x = l.u <= r.u; break;
+			case Ciult: x = l.u < r.u;  break;
+			case Cisle: x = l.s <= r.s; break;
+			case Cislt: x = l.s < r.s;  break;
+			case Cisgt: x = l.s > r.s;  break;
+			case Cisge: x = l.s >= r.s; break;
+			case Ciugt: x = l.u > r.u;  break;
+			case Ciuge: x = l.u >= r.u; break;
+			case Cieq:  x = l.u == r.u; break;
+			case Cine:  x = l.u != r.u; break;
 			default: die("unreachable");
 			}
 		}
 		else if (Ocmps <= op && op <= Ocmps1) {
 			switch (op - Ocmps) {
-			case FCle: x = l.fs <= r.fs; break;
-			case FClt: x = l.fs < r.fs;  break;
-			case FCgt: x = l.fs > r.fs;  break;
-			case FCge: x = l.fs >= r.fs; break;
-			case FCne: x = l.fs != r.fs; break;
-			case FCeq: x = l.fs == r.fs; break;
-			case FCo: x = l.fs < r.fs || l.fs >= r.fs; break;
-			case FCuo: x = !(l.fs < r.fs || l.fs >= r.fs); break;
+			case Cfle: x = l.fs <= r.fs; break;
+			case Cflt: x = l.fs < r.fs;  break;
+			case Cfgt: x = l.fs > r.fs;  break;
+			case Cfge: x = l.fs >= r.fs; break;
+			case Cfne: x = l.fs != r.fs; break;
+			case Cfeq: x = l.fs == r.fs; break;
+			case Cfo: x = l.fs < r.fs || l.fs >= r.fs; break;
+			case Cfuo: x = !(l.fs < r.fs || l.fs >= r.fs); break;
 			default: die("unreachable");
 			}
 		}
 		else if (Ocmpd <= op && op <= Ocmpd1) {
 			switch (op - Ocmpd) {
-			case FCle: x = l.fd <= r.fd; break;
-			case FClt: x = l.fd < r.fd;  break;
-			case FCgt: x = l.fd > r.fd;  break;
-			case FCge: x = l.fd >= r.fd; break;
-			case FCne: x = l.fd != r.fd; break;
-			case FCeq: x = l.fd == r.fd; break;
-			case FCo: x = l.fd < r.fd || l.fd >= r.fd; break;
-			case FCuo: x = !(l.fd < r.fd || l.fd >= r.fd); break;
+			case Cfle: x = l.fd <= r.fd; break;
+			case Cflt: x = l.fd < r.fd;  break;
+			case Cfgt: x = l.fd > r.fd;  break;
+			case Cfge: x = l.fd >= r.fd; break;
+			case Cfne: x = l.fd != r.fd; break;
+			case Cfeq: x = l.fd == r.fd; break;
+			case Cfo: x = l.fd < r.fd || l.fd >= r.fd; break;
+			case Cfuo: x = !(l.fd < r.fd || l.fd >= r.fd); break;
 			default: die("unreachable");
 			}
 		}
@@ -453,7 +453,7 @@ foldflt(Con *res, int op, int w, Con *cl, Con *cr)
 	double xd, ld, rd;
 
 	if (cl->type != CBits || cr->type != CBits)
-		err("invalid address operand for '%s'", opdesc[op].name);
+		err("invalid address operand for '%s'", optab[op].name);
 	if (w)  {
 		ld = cl->bits.d;
 		rd = cr->bits.d;
@@ -495,7 +495,7 @@ opfold(int op, int cls, Con *cl, Con *cr, Fn *fn)
 
 	if ((op == Odiv || op == Oudiv
 	|| op == Orem || op == Ourem) && czero(cr, KWIDE(cls)))
-		err("null divisor in '%s'", opdesc[op].name);
+		err("null divisor in '%s'", optab[op].name);
 	if (cls == Kw || cls == Kl) {
 		if (foldint(&c, op, cls == Kl, cl, cr))
 			return Bot;
diff --git a/gas.c b/gas.c
new file mode 100644
index 0000000..c1fd6df
--- /dev/null
+++ b/gas.c
@@ -0,0 +1,122 @@
+#include "all.h"
+
+
+char *gasloc, *gassym;
+
+/* emit GAS directives for one item of a data definition */
+void
+gasemitdat(Dat *d, FILE *f)
+{
+	static int align; /* DAlign seen since the last DStart? */
+	static char *dtoa[] = {
+		[DAlign] = ".align",
+		[DB] = "\t.byte",
+		[DH] = "\t.short",
+		[DW] = "\t.int",
+		[DL] = "\t.quad"
+	};
+
+	switch (d->type) {
+	case DStart:
+		align = 0;
+		fprintf(f, ".data\n");
+		break;
+	case DEnd:
+		break;
+	case DName:
+		if (!align) /* no explicit alignment: use 8 */
+			fprintf(f, ".align 8\n");
+		if (d->export)
+			fprintf(f, ".globl %s%s\n", gassym, d->u.str);
+		fprintf(f, "%s%s:\n", gassym, d->u.str);
+		break;
+	case DZ:
+		fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num);
+		break;
+	default:
+		if (d->type == DAlign)
+			align = 1;
+		if (d->isstr) {
+			if (d->type != DB)
+				err("strings only supported for 'b' currently");
+			fprintf(f, "\t.ascii \"%s\"\n", d->u.str);
+		}
+		else if (d->isref) {
+			/* same gassym prefix as the definition (DName) */
+			fprintf(f, "%s %s%s%+"PRId64"\n", dtoa[d->type],
+				gassym, d->u.ref.nam, d->u.ref.off);
+		}
+		else {
+			fprintf(f, "%s %"PRId64"\n",
+				dtoa[d->type], d->u.num);
+		}
+		break;
+	}
+}
+
+typedef struct FBits FBits;
+/* one stashed floating-point constant, node of a singly linked list */
+struct FBits {
+	union {
+		int64_t n; /* bit pattern, as passed to gasstashfp */
+		float f;   /* same storage, printed in the comment of narrow constants */
+		double d;  /* same storage, printed in the comment of wide constants */
+	} bits;
+	int wide;    /* non-zero: emitted as .quad, else as .long */
+	FBits *link; /* next constant, 0 at the end of the list */
+};
+
+static FBits *stash; /* list head; a constant's list position is its fp<index> */
+
+int
+gasstashfp(int64_t n, int w)
+{
+	FBits **pp, *fb;
+	int idx = 0;
+
+	/* naive de-dup: reuse the index of an equal constant */
+	for (pp=&stash; (fb=*pp); pp=&fb->link, idx++)
+		if (fb->wide == w && fb->bits.n == n)
+			return idx;
+	/* not found: append, so earlier indices stay valid */
+	fb = emalloc(sizeof *fb);
+	fb->link = 0;
+	fb->wide = w;
+	fb->bits.n = n;
+	*pp = fb;
+	return idx;
+}
+
+/* emit the stashed fp constants as <gasloc>fp<index> labels
+ * in the data section, then free the stash */
+void
+gasemitfin(FILE *f)
+{
+	FBits *fb, *next;
+	int idx;
+
+	if (stash == 0)
+		return;
+	fputs("/* floating point constants */\n", f);
+	fputs(".data\n.align 8\n", f);
+	/* all 8-byte constants first, then the 4-byte ones */
+	for (idx=0, fb=stash; fb; fb=fb->link, idx++) {
+		if (!fb->wide)
+			continue;
+		fprintf(f,
+			"%sfp%d:\n\t.quad %"PRId64" /* %f */\n",
+			gasloc, idx, fb->bits.n, fb->bits.d);
+	}
+	for (idx=0, fb=stash; fb; fb=fb->link, idx++) {
+		if (fb->wide)
+			continue;
+		fprintf(f,
+			"%sfp%d:\n\t.long %"PRId64" /* %lf */\n",
+			gasloc, idx, fb->bits.n & 0xffffffff, fb->bits.f);
+	}
+	for (fb=stash; fb; fb=next) {
+		next = fb->link;
+		free(fb);
+	}
+	stash = 0;
+}
diff --git a/live.c b/live.c
index 18c9b63..6e63705 100644
--- a/live.c
+++ b/live.c
@@ -104,31 +104,39 @@ Again:
 
 		memset(phi, 0, f->ntmp * sizeof phi[0]);
 		memset(nlv, 0, sizeof nlv);
-		b->out->t[0] |= RGLOB;
+		b->out->t[0] |= T.rglob;
 		bscopy(b->in, b->out);
 		for (t=0; bsiter(b->in, &t); t++) {
 			phifix(t, phi, f->tmp);
 			nlv[KBASE(f->tmp[t].cls)]++;
 		}
 		if (rtype(b->jmp.arg) == RCall) {
-			assert(bscount(b->in) == NRGlob && nlv[0] == NRGlob && nlv[1] == 0);
-			b->in->t[0] |= retregs(b->jmp.arg, nlv);
+			assert((int)bscount(b->in) == T.nrglob &&
+				nlv[0] == T.nrglob &&
+				nlv[1] == 0);
+			b->in->t[0] |= T.retregs(b->jmp.arg, nlv);
 		} else
 			bset(b->jmp.arg, b, nlv, phi, f->tmp);
 		for (k=0; k<2; k++)
 			b->nlive[k] = nlv[k];
 		for (i=&b->ins[b->nins]; i!=b->ins;) {
 			if ((--i)->op == Ocall && rtype(i->arg[1]) == RCall) {
-				b->in->t[0] &= ~retregs(i->arg[1], m);
-				for (k=0; k<2; k++)
+				b->in->t[0] &= ~T.retregs(i->arg[1], m);
+				for (k=0; k<2; k++) {
 					nlv[k] -= m[k];
-				if (nlv[0] + NISave > b->nlive[0])
-					b->nlive[0] = nlv[0] + NISave;
-				if (nlv[1] + NFSave > b->nlive[1])
-					b->nlive[1] = nlv[1] + NFSave;
-				b->in->t[0] |= argregs(i->arg[1], m);
-				for (k=0; k<2; k++)
+					/* caller-save registers are used
+					 * by the callee, in that sense,
+					 * right in the middle of the call,
+					 * they are live: */
+					nlv[k] += T.nrsave[k];
+					if (nlv[k] > b->nlive[k])
+						b->nlive[k] = nlv[k];
+				}
+				b->in->t[0] |= T.argregs(i->arg[1], m);
+				for (k=0; k<2; k++) {
+					nlv[k] -= T.nrsave[k];
 					nlv[k] += m[k];
+				}
 			}
 			if (!req(i->to, R)) {
 				assert(rtype(i->to) == RTmp);
diff --git a/main.c b/main.c
index 4d2e6bd..6098dee 100644
--- a/main.c
+++ b/main.c
@@ -3,6 +3,18 @@
 #include <ctype.h>
 #include <getopt.h>
 
+Target T; /* active target; main() copies Deftgt or a tmap entry into it */
+
+extern Target T_amd64_sysv;
+
+static struct TMap {
+	char *name; /* name accepted by the -t option */
+	Target *T;  /* target description copied into T when selected */
+} tmap[] = {
+	{ "amd64_sysv", &T_amd64_sysv },
+	{ 0, 0 } /* terminator: loops over tmap stop at a null name */
+};
+
 enum Asm {
 	Gasmacho,
 	Gaself,
@@ -33,7 +45,7 @@ data(Dat *d)
 		fputs("/* end data */\n\n", outf);
 		freeall();
 	}
-	emitdat(d, outf);
+	gasemitdat(d, outf);
 }
 
 static void
@@ -62,10 +74,10 @@ func(Fn *fn)
 	copy(fn);
 	filluse(fn);
 	fold(fn);
-	abi(fn);
+	T.abi(fn);
 	fillpreds(fn);
 	filluse(fn);
-	isel(fn);
+	T.isel(fn);
 	fillrpo(fn);
 	filllive(fn);
 	fillcost(fn);
@@ -83,7 +95,7 @@ func(Fn *fn)
 		} else
 			fn->rpo[n]->link = fn->rpo[n+1];
 	if (!dbg) {
-		emitfn(fn, outf);
+		T.emitfn(fn, outf);
 		fprintf(outf, "/* end function %s */\n\n", fn->name);
 	} else
 		fprintf(stderr, "\n");
@@ -93,13 +105,15 @@ func(Fn *fn)
 int
 main(int ac, char *av[])
 {
-	FILE *inf;
-	char *f;
+	struct TMap *tm;
+	FILE *inf, *hf;
+	char *f, *sep;
 	int c, asm;
 
-	asm = Defaultasm;
+	asm = Defasm;
+	T = Deftgt;
 	outf = stdout;
-	while ((c = getopt(ac, av, "hd:o:G:")) != -1)
+	while ((c = getopt(ac, av, "hd:o:G:t:")) != -1)
 		switch (c) {
 		case 'd':
 			for (; *optarg; optarg++)
@@ -112,6 +126,18 @@ main(int ac, char *av[])
 			if (strcmp(optarg, "-") != 0)
 				outf = fopen(optarg, "w");
 			break;
+		case 't':
+			for (tm=tmap;; tm++) {
+				if (!tm->name) {
+					fprintf(stderr, "unknown target '%s'\n", optarg);
+					exit(1);
+				}
+				if (strcmp(optarg, tm->name) == 0) {
+					T = *tm->T;
+					break;
+				}
+			}
+			break;
 		case 'G':
 			if (strcmp(optarg, "e") == 0)
 				asm = Gaself;
@@ -124,22 +150,28 @@ main(int ac, char *av[])
 			break;
 		case 'h':
 		default:
-			fprintf(stderr, "%s [OPTIONS] {file.ssa, -}\n", av[0]);
-			fprintf(stderr, "\t%-10s prints this help\n", "-h");
-			fprintf(stderr, "\t%-10s output to file\n", "-o file");
-			fprintf(stderr, "\t%-10s generate gas (e) or osx (m) asm\n", "-G {e,m}");
-			fprintf(stderr, "\t%-10s dump debug information\n", "-d <flags>");
+			hf = c != 'h' ? stderr : stdout;
+			fprintf(hf, "%s [OPTIONS] {file.ssa, -}\n", av[0]);
+			fprintf(hf, "\t%-11s prints this help\n", "-h");
+			fprintf(hf, "\t%-11s output to file\n", "-o file");
+			fprintf(hf, "\t%-11s generate for a target among:\n", "-t <target>");
+			fprintf(hf, "\t%-11s ", "");
+			for (tm=tmap, sep=""; tm->name; tm++, sep=", ")
+				fprintf(hf, "%s%s", sep, tm->name);
+			fprintf(hf, "\n");
+			fprintf(hf, "\t%-11s generate gas (e) or osx (m) asm\n", "-G {e,m}");
+			fprintf(hf, "\t%-11s dump debug information\n", "-d <flags>");
 			exit(c != 'h');
 		}
 
 	switch (asm) {
 	case Gaself:
-		locprefix = ".L";
-		symprefix = "";
+		gasloc = ".L";
+		gassym = "";
 		break;
 	case Gasmacho:
-		locprefix = "L";
-		symprefix = "_";
+		gasloc = "L";
+		gassym = "_";
 		break;
 	}
 
@@ -159,7 +191,7 @@ main(int ac, char *av[])
 	} while (++optind < ac);
 
 	if (!dbg)
-		emitfin(outf);
+		gasemitfin(outf);
 
 	exit(0);
 }
diff --git a/mem.c b/mem.c
index fd6ee16..eda3d18 100644
--- a/mem.c
+++ b/mem.c
@@ -34,9 +34,9 @@ memopt(Fn *fn)
 			if (isstore(l->op))
 			if (req(i->to, l->arg[1]) && !req(i->to, l->arg[0]))
 			if (s == -1 || s == storesz(l))
-			if (k == -1 || k == opdesc[l->op].argcls[0][0]) {
+			if (k == -1 || k == optab[l->op].argcls[0][0]) {
 				s = storesz(l);
-				k = opdesc[l->op].argcls[0][0];
+				k = optab[l->op].argcls[0][0];
 				continue;
 			}
 			goto Skip;
diff --git a/ops.h b/ops.h
new file mode 100644
index 0000000..9b357a5
--- /dev/null
+++ b/ops.h
@@ -0,0 +1,167 @@
+#ifndef X /* amd64-only columns; dropped unless the includer defines X */
+	#define X(NMemArgs, SetsZeroFlag, LeavesFlags)
+#endif
+/* T(): argcls rows for arg 0 (a-d) and arg 1 (e-h), indexed by class w,l,s,d */
+#define T(a,b,c,d,e,f,g,h) {                          \
+	{[Kw]=K##a, [Kl]=K##b, [Ks]=K##c, [Kd]=K##d}, \
+	{[Kw]=K##e, [Kl]=K##f, [Ks]=K##g, [Kd]=K##h}  \
+}
+
+
+/*********************/
+/* PUBLIC OPERATIONS */
+/*********************/
+
+/* Arithmetic and Bits */
+O(add,     T(w,l,s,d, w,l,s,d), 1) X(2, 1, 0)
+O(sub,     T(w,l,s,d, w,l,s,d), 1) X(2, 1, 0)
+O(div,     T(w,l,s,d, w,l,s,d), 1) X(0, 0, 0)
+O(rem,     T(w,l,e,e, w,l,e,e), 1) X(0, 0, 0)
+O(udiv,    T(w,l,e,e, w,l,e,e), 1) X(0, 0, 0)
+O(urem,    T(w,l,e,e, w,l,e,e), 1) X(0, 0, 0)
+O(mul,     T(w,l,s,d, w,l,s,d), 1) X(2, 0, 0)
+O(and,     T(w,l,e,e, w,l,e,e), 1) X(2, 1, 0)
+O(or,      T(w,l,e,e, w,l,e,e), 1) X(2, 1, 0)
+O(xor,     T(w,l,e,e, w,l,e,e), 1) X(2, 1, 0)
+O(sar,     T(w,l,e,e, w,w,e,e), 1) X(1, 1, 0)
+O(shr,     T(w,l,e,e, w,w,e,e), 1) X(1, 1, 0)
+O(shl,     T(w,l,e,e, w,w,e,e), 1) X(1, 1, 0)
+
+/* Comparisons */
+O(ceqw,    T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(cnew,    T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(csgew,   T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(csgtw,   T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(cslew,   T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(csltw,   T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(cugew,   T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(cugtw,   T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(culew,   T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(cultw,   T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+
+O(ceql,    T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(cnel,    T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(csgel,   T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(csgtl,   T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(cslel,   T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(csltl,   T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(cugel,   T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(cugtl,   T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(culel,   T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(cultl,   T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+
+O(ceqs,    T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+O(cges,    T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+O(cgts,    T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+O(cles,    T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+O(clts,    T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+O(cnes,    T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+O(cos,     T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+O(cuos,    T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+
+O(ceqd,    T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+O(cged,    T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+O(cgtd,    T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+O(cled,    T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+O(cltd,    T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+O(cned,    T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+O(cod,     T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+O(cuod,    T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+
+/* Memory */
+O(storeb,  T(w,e,e,e, m,e,e,e), 0) X(0, 0, 1)
+O(storeh,  T(w,e,e,e, m,e,e,e), 0) X(0, 0, 1)
+O(storew,  T(w,e,e,e, m,e,e,e), 0) X(0, 0, 1)
+O(storel,  T(l,e,e,e, m,e,e,e), 0) X(0, 0, 1)
+O(stores,  T(s,e,e,e, m,e,e,e), 0) X(0, 0, 1)
+O(stored,  T(d,e,e,e, m,e,e,e), 0) X(0, 0, 1)
+
+O(loadsb,  T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(loadub,  T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(loadsh,  T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(loaduh,  T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(loadsw,  T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(loaduw,  T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(load,    T(m,m,m,m, x,x,x,x), 0) X(0, 0, 1)
+
+/* Extensions and Truncations */
+O(extsb,   T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1)
+O(extub,   T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1)
+O(extsh,   T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1)
+O(extuh,   T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1)
+O(extsw,   T(e,w,e,e, e,x,e,e), 1) X(0, 0, 1)
+O(extuw,   T(e,w,e,e, e,x,e,e), 1) X(0, 0, 1)
+
+O(exts,    T(e,e,e,s, e,e,e,x), 1) X(0, 0, 1)
+O(truncd,  T(e,e,d,e, e,e,x,e), 1) X(0, 0, 1)
+O(stosi,   T(s,s,e,e, x,x,e,e), 1) X(0, 0, 1)
+O(dtosi,   T(d,d,e,e, x,x,e,e), 1) X(0, 0, 1)
+O(swtof,   T(e,e,w,w, e,e,x,x), 1) X(0, 0, 1)
+O(sltof,   T(e,e,l,l, e,e,x,x), 1) X(0, 0, 1)
+O(cast,    T(s,d,w,l, x,x,x,x), 1) X(0, 0, 1)
+
+/* Stack Allocation */
+O(alloc4,  T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0)
+O(alloc8,  T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0)
+O(alloc16, T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0)
+
+/* Variadic Function Helpers */
+O(vaarg,   T(m,m,m,m, x,x,x,x), 0) X(0, 0, 0)
+O(vastart, T(m,e,e,e, x,e,e,e), 0) X(0, 0, 0)
+
+O(copy,    T(w,l,s,d, x,x,x,x), 0) X(0, 0, 1)
+
+
+/****************************************/
+/* INTERNAL OPERATIONS (keep nop first) */
+/****************************************/
+
+/* Miscellaneous and Architecture-Specific Operations */
+O(nop,     T(x,x,x,x, x,x,x,x), 0) X(0, 0, 1)
+O(addr,    T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(swap,    T(w,l,s,d, w,l,s,d), 0) X(1, 0, 0)
+O(sign,    T(w,l,e,e, x,x,e,e), 0) X(0, 0, 0)
+O(salloc,  T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0)
+O(xidiv,   T(w,l,e,e, x,x,e,e), 0) X(1, 0, 0)
+O(xdiv,    T(w,l,e,e, x,x,e,e), 0) X(1, 0, 0)
+O(xcmp,    T(w,l,s,d, w,l,s,d), 0) X(1, 1, 0)
+O(xtest,   T(w,l,e,e, w,l,e,e), 0) X(1, 1, 0)
+
+/* Arguments, Parameters, and Calls */
+O(par,     T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0)
+O(parc,    T(e,x,e,e, e,x,e,e), 0) X(0, 0, 0)
+O(pare,    T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0)
+O(arg,     T(w,l,s,d, x,x,x,x), 0) X(0, 0, 0)
+O(argc,    T(e,x,e,e, e,l,e,e), 0) X(0, 0, 0)
+O(arge,    T(w,l,s,d, x,x,x,x), 0) X(0, 0, 0)
+O(call,    T(m,m,m,m, x,x,x,x), 0) X(0, 0, 0)
+O(vacall,  T(m,m,m,m, x,x,x,x), 0) X(0, 0, 0)
+
+/* Flags Setting */
+O(flagieq,  T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagine,  T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagisge, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagisgt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagisle, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagislt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagiuge, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagiugt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagiule, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagiult, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagfeq,  T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagfge,  T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagfgt,  T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagfle,  T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagflt,  T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagfne,  T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagfo,   T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagfuo,  T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+
+
+#undef T
+#undef X
+#undef O
+
+/*
+| column -t -o ' '
+*/
diff --git a/parse.c b/parse.c
index b393fc2..69bd74e 100644
--- a/parse.c
+++ b/parse.c
@@ -4,91 +4,13 @@
 
 enum {
 	Ke = -2, /* Erroneous mode */
-	Km = Kl, /* Memory pointer (for x64) */
+	Km = Kl, /* Memory pointer */
 };
 
-OpDesc opdesc[NOp] = {
-#define A(a,b,c,d) {[Kw]=K##a, [Kl]=K##b, [Ks]=K##c, [Kd]=K##d}
-
-	/*            NAME       NM      ARGCLS0     ARGCLS1  SF LF FLD*/
-	[Oadd]    = { "add",      2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0, 1 },
-	[Osub]    = { "sub",      2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0, 1 },
-	[Odiv]    = { "div",      2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0, 1 },
-	[Orem]    = { "rem",      2, {A(w,l,e,e), A(w,l,e,e)}, 0, 0, 1 },
-	[Oudiv]   = { "udiv",     2, {A(w,l,e,e), A(w,l,e,e)}, 0, 0, 1 },
-	[Ourem]   = { "urem",     2, {A(w,l,e,e), A(w,l,e,e)}, 0, 0, 1 },
-	[Omul]    = { "mul",      2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0, 1 },
-	[Oand]    = { "and",      2, {A(w,l,e,e), A(w,l,e,e)}, 1, 0, 1 },
-	[Oor]     = { "or",       2, {A(w,l,e,e), A(w,l,e,e)}, 1, 0, 1 },
-	[Oxor]    = { "xor",      2, {A(w,l,e,e), A(w,l,e,e)}, 1, 0, 1 },
-	[Osar]    = { "sar",      1, {A(w,l,e,e), A(w,w,e,e)}, 1, 0, 1 },
-	[Oshr]    = { "shr",      1, {A(w,l,e,e), A(w,w,e,e)}, 1, 0, 1 },
-	[Oshl]    = { "shl",      1, {A(w,l,e,e), A(w,w,e,e)}, 1, 0, 1 },
-	[Ostored] = { "stored",   0, {A(d,e,e,e), A(m,e,e,e)}, 0, 1, 0 },
-	[Ostores] = { "stores",   0, {A(s,e,e,e), A(m,e,e,e)}, 0, 1, 0 },
-	[Ostorel] = { "storel",   0, {A(l,e,e,e), A(m,e,e,e)}, 0, 1, 0 },
-	[Ostorew] = { "storew",   0, {A(w,e,e,e), A(m,e,e,e)}, 0, 1, 0 },
-	[Ostoreh] = { "storeh",   0, {A(w,e,e,e), A(m,e,e,e)}, 0, 1, 0 },
-	[Ostoreb] = { "storeb",   0, {A(w,e,e,e), A(m,e,e,e)}, 0, 1, 0 },
-	[Oload]   = { "load",     0, {A(m,m,m,m), A(x,x,x,x)}, 0, 1, 0 },
-	[Oloadsw] = { "loadsw",   0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 },
-	[Oloaduw] = { "loaduw",   0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 },
-	[Oloadsh] = { "loadsh",   0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 },
-	[Oloaduh] = { "loaduh",   0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 },
-	[Oloadsb] = { "loadsb",   0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 },
-	[Oloadub] = { "loadub",   0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 },
-	[Oextsw]  = { "extsw",    0, {A(e,w,e,e), A(e,x,e,e)}, 0, 1, 1 },
-	[Oextuw]  = { "extuw",    0, {A(e,w,e,e), A(e,x,e,e)}, 0, 1, 1 },
-	[Oextsh]  = { "extsh",    0, {A(w,w,e,e), A(x,x,e,e)}, 0, 1, 1 },
-	[Oextuh]  = { "extuh",    0, {A(w,w,e,e), A(x,x,e,e)}, 0, 1, 1 },
-	[Oextsb]  = { "extsb",    0, {A(w,w,e,e), A(x,x,e,e)}, 0, 1, 1 },
-	[Oextub]  = { "extub",    0, {A(w,w,e,e), A(x,x,e,e)}, 0, 1, 1 },
-	[Oexts]   = { "exts",     0, {A(e,e,e,s), A(e,e,e,x)}, 0, 1, 1 },
-	[Otruncd] = { "truncd",   0, {A(e,e,d,e), A(e,e,x,e)}, 0, 1, 1 },
-	[Ostosi]  = { "stosi",    0, {A(s,s,e,e), A(x,x,e,e)}, 0, 1, 1 },
-	[Odtosi]  = { "dtosi",    0, {A(d,d,e,e), A(x,x,e,e)}, 0, 1, 1 },
-	[Oswtof]  = { "swtof",    0, {A(e,e,w,w), A(e,e,x,x)}, 0, 1, 1 },
-	[Osltof]  = { "sltof",    0, {A(e,e,l,l), A(e,e,x,x)}, 0, 1, 1 },
-	[Ocast]   = { "cast",     0, {A(s,d,w,l), A(x,x,x,x)}, 0, 1, 1 },
-	[Ocopy]   = { "copy",     1, {A(w,l,s,d), A(x,x,x,x)}, 0, 1, 0 },
-	[Onop]    = { "nop",      0, {A(x,x,x,x), A(x,x,x,x)}, 0, 1, 0 },
-	[Oswap]   = { "swap",     2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0, 0 },
-	[Osign]   = { "sign",     0, {A(w,l,e,e), A(x,x,e,e)}, 0, 0, 0 },
-	[Osalloc] = { "salloc",   0, {A(e,l,e,e), A(e,x,e,e)}, 0, 0, 0 },
-	[Oxidiv]  = { "xidiv",    1, {A(w,l,e,e), A(x,x,e,e)}, 0, 0, 0 },
-	[Oxdiv]   = { "xdiv",     1, {A(w,l,e,e), A(x,x,e,e)}, 0, 0, 0 },
-	[Oxcmp]   = { "xcmp",     1, {A(w,l,s,d), A(w,l,s,d)}, 1, 0, 0 },
-	[Oxtest]  = { "xtest",    1, {A(w,l,e,e), A(w,l,e,e)}, 1, 0, 0 },
-	[Oaddr]   = { "addr",     0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 },
-	[Opar]    = { "par",      0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0, 0 },
-	[Opare]   = { "pare",     0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0, 0 },
-	[Oparc]   = { "parc",     0, {A(e,x,e,e), A(e,x,e,e)}, 0, 0, 0 },
-	[Oarg]    = { "arg",      0, {A(w,l,s,d), A(x,x,x,x)}, 0, 0, 0 },
-	[Oarge]   = { "arge",     0, {A(w,l,s,d), A(x,x,x,x)}, 0, 0, 0 },
-	[Oargc]   = { "argc",     0, {A(e,x,e,e), A(e,l,e,e)}, 0, 0, 0 },
-	[Ocall]   = { "call",     0, {A(m,m,m,m), A(x,x,x,x)}, 0, 0, 0 },
-	[Ovacall] = { "vacall",   0, {A(m,m,m,m), A(x,x,x,x)}, 0, 0, 0 },
-	[Oxsetnp] = { "xsetnp",   0, {A(x,x,e,e), A(x,x,e,e)}, 0, 0, 0 },
-	[Oxsetp]  = { "xsetp",    0, {A(x,x,e,e), A(x,x,e,e)}, 0, 0, 0 },
-	[Oalloc]   = { "alloc4",  1, {A(e,l,e,e), A(e,x,e,e)}, 0, 0, 0 },
-	[Oalloc+1] = { "alloc8",  1, {A(e,l,e,e), A(e,x,e,e)}, 0, 0, 0 },
-	[Oalloc+2] = { "alloc16", 1, {A(e,l,e,e), A(e,x,e,e)}, 0, 0, 0 },
-	[Ovaarg]   = { "vaarg",   0, {A(m,m,m,m), A(x,x,x,x)}, 0, 0, 0 },
-	[Ovastart] = { "vastart", 0, {A(m,e,e,e), A(x,e,e,e)}, 0, 0, 0 },
-#define X(c) \
-	[Ocmpw+IC##c] = { "c"    #c "w", 0, {A(w,w,e,e), A(w,w,e,e)}, 1, 0, 1 }, \
-	[Ocmpl+IC##c] = { "c"    #c "l", 0, {A(l,l,e,e), A(l,l,e,e)}, 1, 0, 1 }, \
-	[Oxset+IC##c] = { "xset" #c,     0, {A(x,x,e,e), A(x,x,e,e)}, 0, 1, 0 },
-	ICMPS(X)
-#undef X
-#define X(c) \
-	[Ocmps+FC##c] = { "c"    #c "s", 0, {A(s,s,e,e), A(s,s,e,e)}, 1, 0, 1 }, \
-	[Ocmpd+FC##c] = { "c"    #c "d", 0, {A(d,d,e,e), A(d,d,e,e)}, 1, 0, 1 },
-	FCMPS(X)
-#undef X
-
+Op optab[NOp] = { /* one row per O() line of ops.h, indexed by O<name> */
+#define O(op, t, cf) [O##op]={#op, t, cf},
+	#include "ops.h"
 };
-#undef A
 
 typedef enum {
 	PXXX,
@@ -242,8 +164,8 @@ lexinit()
 	if (done)
 		return;
 	for (i=0; i<NPubOp; ++i)
-		if (opdesc[i].name)
-			kwmap[i] = opdesc[i].name;
+		if (optab[i].name)
+			kwmap[i] = optab[i].name;
 	assert(Ntok <= CHAR_MAX);
 	for (i=0; i<Ntok; ++i)
 		if (kwmap[i]) {
@@ -810,26 +732,26 @@ typecheck(Fn *fn)
 		}
 		for (i=b->ins; i-b->ins < b->nins; i++)
 			for (n=0; n<2; n++) {
-				k = opdesc[i->op].argcls[n][i->cls];
+				k = optab[i->op].argcls[n][i->cls];
 				r = i->arg[n];
 				t = &fn->tmp[r.val];
 				if (k == Ke)
 					err("invalid instruction type in %s",
-						opdesc[i->op].name);
+						optab[i->op].name);
 				if (rtype(r) == RType)
 					continue;
 				if (rtype(r) != -1 && k == Kx)
 					err("no %s operand expected in %s",
 						n == 1 ? "second" : "first",
-						opdesc[i->op].name);
+						optab[i->op].name);
 				if (rtype(r) == -1 && k != Kx)
 					err("missing %s operand in %s",
 						n == 1 ? "second" : "first",
-						opdesc[i->op].name);
+						optab[i->op].name);
 				if (!usecheck(r, k, fn))
 					err("invalid type for %s operand %%%s in %s",
 						n == 1 ? "second" : "first",
-						t->name, opdesc[i->op].name);
+						t->name, optab[i->op].name);
 			}
 		r = b->jmp.arg;
 		if (isret(b->jmp.type)) {
@@ -866,7 +788,10 @@ parsefn(int export)
 	curf->tmp = vnew(curf->ntmp, sizeof curf->tmp[0], Pfn);
 	curf->con = vnew(curf->ncon, sizeof curf->con[0], Pfn);
 	for (i=0; i<Tmp0; ++i)
-		newtmp(0, i < XMM0 ? Kl : Kd, curf);
+		if (T.fpr0 <= i && i < T.fpr0 + T.nfpr)
+			newtmp(0, Kd, curf);
+		else
+			newtmp(0, Kl, curf);
 	curf->con[0].type = CBits;
 	curf->export = export;
 	blink = &curf->start;
@@ -1228,29 +1153,12 @@ printref(Ref r, Fn *fn, FILE *f)
 void
 printfn(Fn *fn, FILE *f)
 {
+	static char ktoc[] = "wlsd";
 	static char *jtoa[NJmp] = {
-		[Jret0]     = "ret",
-		[Jretw]     = "retw",
-		[Jretl]     = "retl",
-		[Jretc]     = "retc",
-		[Jrets]     = "rets",
-		[Jretd]     = "retd",
-		[Jjnz]      = "jnz",
-		[Jxjnp]     = "xjnp",
-		[Jxjp]      = "xjp",
-	#define X(c) [Jxjc+IC##c] = "xj" #c,
-		ICMPS(X)
+	#define X(j) [J##j] = #j,
+		JMPS(X)
 	#undef X
 	};
-	static char prcls[NOp] = {
-		[Oarg] = 1,
-		[Oswap] = 1,
-		[Oxcmp] = 1,
-		[Oxtest] = 1,
-		[Oxdiv] = 1,
-		[Oxidiv] = 1,
-	};
-	static char ktoc[] = "wlsd";
 	Blk *b;
 	Phi *p;
 	Ins *i;
@@ -1282,10 +1190,18 @@ printfn(Fn *fn, FILE *f)
 				printref(i->to, fn, f);
 				fprintf(f, " =%c ", ktoc[i->cls]);
 			}
-			assert(opdesc[i->op].name);
-			fprintf(f, "%s", opdesc[i->op].name);
-			if (req(i->to, R) && prcls[i->op])
-				fputc(ktoc[i->cls], f);
+			assert(optab[i->op].name);
+			fprintf(f, "%s", optab[i->op].name);
+			if (req(i->to, R))
+				switch (i->op) {
+				case Oarg:
+				case Oswap:
+				case Oxcmp:
+				case Oxtest:
+				case Oxdiv:
+				case Oxidiv:
+					fputc(ktoc[i->cls], f);
+				}
 			if (!req(i->arg[0], R)) {
 				fprintf(f, " ");
 				printref(i->arg[0], fn, f);
diff --git a/rega.c b/rega.c
index 3d83327..02429a6 100644
--- a/rega.c
+++ b/rega.c
@@ -8,8 +8,8 @@
 typedef struct RMap RMap;
 
 struct RMap {
-	int t[NIReg+NFReg];
-	int r[NIReg+NFReg];
+	int t[Tmp0];
+	int r[Tmp0];
 	BSet b[1];
 	int n;
 };
@@ -78,10 +78,12 @@ static void
 radd(RMap *m, int t, int r)
 {
 	assert((t >= Tmp0 || t == r) && "invalid temporary");
-	assert(((RAX <= r && r < RAX + NIReg) || (XMM0 <= r && r < XMM0 + NFReg)) && "invalid register");
+	assert(((T.gpr0 <= r && r < T.gpr0 + T.ngpr)
+		|| (T.fpr0 <= r && r < T.fpr0 + T.nfpr))
+		&& "invalid register");
 	assert(!bshas(m->b, t) && "temporary has mapping");
 	assert(!bshas(m->b, r) && "register already allocated");
-	assert(m->n <= NIReg+NFReg && "too many mappings");
+	assert(m->n <= T.ngpr+T.nfpr && "too many mappings");
 	bsset(m->b, t);
 	bsset(m->b, r);
 	m->t[m->n] = t;
@@ -110,11 +112,11 @@ ralloc(RMap *m, int t)
 		regs = tmp[phicls(t, tmp)].hint.m;
 		regs |= m->b->t[0];
 		if (KBASE(tmp[t].cls) == 0) {
-			r0 = RAX;
-			r1 = RAX + NIReg;
+			r0 = T.gpr0;
+			r1 = r0 + T.ngpr;
 		} else {
-			r0 = XMM0;
-			r1 = XMM0 + NFReg;
+			r0 = T.fpr0;
+			r1 = r0 + T.nfpr;
 		}
 		for (r=r0; r<r1; r++)
 			if (!(regs & BIT(r)))
@@ -135,7 +137,7 @@ rfree(RMap *m, int t)
 {
 	int i, r;
 
-	assert(t >= Tmp0 || !(BIT(t) & RGLOB));
+	assert(t >= Tmp0 || !(BIT(t) & T.rglob));
 	if (!bshas(m->b, t))
 		return -1;
 	for (i=0; m->t[i] != t; i++)
@@ -295,10 +297,10 @@ dopm(Blk *b, Ins *i, RMap *m)
 	} while (i != b->ins && regcpy(i-1));
 	assert(m0.n <= m->n);
 	if (i != b->ins && (i-1)->op == Ocall) {
-		def = retregs((i-1)->arg[1], 0);
-		for (r=0; r<NRSave; r++)
-			if (!(BIT(rsave[r]) & def))
-				move(rsave[r], R, m);
+		def = T.retregs((i-1)->arg[1], 0) | T.rglob;
+		for (r=0; T.rsave[r]>=0; r++)
+			if (!(BIT(T.rsave[r]) & def))
+				move(T.rsave[r], R, m);
 	}
 	for (npm=0, n=0; n<m->n; n++) {
 		t = m->t[n];
@@ -370,10 +372,10 @@ doblk(Blk *b, RMap *cur)
 	for (i=&b->ins[b->nins]; i!=b->ins;) {
 		switch ((--i)->op) {
 		case Ocall:
-			rs = argregs(i->arg[1], 0);
-			for (r=0; r<NRSave; r++)
-				if (!(BIT(rsave[r]) & rs))
-					rfree(cur, rsave[r]);
+			rs = T.argregs(i->arg[1], 0) | T.rglob;
+			for (r=0; T.rsave[r]>=0; r++)
+				if (!(BIT(T.rsave[r]) & rs))
+					rfree(cur, T.rsave[r]);
 			break;
 		case Ocopy:
 			if (isreg(i->arg[0])) {
@@ -388,7 +390,7 @@ doblk(Blk *b, RMap *cur)
 			if (!req(i->to, R)) {
 				assert(rtype(i->to) == RTmp);
 				r = i->to.val;
-				if (r >= Tmp0 || !(BIT(r) & RGLOB))
+				if (r >= Tmp0 || !(BIT(r) & T.rglob))
 					r = rfree(cur, r);
 				if (r == -1) {
 					assert(!isreg(i->to));
diff --git a/spill.c b/spill.c
index 0872fd5..3871247 100644
--- a/spill.c
+++ b/spill.c
@@ -196,8 +196,8 @@ limit2(BSet *b1, int k1, int k2, BSet *fst)
 	bscopy(b2, b1);
 	bsinter(b1, mask[0]);
 	bsinter(b2, mask[1]);
-	limit(b1, NIReg - k1, fst);
-	limit(b2, NFReg - k2, fst);
+	limit(b1, T.ngpr - k1, fst);
+	limit(b2, T.nfpr - k2, fst);
 	bsunion(b1, b2);
 }
 
@@ -265,11 +265,11 @@ dopm(Blk *b, Ins *i, BSet *v)
 	} while (i != b->ins && regcpy(i-1));
 	bscopy(u, v);
 	if (i != b->ins && (i-1)->op == Ocall) {
-		v->t[0] &= ~retregs((i-1)->arg[1], 0);
-		limit2(v, NISave, NFSave, 0);
-		for (r=0, n=0; n<NRSave; n++)
-			r |= BIT(rsave[n]);
-		v->t[0] |= argregs((i-1)->arg[1], 0);
+		v->t[0] &= ~T.retregs((i-1)->arg[1], 0);
+		limit2(v, T.nrsave[0], T.nrsave[1], 0);
+		for (n=0, r=0; T.rsave[n]>=0; n++)
+			r |= BIT(T.rsave[n]);
+		v->t[0] |= T.argregs((i-1)->arg[1], 0);
 	} else {
 		limit2(v, 0, 0, 0);
 		r = v->t[0];
@@ -318,9 +318,9 @@ spill(Fn *fn)
 	slot8 = 0;
 	for (t=0; t<ntmp; t++) {
 		k = 0;
-		if (t >= XMM0 && t < XMM0 + NFReg)
+		if (t >= T.fpr0 && t < T.fpr0 + T.nfpr)
 			k = 1;
-		else if (t >= Tmp0)
+		if (t >= Tmp0)
 			k = KBASE(tmp[t].cls);
 		bsset(mask[k], t);
 	}
@@ -344,9 +344,9 @@ spill(Fn *fn)
 		if (hd) {
 			/* back-edge */
 			bszero(v);
-			hd->gen->t[0] |= RGLOB; /* don't spill registers */
+			hd->gen->t[0] |= T.rglob; /* don't spill registers */
 			for (k=0; k<2; k++) {
-				n = k == 0 ? NIReg : NFReg;
+				n = k == 0 ? T.ngpr : T.nfpr;
 				bscopy(u, b->out);
 				bsinter(u, mask[k]);
 				bscopy(w, u);
@@ -373,7 +373,7 @@ spill(Fn *fn)
 		} else {
 			bscopy(v, b->out);
 			if (rtype(b->jmp.arg) == RCall)
-				v->t[0] |= retregs(b->jmp.arg, 0);
+				v->t[0] |= T.retregs(b->jmp.arg, 0);
 		}
 		for (t=Tmp0; bsiter(b->out, &t); t++)
 			if (!bshas(v, t))
@@ -381,7 +381,7 @@ spill(Fn *fn)
 		bscopy(b->out, v);
 
 		/* 2. process the block instructions */
-		r = v->t[0] & (BIT(Tmp0)-1);
+		r = v->t[0];
 		curi = &insb[NIns];
 		for (i=&b->ins[b->nins]; i!=b->ins;) {
 			i--;
@@ -402,7 +402,7 @@ spill(Fn *fn)
 					bsset(w, t);
 				}
 			}
-			j = opdesc[i->op].nmem;
+			j = T.memargs(i->op);
 			for (n=0; n<2; n++)
 				if (rtype(i->arg[n]) == RMem)
 					j--;
@@ -449,11 +449,11 @@ spill(Fn *fn)
 				bsclr(v, t);
 			}
 			emiti(*i);
-			r = v->t[0] & (BIT(Tmp0)-1);
+			r = v->t[0]; /* Tmp0 is NBit */
 			if (r)
 				sethint(v, r);
 		}
-		assert(r == RGLOB || b == fn->start);
+		assert(r == T.rglob || b == fn->start);
 
 		for (p=b->phi; p; p=p->link) {
 			assert(rtype(p->to) == RTmp);
diff --git a/util.c b/util.c
index 9b73771..aae1481 100644
--- a/util.c
+++ b/util.c
@@ -87,6 +87,36 @@ freeall()
 	nptr = 1;
 }
 
+/* tell whether op is a comparison; if so, store the class
+ * of its operands in *pk and its comparison code in *pc,
+ * then return 1; codes of float comparisons are offset by
+ * NCmpI; for any other op, return 0 and leave *pk and *pc
+ * untouched */
+int
+iscmp(int op, int *pk, int *pc)
+{
+	struct { int first, last, cls, base; } *r, tbl[] = {
+		{ Ocmpw, Ocmpw1, Kw, 0 },
+		{ Ocmpl, Ocmpl1, Kl, 0 },
+		{ Ocmps, Ocmps1, Ks, NCmpI },
+		{ Ocmpd, Ocmpd1, Kd, NCmpI },
+	};
+
+	for (r=tbl; r<&tbl[4]; r++)
+		if (r->first <= op && op <= r->last) {
+			*pc = r->base + op - r->first;
+			*pk = r->cls;
+			return 1;
+		}
+	return 0;
+}
+
+int
+argcls(Ins *i, int n)
+{
+	return optab[i->op].argcls[n][i->cls]; /* optab's class for arg n of i */
+}
+
 void
 emit(int op, int k, Ref to, Ref arg0, Ref arg1)
 {
@@ -165,6 +195,42 @@ vgrow(void *vp, ulong len)
 	*(Vec **)vp = v1;
 }
 
+static int cmptab[][2] ={ /* indexed by comparison code, float codes at NCmpI+Cf* */
+	             /* negation    swap */
+	[Ciule]      = {Ciugt,      Ciuge},
+	[Ciult]      = {Ciuge,      Ciugt},
+	[Ciugt]      = {Ciule,      Ciult},
+	[Ciuge]      = {Ciult,      Ciule},
+	[Cisle]      = {Cisgt,      Cisge},
+	[Cislt]      = {Cisge,      Cisgt},
+	[Cisgt]      = {Cisle,      Cislt},
+	[Cisge]      = {Cislt,      Cisle},
+	[Cieq]       = {Cine,       Cieq},
+	[Cine]       = {Cieq,       Cine},
+	[NCmpI+Cfle] = {NCmpI+Cfgt, NCmpI+Cfge},
+	[NCmpI+Cflt] = {NCmpI+Cfge, NCmpI+Cfgt},
+	[NCmpI+Cfgt] = {NCmpI+Cfle, NCmpI+Cflt},
+	[NCmpI+Cfge] = {NCmpI+Cflt, NCmpI+Cfle},
+	[NCmpI+Cfeq] = {NCmpI+Cfne, NCmpI+Cfeq},
+	[NCmpI+Cfne] = {NCmpI+Cfeq, NCmpI+Cfne},
+	[NCmpI+Cfo]  = {NCmpI+Cfuo, NCmpI+Cfo},
+	[NCmpI+Cfuo] = {NCmpI+Cfo,  NCmpI+Cfuo},
+};
+
+int
+cmpneg(int c)
+{
+	assert(0 <= c && c < NCmp);
+	return cmptab[c][0]; /* "negation" column */
+}
+
+int
+cmpop(int c)
+{
+	assert(0 <= c && c < NCmp);
+	return cmptab[c][1]; /* "swap" column: same test with operands exchanged */
+}
+
 int
 clsmerge(short *pk, short k)
 {
@@ -257,6 +323,30 @@ addcon(Con *c0, Con *c1)
 }
 
 void
+blit(Ref rdst, uint doff, Ref rsrc, uint sz, Fn *fn)
+{
+	struct { int st, ld, cls, size; } *p, tbl[] = {
+		{ Ostorel, Oload,   Kl, 8 },
+		{ Ostorew, Oload,   Kw, 8 },
+		{ Ostoreh, Oloaduh, Kw, 2 },
+		{ Ostoreb, Oloadub, Kw, 1 }
+	};
+	Ref r, r1;
+	uint boff, s;
+
+	for (boff=0, p=tbl; sz; p++)
+		for (s=p->size; sz>=s; sz-=s, doff+=s, boff+=s) {
+			r = newtmp("blt", Kl, fn);
+			r1 = newtmp("blt", Kl, fn);
+			emit(p->st, 0, R, r, r1);
+			emit(Oadd, Kl, r1, rdst, getcon(doff, fn));
+			r1 = newtmp("blt", Kl, fn);
+			emit(p->ld, p->cls, r, r1, R);
+			emit(Oadd, Kl, r1, rsrc, getcon(boff, fn));
+		}
+}
+
+void
 bsinit(BSet *bs, uint n)
 {
 	n = (n + NBit-1) / NBit;