summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- Makefile | 28
-rw-r--r-- all.h | 334
-rw-r--r-- amd64/all.h | 70
-rw-r--r-- amd64/emit.c (renamed from emit.c) | 225
-rw-r--r-- amd64/isel.c (renamed from isel.c) | 78
-rw-r--r-- amd64/sysv.c (renamed from sysv.c) | 47
-rw-r--r-- amd64/targ.c | 30
-rw-r--r-- cfg.c | 4
-rw-r--r-- fold.c | 62
-rw-r--r-- gas.c | 122
-rw-r--r-- live.c | 30
-rw-r--r-- main.c | 68
-rw-r--r-- mem.c | 4
-rw-r--r-- ops.h | 167
-rw-r--r-- parse.c | 144
-rw-r--r-- rega.c | 38
-rw-r--r-- spill.c | 32
-rw-r--r-- util.c | 90
18 files changed, 852 insertions, 721 deletions
diff --git a/Makefile b/Makefile
index f8e3da0..2433e25 100644
--- a/Makefile
+++ b/Makefile
@@ -1,11 +1,15 @@
 BIN = qbe
-ABI = sysv
 
 V = @
 OBJDIR = obj
 
-SRC = main.c util.c parse.c cfg.c mem.c ssa.c alias.c load.c copy.c fold.c live.c $(ABI).c isel.c spill.c rega.c emit.c
-OBJ = $(SRC:%.c=$(OBJDIR)/%.o)
+SRC      = main.c util.c parse.c cfg.c mem.c ssa.c alias.c load.c copy.c \
+           fold.c live.c spill.c rega.c gas.c
+AMD64SRC = amd64/targ.c amd64/sysv.c amd64/isel.c amd64/emit.c
+SRCALL   = $(SRC) $(AMD64SRC)
+
+AMD64OBJ = $(AMD64SRC:%.c=$(OBJDIR)/%.o)
+OBJ      = $(SRC:%.c=$(OBJDIR)/%.o) $(AMD64OBJ)
 
 CFLAGS += -Wall -Wextra -std=c99 -g -pedantic
 
@@ -19,15 +23,23 @@ $(OBJDIR)/%.o: %.c $(OBJDIR)/timestamp
 
 $(OBJDIR)/timestamp:
 	@mkdir -p $(OBJDIR)
+	@mkdir -p $(OBJDIR)/amd64
 	@touch $@
 
-$(OBJ): all.h
+$(OBJ): all.h ops.h
+$(AMD64OBJ): amd64/all.h
 obj/main.o: config.h
 
 config.h:
-	@case `uname` in                                 \
-	*Darwin*)  echo "#define Defaultasm Gasmacho" ;; \
-	*)         echo "#define Defaultasm Gaself" ;;   \
+	@case `uname` in                               \
+	*Darwin*)                                      \
+		echo "#define Defasm Gasmacho";        \
+		echo "#define Deftgt T_amd64_sysv";    \
+		;;                                     \
+	*)                                             \
+		echo "#define Defasm Gaself";          \
+		echo "#define Deftgt T_amd64_sysv";    \
+		;;                                     \
 	esac > $@
 
 install: $(OBJDIR)/$(BIN)
@@ -47,7 +59,7 @@ check: $(OBJDIR)/$(BIN)
 	tools/unit.sh all
 
 80:
-	@for F in $(SRC);                          \
+	@for F in $(SRCALL);                       \
 	do                                         \
 		awk "{                             \
 			gsub(/\\t/, \"        \"); \
diff --git a/all.h b/all.h
index 124a8d2..c0e08fe 100644
--- a/all.h
+++ b/all.h
@@ -8,13 +8,14 @@
 #define MAKESURE(what, x) typedef char make_sure_##what[(x)?1:-1]
 #define die(...) die_(__FILE__, __VA_ARGS__)
 
+typedef unsigned char uchar;
 typedef unsigned int uint;
 typedef unsigned long ulong;
 typedef unsigned long long bits;
 
 typedef struct BSet BSet;
 typedef struct Ref Ref;
-typedef struct OpDesc OpDesc;
+typedef struct Op Op;
 typedef struct Ins Ins;
 typedef struct Phi Phi;
 typedef struct Blk Blk;
@@ -27,6 +28,7 @@ typedef struct Fn Fn;
 typedef struct Typ Typ;
 typedef struct Seg Seg;
 typedef struct Dat Dat;
+typedef struct Target Target;
 
 enum {
 	NString = 32,
@@ -38,61 +40,29 @@ enum {
 	NBit    = CHAR_BIT * sizeof(bits),
 };
 
-#define BIT(n) ((bits)1 << (n))
-
-enum Reg {
-	RXX,
-
-	RAX, /* caller-save */
-	RCX,
-	RDX,
-	RSI,
-	RDI,
-	R8,
-	R9,
-	R10,
-	R11,
-
-	RBX, /* callee-save */
-	R12,
-	R13,
-	R14,
-	R15,
-
-	RBP, /* globally live */
-	RSP,
-#define RGLOB (BIT(RBP)|BIT(RSP))
-
-	XMM0, /* sse */
-	XMM1,
-	XMM2,
-	XMM3,
-	XMM4,
-	XMM5,
-	XMM6,
-	XMM7,
-	XMM8,
-	XMM9,
-	XMM10,
-	XMM11,
-	XMM12,
-	XMM13,
-	XMM14,
-	XMM15,
-
-	Tmp0, /* first non-reg temporary */
-
-	NRGlob = 2,
-	NIReg = R15 - RAX + 1 + NRGlob,
-	NFReg = XMM14 - XMM0 + 1, /* XMM15 is reserved */
-	NISave = R11 - RAX + 1,
-	NFSave = NFReg,
-	NRSave = NISave + NFSave,
-	NRClob = R15 - RBX + 1,
+struct Target {
+	int gpr0;   /* first general purpose reg */
+	int ngpr;   /* number of general purpose regs */
+	int fpr0;   /* first floating point reg */
+	int nfpr;   /* number of floating point regs */
+	bits rglob; /* globally live regs (e.g., sp, fp) */
+	int nrglob; /* number of regs in rglob */
+	int *rsave; /* caller-save */
+	int nrsave[2]; /* caller-save counts: {gpr, fpr} */
+	bits (*retregs)(Ref, int[2]); /* hooks below are set per target */
+	bits (*argregs)(Ref, int[2]);
+	int (*memargs)(int);
+	void (*abi)(Fn *);
+	void (*isel)(Fn *);
+	void (*emitfn)(Fn *, FILE *);
+};
 
-MAKESURE(NBit_is_enough, NBit >= (int)Tmp0);
+#define BIT(n) ((bits)1 << (n))
 
+enum {
+	RXX = 0,
+	Tmp0 = NBit, /* first non-reg temporary */
+};
 
 struct BSet {
 	uint nt;
@@ -139,51 +109,81 @@ static inline int isreg(Ref r)
 	return rtype(r) == RTmp && r.val < Tmp0;
 }
 
-enum ICmp {
-#define ICMPS(X) \
-	X(ule)   \
-	X(ult)   \
-	X(sle)   \
-	X(slt)   \
-	X(sgt)   \
-	X(sge)   \
-	X(ugt)   \
-	X(uge)   \
-	X(eq)    \
-	X(ne) /* make sure icmpop() below works! */
-
-#define X(c) IC##c,
-	ICMPS(X)
-#undef X
-	NICmp,
+enum CmpI {
+	Cieq,
+	Cine,
+	Cisge,
+	Cisgt,
+	Cisle,
+	Cislt,
+	Ciuge,
+	Ciugt,
+	Ciule,
+	Ciult,
+	NCmpI,
+};
 
-	ICxnp = NICmp, /* x64 specific */
-	ICxp,
-	NXICmp
+enum CmpF {
+	Cfeq,
+	Cfge,
+	Cfgt,
+	Cfle,
+	Cflt,
+	Cfne,
+	Cfo,
+	Cfuo,
+	NCmpF,
+	NCmp = NCmpI + NCmpF,
 };
 
-static inline int icmpop(int c)
-{
-	return c >= ICeq ? c : ICuge - c;
-}
+enum O {
+	Oxxx,
+#define O(op, x, y) O##op,
+	#include "ops.h"
+	NOp,
+};
 
-enum FCmp {
-#define FCMPS(X) \
-	X(le)    \
-	X(lt)    \
-	X(gt)    \
-	X(ge)    \
-	X(ne)    \
-	X(eq)    \
-	X(o)     \
-	X(uo)
-
-#define X(c) FC##c,
-	FCMPS(X)
+enum J {
+	Jxxx,
+#define JMPS(X)                                 \
+	X(ret0)   X(retw)   X(retl)   X(rets)   \
+	X(retd)   X(retc)   X(jmp)    X(jnz)    \
+	X(jfieq)  X(jfine)  X(jfisge) X(jfisgt) \
+	X(jfisle) X(jfislt) X(jfiuge) X(jfiugt) \
+	X(jfiule) X(jfiult) X(jffeq)  X(jffge)  \
+	X(jffgt)  X(jffle)  X(jfflt)  X(jffne)  \
+	X(jffo)   X(jffuo)
+#define X(j) J##j,
+	JMPS(X)
 #undef X
-	NFCmp
+	NJmp
+};
+
+enum {
+	Ocmpw = Oceqw,
+	Ocmpw1 = Ocultw,
+	Ocmpl = Oceql,
+	Ocmpl1 = Ocultl,
+	Ocmps = Oceqs,
+	Ocmps1 = Ocuos,
+	Ocmpd = Oceqd,
+	Ocmpd1 = Ocuod,
+	Oalloc = Oalloc4,
+	Oalloc1 = Oalloc16,
+	Oflag = Oflagieq,
+	Oflag1 = Oflagfuo,
+	NPubOp = Onop,
+	Jjf = Jjfieq,
+	Jjf1 = Jjffuo,
 };
 
+#define isstore(o) (Ostoreb <= (o) && (o) <= Ostored) /* evaluates o twice */
+#define isload(o) (Oloadsb <= (o) && (o) <= Oload)
+#define isext(o) (Oextsb <= (o) && (o) <= Oextuw)
+#define ispar(o) (Opar <= (o) && (o) <= Opare)
+#define isarg(o) (Oarg <= (o) && (o) <= Oarge)
+#define isret(j) (Jret0 <= (j) && (j) <= Jretc)
+
 enum Class {
 	Kx = -1, /* "top" class (see usecheck() and clsmerge()) */
 	Kw,
@@ -195,124 +195,10 @@ enum Class {
 #define KWIDE(k) ((k)&1)
 #define KBASE(k) ((k)>>1)
 
-enum Op {
-	Oxxx,
-
-	/* public instructions */
-	Oadd,
-	Osub,
-	Odiv,
-	Orem,
-	Oudiv,
-	Ourem,
-	Omul,
-	Oand,
-	Oor,
-	Oxor,
-	Osar,
-	Oshr,
-	Oshl,
-	Ocmpw,
-	Ocmpw1 = Ocmpw + NICmp-1,
-	Ocmpl,
-	Ocmpl1 = Ocmpl + NICmp-1,
-	Ocmps,
-	Ocmps1 = Ocmps + NFCmp-1,
-	Ocmpd,
-	Ocmpd1 = Ocmpd + NFCmp-1,
-
-	Ostoreb,
-	Ostoreh,
-	Ostorew,
-	Ostorel,
-	Ostores,
-	Ostored,
-#define isstore(o) (Ostoreb <= o && o <= Ostored)
-	Oloadsb,  /* must match Oext and Tmp.width */
-	Oloadub,
-	Oloadsh,
-	Oloaduh,
-	Oloadsw,
-	Oloaduw,
-	Oload,
-#define isload(o) (Oloadsb <= o && o <= Oload)
-	Oextsb,
-	Oextub,
-	Oextsh,
-	Oextuh,
-	Oextsw,
-	Oextuw,
-#define isext(o) (Oextsb <= o && o <= Oextuw)
-
-	Oexts,
-	Otruncd,
-	Ostosi,
-	Odtosi,
-	Oswtof,
-	Osltof,
-	Ocast,
-
-	Oalloc,
-	Oalloc1 = Oalloc + NAlign-1,
-
-	Ovastart,
-	Ovaarg,
-
-	Ocopy,
-	NPubOp,
-
-	/* function instructions */
-	Opar = NPubOp,
-	Oparc,
-	Opare,
-#define ispar(o) (Opar <= o && o <= Opare)
-	Oarg,
-	Oargc,
-	Oarge,
-#define isarg(o) (Oarg <= o && o <= Oarge)
-	Ocall,
-	Ovacall,
-
-	/* reserved instructions */
-	Onop,
-	Oaddr,
-	Oswap,
-	Osign,
-	Osalloc,
-	Oxidiv,
-	Oxdiv,
-	Oxcmp,
-	Oxset,
-	Oxsetnp = Oxset + ICxnp,
-	Oxsetp  = Oxset + ICxp,
-	Oxtest,
-	NOp
-};
-
-enum Jmp {
-	Jxxx,
-	Jret0,
-	Jretw,
-	Jretl,
-	Jrets,
-	Jretd,
-	Jretc,
-#define isret(j) (Jret0 <= j && j <= Jretc)
-	Jjmp,
-	Jjnz,
-	Jxjc,
-	Jxjnp = Jxjc + ICxnp,
-	Jxjp  = Jxjc + ICxp,
-	NJmp
-};
-
-struct OpDesc {
+struct Op {
 	char *name;
-	int nmem;
 	short argcls[2][4];
-	uint sflag:1; /* sets the zero flag */
-	uint lflag:1; /* leaves flags */
-	uint cfold:1; /* can fold */
+	int canfold;
 };
 
 struct Ins {
@@ -437,7 +323,7 @@ struct Con {
 
 typedef struct Addr Addr;
 
-struct Addr { /* x64 addressing */
+struct Addr { /* amd64 addressing */
 	Con offset;
 	Ref base;
 	Ref index;
@@ -508,8 +394,8 @@ struct Dat {
 	char export;
 };
 
-
 /* main.c */
+extern Target T;
 extern char debug['Z'+1];
 
 /* util.c */
@@ -524,6 +410,8 @@ void die_(char *, char *, ...) __attribute__((noreturn));
 void *emalloc(size_t);
 void *alloc(size_t);
 void freeall(void);
+int argcls(Ins *, int);
+int iscmp(int, int *, int *);
 void emit(int, int, Ref, Ref, Ref);
 void emiti(Ins);
 void idup(Ins **, Ins *, ulong);
@@ -531,12 +419,15 @@ Ins *icpy(Ins *, Ins *, ulong);
 void *vnew(ulong, size_t, Pool);
 void vfree(void *);
 void vgrow(void *, ulong);
+int cmpop(int);
+int cmpneg(int);
 int clsmerge(short *, short);
 int phicls(int, Tmp *);
 Ref newtmp(char *, int, Fn *);
 void chuse(Ref, int, Fn *);
 Ref getcon(int64_t, Fn *);
 void addcon(Con *, Con *);
+void blit(Ref, uint, Ref, uint, Fn *);
 void dumpts(BSet *, Tmp *, FILE *);
 
 void bsinit(BSet *, uint);
@@ -559,7 +450,7 @@ bshas(BSet *bs, uint elt)
 }
 
 /* parse.c */
-extern OpDesc opdesc[NOp];
+extern Op optab[NOp];
 void parse(FILE *, char *, void (Dat *), void (Fn *));
 void printfn(Fn *, FILE *);
 void printref(Ref, Fn *, FILE *);
@@ -611,16 +502,6 @@ void fold(Fn *);
 void liveon(BSet *, Blk *, Blk *);
 void filllive(Fn *);
 
-/* abi: sysv.c */
-extern int rsave[/* NRSave */];
-extern int rclob[/* NRClob */];
-bits retregs(Ref, int[2]);
-bits argregs(Ref, int[2]);
-void abi(Fn *);
-
-/* isel.c */
-void isel(Fn *);
-
 /* spill.c */
 void fillcost(Fn *);
 void spill(Fn *);
@@ -628,10 +509,9 @@ void spill(Fn *);
 /* rega.c */
 void rega(Fn *);
 
-/* emit.c */
-extern char *locprefix;
-extern char *symprefix;
-void emitfn(Fn *, FILE *);
-void emitdat(Dat *, FILE *);
-int stashfp(int64_t, int);
-void emitfin(FILE *);
+/* gas.c */
+extern char *gasloc;
+extern char *gassym;
+void gasemitdat(Dat *, FILE *);
+int gasstashfp(int64_t, int);
+void gasemitfin(FILE *);
diff --git a/amd64/all.h b/amd64/all.h
new file mode 100644
index 0000000..3a2db0e
--- /dev/null
+++ b/amd64/all.h
@@ -0,0 +1,70 @@
+#include "../all.h"
+
+typedef struct Amd64Op Amd64Op;
+
+enum Amd64Reg {
+	RAX = RXX+1, /* caller-save */
+	RCX,
+	RDX,
+	RSI,
+	RDI,
+	R8,
+	R9,
+	R10,
+	R11,
+
+	RBX, /* callee-save */
+	R12,
+	R13,
+	R14,
+	R15,
+
+	RBP, /* globally live */
+	RSP,
+
+	XMM0, /* sse */
+	XMM1,
+	XMM2,
+	XMM3,
+	XMM4,
+	XMM5,
+	XMM6,
+	XMM7,
+	XMM8,
+	XMM9,
+	XMM10,
+	XMM11,
+	XMM12,
+	XMM13,
+	XMM14,
+	XMM15,
+
+	NFPR = XMM14 - XMM0 + 1, /* reserve XMM15 */
+	NGPR = RSP - RAX + 1,
+	NGPS = R11 - RAX + 1,
+	NFPS = NFPR,
+	NCLR = R15 - RBX + 1,
+};
+MAKESURE(reg_not_tmp, XMM15 < (int)Tmp0);
+
+struct Amd64Op {
+	char nmem;  /* returned by amd64_memargs() */
+	char zflag; /* sets the zero flag */
+	char lflag; /* leaves flags */
+};
+
+/* targ.c */
+extern Amd64Op amd64_op[];
+
+/* sysv.c (abi) */
+extern int amd64_sysv_rsave[];
+extern int amd64_sysv_rclob[];
+bits amd64_sysv_retregs(Ref, int[2]);
+bits amd64_sysv_argregs(Ref, int[2]);
+void amd64_sysv_abi(Fn *);
+
+/* isel.c */
+void amd64_isel(Fn *);
+
+/* emit.c */
+void amd64_emitfn(Fn *, FILE *);
diff --git a/emit.c b/amd64/emit.c
index 138bc1d..eccbd02 100644
--- a/emit.c
+++ b/amd64/emit.c
@@ -1,6 +1,25 @@
 #include "all.h"
 
-char *locprefix, *symprefix;
+
+#define CMP(X) \
+	X(Ciule,      "be") \
+	X(Ciult,      "b")  \
+	X(Cisle,      "le") \
+	X(Cislt,      "l")  \
+	X(Cisgt,      "g")  \
+	X(Cisge,      "ge") \
+	X(Ciugt,      "a")  \
+	X(Ciuge,      "ae") \
+	X(Cieq,       "z")  \
+	X(Cine,       "nz") \
+	X(NCmpI+Cfle, "be") \
+	X(NCmpI+Cflt, "b")  \
+	X(NCmpI+Cfgt, "a")  \
+	X(NCmpI+Cfge, "ae") \
+	X(NCmpI+Cfeq, "z")  \
+	X(NCmpI+Cfne, "nz") \
+	X(NCmpI+Cfo,  "np") \
+	X(NCmpI+Cfuo, "p")
 
 enum {
 	SLong = 0,
@@ -95,18 +114,10 @@ static struct {
 	{ Oxcmp,   Kd, "comisd %D0, %D1" },
 	{ Oxcmp,   Ki, "cmp%k %0, %1" },
 	{ Oxtest,  Ki, "test%k %0, %1" },
-	{ Oxset+ICule, Ki, "setbe %B=\n\tmovzb%k %B=, %=" },
-	{ Oxset+ICult, Ki, "setb %B=\n\tmovzb%k %B=, %=" },
-	{ Oxset+ICsle, Ki, "setle %B=\n\tmovzb%k %B=, %=" },
-	{ Oxset+ICslt, Ki, "setl %B=\n\tmovzb%k %B=, %=" },
-	{ Oxset+ICsgt, Ki, "setg %B=\n\tmovzb%k %B=, %=" },
-	{ Oxset+ICsge, Ki, "setge %B=\n\tmovzb%k %B=, %=" },
-	{ Oxset+ICugt, Ki, "seta %B=\n\tmovzb%k %B=, %=" },
-	{ Oxset+ICuge, Ki, "setae %B=\n\tmovzb%k %B=, %=" },
-	{ Oxset+ICeq,  Ki, "setz %B=\n\tmovzb%k %B=, %=" },
-	{ Oxset+ICne,  Ki, "setnz %B=\n\tmovzb%k %B=, %=" },
-	{ Oxset+ICxnp, Ki, "setnp %B=\n\tmovsb%k %B=, %=" },
-	{ Oxset+ICxp,  Ki, "setp %B=\n\tmovsb%k %B=, %=" },
+#define X(c, s) \
+	{ Oflag+c, Ki, "set" s " %B=\n\tmovzb%k %B=, %=" },
+	CMP(X)
+#undef X
 	{ NOp, 0, 0 }
 };
 
@@ -153,9 +164,9 @@ emitcon(Con *con, FILE *f)
 	switch (con->type) {
 	case CAddr:
 		if (con->local)
-			fprintf(f, "%s%s", locprefix, con->label);
+			fprintf(f, "%s%s", gasloc, con->label);
 		else
-			fprintf(f, "%s%s", symprefix, con->label);
+			fprintf(f, "%s%s", gassym, con->label);
 		if (con->bits.i)
 			fprintf(f, "%+"PRId64, con->bits.i);
 		break;
@@ -356,7 +367,8 @@ emitins(Ins i, Fn *fn, FILE *f)
 			/* this linear search should really be a binary
 			 * search */
 			if (omap[o].op == NOp)
-				die("no match for %s(%d)", opdesc[i.op].name, i.cls);
+				die("no match for %s(%c)", /* cls as letter */
+					optab[i.op].name, "wlsd"[i.cls]);
 			if (omap[o].op == i.op)
 			if (omap[o].cls == i.cls
 			|| (omap[o].cls == Ki && KBASE(i.cls) == 0)
@@ -453,54 +465,25 @@ emitins(Ins i, Fn *fn, FILE *f)
 }
 
 static int
-cneg(int cmp)
-{
-	switch (cmp) {
-	default:    die("invalid int comparison %d", cmp);
-	case ICule: return ICugt;
-	case ICult: return ICuge;
-	case ICsle: return ICsgt;
-	case ICslt: return ICsge;
-	case ICsgt: return ICsle;
-	case ICsge: return ICslt;
-	case ICugt: return ICule;
-	case ICuge: return ICult;
-	case ICeq:  return ICne;
-	case ICne:  return ICeq;
-	case ICxnp: return ICxp;
-	case ICxp:  return ICxnp;
-	}
-}
-
-static int
 framesz(Fn *fn)
 {
 	int i, o, f;
 
 	/* specific to NAlign == 3 */
-	for (i=0, o=0; i<NRClob; i++)
-		o ^= 1 & (fn->reg >> rclob[i]);
+	for (i=0, o=0; i<NCLR; i++)
+		o ^= 1 & (fn->reg >> amd64_sysv_rclob[i]);
 	f = fn->slot;
 	f = (f + 3) & -4;
 	return 4*f + 8*o + 176*fn->vararg;
 }
 
 void
-emitfn(Fn *fn, FILE *f)
+amd64_emitfn(Fn *fn, FILE *f)
 {
 	static char *ctoa[] = {
-		[ICeq]  = "z",
-		[ICule] = "be",
-		[ICult] = "b",
-		[ICsle] = "le",
-		[ICslt] = "l",
-		[ICsgt] = "g",
-		[ICsge] = "ge",
-		[ICugt] = "a",
-		[ICuge] = "ae",
-		[ICne]  = "nz",
-		[ICxnp] = "np",
-		[ICxp]  = "p"
+	#define X(c, s) [c] = s,
+		CMP(X)
+	#undef X
 	};
 	static int id0;
 	Blk *b, *s;
@@ -509,24 +492,24 @@ emitfn(Fn *fn, FILE *f)
 
 	fprintf(f, ".text\n");
 	if (fn->export)
-		fprintf(f, ".globl %s%s\n", symprefix, fn->name);
+		fprintf(f, ".globl %s%s\n", gassym, fn->name);
 	fprintf(f,
 		"%s%s:\n"
 		"\tpushq %%rbp\n"
 		"\tmovq %%rsp, %%rbp\n",
-		symprefix, fn->name
+		gassym, fn->name
 	);
 	fs = framesz(fn);
 	if (fs)
 		fprintf(f, "\tsub $%d, %%rsp\n", fs);
 	if (fn->vararg) {
 		o = -176;
-		for (r=rsave; r-rsave<6; ++r, o+=8)
+		for (r=amd64_sysv_rsave; r<&amd64_sysv_rsave[6]; r++, o+=8)
 			fprintf(f, "\tmovq %%%s, %d(%%rbp)\n", rname[*r][0], o);
 		for (n=0; n<8; ++n, o+=16)
 			fprintf(f, "\tmovaps %%xmm%d, %d(%%rbp)\n", n, o);
 	}
-	for (r=rclob; r-rclob < NRClob; r++)
+	for (r=amd64_sysv_rclob; r<&amd64_sysv_rclob[NCLR]; r++)
 		if (fn->reg & BIT(*r)) {
 			itmp.arg[0] = TMP(*r);
 			emitf("pushq %L0", &itmp, fn, f);
@@ -534,13 +517,13 @@ emitfn(Fn *fn, FILE *f)
 
 	for (lbl=0, b=fn->start; b; b=b->link) {
 		if (lbl || b->npred > 1)
-			fprintf(f, "%sbb%d:\n", locprefix, id0+b->id);
+			fprintf(f, "%sbb%d:\n", gasloc, id0+b->id);
 		for (i=b->ins; i!=&b->ins[b->nins]; i++)
 			emitins(*i, fn, f);
 		lbl = 1;
 		switch (b->jmp.type) {
 		case Jret0:
-			for (r=&rclob[NRClob]; r>rclob;)
+			for (r=&amd64_sysv_rclob[NCLR]; r>amd64_sysv_rclob;)
 				if (fn->reg & BIT(*--r)) {
 					itmp.arg[0] = TMP(*r);
 					emitf("popq %L0", &itmp, fn, f);
@@ -554,21 +537,21 @@ emitfn(Fn *fn, FILE *f)
 		Jmp:
 			if (b->s1 != b->link)
 				fprintf(f, "\tjmp %sbb%d\n",
-					locprefix, id0+b->s1->id);
+					gasloc, id0+b->s1->id);
 			else
 				lbl = 0;
 			break;
 		default:
-			c = b->jmp.type - Jxjc;
-			if (0 <= c && c <= NXICmp) {
+			c = b->jmp.type - Jjf;
+			if (0 <= c && c < NCmp) { /* Jjf..Jjf1 only */
 				if (b->link == b->s2) {
 					s = b->s1;
 					b->s1 = b->s2;
 					b->s2 = s;
 				} else
-					c = cneg(c);
+					c = cmpneg(c);
 				fprintf(f, "\tj%s %sbb%d\n", ctoa[c],
-					locprefix, id0+b->s2->id);
+					gasloc, id0+b->s2->id);
 				goto Jmp;
 			}
 			die("unhandled jump %d", b->jmp.type);
@@ -576,121 +559,3 @@ emitfn(Fn *fn, FILE *f)
 	}
 	id0 += fn->nblk;
 }
-
-void
-emitdat(Dat *d, FILE *f)
-{
-	static int align;
-	static char *dtoa[] = {
-		[DAlign] = ".align",
-		[DB] = "\t.byte",
-		[DH] = "\t.value",
-		[DW] = "\t.long",
-		[DL] = "\t.quad"
-	};
-
-	switch (d->type) {
-	case DStart:
-		align = 0;
-		fprintf(f, ".data\n");
-		break;
-	case DEnd:
-		break;
-	case DName:
-		if (!align)
-			fprintf(f, ".align 8\n");
-		if (d->export)
-			fprintf(f, ".globl %s%s\n", symprefix, d->u.str);
-		fprintf(f, "%s%s:\n", symprefix, d->u.str);
-		break;
-	case DZ:
-		fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num);
-		break;
-	default:
-		if (d->type == DAlign)
-			align = 1;
-
-		if (d->isstr) {
-			if (d->type != DB)
-				err("strings only supported for 'b' currently");
-			fprintf(f, "\t.ascii \"%s\"\n", d->u.str);
-		}
-		else if (d->isref) {
-			fprintf(f, "%s %s%+"PRId64"\n",
-				dtoa[d->type], d->u.ref.nam,
-				d->u.ref.off);
-		}
-		else {
-			fprintf(f, "%s %"PRId64"\n",
-				dtoa[d->type], d->u.num);
-		}
-		break;
-	}
-}
-
-typedef struct FBits FBits;
-
-struct FBits {
-	union {
-		int64_t n;
-		float f;
-		double d;
-	} bits;
-	int wide;
-	FBits *link;
-};
-
-static FBits *stash;
-
-int
-stashfp(int64_t n, int w)
-{
-	FBits **pb, *b;
-	int i;
-
-	/* does a dumb de-dup of fp constants
-	 * this should be the linker's job */
-	for (pb=&stash, i=0; (b=*pb); pb=&b->link, i++)
-		if (n == b->bits.n && w == b->wide)
-			return i;
-	b = emalloc(sizeof *b);
-	b->bits.n = n;
-	b->wide = w;
-	b->link = 0;
-	*pb = b;
-	return i;
-}
-
-void
-emitfin(FILE *f)
-{
-	FBits *b;
-	int i;
-
-	if (!stash)
-		return;
-	fprintf(f, "/* floating point constants */\n");
-	fprintf(f, ".data\n.align 8\n");
-	for (b=stash, i=0; b; b=b->link, i++)
-		if (b->wide)
-			fprintf(f,
-				"%sfp%d:\n"
-				"\t.quad %"PRId64
-				" /* %f */\n",
-				locprefix, i, b->bits.n,
-				b->bits.d
-			);
-	for (b=stash, i=0; b; b=b->link, i++)
-		if (!b->wide)
-			fprintf(f,
-				"%sfp%d:\n"
-				"\t.long %"PRId64
-				" /* %lf */\n",
-				locprefix, i, b->bits.n & 0xffffffff,
-				b->bits.f
-			);
-	while ((b=stash)) {
-		stash = b->link;
-		free(b);
-	}
-}
diff --git a/isel.c b/amd64/isel.c
index 6ce5dd0..1623b9b 100644
--- a/isel.c
+++ b/amd64/isel.c
@@ -28,46 +28,6 @@ struct ANum {
 static void amatch(Addr *, Ref, ANum *, Fn *, int);
 
 static int
-fcmptoi(int fc)
-{
-	switch (fc) {
-	default:   die("invalid fp comparison %d", fc);
-	case FCle: return ICule;
-	case FClt: return ICult;
-	case FCgt: return ICugt;
-	case FCge: return ICuge;
-	case FCne: return ICne;
-	case FCeq: return ICeq;
-	case FCo:  return ICxnp;
-	case FCuo: return ICxp;
-	}
-}
-
-static int
-iscmp(int op, int *pk, int *pc)
-{
-	if (Ocmpw <= op && op <= Ocmpw1) {
-		*pc = op - Ocmpw;
-		*pk = Kw;
-	}
-	else if (Ocmpl <= op && op <= Ocmpl1) {
-		*pc = op - Ocmpl;
-		*pk = Kl;
-	}
-	else if (Ocmps <= op && op <= Ocmps1) {
-		*pc = fcmptoi(op - Ocmps);
-		*pk = Ks;
-	}
-	else if (Ocmpd <= op && op <= Ocmpd1) {
-		*pc = fcmptoi(op - Ocmpd);
-		*pk = Kd;
-	}
-	else
-		return 0;
-	return 1;
-}
-
-static int
 noimm(Ref r, Fn *fn)
 {
 	int64_t val;
@@ -98,14 +58,8 @@ rslot(Ref r, Fn *fn)
 	return fn->tmp[r.val].slot;
 }
 
-static int
-argcls(Ins *i, int n)
-{
-	return opdesc[i->op].argcls[n][i->cls];
-}
-
 static void
-fixarg(Ref *r, int k, int phi, Fn *fn)
+fixarg(Ref *r, int k, int cpy, Fn *fn)
 {
 	Addr a, *m;
 	Ref r0, r1;
@@ -123,11 +77,11 @@ fixarg(Ref *r, int k, int phi, Fn *fn)
 		memset(&a, 0, sizeof a);
 		a.offset.type = CAddr;
 		a.offset.local = 1;
-		n = stashfp(fn->con[r0.val].bits.i, KWIDE(k));
+		n = gasstashfp(fn->con[r0.val].bits.i, KWIDE(k));
 		sprintf(a.offset.label, "fp%d", n);
 		fn->mem[fn->nmem-1] = a;
 	}
-	else if (!phi && k == Kl && noimm(r0, fn)) {
+	else if (!cpy && k == Kl && noimm(r0, fn)) {
 		/* load constants that do not fit in
 		 * a 32bit signed integer into a
 		 * long temporary
@@ -251,7 +205,7 @@ sel(Ins i, ANum *an, Fn *fn)
 			r0 = i.arg[1];
 		if (fn->tmp[r0.val].slot != -1)
 			err("unlikely argument %%%s in %s",
-				fn->tmp[r0.val].name, opdesc[i.op].name);
+				fn->tmp[r0.val].name, optab[i.op].name);
 		if (i.op == Odiv || i.op == Orem) {
 			emit(Oxidiv, k, R, r0, R);
 			emit(Osign, k, TMP(RDX), TMP(RAX), R);
@@ -340,7 +294,7 @@ Emit:
 			emit(Oadd, Kl, r1, i.arg[0], getcon(15, fn));
 			if (fn->tmp[i.arg[0].val].slot != -1)
 				err("unlikely argument %%%s in %s",
-					fn->tmp[i.arg[0].val].name, opdesc[i.op].name);
+					fn->tmp[i.arg[0].val].name, optab[i.op].name);
 		}
 		break;
 	default:
@@ -349,13 +303,13 @@ Emit:
 		if (isload(i.op))
 			goto case_Oload;
 		if (iscmp(i.op, &kc, &x)) {
-			emit(Oxset+x, k, i.to, R, R);
+			emit(Oflag+x, k, i.to, R, R);
 			i1 = curi;
 			if (selcmp(i.arg, kc, fn))
-				i1->op = Oxset + icmpop(x);
+				i1->op = Oflag + cmpop(x);
 			break;
 		}
-		die("unknown instruction %s", opdesc[i.op].name);
+		die("unknown instruction %s", optab[i.op].name);
 	}
 
 	while (i0 > curi && --i0) {
@@ -369,9 +323,9 @@ flagi(Ins *i0, Ins *i)
 {
 	while (i>i0) {
 		i--;
-		if (opdesc[i->op].sflag)
+		if (amd64_op[i->op].zflag)
 			return i;
-		if (opdesc[i->op].lflag)
+		if (amd64_op[i->op].lflag)
 			continue;
 		return 0;
 	}
@@ -402,22 +356,22 @@ seljmp(Blk *b, Fn *fn)
 	fi = flagi(b->ins, &b->ins[b->nins]);
 	if (!fi || !req(fi->to, r)) {
 		selcmp((Ref[2]){r, CON_Z}, Kw, fn); /* todo, long jnz */
-		b->jmp.type = Jxjc + ICne;
+		b->jmp.type = Jjf + Cine;
 	}
 	else if (iscmp(fi->op, &k, &c)) {
 		if (t->nuse == 1) {
 			if (selcmp(fi->arg, k, fn))
-				c = icmpop(c);
+				c = cmpop(c);
 			*fi = (Ins){.op = Onop};
 		}
-		b->jmp.type = Jxjc + c;
+		b->jmp.type = Jjf + c;
 	}
 	else if (fi->op == Oand && t->nuse == 1
 	     && (rtype(fi->arg[0]) == RTmp ||
 	         rtype(fi->arg[1]) == RTmp)) {
 		fi->op = Oxtest;
 		fi->to = R;
-		b->jmp.type = Jxjc + ICne;
+		b->jmp.type = Jjf + Cine;
 		if (rtype(fi->arg[1]) == RCon) {
 			r = fi->arg[1];
 			fi->arg[1] = fi->arg[0];
@@ -431,7 +385,7 @@ seljmp(Blk *b, Fn *fn)
 		 */
 		if (t->nuse == 1)
 			emit(Ocopy, Kw, R, r, R);
-		b->jmp.type = Jxjc + ICne;
+		b->jmp.type = Jjf + Cine;
 	}
 }
 
@@ -593,7 +547,7 @@ amatch(Addr *a, Ref r, ANum *ai, Fn *fn, int top)
  * requires use counts (as given by parsing)
  */
 void
-isel(Fn *fn)
+amd64_isel(Fn *fn)
 {
 	Blk *b, **sb;
 	Ins *i;
diff --git a/sysv.c b/amd64/sysv.c
index a88b044..dcaa812 100644
--- a/sysv.c
+++ b/amd64/sysv.c
@@ -90,25 +90,6 @@ typclass(AClass *a, Typ *t)
 	classify(a, t, &n, &e);
 }
 
-static void
-blit(Ref rstk, uint soff, Ref rsrc, uint sz, Fn *fn)
-{
-	Ref r, r1;
-	uint boff;
-
-	/* it's an impolite blit, we might go across the end
-	 * of the source object a little bit... */
-	for (boff=0; sz>0; sz-=8, soff+=8, boff+=8) {
-		r = newtmp("abi", Kl, fn);
-		r1 = newtmp("abi", Kl, fn);
-		emit(Ostorel, 0, R, r, r1);
-		emit(Oadd, Kl, r1, rstk, getcon(soff, fn));
-		r1 = newtmp("abi", Kl, fn);
-		emit(Oload, Kl, r, r1, R);
-		emit(Oadd, Kl, r1, rsrc, getcon(boff, fn));
-	}
-}
-
 static int
 retr(Ref reg[2], AClass *aret)
 {
@@ -226,15 +207,17 @@ argsclass(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret, Ref *env)
 	return ((6-nint) << 4) | ((8-nsse) << 8);
 }
 
-int rsave[] = {
+int amd64_sysv_rsave[] = {
 	RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX,
 	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-	XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14
+	XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, -1
 };
-int rclob[] = {RBX, R12, R13, R14, R15};
+int amd64_sysv_rclob[] = {RBX, R12, R13, R14, R15, -1};
 
-MAKESURE(rsave_has_correct_size, sizeof rsave == NRSave * sizeof(int));
-MAKESURE(rclob_has_correct_size, sizeof rclob == NRClob * sizeof(int));
+MAKESURE(sysv_arrays_ok,
+	sizeof amd64_sysv_rsave == (NGPS+NFPS+1) * sizeof(int) &&
+	sizeof amd64_sysv_rclob == (NCLR+1) * sizeof(int)
+);
 
 /* layout of call's second argument (RCall)
  *
@@ -248,7 +231,7 @@ MAKESURE(rclob_has_correct_size, sizeof rclob == NRClob * sizeof(int));
  */
 
 bits
-retregs(Ref r, int p[2])
+amd64_sysv_retregs(Ref r, int p[2])
 {
 	bits b;
 	int ni, nf;
@@ -273,7 +256,7 @@ retregs(Ref r, int p[2])
 }
 
 bits
-argregs(Ref r, int p[2])
+amd64_sysv_argregs(Ref r, int p[2])
 {
 	bits b;
 	int j, ni, nf, ra;
@@ -284,7 +267,7 @@ argregs(Ref r, int p[2])
 	nf = (r.val >> 8) & 15;
 	ra = (r.val >> 12) & 1;
 	for (j=0; j<ni; j++)
-		b |= BIT(rsave[j]);
+		b |= BIT(amd64_sysv_rsave[j]);
 	for (j=0; j<nf; j++)
 		b |= BIT(XMM0+j);
 	if (p) {
@@ -298,7 +281,7 @@ static Ref
 rarg(int ty, int *ni, int *ns)
 {
 	if (KBASE(ty) == 0)
-		return TMP(rsave[(*ni)++]);
+		return TMP(amd64_sysv_rsave[(*ni)++]);
 	else
 		return TMP(XMM0 + (*ns)++);
 }
@@ -531,7 +514,7 @@ chpred(Blk *b, Blk *bp, Blk *bp1)
 	}
 }
 
-void
+static void
 selvaarg(Fn *fn, Blk *b, Ins *i)
 {
 	Ref loc, lreg, lstk, nr, r0, r1, c4, c8, c16, c, ap;
@@ -618,12 +601,12 @@ selvaarg(Fn *fn, Blk *b, Ins *i)
 	b->s1 = breg;
 	b->s2 = bstk;
 	c = getcon(isint ? 48 : 176, fn);
-	emit(Ocmpw+ICult, Kw, r1, nr, c);
+	emit(Ocmpw+Ciult, Kw, r1, nr, c);
 	emit(Oloadsw, Kl, nr, r0, R);
 	emit(Oadd, Kl, r0, ap, isint ? CON_Z : c4);
 }
 
-void
+static void
 selvastart(Fn *fn, int fa, Ref ap)
 {
 	Ref r0, r1;
@@ -649,7 +632,7 @@ selvastart(Fn *fn, int fa, Ref ap)
 }
 
 void
-abi(Fn *fn)
+amd64_sysv_abi(Fn *fn)
 {
 	Blk *b;
 	Ins *i, *i0, *ip;
diff --git a/amd64/targ.c b/amd64/targ.c
new file mode 100644
index 0000000..e227574
--- /dev/null
+++ b/amd64/targ.c
@@ -0,0 +1,30 @@
+#include "all.h"
+
+Amd64Op amd64_op[NOp] = {
+#define O(op, t, x) [O##op] =
+#define X(nm, zf, lf) { nm, zf, lf, },
+	#include "../ops.h"
+};
+
+static int
+amd64_memargs(int op)
+{
+	return amd64_op[op].nmem;
+}
+
+Target T_amd64_sysv = {
+	.gpr0 = RAX,
+	.ngpr = NGPR,
+	.fpr0 = XMM0,
+	.nfpr = NFPR,
+	.rglob = BIT(RBP) | BIT(RSP),
+	.nrglob = 2,
+	.rsave = amd64_sysv_rsave,
+	.nrsave = {NGPS, NFPS},
+	.retregs = amd64_sysv_retregs,
+	.argregs = amd64_sysv_argregs,
+	.memargs = amd64_memargs,
+	.abi = amd64_sysv_abi,
+	.isel = amd64_isel,
+	.emitfn = amd64_emitfn,
+};
diff --git a/cfg.c b/cfg.c
index dff0765..ea1ae12 100644
--- a/cfg.c
+++ b/cfg.c
@@ -312,8 +312,8 @@ simpljmp(Fn *fn)
 			uffind(&b->s1, uf);
 		if (b->s2)
 			uffind(&b->s2, uf);
-		c = b->jmp.type - Jxjc;
-		if (0 <= c && c <= NXICmp)
+		c = b->jmp.type - Jjf;
+		if (0 <= c && c < NCmp) /* Jjf..Jjf1 only */
 		if (b->s1 == b->s2) {
 			b->jmp.type = Jjmp;
 			b->s2 = 0;
diff --git a/fold.c b/fold.c
index 6129421..55672dd 100644
--- a/fold.c
+++ b/fold.c
@@ -100,7 +100,7 @@ visitins(Ins *i, Fn *fn)
 
 	if (rtype(i->to) != RTmp)
 		return;
-	if (opdesc[i->op].cfold) {
+	if (optab[i->op].canfold) {
 		l = latval(i->arg[0]);
 		if (!req(i->arg[1], R))
 			r = latval(i->arg[1]);
@@ -114,7 +114,7 @@ visitins(Ins *i, Fn *fn)
 			v = opfold(i->op, i->cls, &fn->con[l], &fn->con[r], fn);
 	} else
 		v = Bot;
-	/* fprintf(stderr, "\nvisiting %s (%p)", opdesc[i->op].name, (void *)i); */
+	/* fprintf(stderr, "\nvisiting %s (%p)", optab[i->op].name, (void *)i); */
 	update(i->to.val, v, fn);
 }
 
@@ -360,7 +360,7 @@ foldint(Con *res, int op, int w, Con *cl, Con *cr)
 	else if (cl->type == CAddr || cr->type == CAddr) {
 		if (Ocmpl <= op && op <= Ocmpl1)
 			return 1;
-		err("invalid address operand for '%s'", opdesc[op].name);
+		err("invalid address operand for '%s'", optab[op].name);
 	}
 	switch (op) {
 	case Oadd:  x = l.u + r.u; break;
@@ -397,42 +397,42 @@ foldint(Con *res, int op, int w, Con *cl, Con *cr)
 			} else
 				op -= Ocmpl - Ocmpw;
 			switch (op - Ocmpw) {
-			case ICule: x = l.u <= r.u; break;
-			case ICult: x = l.u < r.u;  break;
-			case ICsle: x = l.s <= r.s; break;
-			case ICslt: x = l.s < r.s;  break;
-			case ICsgt: x = l.s > r.s;  break;
-			case ICsge: x = l.s >= r.s; break;
-			case ICugt: x = l.u > r.u;  break;
-			case ICuge: x = l.u >= r.u; break;
-			case ICeq:  x = l.u == r.u; break;
-			case ICne:  x = l.u != r.u; break;
+			case Ciule: x = l.u <= r.u; break;
+			case Ciult: x = l.u < r.u;  break;
+			case Cisle: x = l.s <= r.s; break;
+			case Cislt: x = l.s < r.s;  break;
+			case Cisgt: x = l.s > r.s;  break;
+			case Cisge: x = l.s >= r.s; break;
+			case Ciugt: x = l.u > r.u;  break;
+			case Ciuge: x = l.u >= r.u; break;
+			case Cieq:  x = l.u == r.u; break;
+			case Cine:  x = l.u != r.u; break;
 			default: die("unreachable");
 			}
 		}
 		else if (Ocmps <= op && op <= Ocmps1) {
 			switch (op - Ocmps) {
-			case FCle: x = l.fs <= r.fs; break;
-			case FClt: x = l.fs < r.fs;  break;
-			case FCgt: x = l.fs > r.fs;  break;
-			case FCge: x = l.fs >= r.fs; break;
-			case FCne: x = l.fs != r.fs; break;
-			case FCeq: x = l.fs == r.fs; break;
-			case FCo: x = l.fs < r.fs || l.fs >= r.fs; break;
-			case FCuo: x = !(l.fs < r.fs || l.fs >= r.fs); break;
+			case Cfle: x = l.fs <= r.fs; break;
+			case Cflt: x = l.fs < r.fs;  break;
+			case Cfgt: x = l.fs > r.fs;  break;
+			case Cfge: x = l.fs >= r.fs; break;
+			case Cfne: x = l.fs != r.fs; break;
+			case Cfeq: x = l.fs == r.fs; break;
+			case Cfo: x = l.fs < r.fs || l.fs >= r.fs; break;
+			case Cfuo: x = !(l.fs < r.fs || l.fs >= r.fs); break;
 			default: die("unreachable");
 			}
 		}
 		else if (Ocmpd <= op && op <= Ocmpd1) {
 			switch (op - Ocmpd) {
-			case FCle: x = l.fd <= r.fd; break;
-			case FClt: x = l.fd < r.fd;  break;
-			case FCgt: x = l.fd > r.fd;  break;
-			case FCge: x = l.fd >= r.fd; break;
-			case FCne: x = l.fd != r.fd; break;
-			case FCeq: x = l.fd == r.fd; break;
-			case FCo: x = l.fd < r.fd || l.fd >= r.fd; break;
-			case FCuo: x = !(l.fd < r.fd || l.fd >= r.fd); break;
+			case Cfle: x = l.fd <= r.fd; break;
+			case Cflt: x = l.fd < r.fd;  break;
+			case Cfgt: x = l.fd > r.fd;  break;
+			case Cfge: x = l.fd >= r.fd; break;
+			case Cfne: x = l.fd != r.fd; break;
+			case Cfeq: x = l.fd == r.fd; break;
+			case Cfo: x = l.fd < r.fd || l.fd >= r.fd; break;
+			case Cfuo: x = !(l.fd < r.fd || l.fd >= r.fd); break;
 			default: die("unreachable");
 			}
 		}
@@ -453,7 +453,7 @@ foldflt(Con *res, int op, int w, Con *cl, Con *cr)
 	double xd, ld, rd;
 
 	if (cl->type != CBits || cr->type != CBits)
-		err("invalid address operand for '%s'", opdesc[op].name);
+		err("invalid address operand for '%s'", optab[op].name);
 	if (w)  {
 		ld = cl->bits.d;
 		rd = cr->bits.d;
@@ -495,7 +495,7 @@ opfold(int op, int cls, Con *cl, Con *cr, Fn *fn)
 
 	if ((op == Odiv || op == Oudiv
 	|| op == Orem || op == Ourem) && czero(cr, KWIDE(cls)))
-		err("null divisor in '%s'", opdesc[op].name);
+		err("null divisor in '%s'", optab[op].name);
 	if (cls == Kw || cls == Kl) {
 		if (foldint(&c, op, cls == Kl, cl, cr))
 			return Bot;
diff --git a/gas.c b/gas.c
new file mode 100644
index 0000000..c1fd6df
--- /dev/null
+++ b/gas.c
@@ -0,0 +1,122 @@
+#include "all.h"
+
+
+char *gasloc, *gassym;
+
+void
+gasemitdat(Dat *d, FILE *f) /* write the data item d to f as assembler directives */
+{
+	static int align; /* 1 once a DAlign item was seen since the last DStart */
+	static char *dtoa[] = { /* directive text, indexed by item type */
+		[DAlign] = ".align",
+		[DB] = "\t.byte",
+		[DH] = "\t.short",
+		[DW] = "\t.int",
+		[DL] = "\t.quad"
+	};
+
+	switch (d->type) {
+	case DStart:
+		align = 0; /* a new definition starts without explicit alignment */
+		fprintf(f, ".data\n");
+		break;
+	case DEnd:
+		break;
+	case DName:
+		if (!align) /* no DAlign item came first: fall back to 8 */
+			fprintf(f, ".align 8\n");
+		if (d->export)
+			fprintf(f, ".globl %s%s\n", gassym, d->u.str);
+		fprintf(f, "%s%s:\n", gassym, d->u.str);
+		break;
+	case DZ:
+		fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num); /* u.num bytes of value 0 */
+		break;
+	default:
+		if (d->type == DAlign)
+			align = 1;
+
+		if (d->isstr) {
+			if (d->type != DB)
+				err("strings only supported for 'b' currently");
+			fprintf(f, "\t.ascii \"%s\"\n", d->u.str);
+		}
+		else if (d->isref) { /* symbol+offset; gassym matches the prefix put on DName labels */
+			fprintf(f, "%s %s%s%+"PRId64"\n",
+				dtoa[d->type], gassym, d->u.ref.nam,
+				d->u.ref.off);
+		}
+		else {
+			fprintf(f, "%s %"PRId64"\n",
+				dtoa[d->type], d->u.num);
+		}
+		break;
+	}
+}
+
+typedef struct FBits FBits;
+
+struct FBits { /* one stashed floating-point constant */
+	union {
+		int64_t n; /* raw bits: compared for de-duplication and printed as the value */
+		float f; /* read only for the comment next to narrow constants */
+		double d; /* read only for the comment next to wide constants */
+	} bits;
+	int wide; /* non-zero: emitted as .quad, otherwise as .long */
+	FBits *link; /* next entry; 0 ends the list */
+};
+
+static FBits *stash; /* head of the list, in insertion order */
+
+int
+gasstashfp(int64_t n, int w) /* record constant bits n with wide flag w; returns its list index */
+{
+	FBits **pb, *b;
+	int i;
+
+	/* does a dumb de-dup of fp constants
+	 * this should be the linker's job */
+	for (pb=&stash, i=0; (b=*pb); pb=&b->link, i++)
+		if (n == b->bits.n && w == b->wide)
+			return i; /* same bits and width already stashed: reuse the index */
+	b = emalloc(sizeof *b);
+	b->bits.n = n;
+	b->wide = w;
+	b->link = 0;
+	*pb = b; /* pb is the terminating link after the scan, so this appends */
+	return i;
+}
+
+void
+gasemitfin(FILE *f) /* emit all stashed fp constants to f, then free the stash */
+{
+	FBits *b;
+	int i;
+
+	if (!stash)
+		return; /* nothing was stashed: print no section at all */
+	fprintf(f, "/* floating point constants */\n");
+	fprintf(f, ".data\n.align 8\n");
+	for (b=stash, i=0; b; b=b->link, i++)
+		if (b->wide) /* first pass: the .quad constants */
+			fprintf(f,
+				"%sfp%d:\n"
+				"\t.quad %"PRId64
+				" /* %f */\n",
+				gasloc, i, b->bits.n,
+				b->bits.d
+			);
+	for (b=stash, i=0; b; b=b->link, i++)
+		if (!b->wide) /* second pass: the .long constants; i is still the list index */
+			fprintf(f,
+				"%sfp%d:\n"
+				"\t.long %"PRId64
+				" /* %lf */\n",
+				gasloc, i, b->bits.n & 0xffffffff,
+				b->bits.f
+			);
+	while ((b=stash)) { /* unlink and free every entry; stash is 0 afterwards */
+		stash = b->link;
+		free(b);
+	}
+}
diff --git a/live.c b/live.c
index 18c9b63..6e63705 100644
--- a/live.c
+++ b/live.c
@@ -104,31 +104,39 @@ Again:
 
 		memset(phi, 0, f->ntmp * sizeof phi[0]);
 		memset(nlv, 0, sizeof nlv);
-		b->out->t[0] |= RGLOB;
+		b->out->t[0] |= T.rglob;
 		bscopy(b->in, b->out);
 		for (t=0; bsiter(b->in, &t); t++) {
 			phifix(t, phi, f->tmp);
 			nlv[KBASE(f->tmp[t].cls)]++;
 		}
 		if (rtype(b->jmp.arg) == RCall) {
-			assert(bscount(b->in) == NRGlob && nlv[0] == NRGlob && nlv[1] == 0);
-			b->in->t[0] |= retregs(b->jmp.arg, nlv);
+			assert((int)bscount(b->in) == T.nrglob &&
+				nlv[0] == T.nrglob &&
+				nlv[1] == 0);
+			b->in->t[0] |= T.retregs(b->jmp.arg, nlv);
 		} else
 			bset(b->jmp.arg, b, nlv, phi, f->tmp);
 		for (k=0; k<2; k++)
 			b->nlive[k] = nlv[k];
 		for (i=&b->ins[b->nins]; i!=b->ins;) {
 			if ((--i)->op == Ocall && rtype(i->arg[1]) == RCall) {
-				b->in->t[0] &= ~retregs(i->arg[1], m);
-				for (k=0; k<2; k++)
+				b->in->t[0] &= ~T.retregs(i->arg[1], m);
+				for (k=0; k<2; k++) {
 					nlv[k] -= m[k];
-				if (nlv[0] + NISave > b->nlive[0])
-					b->nlive[0] = nlv[0] + NISave;
-				if (nlv[1] + NFSave > b->nlive[1])
-					b->nlive[1] = nlv[1] + NFSave;
-				b->in->t[0] |= argregs(i->arg[1], m);
-				for (k=0; k<2; k++)
+					/* caller-save registers are used
+					 * by the callee, in that sense,
+					 * right in the middle of the call,
+					 * they are live: */
+					nlv[k] += T.nrsave[k];
+					if (nlv[k] > b->nlive[k])
+						b->nlive[k] = nlv[k];
+				}
+				b->in->t[0] |= T.argregs(i->arg[1], m);
+				for (k=0; k<2; k++) {
+					nlv[k] -= T.nrsave[k];
 					nlv[k] += m[k];
+				}
 			}
 			if (!req(i->to, R)) {
 				assert(rtype(i->to) == RTmp);
diff --git a/main.c b/main.c
index 4d2e6bd..6098dee 100644
--- a/main.c
+++ b/main.c
@@ -3,6 +3,18 @@
 #include <ctype.h>
 #include <getopt.h>
 
+Target T;
+
+extern Target T_amd64_sysv;
+
+static struct TMap { /* associates a target name with its Target description */
+	char *name;
+	Target *T;
+} tmap[] = {
+	{ "amd64_sysv", &T_amd64_sysv },
+	{ 0, 0 } /* sentinel: a null name ends the table */
+};
+
 enum Asm {
 	Gasmacho,
 	Gaself,
@@ -33,7 +45,7 @@ data(Dat *d)
 		fputs("/* end data */\n\n", outf);
 		freeall();
 	}
-	emitdat(d, outf);
+	gasemitdat(d, outf);
 }
 
 static void
@@ -62,10 +74,10 @@ func(Fn *fn)
 	copy(fn);
 	filluse(fn);
 	fold(fn);
-	abi(fn);
+	T.abi(fn);
 	fillpreds(fn);
 	filluse(fn);
-	isel(fn);
+	T.isel(fn);
 	fillrpo(fn);
 	filllive(fn);
 	fillcost(fn);
@@ -83,7 +95,7 @@ func(Fn *fn)
 		} else
 			fn->rpo[n]->link = fn->rpo[n+1];
 	if (!dbg) {
-		emitfn(fn, outf);
+		T.emitfn(fn, outf);
 		fprintf(outf, "/* end function %s */\n\n", fn->name);
 	} else
 		fprintf(stderr, "\n");
@@ -93,13 +105,15 @@ func(Fn *fn)
 int
 main(int ac, char *av[])
 {
-	FILE *inf;
-	char *f;
+	struct TMap *tm;
+	FILE *inf, *hf;
+	char *f, *sep;
 	int c, asm;
 
-	asm = Defaultasm;
+	asm = Defasm;
+	T = Deftgt;
 	outf = stdout;
-	while ((c = getopt(ac, av, "hd:o:G:")) != -1)
+	while ((c = getopt(ac, av, "hd:o:G:t:")) != -1)
 		switch (c) {
 		case 'd':
 			for (; *optarg; optarg++)
@@ -112,6 +126,18 @@ main(int ac, char *av[])
 			if (strcmp(optarg, "-") != 0)
 				outf = fopen(optarg, "w");
 			break;
+		case 't':
+			for (tm=tmap;; tm++) {
+				if (!tm->name) {
+					fprintf(stderr, "unknown target '%s'\n", optarg);
+					exit(1);
+				}
+				if (strcmp(optarg, tm->name) == 0) {
+					T = *tm->T;
+					break;
+				}
+			}
+			break;
 		case 'G':
 			if (strcmp(optarg, "e") == 0)
 				asm = Gaself;
@@ -124,22 +150,28 @@ main(int ac, char *av[])
 			break;
 		case 'h':
 		default:
-			fprintf(stderr, "%s [OPTIONS] {file.ssa, -}\n", av[0]);
-			fprintf(stderr, "\t%-10s prints this help\n", "-h");
-			fprintf(stderr, "\t%-10s output to file\n", "-o file");
-			fprintf(stderr, "\t%-10s generate gas (e) or osx (m) asm\n", "-G {e,m}");
-			fprintf(stderr, "\t%-10s dump debug information\n", "-d <flags>");
+			hf = c != 'h' ? stderr : stdout;
+			fprintf(hf, "%s [OPTIONS] {file.ssa, -}\n", av[0]);
+			fprintf(hf, "\t%-11s prints this help\n", "-h");
+			fprintf(hf, "\t%-11s output to file\n", "-o file");
+			fprintf(hf, "\t%-11s generate for a target among:\n", "-t <target>");
+			fprintf(hf, "\t%-11s ", "");
+			for (tm=tmap, sep=""; tm->name; tm++, sep=", ")
+				fprintf(hf, "%s%s", sep, tm->name);
+			fprintf(hf, "\n");
+			fprintf(hf, "\t%-11s generate gas (e) or osx (m) asm\n", "-G {e,m}");
+			fprintf(hf, "\t%-11s dump debug information\n", "-d <flags>");
 			exit(c != 'h');
 		}
 
 	switch (asm) {
 	case Gaself:
-		locprefix = ".L";
-		symprefix = "";
+		gasloc = ".L";
+		gassym = "";
 		break;
 	case Gasmacho:
-		locprefix = "L";
-		symprefix = "_";
+		gasloc = "L";
+		gassym = "_";
 		break;
 	}
 
@@ -159,7 +191,7 @@ main(int ac, char *av[])
 	} while (++optind < ac);
 
 	if (!dbg)
-		emitfin(outf);
+		gasemitfin(outf);
 
 	exit(0);
 }
diff --git a/mem.c b/mem.c
index fd6ee16..eda3d18 100644
--- a/mem.c
+++ b/mem.c
@@ -34,9 +34,9 @@ memopt(Fn *fn)
 			if (isstore(l->op))
 			if (req(i->to, l->arg[1]) && !req(i->to, l->arg[0]))
 			if (s == -1 || s == storesz(l))
-			if (k == -1 || k == opdesc[l->op].argcls[0][0]) {
+			if (k == -1 || k == optab[l->op].argcls[0][0]) {
 				s = storesz(l);
-				k = opdesc[l->op].argcls[0][0];
+				k = optab[l->op].argcls[0][0];
 				continue;
 			}
 			goto Skip;
diff --git a/ops.h b/ops.h
new file mode 100644
index 0000000..9b357a5
--- /dev/null
+++ b/ops.h
@@ -0,0 +1,167 @@
+#ifndef X /* amd64 */
+	#define X(NMemArgs, SetsZeroFlag, LeavesFlags)
+#endif /* when the includer defines no X, every X(...) column expands to nothing */
+
+#define T(a,b,c,d,e,f,g,h) {                          \
+	{[Kw]=K##a, [Kl]=K##b, [Ks]=K##c, [Kd]=K##d}, \
+	{[Kw]=K##e, [Kl]=K##f, [Ks]=K##g, [Kd]=K##h}  \
+} /* two rows of four K-constants, each row indexed by Kw, Kl, Ks, Kd */
+
+
+/*********************/
+/* PUBLIC OPERATIONS */
+/*********************/
+
+/* Arithmetic and Bits */
+O(add,     T(w,l,s,d, w,l,s,d), 1) X(2, 1, 0)
+O(sub,     T(w,l,s,d, w,l,s,d), 1) X(2, 1, 0)
+O(div,     T(w,l,s,d, w,l,s,d), 1) X(0, 0, 0)
+O(rem,     T(w,l,e,e, w,l,e,e), 1) X(0, 0, 0)
+O(udiv,    T(w,l,e,e, w,l,e,e), 1) X(0, 0, 0)
+O(urem,    T(w,l,e,e, w,l,e,e), 1) X(0, 0, 0)
+O(mul,     T(w,l,s,d, w,l,s,d), 1) X(2, 0, 0)
+O(and,     T(w,l,e,e, w,l,e,e), 1) X(2, 1, 0)
+O(or,      T(w,l,e,e, w,l,e,e), 1) X(2, 1, 0)
+O(xor,     T(w,l,e,e, w,l,e,e), 1) X(2, 1, 0)
+O(sar,     T(w,l,e,e, w,w,e,e), 1) X(1, 1, 0)
+O(shr,     T(w,l,e,e, w,w,e,e), 1) X(1, 1, 0)
+O(shl,     T(w,l,e,e, w,w,e,e), 1) X(1, 1, 0)
+
+/* Comparisons */
+O(ceqw,    T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(cnew,    T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(csgew,   T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(csgtw,   T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(cslew,   T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(csltw,   T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(cugew,   T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(cugtw,   T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(culew,   T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(cultw,   T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+
+O(ceql,    T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(cnel,    T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(csgel,   T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(csgtl,   T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(cslel,   T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(csltl,   T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(cugel,   T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(cugtl,   T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(culel,   T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(cultl,   T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+
+O(ceqs,    T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+O(cges,    T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+O(cgts,    T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+O(cles,    T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+O(clts,    T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+O(cnes,    T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+O(cos,     T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+O(cuos,    T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+
+O(ceqd,    T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+O(cged,    T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+O(cgtd,    T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+O(cled,    T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+O(cltd,    T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+O(cned,    T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+O(cod,     T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+O(cuod,    T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+
+/* Memory */
+O(storeb,  T(w,e,e,e, m,e,e,e), 0) X(0, 0, 1)
+O(storeh,  T(w,e,e,e, m,e,e,e), 0) X(0, 0, 1)
+O(storew,  T(w,e,e,e, m,e,e,e), 0) X(0, 0, 1)
+O(storel,  T(l,e,e,e, m,e,e,e), 0) X(0, 0, 1)
+O(stores,  T(s,e,e,e, m,e,e,e), 0) X(0, 0, 1)
+O(stored,  T(d,e,e,e, m,e,e,e), 0) X(0, 0, 1)
+
+O(loadsb,  T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(loadub,  T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(loadsh,  T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(loaduh,  T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(loadsw,  T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(loaduw,  T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(load,    T(m,m,m,m, x,x,x,x), 0) X(0, 0, 1)
+
+/* Extensions and Truncations */
+O(extsb,   T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1)
+O(extub,   T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1)
+O(extsh,   T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1)
+O(extuh,   T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1)
+O(extsw,   T(e,w,e,e, e,x,e,e), 1) X(0, 0, 1)
+O(extuw,   T(e,w,e,e, e,x,e,e), 1) X(0, 0, 1)
+
+O(exts,    T(e,e,e,s, e,e,e,x), 1) X(0, 0, 1)
+O(truncd,  T(e,e,d,e, e,e,x,e), 1) X(0, 0, 1)
+O(stosi,   T(s,s,e,e, x,x,e,e), 1) X(0, 0, 1)
+O(dtosi,   T(d,d,e,e, x,x,e,e), 1) X(0, 0, 1)
+O(swtof,   T(e,e,w,w, e,e,x,x), 1) X(0, 0, 1)
+O(sltof,   T(e,e,l,l, e,e,x,x), 1) X(0, 0, 1)
+O(cast,    T(s,d,w,l, x,x,x,x), 1) X(0, 0, 1)
+
+/* Stack Allocation */
+O(alloc4,  T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0)
+O(alloc8,  T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0)
+O(alloc16, T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0)
+
+/* Variadic Function Helpers */
+O(vaarg,   T(m,m,m,m, x,x,x,x), 0) X(0, 0, 0)
+O(vastart, T(m,e,e,e, x,e,e,e), 0) X(0, 0, 0)
+
+O(copy,    T(w,l,s,d, x,x,x,x), 0) X(0, 0, 1)
+
+
+/****************************************/
+/* INTERNAL OPERATIONS (keep nop first) */
+/****************************************/
+
+/* Miscellaneous and Architecture-Specific Operations */
+O(nop,     T(x,x,x,x, x,x,x,x), 0) X(0, 0, 1)
+O(addr,    T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(swap,    T(w,l,s,d, w,l,s,d), 0) X(1, 0, 0)
+O(sign,    T(w,l,e,e, x,x,e,e), 0) X(0, 0, 0)
+O(salloc,  T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0)
+O(xidiv,   T(w,l,e,e, x,x,e,e), 0) X(1, 0, 0)
+O(xdiv,    T(w,l,e,e, x,x,e,e), 0) X(1, 0, 0)
+O(xcmp,    T(w,l,s,d, w,l,s,d), 0) X(1, 1, 0)
+O(xtest,   T(w,l,e,e, w,l,e,e), 0) X(1, 1, 0)
+
+/* Arguments, Parameters, and Calls */
+O(par,     T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0)
+O(parc,    T(e,x,e,e, e,x,e,e), 0) X(0, 0, 0)
+O(pare,    T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0)
+O(arg,     T(w,l,s,d, x,x,x,x), 0) X(0, 0, 0)
+O(argc,    T(e,x,e,e, e,l,e,e), 0) X(0, 0, 0)
+O(arge,    T(w,l,s,d, x,x,x,x), 0) X(0, 0, 0)
+O(call,    T(m,m,m,m, x,x,x,x), 0) X(0, 0, 0)
+O(vacall,  T(m,m,m,m, x,x,x,x), 0) X(0, 0, 0)
+
+/* Flags Setting */
+O(flagieq,  T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagine,  T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagisge, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagisgt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagisle, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagislt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagiuge, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagiugt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagiule, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagiult, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagfeq,  T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagfge,  T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagfgt,  T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagfle,  T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagflt,  T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagfne,  T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagfo,   T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagfuo,  T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+
+
+#undef T
+#undef X
+#undef O
+
+/*
+| column -t -o ' '
+*/
diff --git a/parse.c b/parse.c
index b393fc2..69bd74e 100644
--- a/parse.c
+++ b/parse.c
@@ -4,91 +4,13 @@
 
 enum {
 	Ke = -2, /* Erroneous mode */
-	Km = Kl, /* Memory pointer (for x64) */
+	Km = Kl, /* Memory pointer */
 };
 
-OpDesc opdesc[NOp] = {
-#define A(a,b,c,d) {[Kw]=K##a, [Kl]=K##b, [Ks]=K##c, [Kd]=K##d}
-
-	/*            NAME       NM      ARGCLS0     ARGCLS1  SF LF FLD*/
-	[Oadd]    = { "add",      2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0, 1 },
-	[Osub]    = { "sub",      2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0, 1 },
-	[Odiv]    = { "div",      2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0, 1 },
-	[Orem]    = { "rem",      2, {A(w,l,e,e), A(w,l,e,e)}, 0, 0, 1 },
-	[Oudiv]   = { "udiv",     2, {A(w,l,e,e), A(w,l,e,e)}, 0, 0, 1 },
-	[Ourem]   = { "urem",     2, {A(w,l,e,e), A(w,l,e,e)}, 0, 0, 1 },
-	[Omul]    = { "mul",      2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0, 1 },
-	[Oand]    = { "and",      2, {A(w,l,e,e), A(w,l,e,e)}, 1, 0, 1 },
-	[Oor]     = { "or",       2, {A(w,l,e,e), A(w,l,e,e)}, 1, 0, 1 },
-	[Oxor]    = { "xor",      2, {A(w,l,e,e), A(w,l,e,e)}, 1, 0, 1 },
-	[Osar]    = { "sar",      1, {A(w,l,e,e), A(w,w,e,e)}, 1, 0, 1 },
-	[Oshr]    = { "shr",      1, {A(w,l,e,e), A(w,w,e,e)}, 1, 0, 1 },
-	[Oshl]    = { "shl",      1, {A(w,l,e,e), A(w,w,e,e)}, 1, 0, 1 },
-	[Ostored] = { "stored",   0, {A(d,e,e,e), A(m,e,e,e)}, 0, 1, 0 },
-	[Ostores] = { "stores",   0, {A(s,e,e,e), A(m,e,e,e)}, 0, 1, 0 },
-	[Ostorel] = { "storel",   0, {A(l,e,e,e), A(m,e,e,e)}, 0, 1, 0 },
-	[Ostorew] = { "storew",   0, {A(w,e,e,e), A(m,e,e,e)}, 0, 1, 0 },
-	[Ostoreh] = { "storeh",   0, {A(w,e,e,e), A(m,e,e,e)}, 0, 1, 0 },
-	[Ostoreb] = { "storeb",   0, {A(w,e,e,e), A(m,e,e,e)}, 0, 1, 0 },
-	[Oload]   = { "load",     0, {A(m,m,m,m), A(x,x,x,x)}, 0, 1, 0 },
-	[Oloadsw] = { "loadsw",   0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 },
-	[Oloaduw] = { "loaduw",   0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 },
-	[Oloadsh] = { "loadsh",   0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 },
-	[Oloaduh] = { "loaduh",   0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 },
-	[Oloadsb] = { "loadsb",   0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 },
-	[Oloadub] = { "loadub",   0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 },
-	[Oextsw]  = { "extsw",    0, {A(e,w,e,e), A(e,x,e,e)}, 0, 1, 1 },
-	[Oextuw]  = { "extuw",    0, {A(e,w,e,e), A(e,x,e,e)}, 0, 1, 1 },
-	[Oextsh]  = { "extsh",    0, {A(w,w,e,e), A(x,x,e,e)}, 0, 1, 1 },
-	[Oextuh]  = { "extuh",    0, {A(w,w,e,e), A(x,x,e,e)}, 0, 1, 1 },
-	[Oextsb]  = { "extsb",    0, {A(w,w,e,e), A(x,x,e,e)}, 0, 1, 1 },
-	[Oextub]  = { "extub",    0, {A(w,w,e,e), A(x,x,e,e)}, 0, 1, 1 },
-	[Oexts]   = { "exts",     0, {A(e,e,e,s), A(e,e,e,x)}, 0, 1, 1 },
-	[Otruncd] = { "truncd",   0, {A(e,e,d,e), A(e,e,x,e)}, 0, 1, 1 },
-	[Ostosi]  = { "stosi",    0, {A(s,s,e,e), A(x,x,e,e)}, 0, 1, 1 },
-	[Odtosi]  = { "dtosi",    0, {A(d,d,e,e), A(x,x,e,e)}, 0, 1, 1 },
-	[Oswtof]  = { "swtof",    0, {A(e,e,w,w), A(e,e,x,x)}, 0, 1, 1 },
-	[Osltof]  = { "sltof",    0, {A(e,e,l,l), A(e,e,x,x)}, 0, 1, 1 },
-	[Ocast]   = { "cast",     0, {A(s,d,w,l), A(x,x,x,x)}, 0, 1, 1 },
-	[Ocopy]   = { "copy",     1, {A(w,l,s,d), A(x,x,x,x)}, 0, 1, 0 },
-	[Onop]    = { "nop",      0, {A(x,x,x,x), A(x,x,x,x)}, 0, 1, 0 },
-	[Oswap]   = { "swap",     2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0, 0 },
-	[Osign]   = { "sign",     0, {A(w,l,e,e), A(x,x,e,e)}, 0, 0, 0 },
-	[Osalloc] = { "salloc",   0, {A(e,l,e,e), A(e,x,e,e)}, 0, 0, 0 },
-	[Oxidiv]  = { "xidiv",    1, {A(w,l,e,e), A(x,x,e,e)}, 0, 0, 0 },
-	[Oxdiv]   = { "xdiv",     1, {A(w,l,e,e), A(x,x,e,e)}, 0, 0, 0 },
-	[Oxcmp]   = { "xcmp",     1, {A(w,l,s,d), A(w,l,s,d)}, 1, 0, 0 },
-	[Oxtest]  = { "xtest",    1, {A(w,l,e,e), A(w,l,e,e)}, 1, 0, 0 },
-	[Oaddr]   = { "addr",     0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 },
-	[Opar]    = { "par",      0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0, 0 },
-	[Opare]   = { "pare",     0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0, 0 },
-	[Oparc]   = { "parc",     0, {A(e,x,e,e), A(e,x,e,e)}, 0, 0, 0 },
-	[Oarg]    = { "arg",      0, {A(w,l,s,d), A(x,x,x,x)}, 0, 0, 0 },
-	[Oarge]   = { "arge",     0, {A(w,l,s,d), A(x,x,x,x)}, 0, 0, 0 },
-	[Oargc]   = { "argc",     0, {A(e,x,e,e), A(e,l,e,e)}, 0, 0, 0 },
-	[Ocall]   = { "call",     0, {A(m,m,m,m), A(x,x,x,x)}, 0, 0, 0 },
-	[Ovacall] = { "vacall",   0, {A(m,m,m,m), A(x,x,x,x)}, 0, 0, 0 },
-	[Oxsetnp] = { "xsetnp",   0, {A(x,x,e,e), A(x,x,e,e)}, 0, 0, 0 },
-	[Oxsetp]  = { "xsetp",    0, {A(x,x,e,e), A(x,x,e,e)}, 0, 0, 0 },
-	[Oalloc]   = { "alloc4",  1, {A(e,l,e,e), A(e,x,e,e)}, 0, 0, 0 },
-	[Oalloc+1] = { "alloc8",  1, {A(e,l,e,e), A(e,x,e,e)}, 0, 0, 0 },
-	[Oalloc+2] = { "alloc16", 1, {A(e,l,e,e), A(e,x,e,e)}, 0, 0, 0 },
-	[Ovaarg]   = { "vaarg",   0, {A(m,m,m,m), A(x,x,x,x)}, 0, 0, 0 },
-	[Ovastart] = { "vastart", 0, {A(m,e,e,e), A(x,e,e,e)}, 0, 0, 0 },
-#define X(c) \
-	[Ocmpw+IC##c] = { "c"    #c "w", 0, {A(w,w,e,e), A(w,w,e,e)}, 1, 0, 1 }, \
-	[Ocmpl+IC##c] = { "c"    #c "l", 0, {A(l,l,e,e), A(l,l,e,e)}, 1, 0, 1 }, \
-	[Oxset+IC##c] = { "xset" #c,     0, {A(x,x,e,e), A(x,x,e,e)}, 0, 1, 0 },
-	ICMPS(X)
-#undef X
-#define X(c) \
-	[Ocmps+FC##c] = { "c"    #c "s", 0, {A(s,s,e,e), A(s,s,e,e)}, 1, 0, 1 }, \
-	[Ocmpd+FC##c] = { "c"    #c "d", 0, {A(d,d,e,e), A(d,d,e,e)}, 1, 0, 1 },
-	FCMPS(X)
-#undef X
-
+Op optab[NOp] = {
+#define O(op, t, cf) [O##op]={#op, t, cf},
+	#include "ops.h"
 };
-#undef A
 
 typedef enum {
 	PXXX,
@@ -242,8 +164,8 @@ lexinit()
 	if (done)
 		return;
 	for (i=0; i<NPubOp; ++i)
-		if (opdesc[i].name)
-			kwmap[i] = opdesc[i].name;
+		if (optab[i].name)
+			kwmap[i] = optab[i].name;
 	assert(Ntok <= CHAR_MAX);
 	for (i=0; i<Ntok; ++i)
 		if (kwmap[i]) {
@@ -810,26 +732,26 @@ typecheck(Fn *fn)
 		}
 		for (i=b->ins; i-b->ins < b->nins; i++)
 			for (n=0; n<2; n++) {
-				k = opdesc[i->op].argcls[n][i->cls];
+				k = optab[i->op].argcls[n][i->cls];
 				r = i->arg[n];
 				t = &fn->tmp[r.val];
 				if (k == Ke)
 					err("invalid instruction type in %s",
-						opdesc[i->op].name);
+						optab[i->op].name);
 				if (rtype(r) == RType)
 					continue;
 				if (rtype(r) != -1 && k == Kx)
 					err("no %s operand expected in %s",
 						n == 1 ? "second" : "first",
-						opdesc[i->op].name);
+						optab[i->op].name);
 				if (rtype(r) == -1 && k != Kx)
 					err("missing %s operand in %s",
 						n == 1 ? "second" : "first",
-						opdesc[i->op].name);
+						optab[i->op].name);
 				if (!usecheck(r, k, fn))
 					err("invalid type for %s operand %%%s in %s",
 						n == 1 ? "second" : "first",
-						t->name, opdesc[i->op].name);
+						t->name, optab[i->op].name);
 			}
 		r = b->jmp.arg;
 		if (isret(b->jmp.type)) {
@@ -866,7 +788,10 @@ parsefn(int export)
 	curf->tmp = vnew(curf->ntmp, sizeof curf->tmp[0], Pfn);
 	curf->con = vnew(curf->ncon, sizeof curf->con[0], Pfn);
 	for (i=0; i<Tmp0; ++i)
-		newtmp(0, i < XMM0 ? Kl : Kd, curf);
+		if (T.fpr0 <= i && i < T.fpr0 + T.nfpr)
+			newtmp(0, Kd, curf);
+		else
+			newtmp(0, Kl, curf);
 	curf->con[0].type = CBits;
 	curf->export = export;
 	blink = &curf->start;
@@ -1228,29 +1153,12 @@ printref(Ref r, Fn *fn, FILE *f)
 void
 printfn(Fn *fn, FILE *f)
 {
+	static char ktoc[] = "wlsd";
 	static char *jtoa[NJmp] = {
-		[Jret0]     = "ret",
-		[Jretw]     = "retw",
-		[Jretl]     = "retl",
-		[Jretc]     = "retc",
-		[Jrets]     = "rets",
-		[Jretd]     = "retd",
-		[Jjnz]      = "jnz",
-		[Jxjnp]     = "xjnp",
-		[Jxjp]      = "xjp",
-	#define X(c) [Jxjc+IC##c] = "xj" #c,
-		ICMPS(X)
+	#define X(j) [J##j] = #j,
+		JMPS(X)
 	#undef X
 	};
-	static char prcls[NOp] = {
-		[Oarg] = 1,
-		[Oswap] = 1,
-		[Oxcmp] = 1,
-		[Oxtest] = 1,
-		[Oxdiv] = 1,
-		[Oxidiv] = 1,
-	};
-	static char ktoc[] = "wlsd";
 	Blk *b;
 	Phi *p;
 	Ins *i;
@@ -1282,10 +1190,18 @@ printfn(Fn *fn, FILE *f)
 				printref(i->to, fn, f);
 				fprintf(f, " =%c ", ktoc[i->cls]);
 			}
-			assert(opdesc[i->op].name);
-			fprintf(f, "%s", opdesc[i->op].name);
-			if (req(i->to, R) && prcls[i->op])
-				fputc(ktoc[i->cls], f);
+			assert(optab[i->op].name);
+			fprintf(f, "%s", optab[i->op].name);
+			if (req(i->to, R))
+				switch (i->op) {
+				case Oarg:
+				case Oswap:
+				case Oxcmp:
+				case Oxtest:
+				case Oxdiv:
+				case Oxidiv:
+					fputc(ktoc[i->cls], f);
+				}
 			if (!req(i->arg[0], R)) {
 				fprintf(f, " ");
 				printref(i->arg[0], fn, f);
diff --git a/rega.c b/rega.c
index 3d83327..02429a6 100644
--- a/rega.c
+++ b/rega.c
@@ -8,8 +8,8 @@
 typedef struct RMap RMap;
 
 struct RMap {
-	int t[NIReg+NFReg];
-	int r[NIReg+NFReg];
+	int t[Tmp0];
+	int r[Tmp0];
 	BSet b[1];
 	int n;
 };
@@ -78,10 +78,12 @@ static void
 radd(RMap *m, int t, int r)
 {
 	assert((t >= Tmp0 || t == r) && "invalid temporary");
-	assert(((RAX <= r && r < RAX + NIReg) || (XMM0 <= r && r < XMM0 + NFReg)) && "invalid register");
+	assert(((T.gpr0 <= r && r < T.gpr0 + T.ngpr)
+		|| (T.fpr0 <= r && r < T.fpr0 + T.nfpr))
+		&& "invalid register");
 	assert(!bshas(m->b, t) && "temporary has mapping");
 	assert(!bshas(m->b, r) && "register already allocated");
-	assert(m->n <= NIReg+NFReg && "too many mappings");
+	assert(m->n <= T.ngpr+T.nfpr && "too many mappings");
 	bsset(m->b, t);
 	bsset(m->b, r);
 	m->t[m->n] = t;
@@ -110,11 +112,11 @@ ralloc(RMap *m, int t)
 		regs = tmp[phicls(t, tmp)].hint.m;
 		regs |= m->b->t[0];
 		if (KBASE(tmp[t].cls) == 0) {
-			r0 = RAX;
-			r1 = RAX + NIReg;
+			r0 = T.gpr0;
+			r1 = r0 + T.ngpr;
 		} else {
-			r0 = XMM0;
-			r1 = XMM0 + NFReg;
+			r0 = T.fpr0;
+			r1 = r0 + T.nfpr;
 		}
 		for (r=r0; r<r1; r++)
 			if (!(regs & BIT(r)))
@@ -135,7 +137,7 @@ rfree(RMap *m, int t)
 {
 	int i, r;
 
-	assert(t >= Tmp0 || !(BIT(t) & RGLOB));
+	assert(t >= Tmp0 || !(BIT(t) & T.rglob));
 	if (!bshas(m->b, t))
 		return -1;
 	for (i=0; m->t[i] != t; i++)
@@ -295,10 +297,10 @@ dopm(Blk *b, Ins *i, RMap *m)
 	} while (i != b->ins && regcpy(i-1));
 	assert(m0.n <= m->n);
 	if (i != b->ins && (i-1)->op == Ocall) {
-		def = retregs((i-1)->arg[1], 0);
-		for (r=0; r<NRSave; r++)
-			if (!(BIT(rsave[r]) & def))
-				move(rsave[r], R, m);
+		def = T.retregs((i-1)->arg[1], 0) | T.rglob;
+		for (r=0; T.rsave[r]>=0; r++)
+			if (!(BIT(T.rsave[r]) & def))
+				move(T.rsave[r], R, m);
 	}
 	for (npm=0, n=0; n<m->n; n++) {
 		t = m->t[n];
@@ -370,10 +372,10 @@ doblk(Blk *b, RMap *cur)
 	for (i=&b->ins[b->nins]; i!=b->ins;) {
 		switch ((--i)->op) {
 		case Ocall:
-			rs = argregs(i->arg[1], 0);
-			for (r=0; r<NRSave; r++)
-				if (!(BIT(rsave[r]) & rs))
-					rfree(cur, rsave[r]);
+			rs = T.argregs(i->arg[1], 0) | T.rglob;
+			for (r=0; T.rsave[r]>=0; r++)
+				if (!(BIT(T.rsave[r]) & rs))
+					rfree(cur, T.rsave[r]);
 			break;
 		case Ocopy:
 			if (isreg(i->arg[0])) {
@@ -388,7 +390,7 @@ doblk(Blk *b, RMap *cur)
 			if (!req(i->to, R)) {
 				assert(rtype(i->to) == RTmp);
 				r = i->to.val;
-				if (r >= Tmp0 || !(BIT(r) & RGLOB))
+				if (r >= Tmp0 || !(BIT(r) & T.rglob))
 					r = rfree(cur, r);
 				if (r == -1) {
 					assert(!isreg(i->to));
diff --git a/spill.c b/spill.c
index 0872fd5..3871247 100644
--- a/spill.c
+++ b/spill.c
@@ -196,8 +196,8 @@ limit2(BSet *b1, int k1, int k2, BSet *fst)
 	bscopy(b2, b1);
 	bsinter(b1, mask[0]);
 	bsinter(b2, mask[1]);
-	limit(b1, NIReg - k1, fst);
-	limit(b2, NFReg - k2, fst);
+	limit(b1, T.ngpr - k1, fst);
+	limit(b2, T.nfpr - k2, fst);
 	bsunion(b1, b2);
 }
 
@@ -265,11 +265,11 @@ dopm(Blk *b, Ins *i, BSet *v)
 	} while (i != b->ins && regcpy(i-1));
 	bscopy(u, v);
 	if (i != b->ins && (i-1)->op == Ocall) {
-		v->t[0] &= ~retregs((i-1)->arg[1], 0);
-		limit2(v, NISave, NFSave, 0);
-		for (r=0, n=0; n<NRSave; n++)
-			r |= BIT(rsave[n]);
-		v->t[0] |= argregs((i-1)->arg[1], 0);
+		v->t[0] &= ~T.retregs((i-1)->arg[1], 0);
+		limit2(v, T.nrsave[0], T.nrsave[1], 0);
+		for (n=0, r=0; T.rsave[n]>=0; n++)
+			r |= BIT(T.rsave[n]);
+		v->t[0] |= T.argregs((i-1)->arg[1], 0);
 	} else {
 		limit2(v, 0, 0, 0);
 		r = v->t[0];
@@ -318,9 +318,9 @@ spill(Fn *fn)
 	slot8 = 0;
 	for (t=0; t<ntmp; t++) {
 		k = 0;
-		if (t >= XMM0 && t < XMM0 + NFReg)
+		if (t >= T.fpr0 && t < T.fpr0 + T.nfpr)
 			k = 1;
-		else if (t >= Tmp0)
+		if (t >= Tmp0)
 			k = KBASE(tmp[t].cls);
 		bsset(mask[k], t);
 	}
@@ -344,9 +344,9 @@ spill(Fn *fn)
 		if (hd) {
 			/* back-edge */
 			bszero(v);
-			hd->gen->t[0] |= RGLOB; /* don't spill registers */
+			hd->gen->t[0] |= T.rglob; /* don't spill registers */
 			for (k=0; k<2; k++) {
-				n = k == 0 ? NIReg : NFReg;
+				n = k == 0 ? T.ngpr : T.nfpr;
 				bscopy(u, b->out);
 				bsinter(u, mask[k]);
 				bscopy(w, u);
@@ -373,7 +373,7 @@ spill(Fn *fn)
 		} else {
 			bscopy(v, b->out);
 			if (rtype(b->jmp.arg) == RCall)
-				v->t[0] |= retregs(b->jmp.arg, 0);
+				v->t[0] |= T.retregs(b->jmp.arg, 0);
 		}
 		for (t=Tmp0; bsiter(b->out, &t); t++)
 			if (!bshas(v, t))
@@ -381,7 +381,7 @@ spill(Fn *fn)
 		bscopy(b->out, v);
 
 		/* 2. process the block instructions */
-		r = v->t[0] & (BIT(Tmp0)-1);
+		r = v->t[0];
 		curi = &insb[NIns];
 		for (i=&b->ins[b->nins]; i!=b->ins;) {
 			i--;
@@ -402,7 +402,7 @@ spill(Fn *fn)
 					bsset(w, t);
 				}
 			}
-			j = opdesc[i->op].nmem;
+			j = T.memargs(i->op);
 			for (n=0; n<2; n++)
 				if (rtype(i->arg[n]) == RMem)
 					j--;
@@ -449,11 +449,11 @@ spill(Fn *fn)
 				bsclr(v, t);
 			}
 			emiti(*i);
-			r = v->t[0] & (BIT(Tmp0)-1);
+			r = v->t[0]; /* Tmp0 is NBit */
 			if (r)
 				sethint(v, r);
 		}
-		assert(r == RGLOB || b == fn->start);
+		assert(r == T.rglob || b == fn->start);
 
 		for (p=b->phi; p; p=p->link) {
 			assert(rtype(p->to) == RTmp);
diff --git a/util.c b/util.c
index 9b73771..aae1481 100644
--- a/util.c
+++ b/util.c
@@ -87,6 +87,36 @@ freeall()
 	nptr = 1;
 }
 
+int
+iscmp(int op, int *pk, int *pc) /* returns 1 and sets *pk, *pc when op is in a comparison range, else 0 */
+{
+	if (Ocmpw <= op && op <= Ocmpw1) {
+		*pc = op - Ocmpw;
+		*pk = Kw;
+	}
+	else if (Ocmpl <= op && op <= Ocmpl1) {
+		*pc = op - Ocmpl;
+		*pk = Kl;
+	}
+	else if (Ocmps <= op && op <= Ocmps1) {
+		*pc = NCmpI + op - Ocmps; /* codes from the s and d ranges are offset by NCmpI */
+		*pk = Ks;
+	}
+	else if (Ocmpd <= op && op <= Ocmpd1) {
+		*pc = NCmpI + op - Ocmpd;
+		*pk = Kd;
+	}
+	else
+		return 0; /* *pk and *pc are left unwritten */
+	return 1;
+}
+
+int
+argcls(Ins *i, int n) /* optab class entry for argument n of i, selected by i's own class */
+{
+	return optab[i->op].argcls[n][i->cls];
+}
+
 void
 emit(int op, int k, Ref to, Ref arg0, Ref arg1)
 {
@@ -165,6 +195,42 @@ vgrow(void *vp, ulong len)
 	*(Vec **)vp = v1;
 }
 
+static int cmptab[][2] ={ /* indexed by comparison code; the Cf* codes are offset by NCmpI */
+	             /* negation    swap */
+	[Ciule]      = {Ciugt,      Ciuge},
+	[Ciult]      = {Ciuge,      Ciugt},
+	[Ciugt]      = {Ciule,      Ciult},
+	[Ciuge]      = {Ciult,      Ciule},
+	[Cisle]      = {Cisgt,      Cisge},
+	[Cislt]      = {Cisge,      Cisgt},
+	[Cisgt]      = {Cisle,      Cislt},
+	[Cisge]      = {Cislt,      Cisle},
+	[Cieq]       = {Cine,       Cieq},
+	[Cine]       = {Cieq,       Cine},
+	[NCmpI+Cfle] = {NCmpI+Cfgt, NCmpI+Cfge},
+	[NCmpI+Cflt] = {NCmpI+Cfge, NCmpI+Cfgt},
+	[NCmpI+Cfgt] = {NCmpI+Cfle, NCmpI+Cflt},
+	[NCmpI+Cfge] = {NCmpI+Cflt, NCmpI+Cfle},
+	[NCmpI+Cfeq] = {NCmpI+Cfne, NCmpI+Cfeq},
+	[NCmpI+Cfne] = {NCmpI+Cfeq, NCmpI+Cfne},
+	[NCmpI+Cfo]  = {NCmpI+Cfuo, NCmpI+Cfo},
+	[NCmpI+Cfuo] = {NCmpI+Cfo,  NCmpI+Cfuo},
+};
+
+int
+cmpneg(int c) /* looks up the "negation" column of cmptab for code c */
+{
+	assert(0 <= c && c < NCmp);
+	return cmptab[c][0];
+}
+
+int
+cmpop(int c) /* looks up the "swap" column of cmptab; presumably the code to use once the operands are exchanged */
+{
+	assert(0 <= c && c < NCmp);
+	return cmptab[c][1];
+}
+
 int
 clsmerge(short *pk, short k)
 {
@@ -257,6 +323,30 @@ addcon(Con *c0, Con *c1)
 }
 
 void
+blit(Ref rdst, uint doff, Ref rsrc, uint sz, Fn *fn) /* emit loads/stores copying sz bytes from rsrc to rdst+doff */
+{
+	struct { int st, ld, cls, size; } *p, tbl[] = { /* store op, load op, load class, bytes per chunk */
+		{ Ostorel, Oload,   Kl, 8 },
+		{ Ostorew, Oload,   Kw, 4 }, /* 4-byte chunks; a size of 8 here could never match after the row above */
+		{ Ostoreh, Oloaduh, Kw, 2 },
+		{ Ostoreb, Oloadub, Kw, 1 }
+	};
+	Ref r, r1;
+	uint boff, s;
+
+	for (boff=0, p=tbl; sz; p++) /* widest chunks first; the 1-byte row always drains what is left */
+		for (s=p->size; sz>=s; sz-=s, doff+=s, boff+=s) {
+			r = newtmp("blt", Kl, fn); /* temporary carrying the copied chunk */
+			r1 = newtmp("blt", Kl, fn); /* destination address */
+			emit(p->st, 0, R, r, r1); /* NOTE(review): the store is emitted before its operands are computed, so emit() presumably builds code last-to-first - confirm */
+			emit(Oadd, Kl, r1, rdst, getcon(doff, fn));
+			r1 = newtmp("blt", Kl, fn); /* source address */
+			emit(p->ld, p->cls, r, r1, R);
+			emit(Oadd, Kl, r1, rsrc, getcon(boff, fn));
+		}
+}
+
+void
 bsinit(BSet *bs, uint n)
 {
 	n = (n + NBit-1) / NBit;