summary refs log tree commit diff
path: root/proto
diff options
context:
space:
mode:
Diffstat (limited to 'proto')
-rw-r--r--proto/.gitignore7
-rw-r--r--proto/Makefile13
-rw-r--r--proto/TODO54
-rw-r--r--proto/bak.ml132
-rw-r--r--proto/elf.ml200
-rw-r--r--proto/heap.ml60
-rw-r--r--proto/lo.ml478
-rw-r--r--proto/lo2.ml713
-rw-r--r--proto/tmain.c24
9 files changed, 1681 insertions, 0 deletions
diff --git a/proto/.gitignore b/proto/.gitignore
new file mode 100644
index 0000000..ff9fbe2
--- /dev/null
+++ b/proto/.gitignore
@@ -0,0 +1,7 @@
+t
+bak
+.comfile
+*.o
+*.cm[io]
+lisc/lo
+t.out
diff --git a/proto/Makefile b/proto/Makefile
new file mode 100644
index 0000000..c6ebd16
--- /dev/null
+++ b/proto/Makefile
@@ -0,0 +1,13 @@
+.PHONY: all test clean
+
+all: bak
+
+bak: elf.ml lo2.ml
+	ocamlc -g -o bak elf.ml lo2.ml
+
+test: bak
+	@./bak test
+	@cc -O2 -o t.out tmain.c t.o && ./t.out
+
+clean:
+	rm -f bak *.out *.o *.cm[io]
diff --git a/proto/TODO b/proto/TODO
new file mode 100644
index 0000000..e144c66
--- /dev/null
+++ b/proto/TODO
@@ -0,0 +1,54 @@
+Features
+- Operand classes for instructions
+- Hints in register allocation
+- Fixed register instructions (div, mul)
+  Is this part of "operand classes"?
+
+Instructions
+- ADD SUB SDIV UDIV SREM UREM MUL LSL LSR ASL ASR
+- SEXT ZEXT (we need 8, 16, 32, 64 bits)
+- CMP ...
+- ALLOC STORE LOAD (we need 8, 16, 32, 64 bits)
+(- PTR)
+- CALL BRZ JMP RET
+
+Machine
+- SREG GREG
+- Register use/defs for all instructions.
+
+Types
+- Integer (32 & 64 bits)
+(- Structure "{a,b,c}")
+(- Pointer (to type "t"))
+
+Questions
+- Q: Should we allow constant operands?
+  A:
+  It looks like `Con instructions are a bad idea because
+  they introduce spurious live ranges.
+  This was not a huge problem, modifications s in loc and
+  getreg only fixed this.  Still, it makes use larger bit
+  vectors during the liveness analysis.
+- Q: How to represent the IR?
+  A:
+  So far, a graph of basic blocks composed of quadruples
+  seems to be the most convenient.
+- Q: Do we need types?
+
+Problems
+- x = y op z [fixed using freeze]
+  if x is spilled, y can be moved to a spill location
+  to free one register for x, this is kind of stupid.
+  We can probably fix this by having a better heuristic
+  for spilling decisions.
+- [tentative fix: 4fc98da]
+  Phi defined variables with spill location do not work.
+- At the end of a block we call loc on all live variables,
+  if there are not enough registers, some variables get
+  assigned spill locations.  We need to be able to spill
+  variables that are already in register.
+  NOTE: Following Braun & Hack we could do one pass
+  first that determines what variables are in register
+  at the end of loops.  This sounds good because
+  back-edges are actually easier to detect than loop
+  headers!
diff --git a/proto/bak.ml b/proto/bak.ml
new file mode 100644
index 0000000..cd1aff2
--- /dev/null
+++ b/proto/bak.ml
@@ -0,0 +1,132 @@
+type id = int
+type ty =
+  | TInt of bool * int
+  | TArr of int * ty
+  | TPtr of ty
+  | TVoid
+type con = CInt of int
+type cnd = Ge | Le | Gt | Lt | Ne | Eq
+type ins =
+  | IAlloc of ty
+  | IMem of id
+  | ISto of id * id
+  | IAdd of id * id
+  | ISub of id * id
+  | ICon of ty * con
+  | IBr  of id * cnd * id * id
+  | IJmp of id
+  | IPhi of ty * id * id
+
+let isint = function TInt _ -> true | _ -> false
+let isbranch = function IBr _ | IJmp _ -> true | _ -> false
+
+exception Type of string
+let tychk blk =
+  let typs = Array.make (Array.length blk) TVoid in
+  let blks = ref [] in
+  let jmp src dst =
+    let rec f = function
+      | (blk, srcs) :: tl when blk = dst ->
+        (blk, src :: srcs) :: tl
+      | b :: tl when fst b < dst -> b :: f tl
+      | l ->
+        let srcs =
+          if dst = 0 then [src] else
+          if isbranch blk.(dst-1)
+          then [src] else [dst-1; src] in
+        (dst, srcs) :: l in
+    blks := f !blks in
+  let f i =                                           (* do easy type checks *)
+    let chn n =
+      if n >= i || n < 0 then
+        raise (Type "broken data dependency") in
+    function
+    | IPhi (ty, _, _) ->
+      if ty = TVoid then
+        raise (Type "invalid void phi");
+      typs.(i) <- ty
+    | ICon (ty, _) -> typs.(i) <- ty
+    | IAlloc ty ->
+      if ty = TVoid then
+        raise (Type "invalid void alloc");
+      typs.(i) <- TPtr ty
+    | IMem n ->
+      chn n;
+      (match typs.(n) with
+      | TPtr ty -> typs.(i) <- ty
+      | _ -> raise (Type "invalid dereference")
+      )
+    | ISto (a, b) ->
+      chn a; chn b;
+      if typs.(a) <> TPtr typs.(b) then
+        raise (Type "invalid store")
+    | IAdd (a, b) ->
+      chn a; chn b;
+      if not (isint typs.(b)) then
+        raise (Type "second add operand not integral");
+      (match typs.(a) with
+      | (TPtr _) as t -> typs.(i) <- t
+      | (TInt _) as t ->
+        if t <> typs.(b) then
+          raise (Type "invalid heterogeneous addition");
+        typs.(i) <- t
+      | _ -> raise (Type "invalid type for addition")
+      )
+    | ISub (a, b) ->
+      chn a; chn b;
+      (match typs.(a), typs.(b) with
+      | (TPtr _ as ta), (TPtr _ as tb) ->
+        if ta <> tb then
+          raise (Type "substracted pointers have different types");
+        typs.(i) <- TInt (true, 64)
+      | (TInt _ as ta), (TInt _ as tb) ->
+        if ta <> tb then
+          raise (Type "invalid heterogeneous substraction");
+        typs.(i) <- ta
+      | _ -> raise (Type "invalid type for substraction")
+      )
+    | IBr (_, _, _, dst) -> jmp i dst; jmp i (i+1)
+    | IJmp dst -> jmp i dst in
+  Array.iteri f blk;
+  let f = function                                (* check types at phi nodes *)
+    | IPhi (_, a, b) ->
+      if typs.(a) <> typs.(b) then
+        raise (Type "ill-typed phi node")
+    | _ -> () in
+  Array.iter f blk;
+  let bbase i =
+    let rec f base = function
+      | [] -> base
+      | (b, _) :: _ when b > i -> base
+      | (b, _) :: tl -> f b tl in
+    f 0 !blks in
+  let f i = function                               (* check validity of ssa *)
+    | IPhi (_, a, b) ->
+      let callers =
+        List.map bbase (List.assoc (bbase i) !blks) in
+      let ba = bbase a and bb = bbase b in
+      if ba = bb
+      || not (List.mem ba callers)
+      || not (List.mem bb callers)
+      then
+        raise (Type "invalid phi node");
+    | IAdd (a, b) | ISub (a, b) | ISto (a, b) | IBr (a, _, b, _) ->
+      let bi = bbase i in
+      if bbase a <> bi || bbase b <> bi then
+        raise (Type "operands of non-phy not in current block")
+    | IMem a ->
+      if bbase a <> bbase i then
+        raise (Type "operands of non-phy not in current block")
+    | IJmp _ | ICon _ | IAlloc _ -> () in
+  Array.iteri f blk
+
+                                                          (* tests *)
+let _ =
+  let int = TInt (true, 32) in
+  let p0 = [|
+    (* 0 *) ICon (int, CInt 1);
+    (* 1 *) ICon (int, CInt 42);
+    (* 2 *) IPhi (int, 0, 2);
+    (* 3 *) IAdd (1, 2);
+    (* 4 *) IJmp 1
+  |] in tychk p0
diff --git a/proto/elf.ml b/proto/elf.ml
new file mode 100644
index 0000000..d83f4fd
--- /dev/null
+++ b/proto/elf.ml
@@ -0,0 +1,200 @@
+(* This is a module to spit simple ELF
+   object files that can afterwards be
+   linked to build an application.
+*)
+
+let hash s =
+  (* The ELF hash function. *)
+  let open Int64 in (* I doubt this is necessary... *)
+  let rec f p h =
+    if p = String.length s then to_int h else
+    let h = shift_left h 4 in
+    let h = add h (of_int (int_of_char s.[p])) in
+    let g = logand h (of_int 0xf0000000) in
+    let h = logxor h (shift_right g 24) in
+    f (p+1) (logand h (of_int 0x0fffffff)) in
+  f 0 (of_int 0)
+
+let le n x =
+  (* Make a string of bytes in little endian convention. *)
+  let b = Bytes.create n in
+  let rec f i x =
+    if i = n then () else
+    let d = char_of_int (x land 0xff) in
+    Bytes.set b i d;
+    f (i+1) (x lsr 8) in
+  f 0 x; Bytes.to_string b
+
+let stt_NOTYPE = 0
+let stt_OBJECT = 1
+let stt_FUNC   = 2
+
+let stb_LOCAL  = 0
+let stb_GLOBAL = 16
+let stb_WEAK   = 32
+
+let sht_NULL     = le 4 0
+let sht_PROGBITS = le 4 1
+let sht_SYMTAB   = le 4 2
+let sht_STRTAB   = le 4 3
+let sht_RELA     = le 4 4
+let sht_NOTE     = le 4 7
+let sht_NOBITS   = le 4 8
+
+let shf_WRITE     = 1
+let shf_ALLOC     = 2
+let shf_EXECINSTR = 4
+
+let barebones_elf oc fn text =
+  let header = String.concat ""
+    [ "\x7fELF"                          (* e_ident, magic *)
+    ; "\x02"                             (* e_ident, ELFCLASS64 *)
+    ; "\x01"                             (* e_ident, ELFDATA2LSB *)
+    ; "\x01"                             (* e_indent, EV_CURRENT *)
+    ; "\x00"                             (* e_ident, ELFOSABI_SYSV *)
+    ; "\x00"                             (* e_ident, ABI version *)
+    ; "\x00\x00\x00\x00\x00\x00\x00"     (* e_ident, padding *)
+    ; "\x01\x00"                         (* e_type, ET_REL *)
+    ; "\x3e\x00"                         (* e_machine, EM_X86_64 *)
+    ; "\x01\x00\x00\x00"                 (* e_version, EV_CURRENT *)
+    ; "\x00\x00\x00\x00\x00\x00\x00\x00" (* e_entry, unused *)
+    ; "\x00\x00\x00\x00\x00\x00\x00\x00" (* e_phoff, unused *)
+    ; "\x40\x00\x00\x00\x00\x00\x00\x00" (* e_shoff, 64 *)
+    ; "\x00\x00\x00\x00"                 (* e_flags, 0 *)
+    ; "\x40\x00"                         (* e_hsize, 64 *)
+    ; "\x00\x00"                         (* e_phentsize, 0 *)
+    ; "\x00\x00"                         (* e_phnum, 0 *)
+    ; "\x40\x00"                         (* e_shentsize, 64 *)
+    ; "\x07\x00"                         (* e_shnum, 7 *)
+    ; "\x06\x00"                         (* e_shstrndx, 6 *)
+    ] in
+
+  (* We will use the following section organization.
+     1- .text   PROGBITS
+     2- .data   PROGBITS
+     3- .bss    NOBITS
+     4- .rela   RELA
+     5- .symtab SYMTAB
+     6- .strtab STRTAB
+  *)
+
+  let adds s x = (String.length s, s ^ x ^ "\x00") in
+  (* section names *)
+  let textstr, strtab = adds "\x00" ".text" in
+  let datastr, strtab = adds strtab ".data" in
+  let bssstr , strtab = adds strtab ".bss"  in
+  let relastr, strtab = adds strtab ".rela" in
+  let symtstr, strtab = adds strtab ".symt" in
+  let strtstr, strtab = adds strtab ".strt" in
+  (* function names *)
+  let fnstr, strtab = adds strtab fn in
+
+  let symtab = String.concat ""
+    [ le 0x18 0                      (* first symbol is reserved *)
+    ; le 4 fnstr                     (* st_name *)
+    ; le 1 (stt_FUNC lor stb_GLOBAL) (* st_info *)
+    ; "\x00"                         (* st_other *)
+    ; le 2 1                         (* st_shndx, .text *)
+    ; le 8 0                         (* st_value, offset in .text section *)
+    ; le 8 (String.length text)      (* st_size *)
+    ] in
+
+  let textoff = 64 + 7 * 64 in
+  let txtlen, txtpad =
+    let l = String.length text in
+    let p = (l + 7) land 7 in
+    (l, p) in
+  let dataoff = textoff + txtlen + txtpad in
+  let bssoff  = dataoff + 0 in
+  let relaoff = bssoff + 0 in
+  let symtoff = relaoff + 0 in
+  let strtoff = symtoff + String.length symtab in
+
+  let sh = String.concat ""
+    [ le 64 0 (* first section header is reserved *)
+    (* .text *)
+    ; le 4 textstr                   (* sh_name *)
+    ; sht_PROGBITS                   (* sh_type *)
+    ; le 8 (shf_ALLOC lor shf_EXECINSTR) (* sh_flags *)
+    ; le 8 0                         (* sh_addr *)
+    ; le 8 textoff                   (* sh_offset *)
+    ; le 8 txtlen                    (* sh_size *)
+    ; le 4 0                         (* sh_link *)
+    ; le 4 0                         (* sh_info *)
+    ; le 8 1                         (* sh_addralign *)
+    ; le 8 0                         (* sh_entsize *)
+    (* .data *)
+    ; le 4 datastr                   (* sh_name *)
+    ; sht_PROGBITS                   (* sh_type *)
+    ; le 8 (shf_ALLOC lor shf_WRITE) (* sh_flags *)
+    ; le 8 0                         (* sh_addr *)
+    ; le 8 dataoff                   (* sh_offset *)
+    ; le 8 0                         (* sh_size *)
+    ; le 4 0                         (* sh_link *)
+    ; le 4 0                         (* sh_info *)
+    ; le 8 1                         (* sh_addralign *)
+    ; le 8 0                         (* sh_entsize *)
+    (* .bss *)
+    ; le 4 bssstr                    (* sh_name *)
+    ; sht_NOBITS                     (* sh_type *)
+    ; le 8 (shf_ALLOC lor shf_WRITE) (* sh_flags *)
+    ; le 8 0                         (* sh_addr *)
+    ; le 8 bssoff                    (* sh_offset *)
+    ; le 8 0                         (* sh_size *)
+    ; le 4 0                         (* sh_link *)
+    ; le 4 0                         (* sh_info *)
+    ; le 8 1                         (* sh_addralign *)
+    ; le 8 0                         (* sh_entsize *)
+    (* .rela *)
+    ; le 4 relastr                   (* sh_name *)
+    ; sht_RELA                       (* sh_type *)
+    ; le 8 0                         (* sh_flags *)
+    ; le 8 0                         (* sh_addr *)
+    ; le 8 relaoff                   (* sh_offset *)
+    ; le 8 0                         (* sh_size *)
+    ; le 4 5                         (* sh_link, symtab index *)
+    ; le 4 1                         (* sh_info, text section *)
+    ; le 8 1                         (* sh_addralign *)
+    ; le 8 0x18                      (* sh_entsize *)
+    (* .symtab *)
+    ; le 4 symtstr                   (* sh_name *)
+    ; sht_SYMTAB                     (* sh_type *)
+    ; le 8 0                         (* sh_flags *)
+    ; le 8 0                         (* sh_addr *)
+    ; le 8 symtoff                   (* sh_offset *)
+    ; le 8 (String.length symtab)    (* sh_size *)
+    ; le 4 6                         (* sh_link, strtab index *)
+    ; le 4 1                         (* sh_info, first non-local symbol *)
+    ; le 8 1                         (* sh_addralign *)
+    ; le 8 0x18                      (* sh_entsize *)
+    (* .strtab *)
+    ; le 4 strtstr                   (* sh_name *)
+    ; sht_STRTAB                     (* sh_type *)
+    ; le 8 0                         (* sh_flags *)
+    ; le 8 0                         (* sh_addr *)
+    ; le 8 strtoff                   (* sh_offset *)
+    ; le 8 (String.length strtab)    (* sh_size *)
+    ; le 4 0                         (* sh_link *)
+    ; le 4 0                         (* sh_info *)
+    ; le 8 1                         (* sh_addralign *)
+    ; le 8 0x18                      (* sh_entsize *)
+    ] in
+
+  List.iter (output_string oc)
+    [ header
+    ; sh
+    ; text; String.make txtpad '\x90'
+    ; symtab
+    ; strtab
+    ]
+
+
+(*
+let _ =
+  let oc = open_out "test.o" in
+  let text = String.concat ""
+    [ "\xb8\x2a\x00\x00\x00" (* mov 42, %eax *)
+    ; "\xc3"                 (* retq *)
+    ] in
+  barebones_elf oc "main" text
+*)
diff --git a/proto/heap.ml b/proto/heap.ml
new file mode 100644
index 0000000..79081b9
--- /dev/null
+++ b/proto/heap.ml
@@ -0,0 +1,60 @@
+(* Generic binary heaps. *)
+module Heap: sig
+  type 'a t
+  val create: ('a -> 'a -> int) -> 'a t
+  val add: 'a t -> 'a -> unit
+  val popd: 'a t -> unit
+  val pop: 'a t -> 'a option
+  val top: 'a t -> 'a option
+end = struct
+  type 'a t =
+    { mutable arr: 'a array
+    ; mutable len: int
+    ; cmp: 'a -> 'a -> int
+    }
+
+  let mkarray n = Array.make n (Obj.magic 0)
+  let create cmp = {arr = mkarray 2; len = 0; cmp }
+  let top {arr; len; _} =
+    if len = 0 then None else Some arr.(1)
+  let swap arr i j =
+    let tmp = arr.(i) in
+    arr.(i) <- arr.(j);
+    arr.(j) <- tmp
+
+  let rec bblup cmp arr i =
+    let prnt = i/2 in
+    if prnt = 0 then () else
+    if cmp arr.(prnt) arr.(i) < 0 then () else
+    (swap arr prnt i; bblup cmp arr prnt)
+  let add ({arr; len; cmp} as hp) x =
+    let arr =
+      let alen = Array.length arr in
+      if alen > len+1 then arr else
+      let arr' = mkarray (alen * 2) in
+      Array.blit arr 0 arr' 0 alen;
+      hp.arr <- arr';
+      arr' in
+    hp.len <- len + 1;
+    arr.(hp.len) <- x;
+    bblup cmp arr hp.len
+
+  let rec bbldn cmp arr i len =
+    let ch0 = 2*i in
+    let ch1 = ch0 + 1 in
+    if ch0 > len then () else
+    let mn =
+      if ch1 > len then ch0 else
+      if cmp arr.(ch0) arr.(ch1) < 0
+      then ch0 else ch1 in
+    if cmp arr.(i) arr.(mn) <= 0 then () else
+    (swap arr i mn; bbldn cmp arr mn len)
+  let popd ({arr; len; cmp} as hp) =
+    if len = 0 then () else
+    arr.(1) <- arr.(len);
+    hp.len <- len - 1;
+    bbldn cmp arr 1 len
+  let pop hp =
+    let r = top hp in
+    popd hp; r
+end
diff --git a/proto/lo.ml b/proto/lo.ml
new file mode 100644
index 0000000..be2323d
--- /dev/null
+++ b/proto/lo.ml
@@ -0,0 +1,478 @@
+module ISet = Set.Make
+  (struct
+    type t = int
+    let compare = compare
+  end)
+
+type unop = Not
+type binop =
+  | Add | Sub
+  | Le | Ge | Lt | Gt | Eq | Ne
+
+type ('ref, 'loc) phi = { pjmp: 'loc; pvar: 'ref }
+
+type ('ref, 'loc) ir =
+  | INop
+  | ICon of int
+  | IUop of unop * 'ref
+  | IBop of 'ref * binop * 'ref
+  | IBrz of 'ref * 'loc * 'loc
+  | IJmp of 'loc
+  | IPhi of ('ref, 'loc) phi list
+
+(* Phi nodes must be at the join of branches
+   in the control flow graph, if n branches
+   join, the phi node must have n elements in
+   its list that indicate the value to merge
+   from each of the branches.
+   The id given in each of
+*)
+
+
+(* Here, we analyze a program backwards to
+   compute the liveness of all variables.
+   We assume that all phi nodes are placed
+   correctly.
+*)
+let liveness p =
+  (* The idea is now to reach a fixpoint
+     by applying the same backward liveness
+     propagation a sufficient number of
+     times.
+     The [changed] variable will tell us
+     when we reached the fixpoint, it is
+     reset to false at each iteration.
+  *)
+  let changed = ref true in
+  let liveout = Array.make (Array.length p) ISet.empty in
+
+  let setlive v l =
+    (* Extend the liveness of v to l. *)
+    if not (ISet.mem v liveout.(l)) then begin
+      changed := true;
+      liveout.(l) <- ISet.add v liveout.(l);
+    end in
+
+  let succs i =
+    (* Retreive the successor nodes of i. *)
+    if i = Array.length p -1 then [] else
+    match p.(i) with
+    | IBrz (_, i1, i2) -> [i1; i2]
+    | IJmp i1 -> [i1]
+    | _ -> [i+1] in
+
+  let gen i = ISet.of_list
+    (* Get the Gen set of i. *)
+    begin match p.(i) with
+    | IUop (_, i1) -> [i1]
+    | IBop (i1, _, i2) -> [i1; i2]
+    | IPhi l ->
+      List.iter (fun {pjmp; pvar} ->
+        setlive pvar pjmp
+      ) l; []
+    | _ -> []
+    end in
+
+  let livein i =
+    (* Get the live In set of i. *)
+    let s = liveout.(i) in
+    let s = ISet.union s (gen i) in
+    ISet.remove i s in
+
+  (* The fixpoint computation. *)
+  while !changed do
+    changed := false;
+    for i = Array.length p -1 downto 0 do
+      (* Collect live Ins of all successor blocks. *)
+      let live = List.fold_left (fun live i' ->
+          ISet.union live (livein i')
+        ) ISet.empty (succs i) in
+      ISet.iter (fun i' ->
+        setlive i' i
+      ) live
+    done
+  done;
+  liveout
+
+
+type loc =
+  | L0          (* No location. *)
+  | LCon of int (* Constant. *)
+  | LReg of int (* Machine register. *)
+  | LSpl of int (* Spill location. *)
+
+type spill = { sreg: int; soff: int }
+
+type regir =
+  | RIR of int * (loc, int ref) ir
+  | RMove of loc * loc
+
+(* The reg IR adds spill saves and restores to standard
+   IR instructions.  The register allocator below uses
+   these new instructions when the physical machine lacks
+   registers.
+*)
+
+let regalloc nr p l =
+  (* The final reg IR is built here. *)
+  let rir = ref [] in
+  let emit r = rir := r :: !rir in
+  let ipos = Array.init (Array.length p) ref in
+  emit (RIR (-1, INop));
+
+  (* Hints help the allocator to know what register
+     to use.  They can be combined using the |>
+     operator below. *)
+  let hints = Array.make (Array.length p) (-1) in
+  (* let ( |> ) a b = if a < 0 then b else a in *)
+
+  (* Number of spill slots. *)
+  let spill = ref 0 in
+
+  (* Associative list binding live ir to locations,
+     ordered by freshness. *)
+  let locs = ref [] in
+  let setloc i l = locs := (i, l) :: !locs in
+  let setspill i =
+    setloc i (LSpl !spill);
+    incr spill; !spill - 1 in
+
+  (* Get free registers. *)
+  let free () =
+    let rl = Array.to_list (Array.init nr (fun i -> i)) in
+    List.filter (fun r ->
+      not (List.mem (LReg r) (List.map snd !locs))
+    ) rl in
+
+  (* Allocate a register for an ir. *)
+  let alloc hint i =
+    let ret r = setloc i (LReg r); r in
+    let free = free () in
+    if List.mem hint free then ret hint
+    else match free with  r::_ -> ret r
+    | [] -> (* No more free registers, force spill. *)
+      let regof = function LReg r -> r | _ -> -1 in
+      let cmpf (a,_) (b,_) = compare a b in
+      let l = List.map (fun (i,l) -> (i,regof l)) !locs in
+      let l = List.filter (fun (_,r) -> r >= 0) l in
+      let sir, sreg = List.hd (List.sort cmpf l) in (* Take the oldest. *)
+      locs := snd (List.partition ((=) (sir, LReg sreg)) !locs);
+      let soff =
+        match try List.assoc sir !locs with _ -> L0 with
+        | LSpl n -> n
+        | _ -> setspill sir in
+      emit (RMove (LReg sreg, LSpl soff));
+      ret sreg in
+
+  (* Find a register for a destination. *)
+  let dst i =
+    let li =
+      try List.assoc i !locs with Not_found -> L0 in
+    let r = match li with
+      | LReg r -> r
+      | _ -> alloc hints.(i) i in
+    begin match li with
+    | LSpl l -> emit (RMove (LSpl l, LReg r))
+    | _ -> ()
+    end;
+    locs := snd (List.partition (fun (j,_) -> j=i) !locs);
+    r in
+
+  let phis = ref [] in
+
+  (* Find a location for an operand. *)
+  let loc i =
+    try List.assoc i !locs with Not_found ->
+    try List.assoc i !phis with Not_found ->
+    match p.(i) with
+    | ICon k -> setloc i (LCon k); LCon k
+    | _ -> LReg (alloc hints.(i) i) in
+
+  let loc2 i =
+    try List.assoc i !locs with Not_found ->
+    try List.assoc i !phis with Not_found ->
+    match p.(i) with
+    | ICon k -> setloc i (LCon k); LCon k
+    | _ ->
+      (* Here, we just want to avoid using the
+         same register we used for the first
+         operand. *)
+      if free () = [] then LSpl (setspill i)
+      else LReg (alloc hints.(i) i) in
+
+  let philoc i =
+    match p.(i) with
+    | IPhi pl ->
+      (try List.assoc i !phis with Not_found ->
+      let l = loc2 i in
+      phis := (i, l) :: !phis;
+      begin match l with
+      | LReg h -> List.iter (fun x -> hints.(x.pvar) <- h) pl;
+      | _ -> ()
+      end;
+      l)
+    | _ -> failwith "regalloc: invalid call to philoc" in
+  let rec movs jmp i =
+    if i >= Array.length p then () else
+    match p.(i) with
+    | IPhi l ->
+      let l = List.filter (fun x -> x.pjmp = jmp) l in
+      assert (List.length l = 1);
+      let pl = philoc i in
+      let v = (List.hd l).pvar in
+      let vl = loc2 v in
+      emit (RMove (pl, vl));
+      movs jmp (i+1)
+    | _ -> () in
+
+
+  (* Going backwards. *)
+  for i = Array.length p -1 downto 0 do
+
+    (* Forget about all bindings not live
+       at the end of the instruction. *)
+    locs := List.filter
+      (fun (i',_) -> ISet.mem i' l.(i)) !locs;
+
+    begin match p.(i) with
+    | IPhi _ -> ()
+    | ICon _ | INop ->
+      movs i (i+1)
+    | IBrz (i', l1, l2) ->
+      emit (RIR (-1, IJmp ipos.(l2)));
+      movs i l2;
+      let li' = loc i' in
+      let p = List.length !rir in
+      emit (RIR (-1, IBrz (li', ipos.(l1), ref p)));
+      movs i l1
+    | IJmp l ->
+      emit (RIR (-1, IJmp ipos.(l)));
+      movs i l;
+    | IUop (op, i') ->
+      let r = dst i in
+      let li' = hints.(i') <- r; loc i' in
+      emit (RIR (r, IUop (op, li')));
+      movs i (i+1)
+    | IBop (il, op, ir) ->
+      let r = dst i in
+      let lil = hints.(il) <- r; loc il in
+      let lir = loc2 ir in
+      emit (RIR (r, IBop (lil, op, lir)));
+      movs i (i+1)
+    end;
+
+    (* Update position of the current instruction. *)
+    ipos.(i) := List.length !rir;
+  done;
+
+  (Array.of_list !rir, !spill)
+
+
+module type ARCH = sig
+  type label type reg
+  type brtype = Jump | NonZ of reg
+
+  (* Labels for branching. *)
+  val newlbl: unit -> label
+  val setlbl: label -> unit
+
+  (* Register creation. *)
+  val regk: int -> reg
+  val regn: int -> reg
+
+  (* Register spilling and restoration. *)
+  val spill: reg -> int -> unit
+  val resto: int -> reg -> unit
+  (* Boring instructions. *)
+  val mov: reg -> reg -> unit
+  val bop: binop -> reg -> reg -> reg -> unit
+  val uop: unop -> reg -> reg -> unit
+  val br: brtype -> label -> unit
+
+  (* Initialization finalization. *)
+  val reset: int -> unit
+  val code: unit -> string
+end
+
+
+
+(* Testing. *)
+
+let parse src =
+  let blocks = Hashtbl.create 31 in
+  let rec addlbl idx l =
+    let l = String.trim l in
+    try
+      let il = String.index l ':' in
+      let lbl = String.sub l 0 il in
+      Hashtbl.add blocks lbl idx;
+      let l =
+        String.sub l (il+1)
+          (String.length l -(il+1)) in
+      addlbl idx l
+    with Not_found -> l ^ " " in
+  let src = List.mapi addlbl src in
+  let p = Array.make (List.length src) INop in
+  List.iteri (fun idx l ->
+    let fail s =
+      failwith
+        (Printf.sprintf "line %d: %s" (idx+1) s) in
+    let tok =
+      let p = ref 0 in fun () ->
+      try
+        while l.[!p] = ' ' do incr p done;
+        let p0 = !p in
+        while l.[!p] <> ' ' do incr p done;
+        String.sub l p0 (!p - p0)
+      with _ -> fail "token expected" in
+    let id () =
+      let v = tok () in
+      try Hashtbl.find blocks v
+      with _ -> fail ("unknown variable " ^ v) in
+    let instr =
+      if l = " " then INop else
+      let bop o =
+        let i1 = id () in
+        let i2 = id () in
+        IBop (i1, o, i2) in
+      match tok () with
+      | "con" -> ICon (int_of_string (tok ()))
+      | "not" -> IUop (Not, id ())
+      | "add" -> bop Add
+      | "sub" -> bop Sub
+      | "cle" -> bop Le
+      | "cge" -> bop Ge
+      | "clt" -> bop Lt
+      | "cgt" -> bop Gt
+      | "ceq" -> bop Eq
+      | "cne" -> bop Ne
+      | "phi" ->
+        let exp t =
+          let t' = tok () in
+          if t' <> t then
+            fail ("unexpected " ^ t') in
+        let rec f () =
+          match tok () with
+          | "[" ->
+            let pjmp = id () in
+            let pvar = id () in
+            exp "]";
+            {pjmp; pvar} :: f ()
+          | "." -> []
+          | t -> fail ("unexpected " ^ t) in
+        IPhi (f ())
+      | "brz" ->
+        let v = id () in
+        let bz = id () in
+        let bn = id () in
+        IBrz (v, bz, bn)
+      | "jmp" -> IJmp (id ())
+      | i -> fail ("invalid " ^ i) in
+    p.(idx) <- instr
+  ) src;
+  p
+
+let t_sum =
+  [ "k0:  con 0"
+  ; "ni:  con 1234"
+  ; "k1:  con 1"
+  ; "n0:  phi [ jmp n1 ] [ k1 ni ] ."
+  ; "f1:  phi [ jmp f2 ] [ k1 k1 ] ."
+  ; "n1:  sub n0 k1"
+  ; "f2:  add f1 n0"
+  ; "jmp: brz n1 end n0"
+  (* ; "jmp: jmp n0" *)
+  ; "end:"
+  ]
+
+(*
+  The following program has irreducible
+  control-flow.  The control flow is
+  pictured below.
+
+  +--b1      <- defs r0, r1
+  |  |
+  b2 b3
+  |  |
+  \  b4<-+   <- uses r0
+   \ |   |
+  +--b5  |   <- uses r1
+  |  |   |
+  b7 b6--+
+
+  A simple implementation (that works for
+  non-irreducible control flows) proceeds
+  backwards, it would successfully make r1
+  live in b2 and b3 but r0 would fail to be
+  live in b2. It would become live for the
+  loop b4-b5-b6 when reaching the loop header
+  b4, but the simple algorithm would not
+  propagate back to b2.
+*)
+
+let t_irred =
+  [ "k0:  con 0"
+  ; "r0:  con 1"
+  ; "r1:  con 2"
+  ; "b1:  brz k0 b2 b3"
+  ; "b2:  jmp b5"
+  ; "b3:"
+  ; "b4:  add r0 k0"
+  ; "b50: add r1 k0"
+  ; "b5:  brz k0 b6 b7"
+  ; "b6:  jmp b4"
+  ; "b7:"
+  ]
+
+let _ =
+  let src = t_sum in
+  let p = parse src in
+  let open Printf in
+
+  printf "** Program:\n";
+  List.iter (printf "%s\n") src;
+
+  printf "\n** Liveness analysis:\n";
+  let l = liveness p in
+  for i = 0 to Array.length p -1 do
+    printf "%04d:" i;
+    ISet.iter (printf " %04d") l.(i);
+    printf "\n";
+  done;
+
+  printf "\n** Register allocation:\n";
+  let regs = [| "rax"; "rbx" |] in (* ; "rbx"; "rcx" |] in *)
+  let loc = function
+    | L0 -> assert false
+    | LReg r -> regs.(r)
+    | LCon k -> sprintf "$%d" k
+    | LSpl n -> sprintf "%d(sp)" n in
+  let r, _ = regalloc (Array.length regs) p l in
+  let bop_str = function
+    | Add -> "add" | Sub -> "sub"
+    | Le -> "cle" | Ge -> "cge"
+    | Lt -> "clt" | Gt -> "cgt"
+    | Eq -> "ceq" | Ne -> "cne" in
+  let lr = Array.length r in
+  let inum l = lr - !l in
+  for i = 0 to lr -1 do
+    printf "%03d " i;
+    begin match r.(i) with
+    | RIR (r, IUop (Not, i')) ->
+      printf "%s = not %s" regs.(r) (loc i')
+    | RIR (r, IBop (i1, o, i2)) ->
+      printf "%s = %s %s %s"
+        regs.(r) (bop_str o) (loc i1) (loc i2)
+    | RIR (_, IBrz (i', l1, l2)) ->
+      printf "brz %s %03d %03d" (loc i')
+        (inum l1) (inum l2)
+    | RIR (_, IJmp l) ->
+      printf "jmp %03d" (inum l)
+    | RIR (_, IPhi l) ->
+      printf "phi"
+    | RMove (t, f) ->
+      printf "%s = %s" (loc t) (loc f)
+    | _ -> ()
+    end;
+    printf "\n"
+  done
diff --git a/proto/lo2.ml b/proto/lo2.ml
new file mode 100644
index 0000000..64bf3ae
--- /dev/null
+++ b/proto/lo2.ml
@@ -0,0 +1,713 @@
+type uop = Neg
+type bop = Add | Sub | Mul | Div | Rem | CLe | CEq
+
+type bref = int (* Block references. *)
+type 'op seqi = [ `Con of int | `Uop of uop * 'op | `Bop of 'op * bop * 'op ]
+type 'op jmpi = [ `Brz of 'op * bref * bref | `Jmp of bref | `Ret of 'op ]
+
+type ('ins, 'phi, 'jmp) bb =
+  { mutable bb_name: string
+  ; mutable bb_phis: 'phi array
+  ; mutable bb_inss: 'ins array
+  ; mutable bb_jmp: 'jmp
+  }
+
+
+(* ** Liveness analysis. ** *)
+type iref = IRPhi of (bref * int) | IRIns of (bref * int)
+let blk = function IRPhi (b, _) | IRIns (b, _) -> b
+type iprog = (iref seqi, [`Phi of iref list], iref jmpi) bb array
+
+module IRSet = Set.Make(
+  struct type t = iref let compare = compare end
+)
+
+let liveout lh ir =
+  try Hashtbl.find lh ir with Not_found ->
+  let e = IRSet.empty in Hashtbl.add lh ir e; e
+let livein lh p ir =
+  let gen (b, i) = IRSet.of_list begin
+    let {bb_inss; bb_jmp; _} = p.(b) in
+      if i = -1 then [] else
+      if i = Array.length bb_inss
+      then match bb_jmp with
+      | `Brz (i1, _, _) | `Ret i1 -> [i1]
+      | `Jmp _ -> []
+      else match bb_inss.(i) with
+      | `Uop (_, i1) -> [i1]
+      | `Bop (i1, _, i2) -> [i1; i2]
+      | `Con _ -> []
+    end in
+  let kill ((b, i) as ir) =
+    if i >= 0 then IRSet.singleton (IRIns ir) else
+    fst (Array.fold_left
+      (fun (k, i) _ -> (IRSet.add (IRPhi (b, i)) k, i+1))
+      (IRSet.empty, 0) p.(b).bb_phis
+    ) in
+  let s = liveout lh ir in
+  let s = IRSet.union s (gen ir) in
+  IRSet.diff s (kill ir)
+
+let liveness (p: iprog) =
+  let module H = Hashtbl in
+  let changed = ref true in (* Witness for fixpoint. *)
+  let nbb = Array.length p in
+  let lh = H.create 1001 in
+  let setlive ir ir' = (* Mark ir live at ir'. *)
+    let lir' = liveout lh ir' in
+    if not (IRSet.mem ir lir') then begin
+      changed := true;
+      H.replace lh ir' (IRSet.add ir lir');
+    end in
+  let succs (b, i) = (* Successor nodes of an instruction. *)
+    let {bb_inss; bb_jmp; _} = p.(b) in
+    if i = Array.length bb_inss then
+      if b+1 = nbb then [] else
+      match bb_jmp with
+      | `Brz (_, b1, b2) -> [(b1, -1); (b2, -1)]
+      | `Jmp b1 -> [(b1, -1)]
+      | `Ret _ -> []
+    else [(b, i+1)] in
+  while !changed do
+    changed := false;
+    for b = nbb - 1 downto 0 do
+      let bb = p.(b) in
+      for i = Array.length bb.bb_inss downto -1 do
+        let ir = (b, i) in
+        let live = List.fold_left (fun live ir' ->
+            IRSet.union live (livein lh p ir')
+          ) IRSet.empty (succs ir) in
+        IRSet.iter (fun ir' -> setlive ir' ir) live
+      done;
+      Array.iter (fun (`Phi il) ->
+        List.iter (fun ir ->
+          let br = blk ir in
+          setlive ir (br, Array.length p.(br).bb_inss)
+        ) il
+      ) bb.bb_phis;
+    done
+  done;
+  lh (* Return the final hash table. *)
+
+
+(* ** Register allocation. ** *)
+type loc = LVoid | LReg of int | LSpill of int | LCon of int
+type 'op rins = { ri_res: 'op; ri_ins: [ 'op seqi | `Mov of 'op ] }
+type 'op rphi = { rp_res: 'op; rp_spill: int option; rp_list: (bref * loc) list }
+type rprog = (loc rins, loc rphi, loc jmpi) bb array
+
+let nregs = ref 3
+let regalloc (p: iprog): rprog =
+  let module H = struct
+    include Hashtbl
+    let find h ir =
+      try find h ir with Not_found ->
+      let k = ref 0 in
+      let isconst = function
+        `Con c -> k := c; true | _ -> false in
+      match ir with
+      | IRIns (b, i) when isconst p.(b).bb_inss.(i) -> LCon !k
+      | _ -> LVoid
+  end in
+
+  let lh = liveness p in
+  let nbb = Array.length p in
+  let rp = Array.init nbb (fun i ->
+      { bb_name = p.(i).bb_name
+      ; bb_phis = [| |]
+      ; bb_inss = [| |]
+      ; bb_jmp = `Jmp (-1)
+      }
+    ) in
+  let outmaps = Array.make nbb [] in
+  let inmaps  = Array.make nbb [] in
+  let bb = ref [] in (* Basic block in construction. *)
+  let emiti l i = bb := {ri_res=l; ri_ins=i} :: !bb in
+  let act = H.create 101 in (* The active list. *)
+  let regs = Array.init !nregs (fun i -> i) |> Array.to_list in
+  let free = ref regs in (* Free registers. *)
+
+  let nspill = ref 0 in
+  let newspill () = incr nspill; !nspill - 1 in
+  let getspill ir =
+    match H.find act ir with
+    | LSpill s -> s
+    | _ -> -1 in
+
+  let kill ir =
+    match H.find act ir with
+    | LReg r -> H.remove act ir; free := r :: !free
+    | _ -> H.remove act ir in
+
+  let loc ir =
+    match H.find act ir with
+    | LVoid ->
+      let l =
+        match !free with
+        | r :: f -> free := f; LReg r
+        | [] -> LSpill (newspill ())
+      in
+      H.add act ir l; l
+    | l -> l in
+
+  let rec getreg frz = (* Aggressively obtain one register. *)
+    match !free with
+    | r :: f when List.mem r frz -> (* Frozen, can't use it. *)
+      free := f;
+      let r' = getreg frz in
+      free := r :: !free; r'
+    | r :: f -> free := f; r
+    | [] -> (* Spill needed! *)
+      match
+        H.fold (fun ir loc l -> (* Find candidates. *)
+          match loc with
+          | LReg r when not (List.mem r frz) ->
+            (ir, r) :: l
+          | _ -> l
+        ) act [] (* |> sort by spill cost *)
+      with [] -> failwith "god damn it, not enough registers"
+      | (ir, r) :: _ ->
+        H.remove act ir;
+        let s = getspill ir in
+        let s =
+          if s >= 0 then s else
+          let s' = newspill () in
+          H.add act ir (LSpill s'); s' in
+        emiti (LReg r) (`Mov (LSpill s));
+        r in
+
+  let getreg frz =
+    let r = getreg frz in
+    assert (not (List.mem r !free));
+    r in
+
+  let regloc frz ir =
+    match H.find act ir with
+    | (LCon _ | LReg _) as loc -> loc
+    | _ ->
+      let r = getreg frz in
+      H.add act ir (LReg r);
+      LReg r in
+
+  for b = nbb - 1 downto 0 do
+    let bi = p.(b).bb_inss in
+    let bl = Array.length bi in
+
+    (* Fill outmaps with the allocation state at
+     * the end of the block (after the final branch).
+     *)
+    let lvout = liveout lh (b, bl) in
+    outmaps.(b) <- begin
+      IRSet.fold (fun ir m -> (ir, loc ir) :: m) lvout []
+    end;
+
+    let jmp =
+      match p.(b).bb_jmp with
+      | `Jmp br -> `Jmp br
+      | `Ret (ir) -> `Ret (loc ir)
+      | `Brz (ir, br1, br2) ->
+        `Brz (loc ir, br1, br2) in
+    rp.(b).bb_jmp <- jmp;
+
+    for i = bl - 1 downto 0 do
+      let ir = IRIns (b, i) in
+      begin match H.find act ir with
+      | LCon _ | LVoid -> () (* Dead code. *)
+      | lir ->
+        let r, frz =
+          match lir with
+          | LSpill s ->
+            let frz =
+              let block ir l =
+                match H.find act ir with
+                | LReg r -> r :: l
+                | _ -> l in
+              match bi.(i) with
+              | `Uop (_, ir) ->
+                [] |> block ir
+              | `Bop (ir1, _, ir2) ->
+                [] |> block ir1 |> block ir2
+              | _ -> [] in
+            let r = getreg frz in
+            free := r :: !free; (* Add it straight back to free, but freeze it. *)
+            (r, [r])
+          | LReg r -> kill ir; (r, [])
+          | _ -> assert false
+          in
+        let s = getspill ir in
+        begin match bi.(i) with
+        | `Con k -> ()
+        | `Uop (op, ir') ->
+          let l' = regloc frz ir' in
+          if s >= 0 then emiti (LSpill s) (`Mov (LReg r));
+          emiti (LReg r) (`Uop (op, l'))
+        | `Bop (ir1, op, ir2) ->
+          (* Special case: Division uses RDX, we
+           * need to make sure it is free for use.
+           *)
+          let rdx = 1 in
+          if (op = Div || op = Rem) && not (List.mem rdx !free) then
+            getreg (List.filter ((<>) rdx) regs) |> ignore
+          else
+            free := (List.filter ((<>) rdx) !free);
+          let l1 = regloc frz ir1 in
+          let frz = match l1 with
+            | LReg r1 -> r1 :: frz
+            | _ -> frz in
+          let l2 = regloc frz ir2 in
+          if s >= 0 then emiti (LSpill s) (`Mov (LReg r));
+          emiti (LReg r) (`Bop (l1, op, l2));
+          if op = Div || op = Rem then
+            free := rdx :: !free;
+        end;
+      end
+    done;
+
+    let lvin = liveout lh (b, -1) in
+    inmaps.(b) <- begin
+      IRSet.fold (fun ir l ->
+        let loc = H.find act ir in
+        if blk ir = b then
+          kill ir; (* Kill current block's phis *)
+        let s = getspill ir in
+        kill ir;
+        if s >= 0 then
+          (ir, (loc, Some s)) :: l
+        else
+          (ir, (loc, None)) :: l
+      ) lvin []
+    end;
+
+    rp.(b).bb_inss <- Array.of_list !bb;
+    bb := [];
+  done;
+
+  (* Compute phis. *)
+  for b = 0 to nbb - 1 do
+    rp.(b).bb_phis <- Array.of_list begin
+      IRSet.fold (fun ir l ->
+        match ir with
+        | IRPhi (b', pr) when b' = b ->
+          let `Phi pl = p.(b).bb_phis.(pr) in
+          let pl =
+            let f ir =
+              let b = blk ir in
+              (b, List.assoc ir outmaps.(b)) in
+            List.map f pl |>
+            List.sort (fun (a,_) (b,_) -> compare a b) in
+          let res, spl = List.assoc ir inmaps.(b) in
+          { rp_res = res
+          ; rp_spill = spl
+          ; rp_list =  pl
+          } :: l
+        | _ -> assert (blk ir <> b);
+          (* Forgive me, I sin!! *)
+          let rl = ref [] in
+          for b = 0 to nbb - 1 do
+            let bl = Array.length p.(b).bb_inss in
+            if IRSet.mem ir (liveout lh (b, bl)) then
+              rl := (b, List.assoc ir outmaps.(b)) :: !rl
+          done;
+          { rp_res = fst (List.assoc ir inmaps.(b))
+          ; rp_spill = None
+          ; rp_list = List.rev !rl
+          } :: l
+      ) (liveout lh (b, -1)) []
+    end
+  done;
+
+  rp
+
+
+(* ** Phi resolution. ** *)
+(* Machine program, ready for code generation. *)
+type mprog = (loc rins, unit, loc jmpi) bb array
+
+let movgen (p: rprog): mprog =
+
+  let parmov b b' =
+    let tmp = LReg (-1) in
+    let src, dst =
+      let phis = p.(b').bb_phis in
+      Array.map (fun x -> List.assoc b x.rp_list) phis,
+      Array.map (fun x -> x.rp_res) phis in
+    let n = Array.length dst in
+    let status = Array.make n `ToMove in
+    let ms = ref [] in
+    let emov dst src =
+      ms := {ri_res = dst; ri_ins = `Mov src} :: !ms in
+    let rec mv i =
+      if src.(i) <> dst.(i) then begin
+        status.(i) <- `Moving;
+        for j = 0 to n - 1 do
+          if src.(j) = dst.(i) then
+            match status.(j) with
+            | `ToMove -> mv j
+            | `Moving -> emov tmp src.(j); src.(j) <- tmp
+            | `Moved -> ()
+        done;
+        emov dst.(i) src.(i);
+        status.(i) <- `Moved;
+      end in
+    for i = 0 to n - 1 do
+      if status.(i) = `ToMove then mv i
+    done;
+    Array.iter (fun {rp_res; rp_spill} ->
+      match rp_spill with
+      | Some spl when LSpill spl <> rp_res ->
+        emov (LSpill spl) rp_res
+      | _ -> ()
+    ) p.(b').bb_phis;
+    List.rev !ms |> Array.of_list in
+
+  let nbb = Array.length p in
+  let bmap = Array.init nbb (fun i -> -i - 1) in
+  let bn = ref 0 in
+  let mp = ref [] in
+  let addb b = mp := b :: !mp; incr bn; !bn - 1 in
+
+  for b = 0 to nbb - 1 do
+    let b' =
+      { bb_name = p.(b).bb_name
+      ; bb_phis = [| |]
+      ; bb_inss = p.(b).bb_inss
+      ; bb_jmp = `Jmp (-1)
+      } in
+    bmap.(b) <- addb b';
+    let movbb suff jb =
+      if jb = -1 then -1 else
+      let c = parmov b jb in
+      if c = [| |] then bmap.(jb) else
+      addb
+        { bb_name = p.(b).bb_name ^ suff
+        ; bb_phis = [| |]
+        ; bb_inss = c
+        ; bb_jmp = `Jmp bmap.(jb)
+        } in
+    b'.bb_jmp <- begin
+      match p.(b).bb_jmp with
+      | `Jmp b1 -> `Jmp (movbb "_mov" b1)
+      | `Ret (l) -> `Ret (l)
+      | `Brz (l, b1, b2) ->
+        let b1', b2' =
+          if b1 = b + 1 then
+            let b2' = movbb "_mov2" b2 in
+            let b1' = movbb "_mov1" b1 in
+            (b1', b2')
+          else
+            let b1' = movbb "_mov1" b1 in
+            let b2' = movbb "_mov2" b2 in
+            (b1', b2') in
+        `Brz (l, b1', b2')
+    end;
+  done;
+  List.rev !mp
+  |> Array.of_list
+  |> Array.map (fun b ->
+    let f n =
+      if n >= -1 then n else bmap.(-n - 1) in
+    { b with bb_jmp =
+      match b.bb_jmp with
+      | `Ret (l) -> `Ret (l)
+      | `Jmp b1 -> `Jmp (f b1)
+      | `Brz (l, b1, b2) -> `Brz (l, f b1, f b2)
+    }
+  )
+
+
+(* ** X86-64 code generation. ** *)
+let codegen (p: mprog): string =
+  let cl = ref [] and off = ref 0 in
+  let outs s = cl := s :: !cl; off := !off + String.length s in
+  let outb b = outs (String.make 1 (Char.chr b)) in
+
+  (* output prelude *)
+  outb 0x55;              (* push %rbp      *)
+  outs "\x48\x89\xe5";    (* mov %rsp, %rbp *)
+
+  let regmap = [| (* only caller-save regs, for now *)
+      0;  (* rax *)
+      1;  (* rcx *)
+      2;  (* rdx *) (* comes late because of division *)
+                    (* look for RDX and change there too! *)
+      6;  (* rsi *)
+      7;  (* rdi *)
+      8;  (* r8  *)
+      9;  (* r9  *)
+      10; (* r10 *)
+      11; (* r11 *)
+    |] in
+  let regn = function
+    | LReg r -> regmap.(r+1)
+    | _ -> failwith "register expected in regn" in
+
+  let rexp rg rm =
+    let rex = 0x48 in
+    let rg, rex = if rg > 7
+      then rg-8, rex lor 4
+      else rg, rex in
+    let rm, rex = if rm > 7
+      then rm-8, rex lor 1
+      else rm, rex in
+    (rex, rg, rm) in
+
+  let modrm ?(md=3) r m =
+    (md lsl 6) + (r lsl 3) + m in
+
+  let lite ?byt x =
+    let byt = match byt with
+      Some b -> b | None -> Bytes.create 4 in
+    let rec f i x =
+      if i = 4 then () else begin
+        Bytes.set byt i (Char.chr (x land 0xff));
+        f (i+1) (x lsr 8)
+      end in
+    f 0 x; Bytes.unsafe_to_string byt in
+
+  let oins op r m =
+    let rex, r, m = rexp r m in
+    outb rex; outb op; outb (modrm r m) in
+
+  let slot s =
+    let c = ((-1-s) * 8) land 0xff in
+    assert (c < 256);
+    c in
+
+  let move l l1 = match l, l1 with
+    | (LReg _ as r), LCon k ->
+      oins 0xc7 0 (regn r); outs (lite k)
+    | LSpill s, LCon k ->
+      outb 0x48;
+      outb 0xc7;
+      outb (modrm ~md:1 0 5);
+      outb (slot s);
+      outs (lite k)
+    | (LReg _ as r), (LReg _ as r1) ->
+      let rex, r1, r = rexp (regn r1) (regn r) in
+      outb rex; outb 0x89; outb (modrm r1 r)
+    | (LReg _ as r), LSpill s ->
+      let rex, r, m = rexp (regn r) 5 in
+      outb rex; outb 0x8b; outb (modrm ~md:1 r m); outb (slot s)
+    | LSpill s, (LReg _ as r) ->
+      let rex, r, m = rexp (regn r) 5 in
+      outb rex; outb 0x89; outb (modrm ~md:1 r m); outb (slot s)
+    | _ -> failwith "invalid move" in
+
+  let nbb = Array.length p in
+  let boffs = Array.make nbb (`Unk []) in
+  let label b =
+    let p0 = !off + 4 in
+    match boffs.(b) with
+    | `Unk l ->
+      let lbl = lite p0 in
+      boffs.(b) <- `Unk (lbl :: l);
+      lbl
+    | `Kno p -> lite (p - p0) in
+
+  for b = 0 to nbb - 1 do
+    let pl =
+      match boffs.(b) with
+      | `Unk pl -> pl | _ -> [] in
+    List.iter (fun s -> (* back-patching *)
+      let p =
+        Char.code s.[0] +
+        Char.code s.[1] lsl 8 +
+        Char.code s.[2] lsl 16 +
+        Char.code s.[3] lsl 24 in
+      let byt = Bytes.unsafe_of_string s in
+      ignore (lite ~byt (!off - p))
+    ) pl;
+    boffs.(b) <- `Kno !off;
+
+    let is = p.(b).bb_inss in
+    for i = 0 to Array.length is - 1 do
+      match is.(i) with
+      | { ri_res = l; ri_ins = `Bop (l1, op, l2) } ->
+	let l2 =
+          if l1 = l || op = Div || op = Rem then l2 else
+          if l2 = l then begin
+            move (LReg (-1)) l;
+	    move l l1;
+	    LReg (-1)
+          end else
+            (move l l1; l2) in
+        begin match op with
+        | Add ->
+          begin match l2 with
+          | LCon k -> oins 0x81 0 (regn l); outs (lite k)
+          | LReg _ -> oins 0x01 (regn l2) (regn l)
+          | _ -> assert false
+          end
+        | Sub ->
+          begin match l2 with
+          | LCon k -> oins 0x81 5 (regn l); outs (lite k)
+          | LReg _ -> oins 0x29 (regn l2) (regn l)
+          | _ -> assert false
+          end
+        | Div ->
+          move (LReg (-1)) l1;
+          outb 0x99;           (* cltd *)
+          oins 0xf7 7 (regn l2);
+          move l (LReg (-1));  (* quotient in rax *)
+        | Rem ->
+          move (LReg (-1)) l1;
+          outb 0x99;           (* cltd *)
+          oins 0xf7 7 (regn l2);
+          if l <> LReg 1 then (* RDX *)
+            move l (LReg 1);  (* remainder in rdx *)
+        | Mul -> failwith "Mul not implemented"
+        | CLe -> failwith "CLe not implemented"
+        | CEq -> failwith "CEq not implemented"
+        end
+      | { ri_res = l; ri_ins = `Uop (Neg, l1) } ->
+        if l <> l1 then
+          move l l1;
+        oins 0xf7 3 (regn l)
+      | { ri_res = l; ri_ins = `Mov l1 } ->
+        move l l1
+      | { ri_res = l; ri_ins = `Con k } ->
+        move l (LCon k)
+    done;
+
+    begin match p.(b).bb_jmp with
+    | `Brz (r, b1, b2) when b1 >= 0 && b2 >= 0 ->
+      oins 0x85 (regn r) (regn r);
+      if b1 = b+1 then
+        (outb 0x0f; outb 0x85; outs (label b2))
+      else if b2 = b+1 then
+        (outb 0x0f; outb 0x84; outs (label b1))
+      else
+        failwith "double branch"
+    | `Jmp b1 when b1 >= 0 ->
+      if b1 <> b+1 then
+        (outb 0xe9; outs (label b1))
+    | `Ret (l) ->
+      move (LReg (-1)) l;
+      outb 0x5d;           (* pop %rbp *)
+      outb 0xc3;           (* retq     *)
+    | _ -> ()
+    end
+  done;
+
+  String.concat "" (List.rev !cl)
+
+
+(* Little test programs. *)
+let pbasic: iprog =
+  [| { bb_name = "start"
+     ; bb_phis = [| |]
+     ; bb_inss =
+       [| `Con 2
+        ; `Con 3
+        ; `Bop (IRIns (0, 0), Add, IRIns (0, 1))
+        ; `Bop (IRIns (0, 0), Add, IRIns (0, 2))
+       |]
+     ; bb_jmp = `Ret (IRIns (0, 3))
+     }
+  |]
+
+let pcount: iprog =
+  [| { bb_name = "init"
+     ; bb_phis = [||]
+     ; bb_inss = [| `Con 1234567; `Con 1 |]
+     ; bb_jmp = `Jmp 1
+     }
+   ; { bb_name = "loop"
+     ; bb_phis = [| `Phi [IRIns (0, 0); IRIns (1, 0)] |]
+     ; bb_inss = [| `Bop (IRPhi (1, 0), Sub, IRIns (0, 1)) |]
+     ; bb_jmp = `Brz (IRIns (1, 0), 2, 1)
+     }
+   ; { bb_name = "end"
+     ; bb_phis = [||]
+     ; bb_inss = [| `Con 42 |]
+     ; bb_jmp = `Ret (IRIns (0,1))
+     }
+  |]
+
+let psum: iprog =
+  [| { bb_name = "init"
+     ; bb_phis = [||]
+     ; bb_inss = [| `Con 1234567; `Con 1; `Con 0 |]
+     ; bb_jmp = `Jmp 1
+     }
+   ; { bb_name = "loop"
+     ; bb_phis =
+       [| `Phi [IRIns (0, 0); IRIns (1, 0)]       (* n  = phi(100, n1) *)
+        ; `Phi [IRIns (0, 2); IRIns (1, 1)]       (* s  = phi(1, s1) *)
+       |]
+     ; bb_inss =
+       [| `Bop (IRPhi (1, 0), Sub, IRIns (0, 1))  (* n1 = n - 1 *)
+        ; `Bop (IRPhi (1, 1), Add, IRPhi (1, 0))  (* s1 = s + n *)
+       |]
+     ; bb_jmp = `Brz (IRIns (1, 0), 2, 1)
+     }
+   ; { bb_name = "end"
+     ; bb_phis = [||]
+     ; bb_inss = [| `Con 42 |]
+     ; bb_jmp = `Ret (IRIns (1,1))
+     }
+  |]
+
+let peucl: iprog =
+  [| { bb_name = "init"
+     ; bb_phis = [||]
+     ; bb_inss = [| `Con 123456; `Con 32223 |]
+     ; bb_jmp = `Jmp 1
+     }
+   ; { bb_name = "loop"
+     ; bb_phis =
+       [| `Phi [IRIns (0, 0); IRPhi (1, 1)]
+        ; `Phi [IRIns (0, 1); IRIns (1, 0)]
+       |]
+     ; bb_inss =
+       [| `Bop (IRPhi (1, 0), Rem, IRPhi (1, 1))
+       |]
+     ; bb_jmp = `Brz (IRIns (1, 0), 2, 1)
+     }
+   ; { bb_name = "end"
+     ; bb_phis = [||]
+     ; bb_inss = [||]
+     ; bb_jmp = `Ret (IRPhi (1, 1))
+     }
+  |]
+
+let pspill: iprog =
+  [| { bb_name = "init"
+     ; bb_phis = [||]
+     ; bb_inss =
+(* 00 *)    [| `Con 42
+(* 01 *)     ; `Bop (IRIns (0, 0), Add, IRIns (0, 0))
+(* 02 *)     ; `Bop (IRIns (0, 0), Add, IRIns (0, 1))
+(* 03 *)     ; `Bop (IRIns (0, 0), Add, IRIns (0, 2))
+(* 04 *)     ; `Bop (IRIns (0, 0), Add, IRIns (0, 3))
+(* 05 *)     ; `Bop (IRIns (0, 0), Add, IRIns (0, 4))
+(* 06 *)     ; `Bop (IRIns (0, 0), Add, IRIns (0, 5))
+(* 07 *)     ; `Bop (IRIns (0, 6), Add, IRIns (0, 6))
+(* 08 *)     ; `Bop (IRIns (0, 5), Add, IRIns (0, 7))
+(* 09 *)     ; `Bop (IRIns (0, 4), Add, IRIns (0, 8))
+(* 10 *)     ; `Bop (IRIns (0, 3), Add, IRIns (0, 9))
+(* 11 *)     ; `Bop (IRIns (0, 2), Add, IRIns (0, 10))
+(* 12 *)     ; `Bop (IRIns (0, 1), Add, IRIns (0, 11))
+(* 13 *)     ; `Bop (IRIns (0, 0), Add, IRIns (0, 12))
+           |]
+     ; bb_jmp = `Ret (IRIns (0, 13))
+     }
+  |]
+
+
+(* ------------------------------------------------------------------------ *)
+
+let oneshot () =
+  ()
+
+let _ =
+  if Array.length Sys.argv > 1 && Sys.argv.(1) = "test" then
+    let oc = open_out "t.o" in
+    nregs := 3;
+    let s = peucl |> regalloc |> movgen |> codegen in
+    Elf.barebones_elf oc "f" s;
+    close_out oc;
+  else
+    oneshot ()
+
+(* ------------------------------------------------------------------------ *)
diff --git a/proto/tmain.c b/proto/tmain.c
new file mode 100644
index 0000000..79e1c24
--- /dev/null
+++ b/proto/tmain.c
@@ -0,0 +1,24 @@
+#include <stdio.h>
+#include <time.h>
+
+enum { NRounds = 150 };
+
+extern long f(void);
+
+int main()
+{
+	clock_t t0, tmin;
+	long i, l;
+
+	tmin = 10 * CLOCKS_PER_SEC;
+	for (i=0; i<NRounds; i++) {
+		t0 = clock();
+		l = f();
+		t0 = clock() - t0;
+		if (t0 < tmin)
+			tmin = t0;
+	}
+	printf("f() = %ld\n", l);
+	printf("  %.4f secs\n", (double)t0/CLOCKS_PER_SEC);
+	return 0;
+}