# Zig parser
# Copyright (C) 2024  Nguyễn Gia Phong
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

(def keywords
  "Zig keywords.  Each one becomes a :keyword/<name> rule in the grammar."
  ["addrspace" "align" "allowzero" "and" "anyframe" "anytype" "asm" "async"
   "await" "break" "callconv" "catch" "comptime" "const" "continue" "defer"
   "else" "enum" "errdefer" "error" "export" "extern" "fn" "for" "if" "inline"
   "linksection" "noalias" "noinline" "nosuspend" "opaque" "or" "orelse"
   "packed" "pub" "resume" "return" "struct" "suspend" "switch" "test"
   "threadlocal" "try" "union" "unreachable" "usingnamespace" "var" "volatile"
   "while"])

# Each key below becomes a grammar rule of the same name that captures
# the key itself, matches the token text, checks that the next character
# is not in the associated string and then skips blanks and comments.
# Keys containing "|" or "~" are built with (keyword ...) instead of
# being written as literals.
(def ops
  "Mapping of Zig operator tokens and characters they are not followed by."
  {:: ""
   :? ""
   :! "="
   :-> ""
   :=> ""
   :. "*?."
   :.* ""
   :.? ""
   :.. "."
   :... ""
   :** ""
   :++ ""
   (keyword "||") ""
   :* "%*=|"
   :*% "="
   (keyword "*|") "="
   :/ "="
   :% "="
   :+ "%+=|"
   :+% "="
   (keyword "+|") "="
   :- "%=>|"
   :-% "="
   (keyword "-|") "="
   :<< "=|"
   (keyword "<<|") "="
   :>> "="
   :& "="
   :^ "="
   (keyword "|") "|="
   (keyword "~") ""
   # Equality comparison; plain "=" below already refuses to match "==".
   :== ""
   :!= ""
   :< "<="
   :> ">="
   :<= ""
   :>= ""
   := ">="
   :*= ""
   :*%= ""
   (keyword "*|=") ""
   :/= ""
   :%= ""
   :+= ""
   :+%= ""
   (keyword "+|=") ""
   :-= ""
   :-%= ""
   (keyword "-|=") ""
   :<<= ""
   (keyword "<<|=") ""
   :>>= ""
   :&= ""
   :^= ""
   (keyword "|=") ""})

(defn collect
  "Collect fields and declarations in a Zig container definition.

  kwargs is a flat sequence of alternating keys and values.  Values
  for :doc, :field and :decl are accumulated in order of appearance;
  any other key (e.g. :kind) is stored as is.  Return an immutable
  struct."
  [& kwargs]
  (let [container @{:doc @[] :field @[] :decl @[]}]
    (each i (range 0 (length kwargs) 2)
      # Only the keys preinitialized above hold arrays to push onto.
      (if (container (kwargs i))
        (array/push (container (kwargs i)) (kwargs (inc i)))
        (set (container (kwargs i)) (kwargs (inc i)))))
    (freeze container)))

(def grammar
  "PEG grammar for Zig."
  (peg/compile
    (merge
      # :keyword/<name>: the word itself, not followed by an identifier
      # character, then blanks and comments.
      (zipcoll (map |(keyword "keyword/" $) keywords)
               (map |~(* ,$ (! (+ :w "_")) :skip) keywords))
      # One rule per operator token, see ops.
      (zipcoll (map keyword (keys ops))
               (map |~(* (constant ,$) ,(string $) (! (set ,(ops $))) :skip)
                    (keys ops)))
      ~{:brace/l (* "{" :skip)
        :brace/r (* "}" :skip)
        :bracket/l (* "[" :skip)
        :bracket/r (* "]" :skip)
        :paren/l (* "(" :skip)
        :paren/r (* ")" :skip)
        :pipe/l ,(keyword "|")
        :pipe/r ,(keyword "|")
        :comma (* "," :skip)
        :semicolon (* ";" :skip)

        :op/assign
        (+ :*= :*%= ,(keyword "*|=") :/= :%= :+= :+%= ,(keyword "+|=")
           :-= :-%= ,(keyword "-|=") :<<= ,(keyword "<<|=") :>>= :&= :^=
           ,(keyword "|=") :=)
        :op/suffix
        (+ (* :bracket/l :expr
              (? (* :.. (? (* (? :expr) (? (* :: :expr))))))
              :bracket/r)
           (* :. :ident)
           :.*
           :.?)

        # Two slashes not followed by "!" or "/", or at least four slashes;
        # "//!" and "///" introduce doc comments instead.
        :line-comment (* "//" (+ (! (set "!/")) (at-least 2 "/")) (to "\n"))
        :skip (any (+ (set " \n") :line-comment))
        :doc-comment
        (* (constant :doc) (/ (some (* "///" (<- (to "\n")) :skip)) ,tuple))
        # Ordered choice of all :keyword/<name> rules.
        :keyword ,(tuple/slice
                    (array/concat @['+]
                                  (map |(keyword "keyword/" $) keywords)))

        :int/bin (* (set "01") (any (* (? "_") (set "01"))))
        :int/oct (* (range "07") (any (* (? "_") (range "07"))))
        :int/dec (* :d (any (* (? "_") :d)))
        :int/hex (* :h (any (* (? "_") :h)))
        :float-exp/dec (* (set "eE") (? (set "-+")) :int/dec)
        :float-exp/hex (* (set "pP") (? (set "-+")) :int/hex)

        :char/escape (+ (* "\\x" :h :h)
                        (* "\\u{" :h+ "}")
                        (* "\\" (set "tnr\"'\\")))
        :char/char
        (+ :char/escape
           # ASCII except newline, single quote and backslash
           (range "\x00\x09" "\x0b\x26" "\x28\x5b" "\x5d\x7f")
           # UTF-8
           (* (+ (range "\xc2\xdf")
                 (* "\xe0" (range "\xa0\xbf"))
                 (* (range "\xe1\xec") (range "\x80\xbf"))
                 (* "\xed" (range "\x80\x9f"))
                 (* (range "\xee\xef") (range "\x80\xbf"))
                 (* (+ (* "\xf0" (range "\x90\xbf"))
                       (* (range "\xf1\xf3") (range "\x80\xbf"))
                       (* "\xf4" (range "\x80\x8f")))
                    (range "\x80\xbf")))
              (range "\x80\xbf")))
        :char/string (+ :char/escape (if (! (set "\n\"\\")) 1))

        :literal/float
        (* (/ (<- (+ (* "0x" :int/hex "." :int/hex (? :float-exp/hex))
                     (* "0x" :int/hex :float-exp/hex)
                     (* :int/dec "." :int/dec (? :float-exp/dec))
                     (* :int/dec :float-exp/dec)))
              # The exponent marker p/P is rewritten to & before the text
              # is handed to scan-number.
              ,|(->> $
                     (string/replace "p" "&")
                     (string/replace "P" "&")
                     (scan-number)))
           :skip)
        :literal/integer
        (* (+ (* "0b" (/ (<- :int/bin) ,|(scan-number $ 2)))
              (* "0o" (/ (<- :int/oct) ,|(scan-number $ 8)))
              (* "0x" (/ (<- :int/hex) ,|(scan-number $ 16)))
              (/ (<- :int/dec) ,scan-number))
           :skip)
        :literal/char (* "'" (/ (<- :char/char) ,|(symbol $)) "'" :skip)
        :literal/string/single (* "\"" (<- (any :char/string)) "\"" :skip)
        # Multiline string literals (lines starting with two backslashes)
        # are matched but not captured.
        :literal/string
        (+ :literal/string/single
           (some (* (some (* "\\\\" (to "\n") (any (set " \n")))) :skip)))

        :ident (+ (* (! :keyword) (<- (* (+ :a "_") (any (+ :w "_")))) :skip)
                  (* "@" (<- :literal/string/single)))
        :ident/builtin (* "@" (<- (* (+ :a "_") (any (+ :w "_")))) :skip)
        :field-init (* :. :ident := :expr)
        :label/block (* :ident ::)
        :label/break (* :: :ident)

        :list/asm/input
        (* (any (* :item/asm/input :comma)) (? :item/asm/input))
        :list/asm/output
        (* (any (* :item/asm/output :comma)) (? :item/asm/output))
        :list/expr (* (any (* :expr :comma)) (? :expr))
        :list/ident (* (any (* (? :doc-comment) :ident :comma))
                       (? (* (? :doc-comment) :ident)))
        # Field initializers must be tried first: ".x" alone is a valid
        # expression (enum literal), and ordered choice would otherwise
        # commit to the expression list and then fail on "=".
        :list/init
        (* :brace/l
           (? (* (+ (* :field-init (any (* :comma :field-init)))
                    (* :expr (any (* :comma :expr))))
                 (? :comma)))
           :brace/r)
        :list/params (* (any (* :decl/param :comma)) (? :decl/param))
        :list/string (* (any (* :literal/string :comma)) (? :literal/string))

        :call-args (* :paren/l :list/expr :paren/r)
        :call-conv (* :keyword/callconv :paren/l :expr :paren/r)
        :byte-align (* :keyword/align :paren/l :expr :paren/r)
        :addr-space (* :keyword/addrspace :paren/l :expr :paren/r)
        :link-section (* :keyword/linksection :paren/l :expr :paren/r)

        :payload (* :pipe/l :ident :pipe/r)
        :payload/ptr (* :pipe/l (? :*) :ident :pipe/r)
        :payload/ptr/list
        (* :pipe/l (? :*) :ident (any (* :comma (? :*) :ident)) :pipe/r)
        :payload/ptr/index
        (* :pipe/l (? :*) :ident
           (? (* :comma :ident)) # inline switch
           :pipe/r)

        :item/asm/input
        (* :bracket/l :ident :bracket/r :literal/string
           :paren/l :expr :paren/r)
        :item/asm/output
        (* :bracket/l :ident :bracket/r :literal/string
           :paren/l (+ (* :-> :expr/type) :ident) :paren/r)
        :item/for (* :expr (? (* :.. (? :expr))))
        :item/switch (* :expr (? (* :... :expr)))
        # Item first, then (comma item)*, so that an optional trailing
        # comma is left for (? :comma) instead of being consumed by
        # a repetition that then demands one more item.
        :switch-prong
        (* (? :keyword/inline)
           (+ (* :item/switch (any (* :comma :item/switch)) (? :comma))
              :keyword/else)
           :=>
           (? :payload/ptr/index)
           :expr/assign-single)

        :proto/if (* :keyword/if :paren/l :expr :paren/r (? :payload/ptr))
        :proto/while
        (* :keyword/while :paren/l :expr :paren/r
           (? :payload/ptr)
           (? (* :: :paren/l :expr/assign :paren/r)))
        :proto/for
        (* :keyword/for
           :paren/l :item/for (any (* :comma :item/for)) (? :comma) :paren/r
           :payload/ptr/list)
        :proto/fn
        (* :keyword/fn (? :ident) :paren/l :list/params :paren/r
           (? :byte-align) (? :addr-space) (? :link-section) (? :call-conv)
           (? :!) :expr/type)
        :proto/enum (* :keyword/enum (? (* :paren/l :expr :paren/r)))
        :proto/slice
        (* :bracket/l (? (* :: :expr)) :bracket/r
           (any (+ :byte-align :addr-space
                   :keyword/const :keyword/volatile :keyword/allowzero)))
        :proto/pointer
        (* (+ :* :** (* :bracket/l :* (+ "c" (* :: :expr) 0) :bracket/r))
           (any (+ :addr-space
                   (* :keyword/align
                      :paren/l :expr (? (* :: :expr :: :expr)) :paren/r)
                   :keyword/const :keyword/volatile :keyword/allowzero)))
        :proto/array
        (* (constant :kind) (constant :array)
           :bracket/l
           (constant :len) :expr
           (? (* (constant :sentinel) :: :expr))
           :bracket/r
           (constant :child))
        :proto/type
        (+ :? (* :keyword/anyframe :->)
           :proto/slice :proto/pointer :proto/array)
        :proto/decl-var
        (* (constant :const?)
           (+ (* (constant true) :keyword/const)
              (* (constant false) :keyword/var))
           (constant :ident) :ident
           (? (* :: :expr/type))
           (? :byte-align) (? :addr-space) (? :link-section))
        :proto/decl-var/rest (* :comma (+ :proto/decl-var :expr))

        :stmt/decl-var
        (* (+ (* (+ :proto/decl-var (* :expr :proto/decl-var/rest))
                 (any :proto/decl-var/rest)
                 :=)
              (* :expr :op/assign)
              0)
           :expr :semicolon)
        # For loops, while loops and if statements share one shape:
        # a block body optionally followed by else, or an assignment
        # expression followed by either a semicolon or else.  The else
        # branch has to hang off the body alternative, otherwise ordered
        # choice settles for the bare body and else is never reached.
        :stmt/for
        (* :proto/for
           (+ (* :expr/block (? (* :keyword/else :stmt)))
              (* :expr/assign (+ :semicolon (* :keyword/else :stmt)))))
        :stmt/while
        (* :proto/while
           (+ (* :expr/block (? (* :keyword/else (? :payload) :stmt)))
              (* :expr/assign
                 (+ :semicolon (* :keyword/else (? :payload) :stmt)))))
        # NOTE(review): struct needs an even number of arguments, while
        # the alternatives below yield a flat sequence of captures whose
        # length depends on the input -- confirm the intended shape.
        :stmt
        (/ (+ (* :keyword/comptime (+ :expr/block :stmt/decl-var))
              (* (+ :keyword/nosuspend
                    :keyword/suspend
                    :keyword/defer
                    # Only errdefer may bind the error as payload.
                    (* :keyword/errdefer (? :payload)))
                 (+ :expr/block (* :expr/assign :semicolon)))
              (* :proto/if
                 (+ (* :expr/block (? (* :keyword/else (? :payload) :stmt)))
                    (* :expr/assign
                       (+ :semicolon
                          (* :keyword/else (? :payload) :stmt)))))
              :expr/block
              (* (? :label/block) (? :keyword/inline)
                 (+ :stmt/for :stmt/while))
              :expr/switch
              :stmt/decl-var)
           ,struct)
        :block (* :brace/l (/ (any :stmt) ,tuple) :brace/r)

        :expr/block (* (? :label/block) :block)
        :expr/switch
        (* :keyword/switch :paren/l :expr :paren/r
           :brace/l (any (* :switch-prong :comma)) (? :switch-prong) :brace/r)
        :expr/assign-single (* :expr (? (* :op/assign :expr)))
        # Either a single assignment, or a destructuring one: at least one
        # more comma-separated target, then "=" and the value.
        :expr/assign
        (* :expr
           (+ (* :op/assign :expr)
              (* (some (* :comma :expr)) := :expr)
              0))
        # Every alternative supplies the value for :kind, so that collect
        # receives well-formed key-value pairs.
        # NOTE(review): the parenthesized expressions after struct, enum
        # and union may add captures without keys -- confirm how they
        # should be passed to collect.
        :expr/container
        (/ (* (+ :keyword/extern :keyword/packed 0)
              (constant :kind)
              (+ (* (constant :struct) :keyword/struct
                    (? (* :paren/l :expr :paren/r)))
                 (* (constant :opaque) :keyword/opaque)
                 (* (constant :enum) :proto/enum)
                 (* (constant :union) :keyword/union
                    (? (* :paren/l (+ :proto/enum :expr) :paren/r))))
              :brace/l :container/members :brace/r)
           ,collect)
        :expr/type (* (any :proto/type) :expr/error-union)
        :expr/type/primary
        (+ :literal/char :literal/float :literal/integer :literal/string
           (* :. :ident) # enum
           (* :. :list/init) # tuple
           (* :keyword/error :. :ident)
           (* :keyword/error :brace/l :list/ident :brace/r)
           (* :keyword/comptime :expr/type)
           :keyword/anyframe
           :keyword/unreachable
           :proto/fn
           :expr/container
           :ident
           (* :ident/builtin :call-args)
           (* :paren/l :expr :paren/r)
           :expr/block
           (* (? :label/block) (? :keyword/inline)
              (+ (* :proto/for :expr/type (? (* :keyword/else :expr/type)))
                 (* :proto/while :expr/type
                    (? (* :keyword/else (? :payload) :expr/type)))))
           (* :proto/if :expr/type
              (? (* :keyword/else (? :payload) :expr/type)))
           :expr/switch)
        :expr/error-union
        (* (+ (* :keyword/async :expr/type/primary (any :op/suffix) :call-args)
              (* :expr/type/primary (any (+ :op/suffix :call-args))))
           (? (* :! :expr/type)))
        :expr/asm
        (* :keyword/asm (? :keyword/volatile)
           :paren/l :expr
           (? (* :: :list/asm/output
                 (? (* :: :list/asm/input
                       (? (* :: :list/string))))))
           :paren/r)
        :expr/prefix
        (* (any (+ :! :- ,(keyword "~") :-% :& :keyword/try :keyword/await))
           (+ :expr/asm
              (* :proto/if :expr (? (* :keyword/else (? :payload) :expr)))
              (* :keyword/break (? :label/break) (? :expr))
              (* :keyword/comptime :expr)
              (* :keyword/nosuspend :expr)
              (* :keyword/continue (? :label/break))
              (* :keyword/resume :expr)
              (* :keyword/return (? :expr))
              (* (? :label/block) (? :keyword/inline)
                 (+ (* :proto/for :expr (? (* :keyword/else :expr)))
                    (* :proto/while :expr
                       (? (* :keyword/else (? :payload) :expr)))))
              :block
              (/ (* :expr/type (constant :init) (/ :list/init ,tuple))
                 ,struct)
              # A type expression without initializer list; this is the
              # only way suffix operators and call arguments are reached.
              :expr/type))
        :expr/mul
        (* :expr/prefix
           (any (* (+ ,(keyword "||") :** :* :*% ,(keyword "*|") :/ :%)
                   :expr/prefix)))
        :expr/add
        (* :expr/mul
           (any (* (+ :++ :+% :+ ,(keyword "+|") :- :-% ,(keyword "-|"))
                   :expr/mul)))
        :expr/bit-shift
        (* :expr/add
           (any (* (+ :<< ,(keyword "<<|") :>>) :expr/add)))
        :expr/bitwise
        (* :expr/bit-shift
           (any (* (+ :& :^ ,(keyword "|") :keyword/orelse
                      (* :keyword/catch (? :payload)))
                   :expr/bit-shift)))
        :expr/cmp
        (* :expr/bitwise
           (? (* (+ :== :!= :< :> :<= :>=) :expr/bitwise)))
        :expr/and (* :expr/cmp (any (* :keyword/and :expr/cmp)))
        :expr (* :expr/and (any (* :keyword/or :expr/and)))

        :decl/param
        (+ (* (? :doc-comment)
              (+ :keyword/noalias :keyword/comptime 0)
              (? (* :ident ::))
              (+ :keyword/anytype :expr))
           :...)
        :decl/test
        (* (constant :kind) (constant :test) :keyword/test
           (+ (* (constant :name) :literal/string/single)
              (* (constant :ident) :ident)
              0)
           (constant :block) :block)
        :decl/comptime (* :keyword/comptime :block)
        :decl/fn
        (* (+ :keyword/export
              (* :keyword/extern (? :literal/string/single))
              :keyword/inline
              :keyword/noinline
              0)
           :proto/fn
           (+ :semicolon :block))
        :decl/global
        (* (+ (* (constant :export?) (constant true) (drop :keyword/export))
              (* (constant :extern) (drop :keyword/extern)
                 (+ :literal/string/single (constant true)))
              0)
           (? (* (constant :thread-local?) (constant true)
                 (drop :keyword/threadlocal)))
           :proto/decl-var
           (? (* := :expr))
           :semicolon)

        # One :doc key per line, so that collect is given well-formed
        # pairs however many lines there are.  At most one space after
        # the marker is left out of the captured text.
        :container/doc-comment
        (some (* "//!" (? " ") (constant :doc) (<- (to "\n")) :skip))
        :container/decl
        (* (constant :decl)
           (/ (+ :decl/test
                 :decl/comptime
                 (* (? :doc-comment)
                    (? (* (constant :pub) (constant true)
                          (drop :keyword/pub)))
                    (constant :kind)
                    (+ (* (constant :fn) :decl/fn)
                       (* (constant :global) :decl/global)
                       (* (constant :using-namespace)
                          (drop :keyword/usingnamespace) :expr :semicolon))))
              ,struct))
        :container/field
        (* (constant :field)
           (/ (* (? :doc-comment)
                 (? :keyword/comptime)
                 (! :keyword/fn)
                 (? (* (constant :ident) :ident (drop ::)))
                 (constant :type) :expr/type
                 (? :byte-align)
                 (? (* := :expr)))
              ,struct))
        :container/members
        (* (? :container/doc-comment)
           (any :container/decl)
           (any (* :container/field :comma))
           (+ :container/field (any :container/decl)))
        # A source file is an implicit struct and must be consumed whole.
        :main
        (/ (* :skip (constant :kind) (constant :struct) :container/members -1)
           ,collect)})))

(defn ast
  "Parse given Zig file into AST.

  Return the struct built by collect for the top-level container,
  or nil if text does not match the grammar."
  [text]
  (when-let [captures (peg/match grammar text)]
    (captures 0)))

# Script entry point: parse standard input and pretty-print the result.
(pp (ast (:read stdin :all)))