From b3a83e1277b1656712518388a5dbbf781c631e47 Mon Sep 17 00:00:00 2001 From: Nguyễn Gia Phong Date: Thu, 4 Apr 2024 16:19:48 +0900 Subject: Cover full grammar --- spec | 2 +- src/parse.janet | 514 ++++++++++++++++++++++++++++++++++++-------------------- 2 files changed, 331 insertions(+), 185 deletions(-) diff --git a/spec b/spec index 78c2e2e..75129c7 160000 --- a/spec +++ b/spec @@ -1 +1 @@ -Subproject commit 78c2e2e5cfa7090965deaf631cb8ca6f405b7c42 +Subproject commit 75129c7a34010ead828055c26a6d41d1516faa97 diff --git a/src/parse.janet b/src/parse.janet index a391da4..342fc11 100644 --- a/src/parse.janet +++ b/src/parse.janet @@ -26,10 +26,17 @@ (def ops "Mapping of Zig operator tokens and characters they are not followed by." - {:: "" :* "*%=|" :** "" :? "" :! "" - :.* "" :.? "" :. "*?." :.. "." :... "" - :-> "" :=> "" - := ">=" :!= "" :< "<=" :> ">=" :<= "" :>= ""}) + {:: "" :? "" :! "=" :-> "" :=> "" :. "*?." :.* "" :.? "" :.. "." :... "" + :** "" :++ "" (keyword "||") "" + :* "%*=|" :*% "=" (keyword "*|") "=" :/ "=" :% "=" + :+ "%+=|" :+% "=" (keyword "+|") "=" :- "%=>|" :-% "=" (keyword "-|") "=" + :<< "=|" (keyword "<<|") "=" :>> "=" + :& "=" :^ "=" (keyword "|") "|=" (keyword "~") "" + :!= "" :< "<=" :> ">=" :<= "" :>= "" := ">=" + :*= "" :*%= "" (keyword "*|=") "" :/= "" :%= "" + :+= "" :+%= "" (keyword "+|=") "" :-= "" :-%= "" (keyword "-|=") "" + :<<= "" (keyword "<<|=") "" :>>= "" + :&= "" :^= "" (keyword "|=") ""}) (defn collect "Collect fields and declarations in a Zig container definition." @@ -44,8 +51,7 @@ "PEG grammar for Zig." (peg/compile (merge - (zipcoll (map |(keyword "keyword-" $) - keywords) + (zipcoll (map |(keyword "keyword/" $) keywords) (map |~(* (constant :keyword) (<- ,$) (! (+ :w "_")) @@ -56,46 +62,23 @@ (! (set ,(ops $))) :skip) (keys ops))) - ~{:brace-l (* "{" :skip) - :brace-r (* "}" :skip) - :bracket-l (* "[" :skip) - :bracket-r (* "]" :skip) - :paren-l (* "(" :skip) - :paren-r (* ")" :skip) - :pipe (* "|" (! 
(set "|=")) :skip) + ~{:brace/l (* "{" :skip) + :brace/r (* "}" :skip) + :bracket/l (* "[" :skip) + :bracket/r (* "]" :skip) + :paren/l (* "(" :skip) + :paren/r (* ")" :skip) + :pipe/l ,(keyword "|") + :pipe/r ,(keyword "|") :comma (* "," :skip) :semicolon (* ";" :skip) - :op-assign (+ :=) # FIXME - :op-prefix-type (+ :? - (* :keyword-anyframe :->) - (* :bracket-l - (? (* :: :expr)) - :bracket-r - (any (+ :byte-align - :addr-space - :keyword-const - :keyword-volatile - :keyword-allowzero))) - (* (+ :* :** (* :bracket-l - :* - (? (+ "c" (* :: :expr))) - :bracket-r)) - (any (+ :addr-space - (* :keyword-align - :paren-l - :expr - (? (* :: :expr :: :expr)) - :paren-r) - :keyword-const - :keyword-volatile - :keyword-allowzero))) - (* :bracket-l - :expr - (? (* :: :expr)) - :bracket-r)) - :op-suffix (+ (* :bracket-l :expr + :op/assign (+ :*= :*%= ,(keyword "*|=") :/= :%= + :+= :+%= ,(keyword "+|=") :-= :-%= ,(keyword "-|=") + :<<= ,(keyword "<<|=") :>>= :&= :^= ,(keyword "|=") + :=) + :op/suffix (+ (* :bracket/l :expr (? (* :.. (? (* (? :expr) (? (* :: :expr)))))) - :bracket-r) + :bracket/r) (* :. :ident) :.* :.?) :line-comment (* "//" (+ (! (set "!/")) @@ -104,184 +87,347 @@ :skip (any (+ (set " \n") :line-comment)) :doc-comment (* (constant :doc) - (some (* "///" - (* " ") - (<- (to "\n")) - :skip))) - :keyword ,(tuple/slice (array/concat @['+] (map |(keyword "keyword-" $) + (/ (some (* "///" + (* " ") + (<- (to "\n")) + :skip)) + ,tuple)) + :keyword ,(tuple/slice (array/concat @['+] (map |(keyword "keyword/" $) keywords))) - :char-escape (+ (* "\\x" :h :h) + :int/bin (* (set "01") (any (* (? "_") (set "01")))) + :int/oct (* (range "07") (any (* (? "_") (range "07")))) + :int/dec (* :d (any (* (? "_") :d))) + :int/hex (* :h (any (* (? "_") :h))) + :float-exp/dec (* (set "eE") (? (set "-+")) :int/dec) + :float-exp/hex (* (set "pP") (? 
(set "-+")) :int/dec) + :char/escape (+ (* "\\x" :h :h) (* "\\u{" :h+ "}") (* "\\" (set "tnr\"'\\"))) - :string-char (+ :char-escape + :char/char (+ :char/escape + # ASCII except newline, single quote and backslash + (range "\x00\x09" "\x0b\x26" "\x28\x5b" "\x5d\x7f") + # UTF-8 + (* (+ (range "\xc2\xdf") + (* "\xe0" (range "\xa0\xbf")) + (* (range "\xe1\xec") (range "\x80\xbf")) + (* "\xed" (range "\x80\x9f")) + (* (range "\xee\xef") (range "\x80\xbf")) + (* (+ (* "\xf0" (range "\x90\xbf")) + (* (range "\xf1\xf3") (range "\x80\xbf")) + (* "\xf4" (range "\x80\x8f"))) + (range "\x80\xbf"))) + (range "\x80\xbf"))) + :char/string (+ :char/escape (if (! (set "\n\"\\")) 1)) - :string-literal-single (* "\"" - (any :string-char) - "\"" - :skip) - :ident (* (constant :ident) - (! :keyword) - (<- (* (+ :a "_") - (any (+ :w "_")))) - :skip) - :label-block (* :ident ::) - :label-break (* :: :ident) - :list-params (* (any (* :decl-param :comma)) - (? :decl-param)) - :list-expr (* (any (* :expr :comma)) + :literal/integer (* (+ (* "0b" :int/bin) + (* "0o" :int/oct) + (* "0x" :int/hex) + :int/dec) + :skip) + :literal/float (* (+ (* :int/dec "." :int/dec (? :float-exp/dec)) + (* :int/dec :float-exp/dec) + (* "0x" :int/hex "." :int/hex (? :float-exp/hex)) + (* "0x" :int/hex :float-exp/hex)) + :skip) + :literal/char (* "'" :char/char "'" :skip) + :literal/string/single (* "\"" (<- (any :char/string)) + "\"" :skip) + :literal/string (+ :literal/string/single + (some (* (some (* "\\\\" (to "\n") + (any (set " \n")))) + :skip))) + :ident (+ (* (constant :ident) + (! :keyword) + (<- (* (+ :a "_") + (any (+ :w "_")))) + :skip) + (* "@" :literal/string/single)) + :ident/builtin (* (constant :builtin) + "@" + (<- (* (+ :a "_") + (any (+ :w "_")))) + :skip) + :field-init (* :. :ident := :expr) + :label/block (* :ident ::) + :label/break (* :: :ident) + :list/asm/input (* (any (* :item/asm/input :comma)) + (? :item/asm/input)) + :list/asm/output (* (any (* :item/asm/output :comma)) + (? 
:item/asm/output)) + :list/expr (* (any (* :expr :comma)) (? :expr)) - :call-args (* :paren-l :list-expr :paren-r) - :call-conv (* :keyword-callconv :paren-l :expr :paren-r) - :byte-align (* :keyword-align :paren-l :expr :paren-r) - :addr-space (* :keyword-addrspace :paren-l :expr :paren-r) - :link-section (* :keyword-linksection :paren-l :expr :paren-r) - :payload (* :pipe :ident :pipe) - :payload-ptr (* :pipe (? :*) :ident :pipe) - :payload-ptr-list (* :pipe (? :*) :ident + :list/ident (* (any (* (? :doc-comment) :ident :comma)) + (? (* (? :doc-comment) :ident))) + :list/init (* :brace/l + (? (* (+ (* :expr (any (* :comma :expr))) + (* :field-init (any (* :comma :field-init)))) + (? :comma))) + :brace/r) + :list/params (* (any (* :decl/param :comma)) + (? :decl/param)) + :list/string (* (any (* :literal/string :comma)) + (? :literal/string)) + :call-args (* :paren/l :list/expr :paren/r) + :call-conv (* :keyword/callconv :paren/l :expr :paren/r) + :byte-align (* :keyword/align :paren/l :expr :paren/r) + :addr-space (* :keyword/addrspace :paren/l :expr :paren/r) + :link-section (* :keyword/linksection :paren/l :expr :paren/r) + :payload (* :pipe/l :ident :pipe/r) + :payload/ptr (* :pipe/l (? :*) :ident :pipe/r) + :payload/ptr/list (* :pipe/l (? :*) :ident (any (* :comma (? :*) :ident)) - :pipe) - :payload-ptr-index (* :pipe (? :*) :ident + :pipe/r) + :payload/ptr/index (* :pipe/l (? :*) :ident (? (* :comma :ident)) # inline switch - :pipe) - :item-for (* :expr (? (* :.. (? :expr)))) - :item-switch (* :expr (? (* :... :expr))) - :switch-prong (* (? :keyword-inline) - (+ (* (any (* :item-switch :comma)) - :item-switch (? :comma)) - :keyword-else) - :=> (? :payload-ptr-index) :expr-assign-single) - :proto-if (* :keyword-if :paren-l :expr :paren-r (? :payload-ptr)) - :proto-while (* :keyword-while :paren-l :expr :paren-r - (? :payload-ptr) - (? (* :: :paren-l :expr-assign :paren-r))) - :proto-for (* :keyword-for :paren-l - :item-for (any (* :comma :item-for)) (? 
:comma) - :paren-r :payload-ptr-list) - :proto-fn (* :keyword-fn + :pipe/r) + :item/asm/input (* :bracket/l :ident :bracket/r :literal/string + :paren/l :expr :paren/r) + :item/asm/output (* :bracket/l :ident :bracket/r :literal/string + :paren/l (+ (* :-> :expr/type) :ident) :paren/r) + :item/for (* :expr (? (* :.. (? :expr)))) + :item/switch (* :expr (? (* :... :expr))) + :switch-prong (* (? :keyword/inline) + (+ (* (any (* :item/switch :comma)) + :item/switch (? :comma)) + :keyword/else) + :=> (? :payload/ptr/index) :expr/assign-single) + :proto/if (* :keyword/if :paren/l :expr :paren/r (? :payload/ptr)) + :proto/while (* :keyword/while :paren/l :expr :paren/r + (? :payload/ptr) + (? (* :: :paren/l :expr/assign :paren/r))) + :proto/for (* :keyword/for :paren/l + :item/for (any (* :comma :item/for)) (? :comma) + :paren/r :payload/ptr/list) + :proto/fn (* :keyword/fn (? :ident) - :paren-l :list-params :paren-r + :paren/l :list/params :paren/r (? :byte-align) (? :addr-space) (? :link-section) (? :call-conv) (? :!) - :expr-type) - :proto-decl-var (* (+ :keyword-const :keyword-var) :ident - (? (* :: :expr-type)) + :expr/type) + :proto/enum (* :keyword/enum (? (* :paren/l :expr :paren/r))) + :proto/slice (* :bracket/l (? (* :: :expr)) :bracket/r + (any (+ :byte-align + :addr-space + :keyword/const + :keyword/volatile + :keyword/allowzero))) + :proto/pointer (* (+ :* :** + (* :bracket/l :* (+ "c" (* :: :expr) 0) + :bracket/r)) + (any (+ :addr-space + (* :keyword/align :paren/l + :expr (? (* :: :expr :: :expr)) + :paren/r) + :keyword/const + :keyword/volatile + :keyword/allowzero))) + :proto/array (* :bracket/l :expr (? (* :: :expr)) :bracket/r) + :proto/type (+ :? (* :keyword/anyframe :->) + :proto/slice :proto/pointer :proto/array) + :proto/decl-var (* (constant :const?) + (+ (* (constant true) (drop :keyword/const)) + (* (constant false) (drop :keyword/var))) + :ident + (? (* :: :expr/type)) (? :byte-align) (? :addr-space) (? 
:link-section)) - :proto-decl-var-rest (* :comma (+ :proto-decl-var :expr)) - :stmt-decl-var (* (+ (? (* :expr :op-assign)) - (* (+ :proto-decl-var - (* :expr :proto-decl-var-rest)) - (any :proto-decl-var-rest) + :proto/decl-var-rest (* :comma (+ :proto/decl-var :expr)) + :stmt/decl-var (* (+ (? (* :expr :op/assign)) + (* (+ :proto/decl-var + (* :expr :proto/decl-var-rest)) + (any :proto/decl-var-rest) :=)) :expr :semicolon) - :stmt-for (* :proto-for - (+ :expr-block - (* :expr-assign :semicolon) - (* (+ :expr-block :expr-assign) - (* :keyword-else :stmt)))) - :stmt-while (* :proto-while - (+ :expr-block - (* :expr-assign :semicolon) - (* (+ :expr-block :expr-assign) - (* :keyword-else (? :payload) :stmt)))) - :stmt (+ (* :keyword-comptime - (+ :expr-block :stmt-decl-var)) - (* (+ :keyword-nosuspend - :keyword-suspend - :keyword-defer - (* :keyword-defer (? :payload))) - (+ :expr-block (* :expr-assign :semicolon))) - (* :proto-if - (+ :expr-block - (* :expr-assign :semicolon) - (* (+ :expr-block :expr-assign) - (* :keyword-else (? :payload) :stmt)))) - :expr-block - (* (? :label-block) - (? :keyword-inline) - (+ :stmt-for :stmt-while)) - :expr-switch - :stmt-decl-var) - :block (* :brace-l (any :stmt) :brace-r) - :expr-block (* (? :label-block) :block) - :expr-switch (* :keyword-switch :paren-l :expr :paren-r :brace-l + :stmt/for (* :proto/for + (+ :expr/block + (* :expr/assign :semicolon) + (* (+ :expr/block :expr/assign) + (* :keyword/else :stmt)))) + :stmt/while (* :proto/while + (+ :expr/block + (* :expr/assign :semicolon) + (* (+ :expr/block :expr/assign) + (* :keyword/else (? :payload) :stmt)))) + :stmt (+ (* :keyword/comptime + (+ :expr/block :stmt/decl-var)) + (* (+ :keyword/nosuspend + :keyword/suspend + :keyword/defer + (* :keyword/defer (? :payload))) + (+ :expr/block (* :expr/assign :semicolon))) + (* :proto/if + (+ :expr/block + (* :expr/assign :semicolon) + (* (+ :expr/block :expr/assign) + (* :keyword/else (? :payload) :stmt)))) + :expr/block + (* (? 
:label/block) + (? :keyword/inline) + (+ :stmt/for :stmt/while)) + :expr/switch + :stmt/decl-var) + :block (* :brace/l (any :stmt) :brace/r) + :expr/block (* (? :label/block) :block) + :expr/switch (* :keyword/switch :paren/l :expr :paren/r :brace/l (any (* :switch-prong :comma)) (? :switch-prong) - :brace-r) - :expr-assign-single (* :expr (? (* :op-assign :expr))) - :expr-assign (* :expr (? (+ (* :op-assign :expr) - (+ (some (* :comma :expr)) := :expr)))) - :expr-error-union (* (+ (* :keyword-async - :expr-type-primary - (any :op-suffix) - :call-args) - (* :expr-type-primary - (any (+ :op-suffix :call-args)))) - (? (* :! :expr-type))) - :expr-type-primary (+ :proto-fn) # FIXME - :expr-type (* (constant :type) - (/ (* (any :op-prefix-type) - :expr-error-union) + :brace/r) + :expr/assign-single (* :expr (? (* :op/assign :expr))) + :expr/assign (* :expr (+ (* :op/assign :expr) + (+ (some (* :comma :expr)) := :expr) + 0)) + :expr/container (* (+ :keyword/extern :keyword/packed 0) + (+ (* :keyword/struct + (? (* :paren/l :expr :paren/r))) + :keyword/opaque + :proto/enum + (* :keyword/union + (? (* :paren/l + (+ :proto/enum :expr) + :paren/r)))) + :brace/l :container/members :brace/r) + :expr/type (* (constant :type) + (/ (* (any :proto/type) :expr/error-union) ,tuple)) - :expr-bitwise :ident # FIXME - :expr-compare (* :expr-bitwise (? (* (+ := :!= :< :> :<= :>=) - :expr-bitwise))) - :expr-and (* :expr-compare (any (* :keyword-and :expr-compare))) - :expr (* :expr-and (any (* :keyword-or :expr-and))) - :decl-param (+ (* (? :doc-comment) - (? (+ :keyword-noalias :keyword-comptime)) + :expr/type/primary (+ :literal/char + :literal/float + :literal/integer + :literal/string + (* :. :ident) # enum + (* :. :list/init) # tuple + (* :keyword/error :. 
:ident) + (* :keyword/error :brace/l :list/ident :brace/r) + (* :keyword/comptime :expr/type) + :keyword/anyframe + :keyword/unreachable + :proto/fn + :expr/container + :ident + (* :ident/builtin :call-args) + (* :paren/l :expr :paren/r) + :expr/block + (* (? :label/block) (? :keyword/inline) + (+ (* :proto/for :expr/type + (? (* :keyword/else :expr/type))) + (* :proto/while :expr/type + (? (* :keyword/else (? :payload) + :expr/type))))) + (* :proto/if :expr/type + (? (* :keyword/else (? :payload) :expr/type))) + :expr/switch) + :expr/error-union (* (+ (* :keyword/async + :expr/type/primary + (any :op/suffix) + :call-args) + (* :expr/type/primary + (any (+ :op/suffix :call-args)))) + (? (* :! :expr/type))) + :expr/asm (* :keyword/asm (? :keyword/volatile) :paren/l :expr + (? (* :: :list/asm/output + (? (* :: :list/asm/input + (? (* :: :list/string)))))) + :paren/r) + :expr/prefix (* (any (+ :! :- ,(keyword "~") :-% :& + :keyword/try :keyword/await)) + (+ :expr/asm + (* :proto/if :expr + (? (* :keyword/else (? :payload) :expr))) + (* :keyword/break (? :label/break) (? :expr)) + (* :keyword/comptime :expr) + (* :keyword/nosuspend :expr) + (* :keyword/continue (? :label/break)) + (* :keyword/resume :expr) + (* :keyword/return (? :expr)) + (* (? :label/block) (? :keyword/inline) + (+ (* :proto/for :expr + (? (* :keyword/else :expr))) + (* :proto/while :expr + (? (* :keyword/else (? :payload) :expr))))) + :block + (* :expr/type (? :list/init)))) + :expr/mul (* :expr/prefix + (any (* (+ ,(keyword "||") :** + :* :*% ,(keyword "*|") + :/ :%) + :expr/prefix))) + :expr/add (* :expr/mul + (any (* (+ :++ + :+% :+ ,(keyword "+|") + :- :-% ,(keyword "-|")) + :expr/mul))) + :expr/bit-shift (* :expr/add (any (* (+ :<< ,(keyword "<<|") :>>) + :expr/add))) + :expr/bitwise (* :expr/bit-shift + (any (* (+ :& :^ ,(keyword "|") :keyword/orelse + (* :keyword/catch (? :payload))) + :expr/bit-shift))) + :expr/cmp (* :expr/bitwise (? 
(* (+ := :!= :< :> :<= :>=) + :expr/bitwise))) + :expr/and (* :expr/cmp (any (* :keyword/and :expr/cmp))) + :expr (* :expr/and (any (* :keyword/or :expr/and))) + :decl/param (+ (* (? :doc-comment) + (+ :keyword/noalias :keyword/comptime 0) (? (* :ident ::)) - (+ :keyword-anytype :expr)) + (+ :keyword/anytype :expr)) :...) - :decl-test (* :keyword-test) # FIXME - :decl-comptime (* :keyword-comptime) # FIXME - :decl-fn (* (? (+ :keyword-export - (* :keyword-extern (? :string-literal-single)) - :keyword-inline - :keyword-noinline)) - :proto-fn + :decl/test (* (constant :kind) (constant :test) (drop :keyword/test) + (? (* (constant :name) (+ :literal/string/single :ident))) + :block) + :decl/comptime (* :keyword/comptime :block) + :decl/fn (* (+ :keyword/export + (* :keyword/extern (? :literal/string/single)) + :keyword/inline + :keyword/noinline + 0) + :proto/fn (+ :semicolon :block)) - :decl-global (* (? (+ :keyword-export - (* :keyword-extern (? :string-literal-single)))) - (? :keyword-threadlocal) - :proto-decl-var + :decl/global (* (+ (* (constant :export?) (constant true) + (drop :keyword/export)) + (* (constant :extern) (drop :keyword/extern) + (+ :literal/string/single + (constant true))) + 0) + (? (* (constant :thread-local?) (constant true) + (drop :keyword/threadlocal))) + :proto/decl-var (? (* := :expr)) :semicolon) - :decl (+ :decl-fn - :decl-global - (* :keyword-usingnamespace :expr :semicolon)) - :container-doc-comment (* (constant :doc) + :container/doc-comment (* (constant :doc) (some (* "//!" (* " ") (<- (to "\n")) :skip))) - :container-decl (* (constant :decl) - (/ (+ :decl-test - :decl-comptime + :container/decl (* (constant :decl) + (/ (+ :decl/test + :decl/comptime (* (? :doc-comment) - (? :keyword-pub) - :decl)) + (? 
(* (constant :pub) (constant true) + (drop :keyword/pub))) + (constant :kind) + (+ (* (constant :fn) :decl/fn) + (* (constant :global) :decl/global) + (* (constant :using-namespace) + (drop :keyword/usingnamespace) + :expr :semicolon)))) ,struct)) - :container-field (* (constant :field) + :container/field (* (constant :field) (/ (* (? :doc-comment) - (? :keyword-comptime) - (! :keyword-fn) + (? :keyword/comptime) + (! :keyword/fn) (? (* :ident (drop ::))) - :expr-type + :expr/type (? :byte-align) (? (* := :expr))) ,struct)) - :container-members (* (any :container-decl) - (any (* :container-field :comma)) - (+ :container-field (any :container-decl))) - :main (/ (* :skip - (? :container-doc-comment) - :container-members - -1) + :container/members (* (? :container/doc-comment) + (any :container/decl) + (any (* :container/field :comma)) + (+ :container/field (any :container/decl))) + :main (/ (* :skip :container/members -1) ,collect)}))) (defn ast -- cgit 1.4.1