summary refs log tree commit diff
path: root/src/parse.janet
diff options
context:
space:
mode:
Diffstat (limited to 'src/parse.janet')
-rw-r--r-- src/parse.janet 293
1 files changed, 293 insertions, 0 deletions
diff --git a/src/parse.janet b/src/parse.janet
new file mode 100644
index 0000000..a391da4
--- /dev/null
+++ b/src/parse.janet
@@ -0,0 +1,293 @@
+# Zig parser
+# Copyright (C) 2024  Nguyễn Gia Phong
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+(def keywords
+  "Reserved words of Zig, sorted alphabetically."
+  ["addrspace" "align" "allowzero" "and" "anyframe" "anytype" "asm" "async"
+   "await"
+   "break"
+   "callconv" "catch" "comptime" "const" "continue"
+   "defer"
+   "else" "enum" "errdefer" "error" "export" "extern"
+   "fn" "for"
+   "if" "inline"
+   "linksection"
+   "noalias" "noinline" "nosuspend"
+   "opaque" "or" "orelse"
+   "packed" "pub"
+   "resume" "return"
+   "struct" "suspend" "switch"
+   "test" "threadlocal" "try"
+   "union" "unreachable" "usingnamespace"
+   "var" "volatile"
+   "while"])
+
+# Each key is an operator token; its value lists the characters that must
+# not come directly after it.  The grammar turns every entry into a rule
+# with a negative lookahead on that set, so a short operator never matches
+# the beginning of a longer one (e.g. "." inside "..").  An empty string
+# means the token has no such restriction.
+(def ops
+  "Mapping of Zig operator tokens and characters they are not followed by."
+  {:: "" :* "*%=|" :** "" :? ""
+   # "!" must not swallow the first half of "!=".
+   :! "="
+   :.* "" :.? "" :. "*?." :.. "." :... ""
+   :-> "" :=> ""
+   := ">=" :!= "" :< "<=" :> ">=" :<= "" :>= ""})
+
+(defn collect
+  "Group alternating kind/value arguments into an immutable container
+  with the keys :doc, :field and :decl, each holding its values in
+  the order given."
+  [& kwargs]
+  (def members @{:doc @[] :field @[] :decl @[]})
+  # Arguments come in pairs: a kind keyword at every even index,
+  # followed by the value to file under that kind.
+  (loop [at :range [0 (length kwargs) 2]]
+    (def kind (in kwargs at))
+    (def value (in kwargs (+ at 1)))
+    (array/push (in members kind) value))
+  (freeze members))
+
+(def grammar
+  "PEG grammar for Zig (work in progress, see the FIXME rules).
+  The :main rule matches a whole file and passes its captures to collect."
+  (peg/compile
+    (merge
+      # One rule per keyword, named :keyword-<word>.  It emits :keyword
+      # followed by the word and refuses to match when the word is only
+      # the prefix of a longer identifier.
+      (zipcoll (map |(keyword "keyword-" $)
+                    keywords)
+               (map |~(* (constant :keyword)
+                         (<- ,$)
+                         (! (+ :w "_"))
+                         :skip)
+                    keywords))
+      # One rule per entry of ops, named after the operator itself.
+      # It captures the operator text unless one of the characters
+      # listed for it in ops comes next.
+      (zipcoll (map keyword (keys ops))
+               (map |~(* (<- ,(string $))
+                         (! (set ,(ops $)))
+                         :skip)
+                    (keys ops)))
+      # Hand-written rules.  Every token rule ends with :skip so that
+      # trailing blanks and ordinary comments are consumed with the token.
+      ~{:brace-l (* "{" :skip)
+        :brace-r (* "}" :skip)
+        :bracket-l (* "[" :skip)
+        :bracket-r (* "]" :skip)
+        :paren-l (* "(" :skip)
+        :paren-r (* ")" :skip)
+        :pipe (* "|" (! (set "|=")) :skip)
+        :comma (* "," :skip)
+        :semicolon (* ";" :skip)
+        # Equality operator, captured like the operators generated from ops.
+        :== (* (<- "==") :skip)
+        :op-assign (+ :=) # FIXME
+        :op-prefix-type (+ :?
+                           (* :keyword-anyframe :->)
+                           # slice type start
+                           (* :bracket-l
+                              (? (* :: :expr))
+                              :bracket-r
+                              (any (+ :byte-align
+                                      :addr-space
+                                      :keyword-const
+                                      :keyword-volatile
+                                      :keyword-allowzero)))
+                           # pointer type start
+                           (* (+ :* :** (* :bracket-l
+                                           :*
+                                           (? (+ "c" (* :: :expr)))
+                                           :bracket-r))
+                              (any (+ :addr-space
+                                      (* :keyword-align
+                                         :paren-l
+                                         :expr
+                                         (? (* :: :expr :: :expr))
+                                         :paren-r)
+                                      :keyword-const
+                                      :keyword-volatile
+                                      :keyword-allowzero)))
+                           # array type start
+                           (* :bracket-l
+                              :expr
+                              (? (* :: :expr))
+                              :bracket-r))
+        :op-suffix (+ (* :bracket-l :expr
+                         (? (* :.. (? (* (? :expr) (? (* :: :expr))))))
+                         :bracket-r)
+                      (* :. :ident)
+                      :.* :.?)
+        # "//" not followed by "!" or "/", or four or more slashes;
+        # "///" and "//!" are left for the documentation rules.
+        :line-comment (* "//" (+ (! (set "!/"))
+                                 (at-least 2 "/"))
+                         (to "\n"))
+        :skip (any (+ (set " \n")
+                      :line-comment))
+        # Emits :doc followed by a single string: the text of the
+        # consecutive "///" lines joined with newlines, so that :doc
+        # is always followed by exactly one value.  The space after
+        # the marker is optional so that a bare "///" line matches.
+        :doc-comment (* (constant :doc)
+                        (/ (some (* "///"
+                                    (? " ")
+                                    (<- (to "\n"))
+                                    :skip))
+                           ,(fn [& lines] (string/join lines "\n"))))
+        # Ordered choice over all the :keyword-<word> rules.
+        :keyword ,(tuple/slice (array/concat @['+] (map |(keyword "keyword-" $)
+                                                        keywords)))
+        :char-escape (+ (* "\\x" :h :h)
+                        (* "\\u{" :h+ "}")
+                        (* "\\" (set "tnr\"'\\")))
+        :string-char (+ :char-escape
+                        (if (! (set "\n\"\\"))
+                          1))
+        :string-literal-single (* "\""
+                                  (any :string-char)
+                                  "\""
+                                  :skip)
+        # Emits :ident followed by the name; keywords are rejected.
+        :ident (* (constant :ident)
+                  (! :keyword)
+                  (<- (* (+ :a "_")
+                         (any (+ :w "_"))))
+                  :skip)
+        :label-block (* :ident ::)
+        :label-break (* :: :ident)
+        :list-params (* (any (* :decl-param :comma))
+                        (? :decl-param))
+        :list-expr (* (any (* :expr :comma))
+                      (? :expr))
+        :call-args (* :paren-l :list-expr :paren-r)
+        :call-conv (* :keyword-callconv :paren-l :expr :paren-r)
+        :byte-align (* :keyword-align :paren-l :expr :paren-r)
+        :addr-space (* :keyword-addrspace :paren-l :expr :paren-r)
+        :link-section (* :keyword-linksection :paren-l :expr :paren-r)
+        :payload (* :pipe :ident :pipe)
+        :payload-ptr (* :pipe (? :*) :ident :pipe)
+        :payload-ptr-list (* :pipe (? :*) :ident
+                             (any (* :comma (? :*) :ident))
+                             :pipe)
+        :payload-ptr-index (* :pipe (? :*) :ident
+                              (? (* :comma :ident)) # inline switch
+                              :pipe)
+        :item-for (* :expr (? (* :.. (? :expr))))
+        :item-switch (* :expr (? (* :... :expr)))
+        # The first item is matched before the comma-separated rest,
+        # which leaves an optional trailing comma for the final (? :comma).
+        :switch-prong (* (? :keyword-inline)
+                         (+ (* :item-switch
+                               (any (* :comma :item-switch))
+                               (? :comma))
+                            :keyword-else)
+                         :=> (? :payload-ptr-index) :expr-assign-single)
+        :proto-if (* :keyword-if :paren-l :expr :paren-r (? :payload-ptr))
+        :proto-while (* :keyword-while :paren-l :expr :paren-r
+                        (? :payload-ptr)
+                        (? (* :: :paren-l :expr-assign :paren-r)))
+        :proto-for (* :keyword-for :paren-l
+                      :item-for (any (* :comma :item-for)) (? :comma)
+                      :paren-r :payload-ptr-list)
+        :proto-fn (* :keyword-fn
+                     (? :ident)
+                     :paren-l :list-params :paren-r
+                     (? :byte-align)
+                     (? :addr-space)
+                     (? :link-section)
+                     (? :call-conv)
+                     (? :!)
+                     :expr-type)
+        :proto-decl-var (* (+ :keyword-const :keyword-var) :ident
+                           (? (* :: :expr-type))
+                           (? :byte-align)
+                           (? :addr-space)
+                           (? :link-section))
+        :proto-decl-var-rest (* :comma (+ :proto-decl-var :expr))
+        # An optional left-hand side, then an expression and ";".
+        # The declaration/destructuring form is tried first: an empty
+        # left-hand side always matches, so it has to be the last resort.
+        :stmt-decl-var (* (? (+ (* (+ :proto-decl-var
+                                      (* :expr :proto-decl-var-rest))
+                                   (any :proto-decl-var-rest)
+                                   :=)
+                                (* :expr :op-assign)))
+                          :expr :semicolon)
+        # For the three control-flow statements below, a block body may
+        # be followed by an else branch, whereas an expression body
+        # needs either ";" or an else branch.
+        :stmt-for (* :proto-for
+                     (+ (* :expr-block
+                           (? (* :keyword-else :stmt)))
+                        (* :expr-assign
+                           (+ :semicolon
+                              (* :keyword-else :stmt)))))
+        :stmt-while (* :proto-while
+                       (+ (* :expr-block
+                             (? (* :keyword-else (? :payload) :stmt)))
+                          (* :expr-assign
+                             (+ :semicolon
+                                (* :keyword-else (? :payload) :stmt)))))
+        :stmt (+ (* :keyword-comptime
+                    (+ :expr-block :stmt-decl-var))
+                 # Only errdefer may bind the error with a payload.
+                 (* (+ :keyword-nosuspend
+                       :keyword-suspend
+                       :keyword-defer
+                       (* :keyword-errdefer (? :payload)))
+                    (+ :expr-block (* :expr-assign :semicolon)))
+                 (* :proto-if
+                    (+ (* :expr-block
+                          (? (* :keyword-else (? :payload) :stmt)))
+                       (* :expr-assign
+                          (+ :semicolon
+                             (* :keyword-else (? :payload) :stmt)))))
+                 :expr-block
+                 (* (? :label-block)
+                    (? :keyword-inline)
+                    (+ :stmt-for :stmt-while))
+                 :expr-switch
+                 :stmt-decl-var)
+        :block (* :brace-l (any :stmt) :brace-r)
+        :expr-block (* (? :label-block) :block)
+        :expr-switch (* :keyword-switch :paren-l :expr :paren-r :brace-l
+                        (any (* :switch-prong :comma)) (? :switch-prong)
+                        :brace-r)
+        :expr-assign-single (* :expr (? (* :op-assign :expr)))
+        # Plain assignment, or a destructuring one:
+        # one or more ", expr" followed by "=" and the value.
+        :expr-assign (* :expr (? (+ (* :op-assign :expr)
+                                    (* (some (* :comma :expr)) := :expr))))
+        :expr-error-union (* (+ (* :keyword-async
+                                   :expr-type-primary
+                                   (any :op-suffix)
+                                   :call-args)
+                                (* :expr-type-primary
+                                   (any (+ :op-suffix :call-args))))
+                             (? (* :! :expr-type)))
+        :expr-type-primary (+ :proto-fn) # FIXME
+        # Emits :type followed by a tuple of everything captured inside.
+        :expr-type (* (constant :type)
+                      (/ (* (any :op-prefix-type)
+                            :expr-error-union)
+                         ,tuple))
+        :expr-bitwise :ident # FIXME
+        :expr-compare (* :expr-bitwise (? (* (+ :== :!= :< :> :<= :>=)
+                                             :expr-bitwise)))
+        :expr-and (* :expr-compare (any (* :keyword-and :expr-compare)))
+        :expr (* :expr-and (any (* :keyword-or :expr-and)))
+        :decl-param (+ (* (? :doc-comment)
+                          (? (+ :keyword-noalias :keyword-comptime))
+                          (? (* :ident ::))
+                          (+ :keyword-anytype :expr))
+                       :...)
+        :decl-test (* :keyword-test) # FIXME
+        :decl-comptime (* :keyword-comptime) # FIXME
+        :decl-fn (* (? (+ :keyword-export
+                          (* :keyword-extern (? :string-literal-single))
+                          :keyword-inline
+                          :keyword-noinline))
+                    :proto-fn
+                    (+ :semicolon :block))
+        :decl-global (* (? (+ :keyword-export
+                              (* :keyword-extern (? :string-literal-single))))
+                        (? :keyword-threadlocal)
+                        :proto-decl-var
+                        (? (* := :expr))
+                        :semicolon)
+        :decl (+ :decl-fn
+                 :decl-global
+                 (* :keyword-usingnamespace :expr :semicolon))
+        # Same shape as :doc-comment, for the "//!" lines: the lines
+        # are joined so that :doc is followed by exactly one string.
+        :container-doc-comment (* (constant :doc)
+                                  (/ (some (* "//!"
+                                              (? " ")
+                                              (<- (to "\n"))
+                                              :skip))
+                                     ,(fn [& lines] (string/join lines "\n"))))
+        # Emits :decl followed by a struct built from the captures inside.
+        :container-decl (* (constant :decl)
+                           (/ (+ :decl-test
+                                 :decl-comptime
+                                 (* (? :doc-comment)
+                                    (? :keyword-pub)
+                                    :decl))
+                              ,struct))
+        # Emits :field followed by a struct built from the captures inside;
+        # the ":" after the field name is dropped to keep the pairs aligned.
+        :container-field (* (constant :field)
+                            (/ (* (? :doc-comment)
+                                  (? :keyword-comptime)
+                                  (! :keyword-fn)
+                                  (? (* :ident (drop ::)))
+                                  :expr-type
+                                  (? :byte-align)
+                                  (? (* := :expr)))
+                               ,struct))
+        :container-members (* (any :container-decl)
+                              (any (* :container-field :comma))
+                              (+ :container-field (any :container-decl)))
+        # Whole file: leading blanks and comments, an optional "//!"
+        # header, the members, then end of input.
+        :main (/ (* :skip
+                    (? :container-doc-comment)
+                    :container-members
+                    -1)
+                 ,collect)})))
+
+(defn ast
+  "Match text against grammar and return the first capture,
+  or nil when the text does not match."
+  [text]
+  (if-let [matched (peg/match grammar text)]
+    (in matched 0)))
+
+# Script body: read all of standard input, parse it and pretty-print the result.
+(-> stdin (:read :all) ast pp)