diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Token.zig | 97 | ||||
-rw-r--r-- | src/main.zig | 16 | ||||
-rw-r--r-- | src/supported-languages | 1 |
3 files changed, 114 insertions, 0 deletions
diff --git a/src/Token.zig b/src/Token.zig
new file mode 100644
index 0000000..1289a0d
--- /dev/null
+++ b/src/Token.zig
@@ -0,0 +1,131 @@
+// Tokenizer
+// SPDX-FileCopyrightText: 2025 Nguyễn Gia Phong
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+const order = std.math.order;
+const std = @import("std");
+
+const Node = tree_sitter.Node;
+const Parser = tree_sitter.Parser;
+const Tree = tree_sitter.Tree;
+const TreeCursor = tree_sitter.TreeCursor;
+const tree_sitter = @import("tree-sitter");
+
+const CreateLanguage = @import("languages").Create;
+
+const Token = @This();
+/// Bytes of the tokenized text covered by this token.
+text: []const u8,
+/// Leaf syntax node spanning the text,
+/// or null for text not covered by any leaf node.
+node: ?Node = null,
+
+/// Iterator over the tokens of a parsed text, created by `ize`.
+const Iterator = struct {
+    /// Text being tokenized; returned tokens are slices of it.
+    text: []const u8,
+    parser: *Parser,
+    tree: *Tree,
+    cursor: TreeCursor,
+    /// Leaf held back while the text preceding it is being returned.
+    next_node: ?Node = null,
+    /// Whether the cursor has walked past the last node of the tree.
+    done: bool = false,
+    /// Byte offset in text up to which tokens have been returned.
+    pos: u32 = 0,
+
+    /// Returns the next tree-sitter node in pre-order,
+    /// or null once every node has been returned.
+    fn nextNode(self: *Iterator) ?Node {
+        if (self.next_node) |node| {
+            self.next_node = null;
+            return node;
+        }
+        if (self.done)
+            return null;
+        const node = self.cursor.node();
+        if (self.cursor.gotoFirstChild() or self.cursor.gotoNextSibling())
+            return node;
+        while (self.cursor.gotoParent()) {
+            if (self.cursor.gotoNextSibling())
+                return node;
+        }
+        // The cursor climbed back to the root without finding a sibling:
+        // node is the last one and must still be returned,
+        // but the walk must not start over on the next call.
+        self.done = true;
+        return node;
+    }
+
+    /// Returns the next token, or null when the text is exhausted.
+    /// Text covered by a leaf node is returned together with that node,
+    /// any text in between is returned without one.
+    pub fn next(self: *Iterator) ?Token {
+        if (self.pos == self.text.len)
+            return null;
+        while (self.nextNode()) |node| {
+            // Only leaves are tokens: an inner node overlaps its children
+            // and would start before pos once one of them was returned.
+            if (node.childCount() > 0)
+                continue;
+            const start = node.startByte();
+            const end = node.endByte();
+            switch (order(self.pos, start)) {
+                .lt => {
+                    // Return the gap first and keep the leaf for later.
+                    defer self.pos = start;
+                    self.next_node = node;
+                    return .{ .text = self.text[self.pos..start] };
+                },
+                .eq => {
+                    defer self.pos = end;
+                    return .{ .text = self.text[start..end], .node = node };
+                },
+                // Leaves are visited in order and do not overlap.
+                .gt => unreachable,
+            }
+        }
+        switch (order(self.pos, self.text.len)) {
+            .lt => {
+                // Text trailing the last leaf
+                defer self.pos = @intCast(self.text.len);
+                return .{ .text = self.text[self.pos..] };
+            },
+            .eq => return null,
+            .gt => unreachable,
+        }
+    }
+
+    /// Releases the cursor, tree, language and parser.
+    /// The iterator must not be used afterwards.
+    pub fn deinit(self: *Iterator) void {
+        self.cursor.destroy();
+        self.tree.destroy();
+        self.parser.getLanguage().?.destroy();
+        self.parser.destroy();
+        self.* = undefined;
+    }
+};
+
+/// Parse text in given language and return an iterator of tokens.
+/// The caller must call deinit on the returned iterator,
+/// and keep text alive for as long as the tokens are used.
+pub fn ize(text: []const u8, createLanguage: CreateLanguage) error {
+    IncompatibleVersion,
+}!Iterator {
+    const parser = Parser.create();
+    // On failure the iterator's deinit never runs, so clean up here.
+    errdefer parser.destroy();
+    const language = createLanguage();
+    errdefer language.destroy();
+    try parser.setLanguage(language);
+    // NOTE(review): assumes parsing cannot fail once a language is set
+    // and no timeout or cancellation is configured -- confirm.
+    const tree = parser.parseString(text, null).?;
+    return .{
+        .text = text,
+        .parser = parser,
+        .tree = tree,
+        .cursor = tree.walk(),
+    };
+}
diff --git a/src/main.zig b/src/main.zig
new file mode 100644
index 0000000..c55186b
--- /dev/null
+++ b/src/main.zig
@@ -0,0 +1,16 @@
+// Entry point
+// SPDX-FileCopyrightText: 2025 Nguyễn Gia Phong
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+const std = @import("std");
+
+const Token = @import("Token.zig");
+const languages = @import("languages");
+
+pub fn main() !void {
+    const text = "int main()\n{\n\treturn 0;\n}\n";
+    var tokens = try Token.ize(text, languages.c);
+    defer tokens.deinit();
+    while (tokens.next()) |token|
+        std.debug.print("{s}", .{ token.text });
+}
diff --git a/src/supported-languages b/src/supported-languages
new file mode 100644
index 0000000..f2ad6c7
--- /dev/null
+++ b/src/supported-languages
@@ -0,0 +1 @@
+c