// Tokenizer
// SPDX-FileCopyrightText: 2025 Nguyễn Gia Phong
// SPDX-License-Identifier: GPL-3.0-or-later

const order = std.math.order;
const std = @import("std");

const Node = tree_sitter.Node;
const Parser = tree_sitter.Parser;
const Tree = tree_sitter.Tree;
const TreeCursor = tree_sitter.TreeCursor;
const tree_sitter = @import("tree-sitter");

const CreateLanguage = @import("languages").Create;

const Token = @This();

/// Slice of the tokenized text covered by this token.
text: []const u8,
/// Leaf node this token was produced from,
/// or null for text lying between leaf nodes.
node: ?Node = null,

/// Iterator over the tokens of a parsed text.
/// Every byte of the text is yielded exactly once, in order:
/// either as a leaf node's text or as filler text without a node.
const Iterator = struct {
    /// Text being tokenized; tokens are slices of it.
    text: []const u8,
    parser: *Parser,
    tree: *Tree,
    cursor: TreeCursor,
    /// Node pushed back by `next` to be handed out again by `nextNode`.
    next_node: ?Node = null,
    /// Byte offset in `text` up to which tokens have been yielded.
    pos: u32 = 0,
    /// Set once the cursor has visited every node of the tree.
    done: bool = false,

    /// Returns the next tree-sitter node in pre-order,
    /// or null once every node has been returned.
    fn nextNode(self: *Iterator) ?Node {
        // A pushed-back node takes priority over the traversal.
        if (self.next_node) |node| {
            self.next_node = null;
            return node;
        }
        if (self.done)
            return null;

        // Return the current node after moving the cursor
        // to the one that follows it in pre-order.
        const node = self.cursor.node();
        if (self.cursor.gotoFirstChild() or self.cursor.gotoNextSibling())
            return node;
        while (self.cursor.gotoParent()) {
            if (self.cursor.gotoNextSibling())
                return node;
        }
        // No successor exists: this is the last node of the tree.
        // It must still be returned, only later calls yield null.
        self.done = true;
        return node;
    }

    /// Returns the next token, or null when the whole text
    /// has been consumed.
    pub fn next(self: *Iterator) ?Token {
        if (self.pos == self.text.len)
            return null;
        while (self.nextNode()) |node| {
            // Only leaves are tokens: interior nodes overlap
            // their descendants and would break the ordering below.
            if (node.childCount() != 0)
                continue;
            const start = node.startByte();
            const end = node.endByte();
            switch (order(self.pos, start)) {
                .lt => {
                    // Yield the text before the leaf first
                    // and keep the leaf for the following call.
                    defer self.pos = start;
                    self.next_node = node;
                    return .{ .text = self.text[self.pos..start] };
                },
                .eq => {
                    defer self.pos = end;
                    return .{ .text = self.text[start..end], .node = node };
                },
                // Leaves are visited in order and do not overlap.
                .gt => unreachable,
            }
        }
        // No leaf left: whatever remains is trailing text.
        switch (order(self.pos, self.text.len)) {
            .lt => {
                defer self.pos = @intCast(self.text.len);
                return .{ .text = self.text[self.pos..] };
            },
            .eq => return null,
            .gt => unreachable,
        }
    }

    /// Rewinds the iterator to the beginning of the text.
    pub fn reset(self: *Iterator) void {
        self.cursor.reset(self.tree.rootNode());
        self.next_node = null;
        self.pos = 0;
        self.done = false;
    }

    /// Destroys the cursor, the tree, the parser's language
    /// and the parser, then invalidates the iterator.
    pub fn deinit(self: *Iterator) void {
        self.cursor.destroy();
        self.tree.destroy();
        self.parser.getLanguage().?.destroy();
        self.parser.destroy();
        self.* = undefined;
    }
};

/// Parse text in given language and return an iterator of tokens.
/// The parser, the language returned by `createLanguage` and the syntax tree
/// are handed over to the returned iterator, whose `deinit` destroys them.
/// `text` is only borrowed: the iterator yields slices of it.
///
/// `error.IncompatibleVersion` is propagated from `Parser.setLanguage`;
/// in that case the parser and the language are destroyed before returning.
pub fn ize(text: []const u8, createLanguage: CreateLanguage) error{
    IncompatibleVersion,
}!Iterator {
    const parser = Parser.create();
    errdefer parser.destroy();
    const language = createLanguage();
    // Runs before the parser's errdefer, matching the order in `deinit`.
    errdefer language.destroy();
    try parser.setLanguage(language);

    // NOTE(review): presumably parsing cannot yield null once a language
    // is set and no cancellation is configured; confirm against tree-sitter.
    const tree = parser.parseString(text, null).?;
    return .{
        .text = text,
        .parser = parser,
        .tree = tree,
        .cursor = tree.walk(),
    };
}