summary refs log tree commit diff
path: root/src/Token.zig
blob: 49b15bb4a35be9b998c4fc1459f239cb0076ca73 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
// Tokenizer
// SPDX-FileCopyrightText: 2025 Nguyễn Gia Phong
// SPDX-License-Identifier: GPL-3.0-or-later

const order = std.math.order;
const std = @import("std");

const Node = tree_sitter.Node;
const Parser = tree_sitter.Parser;
const Tree = tree_sitter.Tree;
const TreeCursor = tree_sitter.TreeCursor;
const tree_sitter = @import("tree-sitter");

const CreateLanguage = @import("languages").Create;

/// A token of tokenized text.  This file itself is the struct type.
const Token = @This();
/// Slice of the tokenized text covered by this token.
text: []const u8,
/// Syntax-tree node this token was produced from, or null when the text
/// was emitted as filler between nodes or after the last emitted node.
node: ?Node = null,

/// Iterator over the tokens of a parsed text.
///
/// The syntax tree is walked in pre-order.  Every leaf node becomes a token
/// carrying that node, and any text between two consecutive leaves (or after
/// the last one) becomes a token without a node, so that the concatenation
/// of all token texts reproduces the original text.
const Iterator = struct {
    /// Text that was parsed; every token's text is a slice of it.
    text: []const u8,
    parser: *Parser,
    tree: *Tree,
    cursor: TreeCursor,
    /// Node held back by `next` after it emitted the filler preceding it.
    next_node: ?Node = null,
    /// Byte offset into `text` up to which tokens have been emitted.
    pos: u32 = 0,
    /// Set once the cursor has handed out the last node of the tree.
    exhausted: bool = false,

    /// Returns the next tree-sitter node in pre-order,
    /// or null once every node has been returned.
    fn nextNode(self: *Iterator) ?Node {
        if (self.next_node) |held_back| {
            self.next_node = null;
            return held_back;
        }
        if (self.exhausted)
            return null;

        // The cursor always points at the node to hand out now;
        // advance it to the following node before returning.
        const node = self.cursor.node();
        if (self.cursor.gotoFirstChild() or self.cursor.gotoNextSibling())
            return node;
        // No child and no sibling: climb until an ancestor has a sibling.
        while (self.cursor.gotoParent()) {
            if (self.cursor.gotoNextSibling())
                return node;
        }
        // Nothing follows: this is the last node of the walk.  It must
        // still be returned; only later calls report the end.
        self.exhausted = true;
        return node;
    }

    /// Returns the next token, or null when the whole text has been emitted.
    pub fn next(self: *Iterator) ?Token {
        if (self.pos == self.text.len)
            return null;
        while (self.nextNode()) |node| {
            // Only leaves are emitted.  A node with children spans its
            // descendants, so emitting it would move `pos` past their
            // start and break the ordering assumed below.
            if (node.childCount() > 0)
                continue;
            const start = node.startByte();
            const end = node.endByte();
            switch (order(self.pos, start)) {
                .lt => {
                    // Text not covered by any leaf: emit it first and
                    // keep the node for the following call.
                    const filler = self.text[self.pos..start];
                    self.next_node = node;
                    self.pos = start;
                    return .{ .text = filler };
                },
                .eq => {
                    self.pos = end;
                    return .{ .text = self.text[start..end], .node = node };
                },
                // Relies on leaves being visited in text order
                // without overlapping one another.
                .gt => unreachable,
            }
        }
        // Out of nodes: whatever remains is trailing filler.
        switch (order(self.pos, self.text.len)) {
            .lt => {
                const rest = self.text[self.pos..];
                self.pos = @intCast(self.text.len);
                return .{ .text = rest };
            },
            .eq => return null,
            .gt => unreachable,
        }
    }

    /// Rewinds the iterator to the beginning of the text.
    pub fn reset(self: *Iterator) void {
        self.cursor.reset(self.tree.rootNode());
        self.next_node = null;
        self.pos = 0;
        self.exhausted = false;
    }

    /// Destroys the cursor, the tree, the parser and the parser's language,
    /// then invalidates the iterator.
    pub fn deinit(self: *Iterator) void {
        self.cursor.destroy();
        self.tree.destroy();
        self.parser.getLanguage().?.destroy();
        self.parser.destroy();
        self.* = undefined;
    }
};

/// Parse text in given language and return an iterator of tokens.
///
/// The iterator keeps slices of `text` rather than a copy.  It also holds
/// the parser, the tree and the language created here; release them with
/// the iterator's `deinit`.
///
/// Returns `error.IncompatibleVersion` if the parser refuses the language,
/// in which case everything created so far is destroyed again.
pub fn ize(text: []const u8, createLanguage: CreateLanguage) error{
    IncompatibleVersion,
}!Iterator {
    const parser = Parser.create();
    errdefer parser.destroy();
    // Until the parser accepts the language nothing else refers to it,
    // so it has to be released here on failure.
    const language = createLanguage();
    errdefer language.destroy();
    try parser.setLanguage(language);
    // NOTE(review): a null tree is treated as impossible here; presumably
    // it cannot happen once a language has been set -- confirm.
    const tree = parser.parseString(text, null).?;
    return .{
        .text = text,
        .parser = parser,
        .tree = tree,
        .cursor = tree.walk(),
    };
}