about summary refs log tree commit diff
path: root/src/rub/xml.py
blob: 4c3a2aef6c4ab379a95651c4b9b1ec597925f8f9 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# XML processing abstractions
# Copyright (C) 2023  Nguyễn Gia Phong
#
# This file is part of rub.
#
# Rub is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Rub is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with rub.  If not, see <https://www.gnu.org/licenses/>.

from copy import deepcopy
from pathlib import Path

from lxml.builder import E
from lxml.html import document_fromstring as from_html
from lxml.etree import QName, XML, XSLT, XSLTExtension, tostring as serialize

# Names exported by a star-import of this module.
__all__ = ['NS', 'Processor', 'recurse']

# XML namespace URI under which the XSLT extension elements and their
# handlers are registered, and in which source elements are looked up.
NS = 'https://rub.parody'


def recurse(extension, context, input_node, output_parent):
    """Apply template recursively on input node.

    A deep copy of input_node, emptied of its children, is filled with
    the results of extension.apply_templates() for every child of
    input_node and is then appended to output_parent.

    Non-string results are deep-copied and appended as children.
    String results are concatenated onto the text position preceding
    them: the copy's own text while it has no children yet, otherwise
    the tail of its last child.
    """
    output = deepcopy(input_node)
    # Iterate over a snapshot: removing children from the very element
    # being iterated over is only correct when the iterator happens
    # to tolerate concurrent mutation.
    for child in list(output):
        output.remove(child)
    for child in input_node:
        for result in extension.apply_templates(context, child):
            if not isinstance(result, str):
                output.append(deepcopy(result))
            elif len(output) == 0:
                output.text = (output.text or '') + result
            else:
                last = output[-1]
                last.tail = (last.tail or '') + result
    output_parent.append(output)


class Evaluator(XSLTExtension):
    """XSLT extension element dispatching input nodes to handlers.

    Each keyword argument maps a local element name to a callable;
    the name is qualified with the NS namespace when stored.
    """

    def __init__(self, **handlers):
        qualified = {}
        for name, handler in handlers.items():
            qualified[QName(NS, name).text] = handler
        self.handlers = qualified
        super().__init__()

    def execute(self, context, self_node, input_node, output_parent):
        """Run the handler registered for the input node's tag.

        Nodes whose tag has no registered handler go through recurse.
        """
        tag = input_node.tag
        handler = self.handlers[tag] if tag in self.handlers else recurse
        handler(self, context, input_node, output_parent)


class Serializer(XSLTExtension):
    """XSLT extension element emitting the input node as markup text."""

    def execute(self, context, self_node, input_node, output_parent):
        """Set the output parent's text to the serialized input node."""
        # The node is copied before serialization
        # (presumably to detach it from its document -- confirm).
        detached = deepcopy(input_node)
        markup = serialize(detached)
        output_parent.text = markup


class Processor:
    """XSLT processor bound to a single stylesheet.

    The stylesheet is compiled once at construction, with the extension
    elements 'eval' and 'serialize' registered in the NS namespace;
    process() then transforms one source file per call.
    """

    def __init__(self, xslt: Path, change_name, **handlers) -> None:
        """Compile the stylesheet read from xslt.

        xslt and change_name are stored on the instance as given;
        change_name is not otherwise used by this class.
        The keyword arguments are passed to Evaluator as handlers.
        """
        self.xslt = xslt
        self.change_name = change_name
        stylesheet = xslt.read_bytes()
        extensions = {(NS, 'eval'): Evaluator(**handlers),
                      (NS, 'serialize'): Serializer()}
        self.transform = XSLT(XML(stylesheet), extensions=extensions)

    def process(self, src: Path, dest: Path) -> None:
        """Transform the XML file src and write the result to dest.

        Missing parent directories of dest are created.
        """
        dest.parent.mkdir(mode=0o755, parents=True, exist_ok=True)
        result = self.transform(XML(src.read_bytes()))
        # Encode explicitly: without an encoding argument, write_text
        # falls back to the locale's preferred encoding, which makes
        # the output platform-dependent and may fail on non-ASCII text.
        dest.write_text(str(result), encoding='utf-8')


def gen_omnifeed(sources: list[Path], pages: list[Path],
                 out_dir: Path, dest: Path) -> None:
    """Generate generic global feed.

    sources and pages are walked in lockstep.  A source whose
    description, looked up in the NS namespace, is missing or empty
    is skipped.  Every other pair yields an entry holding the source's
    title, description and date, the page's path relative to out_dir
    and the parsed HTML of the page.

    The entries are wrapped in a feed element and written
    pretty-printed to dest, creating its parent directories if needed.
    """
    namespaces = {None: NS}
    entries = []
    for source, page in zip(sources, pages):
        root = XML(source.read_bytes())
        description = root.findtext('description', '', namespaces)
        if not description:
            continue
        title = root.findtext('title', '', namespaces)
        date = root.findtext('date', '', namespaces)
        html = from_html(page.read_bytes())
        relative = str(page.relative_to(out_dir))
        entry = E.entry(E.title(title), E.description(description),
                        E.date(date), E.path(relative), html)
        entries.append(entry)
    dest.parent.mkdir(mode=0o755, parents=True, exist_ok=True)
    feed = E.feed(*entries)
    dest.write_bytes(serialize(feed, pretty_print=True))