summary refs log tree commit diff
path: root/gnu/packages/patches/borg-fix-hard-link-preloading.patch
blob: 92a4e2267422720bb070fc452fea35634cfdbcbf (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
Fix a hard link chunk preloading bug that causes the test suite to hang:

https://github.com/borgbackup/borg/issues/4350

Patch copied from the upstream source repository:

https://github.com/borgbackup/borg/commit/18242ab9e2f26c450b8507aa1d5eceadab8ad027

From 18242ab9e2f26c450b8507aa1d5eceadab8ad027 Mon Sep 17 00:00:00 2001
From: Thomas Waldmann <tw@waldmann-edv.de>
Date: Thu, 2 May 2019 21:02:26 +0200
Subject: [PATCH] preload chunks for hardlink slaves w/o preloaded master,
 fixes #4350

also split the hardlink extraction test into 2 tests.

(cherry picked from commit f33f318d816505161d1449a02ddfdeb97d6fe80a)
---
 src/borg/archive.py            | 42 +++++++++++++++++++++++++++++-----
 src/borg/archiver.py           |  5 ++--
 src/borg/testsuite/archiver.py | 20 +++++++++-------
 3 files changed, 51 insertions(+), 16 deletions(-)

diff --git a/src/borg/archive.py b/src/borg/archive.py
index adc1f42c..0793672a 100644
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@@ -192,7 +192,7 @@ def __init__(self, repository, key):
         self.repository = repository
         self.key = key
 
-    def unpack_many(self, ids, filter=None, preload=False):
+    def unpack_many(self, ids, filter=None, partial_extract=False, preload=False, hardlink_masters=None):
         """
         Return iterator of items.
 
@@ -209,12 +209,40 @@ def unpack_many(self, ids, filter=None, preload=False):
             for item in items:
                 if 'chunks' in item:
                     item.chunks = [ChunkListEntry(*e) for e in item.chunks]
+
+            def preload(chunks):
+                self.repository.preload([c.id for c in chunks])
+
             if filter:
                 items = [item for item in items if filter(item)]
+
             if preload:
-                for item in items:
-                    if 'chunks' in item:
-                        self.repository.preload([c.id for c in item.chunks])
+                if filter and partial_extract:
+                    # if we do only a partial extraction, it gets a bit
+                    # complicated with computing the preload items: if a hardlink master item is not
+                    # selected (== not extracted), we will still need to preload its chunks if a
+                    # corresponding hardlink slave is selected (== is extracted).
+                    # due to a side effect of the filter() call, we now have hardlink_masters dict populated.
+                    masters_preloaded = set()
+                    for item in items:
+                        if 'chunks' in item:  # regular file, maybe a hardlink master
+                            preload(item.chunks)
+                            # if this is a hardlink master, remember that we already preloaded it:
+                            if 'source' not in item and hardlinkable(item.mode) and item.get('hardlink_master', True):
+                                masters_preloaded.add(item.path)
+                        elif 'source' in item and hardlinkable(item.mode):  # hardlink slave
+                            source = item.source
+                            if source not in masters_preloaded:
+                                # we only need to preload *once* (for the 1st selected slave)
+                                chunks, _ = hardlink_masters[source]
+                                preload(chunks)
+                                masters_preloaded.add(source)
+                else:
+                    # easy: we do not have a filter, thus all items are selected, thus we need to preload all chunks.
+                    for item in items:
+                        if 'chunks' in item:
+                            preload(item.chunks)
+
             for item in items:
                 yield item
 
@@ -433,8 +461,10 @@ def item_filter(self, item, filter=None):
             return False
         return filter(item) if filter else True
 
-    def iter_items(self, filter=None, preload=False):
-        for item in self.pipeline.unpack_many(self.metadata.items, preload=preload,
+    def iter_items(self, filter=None, partial_extract=False, preload=False, hardlink_masters=None):
+        assert not (filter and partial_extract and preload) or hardlink_masters is not None
+        for item in self.pipeline.unpack_many(self.metadata.items, partial_extract=partial_extract,
+                                              preload=preload, hardlink_masters=hardlink_masters,
                                               filter=lambda item: self.item_filter(item, filter)):
             yield item
 
diff --git a/src/borg/archiver.py b/src/borg/archiver.py
index 957959d6..dcc20455 100644
--- a/src/borg/archiver.py
+++ b/src/borg/archiver.py
@@ -755,7 +755,8 @@ def peek_and_store_hardlink_masters(item, matched):
         else:
             pi = None
 
-        for item in archive.iter_items(filter, preload=True):
+        for item in archive.iter_items(filter, partial_extract=partial_extract,
+                                       preload=True, hardlink_masters=hardlink_masters):
             orig_path = item.path
             if strip_components:
                 item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
@@ -997,7 +998,7 @@ def item_to_tarinfo(item, original_path):
                 return None, stream
             return tarinfo, stream
 
-        for item in archive.iter_items(filter, preload=True):
+        for item in archive.iter_items(filter, preload=True, hardlink_masters=hardlink_masters):
             orig_path = item.path
             if strip_components:
                 item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py
index c35ad800..935b3d79 100644
--- a/src/borg/testsuite/archiver.py
+++ b/src/borg/testsuite/archiver.py
@@ -823,7 +823,18 @@ def test_mount_hardlinks(self):
             assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
 
     @requires_hardlinks
-    def test_extract_hardlinks(self):
+    def test_extract_hardlinks1(self):
+        self._extract_hardlinks_setup()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+            assert os.stat('input/source').st_nlink == 4
+            assert os.stat('input/abba').st_nlink == 4
+            assert os.stat('input/dir1/hardlink').st_nlink == 4
+            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4
+            assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
+
+    @requires_hardlinks
+    def test_extract_hardlinks2(self):
         self._extract_hardlinks_setup()
         with changedir('output'):
             self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
@@ -839,13 +850,6 @@ def test_extract_hardlinks(self):
             assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
             assert os.stat('input/dir1/aaaa').st_nlink == 2
             assert os.stat('input/dir1/source2').st_nlink == 2
-        with changedir('output'):
-            self.cmd('extract', self.repository_location + '::test')
-            assert os.stat('input/source').st_nlink == 4
-            assert os.stat('input/abba').st_nlink == 4
-            assert os.stat('input/dir1/hardlink').st_nlink == 4
-            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4
-            assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
 
     def test_extract_include_exclude(self):
         self.cmd('init', '--encryption=repokey', self.repository_location)
-- 
2.21.0