# lib/bup/cmd/restore.py

from __future__ import absolute_import
from stat import S_ISDIR
import copy, errno, os, re, stat, sys

from bup import options, git, vfs
from bup._helpers import write_sparsely
from bup.compat import argv_bytes, fsencode
from bup.helpers import (add_error, chunkyreader, die_if_errors,
                         mkdirp, parse_rx_excludes, progress, qprogress,
                         should_rx_exclude_path)
from bup.io import byte_stream
from bup.repo import LocalRepo, RemoteRepo


optspec = """
bup restore [-r host:path] [-C outdir] </branch/revision/path/to/dir ...>
--
r,remote=   remote repository path
C,outdir=   change to given outdir before extracting files
numeric-ids restore numeric IDs (user, group, etc.) rather than names
exclude-rx= skip paths matching the unanchored regex (may be repeated)
exclude-rx-from= skip --exclude-rx patterns in file (may be repeated)
sparse      create sparse files
v,verbose   increase log output (can be used more than once)
map-user=   given OLD=NEW, restore OLD user as NEW user
map-group=  given OLD=NEW, restore OLD group as NEW group
map-uid=    given OLD=NEW, restore OLD uid as NEW uid
map-gid=    given OLD=NEW, restore OLD gid as NEW gid
q,quiet     don't show progress meter
"""
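
# Example invocations (illustrative; branch and save names are hypothetical):
#   bup restore -C ./dest /mybranch/latest/etc
#   bup restore --sparse -r backups:/srv/bup -C ./dest /mybranch/latest/home/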

total_restored = 0

# stdout should be flushed after each line, even when not connected to a tty
stdoutfd = sys.stdout.fileno()
sys.stdout.flush()
sys.stdout = os.fdopen(stdoutfd, 'w', 1)
out = byte_stream(sys.stdout)

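# A restorable path must name at least a branch plus one more component
# (normally the revision/save), e.g. (illustrative) b'/mybranch/latest/etc'
# is accepted while b'/mybranch' and b'/' are rejected.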
def valid_restore_path(path):
    path = os.path.normpath(path)
    if path.startswith(b'/'):
        path = path[1:]
    if b'/' in path:
        return True
    return False

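# For example (hypothetical flags): given --map-uid 1000=501 and
# --map-user root=admin, parse_owner_mappings('uid', flags, o.fatal) returns
# {1000: 501}, and parse_owner_mappings('user', flags, o.fatal) returns
# {b'root': b'admin'} (names stay bytes; uid/gid values are converted to int).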
def parse_owner_mappings(type, options, fatal):
    """Traverse the options and parse all --map-TYPEs, or call Option.fatal()."""
    opt_name = '--map-' + type
    if type in ('uid', 'gid'):
        value_rx = re.compile(br'^(-?[0-9]+)=(-?[0-9]+)$')
    else:
        value_rx = re.compile(br'^([^=]+)=([^=]*)$')
    owner_map = {}
    for flag in options:
        (option, parameter) = flag
        if option != opt_name:
            continue
        parameter = argv_bytes(parameter)
        match = value_rx.match(parameter)
        if not match:
            raise fatal("couldn't parse %r as %s mapping" % (parameter, type))
        old_id, new_id = match.groups()
        if type in ('uid', 'gid'):
            old_id = int(old_id)
            new_id = int(new_id)
        owner_map[old_id] = new_id
    return owner_map

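# Note: the owner_map expected here is the combined dict built in main(), with
# one sub-map for each of 'user', 'group', 'uid', and 'gid' (each produced by
# parse_owner_mappings above), not a single one of those sub-maps.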
def apply_metadata(meta, name, restore_numeric_ids, owner_map):
    m = copy.deepcopy(meta)
    m.user = owner_map['user'].get(m.user, m.user)
    m.group = owner_map['group'].get(m.group, m.group)
    m.uid = owner_map['uid'].get(m.uid, m.uid)
    m.gid = owner_map['gid'].get(m.gid, m.gid)
    m.apply_to_path(name, restore_numeric_ids = restore_numeric_ids)

def hardlink_compatible(prev_path, prev_item, new_item, top):
    prev_candidate = top + prev_path
    if not os.path.exists(prev_candidate):
        return False
    prev_meta, new_meta = prev_item.meta, new_item.meta
    if new_item.oid != prev_item.oid \
            or new_meta.mtime != prev_meta.mtime \
            or new_meta.ctime != prev_meta.ctime \
            or new_meta.mode != prev_meta.mode:
        return False
    # FIXME: should we be checking the path on disk, or the recorded metadata?
    # The exists() above might seem to suggest the former.
    if not new_meta.same_file(prev_meta):
        return False
    return True

def hardlink_if_possible(fullname, item, top, hardlinks):
    """Find a suitable hardlink target, link to it, and return true,
    otherwise return false."""
    # The cwd will be dirname(fullname), and fullname will be
    # absolute, i.e. /foo/bar, and the caller is expected to handle
    # restoring the metadata if hardlinking isn't possible.

    # FIXME: we can probably replace the target_vfs_path with the
    # relevant vfs item

    # hardlinks maps each hardlink_target to a list of (restore_path,
    # vfs_item) pairs, one for each path we've already written for that
    # target.  This lets us handle the case where we restore a set of
    # hardlinks out of order (with respect to the original save
    # call(s)) -- i.e. when we don't restore the hardlink_target path
    # first.  This data also allows us to attempt to handle other
    # situations like hardlink sets that change on disk during a save,
    # or between index and save.
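    # Shape of hardlinks (illustrative):
    #   {hardlink_target: [(b'/restored/path/one', item1),
    #                      (b'/restored/path/two', item2)], ...}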

    target = item.meta.hardlink_target
    assert(target)
    assert(fullname.startswith(b'/'))
    target_versions = hardlinks.get(target)
    if target_versions:
        # Check every path in the set that we've written so far for a match.
        for prev_path, prev_item in target_versions:
            if hardlink_compatible(prev_path, prev_item, item, top):
                try:
                    os.link(top + prev_path, top + fullname)
                    return True
                except OSError as e:
                    if e.errno != errno.EXDEV:
                        raise
    else:
        target_versions = []
        hardlinks[target] = target_versions
    target_versions.append((fullname, item))
    return False

def write_file_content(repo, dest_path, vfs_file):
    with vfs.fopen(repo, vfs_file) as inf:
        with open(dest_path, 'wb') as outf:
            for b in chunkyreader(inf):
                outf.write(b)

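# Sparse variant: write_sparsely() (a C helper from bup._helpers) is used to
# seek over long runs of zeros rather than writing them.  As called below it
# takes (fd, buf, min_sparse_len, prev_trailing_zeros) and returns the count of
# trailing zero bytes it has deferred, so the final lseek()/ftruncate() extends
# the file to its true length, leaving any trailing zeros as a hole.  (Behavior
# inferred from this call site rather than from the helper's own documentation.)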
def write_file_content_sparsely(repo, dest_path, vfs_file):
    with vfs.fopen(repo, vfs_file) as inf:
        outfd = os.open(dest_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        try:
            trailing_zeros = 0
            for b in chunkyreader(inf):
                trailing_zeros = write_sparsely(outfd, b, 512, trailing_zeros)
            pos = os.lseek(outfd, trailing_zeros, os.SEEK_END)
            os.ftruncate(outfd, pos)
        finally:
            os.close(outfd)

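# restore() writes each entry relative to the current directory, chdir()ing
# into every tree as it recurses and back out afterwards; `top` (the absolute
# restore root) is only used to build the absolute paths needed for hardlink
# bookkeeping.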
def restore(repo, parent_path, name, item, top, sparse, numeric_ids, owner_map,
            exclude_rxs, verbosity, hardlinks):
    global total_restored
    mode = vfs.item_mode(item)
    treeish = S_ISDIR(mode)
    fullname = parent_path + b'/' + name
    # Match behavior of index --exclude-rx with respect to paths.
    if should_rx_exclude_path(fullname + (b'/' if treeish else b''),
                              exclude_rxs):
        return

    if not treeish:
        # Do this now so we'll have meta.symlink_target for verbose output
        item = vfs.augment_item_meta(repo, item, include_size=True)
        meta = item.meta
        assert(meta.mode == mode)

    if stat.S_ISDIR(mode):
        if verbosity >= 1:
            out.write(b'%s/\n' % fullname)
    elif stat.S_ISLNK(mode):
        assert(meta.symlink_target)
        if verbosity >= 2:
            out.write(b'%s@ -> %s\n' % (fullname, meta.symlink_target))
    else:
        if verbosity >= 2:
            out.write(fullname + b'\n')

    orig_cwd = os.getcwd()
    try:
        if treeish:
            # Assumes contents() returns '.' with the full metadata first
            sub_items = vfs.contents(repo, item, want_meta=True)
            dot, item = next(sub_items, None)
            assert(dot == b'.')
            item = vfs.augment_item_meta(repo, item, include_size=True)
            meta = item.meta
            meta.create_path(name)
            os.chdir(name)
            total_restored += 1
            if verbosity >= 0:
                qprogress('Restoring: %d\r' % total_restored)
            for sub_name, sub_item in sub_items:
                restore(repo, fullname, sub_name, sub_item, top, sparse,
                        numeric_ids, owner_map, exclude_rxs, verbosity,
                        hardlinks)
            os.chdir(b'..')
            apply_metadata(meta, name, numeric_ids, owner_map)
        else:
            created_hardlink = False
            if meta.hardlink_target:
                created_hardlink = hardlink_if_possible(fullname, item, top,
                                                        hardlinks)
            if not created_hardlink:
                meta.create_path(name)
                if stat.S_ISREG(meta.mode):
                    if sparse:
                        write_file_content_sparsely(repo, name, item)
                    else:
                        write_file_content(repo, name, item)
            total_restored += 1
            if verbosity >= 0:
                qprogress('Restoring: %d\r' % total_restored)
            if not created_hardlink:
                apply_metadata(meta, name, numeric_ids, owner_map)
    finally:
        os.chdir(orig_cwd)

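# Command-line entry point: parse the options, then resolve each requested
# /branch/revision/path through the VFS and restore it into the current
# directory (or into --outdir if given).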
def main(argv):
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])
    verbosity = (opt.verbose or 0) if not opt.quiet else -1
    if opt.remote:
        opt.remote = argv_bytes(opt.remote)
    if opt.outdir:
        opt.outdir = argv_bytes(opt.outdir)

    git.check_repo_or_die()

    if not extra:
        o.fatal('must specify at least one filename to restore')

    exclude_rxs = parse_rx_excludes(flags, o.fatal)

    owner_map = {}
    for map_type in ('user', 'group', 'uid', 'gid'):
        owner_map[map_type] = parse_owner_mappings(map_type, flags, o.fatal)

    if opt.outdir:
        mkdirp(opt.outdir)
        os.chdir(opt.outdir)

    with RemoteRepo(opt.remote) if opt.remote else LocalRepo() as repo:
        top = fsencode(os.getcwd())
        hardlinks = {}
        for path in [argv_bytes(x) for x in extra]:
            if not valid_restore_path(path):
                add_error("path %r doesn't include a branch and revision" % path)
                continue
            try:
                resolved = vfs.resolve(repo, path, want_meta=True, follow=False)
            except vfs.IOError as e:
                add_error(e)
                continue
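            # resolve() returns one (name, item) pair per path component,
            # starting with the root, so a 3-element result whose last name is
            # b'latest' means the request was simply /branch/latest.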
            if len(resolved) == 3 and resolved[2][0] == b'latest':
                # Follow latest symlink to the actual save
                try:
                    resolved = vfs.resolve(repo, b'latest', parent=resolved[:-1],
                                           want_meta=True)
                except vfs.IOError as e:
                    add_error(e)
                    continue
                # Rename it back to 'latest' so that what we create on disk
                # matches the name the user asked for.
                resolved = tuple(elt if i != 2 else (b'latest',) + elt[1:]
                                 for i, elt in enumerate(resolved))
            path_parent, path_name = os.path.split(path)
            leaf_name, leaf_item = resolved[-1]
            if not leaf_item:
                add_error('error: cannot access %r in %r'
                          % (b'/'.join(name for name, item in resolved),
                             path))
                continue
            if not path_name or path_name == b'.':
                # Source is /foo/what/ever/ or /foo/what/ever/. -- extract
                # what/ever/* to the current directory, and if name == '.'
                # (i.e. /foo/what/ever/.), then also restore what/ever's
                # metadata to the current directory.
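                # E.g. (illustrative): restoring /mybranch/latest/etc/ recreates
                # etc's entries directly in the current directory, and
                # /mybranch/latest/etc/. additionally applies etc's own metadata
                # to the current directory, while /mybranch/latest/etc (no
                # trailing slash) creates ./etc (handled by the else below).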
                treeish = vfs.item_mode(leaf_item)
                if not treeish or not S_ISDIR(treeish):
                    add_error('%r cannot be restored as a directory' % path)
                else:
                    items = vfs.contents(repo, leaf_item, want_meta=True)
                    dot, leaf_item = next(items, None)
                    assert dot == b'.'
                    for sub_name, sub_item in items:
                        restore(repo, b'', sub_name, sub_item, top,
                                opt.sparse, opt.numeric_ids, owner_map,
                                exclude_rxs, verbosity, hardlinks)
                    if path_name == b'.':
                        leaf_item = vfs.augment_item_meta(repo, leaf_item,
                                                          include_size=True)
                        apply_metadata(leaf_item.meta, b'.',
                                       opt.numeric_ids, owner_map)
            else:
                restore(repo, b'', leaf_name, leaf_item, top,
                        opt.sparse, opt.numeric_ids, owner_map,
                        exclude_rxs, verbosity, hardlinks)

    if verbosity >= 0:
        progress('Restoring: %d, done.\n' % total_restored)
    die_if_errors()