1 """Helper functions and classes for bup."""
3 from __future__ import absolute_import, division
4 from collections import namedtuple
5 from contextlib import ExitStack
6 from ctypes import sizeof, c_void_p
9 from subprocess import PIPE, Popen
10 from tempfile import mkdtemp
11 from shutil import rmtree
12 import sys, os, subprocess, errno, select, mmap, stat, re, struct
13 import hashlib, heapq, math, operator, time
15 from bup import _helpers
17 from bup.compat import argv_bytes, byte_int, nullcontext, pending_raise
18 from bup.io import byte_stream, path_msg
19 # This function should really be in helpers, not in bup.options. But we
20 # want options.py to be standalone so people can include it in other projects.
21 from bup.options import _tty_width as tty_width
# Debug verbosity level, taken from the BUP_DEBUG environment variable
# (0 when unset).
buglvl = int(os.environ.get('BUP_DEBUG', 0))
28 """Helper to deal with Python scoping issues"""
def nullcontext_if_not(manager):
    """Return manager as-is, or a no-op context manager when it is None."""
    if manager is None:
        return nullcontext()
    return manager
37 def __init__(self, enter_result=None, finalize=None):
39 self.finalize = finalize
40 self.enter_result = enter_result
42 return self.enter_result
43 def __exit__(self, exc_type, exc_value, traceback):
44 self.finalize(self.enter_result)
def temp_dir(*args, **kwargs):
    """Return a context manager yielding a new temporary directory path.

    The directory (and its contents) is removed when the context exits.
    """
    # Preferred over tempfile.TemporaryDirectory: that class is built on
    # @contextmanager, so once handed to an ExitStack it will always
    # eventually be deleted when the stack is gc'ed -- even after
    # pop_all() (the new stack also triggers the deletion) because
    # https://github.com/python/cpython/issues/88458
    return finalized(mkdtemp(*args, **kwargs), rmtree)
# System page size; required by the mmap/mincore helpers further below.
sc_page_size = os.sysconf('SC_PAGE_SIZE')
assert(sc_page_size > 0)
# Maximum combined size of exec() arguments (consulted by batchpipe);
# sysconf returns -1 for "no definite limit", in which case we pick 2MB.
sc_arg_max = os.sysconf('SC_ARG_MAX')
if sc_arg_max == -1: # "no definite limit" - let's choose 2M
    sc_arg_max = 2 * 1024 * 1024
64 for result in iterable:
69 _fdatasync = os.fdatasync
70 except AttributeError:
73 if sys.platform.startswith('darwin'):
74 # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
78 return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
80 # Fallback for file systems (SMB) that do not support F_FULLFSYNC
81 if e.errno == errno.ENOTSUP:
86 fdatasync = _fdatasync
89 def partition(predicate, stream):
90 """Returns (leading_matches_it, rest_it), where leading_matches_it
91 must be completely exhausted before traversing rest_it.
96 ns.first_nonmatch = None
97 def leading_matches():
102 ns.first_nonmatch = (x,)
105 if ns.first_nonmatch:
106 yield ns.first_nonmatch[0]
109 return (leading_matches(), rest())
119 def lines_until_sentinel(f, sentinel, ex_type):
120 # sentinel must end with \n and must contain only one \n
123 if not (line and line.endswith(b'\n')):
124 raise ex_type('Hit EOF while reading line')
130 def stat_if_exists(path):
134 if e.errno != errno.ENOENT:
139 # Write (blockingly) to sockets that may or may not be in blocking mode.
140 # We need this because our stderr is sometimes eaten by subprocesses
141 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
142 # leading to race conditions. Ick. We'll do it the hard way.
143 def _hard_write(fd, buf):
145 (r,w,x) = select.select([], [fd], [], None)
147 raise IOError('select(fd) returned without being writable')
149 sz = os.write(fd, buf)
151 if e.errno != errno.EAGAIN:
159 """Print a log message to stderr."""
162 _hard_write(sys.stderr.fileno(), s if isinstance(s, bytes) else s.encode())
# True when the corresponding stream (1: stdout, 2: stderr) is a TTY, or
# when forced via the matching bit of the BUP_FORCE_TTY environment variable.
istty1 = os.isatty(1) or (int(os.environ.get('BUP_FORCE_TTY', 0)) & 1)
istty2 = os.isatty(2) or (int(os.environ.get('BUP_FORCE_TTY', 0)) & 2)
180 """Calls log() if stderr is a TTY. Does nothing otherwise."""
181 global _last_progress
188 """Calls progress() only if we haven't printed progress in a while.
190 This avoids overloading the stderr buffer with excess junk.
194 if now - _last_prog > 0.1:
200 """Calls progress() to redisplay the most recent progress message.
202 Useful after you've printed some other message that wipes out the
205 if _last_progress and _last_progress.endswith('\r'):
206 progress(_last_progress)
209 def mkdirp(d, mode=None):
210 """Recursively create directories on path 'd'.
212 Unlike os.makedirs(), it doesn't raise an exception if the last element of
213 the path already exists.
221 if e.errno == errno.EEXIST:
228 def __init__(self, entry, read_it):
230 self.read_it = read_it
232 return self.entry < x.entry
234 def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
236 samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
238 samekey = operator.eq
240 total = sum(len(it) for it in iters)
241 iters = (iter(it) for it in iters)
242 heap = ((next(it, None),it) for it in iters)
243 heap = [MergeIterItem(e, it) for e, it in heap if e]
248 if not count % pfreq:
250 e, it = heap[0].entry, heap[0].read_it
251 if not samekey(e, pe):
257 except StopIteration:
258 heapq.heappop(heap) # remove current
260 # shift current to new location
261 heapq.heapreplace(heap, MergeIterItem(e, it))
266 """Delete a file at path 'f' if it currently exists.
268 Unlike os.unlink(), does not throw an exception if the file didn't already
274 if e.errno != errno.ENOENT:
# Strings matching these patterns can be passed to a shell without quoting
# (bytes and str variants respectively).
_bq_simple_id_rx = re.compile(br'^[-_./a-zA-Z0-9]+$')
_sq_simple_id_rx = re.compile(r'^[-_./a-zA-Z0-9]+$')
284 if _bq_simple_id_rx.match(x):
286 return b"'%s'" % x.replace(b"'", b"'\"'\"'")
291 if _sq_simple_id_rx.match(x):
293 return "'%s'" % x.replace("'", "'\"'\"'")
296 if isinstance(x, bytes):
298 if isinstance(x, str):
301 # some versions of pylint get confused
305 """Return a shell quoted string for cmd if it's a sequence, else cmd.
307 cmd must be a string, bytes, or a sequence of one or the other,
308 and the assumption is that if cmd is a string or bytes, then it's
309 already quoted (because it's what's actually being passed to
310 call() and friends. e.g. log(shstr(cmd)); call(cmd)
313 if isinstance(cmd, (bytes, str)):
315 elif all(isinstance(x, bytes) for x in cmd):
316 return b' '.join(map(bquote, cmd))
317 elif all(isinstance(x, str) for x in cmd):
318 return ' '.join(map(squote, cmd))
319 raise TypeError('unsupported shstr argument: ' + repr(cmd))
# Convenience alias for subprocess.check_call.
exc = subprocess.check_call
333 assert stdin in (None, PIPE)
336 stdin=stdin, stdout=PIPE, stderr=stderr,
338 preexec_fn=preexec_fn,
340 out, err = p.communicate(input)
341 if check and p.returncode != 0:
342 raise Exception('subprocess %r failed with status %d%s'
343 % (b' '.join(map(quote, cmd)), p.returncode,
344 ', stderr: %r' % err if err else ''))
def readpipe(argv, preexec_fn=None, shell=False):
    """Run a subprocess and return its output."""
    # exo() handles invocation and error checking; we only want stdout.
    result = exo(argv, preexec_fn=preexec_fn, shell=shell)
    return result[0]
352 def _argmax_base(command):
355 base_size += len(command) + 1
356 for k, v in environ.items():
357 base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
361 def _argmax_args_size(args):
362 return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
365 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
366 """If args is not empty, yield the output produced by calling the
367 command list with args as a sequence of strings (It may be necessary
368 to return multiple strings in order to respect ARG_MAX)."""
369 # The optional arg_max arg is a workaround for an issue with the
370 # current wvtest behavior.
371 base_size = _argmax_base(command)
373 room = arg_max - base_size
376 next_size = _argmax_args_size(args[i:i+1])
377 if room - next_size < 0:
383 assert(len(sub_args))
384 yield readpipe(command + sub_args, preexec_fn=preexec_fn)
387 def resolve_parent(p):
388 """Return the absolute path of a file without following any final symlink.
390 Behaves like os.path.realpath, but doesn't follow a symlink for the last
391 element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
392 will follow symlinks in p's directory)
398 if st and stat.S_ISLNK(st.st_mode):
399 (dir, name) = os.path.split(p)
400 dir = os.path.realpath(dir)
401 out = os.path.join(dir, name)
403 out = os.path.realpath(p)
404 #log('realpathing:%r,%r\n' % (p, out))
def detect_fakeroot():
    """Return True if we appear to be running under fakeroot."""
    # fakeroot exports FAKEROOTKEY into the environment of its children.
    # Use "is not None" rather than "!= None" (identity test is the
    # correct idiom for None comparisons).
    return os.getenv("FAKEROOTKEY") is not None
413 if sys.platform.startswith('cygwin'):
415 # https://cygwin.com/ml/cygwin/2015-02/msg00057.html
416 groups = os.getgroups()
417 return 544 in groups or 0 in groups
420 return os.geteuid() == 0
423 def cache_key_value(get_value, key, cache):
424 """Return (value, was_cached). If there is a value in the cache
425 for key, use that, otherwise, call get_value(key) which should
426 throw a KeyError if there is no value -- in which case the cached
427 and returned value will be None.
429 try: # Do we already have it (or know there wasn't one)?
436 cache[key] = value = get_value(key)
444 """Get the FQDN of this machine."""
447 _hostname = _helpers.gethostname()
451 def format_filesize(size):
456 exponent = int(math.log(size) // math.log(unit))
457 size_prefix = "KMGTPE"[exponent - 1]
458 return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
461 class NotOk(Exception):
466 def __init__(self, outp):
467 self._base_closed = False
471 self._base_closed = True
476 def __exit__(self, exc_type, exc_value, tb):
477 with pending_raise(exc_value, rethrow=False):
481 assert self._base_closed
483 def _read(self, size):
484 raise NotImplementedError("Subclasses must implement _read")
486 def read(self, size):
487 """Read 'size' bytes from input stream."""
489 return self._read(size)
491 def _readline(self, size):
492 raise NotImplementedError("Subclasses must implement _readline")
495 """Read from input stream until a newline is found."""
497 return self._readline()
499 def write(self, data):
500 """Write 'data' to output stream."""
501 #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
502 self.outp.write(data)
505 """Return true if input stream is readable."""
506 raise NotImplementedError("Subclasses must implement has_input")
509 """Indicate end of output from last sent command."""
510 self.write(b'\nok\n')
513 """Indicate server error to the client."""
514 s = re.sub(br'\s+', b' ', s)
515 self.write(b'\nerror %s\n' % s)
517 def _check_ok(self, onempty):
520 for rl in linereader(self):
521 #log('%d got line: %r\n' % (os.getpid(), rl))
522 if not rl: # empty line
526 elif rl.startswith(b'error '):
527 #log('client: error: %s\n' % rl[6:])
531 raise Exception('server exited unexpectedly; see errors above')
533 def drain_and_check_ok(self):
534 """Remove all data for the current command from input stream."""
537 return self._check_ok(onempty)
540 """Verify that server action completed successfully."""
542 raise Exception('expected "ok", got %r' % rl)
543 return self._check_ok(onempty)
546 class Conn(BaseConn):
547 def __init__(self, inp, outp):
548 BaseConn.__init__(self, outp)
551 def _read(self, size):
552 return self.inp.read(size)
555 return self.inp.readline()
558 [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
560 assert(rl[0] == self.inp.fileno())
566 def checked_reader(fd, n):
568 rl, _, _ = select.select([fd], [], [])
571 if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
# Largest stdout payload forwarded per multiplexed packet (see mux()).
MAX_PACKET = 128 * 1024
577 def mux(p, outfd, outr, errr):
580 while p.poll() is None:
581 rl, _, _ = select.select(fds, [], [])
584 buf = os.read(outr, MAX_PACKET)
586 os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
588 buf = os.read(errr, 1024)
590 os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
592 os.write(outfd, struct.pack('!IB', 0, 3))
595 class DemuxConn(BaseConn):
596 """A helper class for bup's client-server protocol."""
597 def __init__(self, infd, outp):
598 BaseConn.__init__(self, outp)
599 # Anything that comes through before the sync string was not
600 # multiplexed and can be assumed to be debug/log before mux init.
602 stderr = byte_stream(sys.stderr)
603 while tail != b'BUPMUX':
604 # Make sure to write all pre-BUPMUX output to stderr
605 b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
607 ex = IOError('demux: unexpected EOF during initialization')
608 with pending_raise(ex):
612 stderr.write(tail[:-6])
620 def write(self, data):
622 BaseConn.write(self, data)
624 def _next_packet(self, timeout):
625 if self.closed: return False
626 rl, wl, xl = select.select([self.infd], [], [], timeout)
627 if not rl: return False
628 assert(rl[0] == self.infd)
629 ns = b''.join(checked_reader(self.infd, 5))
630 n, fdw = struct.unpack('!IB', ns)
632 # assume that something went wrong and print stuff
633 ns += os.read(self.infd, 1024)
634 stderr = byte_stream(sys.stderr)
637 raise Exception("Connection broken")
639 self.reader = checked_reader(self.infd, n)
641 for buf in checked_reader(self.infd, n):
642 byte_stream(sys.stderr).write(buf)
645 debug2("DemuxConn: marked closed\n")
648 def _load_buf(self, timeout):
649 if self.buf is not None:
651 while not self.closed:
652 while not self.reader:
653 if not self._next_packet(timeout):
656 self.buf = next(self.reader)
658 except StopIteration:
662 def _read_parts(self, ix_fn):
663 while self._load_buf(None):
664 assert(self.buf is not None)
666 if i is None or i == len(self.buf):
671 self.buf = self.buf[i:]
679 return buf.index(b'\n')+1
682 return b''.join(self._read_parts(find_eol))
684 def _read(self, size):
686 def until_size(buf): # Closes on csize
687 if len(buf) < csize[0]:
692 return b''.join(self._read_parts(until_size))
695 return self._load_buf(0)
699 """Generate a list of input lines from 'f' without terminating newlines."""
707 def chunkyreader(f, count = None):
708 """Generate a list of chunks of data read from 'f'.
710 If count is None, read until EOF is reached.
712 If count is a positive integer, read 'count' bytes from 'f'. If EOF is
713 reached while reading, raise IOError.
717 b = f.read(min(count, 65536))
719 raise IOError('EOF with %d bytes remaining' % count)
729 class atomically_replaced_file:
730 def __init__(self, path, mode='w', buffering=-1):
731 """Return a context manager supporting the atomic replacement of a file.
733 The context manager yields an open file object that has been
734 created in a mkdtemp-style temporary directory in the same
735 directory as the path. The temporary file will be renamed to
736 the target path (atomically if the platform allows it) if
737 there are no exceptions, and the temporary directory will
738 always be removed. Calling cancel() will prevent the
741 The file object will have a name attribute containing the
742 file's path, and the mode and buffering arguments will be
743 handled exactly as with open(). The resulting permissions
744 will also match those produced by open().
748 with atomically_replaced_file('foo.txt', 'w') as f:
749 f.write('hello jack.')
755 self.buffering = buffering
756 self.canceled = False
758 self.cleanup = ExitStack()
761 parent, name = os.path.split(self.path)
762 tmpdir = self.cleanup.enter_context(temp_dir(dir=parent,
764 self.tmp_path = tmpdir + b'/pending'
765 f = open(self.tmp_path, mode=self.mode, buffering=self.buffering)
766 f = self.cleanup.enter_context(f)
767 self.cleanup = self.cleanup.pop_all()
769 def __exit__(self, exc_type, exc_value, traceback):
771 if not (self.canceled or exc_type):
772 os.rename(self.tmp_path, self.path)
778 """Append "/" to 's' if it doesn't aleady end in "/"."""
779 assert isinstance(s, bytes)
780 if s and not s.endswith(b'/'):
786 def _mmap_do(f, sz, flags, prot, close):
788 st = os.fstat(f.fileno())
791 # trying to open a zero-length map gives an error, but an empty
792 # string has all the same behaviour of a zero-length map, ie. it has
795 map = io.mmap(f.fileno(), sz, flags, prot)
797 f.close() # map will persist beyond file close
801 def mmap_read(f, sz = 0, close=True):
802 """Create a read-only memory mapped region on file 'f'.
803 If sz is 0, the region will cover the entire file.
805 return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
808 def mmap_readwrite(f, sz = 0, close=True):
809 """Create a read-write memory mapped region on file 'f'.
810 If sz is 0, the region will cover the entire file.
812 return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
816 def mmap_readwrite_private(f, sz = 0, close=True):
817 """Create a read-write memory mapped region on file 'f'.
818 If sz is 0, the region will cover the entire file.
819 The map is private, which means the changes are never flushed back to the
822 return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
826 _mincore = getattr(_helpers, 'mincore', None)
828 # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
829 MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)
831 _fmincore_chunk_size = None
def _set_fmincore_chunk_size():
    """Initialize the module-level _fmincore_chunk_size.

    Picks the largest multiple of the system page size that does not
    exceed the preferred 64MB chunk, falling back to a single page when
    pages are at least that large.
    """
    global _fmincore_chunk_size
    preferred = 64 * 1024 * 1024
    if sc_page_size < preferred:
        _fmincore_chunk_size = (preferred // sc_page_size) * sc_page_size
    else:
        _fmincore_chunk_size = sc_page_size
841 """Return the mincore() data for fd as a bytearray whose values can be
842 tested via MINCORE_INCORE, or None if fd does not fully
843 support the operation."""
845 if (st.st_size == 0):
847 if not _fmincore_chunk_size:
848 _set_fmincore_chunk_size()
849 pages_per_chunk = _fmincore_chunk_size // sc_page_size;
850 page_count = (st.st_size + sc_page_size - 1) // sc_page_size;
851 chunk_count = (st.st_size + _fmincore_chunk_size - 1) // _fmincore_chunk_size
852 result = bytearray(page_count)
853 for ci in range(chunk_count):
854 pos = _fmincore_chunk_size * ci;
855 msize = min(_fmincore_chunk_size, st.st_size - pos)
857 m = io.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
858 except mmap.error as ex:
859 if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
860 # Perhaps the file was a pipe, i.e. "... | bup split ..."
865 _mincore(m, msize, 0, result, ci * pages_per_chunk)
866 except OSError as ex:
867 if ex.errno == errno.ENOSYS:
873 def parse_timestamp(epoch_str):
874 """Return the number of nanoseconds since the epoch that are described
875 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
876 throw a ValueError that may contain additional information."""
877 ns_per = {'s' : 1000000000,
881 match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
883 if re.match(r'^([-+]?[0-9]+)$', epoch_str):
884 raise ValueError('must include units, i.e. 100ns, 100ms, ...')
886 (n, units) = match.group(1, 2)
890 return n * ns_per[units]
894 """Parse string or bytes as a possibly unit suffixed number.
897 199.2k means 203981 bytes
898 1GB means 1073741824 bytes
899 2.1 tb means 2199023255552 bytes
901 if isinstance(s, bytes):
902 # FIXME: should this raise a ValueError for UnicodeDecodeError
903 # (perhaps with the latter as the context).
904 s = s.decode('ascii')
905 g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
907 raise ValueError("can't parse %r as a number" % s)
908 (val, unit) = g.groups()
911 if unit in ['t', 'tb']:
912 mult = 1024*1024*1024*1024
913 elif unit in ['g', 'gb']:
914 mult = 1024*1024*1024
915 elif unit in ['m', 'mb']:
917 elif unit in ['k', 'kb']:
919 elif unit in ['', 'b']:
922 raise ValueError("invalid unit %r in number %r" % (unit, s))
928 """Append an error message to the list of saved errors.
930 Once processing is able to stop and output the errors, the saved errors are
931 accessible in the module variable helpers.saved_errors.
933 saved_errors.append(e)
942 def die_if_errors(msg=None, status=1):
946 msg = 'warning: %d errors encountered\n' % len(saved_errors)
952 """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
954 The new exception handler will make sure that bup will exit without an ugly
955 stacktrace when Ctrl-C is hit.
957 oldhook = sys.excepthook
958 def newhook(exctype, value, traceback):
959 if exctype == KeyboardInterrupt:
960 log('\nInterrupted.\n')
962 oldhook(exctype, value, traceback)
963 sys.excepthook = newhook
966 def columnate(l, prefix):
967 """Format elements of 'l' in columns with 'prefix' leading each line.
969 The number of columns is determined automatically based on the string
972 binary = isinstance(prefix, bytes)
973 nothing = b'' if binary else ''
974 nl = b'\n' if binary else '\n'
978 clen = max(len(s) for s in l)
979 ncols = (tty_width() - len(prefix)) // (clen + 2)
984 while len(l) % ncols:
986 rows = len(l) // ncols
987 for s in range(0, len(l), rows):
988 cols.append(l[s:s+rows])
990 fmt = b'%-*s' if binary else '%-*s'
991 for row in zip(*cols):
992 out += prefix + nothing.join((fmt % (clen+2, s)) for s in row) + nl
996 def parse_date_or_fatal(str, fatal):
997 """Parses the given date or calls Option.fatal().
998 For now we expect a string that contains a float."""
1001 except ValueError as e:
1002 raise fatal('invalid date format (should be a float): %r' % e)
1007 def parse_excludes(options, fatal):
1008 """Traverse the options and extract all excludes, or call Option.fatal()."""
1011 for flag in options:
1012 (option, parameter) = flag
1013 if option == '--exclude':
1014 excluded_paths.append(resolve_parent(argv_bytes(parameter)))
1015 elif option == '--exclude-from':
1017 f = open(resolve_parent(argv_bytes(parameter)), 'rb')
1018 except IOError as e:
1019 raise fatal("couldn't read %r" % parameter)
1020 for exclude_path in f.readlines():
1021 # FIXME: perhaps this should be rstrip('\n')
1022 exclude_path = resolve_parent(exclude_path.strip())
1024 excluded_paths.append(exclude_path)
1025 return sorted(frozenset(excluded_paths))
1028 def parse_rx_excludes(options, fatal):
1029 """Traverse the options and extract all rx excludes, or call
1031 excluded_patterns = []
1033 for flag in options:
1034 (option, parameter) = flag
1035 if option == '--exclude-rx':
1037 excluded_patterns.append(re.compile(argv_bytes(parameter)))
1038 except re.error as ex:
1039 fatal('invalid --exclude-rx pattern (%r): %s' % (parameter, ex))
1040 elif option == '--exclude-rx-from':
1042 f = open(resolve_parent(parameter), 'rb')
1043 except IOError as e:
1044 raise fatal("couldn't read %r" % parameter)
1045 for pattern in f.readlines():
1046 spattern = pattern.rstrip(b'\n')
1050 excluded_patterns.append(re.compile(spattern))
1051 except re.error as ex:
1052 fatal('invalid --exclude-rx pattern (%r): %s' % (spattern, ex))
1053 return excluded_patterns
1056 def should_rx_exclude_path(path, exclude_rxs):
1057 """Return True if path matches a regular expression in exclude_rxs."""
1058 for rx in exclude_rxs:
1060 debug1('Skipping %r: excluded by rx pattern %r.\n'
1061 % (path, rx.pattern))
1066 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
1067 # that resolve against the current filesystem in the strip/graft
1068 # functions for example, but elsewhere as well. I suspect bup's not
1069 # always being careful about that. For some cases, the contents of
1070 # the current filesystem should be irrelevant, and consulting it might
1071 # produce the wrong result, perhaps via unintended symlink resolution,
1074 def path_components(path):
1075 """Break path into a list of pairs of the form (name,
1076 full_path_to_name). Path must start with '/'.
1078 '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
1079 if not path.startswith(b'/'):
1080 raise Exception('path must start with "/": %s' % path_msg(path))
1081 # Since we assume path startswith('/'), we can skip the first element.
1082 result = [(b'', b'/')]
1083 norm_path = os.path.abspath(path)
1084 if norm_path == b'/':
1087 for p in norm_path.split(b'/')[1:]:
1088 full_path += b'/' + p
1089 result.append((p, full_path))
1093 def stripped_path_components(path, strip_prefixes):
1094 """Strip any prefix in strip_prefixes from path and return a list
1095 of path components where each component is (name,
1096 none_or_full_fs_path_to_name). Assume path startswith('/').
1097 See thelpers.py for examples."""
1098 normalized_path = os.path.abspath(path)
1099 sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
1100 for bp in sorted_strip_prefixes:
1101 normalized_bp = os.path.abspath(bp)
1102 if normalized_bp == b'/':
1104 if normalized_path.startswith(normalized_bp):
1105 prefix = normalized_path[:len(normalized_bp)]
1107 for p in normalized_path[len(normalized_bp):].split(b'/'):
1111 result.append((p, prefix))
1114 return path_components(path)
1117 def grafted_path_components(graft_points, path):
1118 # Create a result that consists of some number of faked graft
1119 # directories before the graft point, followed by all of the real
1120 # directories from path that are after the graft point. Arrange
1121 # for the directory at the graft point in the result to correspond
1122 # to the "orig" directory in --graft orig=new. See t/thelpers.py
1123 # for some examples.
1125 # Note that given --graft orig=new, orig and new have *nothing* to
1126 # do with each other, even if some of their component names
1127 # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
1128 # equivalent to --graft /foo/bar/baz=/x/y/z, or even
1131 # FIXME: This can't be the best solution...
1132 clean_path = os.path.abspath(path)
1133 for graft_point in graft_points:
1134 old_prefix, new_prefix = graft_point
1135 # Expand prefixes iff not absolute paths.
1136 old_prefix = os.path.normpath(old_prefix)
1137 new_prefix = os.path.normpath(new_prefix)
1138 if clean_path.startswith(old_prefix):
1139 escaped_prefix = re.escape(old_prefix)
1140 grafted_path = re.sub(br'^' + escaped_prefix, new_prefix, clean_path)
1141 # Handle /foo=/ (at least) -- which produces //whatever.
1142 grafted_path = b'/' + grafted_path.lstrip(b'/')
1143 clean_path_components = path_components(clean_path)
1144 # Count the components that were stripped.
1145 strip_count = 0 if old_prefix == b'/' else old_prefix.count(b'/')
1146 new_prefix_parts = new_prefix.split(b'/')
1147 result_prefix = grafted_path.split(b'/')[:new_prefix.count(b'/')]
1148 result = [(p, None) for p in result_prefix] \
1149 + clean_path_components[strip_count:]
1150 # Now set the graft point name to match the end of new_prefix.
1151 graft_point = len(result_prefix)
1152 result[graft_point] = \
1153 (new_prefix_parts[-1], clean_path_components[strip_count][1])
1154 if new_prefix == b'/': # --graft ...=/ is a special case.
1157 return path_components(clean_path)
1163 _localtime = getattr(_helpers, 'localtime', None)
# struct_time-like record extended with tm_gmtoff and tm_zone fields,
# as produced by the localtime() defined below when _helpers.localtime
# is available.
bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday',
                                   'tm_hour', 'tm_min', 'tm_sec',
                                   'tm_wday', 'tm_yday',
                                   'tm_isdst', 'tm_gmtoff', 'tm_zone'])
1171 # Define a localtime() that returns bup_time when possible. Note:
1172 # this means that any helpers.localtime() results may need to be
1173 # passed through to_py_time() before being passed to python's time
1174 # module, which doesn't appear willing to ignore the extra items.
1176 def localtime(time):
1177 return bup_time(*_helpers.localtime(int(floor(time))))
1178 def utc_offset_str(t):
1179 """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
1180 If the current UTC offset does not represent an integer number
1181 of minutes, the fractional component will be truncated."""
1182 off = localtime(t).tm_gmtoff
1183 # Note: // doesn't truncate like C for negative values, it rounds down.
1184 offmin = abs(off) // 60
1186 h = (offmin - m) // 60
1187 return b'%+03d%02d' % (-h if off < 0 else h, m)
1189 if isinstance(x, time.struct_time):
1191 return time.struct_time(x[:9])
1193 localtime = time.localtime
1194 def utc_offset_str(t):
1195 return time.strftime(b'%z', localtime(t))
# Substrings that invalidate a save name (used by valid_save_name, which
# enforces a superset of the git-check-ref-format(1) restrictions).
_some_invalid_save_parts_rx = re.compile(br'[\[ ~^:?*\\]|\.\.|//|@{')
1202 def valid_save_name(name):
1203 # Enforce a superset of the restrictions in git-check-ref-format(1)
1205 or name.startswith(b'/') or name.endswith(b'/') \
1206 or name.endswith(b'.'):
1208 if _some_invalid_save_parts_rx.search(name):
1211 if byte_int(c) < 0x20 or byte_int(c) == 0x7f:
1213 for part in name.split(b'/'):
1214 if part.startswith(b'.') or part.endswith(b'.lock'):
# Matches a retention period spec: integer magnitude plus a unit suffix
# (s|min|h|d|w|m|y); consumed by period_as_secs().
_period_rx = re.compile(br'^([0-9]+)(s|min|h|d|w|m|y)$')
1221 def period_as_secs(s):
1224 match = _period_rx.match(s)
1227 mag = int(match.group(1))
1228 scale = match.group(2)
1229 return mag * {b's': 1,
1233 b'w': 60 * 60 * 24 * 7,
1234 b'm': 60 * 60 * 24 * 31,
1235 b'y': 60 * 60 * 24 * 366}[scale]