lib/bup/git.py

   1 """Git interaction library.
   2 bup repositories are in Git format. This library allows us to
   3 interact with the Git data structures.
   4 """
   5
   6 import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
   7 from collections import namedtuple
   8 from itertools import islice
   9 from numbers import Integral
  10
  11 from bup import _helpers, hashsplit, path, midx, bloom, xstat
  12 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
  13                          fdatasync,
  14                          hostname, localtime, log, merge_iter,
  15                          mmap_read, mmap_readwrite,
  16                          parse_num,
  17                          progress, qprogress, stat_if_exists,
  18                          unlink, username, userfullname,
  19                          utc_offset_str)
  20
# Verbosity flag; PackWriter._write() logs a '>' per object when it is true.
verbose = 0
# When true, PackIdxList.refresh() always behaves as if skip_midx were set.
ignore_midx = 0
repodir = None  # The default repository, once initialized

# Object type name -> numeric type code stored in a pack object header.
_typemap =  { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
# Inverse of _typemap: numeric type code -> object type name.
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }

# Lookup statistics maintained by PackIdx._idx_from_hash() and
# PackIdxList.exists().
_total_searches = 0
_total_steps = 0
  30
  31
  32 class GitError(Exception):
  33     pass
  34
  35
  36 def _git_wait(cmd, p):
  37     rv = p.wait()
  38     if rv != 0:
  39         raise GitError('%s returned %d' % (cmd, rv))
  40
  41 def _git_capture(argv):
  42     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
  43     r = p.stdout.read()
  44     _git_wait(repr(argv), p)
  45     return r
  46
  47 def git_config_get(option, repo_dir=None):
  48     cmd = ('git', 'config', '--get', option)
  49     p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
  50                          preexec_fn=_gitenv(repo_dir=repo_dir))
  51     r = p.stdout.read()
  52     rc = p.wait()
  53     if rc == 0:
  54         return r
  55     if rc != 1:
  56         raise GitError('%s returned %d' % (cmd, rc))
  57     return None
  58
  59
  60 def parse_tz_offset(s):
  61     """UTC offset in seconds."""
  62     tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
  63     if s[0] == '-':
  64         return - tz_off
  65     return tz_off
  66
  67
# FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
# Make sure that's authoritative.

# Character allowed as the first or last character of a name/mail string.
_start_end_char = r'[^ .,:;<>"\'\0\n]'
# Character allowed anywhere inside a name/mail string.
_content_char = r'[^\0\n<>]'
# A name/mail string: either one or two start/end characters, or a start
# character, any number of content characters, and an end character.
_safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
    % (_start_end_char,
       _start_end_char, _content_char, _start_end_char)
# Timezone offset: sign, two hour digits, and minutes in the range 00-59.
_tz_rx = r'[-+]\d\d[0-5]\d'
# One complete "parent <40 hex digits>" header line.
_parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
# A whole commit object: tree line, zero or more parent lines, author and
# committer lines, a blank line, then the message (everything that remains).
_commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
(?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)

(?P<message>(?:.|\n)*)''' % (_parent_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx))
# Extracts the hash of each parent line from the 'parents' group above.
_parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
  85
  86
  87 # Note that the author_sec and committer_sec values are (UTC) epoch seconds.
  88 CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
  89                                        'author_name', 'author_mail',
  90                                        'author_sec', 'author_offset',
  91                                        'committer_name', 'committer_mail',
  92                                        'committer_sec', 'committer_offset',
  93                                        'message'])
  94
  95 def parse_commit(content):
  96     commit_match = re.match(_commit_rx, content)
  97     if not commit_match:
  98         raise Exception('cannot parse commit %r' % content)
  99     matches = commit_match.groupdict()
 100     return CommitInfo(tree=matches['tree'],
 101                       parents=re.findall(_parent_hash_rx, matches['parents']),
 102                       author_name=matches['author_name'],
 103                       author_mail=matches['author_mail'],
 104                       author_sec=int(matches['asec']),
 105                       author_offset=parse_tz_offset(matches['atz']),
 106                       committer_name=matches['committer_name'],
 107                       committer_mail=matches['committer_mail'],
 108                       committer_sec=int(matches['csec']),
 109                       committer_offset=parse_tz_offset(matches['ctz']),
 110                       message=matches['message'])
 111
 112
 113 def get_commit_items(id, cp):
 114     commit_it = cp.get(id)
 115     assert(next(commit_it) == 'commit')
 116     commit_content = ''.join(commit_it)
 117     return parse_commit(commit_content)
 118
 119
 120 def _local_git_date_str(epoch_sec):
 121     return '%d %s' % (epoch_sec, utc_offset_str(epoch_sec))
 122
 123
 124 def _git_date_str(epoch_sec, tz_offset_sec):
 125     offs =  tz_offset_sec // 60
 126     return '%d %s%02d%02d' \
 127         % (epoch_sec,
 128            '+' if offs >= 0 else '-',
 129            abs(offs) // 60,
 130            abs(offs) % 60)
 131
 132
 133 def repo(sub = '', repo_dir=None):
 134     """Get the path to the git repository or one of its subdirectories."""
 135     global repodir
 136     repo_dir = repo_dir or repodir
 137     if not repo_dir:
 138         raise GitError('You should call check_repo_or_die()')
 139
 140     # If there's a .git subdirectory, then the actual repo is in there.
 141     gd = os.path.join(repo_dir, '.git')
 142     if os.path.exists(gd):
 143         repodir = gd
 144
 145     return os.path.join(repo_dir, sub)
 146
 147
 148 def shorten_hash(s):
 149     return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
 150                   r'\1\2*\3', s)
 151
 152
 153 def repo_rel(path):
 154     full = os.path.abspath(path)
 155     fullrepo = os.path.abspath(repo(''))
 156     if not fullrepo.endswith('/'):
 157         fullrepo += '/'
 158     if full.startswith(fullrepo):
 159         path = full[len(fullrepo):]
 160     if path.startswith('index-cache/'):
 161         path = path[len('index-cache/'):]
 162     return shorten_hash(path)
 163
 164
 165 def all_packdirs():
 166     paths = [repo('objects/pack')]
 167     paths += glob.glob(repo('index-cache/*/.'))
 168     return paths
 169
 170
 171 def auto_midx(objdir):
 172     args = [path.exe(), 'midx', '--auto', '--dir', objdir]
 173     try:
 174         rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
 175     except OSError as e:
 176         # make sure 'args' gets printed to help with debugging
 177         add_error('%r: exception: %s' % (args, e))
 178         raise
 179     if rv:
 180         add_error('%r: returned %d' % (args, rv))
 181
 182     args = [path.exe(), 'bloom', '--dir', objdir]
 183     try:
 184         rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
 185     except OSError as e:
 186         # make sure 'args' gets printed to help with debugging
 187         add_error('%r: exception: %s' % (args, e))
 188         raise
 189     if rv:
 190         add_error('%r: returned %d' % (args, rv))
 191
 192
 193 def mangle_name(name, mode, gitmode):
 194     """Mangle a file name to present an abstract name for segmented files.
 195     Mangled file names will have the ".bup" extension added to them. If a
 196     file's name already ends with ".bup", a ".bupl" extension is added to
 197     disambiguate normal files from segmented ones.
 198     """
 199     if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
 200         assert(stat.S_ISDIR(gitmode))
 201         return name + '.bup'
 202     elif name.endswith('.bup') or name[:-1].endswith('.bup'):
 203         return name + '.bupl'
 204     else:
 205         return name
 206
 207
 208 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
 209 def demangle_name(name, mode):
 210     """Remove name mangling from a file name, if necessary.
 211
 212     The return value is a tuple (demangled_filename,mode), where mode is one of
 213     the following:
 214
 215     * BUP_NORMAL  : files that should be read as-is from the repository
 216     * BUP_CHUNKED : files that were chunked and need to be reassembled
 217
 218     For more information on the name mangling algorithm, see mangle_name()
 219     """
 220     if name.endswith('.bupl'):
 221         return (name[:-5], BUP_NORMAL)
 222     elif name.endswith('.bup'):
 223         return (name[:-4], BUP_CHUNKED)
 224     elif name.endswith('.bupm'):
 225         return (name[:-5],
 226                 BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
 227     else:
 228         return (name, BUP_NORMAL)
 229
 230
 231 def calc_hash(type, content):
 232     """Calculate some content's hash in the Git fashion."""
 233     header = '%s %d\0' % (type, len(content))
 234     sum = Sha1(header)
 235     sum.update(content)
 236     return sum.digest()
 237
 238
 239 def shalist_item_sort_key(ent):
 240     (mode, name, id) = ent
 241     assert(mode+0 == mode)
 242     if stat.S_ISDIR(mode):
 243         return name + '/'
 244     else:
 245         return name
 246
 247
 248 def tree_encode(shalist):
 249     """Generate a git tree object from (mode,name,hash) tuples."""
 250     shalist = sorted(shalist, key = shalist_item_sort_key)
 251     l = []
 252     for (mode,name,bin) in shalist:
 253         assert(mode)
 254         assert(mode+0 == mode)
 255         assert(name)
 256         assert(len(bin) == 20)
 257         s = '%o %s\0%s' % (mode,name,bin)
 258         assert(s[0] != '0')  # 0-padded octal is not acceptable in a git tree
 259         l.append(s)
 260     return ''.join(l)
 261
 262
 263 def tree_decode(buf):
 264     """Generate a list of (mode,name,hash) from the git tree object in buf."""
 265     ofs = 0
 266     while ofs < len(buf):
 267         z = buf.find('\0', ofs)
 268         assert(z > ofs)
 269         spl = buf[ofs:z].split(' ', 1)
 270         assert(len(spl) == 2)
 271         mode,name = spl
 272         sha = buf[z+1:z+1+20]
 273         ofs = z+1+20
 274         yield (int(mode, 8), name, sha)
 275
 276
 277 def _encode_packobj(type, content, compression_level=1):
 278     if compression_level not in (0, 1, 2, 3, 4, 5, 6, 7, 8, 9):
 279         raise ValueError('invalid compression level %s' % compression_level)
 280     szout = ''
 281     sz = len(content)
 282     szbits = (sz & 0x0f) | (_typemap[type]<<4)
 283     sz >>= 4
 284     while 1:
 285         if sz: szbits |= 0x80
 286         szout += chr(szbits)
 287         if not sz:
 288             break
 289         szbits = sz & 0x7f
 290         sz >>= 7
 291     z = zlib.compressobj(compression_level)
 292     yield szout
 293     yield z.compress(content)
 294     yield z.flush()
 295
 296
 297 def _encode_looseobj(type, content, compression_level=1):
 298     z = zlib.compressobj(compression_level)
 299     yield z.compress('%s %d\0' % (type, len(content)))
 300     yield z.compress(content)
 301     yield z.flush()
 302
 303
 304 def _decode_looseobj(buf):
 305     assert(buf);
 306     s = zlib.decompress(buf)
 307     i = s.find('\0')
 308     assert(i > 0)
 309     l = s[:i].split(' ')
 310     type = l[0]
 311     sz = int(l[1])
 312     content = s[i+1:]
 313     assert(type in _typemap)
 314     assert(sz == len(content))
 315     return (type, content)
 316
 317
 318 def _decode_packobj(buf):
 319     assert(buf)
 320     c = ord(buf[0])
 321     type = _typermap[(c & 0x70) >> 4]
 322     sz = c & 0x0f
 323     shift = 4
 324     i = 0
 325     while c & 0x80:
 326         i += 1
 327         c = ord(buf[i])
 328         sz |= (c & 0x7f) << shift
 329         shift += 7
 330         if not (c & 0x80):
 331             break
 332     return (type, zlib.decompress(buf[i+1:]))
 333
 334
 335 class PackIdx:
 336     def __init__(self):
 337         assert(0)
 338
 339     def find_offset(self, hash):
 340         """Get the offset of an object inside the index file."""
 341         idx = self._idx_from_hash(hash)
 342         if idx != None:
 343             return self._ofs_from_idx(idx)
 344         return None
 345
 346     def exists(self, hash, want_source=False):
 347         """Return nonempty if the object exists in this index."""
 348         if hash and (self._idx_from_hash(hash) != None):
 349             return want_source and os.path.basename(self.name) or True
 350         return None
 351
 352     def __len__(self):
 353         return int(self.fanout[255])
 354
 355     def _idx_from_hash(self, hash):
 356         global _total_searches, _total_steps
 357         _total_searches += 1
 358         assert(len(hash) == 20)
 359         b1 = ord(hash[0])
 360         start = self.fanout[b1-1] # range -1..254
 361         end = self.fanout[b1] # range 0..255
 362         want = str(hash)
 363         _total_steps += 1  # lookup table is a step
 364         while start < end:
 365             _total_steps += 1
 366             mid = start + (end-start)/2
 367             v = self._idx_to_hash(mid)
 368             if v < want:
 369                 start = mid+1
 370             elif v > want:
 371                 end = mid
 372             else: # got it!
 373                 return mid
 374         return None
 375
 376
 377 class PackIdxV1(PackIdx):
 378     """Object representation of a Git pack index (version 1) file."""
 379     def __init__(self, filename, f):
 380         self.name = filename
 381         self.idxnames = [self.name]
 382         self.map = mmap_read(f)
 383         self.fanout = list(struct.unpack('!256I',
 384                                          str(buffer(self.map, 0, 256*4))))
 385         self.fanout.append(0)  # entry "-1"
 386         nsha = self.fanout[255]
 387         self.sha_ofs = 256*4
 388         self.shatable = buffer(self.map, self.sha_ofs, nsha*24)
 389
 390     def _ofs_from_idx(self, idx):
 391         return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
 392
 393     def _idx_to_hash(self, idx):
 394         return str(self.shatable[idx*24+4 : idx*24+24])
 395
 396     def __iter__(self):
 397         for i in xrange(self.fanout[255]):
 398             yield buffer(self.map, 256*4 + 24*i + 4, 20)
 399
 400
 401 class PackIdxV2(PackIdx):
 402     """Object representation of a Git pack index (version 2) file."""
 403     def __init__(self, filename, f):
 404         self.name = filename
 405         self.idxnames = [self.name]
 406         self.map = mmap_read(f)
 407         assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
 408         self.fanout = list(struct.unpack('!256I',
 409                                          str(buffer(self.map, 8, 256*4))))
 410         self.fanout.append(0)  # entry "-1"
 411         nsha = self.fanout[255]
 412         self.sha_ofs = 8 + 256*4
 413         self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
 414         self.ofstable = buffer(self.map,
 415                                self.sha_ofs + nsha*20 + nsha*4,
 416                                nsha*4)
 417         self.ofs64table = buffer(self.map,
 418                                  8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
 419
 420     def _ofs_from_idx(self, idx):
 421         ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
 422         if ofs & 0x80000000:
 423             idx64 = ofs & 0x7fffffff
 424             ofs = struct.unpack('!Q',
 425                                 str(buffer(self.ofs64table, idx64*8, 8)))[0]
 426         return ofs
 427
 428     def _idx_to_hash(self, idx):
 429         return str(self.shatable[idx*20:(idx+1)*20])
 430
 431     def __iter__(self):
 432         for i in xrange(self.fanout[255]):
 433             yield buffer(self.map, 8 + 256*4 + 20*i, 20)
 434
 435
 436 _mpi_count = 0
 437 class PackIdxList:
 438     def __init__(self, dir):
 439         global _mpi_count
 440         assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
 441         _mpi_count += 1
 442         self.dir = dir
 443         self.also = set()
 444         self.packs = []
 445         self.do_bloom = False
 446         self.bloom = None
 447         self.refresh()
 448
 449     def __del__(self):
 450         global _mpi_count
 451         _mpi_count -= 1
 452         assert(_mpi_count == 0)
 453
 454     def __iter__(self):
 455         return iter(idxmerge(self.packs))
 456
 457     def __len__(self):
 458         return sum(len(pack) for pack in self.packs)
 459
 460     def exists(self, hash, want_source=False):
 461         """Return nonempty if the object exists in the index files."""
 462         global _total_searches
 463         _total_searches += 1
 464         if hash in self.also:
 465             return True
 466         if self.do_bloom and self.bloom:
 467             if self.bloom.exists(hash):
 468                 self.do_bloom = False
 469             else:
 470                 _total_searches -= 1  # was counted by bloom
 471                 return None
 472         for i in xrange(len(self.packs)):
 473             p = self.packs[i]
 474             _total_searches -= 1  # will be incremented by sub-pack
 475             ix = p.exists(hash, want_source=want_source)
 476             if ix:
 477                 # reorder so most recently used packs are searched first
 478                 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
 479                 return ix
 480         self.do_bloom = True
 481         return None
 482
 483     def refresh(self, skip_midx = False):
 484         """Refresh the index list.
 485         This method verifies if .midx files were superseded (e.g. all of its
 486         contents are in another, bigger .midx file) and removes the superseded
 487         files.
 488
 489         If skip_midx is True, all work on .midx files will be skipped and .midx
 490         files will be removed from the list.
 491
 492         The module-global variable 'ignore_midx' can force this function to
 493         always act as if skip_midx was True.
 494         """
 495         self.bloom = None # Always reopen the bloom as it may have been relaced
 496         self.do_bloom = False
 497         skip_midx = skip_midx or ignore_midx
 498         d = dict((p.name, p) for p in self.packs
 499                  if not skip_midx or not isinstance(p, midx.PackMidx))
 500         if os.path.exists(self.dir):
 501             if not skip_midx:
 502                 midxl = []
 503                 for ix in self.packs:
 504                     if isinstance(ix, midx.PackMidx):
 505                         for name in ix.idxnames:
 506                             d[os.path.join(self.dir, name)] = ix
 507                 for full in glob.glob(os.path.join(self.dir,'*.midx')):
 508                     if not d.get(full):
 509                         mx = midx.PackMidx(full)
 510                         (mxd, mxf) = os.path.split(mx.name)
 511                         broken = False
 512                         for n in mx.idxnames:
 513                             if not os.path.exists(os.path.join(mxd, n)):
 514                                 log(('warning: index %s missing\n' +
 515                                     '  used by %s\n') % (n, mxf))
 516                                 broken = True
 517                         if broken:
 518                             mx.close()
 519                             del mx
 520                             unlink(full)
 521                         else:
 522                             midxl.append(mx)
 523                 midxl.sort(key=lambda ix:
 524                            (-len(ix), -xstat.stat(ix.name).st_mtime))
 525                 for ix in midxl:
 526                     any_needed = False
 527                     for sub in ix.idxnames:
 528                         found = d.get(os.path.join(self.dir, sub))
 529                         if not found or isinstance(found, PackIdx):
 530                             # doesn't exist, or exists but not in a midx
 531                             any_needed = True
 532                             break
 533                     if any_needed:
 534                         d[ix.name] = ix
 535                         for name in ix.idxnames:
 536                             d[os.path.join(self.dir, name)] = ix
 537                     elif not ix.force_keep:
 538                         debug1('midx: removing redundant: %s\n'
 539                                % os.path.basename(ix.name))
 540                         ix.close()
 541                         unlink(ix.name)
 542             for full in glob.glob(os.path.join(self.dir,'*.idx')):
 543                 if not d.get(full):
 544                     try:
 545                         ix = open_idx(full)
 546                     except GitError as e:
 547                         add_error(e)
 548                         continue
 549                     d[full] = ix
 550             bfull = os.path.join(self.dir, 'bup.bloom')
 551             if self.bloom is None and os.path.exists(bfull):
 552                 self.bloom = bloom.ShaBloom(bfull)
 553             self.packs = list(set(d.values()))
 554             self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
 555             if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
 556                 self.do_bloom = True
 557             else:
 558                 self.bloom = None
 559         debug1('PackIdxList: using %d index%s.\n'
 560             % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
 561
 562     def add(self, hash):
 563         """Insert an additional object in the list."""
 564         self.also.add(hash)
 565
 566
 567 def open_idx(filename):
 568     if filename.endswith('.idx'):
 569         f = open(filename, 'rb')
 570         header = f.read(8)
 571         if header[0:4] == '\377tOc':
 572             version = struct.unpack('!I', header[4:8])[0]
 573             if version == 2:
 574                 return PackIdxV2(filename, f)
 575             else:
 576                 raise GitError('%s: expected idx file version 2, got %d'
 577                                % (filename, version))
 578         elif len(header) == 8 and header[0:4] < '\377tOc':
 579             return PackIdxV1(filename, f)
 580         else:
 581             raise GitError('%s: unrecognized idx file header' % filename)
 582     elif filename.endswith('.midx'):
 583         return midx.PackMidx(filename)
 584     else:
 585         raise GitError('idx filenames must end with .idx or .midx')
 586
 587
 588 def idxmerge(idxlist, final_progress=True):
 589     """Generate a list of all the objects reachable in a PackIdxList."""
 590     def pfunc(count, total):
 591         qprogress('Reading indexes: %.2f%% (%d/%d)\r'
 592                   % (count*100.0/total, count, total))
 593     def pfinal(count, total):
 594         if final_progress:
 595             progress('Reading indexes: %.2f%% (%d/%d), done.\n'
 596                      % (100, total, total))
 597     return merge_iter(idxlist, 10024, pfunc, pfinal)
 598
 599
 600 def _make_objcache():
 601     return PackIdxList(repo('objects/pack'))
 602
 603 # bup-gc assumes that it can disable all PackWriter activities
 604 # (bloom/midx/cache) via the constructor and close() arguments.
 605
 606 class PackWriter:
 607     """Writes Git objects inside a pack file."""
 608     def __init__(self, objcache_maker=_make_objcache, compression_level=1,
 609                  run_midx=True, on_pack_finish=None,
 610                  max_pack_size=None, max_pack_objects=None):
 611         self.repo_dir = repo()
 612         self.file = None
 613         self.parentfd = None
 614         self.count = 0
 615         self.outbytes = 0
 616         self.filename = None
 617         self.idx = None
 618         self.objcache_maker = objcache_maker
 619         self.objcache = None
 620         self.compression_level = compression_level
 621         self.run_midx=run_midx
 622         self.on_pack_finish = on_pack_finish
 623         if not max_pack_size:
 624             max_pack_size = git_config_get('pack.packSizeLimit',
 625                                            repo_dir=self.repo_dir)
 626             if max_pack_size is not None:
 627                 max_pack_size = parse_num(max_pack_size)
 628             if not max_pack_size:
 629                 # larger packs slow down pruning
 630                 max_pack_size = 1000 * 1000 * 1000
 631         self.max_pack_size = max_pack_size
 632         # cache memory usage is about 83 bytes per object
 633         self.max_pack_objects = max_pack_objects if max_pack_objects \
 634                                 else max(1, self.max_pack_size // 5000)
 635
 636     def __del__(self):
 637         self.close()
 638
 639     def _open(self):
 640         if not self.file:
 641             objdir = dir = os.path.join(self.repo_dir, 'objects')
 642             fd, name = tempfile.mkstemp(suffix='.pack', dir=objdir)
 643             try:
 644                 self.file = os.fdopen(fd, 'w+b')
 645             except:
 646                 os.close(fd)
 647                 raise
 648             try:
 649                 self.parentfd = os.open(objdir, os.O_RDONLY)
 650             except:
 651                 f = self.file
 652                 self.file = None
 653                 f.close()
 654                 raise
 655             assert(name.endswith('.pack'))
 656             self.filename = name[:-5]
 657             self.file.write('PACK\0\0\0\2\0\0\0\0')
 658             self.idx = list(list() for i in xrange(256))
 659
 660     def _raw_write(self, datalist, sha):
 661         self._open()
 662         f = self.file
 663         # in case we get interrupted (eg. KeyboardInterrupt), it's best if
 664         # the file never has a *partial* blob.  So let's make sure it's
 665         # all-or-nothing.  (The blob shouldn't be very big anyway, thanks
 666         # to our hashsplit algorithm.)  f.write() does its own buffering,
 667         # but that's okay because we'll flush it in _end().
 668         oneblob = ''.join(datalist)
 669         try:
 670             f.write(oneblob)
 671         except IOError as e:
 672             raise GitError, e, sys.exc_info()[2]
 673         nw = len(oneblob)
 674         crc = zlib.crc32(oneblob) & 0xffffffff
 675         self._update_idx(sha, crc, nw)
 676         self.outbytes += nw
 677         self.count += 1
 678         return nw, crc
 679
 680     def _update_idx(self, sha, crc, size):
 681         assert(sha)
 682         if self.idx:
 683             self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
 684
 685     def _write(self, sha, type, content):
 686         if verbose:
 687             log('>')
 688         if not sha:
 689             sha = calc_hash(type, content)
 690         size, crc = self._raw_write(_encode_packobj(type, content,
 691                                                     self.compression_level),
 692                                     sha=sha)
 693         if self.outbytes >= self.max_pack_size \
 694            or self.count >= self.max_pack_objects:
 695             self.breakpoint()
 696         return sha
 697
 698     def breakpoint(self):
 699         """Clear byte and object counts and return the last processed id."""
 700         id = self._end(self.run_midx)
 701         self.outbytes = self.count = 0
 702         return id
 703
 704     def _require_objcache(self):
 705         if self.objcache is None and self.objcache_maker:
 706             self.objcache = self.objcache_maker()
 707         if self.objcache is None:
 708             raise GitError(
 709                     "PackWriter not opened or can't check exists w/o objcache")
 710
 711     def exists(self, id, want_source=False):
 712         """Return non-empty if an object is found in the object cache."""
 713         self._require_objcache()
 714         return self.objcache.exists(id, want_source=want_source)
 715
 716     def just_write(self, sha, type, content):
 717         """Write an object to the pack file, bypassing the objcache.  Fails if
 718         sha exists()."""
 719         self._write(sha, type, content)
 720
 721     def maybe_write(self, type, content):
 722         """Write an object to the pack file if not present and return its id."""
 723         sha = calc_hash(type, content)
 724         if not self.exists(sha):
 725             self.just_write(sha, type, content)
 726             self._require_objcache()
 727             self.objcache.add(sha)
 728         return sha
 729
 730     def new_blob(self, blob):
 731         """Create a blob object in the pack with the supplied content."""
 732         return self.maybe_write('blob', blob)
 733
 734     def new_tree(self, shalist):
 735         """Create a tree object in the pack."""
 736         content = tree_encode(shalist)
 737         return self.maybe_write('tree', content)
 738
 739     def new_commit(self, tree, parent,
 740                    author, adate_sec, adate_tz,
 741                    committer, cdate_sec, cdate_tz,
 742                    msg):
 743         """Create a commit object in the pack.  The date_sec values must be
 744         epoch-seconds, and if a tz is None, the local timezone is assumed."""
 745         if adate_tz:
 746             adate_str = _git_date_str(adate_sec, adate_tz)
 747         else:
 748             adate_str = _local_git_date_str(adate_sec)
 749         if cdate_tz:
 750             cdate_str = _git_date_str(cdate_sec, cdate_tz)
 751         else:
 752             cdate_str = _local_git_date_str(cdate_sec)
 753         l = []
 754         if tree: l.append('tree %s' % tree.encode('hex'))
 755         if parent: l.append('parent %s' % parent.encode('hex'))
 756         if author: l.append('author %s %s' % (author, adate_str))
 757         if committer: l.append('committer %s %s' % (committer, cdate_str))
 758         l.append('')
 759         l.append(msg)
 760         return self.maybe_write('commit', '\n'.join(l))
 761
 762     def abort(self):
 763         """Remove the pack file from disk."""
 764         f = self.file
 765         if f:
 766             pfd = self.parentfd
 767             self.file = None
 768             self.parentfd = None
 769             self.idx = None
 770             try:
 771                 try:
 772                     os.unlink(self.filename + '.pack')
 773                 finally:
 774                     f.close()
 775             finally:
 776                 if pfd is not None:
 777                     os.close(pfd)
 778
    def _end(self, run_midx=True):
        """Finish the open pack and move it into objects/pack.

        Returns None when no pack is open.  Otherwise patches the object
        count into the pack header, appends the pack's SHA-1, writes the
        .idx file, renames both files to 'pack-<name>' inside objects/pack,
        syncs and closes the directory descriptor, optionally runs
        auto_midx(), calls on_pack_finish if set, and returns the new path
        prefix (without extension).
        """
        f = self.file
        if not f: return None
        self.file = None
        try:
            # The object cache is dropped here; _require_objcache() builds a
            # new one when it is next needed.
            self.objcache = None
            idx = self.idx
            self.idx = None

            # update object count
            # (offset 8 is the 4-byte count field written as zeros by _open)
            f.seek(8)
            cp = struct.pack('!i', self.count)
            assert(len(cp) == 4)
            f.write(cp)

            # calculate the pack sha1sum
            f.seek(0)
            sum = Sha1()
            for b in chunkyreader(f):
                sum.update(b)
            packbin = sum.digest()
            # NOTE(review): assumes chunkyreader() left the file position at
            # EOF so that the digest is appended -- confirm.
            f.write(packbin)
            fdatasync(f.fileno())
        finally:
            f.close()

        # NOTE(review): the return value is used as the pack's name; it is
        # presumably the hex digest of the sorted object list -- the end of
        # _write_pack_idx_v2 is needed to confirm.
        obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
        nameprefix = os.path.join(self.repo_dir,
                                  'objects/pack/pack-' +  obj_list_sha)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')
        # Sync the directory opened in _open(); the descriptor is closed
        # whether or not the sync succeeds.
        try:
            os.fsync(self.parentfd)
        finally:
            os.close(self.parentfd)

        if run_midx:
            auto_midx(os.path.join(self.repo_dir, 'objects/pack'))

        if self.on_pack_finish:
            self.on_pack_finish(nameprefix)

        return nameprefix
 824
 825     def close(self, run_midx=True):
 826         """Close the pack file and move it to its definitive path."""
 827         return self._end(run_midx=run_midx)
 828
 829     def _write_pack_idx_v2(self, filename, idx, packbin):
 830         ofs64_count = 0
 831         for section in idx:
 832             for entry in section:
 833                 if entry[2] >= 2**31:
 834                     ofs64_count += 1
 835
 836         # Length: header + fan-out + shas-and-crcs + overflow-offsets
 837         index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
 838         idx_map = None
 839         idx_f = open(filename, 'w+b')
 840         try:
 841             idx_f.truncate(index_len)
 842             fdatasync(idx_f.fileno())
 843             idx_map = mmap_readwrite(idx_f, close=False)
 844             try:
 845                 count = _helpers.write_idx(filename, idx_map, idx, self.count)
 846                 assert(count == self.count)
 847                 idx_map.flush()
 848             finally:
 849                 idx_map.close()
 850         finally:
 851             idx_f.close()
 852
 853         idx_f = open(filename, 'a+b')
 854         try:
 855             idx_f.write(packbin)
 856             idx_f.seek(0)
 857             idx_sum = Sha1()
 858             b = idx_f.read(8 + 4*256)
 859             idx_sum.update(b)
 860
 861             obj_list_sum = Sha1()
 862             for b in chunkyreader(idx_f, 20*self.count):
 863                 idx_sum.update(b)
 864                 obj_list_sum.update(b)
 865             namebase = obj_list_sum.hexdigest()
 866
 867             for b in chunkyreader(idx_f):
 868                 idx_sum.update(b)
 869             idx_f.write(idx_sum.digest())
 870             fdatasync(idx_f.fileno())
 871             return namebase
 872         finally:
 873             idx_f.close()
 874
 875
 876 def _gitenv(repo_dir = None):
 877     if not repo_dir:
 878         repo_dir = repo()
 879     def env():
 880         os.environ['GIT_DIR'] = os.path.abspath(repo_dir)
 881     return env
 882
 883
 884 def list_refs(patterns=None, repo_dir=None,
 885               limit_to_heads=False, limit_to_tags=False):
 886     """Yield (refname, hash) tuples for all repository refs unless
 887     patterns are specified.  In that case, only include tuples for
 888     refs matching those patterns (cf. git-show-ref(1)).  The limits
 889     restrict the result items to refs/heads or refs/tags.  If both
 890     limits are specified, items from both sources will be included.
 891
 892     """
 893     argv = ['git', 'show-ref']
 894     if limit_to_heads:
 895         argv.append('--heads')
 896     if limit_to_tags:
 897         argv.append('--tags')
 898     argv.append('--')
 899     if patterns:
 900         argv.extend(patterns)
 901     p = subprocess.Popen(argv,
 902                          preexec_fn = _gitenv(repo_dir),
 903                          stdout = subprocess.PIPE)
 904     out = p.stdout.read().strip()
 905     rv = p.wait()  # not fatal
 906     if rv:
 907         assert(not out)
 908     if out:
 909         for d in out.split('\n'):
 910             (sha, name) = d.split(' ', 1)
 911             yield (name, sha.decode('hex'))
 912
 913
 914 def read_ref(refname, repo_dir = None):
 915     """Get the commit id of the most recent commit made on a given ref."""
 916     refs = list_refs(patterns=[refname], repo_dir=repo_dir, limit_to_heads=True)
 917     l = tuple(islice(refs, 2))
 918     if l:
 919         assert(len(l) == 1)
 920         return l[0][1]
 921     else:
 922         return None
 923
 924
 925 def rev_list(ref, count=None, repo_dir=None):
 926     """Generate a list of reachable commits in reverse chronological order.
 927
 928     This generator walks through commits, from child to parent, that are
 929     reachable via the specified ref and yields a series of tuples of the form
 930     (date,hash).
 931
 932     If count is a non-zero integer, limit the number of commits to "count"
 933     objects.
 934     """
 935     assert(not ref.startswith('-'))
 936     opts = []
 937     if isinstance(count, Integral):
 938         opts += ['-n', str(count)]
 939     else:
 940         assert not count
 941     argv = ['git', 'rev-list', '--pretty=format:%at'] + opts + [ref, '--']
 942     p = subprocess.Popen(argv,
 943                          preexec_fn = _gitenv(repo_dir),
 944                          stdout = subprocess.PIPE)
 945     commit = None
 946     for row in p.stdout:
 947         s = row.strip()
 948         if s.startswith('commit '):
 949             commit = s[7:].decode('hex')
 950         else:
 951             date = int(s)
 952             yield (date, commit)
 953     rv = p.wait()  # not fatal
 954     if rv:
 955         raise GitError, 'git rev-list returned error %d' % rv
 956
 957
 958 def get_commit_dates(refs, repo_dir=None):
 959     """Get the dates for the specified commit refs.  For now, every unique
 960        string in refs must resolve to a different commit or this
 961        function will fail."""
 962     result = []
 963     for ref in refs:
 964         commit = get_commit_items(ref, cp(repo_dir))
 965         result.append(commit.author_sec)
 966     return result
 967
 968
 969 def rev_parse(committish, repo_dir=None):
 970     """Resolve the full hash for 'committish', if it exists.
 971
 972     Should be roughly equivalent to 'git rev-parse'.
 973
 974     Returns the hex value of the hash if it is found, None if 'committish' does
 975     not correspond to anything.
 976     """
 977     head = read_ref(committish, repo_dir=repo_dir)
 978     if head:
 979         debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
 980         return head
 981
 982     pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))
 983
 984     if len(committish) == 40:
 985         try:
 986             hash = committish.decode('hex')
 987         except TypeError:
 988             return None
 989
 990         if pL.exists(hash):
 991             return hash
 992
 993     return None
 994
 995
 996 def update_ref(refname, newval, oldval, repo_dir=None):
 997     """Update a repository reference."""
 998     if not oldval:
 999         oldval = ''
1000     assert(refname.startswith('refs/heads/') \
1001            or refname.startswith('refs/tags/'))
1002     p = subprocess.Popen(['git', 'update-ref', refname,
1003                           newval.encode('hex'), oldval.encode('hex')],
1004                          preexec_fn = _gitenv(repo_dir))
1005     _git_wait('git update-ref', p)
1006
1007
def delete_ref(refname, oldvalue=None):
    """Delete a repository reference (see git update-ref(1)).

    refname must start with 'refs/'.  When oldvalue is given it is
    appended to the 'git update-ref -d' command line as an extra
    argument.  The default repository is used.
    """
    assert(refname.startswith('refs/'))
    argv = ['git', 'update-ref', '-d', refname]
    if oldvalue:
        argv.append(oldvalue)
    proc = subprocess.Popen(argv, preexec_fn=_gitenv())
    _git_wait('git update-ref', proc)
1015
1016
def guess_repo(path=None):
    """Choose the default repository path and store it in the global
    "repodir".

    An explicit path always wins.  Otherwise an already-set repodir is
    kept; failing that the BUP_DIR environment variable is used, and
    finally '~/.bup' (with the user's home expanded).  Nothing is
    checked on disk, so this does not fail if no repository exists;
    code that needs a real repository would normally call
    check_repo_or_die() instead.
    """
    global repodir
    if path:
        repodir = path
    if repodir:
        return
    repodir = os.environ.get('BUP_DIR') or os.path.expanduser('~/.bup')
1031
1032
def init_repo(path=None):
    """Create the bare Git repository that bup uses at the given path.

    The location is settled with guess_repo(path).  Raises GitError if
    the parent directory is missing or the target exists but is not a
    directory.  After 'git --bare init', pack.indexVersion is forced to
    2 and core.logAllRefUpdates is switched on.
    """
    guess_repo(path)
    d = repo()  # appends a / to the path
    # Because of that trailing '/', two dirname() calls are needed to
    # reach the parent directory.
    parent = os.path.dirname(os.path.dirname(d))
    if parent and not os.path.exists(parent):
        raise GitError('parent directory "%s" does not exist\n' % parent)
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        raise GitError('"%s" exists but is not a directory\n' % d)
    steps = (
        ('git init', ['git', '--bare', 'init']),
        # Force the index version configuration in order to ensure bup
        # works regardless of the version of the installed Git binary.
        ('git config', ['git', 'config', 'pack.indexVersion', '2']),
        # Enable the reflog
        ('git config', ['git', 'config', 'core.logAllRefUpdates', 'true']),
    )
    for label, argv in steps:
        # git's stdout is sent to our stderr.
        proc = subprocess.Popen(argv, stdout=sys.stderr,
                                preexec_fn=_gitenv())
        _git_wait(label, proc)
1054
1055
def check_repo_or_die(path=None):
    """Exit the program unless a bup repository probably exists.

    A repository is considered present when '<top>/objects/pack' is a
    directory.  Otherwise an error is logged and the process exits
    with status 15 if the top-level path does not exist at all, or 14
    if something is there that is not a repository.
    """
    guess_repo(path)
    top = repo()
    pack_st = stat_if_exists(top + '/objects/pack')
    if pack_st and stat.S_ISDIR(pack_st.st_mode):
        return
    # The top-level path is only examined when the pack directory is
    # missing entirely.
    if not pack_st and not stat_if_exists(top):
        log('error: repository %r does not exist (see "bup help init")\n'
            % top)
        sys.exit(15)
    log('error: %r is not a repository\n' % top)
    sys.exit(14)
1071
1072
_ver = None


def _ver_as_ints(parts):
    """Turn version components into a tuple of ints for comparison.

    Each component contributes its leading run of digits; a component
    with no leading digits counts as 0.
    """
    numbers = []
    for part in parts:
        m = re.match(r'\d+', part)
        numbers.append(int(m.group(0)) if m else 0)
    return tuple(numbers)


def ver():
    """Get Git's version and ensure a usable version is installed.

    The returned version is formatted as an ordered tuple with each position
    representing a digit in the version tag. For example, the following tuple
    would represent version 1.6.6.9:

        ('1', '6', '6', '9')

    The version is read from 'git --version' on the first call and
    cached in the module-level _ver afterwards.

    Raises GitError if the output of 'git --version' cannot be parsed,
    or if the installed version is older than 1.5.3.1.
    """
    global _ver
    if not _ver:
        p = subprocess.Popen(['git', '--version'],
                             stdout=subprocess.PIPE)
        gvs = p.stdout.read()
        _git_wait('git --version', p)
        m = re.match(r'git version (\S+.\S+)', gvs)
        if not m:
            raise GitError('git --version weird output: %r' % gvs)
        _ver = tuple(m.group(1).split('.'))
    needed = ('1','5', '3', '1')
    # Compare numerically: as strings, '10' would sort before '5' and a
    # newer release could be mistaken for an older one.
    if _ver_as_ints(_ver) < _ver_as_ints(needed):
        raise GitError('git version %s or higher is required; you have %s'
                       % ('.'.join(needed), '.'.join(_ver)))
    return _ver
1098
1099
1100 class _AbortableIter:
1101     def __init__(self, it, onabort = None):
1102         self.it = it
1103         self.onabort = onabort
1104         self.done = None
1105
1106     def __iter__(self):
1107         return self
1108
1109     def next(self):
1110         try:
1111             return next(self.it)
1112         except StopIteration as e:
1113             self.done = True
1114             raise
1115         except:
1116             self.abort()
1117             raise
1118
1119     def abort(self):
1120         """Abort iteration and call the abortion callback, if needed."""
1121         if not self.done:
1122             self.done = True
1123             if self.onabort:
1124                 self.onabort()
1125
1126     def __del__(self):
1127         self.abort()
1128
1129
# NOTE(review): nothing in the visible code reads or writes _ver_warned
# apart from the 'global' declaration in CatPipe.__init__.
_ver_warned = 0
class CatPipe:
    """Link to 'git cat-file' that is used to retrieve blob data.

    A single 'git cat-file --batch' subprocess is started on demand
    and reused across requests.  Only one object may be in the middle
    of being read at a time; self.inprogress holds its id while that
    is the case.
    """
    def __init__(self, repo_dir = None):
        """Remember repo_dir and check the git version.

        Logs an error and exits with status 1 if ver() compares below
        1.5.6.  The subprocess itself is not started here.
        """
        global _ver_warned
        self.repo_dir = repo_dir
        # NOTE(review): both sides are tuples of strings, so this is a
        # lexicographic comparison -- confirm that is intended.
        wanted = ('1','5','6')
        if ver() < wanted:
            log('error: git version must be at least 1.5.6\n')
            sys.exit(1)
        self.p = self.inprogress = None

    def _abort(self):
        """Close the pipes to the subprocess and forget about it."""
        # NOTE(review): the process is not waited for here.
        if self.p:
            self.p.stdout.close()
            self.p.stdin.close()
        self.p = None
        self.inprogress = None

    def restart(self):
        """Drop any existing subprocess and start a fresh
        'git cat-file --batch' for self.repo_dir."""
        self._abort()
        self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE,
                                  close_fds = True,
                                  bufsize = 4096,
                                  preexec_fn = _gitenv(self.repo_dir))

    def get(self, id, size=False):
        """Yield info about object id, and then if the object exists, all of
        the data referred to by the object.  If size is false the info
        will just be the object type name.  If size is true, the info
        will be (type, size).  When the object does not exist, in both
        cases the type will be None.

        This is a generator: nothing is sent to git until the first
        item is requested.  Raises GitError if the header line git
        returns does not have the expected three-field shape.

        """
        # (Re)start the subprocess if it was never started or has exited.
        if not self.p or self.p.poll() != None:
            self.restart()
        assert(self.p)
        poll_result = self.p.poll()
        assert(poll_result == None)
        if self.inprogress:
            log('get: opening %r while %r is open\n' % (id, self.inprogress))
        assert(not self.inprogress)
        # The request is sent as a single line, so the id must not
        # contain line breaks or look like an option.
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        assert(not id.startswith('-'))
        self.inprogress = id
        self.p.stdin.write('%s\n' % id)
        self.p.stdin.flush()
        hdr = self.p.stdout.readline()
        if hdr.endswith(' missing\n'):
            self.inprogress = None
            if size:
                yield None, None
            else:
                yield None
            return
        # Otherwise the header should be three space-separated fields,
        # the first being 40 characters long.
        spl = hdr.split(' ')
        if len(spl) != 3 or len(spl[0]) != 40:
            # NOTE(review): self.inprogress is still set when this
            # raises, so a later get() on this pipe will fail its
            # assertion -- confirm that is acceptable.
            raise GitError('expected blob, got %r' % spl)
        hex, typ, sz = spl
        sz = int(sz)
        # If reading the sz bytes of content is cut short, the wrapper
        # calls self._abort(), which closes the pipes.
        it = _AbortableIter(chunkyreader(self.p.stdout, sz),
                            onabort=self._abort)
        try:
            if size:
                yield typ, sz
            else:
                yield typ
            for blob in it:
                yield blob
            # A single newline is expected after the object's content.
            readline_result = self.p.stdout.readline()
            assert(readline_result == '\n')
            self.inprogress = None
        except Exception as e:
            it.abort()
            raise

    def _join(self, it):
        """Yield blob content for the object whose get() iterator is it.

        The first item of it is the object type.  Blobs are yielded as
        they come; trees and commits are read completely and then
        followed recursively via join().  Raises GitError for any
        other type.
        """
        type = next(it)
        if type == 'blob':
            for blob in it:
                yield blob
        elif type == 'tree':
            # The tree is consumed in full before recursing, so the
            # pipe is free again for the nested get() calls.
            treefile = ''.join(it)
            for (mode, name, sha) in tree_decode(treefile):
                for blob in self.join(sha.encode('hex')):
                    yield blob
        elif type == 'commit':
            # Only the first line of the commit is used; it must be
            # the 'tree <id>' line.
            treeline = ''.join(it).split('\n')[0]
            assert(treeline.startswith('tree '))
            for blob in self.join(treeline[5:]):
                yield blob
        else:
            raise GitError('invalid object type %r: expected blob/tree/commit'
                           % type)

    def join(self, id):
        """Generate the content of all blobs that can be reached from an
        object.  The hash given in 'id' must point to a blob, a tree
        or a commit. The content of all blobs that can be seen from trees or
        commits is yielded piece by piece.
        """
        try:
            for d in self._join(self.get(id)):
                yield d
        except StopIteration:
            # NOTE(review): a StopIteration escaping the loop above is
            # only logged, which ends this generator quietly.
            log('booger!\n')
1239
1240
_cp = {}

def cp(repo_dir=None):
    """Return the CatPipe for repo_dir, creating and caching it on
    first use.

    Pipes are cached in the module-level _cp dict under the absolute
    repository path.  When repo_dir is not given, the global repodir
    (or repo() if that is unset) is used.
    """
    global _cp, repodir
    if not repo_dir:
        repo_dir = repodir or repo()
    key = os.path.abspath(repo_dir)
    pipe = _cp.get(key)
    if not pipe:
        pipe = CatPipe(key)
        _cp[key] = pipe
    return pipe
1254
1255
def tags(repo_dir = None):
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}.

    Tag names have the leading 'refs/tags/' removed.
    """
    by_hash = {}
    for refname, target in list_refs(repo_dir = repo_dir, limit_to_tags=True):
        assert(refname.startswith('refs/tags/'))
        # Several tags may point at the same hash, hence the list.
        by_hash.setdefault(target, []).append(refname[10:])
    return by_hash
1266
1267
class MissingObject(KeyError):
    """KeyError raised for an object id that could not be found.

    The id is kept as given in the 'id' attribute; the message shows
    it hex-encoded.
    """
    def __init__(self, id):
        self.id = id
        message = 'object %r is missing' % id.encode('hex')
        KeyError.__init__(self, message)
1272
1273
WalkItem = namedtuple('WalkItem', 'id type mode path chunk_path data')
# path holds the mangled path components.  When an item is a fragment
# of a chunked file, chunk_path is the path of that fragment within
# the file's chunk subtree, e.g. ['', '2d3115e', ...]; the top-level
# item for a chunked file has a chunk_path of [''].  For instance,
# some chunk subtree of the file '/foo/bar/baz' might look like this:
#
#   item.path = ['foo', 'bar', 'baz.bup']
#   item.chunk_path = ['', '2d3115e', '016b097']
#   item.type = 'tree'
#   ...
1286
1287
def walk_object(cat_pipe, id,
                stop_at=None,
                include_data=None):
    """Yield everything reachable from id via cat_pipe as a WalkItem,
    stopping whenever stop_at(id) returns true.  Throw MissingObject
    if a hash encountered is missing from the repository, and don't
    return blob content in the data field unless include_data is set.

    Ids are handled as hex strings throughout.  Commits and trees are
    always read, because their content drives the traversal, but that
    content is only placed in the yielded item when include_data is
    set.  An entry whose mode marks it as a regular file is yielded as
    a 'blob' without being fetched at all when include_data is unset.
    """
    # Maintain the pending stack on the heap to avoid stack overflow
    # Each entry is (id, path, chunk_path, mode); the starting object
    # has empty paths and no mode.
    pending = [(id, [], [], None)]
    while len(pending):
        id, parent_path, chunk_path, mode = pending.pop()
        if stop_at and stop_at(id):
            continue

        if (not include_data) and mode and stat.S_ISREG(mode):
            # If the object is a "regular file", then it's a leaf in
            # the graph, so we can skip reading the data if the caller
            # hasn't requested it.
            yield WalkItem(id=id, type='blob',
                           chunk_path=chunk_path, path=parent_path,
                           mode=mode,
                           data=None)
            continue

        # The first item from get() is the object type; a false value
        # is treated as "object not found".
        item_it = cat_pipe.get(id)
        type = next(item_it)
        if not type:
            raise MissingObject(id.decode('hex'))
        if type not in ('blob', 'commit', 'tree'):
            raise Exception('unexpected repository object type %r' % type)

        # FIXME: set the mode based on the type when the mode is None
        if type == 'blob' and not include_data:
            # Dump data until we can ask cat_pipe not to fetch it
            for ignored in item_it:
                pass
            data = None
        else:
            data = ''.join(item_it)

        yield WalkItem(id=id, type=type,
                       chunk_path=chunk_path, path=parent_path,
                       mode=mode,
                       data=(data if include_data else None))

        if type == 'commit':
            # Queue the parents (keeping the current mode) and the
            # commit's tree.
            commit_items = parse_commit(data)
            for pid in commit_items.parents:
                pending.append((pid, parent_path, chunk_path, mode))
            pending.append((commit_items.tree, parent_path, chunk_path,
                            hashsplit.GIT_MODE_TREE))
        elif type == 'tree':
            for mode, name, ent_id in tree_decode(data):
                # Only bup_type is used below; paths are built from the
                # raw (mangled) entry name.
                demangled, bup_type = demangle_name(name, mode)
                if chunk_path:
                    # Already inside a chunked file: the file path stays
                    # the same and only the chunk path grows.
                    sub_path = parent_path
                    sub_chunk_path = chunk_path + [name]
                else:
                    sub_path = parent_path + [name]
                    if bup_type == BUP_CHUNKED:
                        # Entering a chunked file: start its chunk path.
                        sub_chunk_path = ['']
                    else:
                        sub_chunk_path = chunk_path
                pending.append((ent_id.encode('hex'), sub_path, sub_chunk_path,
                                mode))