BLOB_READ_SIZE = 1024*1024
MAX_PER_TREE = 256
progress_callback = None
-max_pack_size = 1000*1000*1000 # larger packs will slow down pruning
-max_pack_objects = 200*1000 # cache memory usage is about 83 bytes per object
fanout = 16
+GIT_MODE_FILE = 0100644
+GIT_MODE_TREE = 040000
+GIT_MODE_SYMLINK = 0120000
+assert(GIT_MODE_TREE != 40000) # 0xxx should be treated as octal
+
# The purpose of this type of buffer is to avoid copying on peek(), get(),
# and eat(). We do copy the buffer contents on put(), but that should
# be ok if we always only put() large amounts of data at a time.
return len(self.data) - self.start
+def _fadvise_done(f, ofs, len):
+ assert(ofs >= 0)
+ assert(len >= 0)
+ if len > 0 and hasattr(f, 'fileno'):
+ _helpers.fadvise_done(f.fileno(), ofs, len)
+
+
def readfile_iter(files, progress=None):
for filenum,f in enumerate(files):
ofs = 0
while 1:
if progress:
progress(filenum, len(b))
- fadvise_done(f, max(0, ofs - 1024*1024))
b = f.read(BLOB_READ_SIZE)
ofs += len(b)
+ # Warning: ofs == 0 means 'done with the whole file'
+ # This will only happen here when the file is empty
+ _fadvise_done(f, 0, ofs)
if not b:
- fadvise_done(f, ofs)
break
yield b
-def _splitbuf(buf):
+def _splitbuf(buf, basebits, fanbits):
while 1:
b = buf.peek(buf.used())
(ofs, bits) = _helpers.splitbuf(b)
- if ofs > BLOB_MAX:
- ofs = BLOB_MAX
if ofs:
+ if ofs > BLOB_MAX:
+ ofs = BLOB_MAX
+ level = 0
+ else:
+ level = (bits-basebits)//fanbits # integer division
buf.eat(ofs)
- yield buffer(b, 0, ofs), bits
+ yield buffer(b, 0, ofs), level
else:
break
while buf.used() >= BLOB_MAX:
def _hashsplit_iter(files, progress):
assert(BLOB_READ_SIZE > BLOB_MAX)
+ basebits = _helpers.blobbits()
+ fanbits = int(math.log(fanout or 128, 2))
buf = Buf()
for inblock in readfile_iter(files, progress):
buf.put(inblock)
- for buf_and_bits in _splitbuf(buf):
- yield buf_and_bits
+ for buf_and_level in _splitbuf(buf, basebits, fanbits):
+ yield buf_and_level
if buf.used():
yield buf.get(buf.used()), 0
return progress(real_filenum, nbytes)
else:
prog = None
- for buf_and_bits in _hashsplit_iter([f], progress=prog):
- yield buf_and_bits
+ for buf_and_level in _hashsplit_iter([f], progress=prog):
+ yield buf_and_level
def hashsplit_iter(files, keep_boundaries, progress):
total_split = 0
-def _split_to_blobs(w, files, keep_boundaries, progress):
+def split_to_blobs(makeblob, files, keep_boundaries, progress):
global total_split
- for (blob, bits) in hashsplit_iter(files, keep_boundaries, progress):
- sha = w.new_blob(blob)
+ for (blob, level) in hashsplit_iter(files, keep_boundaries, progress):
+ sha = makeblob(blob)
total_split += len(blob)
- if w.outbytes >= max_pack_size or w.count >= max_pack_objects:
- w.breakpoint()
if progress_callback:
progress_callback(len(blob))
- yield (sha, len(blob), bits)
+ yield (sha, len(blob), level)
def _make_shalist(l):
ofs = 0
+ l = list(l)
+ total = sum(size for mode,sha,size, in l)
+ vlen = len('%x' % total)
shalist = []
for (mode, sha, size) in l:
- shalist.append((mode, '%016x' % ofs, sha))
+ shalist.append((mode, '%0*x' % (vlen,ofs), sha))
ofs += size
- total = ofs
+ assert(ofs == total)
return (shalist, total)
-def _squish(w, stacks, n):
+def _squish(maketree, stacks, n):
i = 0
- while i<n or len(stacks[i]) > MAX_PER_TREE:
+ while i < n or len(stacks[i]) >= MAX_PER_TREE:
while len(stacks) <= i+1:
stacks.append([])
if len(stacks[i]) == 1:
stacks[i+1] += stacks[i]
elif stacks[i]:
(shalist, size) = _make_shalist(stacks[i])
- tree = w.new_tree(shalist)
- stacks[i+1].append(('40000', tree, size))
+ tree = maketree(shalist)
+ stacks[i+1].append((GIT_MODE_TREE, tree, size))
stacks[i] = []
i += 1
-def split_to_shalist(w, files, keep_boundaries, progress=None):
- sl = _split_to_blobs(w, files, keep_boundaries, progress)
+def split_to_shalist(makeblob, maketree, files,
+ keep_boundaries, progress=None):
+ sl = split_to_blobs(makeblob, files, keep_boundaries, progress)
+ assert(fanout != 0)
if not fanout:
shal = []
- for (sha,size,bits) in sl:
- shal.append(('100644', sha, size))
+ for (sha,size,level) in sl:
+ shal.append((GIT_MODE_FILE, sha, size))
return _make_shalist(shal)[0]
else:
- base_bits = _helpers.blobbits()
- fanout_bits = int(math.log(fanout, 2))
- def bits_to_idx(n):
- assert(n >= base_bits)
- return (n - base_bits)/fanout_bits
stacks = [[]]
- for (sha,size,bits) in sl:
- assert(bits <= 32)
- stacks[0].append(('100644', sha, size))
- if bits > base_bits:
- _squish(w, stacks, bits_to_idx(bits))
+ for (sha,size,level) in sl:
+ stacks[0].append((GIT_MODE_FILE, sha, size))
+ _squish(maketree, stacks, level)
#log('stacks: %r\n' % [len(i) for i in stacks])
- _squish(w, stacks, len(stacks)-1)
+ _squish(maketree, stacks, len(stacks)-1)
#log('stacks: %r\n' % [len(i) for i in stacks])
return _make_shalist(stacks[-1])[0]
-def split_to_blob_or_tree(w, files, keep_boundaries):
- shalist = list(split_to_shalist(w, files, keep_boundaries))
+def split_to_blob_or_tree(makeblob, maketree, files,
+ keep_boundaries, progress=None):
+ shalist = list(split_to_shalist(makeblob, maketree,
+ files, keep_boundaries, progress))
if len(shalist) == 1:
return (shalist[0][0], shalist[0][2])
elif len(shalist) == 0:
- return ('100644', w.new_blob(''))
+ return (GIT_MODE_FILE, makeblob(''))
else:
- return ('40000', w.new_tree(shalist))
+ return (GIT_MODE_TREE, maketree(shalist))
def open_noatime(name):
except:
pass
raise
-
-
-def fadvise_done(f, ofs):
- assert(ofs >= 0)
- if ofs > 0 and hasattr(f, 'fileno'):
- _helpers.fadvise_done(f.fileno(), ofs)