-import math
-from bup import _helpers
-from bup.helpers import *
+
+from __future__ import absolute_import
+import io, math, os
+
+from bup import _helpers, compat, helpers
+from bup._helpers import cat_bytes
+from bup.compat import buffer, py_maj
+from bup.helpers import sc_page_size
+
+
+_fmincore = getattr(helpers, 'fmincore', None)
# Splitter tuning: 8192 is the "typical" blob size produced by bupsplit,
# so cap individual blobs at four times that.
BLOB_MAX = 8192*4
BLOB_READ_SIZE = 1024*1024      # stream input files in 1MB reads
progress_callback = None
fanout = 16

# Git tree-entry modes, written as git stores them (octal).
GIT_MODE_FILE = 0o100644
GIT_MODE_TREE = 0o40000
GIT_MODE_SYMLINK = 0o120000
# The purpose of this type of buffer is to avoid copying on peek(), get(),
# and eat(). We do copy the buffer contents on put(), but that should
# be ok if we always only put() large amounts of data at a time.
class Buf:
def __init__(self):
- self.data = ''
+ self.data = b''
self.start = 0
def put(self, s):
if s:
- self.data = buffer(self.data, self.start) + s
+ remaining = len(self.data) - self.start
+ self.data = cat_bytes(self.data, self.start, remaining,
+ s, 0, len(s))
self.start = 0
def peek(self, count):
+ if count <= 256:
+ return self.data[self.start : self.start + count]
return buffer(self.data, self.start, count)
def eat(self, count):
self.start += count
def get(self, count):
- v = buffer(self.data, self.start, count)
+ if count <= 256:
+ v = self.data[self.start : self.start + count]
+ else:
+ v = buffer(self.data, self.start, count)
self.start += count
return v
return len(self.data) - self.start
def _fadvise_pages_done(fd, first_page, count):
    # Tell the kernel we're done with the given page range of fd,
    # converting page units to the byte offsets fadvise expects.
    assert first_page >= 0
    assert count >= 0
    if count:
        _helpers.fadvise_done(fd,
                              first_page * sc_page_size,
                              count * sc_page_size)
+
+
+def _nonresident_page_regions(status_bytes, incore_mask, max_region_len=None):
+ """Return (start_page, count) pairs in ascending start_page order for
+ each contiguous region of nonresident pages indicated by the
+ mincore() status_bytes. Limit the number of pages in each region
+ to max_region_len."""
+ assert(max_region_len is None or max_region_len > 0)
+ start = None
+ for i, x in enumerate(status_bytes):
+ in_core = x & incore_mask
+ if start is None:
+ if not in_core:
+ start = i
+ else:
+ count = i - start
+ if in_core:
+ yield (start, count)
+ start = None
+ elif max_region_len and count >= max_region_len:
+ yield (start, count)
+ start = i
+ if start is not None:
+ yield (start, len(status_bytes) - start)
+
+
def _uncache_ours_upto(fd, offset, first_region, remaining_regions):
    """Uncache the pages of fd described by first_region and
    remaining_regions that end at or before byte offset, where each
    region is a (start_page, count) pair and the region stream is
    terminated by a (None, None) entry.  Return the first region not
    yet handled."""
    start, length = first_region
    while start is not None:
        if (start + length) * sc_page_size > offset:
            break
        _fadvise_pages_done(fd, start, length)
        start, length = next(remaining_regions, (None, None))
    return (start, length)
+
+
def readfile_iter(files, progress=None):
    """Yield the contents of each file in files as a sequence of chunks
    of at most BLOB_READ_SIZE bytes.

    progress, if given, is called as progress(filenum, len(last_chunk))
    before each read.  When the platform provides fmincore, pages that
    weren't already resident before we read them are dropped from the
    page cache behind us, so bulk reads don't evict more useful data.
    """
    for filenum, f in enumerate(files):
        ofs = 0
        b = b''
        fd = rpr = rstart = rlen = None
        if _fmincore and hasattr(f, 'fileno'):
            try:
                fd = f.fileno()
            except io.UnsupportedOperation:
                pass
            # NOTE(review): fd 0 (stdin) is falsy and thus skipped here;
            # presumably harmless since uncaching is best-effort -- confirm.
            if fd:
                mcore = _fmincore(fd)
                if mcore:
                    # Integer division: under Python 3, / would make
                    # max_chunk a float and leak floats into the region
                    # (start, count) arithmetic.
                    max_chunk = max(1, (8 * 1024 * 1024) // sc_page_size)
                    rpr = _nonresident_page_regions(mcore, helpers.MINCORE_INCORE,
                                                    max_chunk)
                    rstart, rlen = next(rpr, (None, None))
        while 1:
            if progress:
                progress(filenum, len(b))
            b = f.read(BLOB_READ_SIZE)
            ofs += len(b)
            if rpr:
                rstart, rlen = _uncache_ours_upto(fd, ofs, (rstart, rlen), rpr)
            if not b:
                break
            yield b
        # The file is fully read; flush any remaining eligible regions.
        if rpr:
            rstart, rlen = _uncache_ours_upto(fd, ofs, (rstart, rlen), rpr)
def _splitbuf(buf, basebits, fanbits):
ofs = 0
l = list(l)
total = sum(size for mode,sha,size, in l)
- vlen = len('%x' % total)
+ vlen = len(b'%x' % total)
shalist = []
for (mode, sha, size) in l:
- shalist.append((mode, '%0*x' % (vlen,ofs), sha))
+ shalist.append((mode, b'%0*x' % (vlen,ofs), sha))
ofs += size
assert(ofs == total)
return (shalist, total)
if len(shalist) == 1:
return (shalist[0][0], shalist[0][2])
elif len(shalist) == 0:
- return (GIT_MODE_FILE, makeblob(''))
+ return (GIT_MODE_FILE, makeblob(b''))
else:
return (GIT_MODE_TREE, maketree(shalist))
except:
pass
raise
-
-
-def fadvise_done(f, ofs):
- assert(ofs >= 0)
- if ofs > 0 and hasattr(f, 'fileno'):
- _helpers.fadvise_done(f.fileno(), ofs)