cmd-index/cmd-save: correctly mark directories as dirty/clean.

author Avery Pennarun <apenwarr@gmail.com>
Tue, 9 Feb 2010 01:28:51 +0000 (20:28 -0500)

committer Avery Pennarun <apenwarr@gmail.com>
Tue, 9 Feb 2010 06:01:20 +0000 (01:01 -0500)

diff --git a/cmd-index.py b/cmd-index.py

index ac7e47a007b08692211e07d5b4ffc6c66656eb2d..7b4f94849511c0e3874e0a99df1a3e90f144410e 100755 (executable)
--- a/cmd-index.py
+++ b/cmd-index.py
@@ -4,34 +4,10 @@ import options, git, index, drecurse
  from helpers import *
  
  
-def _simplify_iter(iters):
-    total = sum(len(it) for it in iters)
-    l = [iter(it) for it in iters]
-    del iters
-    l = [(next(it),it) for it in l]
-    l = filter(lambda x: x[0], l)
-    count = 0
-    while l:
-        if not (count % 1024):
-            progress('bup: merging indexes (%d/%d)\r' % (count, total))
-        l.sort()
-        (e,it) = l.pop()
-        if not e:
-            continue
-        #log('merge: %r %r (%d)\n' % (e.ctime, e.name, len(l)))
-        if e.ctime:  # skip auto-generated entries
-            yield e
-        n = next(it)
-        if n:
-            l.append((n,it))
-        count += 1
-    log('bup: merging indexes (%d/%d), done.\n' % (count, total))
-
-
  def merge_indexes(out, r1, r2):
-    for e in _simplify_iter([r1, r2]):
-        #if e.flags & index.IX_EXISTS:
-            out.add_ixentry(e)
+    for e in index.MergeIter([r1, r2]):
+        # FIXME: shouldn't we remove deleted entries eventually?  When?
+        out.add_ixentry(e)
  
  
  class IterHelper:
@@ -82,15 +58,12 @@ def update_index(top):
      hashgen = None
      if opt.fake_valid:
          def hashgen(name):
-            return (0, index.FAKE_SHA)
-
-    #log('doing: %r\n' % paths)
+            return (0100644, index.FAKE_SHA)
  
      total = 0
      for (path,pst) in drecurse.recursive_dirlist([top], xdev=opt.xdev):
-        #log('got: %r\n' % path)
          if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)):
-            sys.stdout.write('%s\n' % path)
+            sys.stdout.write('%-70s\n' % path)
              sys.stdout.flush()
              progress('Indexing: %d\r' % total)
          elif not (total % 128):
@@ -107,10 +80,11 @@ def update_index(top):
                  if hashgen:
                      (rig.cur.gitmode, rig.cur.sha) = hashgen(path)
                      rig.cur.flags |= index.IX_HASHVALID
-                rig.cur.repack()
+            if opt.fake_invalid:
+                rig.cur.invalidate()
+            rig.cur.repack()
              rig.next()
          else:  # new paths
-            #log('adding: %r\n' % path)
              wi.add(path, pst, hashgen = hashgen)
      progress('Indexing: %d, done.\n' % total)
      
@@ -144,6 +118,7 @@ l,long     print more information about each file
  u,update   (recursively) update the index entries for the given filenames
  x,xdev,one-file-system  don't cross filesystem boundaries
  fake-valid mark all index entries as up-to-date even if they aren't
+fake-invalid mark all index entries as invalid
  check      carefully check index file integrity
  f,indexfile=  the name of the index file (default 'index')
  v,verbose  increase log output (can be used more than once)
@@ -154,8 +129,11 @@ o = options.Options('bup index', optspec)
  if not (opt.modified or opt['print'] or opt.status or opt.update or opt.check):
      log('bup index: supply one or more of -p, -s, -m, -u, or --check\n')
      o.usage()
-if opt.fake_valid and not opt.update:
-    log('bup index: --fake-valid is meaningless without -u\n')
+if (opt.fake_valid or opt.fake_invalid) and not opt.update:
+    log('bup index: --fake-{in,}valid are meaningless without -u\n')
+    o.usage()
+if opt.fake_valid and opt.fake_invalid:
+    log('bup index: --fake-valid is incompatible with --fake-invalid\n')
      o.usage()
  
  git.check_repo_or_die()
@@ -178,8 +156,7 @@ if opt['print'] or opt.status or opt.modified:
      for (name, ent) in index.Reader(indexfile).filter(extra or ['']):
          if (opt.modified 
              and (ent.flags & index.IX_HASHVALID
-                 or not ent.mode
-                 or stat.S_ISDIR(ent.mode))):
+                 or not ent.mode)):
              continue
          line = ''
          if opt.status:
@@ -193,11 +170,10 @@ if opt['print'] or opt.status or opt.modified:
              else:
                  line += '  '
          if opt.long:
-            line += "%7s " % oct(ent.mode)
+            line += "%7s %7s " % (oct(ent.mode), oct(ent.gitmode))
          if opt.hash:
              line += ent.sha.encode('hex') + ' '
          print line + (name or './')
-        #print repr(ent)
  
  if opt.check:
      log('check: starting final check.\n')
diff --git a/cmd-save.py b/cmd-save.py

index 8a06593b70ffc8b94fee6e46be11c0e687ac33c4..de1457b50ec07b8d10a91fc77d27d38052fb2271 100755 (executable)
--- a/cmd-save.py
+++ b/cmd-save.py
@@ -60,6 +60,7 @@ def _pop():
      shalist = shalists.pop()
      tree = w.new_tree(shalist)
      shalists[-1].append(('40000', part, tree))
+    return tree
  
  lastremain = None
  def progress_report(n):
@@ -149,15 +150,15 @@ for (transname,ent) in r.filter(extra):
              _push(part)
  
      if not file:
-        # directory already handled.
-        # FIXME: not using the indexed tree sha1's for anything, which is
-        # a waste.  That's a potential optimization...
+        # sub/parentdirectories already handled in the pop/push() part above.
+        ent.validate(040000, _pop())
+        ent.repack()
          count += ent.size
          continue  
  
      id = None
      if hashvalid:
-        mode = '%o' % ent.mode
+        mode = '%o' % ent.gitmode
          id = ent.sha
          shalists[-1].append((mode, file, id))
      elif opt.smaller and ent.size >= opt.smaller:
@@ -188,7 +189,7 @@ for (transname,ent) in r.filter(extra):
                  add_error(Exception('skipping special file "%s"' % ent.name))
              count += ent.size
          if id:
-            ent.validate(id)
+            ent.validate(int(mode, 8), id)
              ent.repack()
              shalists[-1].append((mode, file, id))
  
diff --git a/index.py b/index.py

index a4fb4200a129e2a0c30a3f43139a096207abf6ae..613fa6b565d0d1ca6396fef50c18813f4351afe2 100644 (file)
--- a/index.py
+++ b/index.py
@@ -94,13 +94,24 @@ class Entry:
          self.mode = st.st_mode
          self.flags |= IX_EXISTS
          if int(st.st_ctime) >= tstart or old != new:
-            self.flags &= ~IX_HASHVALID
-            self.set_dirty()
+            self.invalidate()
  
-    def validate(self, sha):
+    def is_valid(self):
+        f = IX_HASHVALID|IX_EXISTS
+        return (self.flags & f) == f
+
+    def invalidate(self):
+        self.flags &= ~IX_HASHVALID
+        self.set_dirty()
+
+    def validate(self, gitmode, sha):
          assert(sha)
+        self.gitmode = gitmode
          self.sha = sha
-        self.flags |= IX_HASHVALID
+        self.flags |= IX_HASHVALID|IX_EXISTS
+
+    def is_deleted(self):
+        return (self.flags & IX_EXISTS) == 0
  
      def set_deleted(self):
          self.flags &= ~(IX_EXISTS | IX_HASHVALID)
@@ -109,8 +120,16 @@ class Entry:
      def set_dirty(self):
          pass # FIXME
  
+    def is_real(self):
+        return not self.is_fake()
+
+    def is_fake(self):
+        return not self.ctime
+
      def __cmp__(a, b):
-        return cmp(a.name, b.name)
+        return (cmp(a.name, b.name)
+                or -cmp(a.is_valid(), b.is_valid())
+                or -cmp(a.is_fake(), b.is_fake()))
  
      def write(self, f):
          f.write(self.basename + '\0' + self.packed())
@@ -135,8 +154,9 @@ class BlankNewEntry(NewEntry):
  
  
  class ExistingEntry(Entry):
-    def __init__(self, basename, name, m, ofs):
+    def __init__(self, parent, basename, name, m, ofs):
          Entry.__init__(self, basename, name)
+        self.parent = parent
          self._m = m
          self._ofs = ofs
          (self.dev, self.ctime, self.mtime, self.uid, self.gid,
@@ -146,6 +166,9 @@ class ExistingEntry(Entry):
  
      def repack(self):
          self._m[self._ofs:self._ofs+ENTLEN] = self.packed()
+        if self.parent and not self.is_valid():
+            self.parent.invalidate()
+            self.parent.repack()
  
      def iter(self, name=None):
          dname = name
@@ -160,7 +183,7 @@ class ExistingEntry(Entry):
              assert(eon >= ofs)
              assert(eon > ofs)
              basename = str(buffer(self._m, ofs, eon-ofs))
-            child = ExistingEntry(basename, self.name + basename,
+            child = ExistingEntry(self, basename, self.name + basename,
                                    self._m, eon+1)
              if (not dname
                   or child.name.startswith(dname)
@@ -216,7 +239,7 @@ class Reader:
              assert(eon >= ofs)
              assert(eon > ofs)
              basename = str(buffer(self.m, ofs, eon-ofs))
-            yield ExistingEntry(basename, basename, self.m, eon+1)
+            yield ExistingEntry(None, basename, basename, self.m, eon+1)
              ofs = eon + 1 + ENTLEN
  
      def iter(self, name=None):
@@ -224,7 +247,8 @@ class Reader:
              dname = name
              if dname and not dname.endswith('/'):
                  dname += '/'
-            root = ExistingEntry('/', '/', self.m, len(self.m)-FOOTLEN-ENTLEN)
+            root = ExistingEntry(None, '/', '/',
+                                 self.m, len(self.m)-FOOTLEN-ENTLEN)
              for sub in root.iter(name=name):
                  yield sub
              if not dname or dname == root.name:
@@ -364,3 +388,36 @@ def reduce_paths(paths):
      paths.sort(reverse=True)
      return paths
  
+
+class MergeIter:
+    def __init__(self, iters):
+        self.iters = iters
+
+    def __len__(self):
+        # FIXME: doesn't remove duplicated entries between iters.
+        # That only happens for parent directories, but will mean the
+        # actual iteration returns fewer entries than this function counts.
+        return sum(len(it) for it in self.iters)
+
+    def __iter__(self):
+        total = len(self)
+        l = [iter(it) for it in self.iters]
+        l = [(next(it),it) for it in l]
+        l = filter(lambda x: x[0], l)
+        count = 0
+        lastname = None
+        while l:
+            if not (count % 1024):
+                progress('bup: merging indexes (%d/%d)\r' % (count, total))
+            l.sort()
+            (e,it) = l.pop()
+            if not e:
+                continue
+            if e.name != lastname:
+                yield e
+                lastname = e.name
+            n = next(it)
+            if n:
+                l.append((n,it))
+            count += 1
+        log('bup: merging indexes (%d/%d), done.\n' % (count, total))
diff --git a/t/test.sh b/t/test.sh

index 0f512a33c9d22c4739130ed6e8e7b2417519335f..d5a1917f3aa6d26ef2fd3449583988932219f1e8 100755 (executable)
--- a/t/test.sh
+++ b/t/test.sh
@@ -30,13 +30,13 @@ touch $D/a $D/b $D/f
  mkdir $D/d $D/d/e
  WVPASSEQ "$(bup index -s $D/)" "A $D/"
  WVPASSEQ "$(bup index -s $D/b)" ""
-bup tick
  WVPASSEQ "$(bup index --check -us $D/b)" "A $D/b"
  WVPASSEQ "$(bup index --check -us $D/b $D/d)" \
  "A $D/d/e/
  A $D/d/
  A $D/b"
  touch $D/d/z
+bup tick
  WVPASSEQ "$(bup index --check -usx $D)" \
  "A $D/f
  A $D/d/z
@@ -49,31 +49,53 @@ WVPASSEQ "$(bup index --check -us $D/a $D/b --fake-valid)" \
  "  $D/b
    $D/a"
  WVPASSEQ "$(bup index --check -us $D/a)" "  $D/a"  # stays unmodified
-touch $D/a
-WVPASS bup index -u $D/a  # becomes modified
+WVPASSEQ "$(bup index --check -us $D/d --fake-valid)" \
+"  $D/d/z
+  $D/d/e/
+  $D/d/"
+touch $D/d/z
+WVPASS bup index -u $D/d/z  # becomes modified
  WVPASSEQ "$(bup index -s $D/a $D $D/b)" \
  "A $D/f
-A $D/d/z
-A $D/d/e/
-A $D/d/
+M $D/d/z
+  $D/d/e/
+M $D/d/
    $D/b
-M $D/a
+  $D/a
  A $D/"
  
-# FIXME: currently directories are never marked unmodified, so -m just skips
-# them.  Eventually, we should actually store the hashes of completed
-# directories, at which time the output of -m will change, so we'll have to
-# change this test too.
+WVPASS bup index -u $D/d/e $D/a --fake-invalid
  WVPASSEQ "$(cd $D && bup index -m .)" \
  "./f
  ./d/z
-./a"
+./d/e/
+./d/
+./a
+./"
  WVPASSEQ "$(cd $D && bup index -m)" \
  "f
  d/z
-a"
+d/e/
+d/
+a
+./"
  WVPASSEQ "$(cd $D && bup index -s .)" "$(cd $D && bup index -s .)"
  
+WVPASS bup save -t $D/d/e
+WVPASSEQ "$(cd $D && bup index -m)" \
+"f
+d/z
+d/
+a
+./"
+WVPASS bup save -t $D/d
+WVPASSEQ "$(cd $D && bup index -m)" \
+"f
+a
+./"
+WVPASS bup save -t $D
+WVPASSEQ "$(cd $D && bup index -m)" ""
+
  
  WVSTART "split"
  WVPASS bup split --bench -b <t/testfile1 >tags1.tmp
@@ -142,7 +164,7 @@ WVFAIL bup fsck --quick
  WVFAIL bup fsck --quick -rvv -j99   # fails because repairs were needed
  if bup fsck --par2-ok; then
      WVPASS bup fsck -r # ok because of repairs from last time
-    WVPASS bup damage $BUP_DIR/objects/pack/*.pack -n201 -s1 --equal -S0
+    WVPASS bup damage $BUP_DIR/objects/pack/*.pack -n202 -s1 --equal -S0
      WVFAIL bup fsck
      WVFAIL bup fsck -rvv   # too many errors to be repairable
      WVFAIL bup fsck -r   # too many errors to be repairable
diff --git a/t/tindex.py b/t/tindex.py

index bcf87f7f78dda3899d8a713e04e9b541e881063a..e5c043b7633ff48fb9a54038264dc179e34b0eb2 100644 (file)
--- a/t/tindex.py
+++ b/t/tindex.py
@@ -4,7 +4,7 @@ from wvtest import *
  from helpers import *
  
  @wvtest
-def testbasic():
+def index_basic():
      cd = os.path.realpath('')
      WVPASS(cd)
      sd = os.path.realpath('t/sampledata')
@@ -15,7 +15,7 @@ def testbasic():
  
  
  @wvtest
-def testwriter():
+def index_writer():
      unlink('index.tmp')
      ds = os.stat('.')
      fs = os.stat('t/tindex.py')
@@ -25,3 +25,98 @@ def testwriter():
      w.add('/etc/', ds)
      w.add('/', ds)
      w.close()
+
+
+def dump(m):
+    for e in list(m):
+        print '%s%s %s' % (e.is_valid() and ' ' or 'M',
+                           e.is_fake() and 'F' or ' ',
+                           e.name)
+
+def fake_validate(*l):
+    for i in l:
+        for e in i:
+            e.validate(0100644, index.FAKE_SHA)
+            e.repack()
+
+def eget(l, ename):
+    for e in l:
+        if e.name == ename:
+            return e
+
+
+@wvtest
+def index_dirty():
+    unlink('index.tmp')
+    unlink('index2.tmp')
+    ds = os.stat('.')
+    fs = os.stat('t/tindex.py')
+    
+    w1 = index.Writer('index.tmp')
+    w1.add('/a/b/x', fs)
+    w1.add('/a/b/c', fs)
+    w1.add('/a/b/', ds)
+    w1.add('/a/', ds)
+    #w1.close()
+    WVPASS()
+
+    w2 = index.Writer('index2.tmp')
+    w2.add('/a/b/n/2', fs)
+    #w2.close()
+    WVPASS()
+
+    w3 = index.Writer('index3.tmp')
+    w3.add('/a/c/n/3', fs)
+    #w3.close()
+    WVPASS()
+
+    r1 = w1.new_reader()
+    r2 = w2.new_reader()
+    r3 = w3.new_reader()
+    WVPASS()
+
+    r1all = [e.name for e in r1]
+    WVPASSEQ(r1all,
+             ['/a/b/x', '/a/b/c', '/a/b/', '/a/', '/'])
+    r2all = [e.name for e in r2]
+    WVPASSEQ(r2all,
+             ['/a/b/n/2', '/a/b/n/', '/a/b/', '/a/', '/'])
+    r3all = [e.name for e in r3]
+    WVPASSEQ(r3all,
+             ['/a/c/n/3', '/a/c/n/', '/a/c/', '/a/', '/'])
+    m = index.MergeIter([r2,r1,r3])
+    all = [e.name for e in m]
+    WVPASSEQ(all,
+             ['/a/c/n/3', '/a/c/n/', '/a/c/',
+              '/a/b/x', '/a/b/n/2', '/a/b/n/', '/a/b/c',
+              '/a/b/', '/a/', '/'])
+    fake_validate(r1)
+    dump(r1)
+
+    print [hex(e.flags) for e in r1]
+    WVPASSEQ([e.name for e in r1 if e.is_valid()], r1all)
+    WVPASSEQ([e.name for e in r1 if not e.is_valid()], [])
+    WVPASSEQ([e.name for e in m if not e.is_valid()],
+             ['/a/c/n/3', '/a/c/n/', '/a/c/',
+              '/a/b/n/2', '/a/b/n/', '/a/b/', '/a/', '/'])
+
+    expect_invalid = ['/'] + r2all + r3all
+    expect_real = (set(r1all) - set(r2all) - set(r3all)) \
+                    | set(['/a/b/n/2', '/a/c/n/3'])
+    dump(m)
+    for e in m:
+        print e.name, hex(e.flags), e.ctime
+        eiv = e.name in expect_invalid
+        er  = e.name in expect_real
+        WVPASSEQ(eiv, not e.is_valid())
+        WVPASSEQ(er, e.is_real())
+    fake_validate(r2, r3)
+    dump(m)
+    WVPASSEQ([e.name for e in m if not e.is_valid()], [])
+    
+    e = eget(m, '/a/b/c')
+    e.invalidate()
+    e.repack()
+    dump(m)
+    WVPASSEQ([e.name for e in m if not e.is_valid()],
+             ['/a/b/c', '/a/b/', '/a/', '/'])
author	Avery Pennarun <apenwarr@gmail.com>
	Tue, 9 Feb 2010 01:28:51 +0000 (20:28 -0500)
committer	Avery Pennarun <apenwarr@gmail.com>
	Tue, 9 Feb 2010 06:01:20 +0000 (01:01 -0500)
cmd-index.py		patch | blob | history
cmd-save.py		patch | blob | history
index.py		patch | blob | history
t/test.sh		patch | blob | history
t/tindex.py		patch | blob | history