Add a 'bup' wrapper program.

author Avery Pennarun <apenwarr@gmail.com>
Wed, 30 Dec 2009 22:10:03 +0000 (17:10 -0500)
committer Avery Pennarun <apenwarr@gmail.com>
Wed, 30 Dec 2009 22:22:03 +0000 (17:22 -0500)
diff --git a/.gitignore b/.gitignore

index 208c1278429dc8fb898a8ffe803e4f87357bce62..54cec6f1c7dc204956fd4338ee41e03b0b29e015 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,6 @@
-hsplit
-hjoin
-hashsplit
-hashjoin
+bup
+bup-split
+bup-join
  datagen
  *.o
  *.so
diff --git a/Makefile b/Makefile

index a26c421ce6d70c3eb090472f7c59f4b9b8e79f5c..170756039d64073d88fd2e44e594e7627f160adb 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ CFLAGS=-Wall -g -O2 -Werror -I/usr/include/python2.5 -g -fwrapv -fPIC
  
  default: all
  
-all: hashsplit hashjoin datagen hashsplit.so
+all: bup-split bup-join bup datagen hashsplit.so
  
  datagen: datagen.o
  
@@ -10,24 +10,28 @@ hashsplit.so: hashsplitmodule.o
         $(CC) -shared -Wl,-Bsymbolic-functions -o $@ $<
  
  test: all
-       ./hashsplit.py <testfile1 >tags1
-       ./hashsplit.py <testfile2 >tags2
+       ./bup split <testfile1 >tags1
+       ./bup split <testfile2 >tags2
         diff -u tags1 tags2 || true
         wc -c testfile1 testfile2
         wc -l tags1 tags2
-       ./hashjoin <tags1 >out1
-       ./hashjoin <tags2 >out2
+       ./bup join <tags1 >out1
+       ./bup join <tags2 >out2
         diff -u testfile1 out1
         diff -u testfile2 out2
  
  %: %.o
         gcc -o $@ $< $(LDFLAGS) $(LIBS)
         
-%: %.py
+bup: bup.py
         rm -f $@
         ln -s $^ $@
         
-%: %.sh
+bup-%: cmd-%.py
+       rm -f $@
+       ln -s $^ $@
+       
+bup-%: cmd-%.sh
         rm -f $@
         ln -s $^ $@
         
@@ -35,5 +39,6 @@ test: all
         gcc -c -o $@ $^ $(CPPFLAGS) $(CFLAGS)
  
  clean:
-       rm -f *.o *.so *~ hashsplit hashjoin hsplit hjoin datagen *.pyc \
-               out[12] tags[12] .*~
+       rm -f *.o *.so *~ .*~ *.pyc \
+               bup bup-split bup-join datagen \
+               out[12] tags[12]
diff --git a/bup.py b/bup.py

new file mode 100755 (executable)

index 0000000..d15cc48
--- /dev/null
+++ b/bup.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+import sys, os
+
+argv = sys.argv
+exe = argv[0]
+exepath = os.path.split(exe)[0] or '.'
+
+def log(s):
+    sys.stderr.write(s)
+
+def usage():
+    log('Usage: %s <subcmd> <options...>\n\n' % exe)
+    log('Available subcommands:\n')
+    for c in os.listdir(exepath):
+        if c.startswith('bup-') and c.find('.') < 0:
+            log('\t%s\n' % c[4:])
+    exit(99)
+
+
+if len(argv) < 2 or not argv[1] or argv[1][0] == '-':
+    usage()
+
+subcmd = argv[1]
+if subcmd == 'help':
+    usage()
+
+subpath = os.path.join(exepath, 'bup-%s' % subcmd)
+
+if not os.path.exists(subpath):
+    log('%s: unknown command "%s"\n' % (exe, subcmd))
+    usage()
+
+try:
+    os.execv(subpath, [subpath] + argv[2:])
+except OSError, e:
+    log('%s: %s\n' % (subpath, e))
+    exit(98)
diff --git a/cmd-join.sh b/cmd-join.sh

new file mode 100755 (executable)

index 0000000..b04ad43
--- /dev/null
+++ b/cmd-join.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+while read x junk; do
+    git cat-file -p "$x"
+done
diff --git a/cmd-split.py b/cmd-split.py

new file mode 100755 (executable)

index 0000000..34b298a
--- /dev/null
+++ b/cmd-split.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python
+import sys, os, subprocess, errno, zlib, time
+import hashsplit
+from sha import sha
+
+# FIXME: duplicated in C module.  This shouldn't really be here at all...
+BLOBBITS = 14
+BLOBSIZE = 1 << (BLOBBITS-1)
+
+
+def log(s):
+    sys.stderr.write('%s\n' % s)
+
+
+class Buf:
+    def __init__(self):
+        self.data = ''
+        self.start = 0
+
+    def put(self, s):
+        #log('oldsize=%d+%d adding=%d' % (len(self.data), self.start, len(s)))
+        if s:
+            self.data = buffer(self.data, self.start) + s
+            self.start = 0
+            
+    def peek(self, count):
+        return buffer(self.data, self.start, count)
+    
+    def eat(self, count):
+        self.start += count
+
+    def get(self, count):
+        v = buffer(self.data, self.start, count)
+        self.start += count
+        return v
+
+    def used(self):
+        return len(self.data) - self.start
+
+
+def splitbuf(buf):
+    #return buf.get(BLOBSIZE)
+    b = buf.peek(buf.used())
+    ofs = hashsplit.splitbuf(b)
+    if ofs:
+        buf.eat(ofs)
+        return buffer(b, 0, ofs)
+    return None
+
+
+ocache = {}
+def save_blob(blob):
+    header = 'blob %d\0' % len(blob)
+    sum = sha(header)
+    sum.update(blob)
+    hex = sum.hexdigest()
+    dir = '.git/objects/%s' % hex[0:2]
+    fn = '%s/%s' % (dir, hex[2:])
+    if not ocache.get(hex) and not os.path.exists(fn):
+        #log('creating %s' % fn)
+        try:
+            os.mkdir(dir)
+        except OSError, e:
+            if e.errno != errno.EEXIST:
+                raise
+        tfn = '%s.%d' % (fn, os.getpid())
+        f = open(tfn, 'w')
+        z = zlib.compressobj(1)
+        f.write(z.compress(header))
+        f.write(z.compress(blob))
+        f.write(z.flush())
+        f.close()
+        os.rename(tfn, fn)
+    else:
+        #log('exists %s' % fn)
+        pass
+    ocache[hex] = 1
+    print hex
+    return hex
+
+
+def do_main():
+    start_time = time.time()
+    ofs = 0
+    buf = Buf()
+    blob = 1
+
+    eof = 0
+    lv = 0
+    while blob or not eof:
+        if not eof and (buf.used() < BLOBSIZE*2 or not blob):
+            bnew = sys.stdin.read(1024*1024)
+            if not len(bnew): eof = 1
+            #log('got %d, total %d' % (len(bnew), buf.used()))
+            buf.put(bnew)
+
+        blob = splitbuf(buf)
+        if eof and not blob:
+            blob = buf.get(buf.used())
+        if not blob and buf.used() >= BLOBSIZE*8:
+            blob = buf.get(BLOBSIZE*4)  # limit max blob size
+        if not blob and not eof:
+            continue
+
+        if blob:
+            ofs += len(blob)
+            #log('SPLIT @ %-8d size=%-8d (blobsize=%d)'
+            #    % (ofs, len(blob), BLOBSIZE))
+            save_blob(blob)
+          
+        nv = (ofs + buf.used())/1000000
+        if nv != lv:
+            log(nv)
+            lv = nv
+    secs = time.time() - start_time
+    log('\n%.2fkbytes in %.2f secs = %.2f kbytes/sec' 
+        % (ofs/1024., secs, ofs/1024./secs))
+
+
+assert(BLOBSIZE >= 32)
+do_main()
diff --git a/hashjoin.sh b/hashjoin.sh

deleted file mode 100755 (executable)

index b04ad43..0000000
--- a/hashjoin.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-while read x junk; do
-    git cat-file -p "$x"
-done
diff --git a/hashsplit.py b/hashsplit.py

deleted file mode 100755 (executable)

index 34b298a..0000000
--- a/hashsplit.py
+++ /dev/null
@@ -1,121 +0,0 @@
-#!/usr/bin/env python
-import sys, os, subprocess, errno, zlib, time
-import hashsplit
-from sha import sha
-
-# FIXME: duplicated in C module.  This shouldn't really be here at all...
-BLOBBITS = 14
-BLOBSIZE = 1 << (BLOBBITS-1)
-
-
-def log(s):
-    sys.stderr.write('%s\n' % s)
-
-
-class Buf:
-    def __init__(self):
-        self.data = ''
-        self.start = 0
-
-    def put(self, s):
-        #log('oldsize=%d+%d adding=%d' % (len(self.data), self.start, len(s)))
-        if s:
-            self.data = buffer(self.data, self.start) + s
-            self.start = 0
-            
-    def peek(self, count):
-        return buffer(self.data, self.start, count)
-    
-    def eat(self, count):
-        self.start += count
-
-    def get(self, count):
-        v = buffer(self.data, self.start, count)
-        self.start += count
-        return v
-
-    def used(self):
-        return len(self.data) - self.start
-
-
-def splitbuf(buf):
-    #return buf.get(BLOBSIZE)
-    b = buf.peek(buf.used())
-    ofs = hashsplit.splitbuf(b)
-    if ofs:
-        buf.eat(ofs)
-        return buffer(b, 0, ofs)
-    return None
-
-
-ocache = {}
-def save_blob(blob):
-    header = 'blob %d\0' % len(blob)
-    sum = sha(header)
-    sum.update(blob)
-    hex = sum.hexdigest()
-    dir = '.git/objects/%s' % hex[0:2]
-    fn = '%s/%s' % (dir, hex[2:])
-    if not ocache.get(hex) and not os.path.exists(fn):
-        #log('creating %s' % fn)
-        try:
-            os.mkdir(dir)
-        except OSError, e:
-            if e.errno != errno.EEXIST:
-                raise
-        tfn = '%s.%d' % (fn, os.getpid())
-        f = open(tfn, 'w')
-        z = zlib.compressobj(1)
-        f.write(z.compress(header))
-        f.write(z.compress(blob))
-        f.write(z.flush())
-        f.close()
-        os.rename(tfn, fn)
-    else:
-        #log('exists %s' % fn)
-        pass
-    ocache[hex] = 1
-    print hex
-    return hex
-
-
-def do_main():
-    start_time = time.time()
-    ofs = 0
-    buf = Buf()
-    blob = 1
-
-    eof = 0
-    lv = 0
-    while blob or not eof:
-        if not eof and (buf.used() < BLOBSIZE*2 or not blob):
-            bnew = sys.stdin.read(1024*1024)
-            if not len(bnew): eof = 1
-            #log('got %d, total %d' % (len(bnew), buf.used()))
-            buf.put(bnew)
-
-        blob = splitbuf(buf)
-        if eof and not blob:
-            blob = buf.get(buf.used())
-        if not blob and buf.used() >= BLOBSIZE*8:
-            blob = buf.get(BLOBSIZE*4)  # limit max blob size
-        if not blob and not eof:
-            continue
-
-        if blob:
-            ofs += len(blob)
-            #log('SPLIT @ %-8d size=%-8d (blobsize=%d)'
-            #    % (ofs, len(blob), BLOBSIZE))
-            save_blob(blob)
-          
-        nv = (ofs + buf.used())/1000000
-        if nv != lv:
-            log(nv)
-            lv = nv
-    secs = time.time() - start_time
-    log('\n%.2fkbytes in %.2f secs = %.2f kbytes/sec' 
-        % (ofs/1024., secs, ofs/1024./secs))
-
-
-assert(BLOBSIZE >= 32)
-do_main()
author	Avery Pennarun <apenwarr@gmail.com>
	Wed, 30 Dec 2009 22:10:03 +0000 (17:10 -0500)
committer	Avery Pennarun <apenwarr@gmail.com>
	Wed, 30 Dec 2009 22:22:03 +0000 (17:22 -0500)
.gitignore		patch | blob | history
Makefile		patch | blob | history
bup.py	[new file with mode: 0755]	patch | blob
cmd-join.sh	[new file with mode: 0755]	patch | blob
cmd-split.py	[new file with mode: 0755]	patch | blob
hashjoin.sh	[deleted file]	patch | blob | history
hashsplit.py	[deleted file]	patch | blob | history