arthur.barton.de Git - bup.git/commitdiff
Convert bup to binary executable and run python subcommands directly
authorRob Browning <rlb@defaultvalue.org>
Sat, 3 Apr 2021 20:17:01 +0000 (15:17 -0500)
committerRob Browning <rlb@defaultvalue.org>
Sat, 3 Apr 2021 20:17:01 +0000 (15:17 -0500)
Don't execute python subcommands in a child process; run them
directly.  Convert bup (and dev/python, etc.) to be C executables.
See the commit messages, comments, and doc changes (e.g. DESIGN) for
more information.

Signed-off-by: Rob Browning <rlb@defaultvalue.org>
Tested-by: Rob Browning <rlb@defaultvalue.org>
125 files changed:
.cirrus.yml
.gitignore
DESIGN
GNUmakefile [new file with mode: 0644]
Makefile
README.md
cmd [deleted symlink]
config/config.vars.in
config/configure
dev/bup-python [deleted file]
dev/cleanup-mounts-under
dev/configure-sampledata
dev/data-size
dev/echo-argv-bytes
dev/hardlink-sets
dev/id-other-than
dev/install-python-script [deleted file]
dev/lib.sh
dev/make-random-paths
dev/mksock
dev/ns-timestamp-resolutions
dev/prep-for-macos-build
dev/python.c [new file with mode: 0644]
dev/root-status
dev/sparse-test-data
dev/subtree-hash
dev/unknown-owner
dev/validate-python [new file with mode: 0755]
lib/bup/_helpers.c
lib/bup/cmd/__init__.py [new file with mode: 0644]
lib/bup/cmd/bloom.py [new file with mode: 0755]
lib/bup/cmd/cat_file.py [new file with mode: 0755]
lib/bup/cmd/daemon.py [new file with mode: 0755]
lib/bup/cmd/damage.py [new file with mode: 0755]
lib/bup/cmd/drecurse.py [new file with mode: 0755]
lib/bup/cmd/features.py [new file with mode: 0755]
lib/bup/cmd/fsck.py [new file with mode: 0755]
lib/bup/cmd/ftp.py [new file with mode: 0755]
lib/bup/cmd/fuse.py [new file with mode: 0755]
lib/bup/cmd/gc.py [new file with mode: 0755]
lib/bup/cmd/get.py [new file with mode: 0755]
lib/bup/cmd/help.py [new file with mode: 0755]
lib/bup/cmd/import_duplicity.py [new file with mode: 0755]
lib/bup/cmd/index.py [new file with mode: 0755]
lib/bup/cmd/init.py [new file with mode: 0755]
lib/bup/cmd/join.py [new file with mode: 0755]
lib/bup/cmd/list_idx.py [new file with mode: 0755]
lib/bup/cmd/ls.py [new file with mode: 0755]
lib/bup/cmd/margin.py [new file with mode: 0755]
lib/bup/cmd/memtest.py [new file with mode: 0755]
lib/bup/cmd/meta.py [new file with mode: 0755]
lib/bup/cmd/midx.py [new file with mode: 0755]
lib/bup/cmd/mux.py [new file with mode: 0755]
lib/bup/cmd/on.py [new file with mode: 0755]
lib/bup/cmd/on__server.py [new file with mode: 0755]
lib/bup/cmd/prune_older.py [new file with mode: 0755]
lib/bup/cmd/random.py [new file with mode: 0755]
lib/bup/cmd/restore.py [new file with mode: 0755]
lib/bup/cmd/rm.py [new file with mode: 0755]
lib/bup/cmd/save.py [new file with mode: 0755]
lib/bup/cmd/server.py [new file with mode: 0755]
lib/bup/cmd/split.py [new file with mode: 0755]
lib/bup/cmd/tag.py [new file with mode: 0755]
lib/bup/cmd/tick.py [new file with mode: 0755]
lib/bup/cmd/version.py [new file with mode: 0755]
lib/bup/cmd/web.py [new file with mode: 0755]
lib/bup/cmd/xstat.py [new file with mode: 0755]
lib/bup/compat.py
lib/bup/csetup.py [deleted file]
lib/bup/git.py
lib/bup/ls.py
lib/bup/main.py [new file with mode: 0755]
lib/bup/options.py
lib/cmd/bloom-cmd.py [deleted file]
lib/cmd/bup [deleted file]
lib/cmd/bup-import-rdiff-backup [new file with mode: 0755]
lib/cmd/bup-import-rsnapshot [new file with mode: 0755]
lib/cmd/bup.c [new file with mode: 0644]
lib/cmd/cat-file-cmd.py [deleted file]
lib/cmd/daemon-cmd.py [deleted file]
lib/cmd/damage-cmd.py [deleted file]
lib/cmd/drecurse-cmd.py [deleted file]
lib/cmd/features-cmd.py [deleted file]
lib/cmd/fsck-cmd.py [deleted file]
lib/cmd/ftp-cmd.py [deleted file]
lib/cmd/fuse-cmd.py [deleted file]
lib/cmd/gc-cmd.py [deleted file]
lib/cmd/get-cmd.py [deleted file]
lib/cmd/help-cmd.py [deleted file]
lib/cmd/import-duplicity-cmd.py [deleted file]
lib/cmd/import-rdiff-backup-cmd.sh [deleted file]
lib/cmd/import-rsnapshot-cmd.sh [deleted file]
lib/cmd/index-cmd.py [deleted file]
lib/cmd/init-cmd.py [deleted file]
lib/cmd/join-cmd.py [deleted file]
lib/cmd/list-idx-cmd.py [deleted file]
lib/cmd/ls-cmd.py [deleted file]
lib/cmd/margin-cmd.py [deleted file]
lib/cmd/memtest-cmd.py [deleted file]
lib/cmd/meta-cmd.py [deleted file]
lib/cmd/midx-cmd.py [deleted file]
lib/cmd/mux-cmd.py [deleted file]
lib/cmd/on--server-cmd.py [deleted file]
lib/cmd/on-cmd.py [deleted file]
lib/cmd/prune-older-cmd.py [deleted file]
lib/cmd/random-cmd.py [deleted file]
lib/cmd/restore-cmd.py [deleted file]
lib/cmd/rm-cmd.py [deleted file]
lib/cmd/save-cmd.py [deleted file]
lib/cmd/server-cmd.py [deleted file]
lib/cmd/split-cmd.py [deleted file]
lib/cmd/tag-cmd.py [deleted file]
lib/cmd/tick-cmd.py [deleted file]
lib/cmd/version-cmd.py [deleted file]
lib/cmd/web-cmd.py [deleted file]
lib/cmd/xstat-cmd.py [deleted file]
src/bup/compat.c [new file with mode: 0644]
src/bup/compat.h [new file with mode: 0644]
src/bup/io.c [new file with mode: 0644]
src/bup/io.h [new file with mode: 0644]
test/ext/test-meta
test/ext/test-misc
test/ext/test-save-errors
test/ext/test-web
wvtest-bup.sh

index d676e7c3f6e6169f87ec04a264561fff806179c4..4148b3b3aa132a2f899141dab3a5bf9f333147b7 100644 (file)
@@ -79,22 +79,6 @@ task:
     chown -R bup:bup .
     su -l bup -c "eatmydata make -j6 -C '$(pwd)' PYTHON=python3 check"
 
-task:
-  name: freebsd (py2)
-  freebsd_instance:
-    image: freebsd-12-1-release-amd64
-    cpu: 4
-    memory: 4
-  script: |
-    set -xe
-    dev/prep-for-freebsd-build python2
-    dev/system-info
-    gmake -j6 PYTHON=python2 check
-    # It looks like su might not work here...
-    #pw useradd -n bup -s /bin/sh -m -w no
-    #chown -R bup .
-    #su -l bup -c "gmake -j3 -C '$PWD' check"
-
 task:
   name: freebsd (py3)
   freebsd_instance:
@@ -107,18 +91,6 @@ task:
     dev/system-info
     gmake -j6 PYTHON=python3.7 check
 
-task:
-  name: macos (py2)
-  allow_failures: true
-  osx_instance:
-    image: catalina-base
-  script: |
-    set -xe
-    dev/prep-for-macos-build python2
-    export PKG_CONFIG_PATH=/usr/local/opt/readline/lib/pkgconfig
-    dev/system-info
-    make -j4 PYTHON=python2 check
-
 task:
   name: macos (py3)
   osx_instance:
index bda8de8bff067ae5d048518a7c012257c608771a..1555c5a72a0c45dad2ff28db516d585d581ff50f 100644 (file)
@@ -1,20 +1,20 @@
-\#*#
-.#*
-randomgen
-memtest
-*.o
-*.so
-*.exe
-*.dll
+*.swp
 *~
-*.pyc
-*.tmp
-*.tmp.meta
-/build
-/config/bin/
+/config/config.h.tmp
+/dev/bup-exec
+/dev/bup-python
+/dev/python
+/lib/bup/_helpers.dll
+/lib/bup/_helpers.so
 /lib/bup/checkout_info.py
-*.swp
-nbproject
-/lib/cmd/bup-*
+/lib/cmd/bup
+/nbproject/
+/test/int/__init__.pyc
+/test/lib/__init__.pyc
+/test/lib/buptest/__init__.pyc
+/test/lib/buptest/vfs.pyc
+/test/lib/wvpytest.pyc
 /test/sampledata/var/
 /test/tmp/
+\#*#
+__pycache__/
diff --git a/DESIGN b/DESIGN
index 89b06b7d83596f05094938e446f2f03253f5aec8..d6e8c1b17403411d063181d8055d2c08206aa503 100644 (file)
--- a/DESIGN
+++ b/DESIGN
@@ -18,17 +18,41 @@ source code to follow along and see what we're talking about.  bup's code is
 written primarily in python with a bit of C code in speed-sensitive places. 
 Here are the most important things to know:
 
- - bup (symlinked to main.py) is the main program that runs when you type
-   'bup'.
- - cmd/bup-* (mostly symlinked to cmd/*-cmd.py) are the individual
-   subcommands, in a way similar to how git breaks all its subcommands into
-   separate programs.  Not all the programs have to be written in python;
-   they could be in any language, as long as they end up named cmd/bup-*. 
-   We might end up re-coding large parts of bup in C eventually so that it
-   can be even faster and (perhaps) more portable.
-
- - lib/bup/*.py are python library files used by the cmd/*.py commands. 
+ - The main program is a fairly small C program that mostly just
+   initializes the correct Python interpreter and then runs
+   bup.main.main().  This arrangement was chosen in order to give us
+   more flexibility.  For example:
+
+     - It allows us to avoid
+       [crashing on some Unicode-unfriendly command line arguments](https://bugs.python.org/issue35883)
+       which is critical, given that paths can be arbitrary byte
+       sequences.
+
+     - It allows more flexibility in dealing with upstream changes
+       like the breakage of our ability to manipulate the
+       process's argument list on platforms that support it during
+       the Python 3.9 series.
+
+     - It means that we'll no longer be affected by any changes to the
+       `#!/...` path, i.e. if `/usr/bin/python`, or
+       `/usr/bin/python3`, or whatever we'd previously selected during
+       `./configure` were to change from 2 to 3, or 3.5 to 3.20.
+
+   The version of python bup uses is determined by the `python-config`
+   program selected by `./configure`.  It tries to find a suitable
+   default unless `BUP_PYTHON_CONFIG` is set in the environment.
+
+ - bup supports both internal and external subcommands.  The former
+   are the most common, and are all located in lib/bup/cmd/.  They
+   must be python modules named lib/bup/cmd/COMMAND.py, and must
+   contain a `main(argv)` function that will be passed the *binary*
+   command line arguments (bytes, not strings).  The filename must
+   have underscores for any dashes in the subcommand name.  The
+   external subcommands are in lib/cmd/.
+
+ - The python code is all in lib/bup.
+
+ - lib/bup/\*.py contains the python code (modules) that bup depends on.
    That directory name seems a little silly (and worse, redundant) but there
    seemed to be no better way to let programs write "from bup import
    index" and have it work.  Putting bup in the top level conflicted with
diff --git a/GNUmakefile b/GNUmakefile
new file mode 100644 (file)
index 0000000..fd4931b
--- /dev/null
@@ -0,0 +1,289 @@
+
+MAKEFLAGS += --warn-undefined-variables
+OUTPUT_OPTION = -MMD -MP -o $@
+
+SHELL := bash
+.DEFAULT_GOAL := all
+
+clean_paths :=
+generated_dependencies :=
+
+# See config/config.vars.in (sets bup_python_config, among other things)
+include config/config.vars
+-include $(generated_dependencies)
+
+pf := set -o pipefail
+
+define isok
+  && echo " ok" || echo " no"
+endef
+
+# If ok, strip trailing " ok" and return the output, otherwise, error
+define shout
+$(if $(subst ok,,$(lastword $(1))),$(error $(2)),$(shell x="$(1)"; echo $${x%???}))
+endef
+
+sampledata_rev := $(shell dev/configure-sampledata --revision $(isok))
+sampledata_rev := \
+  $(call shout,$(sampledata_rev),Could not parse sampledata revision)
+
+current_sampledata := test/sampledata/var/rev/v$(sampledata_rev)
+
+os := $(shell ($(pf); uname | sed 's/[-_].*//') $(isok))
+os := $(call shout,$(os),Unable to determine OS)
+
+# Satisfy --warn-undefined-variables
+CFLAGS ?=
+CPPFLAGS ?=
+LDFLAGS ?=
+TARGET_ARCH ?=
+
+bup_shared_cflags := -O2 -Wall -Werror -Wformat=2
+bup_shared_cflags := -Wno-unused-command-line-argument $(bup_shared_cflags)
+bup_shared_cflags := -Wno-unknown-pragmas -Wsign-compare $(bup_shared_cflags)
+bup_shared_cflags := -D_FILE_OFFSET_BITS=64 $(bup_shared_cflags)
+
+soext := .so
+ifeq ($(os),CYGWIN)
+  soext := .dll
+endif
+
+ifdef TMPDIR
+  test_tmp := $(TMPDIR)
+else
+  test_tmp := $(CURDIR)/test/tmp
+endif
+
+initial_setup := $(shell dev/update-checkout-info lib/bup/checkout_info.py $(isok))
+initial_setup := $(call shout,$(initial_setup),update-checkout-info failed))
+clean_paths += lib/bup/checkout_info.py
+
+# Dependency changes here should be mirrored in Makefile
+config/config.vars: configure config/configure config/configure.inc config/*.in
+       MAKE="$(MAKE)" ./configure
+
+# On some platforms, Python.h and readline.h fight over the
+# _XOPEN_SOURCE version, i.e. -Werror crashes on a mismatch, so for
+# now, we're just going to let Python's version win.
+
+helpers_cflags := $(bup_python_cflags) $(bup_shared_cflags)
+helpers_ldflags := $(bup_python_ldflags) $(bup_shared_ldflags)
+
+ifneq ($(strip $(bup_readline_cflags)),)
+  readline_cflags += $(bup_readline_cflags)
+  readline_xopen := $(filter -D_XOPEN_SOURCE=%,$(readline_cflags))
+  readline_xopen := $(subst -D_XOPEN_SOURCE=,,$(readline_xopen))
+  readline_cflags := $(filter-out -D_XOPEN_SOURCE=%,$(readline_cflags))
+  readline_cflags += $(addprefix -DBUP_RL_EXPECTED_XOPEN_SOURCE=,$(readline_xopen))
+  helpers_cflags += $(readline_cflags)
+endif
+
+helpers_ldflags += $(bup_readline_ldflags)
+
+ifeq ($(bup_have_libacl),1)
+  helpers_cflags += $(bup_libacl_cflags)
+  helpers_ldflags += $(bup_libacl_ldflags)
+endif
+
+bup_ext_cmds := lib/cmd/bup-import-rdiff-backup lib/cmd/bup-import-rsnapshot
+
+bup_deps := lib/bup/_helpers$(soext) lib/cmd/bup
+
+all: dev/bup-exec dev/bup-python dev/python $(bup_deps) Documentation/all \
+  $(current_sampledata)
+
+$(current_sampledata):
+       dev/configure-sampledata --setup
+
+PANDOC ?= $(shell type -p pandoc)
+
+ifeq (,$(PANDOC))
+  $(shell echo "Warning: pandoc not found; skipping manpage generation" 1>&2)
+  man_md :=
+else
+  man_md := $(wildcard Documentation/*.md)
+endif
+
+man_roff := $(patsubst %.md,%.1,$(man_md))
+man_html := $(patsubst %.md,%.html,$(man_md))
+
+INSTALL=install
+PREFIX=/usr/local
+MANDIR=$(PREFIX)/share/man
+DOCDIR=$(PREFIX)/share/doc/bup
+BINDIR=$(PREFIX)/bin
+LIBDIR=$(PREFIX)/lib/bup
+
+dest_mandir := $(DESTDIR)$(MANDIR)
+dest_docdir := $(DESTDIR)$(DOCDIR)
+dest_bindir := $(DESTDIR)$(BINDIR)
+dest_libdir := $(DESTDIR)$(LIBDIR)
+
+install: all
+       $(INSTALL) -d $(dest_bindir) $(dest_libdir)/bup/cmd $(dest_libdir)/cmd \
+         $(dest_libdir)/web/static
+       test -z "$(man_roff)" || install -d $(dest_mandir)/man1
+       test -z "$(man_roff)" || $(INSTALL) -m 0644 $(man_roff) $(dest_mandir)/man1
+       test -z "$(man_html)" || install -d $(dest_docdir)
+       test -z "$(man_html)" || $(INSTALL) -m 0644 $(man_html) $(dest_docdir)
+       $(INSTALL) -pm 0755 lib/cmd/bup "$(dest_libdir)/cmd/bup"
+       $(INSTALL) -pm 0755 $(bup_ext_cmds) "$(dest_libdir)/cmd/"
+       cd "$(dest_bindir)" && \
+         ln -sf "$$($(bup_python) -c 'import os; print(os.path.relpath("$(abspath $(dest_libdir))/cmd/bup"))')"
+       set -e; \
+       $(INSTALL) -pm 0644 lib/bup/*.py $(dest_libdir)/bup/
+       $(INSTALL) -pm 0644 lib/bup/cmd/*.py $(dest_libdir)/bup/cmd/
+       $(INSTALL) -pm 0755 \
+               lib/bup/*$(soext) \
+               $(dest_libdir)/bup
+       $(INSTALL) -pm 0644 \
+               lib/web/static/* \
+               $(dest_libdir)/web/static/
+       $(INSTALL) -pm 0644 \
+               lib/web/*.html \
+               $(dest_libdir)/web/
+       if test -e lib/bup/checkout_info.py; then \
+           $(INSTALL) -pm 0644 lib/bup/checkout_info.py \
+               $(dest_libdir)/bup/source_info.py; \
+       else \
+           ! grep -qF '$$Format' lib/bup/source_info.py; \
+           $(INSTALL) -pm 0644 lib/bup/source_info.py $(dest_libdir)/bup/; \
+       fi
+
+embed_cflags := $(bup_python_cflags_embed) $(bup_shared_cflags) -I$(CURDIR)/src
+embed_ldflags := $(bup_python_ldflags_embed) $(bup_shared_ldflags)
+
+config/config.h: config/config.vars
+clean_paths += config/config.h.tmp
+
+cc_bin = $(CC) $(embed_cflags) $(CFLAGS) $^ $(embed_ldflags) $(LDFLAGS) -fPIE \
+  -I src $(OUTPUT_OPTION)
+
+clean_paths += dev/python-proposed
+generated_dependencies += dev/python-proposed.d
+dev/python-proposed: dev/python.c src/bup/compat.c src/bup/io.c
+       rm -f dev/python
+       $(cc_bin)
+
+clean_paths += dev/python
+dev/python: dev/python-proposed
+       dev/validate-python $@-proposed
+       ln $@-proposed $@
+
+clean_paths += dev/bup-exec
+generated_dependencies += dev/bup-exec.d
+dev/bup-exec: CFLAGS += -D BUP_DEV_BUP_EXEC=1
+dev/bup-exec: lib/cmd/bup.c src/bup/compat.c src/bup/io.c
+       $(cc_bin)
+
+clean_paths += dev/bup-python
+generated_dependencies += dev/bup-python.d
+dev/bup-python: CFLAGS += -D BUP_DEV_BUP_PYTHON=1
+dev/bup-python: lib/cmd/bup.c src/bup/compat.c src/bup/io.c
+       $(cc_bin)
+
+clean_paths += lib/cmd/bup
+generated_dependencies += lib/cmd/bup.d
+lib/cmd/bup: lib/cmd/bup.c src/bup/compat.c src/bup/io.c
+       $(cc_bin)
+
+clean_paths += lib/bup/_helpers$(soext)
+generated_dependencies += lib/bup/_helpers.d
+lib/bup/_helpers$(soext): lib/bup/_helpers.c lib/bup/bupsplit.c
+       $(CC) $(helpers_cflags) $(CFLAGS) -shared -fPIC $^ \
+         $(helpers_ldflags) $(LDFLAGS) $(OUTPUT_OPTION)
+
+test/tmp:
+       mkdir test/tmp
+
+# MAKEFLAGS must not be in an immediate := assignment
+parallel_opt = $(lastword $(filter -j%,$(MAKEFLAGS)))
+get_parallel_n = $(patsubst -j%,%,$(parallel_opt))
+maybe_specific_n = $(if $(filter -j%,$(parallel_opt)),-n$(get_parallel_n))
+xdist_opt = $(if $(filter -j,$(parallel_opt)),-nauto,$(maybe_specific_n))
+
+test: all test/tmp dev/python
+        if test yes = "$$(dev/python -c 'import xdist; print("yes")' 2>/dev/null)"; then \
+          (set -x; ./pytest $(xdist_opt);) \
+        else \
+          (set -x; ./pytest;) \
+        fi
+
+stupid:
+       PATH=/bin:/usr/bin $(MAKE) test
+
+check: test
+
+distcheck: all
+       if test yes = $$(dev/python -c "import xdist; print('yes')" 2>/dev/null); then \
+         (set -x; ./pytest $(xdist_opt) -m release;) \
+       else \
+         (set -x; ./pytest -m release;) \
+       fi
+
+long-test: export BUP_TEST_LEVEL=11
+long-test: test
+
+long-check: export BUP_TEST_LEVEL=11
+long-check: check
+
+.PHONY: check-both
+check-both:
+       $(MAKE) clean && BUP_PYTHON_CONFIG=python3-config $(MAKE) check
+       $(MAKE) clean && BUP_PYTHON_CONFIG=python2.7-config $(MAKE) check
+
+.PHONY: Documentation/all
+Documentation/all: $(man_roff) $(man_html)
+
+Documentation/substvars: $(bup_deps)
+        # FIXME: real temp file
+       set -e; bup_ver=$$(./bup version); \
+       echo "s,%BUP_VERSION%,$$bup_ver,g" > $@.tmp; \
+       echo "s,%BUP_DATE%,$$bup_ver,g" >> $@.tmp
+       mv $@.tmp $@
+
+Documentation/%.1: Documentation/%.md Documentation/substvars
+       $(pf); sed -f Documentation/substvars $< \
+         | $(PANDOC) -s -r markdown -w man -o $@
+
+Documentation/%.html: Documentation/%.md Documentation/substvars
+       $(pf); sed -f Documentation/substvars $< \
+         | $(PANDOC) -s -r markdown -w html -o $@
+
+.PHONY: Documentation/clean
+Documentation/clean:
+       cd Documentation && rm -f *~ .*~ *.[0-9] *.html substvars
+
+# Note: this adds commits containing the current manpages in roff and
+# html format to the man and html branches respectively.  The version
+# is determined by "git describe --always".
+.PHONY: update-doc-branches
+update-doc-branches: Documentation/all
+       dev/update-doc-branches refs/heads/man refs/heads/html
+
+# push the pregenerated doc files to origin/man and origin/html
+push-docs: export-docs
+       git push origin man html
+
+# import pregenerated doc files from origin/man and origin/html, in case you
+# don't have pandoc but still want to be able to install the docs.
+import-docs: Documentation/clean
+       $(pf); git archive origin/html | (cd Documentation && tar -xvf -)
+       $(pf); git archive origin/man | (cd Documentation && tar -xvf -)
+
+clean: Documentation/clean
+       cd config && rm -rf finished bin config.var
+       cd config && rm -f \
+         ${CONFIGURE_DETRITUS} ${CONFIGURE_FILES} ${GENERATED_FILES}
+       rm -rf $(clean_paths) .pytest_cache
+       rm -f $(generated_dependencies)
+       find . -name __pycache__ -exec rm -rf {} +
+       if test -e test/mnt; then dev/cleanup-mounts-under test/mnt; fi
+       if test -e test/mnt; then rm -r test/mnt; fi
+       if test -e test/tmp; then dev/cleanup-mounts-under test/tmp; fi
+        # FIXME: migrate these to test/mnt/
+       if test -e test/int/testfs; \
+         then umount test/int/testfs || true; fi
+       rm -rf test/int/testfs test/int/testfs.img testfs.img
+       if test -e test/tmp; then dev/force-delete test/tmp; fi
+       dev/configure-sampledata --clean
index e45719d6118027d5d841c31370920dfdd07ea710..e7f6ff948b7632a742b6a9580f15deabb0520354 100644 (file)
--- a/Makefile
+++ b/Makefile
 
-MAKEFLAGS += --warn-undefined-variables
+# Redirect to GNU make
 
-SHELL := bash
-.DEFAULT_GOAL := all
+.SUFFIXES:
 
-# See config/config.vars.in (sets bup_python, among other things)
--include config/config.vars
+default: config/finished
+       config/bin/make
 
-pf := set -o pipefail
-cfg_py := $(CURDIR)/config/bin/python
+.DEFAULT:
+       $(MAKE) config/finished
+       config/bin/make $(.TARGETS)
 
-define isok
-  && echo " ok" || echo " no"
-endef
-
-# If ok, strip trailing " ok" and return the output, otherwise, error
-define shout
-$(if $(subst ok,,$(lastword $(1))),$(error $(2)),$(shell x="$(1)"; echo $${x%???}))
-endef
-
-sampledata_rev := $(shell dev/configure-sampledata --revision $(isok))
-sampledata_rev := \
-  $(call shout,$(sampledata_rev),Could not parse sampledata revision)
-
-current_sampledata := test/sampledata/var/rev/v$(sampledata_rev)
-
-os := $(shell ($(pf); uname | sed 's/[-_].*//') $(isok))
-os := $(call shout,$(os),Unable to determine OS)
-
-CFLAGS := -O2 -Wall -Werror -Wformat=2 $(CFLAGS)
-CFLAGS := -Wno-unknown-pragmas -Wsign-compare $(CFLAGS)
-CFLAGS := -D_FILE_OFFSET_BITS=64 $(PYINCLUDE) $(CFLAGS)
-SOEXT:=.so
-
-ifeq ($(os),CYGWIN)
-  SOEXT:=.dll
-endif
-
-ifdef TMPDIR
-  test_tmp := $(TMPDIR)
-else
-  test_tmp := $(CURDIR)/test/tmp
-endif
-
-initial_setup := $(shell dev/update-checkout-info lib/bup/checkout_info.py $(isok))
-initial_setup := $(call shout,$(initial_setup),update-checkout-info failed))
-
-config/config.vars: \
-  configure config/configure config/configure.inc \
-  $(wildcard config/*.in)
-       MAKE="$(MAKE)" ./configure
-
-# On some platforms, Python.h and readline.h fight over the
-# _XOPEN_SOURCE version, i.e. -Werror crashes on a mismatch, so for
-# now, we're just going to let Python's version win.
-
-ifneq ($(strip $(bup_readline_cflags)),)
-  readline_cflags += $(bup_readline_cflags)
-  readline_xopen := $(filter -D_XOPEN_SOURCE=%,$(readline_cflags))
-  readline_xopen := $(subst -D_XOPEN_SOURCE=,,$(readline_xopen))
-  readline_cflags := $(filter-out -D_XOPEN_SOURCE=%,$(readline_cflags))
-  readline_cflags += $(addprefix -DBUP_RL_EXPECTED_XOPEN_SOURCE=,$(readline_xopen))
-  CFLAGS += $(readline_cflags)
-endif
-
-LDFLAGS += $(bup_readline_ldflags)
-
-ifeq ($(bup_have_libacl),1)
-  CFLAGS += $(bup_libacl_cflags)
-  LDFLAGS += $(bup_libacl_ldflags)
-endif
-
-config/bin/python: config/config.vars
-
-bup_cmds := \
-  $(patsubst cmd/%-cmd.py,cmd/bup-%,$(wildcard cmd/*-cmd.py)) \
-  $(patsubst cmd/%-cmd.sh,cmd/bup-%,$(wildcard cmd/*-cmd.sh))
-
-bup_deps := lib/bup/_helpers$(SOEXT) $(bup_cmds)
-
-all: $(bup_deps) Documentation/all $(current_sampledata)
-
-$(current_sampledata):
-       dev/configure-sampledata --setup
-
-PANDOC ?= $(shell type -p pandoc)
-
-ifeq (,$(PANDOC))
-  $(shell echo "Warning: pandoc not found; skipping manpage generation" 1>&2)
-  man_md :=
-else
-  man_md := $(wildcard Documentation/*.md)
-endif
-
-man_roff := $(patsubst %.md,%.1,$(man_md))
-man_html := $(patsubst %.md,%.html,$(man_md))
-
-INSTALL=install
-PREFIX=/usr/local
-MANDIR=$(PREFIX)/share/man
-DOCDIR=$(PREFIX)/share/doc/bup
-BINDIR=$(PREFIX)/bin
-LIBDIR=$(PREFIX)/lib/bup
-
-dest_mandir := $(DESTDIR)$(MANDIR)
-dest_docdir := $(DESTDIR)$(DOCDIR)
-dest_bindir := $(DESTDIR)$(BINDIR)
-dest_libdir := $(DESTDIR)$(LIBDIR)
-
-install: all
-       $(INSTALL) -d $(dest_bindir) \
-               $(dest_libdir)/bup $(dest_libdir)/cmd \
-               $(dest_libdir)/web $(dest_libdir)/web/static
-       test -z "$(man_roff)" || install -d $(dest_mandir)/man1
-       test -z "$(man_roff)" || $(INSTALL) -m 0644 $(man_roff) $(dest_mandir)/man1
-       test -z "$(man_html)" || install -d $(dest_docdir)
-       test -z "$(man_html)" || $(INSTALL) -m 0644 $(man_html) $(dest_docdir)
-       dev/install-python-script lib/cmd/bup "$(dest_libdir)/cmd/bup"
-       set -e; \
-       for cmd in $$(ls cmd/bup-*); do \
-         dev/install-python-script "$$cmd" "$(dest_libdir)/$$cmd"; \
-       done
-       cd "$(dest_bindir)" && \
-         ln -sf "$$($(bup_python) -c 'import os; print(os.path.relpath("$(abspath $(dest_libdir))/cmd/bup"))')"
-       set -e; \
-       $(INSTALL) -pm 0644 \
-               lib/bup/*.py \
-               $(dest_libdir)/bup
-       $(INSTALL) -pm 0755 \
-               lib/bup/*$(SOEXT) \
-               $(dest_libdir)/bup
-       $(INSTALL) -pm 0644 \
-               lib/web/static/* \
-               $(dest_libdir)/web/static/
-       $(INSTALL) -pm 0644 \
-               lib/web/*.html \
-               $(dest_libdir)/web/
-       if test -e lib/bup/checkout_info.py; then \
-           $(INSTALL) -pm 0644 lib/bup/checkout_info.py \
-               $(dest_libdir)/bup/source_info.py; \
-       else \
-           ! grep -qF '$$Format' lib/bup/source_info.py; \
-           $(INSTALL) -pm 0644 lib/bup/source_info.py $(dest_libdir)/bup/; \
-       fi
-
-config/config.h: config/config.vars
-
-lib/bup/_helpers$(SOEXT): \
-               config/config.h lib/bup/bupsplit.h \
-               lib/bup/bupsplit.c lib/bup/_helpers.c lib/bup/csetup.py
-       @rm -f $@
-       cd lib/bup && $(cfg_py) csetup.py build "$(CFLAGS)" "$(LDFLAGS)"
-        # Make sure there's just the one file we expect before we copy it.
-       $(cfg_py) -c \
-         "import glob; assert(len(glob.glob('lib/bup/build/*/_helpers*$(SOEXT)')) == 1)"
-       cp lib/bup/build/*/_helpers*$(SOEXT) "$@"
-
-test/tmp:
-       mkdir test/tmp
-
-ifeq (yes,$(shell config/bin/python -c "import xdist; print('yes')" 2>/dev/null))
-  # MAKEFLAGS must not be in an immediate := assignment
-  parallel_opt = $(lastword $(filter -j%,$(MAKEFLAGS)))
-  get_parallel_n = $(patsubst -j%,%,$(parallel_opt))
-  maybe_specific_n = $(if $(filter -j%,$(parallel_opt)),-n$(get_parallel_n))
-  xdist_opt = $(if $(filter -j,$(parallel_opt)),-nauto,$(maybe_specific_n))
-else
-  xdist_opt =
-endif
-
-test: all test/tmp
-       ./pytest $(xdist_opt)
-
-stupid:
-       PATH=/bin:/usr/bin $(MAKE) test
-
-check: test
-
-distcheck: all
-       ./pytest $(xdist_opt) -m release
-
-long-test: export BUP_TEST_LEVEL=11
-long-test: test
-
-long-check: export BUP_TEST_LEVEL=11
-long-check: check
-
-.PHONY: check-both
-check-both:
-       $(MAKE) clean && PYTHON=python3 $(MAKE) check
-       $(MAKE) clean && PYTHON=python2 $(MAKE) check
-
-cmd/bup-%: cmd/%-cmd.py
-       rm -f $@
-       ln -s $*-cmd.py $@
-
-cmd/bup-%: cmd/%-cmd.sh
-       rm -f $@
-       ln -s $*-cmd.sh $@
-
-.PHONY: Documentation/all
-Documentation/all: $(man_roff) $(man_html)
-
-Documentation/substvars: $(bup_deps)
-       echo "s,%BUP_VERSION%,$$(./bup version),g" > $@
-       echo "s,%BUP_DATE%,$$(./bup version --date),g" >> $@
-
-Documentation/%.1: Documentation/%.md Documentation/substvars
-       $(pf); sed -f Documentation/substvars $< \
-         | $(PANDOC) -s -r markdown -w man -o $@
-
-Documentation/%.html: Documentation/%.md Documentation/substvars
-       $(pf); sed -f Documentation/substvars $< \
-         | $(PANDOC) -s -r markdown -w html -o $@
-
-.PHONY: Documentation/clean
-Documentation/clean:
-       cd Documentation && rm -f *~ .*~ *.[0-9] *.html substvars
-
-# Note: this adds commits containing the current manpages in roff and
-# html format to the man and html branches respectively.  The version
-# is determined by "git describe --always".
-.PHONY: update-doc-branches
-update-doc-branches: Documentation/all
-       dev/update-doc-branches refs/heads/man refs/heads/html
-
-# push the pregenerated doc files to origin/man and origin/html
-push-docs: export-docs
-       git push origin man html
-
-# import pregenerated doc files from origin/man and origin/html, in case you
-# don't have pandoc but still want to be able to install the docs.
-import-docs: Documentation/clean
-       $(pf); git archive origin/html | (cd Documentation && tar -xvf -)
-       $(pf); git archive origin/man | (cd Documentation && tar -xvf -)
-
-clean: Documentation/clean config/bin/python
-       cd config && rm -rf config.var
-       cd config && rm -f *~ .*~ \
-         ${CONFIGURE_DETRITUS} ${CONFIGURE_FILES} ${GENERATED_FILES}
-       rm -f *.o lib/*/*.o *.so lib/*/*.so *.dll lib/*/*.dll *.exe \
-               .*~ *~ */*~ lib/*/*~ lib/*/*/*~ \
-               *.pyc */*.pyc lib/*/*.pyc lib/*/*/*.pyc \
-               lib/bup/checkout_info.py \
-               randomgen memtest \
-               testfs.img test/int/testfs.img
-       for x in $$(ls cmd/*-cmd.py cmd/*-cmd.sh | grep -vF python-cmd.sh | cut -b 5-); do \
-           echo "cmd/bup-$${x%-cmd.*}"; \
-       done | xargs -t rm -f
-       if test -e test/mnt; then dev/cleanup-mounts-under test/mnt; fi
-       if test -e test/mnt; then rm -r test/mnt; fi
-       if test -e test/tmp; then dev/cleanup-mounts-under test/tmp; fi
-        # FIXME: migrate these to test/mnt/
-       if test -e test/int/testfs; \
-         then umount test/int/testfs || true; fi
-       rm -rf *.tmp *.tmp.meta test/*.tmp lib/*/*/*.tmp build lib/bup/build test/int/testfs
-       if test -e test/tmp; then dev/force-delete test/tmp; fi
-       dev/configure-sampledata --clean
-        # Remove last so that cleanup tools can depend on it
-       rm -rf config/bin
+# Dependency changes here should be mirrored in GNUmakefile
+config/finished: configure config/configure config/configure.inc config/*.in
+       MAKE= ./configure
index 2c00a3fc7a60b96b299ce601b5ee8b3a0be19aba..0e9f4724de55eaa915f74ca0b0ab526420de9844 100644 (file)
--- a/README.md
+++ b/README.md
@@ -197,7 +197,7 @@ From source
     pip install tornado
     ```
 
- - Build the python module and symlinks:
+ - Build:
 
     ```sh
     make
@@ -244,12 +244,13 @@ From source
     make install DESTDIR=/opt/bup PREFIX=''
     ```
 
- - The Python executable that bup will use is chosen by ./configure,
-   which will search for a reasonable version unless PYTHON is set in
-   the environment, in which case, bup will use that path.  You can
-   see which Python executable was chosen by looking at the
-   configure output, or examining cmd/python-cmd.sh, and you can
-   change the selection by re-running ./configure.
+ - The Python version that bup will use is determined by the
+   `python-config` program chosen by `./configure`, which will search
+   for a reasonable version unless `BUP_PYTHON_CONFIG` is set in the
+   environment.  You can see which Python executable was chosen by
+   looking at the configure output, or examining
+   `config/config.var/bup-python-config`, and you can change the
+   selection by re-running `./configure`.
 
 From binary packages
 --------------------
diff --git a/cmd b/cmd
deleted file mode 120000 (symlink)
index 7819428..0000000
--- a/cmd
+++ /dev/null
@@ -1 +0,0 @@
-lib/cmd
\ No newline at end of file
index 8f4769cc2f6030c61e99169362e8251fc6b789cf..6606bfd78f3a3a7fcc27bfe3f7f05b8d2646f6ef 100644 (file)
@@ -2,8 +2,12 @@ CONFIGURE_FILES=@CONFIGURE_FILES@
 GENERATED_FILES=@GENERATED_FILES@
 
 bup_make=@bup_make@
-bup_python=@bup_python@
-bup_python_majver=@bup_python_majver@
+
+bup_python_config=@bup_python_config@
+bup_python_cflags=@bup_python_cflags@
+bup_python_ldflags=@bup_python_ldflags@
+bup_python_cflags_embed=@bup_python_cflags_embed@
+bup_python_ldflags_embed=@bup_python_ldflags_embed@
 
 bup_have_libacl=@bup_have_libacl@
 bup_libacl_cflags=@bup_libacl_cflags@
index 6ef05315feaaa0dc349bbf28a1c34d38736672de..8b9263d3d6d2f13b1aa123619c15a8c1c36e6ad4 100755 (executable)
@@ -37,6 +37,9 @@ TARGET=bup
 
 . ./configure.inc
 
+# FIXME: real tmpdir
+rm -rf finished config/bin config.var config.var.tmp config.vars
+
 AC_INIT $TARGET
 
 if ! AC_PROG_CC; then
@@ -44,17 +47,17 @@ if ! AC_PROG_CC; then
     exit 1
 fi
 
-MAKE="$(bup_find_prog make "$MAKE")"
-if test -z "$MAKE"; then
-    MAKE="$(bup_find_prog gmake "$GMAKE")"
-fi
-
-if test -z "$MAKE"; then
-    AC_FAIL "ERROR: unable to find make"
-fi
+for make_candidate in make gmake; do
+    found_make="$(bup_find_prog "$make_candidate" "$MAKE")"
+    if test "$found_make" \
+            && ("$found_make" --version | grep "GNU Make"); then
+        MAKE="$found_make"
+        break;
+    fi
+done
 
-if ! ($MAKE --version | grep "GNU Make"); then
-    AC_FAIL "ERROR: $MAKE is not GNU Make"
+if ! test "$MAKE"; then
+    AC_FAIL "ERROR: unable to find GNU make as make or gmake"
 fi
 
 MAKE_VERSION=`$MAKE --version | grep "GNU Make" | awk '{print $3}'`
@@ -65,30 +68,47 @@ expr "$MAKE_VERSION" '>=' '3.81' || AC_FAIL "ERROR: $MAKE must be >= version 3.8
 
 AC_SUB bup_make "$MAKE"
 
-bup_python="$(type -p "$PYTHON")"
-test -z "$bup_python" && bup_python="$(bup_find_prog python3.8 '')"
-test -z "$bup_python" && bup_python="$(bup_find_prog python3.7 '')"
-test -z "$bup_python" && bup_python="$(bup_find_prog python3.6 '')"
-test -z "$bup_python" && bup_python="$(bup_find_prog python3 '')"
-test -z "$bup_python" && bup_python="$(bup_find_prog python2.7 '')"
-test -z "$bup_python" && bup_python="$(bup_find_prog python2.6 '')"
-test -z "$bup_python" && bup_python="$(bup_find_prog python2 '')"
-test -z "$bup_python" && bup_python="$(bup_find_prog python '')"
-if test -z "$bup_python"; then
-    AC_FAIL "ERROR: unable to find python"
+
+# Haven't seen a documented way to determine the python version via
+# python-config right now, so we'll defer version checking until
+# later.
+
+if test "$BUP_PYTHON_CONFIG"; then
+    bup_python_config="$(type -p "$BUP_PYTHON_CONFIG")"
+    if test -z "$bup_python_config"; then
+        AC_FAIL $(printf "ERROR: BUP_PYTHON_CONFIG value %q appears invalid" \
+                         "$BUP_PYTHON_CONFIG")
+    fi
 else
-    AC_SUB bup_python "$bup_python"
-    bup_python_majver=$("$bup_python" -c 'import sys; print(sys.version_info[0])')
-    bup_python_minver=$("$bup_python" -c 'import sys; print(sys.version_info[1])')
-    AC_SUB bup_python_majver "$bup_python_majver"
+    for py_min_ver in 9 8 7 6; do
+        bup_python_config="$(bup_find_prog "python3.$py_min_ver-config" '')"
+        test -z "$bup_python_config" || break
+    done
+    test -z "$bup_python_config" \
+        && bup_python_config="$(bup_find_prog python3-config '')"
+    test -z "$bup_python_config" \
+        && bup_python_config="$(bup_find_prog python2.7-config '')"
+    if test -z "$bup_python_config"; then
+        AC_FAIL "ERROR: unable to find a suitable python-config"
+    fi
 fi
 
-# May not be correct yet, i.e. actual requirement may be higher.
-if test "$bup_python_majver" -gt 2 -a "$bup_python_minver" -lt 3; then
-    # utime follow_symlinks >= 3.3
-    bup_version_str=$("$bup_python" --version 2>&1)
-    AC_FAIL "ERROR: found $bup_version_str (must be >= 3.3 if >= 3)"
+
+bup_python_cflags=$("$bup_python_config" --cflags) || exit $?
+bup_python_ldflags=$("$bup_python_config" --ldflags) || exit $?
+bup_python_cflags_embed=$("$bup_python_config" --cflags --embed)
+if test $? -eq 0; then
+    bup_python_ldflags_embed=$("$bup_python_config" --ldflags --embed) || exit $?
+else  # Earlier versions didn't support --embed
+    bup_python_cflags_embed=$("$bup_python_config" --cflags) || exit $?
+    bup_python_ldflags_embed=$("$bup_python_config" --ldflags) || exit $?
 fi
+AC_SUB bup_python_config "$bup_python_config"
+AC_SUB bup_python_cflags "$bup_python_cflags"
+AC_SUB bup_python_ldflags "$bup_python_ldflags"
+AC_SUB bup_python_cflags_embed "$bup_python_cflags_embed"
+AC_SUB bup_python_ldflags_embed "$bup_python_ldflags_embed"
+
 
 bup_git="$(bup_find_prog git '')"
 if test -z "$bup_git"; then
@@ -109,16 +129,12 @@ AC_CHECK_HEADERS sys/mman.h
 AC_CHECK_HEADERS linux/fs.h
 AC_CHECK_HEADERS sys/ioctl.h
 
-if test "$bup_python_majver" -gt 2; then
-    AC_DEFINE BUP_USE_PYTHON_UTIME 1
-else # Python 2
-    # On GNU/kFreeBSD utimensat is defined in GNU libc, but won't work.
-    if [ -z "$OS_GNU_KFREEBSD" ]; then
-        AC_CHECK_FUNCS utimensat
-    fi
-    AC_CHECK_FUNCS utimes
-    AC_CHECK_FUNCS lutimes
+# On GNU/kFreeBSD utimensat is defined in GNU libc, but won't work.
+if [ -z "$OS_GNU_KFREEBSD" ]; then
+    AC_CHECK_FUNCS utimensat
 fi
+AC_CHECK_FUNCS utimes
+AC_CHECK_FUNCS lutimes
 
 builtin_mul_overflow_code="
 #include <stddef.h>
@@ -300,23 +316,27 @@ AC_SUB bup_have_libacl "$bup_have_libacl"
 AC_CC="$orig_ac_cc"
 LIBS="$orig_libs"
 
-
 AC_OUTPUT config.vars
 
-if test -e config.var; then rm -r config.var; fi
-mkdir -p config.var
-echo -n "$MAKE" > config.var/bup-make
-echo -n "$bup_python" > config.var/bup-python
+set -euo pipefail
+
+# FIXME: real tmpdir
+mkdir -p config.var.tmp
+echo -n "$MAKE" > config.var.tmp/bup-make
+echo -n "$bup_python_config" > config.var.tmp/bup-python-config
+mv config.var.tmp config.var
 
 if test -e bin; then rm -r bin; fi
 mkdir -p bin
-(cd bin && ln -s "$bup_python" python)
+(cd bin && ln -s "$MAKE" make)
+
+touch finished
 
 printf "
-found: python (%q, $("$bup_python" --version 2>&1))
+found: python-config (%q)
 found: git (%q, $("$bup_git" --version))
 " \
-       "$bup_python" \
+       "$bup_python_config" \
        "$bup_git" \
        1>&5
 
diff --git a/dev/bup-python b/dev/bup-python
deleted file mode 100755 (executable)
index 384a8fd..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-
-set -e
-
-script_home="$(cd "$(dirname "$0")" && pwd -P)"
-python="$script_home/../config/bin/python"
-libdir="$script_home/../lib"
-
-export PYTHONPATH="$libdir${PYTHONPATH:+:$PYTHONPATH}"
-exec "$python" "$@"
index c0c26715b8909b0028548dc8669490ec87124040..fedc0b34a7a78390acab8c8423c69d3f18837901 100755 (executable)
@@ -1,9 +1,24 @@
 #!/bin/sh
 """": # -*-python-*-
-bup_python="$(dirname "$0")/../config/bin/python" || exit $?
-exec "$bup_python" "$0" ${1+"$@"}
+# This command is used by "make clean", so don't rely on ./configure
+set -e
+for python in \
+    python3 \
+    python3.9 \
+    python3.8 \
+    python3.7 \
+    python3.6 \
+    python \
+    python2.7; do \
+    if command -v "$python" > /dev/null; then
+        exec "$python" "$0" "$@"
+    fi
+done
+echo "error: unable to find suitable python executable; please report" 1>&2
+exit 2
 """
 
+from __future__ import absolute_import, print_function
 from sys import stderr
 import os.path, re, subprocess, sys
 
@@ -21,13 +36,14 @@ def mntent_unescape(x):
 targets = sys.argv[1:]
 
 if not os.path.exists('/proc/mounts'):
-    print >> stderr, 'No /proc/mounts; skipping mount cleanup in', repr(targets)
+    print('No /proc/mounts; skipping mount cleanup in', repr(targets),
+          file=stderr)
     sys.exit(0)
 
 exit_status = 0
 for target in targets:
     if not os.path.isdir(target):
-        print >> stderr, repr(target), 'is not a directory'
+        print(repr(target), 'is not a directory', file=stderr)
         exit_status = 1
         continue
     top = os.path.realpath(target)
index c24a79c01d3dca10e4ee5fe5ea9d1ec67d9f986a..dc24b8986b55b5f943c14a51c862380477061f2d 100755 (executable)
@@ -45,7 +45,7 @@ case "$1" in
             ln -sf "$(pwd)/abs-symlink-target" abs-symlink || exit $?
             mkfifo fifo
             mkdir -p cmd doc lib/bup || exit $?
-            cp -pP "$top"/cmd/*.py cmd/ || exit $?
+            cp -pP "$top"/lib/bup/*.py lib/bup/ || exit $?
             cp -pP "$top"/Documentation/*.md doc/ || exit $?
             cp -pP "$top"/lib/bup/*.py lib/bup || exit $?
             mkdir path-zoo || exit $?
index e5068da62043b357bdf8037083fe483ddf60b131..451498dfa3cc1a4380ed04e95be852cce580feb4 100755 (executable)
@@ -1,16 +1,18 @@
 #!/bin/sh
 """": # -*-python-*-
-bup_python="$(dirname "$0")/../config/bin/python" || exit $?
-exec "$bup_python" "$0" ${1+"$@"}
+bup_exec="$(dirname "$0")/bup-exec" || exit $?
+exec "$bup_exec" "$0" ${1+"$@"}
 """
-# end of bup preamble
 
 from __future__ import absolute_import, print_function
 
 from os.path import getsize, isdir
-from sys import argv, stderr
+from sys import stderr
 import os
 
+from bup.compat import get_argvb
+
+
 def listdir_failure(ex):
     raise ex
 
@@ -18,7 +20,7 @@ def usage():
     print('Usage: data-size PATH ...', file=sys.stderr)
 
 total = 0
-for path in argv[1:]:
+for path in get_argvb()[1:]:
     if isdir(path):
         for root, dirs, files in os.walk(path, onerror=listdir_failure):
             total += sum(getsize(os.path.join(root, name)) for name in files)
index d49c26cc751e5e908a24a97f4878670b9c1195c9..f9a71c2fb2fe9589254e34855efaccbaacfa1450 100755 (executable)
@@ -1,17 +1,8 @@
 #!/bin/sh
 """": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-bup_python="$(dirname "$0")/../config/bin/python" || exit $?
-exec "$bup_python" "$0"
+bup_exec="$(dirname "$0")/bup-exec" || exit $?
+exec "$bup_exec" "$0" ${1+"$@"}
 """
-# end of bup preamble
 
 from __future__ import absolute_import, print_function
 
@@ -19,12 +10,9 @@ from os.path import abspath, dirname
 from sys import stdout
 import os, sys
 
-script_home = abspath(dirname(__file__))
-sys.path[:0] = [abspath(script_home + '/../../lib'), abspath(script_home + '/../..')]
-
 from bup import compat
 
-for arg in compat.argvb:
+for arg in compat.get_argvb():
     os.write(stdout.fileno(), arg)
     os.write(stdout.fileno(), b'\0\n')
     stdout.flush()
index e1be7424b8c2d00670d6f397e8f6b4fa97f8afd9..fb0bdb7e4b86e02242b06b5708f019b9a102cf73 100755 (executable)
@@ -1,24 +1,13 @@
 #!/bin/sh
 """": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-bup_python="$(dirname "$0")/bup-python" || exit $?
-exec "$bup_python" "$0"
+bup_exec="$(dirname "$0")/bup-exec" || exit $?
+exec "$bup_exec" "$0" ${1+"$@"}
 """
-# end of bup preamble
 
 from __future__ import absolute_import, print_function
 import os, stat, sys
 
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/../lib']
-
-from bup import compat
+from bup.compat import get_argvb
 from bup.io import byte_stream
 
 
@@ -29,7 +18,9 @@ from bup.io import byte_stream
 def usage():
     print("Usage: hardlink-sets <paths ...>", file=sys.stderr)
 
-if len(compat.argv) < 2:
+argvb = get_argvb()
+
+if len(argvb) < 2:
     usage()
     sys.exit(1)
 
@@ -41,7 +32,7 @@ out = byte_stream(sys.stdout)
 
 hardlink_set = {}
 
-for p in compat.argvb[1:]:
+for p in argvb[1:]:
   for root, dirs, files in os.walk(p, onerror = on_walk_error):
       for filename in files:
           full_path = os.path.join(root, filename)
index e54696a21e05700e910fc7e3a9fab4831e8b6e44..fa13d1390fc888d8b9033d3c9e5f5f80783c87bf 100755 (executable)
@@ -1,12 +1,8 @@
 #!/bin/sh
 """": # -*-python-*-
-bup_python="$(dirname "$0")/../config/bin/python" || exit $?
-exec "$bup_python" "$0" ${1+"$@"}
+bup_exec="$(dirname "$0")/bup-exec" || exit $?
+exec "$bup_exec" "$0" ${1+"$@"}
 """
-# end of bup preamble
-
-# Note: this currently relies on bup-python to handle arbitrary binary
-# user/group names.
 
 from __future__ import absolute_import, print_function
 
@@ -14,11 +10,15 @@ import grp
 import pwd
 import sys
 
+from bup.compat import get_argv, get_argvb
+
 def usage():
     print('Usage: id-other-than <--user|--group> ID [ID ...]',
           file=sys.stderr)
 
-if len(sys.argv) < 2:
+argvb = get_argvb()
+
+if len(argvb) < 2:
     usage()
     sys.exit(1)
 
@@ -29,17 +29,17 @@ def is_integer(x):
     except ValueError as e:
         return False
 
-excluded_ids = set(int(x) for x in sys.argv[2:] if is_integer(x))
-excluded_names = (x for x in sys.argv[2:] if not is_integer(x))
+excluded_ids = set(int(x) for x in argvb[2:] if is_integer(x))
+excluded_names = (x for x in get_argv()[2:] if not is_integer(x))
 
-if sys.argv[1] == '--user':
+if argvb[1] == b'--user':
     for x in excluded_names:
         excluded_ids.add(pwd.getpwnam(x).pw_uid)
     for x in pwd.getpwall():
         if x.pw_uid not in excluded_ids:
             print(x.pw_name + ':' + str(x.pw_uid))
             sys.exit(0)
-elif sys.argv[1] == '--group':
+elif argvb[1] == b'--group':
     for x in excluded_names:
         excluded_ids.add(grp.getgrnam(x).gr_gid)
     for x in grp.getgrall():
diff --git a/dev/install-python-script b/dev/install-python-script
deleted file mode 100755 (executable)
index 83d8861..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-export LC_CTYPE=iso-8859-1
-exec "$(dirname "$0")/../config/bin/python" "$0" "$@"
-"""
-
-from __future__ import absolute_import, print_function
-from tempfile import NamedTemporaryFile
-import os, shutil, sys
-
-if sys.version_info[0] >= 3:
-    from shlex import quote
-else:
-    from pipes import quote
-
-src_path, dest_path = sys.argv[1:]
-
-with open(b'config/config.var/bup-python', 'rb') as src:
-    python = src.read()
-
-with NamedTemporaryFile() as tmp:
-    # Replace the section between "Here to end..." and the end of the
-    # preamble with the correct 'exec PYTHON "$0"'.
-    with open(src_path, 'rb') as src:
-        for line in src:
-            if line.startswith(b'# Here to end of preamble replaced during install'):
-                break
-            tmp.write(line)
-        for line in src:
-            if line == b'"""\n':
-                break
-        tmp.write(b'exec %s "$0"\n' % python)
-        tmp.write(b'"""\n')
-        for line in src:
-            tmp.write(line)
-    tmp.flush()
-    shutil.copy(tmp.name, dest_path)
-    os.chmod(dest_path, 0o755)
index 20780e22d930eff4db78563f9a3fc731adf194af..b89c05d5ac9e65fece7ce4ae93f407ba79ed9d75 100644 (file)
@@ -3,7 +3,7 @@
 # Assumes this is always loaded while pwd is still the source tree root
 bup_dev_lib_top=$(pwd) || exit $?
 
-bup-cfg-py() { "$bup_dev_lib_top/config/bin/python" "$@"; }
+bup-cfg-py() { "$bup_dev_lib_top/dev/python" "$@"; }
 bup-python() { "$bup_dev_lib_top/dev/bup-python" "$@"; }
 
 force-delete()
index 05d24f4ebdb2f6a131824bf3fa040a28d349b84f..c0c6c78736c70a87bfc09216b0b41027a14feac8 100755 (executable)
@@ -1,19 +1,19 @@
 #!/bin/sh
 """": # -*-python-*-
-bup_python="$(dirname "$0")//bup-python" || exit $?
-exec "$bup_python" "$0" ${1+"$@"}
+bup_exec="$(dirname "$0")/bup-exec" || exit $?
+exec "$bup_exec" "$0" ${1+"$@"}
 """
-# end of bup preamble
 
 from __future__ import absolute_import, print_function
 
 from os.path import abspath, dirname
 from random import randint
-from sys import argv, exit, stderr, stdout
+from sys import stderr, stdout
 import errno, re, sys
 
-from bup.compat import fsencode, range
+from bup.compat import fsencode, get_argv, get_argvb, range
 
+argv = get_argv()
 
 def usage(out=stdout):
     print('Usage:', argv[0], 'NUM', 'DEST_DIR', file=out)
@@ -44,10 +44,10 @@ def random_filename():
 if len(argv) != 3:
     misuse()
 
-count, dest = argv[1:]
+count, dest = get_argvb()[1:]
 count = int(count)
 
 i = 0
 while i < count:
-    with open(fsencode(dest) + b'/' + random_filename(), 'w') as _:
+    with open(dest + b'/' + random_filename(), 'w') as _:
         i += 1
index 88372c3a882294294d18eb8292fb5513496056b7..4b14b229f25920095e2c53cb1bcb3be666e7c38b 100755 (executable)
@@ -1,13 +1,13 @@
 #!/bin/sh
 """": # -*-python-*-
-bup_python="$(dirname "$0")/../config/bin/python" || exit $?
-exec "$bup_python" "$0" ${1+"$@"}
+bup_exec="$(dirname "$0")/bup-exec" || exit $?
+exec "$bup_exec" "$0" ${1+"$@"}
 """
-# end of bup preamble
 
 from __future__ import absolute_import
-
 import socket, sys
 
+from bup.compat import get_argvb
+
 s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM, 0)
-s.bind(sys.argv[1])
+s.bind(get_argvb()[1])
index f5b296c0d364fb444427d53debc72ea5da4cbf5c..3f0c9d46bed4a27461ab0f4ff0794028345a1b6f 100755 (executable)
@@ -1,24 +1,13 @@
 #!/bin/sh
 """": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-bup_python="$(dirname "$0")/bup-python" || exit $?
-exec "$bup_python" "$0"
+bup_exec="$(dirname "$0")/bup-exec" || exit $?
+exec "$bup_exec" "$0" ${1+"$@"}
 """
-# end of bup preamble
 
 from __future__ import absolute_import
 import os.path, sys
 
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/../../lib']
-
-from bup.compat import argv_bytes
+from bup.compat import argv_bytes, get_argvb
 from bup.helpers import handle_ctrl_c, saved_errors
 from bup.io import byte_stream
 from bup import compat, metadata, options
@@ -33,7 +22,7 @@ ns-timestamp-resolutions TEST_FILE_NAME
 handle_ctrl_c()
 
 o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
+opt, flags, extra = o.parse_bytes(get_argvb()[1:])
 
 sys.stdout.flush()
 out = byte_stream(sys.stdout)
index 164a98b5473938e10e12a100634bf7d419b7c4af..e5594417cf63aa24e71bc70dd85775e8c7aa88c4 100755 (executable)
@@ -27,7 +27,6 @@ case "$pyver" in
         ;;
     python3)
         brew install python
-        easy_install --user pip
         pip3 install --user pytest pytest-xdist
         ;;
     *)
diff --git a/dev/python.c b/dev/python.c
new file mode 100644 (file)
index 0000000..88df9e2
--- /dev/null
@@ -0,0 +1,24 @@
+#define _LARGEFILE64_SOURCE 1
+#define PY_SSIZE_T_CLEAN 1
+#undef NDEBUG
+#include "../config/config.h"
+
+// According to Python, its header has to go first:
+//   http://docs.python.org/2/c-api/intro.html#include-files
+//   http://docs.python.org/3/c-api/intro.html#include-files
+#include <Python.h>
+
+#include "bup/compat.h"
+
+#if PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 8
+# define bup_py_main bup_py_bytes_main
+#elif PY_MAJOR_VERSION > 2
+# define bup_py_main Py_BytesMain
+#else
+# define bup_py_main Py_Main
+#endif
+
+int main(int argc, char **argv)
+{
+    return bup_py_main (argc, argv);
+}
index c37806dbd35fede05e66d66485be6b31f9617d48..36a173f28b520a488335f84b23d1b280a2aed0ad 100755 (executable)
@@ -1,9 +1,8 @@
 #!/bin/sh
 """": # -*-python-*-
-bup_python="$(dirname "$0")/../config/bin/python" || exit $?
-exec "$bup_python" "$0" ${1+"$@"}
+python="$(dirname "$0")/python" || exit $?
+exec "$python" "$0" ${1+"$@"}
 """
-# end of bup preamble
 
 from __future__ import absolute_import, print_function
 import os, sys
index 34bb117840ab4d174cb08af0adbe52cfd6c24454..3d9f712415ccd5aa5fd4d30015a496a82e3d0308 100755 (executable)
@@ -1,7 +1,7 @@
 #!/bin/sh
 """": # -*-python-*-
-bup_python="$(dirname "$0")/bup-python" || exit $?
-exec "$bup_python" "$0" ${1+"$@"}
+bup_exec="$(dirname "$0")/bup-exec" || exit $?
+exec "$bup_exec" "$0" ${1+"$@"}
 """
 
 from __future__ import absolute_import, print_function
@@ -9,8 +9,7 @@ from random import randint
 from sys import stderr, stdout
 import os, sys
 
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/../lib']
-
+from bup.compat import get_argvb
 from bup.io import byte_stream
 
 def smaller_region(max_offset):
@@ -48,11 +47,13 @@ def random_region():
     global generators
     return generators[randint(0, len(generators) - 1)]()
 
-if len(sys.argv) == 0:
+argv = get_argvb()
+
+if len(argv) == 1:
     stdout.flush()
     out = byte_stream(stdout)
-if len(sys.argv) == 2:
-    out = open(sys.argv[1], 'wb')
+elif len(argv) == 2:
+    out = open(argv[1], 'wb')
 else:
     print('Usage: sparse-test-data [FILE]', file=stderr)
     sys.exit(2)
index e3468fb57093e2fb54e0ada1f1d845ed450c5bb6..f0a3f6fc162f45a3890850a19f04d1efe59efdf6 100755 (executable)
@@ -1,16 +1,13 @@
 #!/bin/sh
 """": # -*-python-*-
-bup_python="$(dirname "$0")/bup-python" || exit $?
-exec "$bup_python" "$0" ${1+"$@"}
+bup_exec="$(dirname "$0")/bup-exec" || exit $?
+exec "$bup_exec" "$0" ${1+"$@"}
 """
-# end of bup preamble
 
 from __future__ import absolute_import, print_function
 import os.path, sys
 
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/../lib']
-
-from bup.compat import argv_bytes
+from bup.compat import argv_bytes, get_argvb
 from bup.helpers import handle_ctrl_c, readpipe
 from bup.io import byte_stream
 from bup import options
@@ -24,7 +21,7 @@ subtree-hash ROOT_HASH [PATH_ITEM...]
 handle_ctrl_c()
 
 o = options.Options(optspec)
-(opt, flags, extra) = o.parse(sys.argv[1:])
+opt, flags, extra = o.parse_bytes(get_argvb()[1:])
 
 if len(extra) < 1:
     o.fatal('must specify a root hash')
index 937e7086f05af40878c10df40887e277ae475a94..0077e2469675f1bbcb3410ac21e87babc1b57d8f 100755 (executable)
@@ -1,9 +1,8 @@
 #!/bin/sh
 """": # -*-python-*-
-bup_python="$(dirname "$0")/../config/bin/python" || exit $?
-exec "$bup_python" "$0" ${1+"$@"}
+python="$(dirname "$0")/python" || exit $?
+exec "$python" "$0" ${1+"$@"}
 """
-# end of bup preamble
 
 from __future__ import absolute_import, print_function
 
diff --git a/dev/validate-python b/dev/validate-python
new file mode 100755 (executable)
index 0000000..a64ecfe
--- /dev/null
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+
+set -ueo pipefail
+
+usage () { echo "Usage: validate-python PYTHON_EXECUTABLE"; }
+
+test $# -eq 1 || { usage 1>&2 ; exit 2; }
+python="$1"
+
+majver=$("$python" -c 'import sys; print(sys.version_info[0])')
+minver=$("$python" -c 'import sys; print(sys.version_info[1])')
+
+# May not be correct yet, i.e. actual requirement may be higher.
+if test "$majver" -gt 2 -a "$minver" -lt 3; then
+    # utime follow_symlinks >= 3.3
+    bup_version_str=$("$python" --version 2>&1)
+    echo "ERROR: found $bup_version_str (must be >= 3.3 if >= 3)" 1>&2
+    exit 2
+fi
index 2790b07d791ca59ab93e5711c5a276aec2cd411f..5b9ace9b6bac81a32bf77cc3f382cc8bf89d865e 100644 (file)
@@ -5,6 +5,7 @@
 
 // According to Python, its header has to go first:
 //   http://docs.python.org/2/c-api/intro.html#include-files
+//   http://docs.python.org/3/c-api/intro.html#include-files
 #include <Python.h>
 
 #include <arpa/inet.h>
 #define BUP_HAVE_FILE_ATTRS 1
 #endif
 
+#if PY_MAJOR_VERSION > 2
+# define BUP_USE_PYTHON_UTIME 1
+#endif
+
 #ifndef BUP_USE_PYTHON_UTIME // just for Python 2 now
 /*
  * Check for incomplete UTIMENSAT support (NetBSD 6), and if so,
@@ -353,58 +358,6 @@ static PyObject *bup_cat_bytes(PyObject *self, PyObject *args)
 }
 
 
-
-// Probably we should use autoconf or something and set HAVE_PY_GETARGCARGV...
-#if __WIN32__ || __CYGWIN__ || PY_VERSION_HEX >= 0x03090000
-
-// There's no 'ps' on win32 anyway, and Py_GetArgcArgv() isn't available.
-static void unpythonize_argv(void) { }
-
-#else // not __WIN32__
-
-// For some reason this isn't declared in Python.h
-extern void Py_GetArgcArgv(int *argc, char ***argv);
-
-static void unpythonize_argv(void)
-{
-    int argc, i;
-    char **argv, *arge;
-    
-    Py_GetArgcArgv(&argc, &argv);
-    
-    for (i = 0; i < argc-1; i++)
-    {
-       if (argv[i] + strlen(argv[i]) + 1 != argv[i+1])
-       {
-           // The argv block doesn't work the way we expected; it's unsafe
-           // to mess with it.
-           return;
-       }
-    }
-    
-    arge = argv[argc-1] + strlen(argv[argc-1]) + 1;
-    
-    if (strstr(argv[0], "python") && argv[1] == argv[0] + strlen(argv[0]) + 1)
-    {
-       char *p;
-       size_t len, diff;
-       p = strrchr(argv[1], '/');
-       if (p)
-       {
-           p++;
-           diff = p - argv[0];
-           len = arge - p;
-           memmove(argv[0], p, len);
-           memset(arge - diff, 0, diff);
-           for (i = 0; i < argc; i++)
-               argv[i] = argv[i+1] ? argv[i+1]-diff : NULL;
-       }
-    }
-}
-
-#endif // not __WIN32__ or __CYGWIN__
-
-
 static int write_all(int fd, const void *buf, const size_t count)
 {
     size_t written = 0;
@@ -2460,7 +2413,6 @@ static int setup_module(PyObject *m)
 
     e = getenv("BUP_FORCE_TTY");
     get_state(m)->istty2 = isatty(2) || (atoi(e ? e : "0") & 2);
-    unpythonize_argv();
     return 1;
 }
 
@@ -2470,11 +2422,13 @@ static int setup_module(PyObject *m)
 PyMODINIT_FUNC init_helpers(void)
 {
     PyObject *m = Py_InitModule("_helpers", helper_methods);
-    if (m == NULL)
+    if (m == NULL) {
+        PyErr_SetString(PyExc_RuntimeError, "bup._helpers init failed");
         return;
-
+    }
     if (!setup_module(m))
     {
+        PyErr_SetString(PyExc_RuntimeError, "bup._helpers set up failed");
         Py_DECREF(m);
         return;
     }
diff --git a/lib/bup/cmd/__init__.py b/lib/bup/cmd/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/lib/bup/cmd/bloom.py b/lib/bup/cmd/bloom.py
new file mode 100755 (executable)
index 0000000..3bff576
--- /dev/null
@@ -0,0 +1,172 @@
+
+from __future__ import absolute_import
+
+import os, glob, sys, tempfile
+
+from bup import compat, options, git, bloom
+from bup.compat import argv_bytes, hexstr
+from bup.helpers import (add_error, debug1, handle_ctrl_c, log, progress, qprogress,
+                         saved_errors)
+from bup.io import path_msg
+
+
+optspec = """
+bup bloom [options...]
+--
+ruin       ruin the specified bloom file (clearing the bitfield)
+f,force    ignore existing bloom file and regenerate it from scratch
+o,output=  output bloom filename (default: auto)
+d,dir=     input directory to look for idx files (default: auto)
+k,hashes=  number of hash functions to use (4 or 5) (default: auto)
+c,check=   check the given .idx file against the bloom filter
+"""
+
+
+def ruin_bloom(bloomfilename):
+    rbloomfilename = git.repo_rel(bloomfilename)
+    if not os.path.exists(bloomfilename):
+        log(path_msg(bloomfilename) + '\n')
+        add_error('bloom: %s not found to ruin\n' % path_msg(rbloomfilename))
+        return
+    b = bloom.ShaBloom(bloomfilename, readwrite=True, expected=1)
+    b.map[16 : 16 + 2**b.bits] = b'\0' * 2**b.bits
+
+
+def check_bloom(path, bloomfilename, idx):
+    rbloomfilename = git.repo_rel(bloomfilename)
+    ridx = git.repo_rel(idx)
+    if not os.path.exists(bloomfilename):
+        log('bloom: %s: does not exist.\n' % path_msg(rbloomfilename))
+        return
+    b = bloom.ShaBloom(bloomfilename)
+    if not b.valid():
+        add_error('bloom: %r is invalid.\n' % path_msg(rbloomfilename))
+        return
+    base = os.path.basename(idx)
+    if base not in b.idxnames:
+        log('bloom: %s does not contain the idx.\n' % path_msg(rbloomfilename))
+        return
+    if base == idx:
+        idx = os.path.join(path, idx)
+    log('bloom: bloom file: %s\n' % path_msg(rbloomfilename))
+    log('bloom:   checking %s\n' % path_msg(ridx))
+    for objsha in git.open_idx(idx):
+        if not b.exists(objsha):
+            add_error('bloom: ERROR: object %s missing' % hexstr(objsha))
+
+
+_first = None
+def do_bloom(path, outfilename, k, force):
+    global _first
+    assert k in (None, 4, 5)
+    b = None
+    if os.path.exists(outfilename) and not force:
+        b = bloom.ShaBloom(outfilename)
+        if not b.valid():
+            debug1("bloom: Existing invalid bloom found, regenerating.\n")
+            b = None
+
+    add = []
+    rest = []
+    add_count = 0
+    rest_count = 0
+    for i, name in enumerate(glob.glob(b'%s/*.idx' % path)):
+        progress('bloom: counting: %d\r' % i)
+        ix = git.open_idx(name)
+        ixbase = os.path.basename(name)
+        if b and (ixbase in b.idxnames):
+            rest.append(name)
+            rest_count += len(ix)
+        else:
+            add.append(name)
+            add_count += len(ix)
+
+    if not add:
+        debug1("bloom: nothing to do.\n")
+        return
+
+    if b:
+        if len(b) != rest_count:
+            debug1("bloom: size %d != idx total %d, regenerating\n"
+                   % (len(b), rest_count))
+            b = None
+        elif k is not None and k != b.k:
+            debug1("bloom: new k %d != existing k %d, regenerating\n"
+                   % (k, b.k))
+            b = None
+        elif (b.bits < bloom.MAX_BLOOM_BITS[b.k] and
+              b.pfalse_positive(add_count) > bloom.MAX_PFALSE_POSITIVE):
+            debug1("bloom: regenerating: adding %d entries gives "
+                   "%.2f%% false positives.\n"
+                   % (add_count, b.pfalse_positive(add_count)))
+            b = None
+        else:
+            b = bloom.ShaBloom(outfilename, readwrite=True, expected=add_count)
+    if not b: # Need all idxs to build from scratch
+        add += rest
+        add_count += rest_count
+    del rest
+    del rest_count
+
+    msg = b is None and 'creating from' or 'adding'
+    if not _first: _first = path
+    dirprefix = (_first != path) and git.repo_rel(path) + b': ' or b''
+    progress('bloom: %s%s %d file%s (%d object%s).\r'
+        % (path_msg(dirprefix), msg,
+           len(add), len(add)!=1 and 's' or '',
+           add_count, add_count!=1 and 's' or ''))
+
+    tfname = None
+    if b is None:
+        tfname = os.path.join(path, b'bup.tmp.bloom')
+        b = bloom.create(tfname, expected=add_count, k=k)
+    count = 0
+    icount = 0
+    for name in add:
+        ix = git.open_idx(name)
+        qprogress('bloom: writing %.2f%% (%d/%d objects)\r' 
+                  % (icount*100.0/add_count, icount, add_count))
+        b.add_idx(ix)
+        count += 1
+        icount += len(ix)
+
+    # Currently, there's an open file object for tfname inside b.
+    # Make sure it's closed before rename.
+    b.close()
+
+    if tfname:
+        os.rename(tfname, outfilename)
+
+
+def main(argv):
+    o = options.Options(optspec)
+    opt, flags, extra = o.parse_bytes(argv[1:])
+
+    if extra:
+        o.fatal('no positional parameters expected')
+
+    if not opt.check and opt.k and opt.k not in (4,5):
+        o.fatal('only k values of 4 and 5 are supported')
+
+    if opt.check:
+        opt.check = argv_bytes(opt.check)
+
+    git.check_repo_or_die()
+
+    output = argv_bytes(opt.output) if opt.output else None
+    paths = opt.dir and [argv_bytes(opt.dir)] or git.all_packdirs()
+    for path in paths:
+        debug1('bloom: scanning %s\n' % path_msg(path))
+        outfilename = output or os.path.join(path, b'bup.bloom')
+        if opt.check:
+            check_bloom(path, outfilename, opt.check)
+        elif opt.ruin:
+            ruin_bloom(outfilename)
+        else:
+            do_bloom(path, outfilename, opt.k, opt.force)
+
+    if saved_errors:
+        log('WARNING: %d errors encountered during bloom.\n' % len(saved_errors))
+        sys.exit(1)
+    elif opt.check:
+        log('All tests passed.\n')
diff --git a/lib/bup/cmd/cat_file.py b/lib/bup/cmd/cat_file.py
new file mode 100755 (executable)
index 0000000..e45d1ea
--- /dev/null
@@ -0,0 +1,70 @@
+
+from __future__ import absolute_import
+
+import re, stat, sys
+
+from bup import options, git, vfs
+from bup.compat import argv_bytes
+from bup.helpers import chunkyreader, handle_ctrl_c, log, saved_errors
+from bup.io import byte_stream
+from bup.repo import LocalRepo
+
+optspec = """
+bup cat-file [--meta|--bupm] /branch/revision/[path]
+--
+meta        print the target's metadata entry (decoded then reencoded) to stdout
+bupm        print the target directory's .bupm file directly to stdout
+"""
+
+def main(argv):
+    o = options.Options(optspec)
+    opt, flags, extra = o.parse_bytes(argv[1:])
+
+    git.check_repo_or_die()
+
+    if not extra:
+        o.fatal('must specify a target')
+    if len(extra) > 1:
+        o.fatal('only one target file allowed')
+    if opt.bupm and opt.meta:
+        o.fatal('--meta and --bupm are incompatible')
+
+    target = argv_bytes(extra[0])
+
+    if not re.match(br'/*[^/]+/[^/]+', target):
+        o.fatal("path %r doesn't include a branch and revision" % target)
+
+    repo = LocalRepo()
+    resolved = vfs.resolve(repo, target, follow=False)
+    leaf_name, leaf_item = resolved[-1]
+    if not leaf_item:
+        log('error: cannot access %r in %r\n'
+            % (b'/'.join(name for name, item in resolved), target))
+        sys.exit(1)
+
+    mode = vfs.item_mode(leaf_item)
+
+    sys.stdout.flush()
+    out = byte_stream(sys.stdout)
+
+    if opt.bupm:
+        if not stat.S_ISDIR(mode):
+            o.fatal('%r is not a directory' % target)
+        _, bupm_oid = vfs.tree_data_and_bupm(repo, leaf_item.oid)
+        if bupm_oid:
+            with vfs.tree_data_reader(repo, bupm_oid) as meta_stream:
+                out.write(meta_stream.read())
+    elif opt.meta:
+        augmented = vfs.augment_item_meta(repo, leaf_item, include_size=True)
+        out.write(augmented.meta.encode())
+    else:
+        if stat.S_ISREG(mode):
+            with vfs.fopen(repo, leaf_item) as f:
+                for b in chunkyreader(f):
+                    out.write(b)
+        else:
+            o.fatal('%r is not a plain file' % target)
+
+    if saved_errors:
+        log('warning: %d errors encountered\n' % len(saved_errors))
+        sys.exit(1)
diff --git a/lib/bup/cmd/daemon.py b/lib/bup/cmd/daemon.py
new file mode 100755 (executable)
index 0000000..d82e1e0
--- /dev/null
@@ -0,0 +1,70 @@
+
+from __future__ import absolute_import
+import fcntl, getopt, os, socket, subprocess, sys
+
+from bup import options, path
+from bup.helpers import *
+
+
+optspec = """
+bup daemon [options...] -- [bup-server options...]
+--
+l,listen  ip address to listen on, defaults to *
+p,port    port to listen on, defaults to 1982
+"""
+
+def main(argv):
+    o = options.Options(optspec, optfunc=getopt.getopt)
+    opt, flags, extra = o.parse_bytes(argv[1:])
+
+    host = opt.listen
+    port = opt.port and int(opt.port) or 1982
+    socks = []
+    e = None
+    for res in socket.getaddrinfo(host, port, socket.AF_UNSPEC,
+                                  socket.SOCK_STREAM, 0, socket.AI_PASSIVE):
+        af, socktype, proto, canonname, sa = res
+        try:
+            s = socket.socket(af, socktype, proto)
+        except socket.error as e:
+            continue
+        try:
+            if af == socket.AF_INET6:
+                log("bup daemon: listening on [%s]:%s\n" % sa[:2])
+            else:
+                log("bup daemon: listening on %s:%s\n" % sa[:2])
+            s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+            s.bind(sa)
+            s.listen(1)
+            fcntl.fcntl(s.fileno(), fcntl.F_SETFD, fcntl.FD_CLOEXEC)
+        except socket.error as e:
+            s.close()
+            continue
+        socks.append(s)
+
+    if not socks:
+        log('bup daemon: listen socket: %s\n' % e.args[1])
+        sys.exit(1)
+
+    try:
+        while True:
+            [rl,wl,xl] = select.select(socks, [], [], 60)
+            for l in rl:
+                s, src = l.accept()
+                try:
+                    log("Socket accepted connection from %s\n" % (src,))
+                    fd1 = os.dup(s.fileno())
+                    fd2 = os.dup(s.fileno())
+                    s.close()
+                    sp = subprocess.Popen([path.exe(), 'mux', '--',
+                                           path.exe(), 'server']
+                                          + extra, stdin=fd1, stdout=fd2)
+                finally:
+                    os.close(fd1)
+                    os.close(fd2)
+    finally:
+        for l in socks:
+            l.shutdown(socket.SHUT_RDWR)
+            l.close()
+
+    debug1("bup daemon: done")
diff --git a/lib/bup/cmd/damage.py b/lib/bup/cmd/damage.py
new file mode 100755 (executable)
index 0000000..d3a1778
--- /dev/null
@@ -0,0 +1,60 @@
+
+from __future__ import absolute_import
+import os, random, sys
+
+from bup import options
+from bup.compat import argv_bytes, bytes_from_uint, range
+from bup.helpers import log
+from bup.io import path_msg
+
+
+def randblock(n):
+    return b''.join(bytes_from_uint(random.randrange(0,256)) for i in range(n))
+
+
+optspec = """
+bup damage [-n count] [-s maxsize] [-S seed] <filenames...>
+--
+   WARNING: THIS COMMAND IS EXTREMELY DANGEROUS
+n,num=   number of blocks to damage
+s,size=  maximum size of each damaged block
+percent= maximum size of each damaged block (as a percent of entire file)
+equal    spread damage evenly throughout the file
+S,seed=  random number seed (for repeatable tests)
+"""
+
+def main(argv):
+    o = options.Options(optspec)
+    opt, flags, extra = o.parse_bytes(argv[1:])
+
+    if not extra:
+        o.fatal('filenames expected')
+
+    if opt.seed != None:
+        random.seed(opt.seed)
+
+    for name in extra:
+        name = argv_bytes(name)
+        log('Damaging "%s"...\n' % path_msg(name))
+        with open(name, 'r+b') as f:
+            st = os.fstat(f.fileno())
+            size = st.st_size
+            if opt.percent or opt.size:
+                ms1 = int(float(opt.percent or 0)/100.0*size) or size
+                ms2 = opt.size or size
+                maxsize = min(ms1, ms2)
+            else:
+                maxsize = 1
+            chunks = opt.num or 10
+            chunksize = size // chunks
+            for r in range(chunks):
+                sz = random.randrange(1, maxsize+1)
+                if sz > size:
+                    sz = size
+                if opt.equal:
+                    ofs = r*chunksize
+                else:
+                    ofs = random.randrange(0, size - sz + 1)
+                log('  %6d bytes at %d\n' % (sz, ofs))
+                f.seek(ofs)
+                f.write(randblock(sz))
diff --git a/lib/bup/cmd/drecurse.py b/lib/bup/cmd/drecurse.py
new file mode 100755 (executable)
index 0000000..42b292c
--- /dev/null
@@ -0,0 +1,57 @@
+
+from __future__ import absolute_import, print_function
+from os.path import relpath
+import sys
+
+from bup import options, drecurse
+from bup.compat import argv_bytes
+from bup.helpers import log, parse_excludes, parse_rx_excludes, saved_errors
+from bup.io import byte_stream
+
+
+optspec = """
+bup drecurse <path>
+--
+x,xdev,one-file-system   don't cross filesystem boundaries
+exclude= a path to exclude from the backup (can be used more than once)
+exclude-from= a file that contains exclude paths (can be used more than once)
+exclude-rx= skip paths matching the unanchored regex (may be repeated)
+exclude-rx-from= skip --exclude-rx patterns in file (may be repeated)
+q,quiet  don't actually print filenames
+profile  run under the python profiler
+"""
+
+def main(argv):
+    o = options.Options(optspec)
+    opt, flags, extra = o.parse_bytes(argv[1:])
+
+    if len(extra) != 1:
+        o.fatal("exactly one filename expected")
+
+    drecurse_top = argv_bytes(extra[0])
+    excluded_paths = parse_excludes(flags, o.fatal)
+    if not drecurse_top.startswith(b'/'):
+        excluded_paths = [relpath(x) for x in excluded_paths]
+    exclude_rxs = parse_rx_excludes(flags, o.fatal)
+    it = drecurse.recursive_dirlist([drecurse_top], opt.xdev,
+                                    excluded_paths=excluded_paths,
+                                    exclude_rxs=exclude_rxs)
+    if opt.profile:
+        import cProfile
+        def do_it():
+            for i in it:
+                pass
+        cProfile.run('do_it()')
+    else:
+        if opt.quiet:
+            for i in it:
+                pass
+        else:
+            sys.stdout.flush()
+            out = byte_stream(sys.stdout)
+            for (name,st) in it:
+                out.write(name + b'\n')
+
+    if saved_errors:
+        log('WARNING: %d errors encountered.\n' % len(saved_errors))
+        sys.exit(1)
diff --git a/lib/bup/cmd/features.py b/lib/bup/cmd/features.py
new file mode 100755 (executable)
index 0000000..8d4c4f3
--- /dev/null
@@ -0,0 +1,34 @@
+
+from __future__ import absolute_import, print_function
+import platform, sys
+
+from bup import _helpers, metadata, options, version
+from bup.io import byte_stream
+
+out = None
+
+def show_support(out, bool_opt, what):
+    out.write(b'    %s: %s\n' % (what, b'yes' if bool_opt else b'no'))
+
+optspec = """
+bup features
+"""
+
+def main(argv):
+    o = options.Options(optspec)
+    opt, flags, extra = o.parse_bytes(argv[1:])
+
+    sys.stdout.flush()
+    out = byte_stream(sys.stdout)
+
+    out.write(b'bup %s\n' % version.version)
+    out.write(b'Source %s %s\n' % (version.commit, version.date))
+
+    have_readline = getattr(_helpers, 'readline', None)
+    have_libacl = getattr(_helpers, 'read_acl', None)
+    have_xattr = metadata.xattr
+
+    out.write(b'    Python: %s\n' % platform.python_version().encode('ascii'))
+    show_support(out, have_readline, b'Command line editing (e.g. bup ftp)')
+    show_support(out, have_libacl, b'Saving and restoring POSIX ACLs')
+    show_support(out, have_xattr, b'Saving and restoring extended attributes (xattrs)')
diff --git a/lib/bup/cmd/fsck.py b/lib/bup/cmd/fsck.py
new file mode 100755 (executable)
index 0000000..c195f77
--- /dev/null
@@ -0,0 +1,266 @@
+
+from __future__ import absolute_import, print_function
+from shutil import rmtree
+from subprocess import PIPE, Popen
+from tempfile import mkdtemp
+from binascii import hexlify
+import glob, os, subprocess, sys
+
+from bup import options, git
+from bup.compat import argv_bytes
+from bup.helpers import Sha1, chunkyreader, istty2, log, progress
+from bup.io import byte_stream
+
+
+par2_ok = 0
+nullf = open(os.devnull, 'wb+')
+opt = None
+
+def debug(s):
+    # Log s only at -vv or higher.  NOTE(review): reads the module-level
+    # opt, which main() sets (and normalizes to an int) before any caller
+    # runs -- confirm no path reaches here earlier.
+    if opt.verbose > 1:
+        log(s)
+
+def run(argv):
+    # at least in python 2.5, using "stdout=2" or "stdout=sys.stderr" below
+    # doesn't actually work, because subprocess closes fd #2 right before
+    # execing for some reason.  So we work around it by duplicating the fd
+    # first.
+    fd = os.dup(2)  # copy stderr
+    try:
+        p = subprocess.Popen(argv, stdout=fd, close_fds=False)
+        return p.wait()
+    finally:
+        os.close(fd)
+
+def par2_setup():
+    global par2_ok
+    rv = 1
+    try:
+        p = subprocess.Popen([b'par2', b'--help'],
+                             stdout=nullf, stderr=nullf, stdin=nullf)
+        rv = p.wait()
+    except OSError:
+        log('fsck: warning: par2 not found; disabling recovery features.\n')
+    else:
+        par2_ok = 1
+
+def is_par2_parallel():
+    # A true result means it definitely allows -t1; a false result is
+    # technically inconclusive, but likely means no.
+    # Probe by creating recovery data for a throwaway file with -t1 and
+    # seeing whether par2 rejects the option.
+    tmpdir = mkdtemp(prefix=b'bup-fsck')
+    try:
+        canary = tmpdir + b'/canary'
+        with open(canary, 'wb') as f:
+            f.write(b'canary\n')
+        p = subprocess.Popen((b'par2', b'create', b'-qq', b'-t1', canary),
+                             stderr=PIPE, stdin=nullf)
+        _, err = p.communicate()
+        parallel = p.returncode == 0
+        if opt.verbose:
+            # Any stderr other than the known -t1 rejection is surprising.
+            if len(err) > 0 and err != b'Invalid option specified: -t1\n':
+                log('Unexpected par2 error output\n')
+                log(repr(err) + '\n')
+            if parallel:
+                log('Assuming par2 supports parallel processing\n')
+            else:
+                log('Assuming par2 does not support parallel processing\n')
+        return parallel
+    finally:
+        rmtree(tmpdir)
+
+_par2_parallel = None
+
+def par2(action, args, verb_floor=0):
+    # Invoke "par2 <action> ...", adding -t1 (single-threaded) when the
+    # installed par2 supports it; the -t1 probe runs once and is cached.
+    global _par2_parallel
+    if _par2_parallel is None:
+        _par2_parallel = is_par2_parallel()
+    cmd = [b'par2', action]
+    # -q when verbose enough and stderr isn't a tty; -qq otherwise.
+    if opt.verbose >= verb_floor and not istty2:
+        cmd.append(b'-q')
+    else:
+        cmd.append(b'-qq')
+    if _par2_parallel:
+        cmd.append(b'-t1')
+    cmd.extend(args)
+    return run(cmd)
+
+def par2_generate(base):
+    # Create recovery data (one volume, 200 blocks) covering the pack
+    # and its idx.
+    return par2(b'create',
+                [b'-n1', b'-c200', b'--', base, base + b'.pack', base + b'.idx'],
+                verb_floor=2)
+
+def par2_verify(base):
+    # Check the pack against its existing par2 recovery data.
+    return par2(b'verify', [b'--', base], verb_floor=3)
+
+def par2_repair(base):
+    # Attempt repair using the existing par2 recovery data.
+    return par2(b'repair', [b'--', base], verb_floor=2)
+
+def quick_verify(base):
+    f = open(base + b'.pack', 'rb')
+    f.seek(-20, 2)
+    wantsum = f.read(20)
+    assert(len(wantsum) == 20)
+    f.seek(0)
+    sum = Sha1()
+    for b in chunkyreader(f, os.fstat(f.fileno()).st_size - 20):
+        sum.update(b)
+    if sum.digest() != wantsum:
+        raise ValueError('expected %r, got %r' % (hexlify(wantsum),
+                                                  sum.hexdigest()))
+        
+
+def git_verify(base):
+    if opt.quick:
+        try:
+            quick_verify(base)
+        except Exception as e:
+            log('error: %s\n' % e)
+            return 1
+        return 0
+    else:
+        return run([b'git', b'verify-pack', b'--', base])
+    
+    
+def do_pack(base, last, par2_exists, out):
+    # Verify (and optionally repair, or generate recovery data for) one
+    # pack.  Returns 0 on success, 100 after a successful repair, or the
+    # failing step's exit code.
+    code = 0
+    # Prefer par2 whenever recovery data exists and we aren't in
+    # generate-only mode.
+    if par2_ok and par2_exists and (opt.repair or not opt.generate):
+        vresult = par2_verify(base)
+        if vresult != 0:
+            if opt.repair:
+                rresult = par2_repair(base)
+                if rresult != 0:
+                    action_result = b'failed'
+                    log('%s par2 repair: failed (%d)\n' % (last, rresult))
+                    code = rresult
+                else:
+                    action_result = b'repaired'
+                    log('%s par2 repair: succeeded (0)\n' % last)
+                    # 100 signals "was broken but repaired" to the caller.
+                    code = 100
+            else:
+                action_result = b'failed'
+                log('%s par2 verify: failed (%d)\n' % (last, vresult))
+                code = vresult
+        else:
+            action_result = b'ok'
+    elif not opt.generate or (par2_ok and not par2_exists):
+        # Fall back to git verification; on success, optionally create
+        # the par2 recovery data.
+        gresult = git_verify(base)
+        if gresult != 0:
+            action_result = b'failed'
+            log('%s git verify: failed (%d)\n' % (last, gresult))
+            code = gresult
+        else:
+            if par2_ok and opt.generate:
+                presult = par2_generate(base)
+                if presult != 0:
+                    action_result = b'failed'
+                    log('%s par2 create: failed (%d)\n' % (last, presult))
+                    code = presult
+                else:
+                    action_result = b'generated'
+            else:
+                action_result = b'ok'
+    else:
+        # --generate with recovery data already present (or no par2).
+        assert(opt.generate and (not par2_ok or par2_exists))
+        action_result = b'exists' if par2_exists else b'skipped'
+    if opt.verbose:
+        out.write(last + b' ' +  action_result + b'\n')
+    return code
+
+
+optspec = """
+bup fsck [options...] [filenames...]
+--
+r,repair    attempt to repair errors using par2 (dangerous!)
+g,generate  generate auto-repair information using par2
+v,verbose   increase verbosity (can be used more than once)
+quick       just check pack sha1sum, don't use git verify-pack
+j,jobs=     run 'n' jobs in parallel
+par2-ok     immediately return 0 if par2 is ok, 1 if not
+disable-par2  ignore par2 even if it is available
+"""
+
+def main(argv):
+    # Entry point for "bup fsck": verify (and optionally repair) packs,
+    # serially or with -j in parallel via fork/wait.
+    global opt, par2_ok
+
+    o = options.Options(optspec)
+    opt, flags, extra = o.parse_bytes(argv[1:])
+    opt.verbose = opt.verbose or 0
+
+    par2_setup()
+    # --par2-ok: just report whether par2 is usable, shell-style.
+    if opt.par2_ok:
+        if par2_ok:
+            sys.exit(0)  # 'true' in sh
+        else:
+            sys.exit(1)
+    if opt.disable_par2:
+        par2_ok = 0
+
+    git.check_repo_or_die()
+
+    if extra:
+        extra = [argv_bytes(x) for x in extra]
+    else:
+        debug('fsck: No filenames given: checking all packs.\n')
+        extra = glob.glob(git.repo(b'objects/pack/*.pack'))
+
+    sys.stdout.flush()
+    out = byte_stream(sys.stdout)
+    code = 0
+    count = 0
+    outstanding = {}  # child pid -> 1, for the -j parallel path
+    for name in extra:
+        # Accept .pack/.idx/.par2 names or a bare base path; reduce each
+        # to the pack's base name.
+        if name.endswith(b'.pack'):
+            base = name[:-5]
+        elif name.endswith(b'.idx'):
+            base = name[:-4]
+        elif name.endswith(b'.par2'):
+            base = name[:-5]
+        elif os.path.exists(name + b'.pack'):
+            base = name
+        else:
+            raise Exception('%r is not a pack file!' % name)
+        (dir,last) = os.path.split(base)
+        # An empty .par2 file is treated as missing recovery data.
+        par2_exists = os.path.exists(base + b'.par2')
+        if par2_exists and os.stat(base + b'.par2').st_size == 0:
+            par2_exists = 0
+        sys.stdout.flush()  # Not sure we still need this, but it'll flush out too
+        debug('fsck: checking %r (%s)\n'
+              % (last, par2_ok and par2_exists and 'par2' or 'git'))
+        if not opt.verbose:
+            progress('fsck (%d/%d)\r' % (count, len(extra)))
+
+        if not opt.jobs:
+            nc = do_pack(base, last, par2_exists, out)
+            code = code or nc
+            count += 1
+        else:
+            # Keep at most opt.jobs children running; reap to make room.
+            while len(outstanding) >= opt.jobs:
+                (pid,nc) = os.wait()
+                nc >>= 8   # convert the wait status to the exit code
+                if pid in outstanding:
+                    del outstanding[pid]
+                    code = code or nc
+                    count += 1
+            pid = os.fork()
+            if pid:  # parent
+                outstanding[pid] = 1
+            else: # child
+                try:
+                    sys.exit(do_pack(base, last, par2_exists, out))
+                except Exception as e:
+                    log('exception: %r\n' % e)
+                    sys.exit(99)
+
+    # Reap any children still outstanding after the main loop.
+    while len(outstanding):
+        (pid,nc) = os.wait()
+        nc >>= 8
+        if pid in outstanding:
+            del outstanding[pid]
+            code = code or nc
+            count += 1
+        if not opt.verbose:
+            progress('fsck (%d/%d)\r' % (count, len(extra)))
+
+    if istty2:
+        debug('fsck done.           \n')
+    sys.exit(code)
diff --git a/lib/bup/cmd/ftp.py b/lib/bup/cmd/ftp.py
new file mode 100755 (executable)
index 0000000..bed1b9a
--- /dev/null
@@ -0,0 +1,238 @@
+
+# For now, this completely relies on the assumption that the current
+# encoding (LC_CTYPE, etc.) is ASCII compatible, and that it returns
+# the exact same bytes from a decode/encode round-trip (or the reverse
+# (e.g. ISO-8859-1).
+
+from __future__ import absolute_import, print_function
+import os, fnmatch, stat, sys
+
+from bup import _helpers, options, git, shquote, ls, vfs
+from bup.compat import argv_bytes, fsdecode
+from bup.helpers import chunkyreader, handle_ctrl_c, log
+from bup.io import byte_stream, path_msg
+from bup.repo import LocalRepo
+
+
+class OptionError(Exception):
+    # Raised via ls option parsing (passed as onabort in do_ls) so a bad
+    # ls invocation reports an error instead of exiting the ftp session.
+    pass
+
+
+def do_ls(repo, args, out):
+    try:
+        opt = ls.opts_from_cmdline(args, onabort=OptionError)
+    except OptionError as e:
+        log('error: %s' % e)
+        return
+    return ls.within_repo(repo, opt, out)
+
+
+def write_to_file(inf, outf):
+    for blob in chunkyreader(inf):
+        outf.write(blob)
+
+
+def inputiter(f):
+    if os.isatty(f.fileno()):
+        while 1:
+            if hasattr(_helpers, 'readline'):
+                try:
+                    yield _helpers.readline(b'bup> ')
+                except EOFError:
+                    print()  # Clear the line for the terminal's next prompt
+                    break
+            else:
+                out.write(b'bup> ')
+                out.flush()
+                read_line = f.readline()
+                if not read_line:
+                    print('')
+                    break
+                yield read_line
+    else:
+        for line in f:
+            yield line
+
+
+def _completer_get_subs(repo, line):
+    (qtype, lastword) = shquote.unfinished_word(line)
+    dir, name = os.path.split(lastword)
+    dir_path = vfs.resolve(repo, dir or b'/')
+    _, dir_item = dir_path[-1]
+    if not dir_item:
+        subs = tuple()
+    else:
+        subs = tuple(dir_path + (entry,)
+                     for entry in vfs.contents(repo, dir_item)
+                     if (entry[0] != b'.' and entry[0].startswith(name)))
+    return qtype, lastword, subs
+
+
+_attempt_start = None
+_attempt_end = None
+def attempt_completion(text, start, end):
+    # Readline hook: just record the completion region; returning None
+    # makes readline fall back to enter_completion for the candidates.
+    global _attempt_start, _attempt_end
+    _attempt_start = start
+    _attempt_end = end
+    return None
+
+_last_line = None
+_last_res = None
+def enter_completion(text, iteration):
+    global repo
+    global _attempt_end
+    global _last_line
+    global _last_res
+    try:
+        line = _helpers.get_line_buffer()[:_attempt_end]
+        if _last_line != line:
+            _last_res = _completer_get_subs(repo, line)
+            _last_line = line
+        qtype, lastword, subs = _last_res
+        if iteration < len(subs):
+            path = subs[iteration]
+            leaf_name, leaf_item = path[-1]
+            res = vfs.try_resolve(repo, leaf_name, parent=path[:-1])
+            leaf_name, leaf_item = res[-1]
+            fullname = os.path.join(*(name for name, item in res))
+            if stat.S_ISDIR(vfs.item_mode(leaf_item)):
+                ret = shquote.what_to_add(qtype, lastword, fullname + b'/',
+                                          terminate=False)
+            else:
+                ret = shquote.what_to_add(qtype, lastword, fullname,
+                                          terminate=True) + b' '
+            return text + ret
+    except Exception as e:
+        log('\n')
+        try:
+            import traceback
+            traceback.print_tb(sys.exc_traceback)
+        except Exception as e2:
+            log('Error printing traceback: %s\n' % e2)
+        log('\nError in completion: %s\n' % e)
+
+
+optspec = """
+bup ftp [commands...]
+"""
+
+def main(argv):
+    # Interactive "ftp-like" shell for browsing a bup repository's VFS.
+    o = options.Options(optspec)
+    opt, flags, extra = o.parse_bytes(argv[1:])
+
+    git.check_repo_or_die()
+
+    sys.stdout.flush()
+    out = byte_stream(sys.stdout)
+    stdin = byte_stream(sys.stdin)
+    # NOTE(review): enter_completion() declares 'global repo', but this
+    # binding is local to main -- confirm tab completion can actually
+    # see the repo.
+    repo = LocalRepo()
+    pwd = vfs.resolve(repo, b'/')
+    rv = 0
+
+    if extra:
+        # Non-interactive: each command-line argument is one command.
+        lines = (argv_bytes(arg) for arg in extra)
+    else:
+        # Interactive: wire up readline completion when available.
+        if hasattr(_helpers, 'readline'):
+            _helpers.set_completer_word_break_characters(b' \t\n\r/')
+            _helpers.set_attempted_completion_function(attempt_completion)
+            _helpers.set_completion_entry_function(enter_completion)
+            if sys.platform.startswith('darwin'):
+                # MacOS uses a slightly incompatible clone of libreadline
+                _helpers.parse_and_bind(b'bind ^I rl_complete')
+            _helpers.parse_and_bind(b'tab: complete')
+        lines = inputiter(stdin)
+
+    for line in lines:
+        if not line.strip():
+            continue
+        words = [word for (wordstart,word) in shquote.quotesplit(line)]
+        cmd = words[0].lower()
+        #log('execute: %r %r\n' % (cmd, parm))
+        try:
+            if cmd == b'ls':
+                # FIXME: respect pwd (perhaps via ls accepting resolve path/parent)
+                do_ls(repo, words[1:], out)
+                out.flush()
+            elif cmd == b'cd':
+                # Resolve each argument in turn; all must be directories.
+                np = pwd
+                for parm in words[1:]:
+                    res = vfs.resolve(repo, parm, parent=np)
+                    _, leaf_item = res[-1]
+                    if not leaf_item:
+                        raise Exception('%s does not exist'
+                                        % path_msg(b'/'.join(name for name, item
+                                                             in res)))
+                    if not stat.S_ISDIR(vfs.item_mode(leaf_item)):
+                        raise Exception('%s is not a directory' % path_msg(parm))
+                    np = res
+                pwd = np
+            elif cmd == b'pwd':
+                if len(pwd) == 1:
+                    out.write(b'/')
+                out.write(b'/'.join(name for name, item in pwd) + b'\n')
+                out.flush()
+            elif cmd == b'cat':
+                # Stream each named file's content to stdout.
+                for parm in words[1:]:
+                    res = vfs.resolve(repo, parm, parent=pwd)
+                    _, leaf_item = res[-1]
+                    if not leaf_item:
+                        raise Exception('%s does not exist' %
+                                        path_msg(b'/'.join(name for name, item
+                                                           in res)))
+                    with vfs.fopen(repo, leaf_item) as srcfile:
+                        write_to_file(srcfile, out)
+                out.flush()
+            elif cmd == b'get':
+                # Save one repository file to a local file.
+                if len(words) not in [2,3]:
+                    rv = 1
+                    raise Exception('Usage: get <filename> [localname]')
+                rname = words[1]
+                (dir,base) = os.path.split(rname)
+                lname = len(words) > 2 and words[2] or base
+                res = vfs.resolve(repo, rname, parent=pwd)
+                _, leaf_item = res[-1]
+                if not leaf_item:
+                    raise Exception('%s does not exist' %
+                                    path_msg(b'/'.join(name for name, item in res)))
+                with vfs.fopen(repo, leaf_item) as srcfile:
+                    with open(lname, 'wb') as destfile:
+                        log('Saving %s\n' % path_msg(lname))
+                        write_to_file(srcfile, destfile)
+            elif cmd == b'mget':
+                # Save everything in dir matching the glob 'base',
+                # dereferencing symlinks along the way.
+                for parm in words[1:]:
+                    dir, base = os.path.split(parm)
+
+                    res = vfs.resolve(repo, dir, parent=pwd)
+                    _, dir_item = res[-1]
+                    if not dir_item:
+                        raise Exception('%s does not exist' % path_msg(dir))
+                    for name, item in vfs.contents(repo, dir_item):
+                        if name == b'.':
+                            continue
+                        if fnmatch.fnmatch(name, base):
+                            if stat.S_ISLNK(vfs.item_mode(item)):
+                                deref = vfs.resolve(repo, name, parent=res)
+                                deref_name, deref_item = deref[-1]
+                                if not deref_item:
+                                    raise Exception('%s does not exist' %
+                                                    path_msg('/'.join(name for name, item
+                                                                      in deref)))
+                                item = deref_item
+                            with vfs.fopen(repo, item) as srcfile:
+                                with open(name, 'wb') as destfile:
+                                    log('Saving %s\n' % path_msg(name))
+                                    write_to_file(srcfile, destfile)
+            elif cmd == b'help' or cmd == b'?':
+                out.write(b'Commands: ls cd pwd cat get mget help quit\n')
+                out.flush()
+            elif cmd in (b'quit', b'exit', b'bye'):
+                break
+            else:
+                rv = 1
+                raise Exception('no such command %r' % cmd)
+        except Exception as e:
+            rv = 1
+            log('error: %s\n' % e)
+            # NOTE(review): this re-raise aborts the whole session on any
+            # command error despite the rv/log handling above -- confirm
+            # it's intended and not a debugging leftover.
+            raise
+
+    sys.exit(rv)
diff --git a/lib/bup/cmd/fuse.py b/lib/bup/cmd/fuse.py
new file mode 100755 (executable)
index 0000000..96bf617
--- /dev/null
@@ -0,0 +1,162 @@
+
+from __future__ import absolute_import, print_function
+import errno, os, sys
+
+try:
+    import fuse
+except ImportError:
+    print('error: cannot find the python "fuse" module; please install it',
+          file=sys.stderr)
+    sys.exit(2)
+if not hasattr(fuse, '__version__'):
+    print('error: fuse module is too old for fuse.__version__', file=sys.stderr)
+    sys.exit(2)
+fuse.fuse_python_api = (0, 2)
+
+if sys.version_info[0] > 2:
+    try:
+        fuse_ver = fuse.__version__.split('.')
+        fuse_ver_maj = int(fuse_ver[0])
+    except:
+        log('error: cannot determine the fuse major version; please report',
+            file=sys.stderr)
+        sys.exit(2)
+    if len(fuse_ver) < 3 or fuse_ver_maj < 1:
+        print("error: fuse module can't handle binary data; please upgrade to 1.0+\n",
+              file=sys.stderr)
+        sys.exit(2)
+
+from bup import options, git, vfs, xstat
+from bup.compat import argv_bytes, fsdecode, py_maj
+from bup.helpers import log
+from bup.repo import LocalRepo
+
+
+# FIXME: self.meta and want_meta?
+
+# The path handling is just wrong, but the current fuse module can't
+# handle bytes paths.
+
+class BupFs(fuse.Fuse):
+    """Read-only FUSE filesystem view of the bup repository VFS.
+
+    With fake_metadata, synthesized metadata is reported instead of the
+    saved original metadata.
+    """
+    def __init__(self, repo, verbose=0, fake_metadata=False):
+        fuse.Fuse.__init__(self)
+        self.repo = repo
+        self.verbose = verbose
+        self.fake_metadata = fake_metadata
+    
+    def getattr(self, path):
+        # Return a fuse.Stat for path, or -ENOENT if it doesn't resolve.
+        path = argv_bytes(path)
+        if self.verbose > 0:
+            log('--getattr(%r)\n' % path)
+        # Only request stored metadata when we intend to report it.
+        res = vfs.resolve(self.repo, path, want_meta=(not self.fake_metadata),
+                          follow=False)
+        name, item = res[-1]
+        if not item:
+            return -errno.ENOENT
+        if self.fake_metadata:
+            item = vfs.augment_item_meta(self.repo, item, include_size=True)
+        else:
+            item = vfs.ensure_item_has_metadata(self.repo, item,
+                                                include_size=True)
+        meta = item.meta
+        # FIXME: do we want/need to do anything more with nlink?
+        st = fuse.Stat(st_mode=meta.mode, st_nlink=1, st_size=meta.size)
+        st.st_mode = meta.mode
+        st.st_uid = meta.uid or 0
+        st.st_gid = meta.gid or 0
+        # Clamp pre-epoch timestamps to 0.
+        st.st_atime = max(0, xstat.fstime_floor_secs(meta.atime))
+        st.st_mtime = max(0, xstat.fstime_floor_secs(meta.mtime))
+        st.st_ctime = max(0, xstat.fstime_floor_secs(meta.ctime))
+        return st
+
+    def readdir(self, path, offset):
+        # Generator of fuse.Direntry items for the directory at path.
+        path = argv_bytes(path)
+        assert not offset  # We don't return offsets, so offset should be unused
+        res = vfs.resolve(self.repo, path, follow=False)
+        dir_name, dir_item = res[-1]
+        if not dir_item:
+            # NOTE(review): execution falls through after this yield; if
+            # the caller keeps iterating, vfs.contents() below receives a
+            # missing dir_item -- confirm whether a return is needed here.
+            yield -errno.ENOENT
+        yield fuse.Direntry('..')
+        # FIXME: make sure want_meta=False is being completely respected
+        for ent_name, ent_item in vfs.contents(self.repo, dir_item, want_meta=False):
+            # FUSE wants str names, and '/' can't appear in a dirent.
+            fusename = fsdecode(ent_name.replace(b'/', b'-'))
+            yield fuse.Direntry(fusename)
+
+    def readlink(self, path):
+        # Return the symlink target as a str, or -ENOENT.
+        path = argv_bytes(path)
+        if self.verbose > 0:
+            log('--readlink(%r)\n' % path)
+        res = vfs.resolve(self.repo, path, follow=False)
+        name, item = res[-1]
+        if not item:
+            return -errno.ENOENT
+        return fsdecode(vfs.readlink(self.repo, item))
+
+    def open(self, path, flags):
+        # Read-only filesystem: refuse any access mode but O_RDONLY.
+        path = argv_bytes(path)
+        if self.verbose > 0:
+            log('--open(%r)\n' % path)
+        res = vfs.resolve(self.repo, path, follow=False)
+        name, item = res[-1]
+        if not item:
+            return -errno.ENOENT
+        accmode = os.O_RDONLY | os.O_WRONLY | os.O_RDWR
+        if (flags & accmode) != os.O_RDONLY:
+            return -errno.EACCES
+        # Return None since read doesn't need the file atm...
+        # If we *do* return the file, it'll show up as the last argument
+        #return vfs.fopen(repo, item)
+
+    def read(self, path, size, offset):
+        # Return up to size bytes of path's content starting at offset.
+        path = argv_bytes(path)
+        if self.verbose > 0:
+            log('--read(%r)\n' % path)
+        res = vfs.resolve(self.repo, path, follow=False)
+        name, item = res[-1]
+        if not item:
+            return -errno.ENOENT
+        with vfs.fopen(self.repo, item) as f:
+            f.seek(offset)
+            return f.read(size)
+
+
+optspec = """
+bup fuse [-d] [-f] <mountpoint>
+--
+f,foreground  run in foreground
+d,debug       run in the foreground and display FUSE debug information
+o,allow-other allow other users to access the filesystem
+meta          report original metadata for paths when available
+v,verbose     increase log output (can be used more than once)
+"""
+
+def main(argv):
+    o = options.Options(optspec)
+    opt, flags, extra = o.parse_bytes(argv[1:])
+    if not opt.verbose:
+        opt.verbose = 0
+
+    # Set stderr to be line buffered, even if it's not connected to the console
+    # so that we'll be able to see diagnostics in a timely fashion.
+    errfd = sys.stderr.fileno()
+    sys.stderr.flush()
+    sys.stderr = os.fdopen(errfd, 'w', 1)
+
+    if len(extra) != 1:
+        o.fatal('only one mount point argument expected')
+
+    git.check_repo_or_die()
+    repo = LocalRepo()
+    f = BupFs(repo=repo, verbose=opt.verbose, fake_metadata=(not opt.meta))
+
+    # This is likely wrong, but the fuse module doesn't currently accept bytes
+    f.fuse_args.mountpoint = extra[0]
+
+    if opt.debug:
+        f.fuse_args.add('debug')
+    if opt.foreground:
+        f.fuse_args.setmod('foreground')
+    f.multithreaded = False
+    if opt.allow_other:
+        f.fuse_args.add('allow_other')
+    f.main()
diff --git a/lib/bup/cmd/gc.py b/lib/bup/cmd/gc.py
new file mode 100755 (executable)
index 0000000..e1b995a
--- /dev/null
@@ -0,0 +1,45 @@
+
+from __future__ import absolute_import
+
+from bup import git, options
+from bup.gc import bup_gc
+from bup.helpers import die_if_errors, handle_ctrl_c, log
+
+
+optspec = """
+bup gc [options...]
+--
+v,verbose   increase log output (can be used more than once)
+threshold=  only rewrite a packfile if it's over this percent garbage [10]
+#,compress= set compression level to # (0-9, 9 is highest) [1]
+unsafe      use the command even though it may be DANGEROUS
+"""
+
+# FIXME: server mode?
+# FIXME: make sure client handles server-side changes reasonably
+
def main(argv):
    # Entry point for "bup gc": collect garbage (unreachable objects)
    # from the repository.  Requires --unsafe as a safety interlock.
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    if not opt.unsafe:
        o.fatal('refusing to run dangerous, experimental command without --unsafe')

    if extra:
        o.fatal('no positional parameters expected')

    # --threshold is a percentage of garbage a packfile must contain
    # before it will be rewritten (optspec default: 10).
    if opt.threshold:
        try:
            opt.threshold = int(opt.threshold)
        except ValueError:
            o.fatal('threshold must be an integer percentage value')
        if opt.threshold < 0 or opt.threshold > 100:
            o.fatal('threshold must be an integer percentage value')

    git.check_repo_or_die()

    bup_gc(threshold=opt.threshold,
           compression=opt.compress,
           verbosity=opt.verbose)

    # Exit non-zero if any errors were accumulated during the gc.
    die_if_errors()
diff --git a/lib/bup/cmd/get.py b/lib/bup/cmd/get.py
new file mode 100755 (executable)
index 0000000..8844544
--- /dev/null
@@ -0,0 +1,664 @@
+
+from __future__ import absolute_import, print_function
+from binascii import hexlify, unhexlify
+from collections import namedtuple
+from functools import partial
+from stat import S_ISDIR
+import os, sys, textwrap, time
+
+from bup import compat, git, client, helpers, vfs
+from bup.compat import (
+    argv_bytes,
+    bytes_from_byte,
+    environ,
+    hexstr,
+    items,
+    wrap_main
+)
+from bup.git import get_cat_data, parse_commit, walk_object
+from bup.helpers import add_error, debug1, handle_ctrl_c, log, saved_errors
+from bup.helpers import hostname, shstr, tty_width
+from bup.io import path_msg
+from bup.pwdgrp import userfullname, username
+from bup.repo import LocalRepo, RemoteRepo
+
+argspec = (
+    "usage: bup get [-s source] [-r remote] (<--ff|--append|...> REF [DEST])...",
+
+    """Transfer data from a source repository to a destination repository
+    according to the methods specified (--ff, --ff:, --append, etc.).
+    Both repositories default to BUP_DIR.  A remote destination may be
+    specified with -r, and data may be pulled from a remote repository
+    with the related "bup on HOST get ..." command.""",
+
+    ('optional arguments:',
+     (('-h, --help', 'show this help message and exit'),
+      ('-v, --verbose',
+       'increase log output (can be specified more than once)'),
+      ('-q, --quiet', "don't show progress meter"),
+      ('-s SOURCE, --source SOURCE',
+       'path to the source repository (defaults to BUP_DIR)'),
+      ('-r REMOTE, --remote REMOTE',
+       'hostname:/path/to/repo of remote destination repository'),
+      ('-t --print-trees', 'output a tree id for each ref set'),
+      ('-c, --print-commits', 'output a commit id for each ref set'),
+      ('--print-tags', 'output an id for each tag'),
+      ('--bwlimit BWLIMIT', 'maximum bytes/sec to transmit to server'),
+      ('-0, -1, -2, -3, -4, -5, -6, -7, -8, -9, --compress LEVEL',
+       'set compression LEVEL (default: 1)'))),
+
+    ('transfer methods:',
+     (('--ff REF, --ff: REF DEST',
+       'fast-forward dest REF (or DEST) to match source REF'),
+      ('--append REF, --append: REF DEST',
+       'append REF (treeish or committish) to dest REF (or DEST)'),
+      ('--pick REF, --pick: REF DEST',
+       'append single source REF commit to dest REF (or DEST)'),
+      ('--force-pick REF, --force-pick: REF DEST',
+       '--pick, overwriting REF (or DEST)'),
+      ('--new-tag REF, --new-tag: REF DEST',
+       'tag source ref REF as REF (or DEST) in dest unless it already exists'),
+      ('--replace, --replace: REF DEST',
+       'overwrite REF (or DEST) in dest with source REF'),
+      ('--unnamed REF',
+       'fetch REF anonymously (without destination ref)'))))
+
def render_opts(opts, width=None):
    """Render (args, description) pairs as wrapped help-text fragments.

    Returns a flat list of strings (including the '\\n' separators)
    ready to be joined.  width defaults to the terminal width."""
    if not width:
        width = tty_width()
    arg_wrap = dict(initial_indent='  ', subsequent_indent='    ')
    desc_wrap = dict(initial_indent='      ', subsequent_indent='      ')
    fragments = []
    for arg_text, desc_text in opts:
        fragments.extend(
            (textwrap.fill(arg_text, width=width, **arg_wrap), '\n',
             textwrap.fill(desc_text, width=width, **desc_wrap), '\n'))
    return fragments
+
def usage(argspec, width=None):
    """Build the complete usage/help text for an argspec tuple of
    (usage-line, preamble, *option-groups) and return it as one string."""
    if not width:
        width = tty_width()
    usage_line, preamble = argspec[0], argspec[1]
    parts = [textwrap.fill(usage_line, width=width, subsequent_indent='  '),
             '\n\n',
             textwrap.fill(preamble.replace('\n', ' '), width=width),
             '\n']
    for group_name, group_args in argspec[2:]:
        parts += ['\n', group_name, '\n']
        parts += render_opts(group_args, width=width)
    return ''.join(parts)
+
def misuse(message=None):
    """Write the usage text (plus an optional error message) to stderr
    and exit with status 1."""
    err = sys.stderr
    err.write(usage(argspec))
    if message:
        err.write('\nerror: ')
        err.write(message)
        err.write('\n')
    sys.exit(1)
+
def require_n_args_or_die(n, args):
    """Return (args[1:1+n], args[1+n:]), i.e. the n values that follow
    the option at args[0], plus the remaining arguments.  Die with a
    usage message if fewer than n values are available."""
    if len(args) < n + 1:
        # Fixed: the original message interpolated (n, 'values'/'value')
        # into three placeholders -- a TypeError at runtime -- and had
        # the singular/plural test inverted.
        misuse('%s argument requires %d %s'
               % (path_msg(args[0]), n, 'value' if n == 1 else 'values'))
    result = args[1:1+n], args[1+n:]
    assert len(result[0]) == n
    return result
+
Spec = namedtuple('Spec', ('method', 'src', 'dest'))

def spec_msg(s):
    """Format a Spec for diagnostics, e.g. '--ff SRC' or '--ff: SRC DEST'."""
    if s.dest:
        return '--%s: %s %s' % (s.method, path_msg(s.src), path_msg(s.dest))
    return '--%s %s' % (s.method, path_msg(s.src))
+
def parse_args(args):
    """Parse the bup-get command line (a list of bytes) into an options
    object with help, verbose, quiet, print_commits/trees/tags, bwlimit,
    compress, source, remote, and target_specs attributes.  Prints usage
    and exits on any error."""
    class GetOpts:
        pass
    opt = GetOpts()
    opt.help = False
    opt.verbose = 0
    opt.quiet = False
    opt.print_commits = opt.print_trees = opt.print_tags = False
    opt.bwlimit = None
    opt.compress = 1
    opt.source = opt.remote = None
    opt.target_specs = []

    remaining = args[1:]  # Skip argv[0]
    while remaining:
        arg = remaining[0]
        if arg in (b'-h', b'--help'):
            sys.stdout.write(usage(argspec))
            sys.exit(0)
        elif arg in (b'-v', b'--verbose'):
            opt.verbose += 1
            remaining = remaining[1:]
        elif arg in (b'-q', b'--quiet'):
            # Fixed: -q/--quiet is documented in argspec and opt.quiet
            # was initialized above, but no branch ever accepted it, so
            # using it fell through to misuse().
            opt.quiet = True
            remaining = remaining[1:]
        elif arg in (b'--ff', b'--append', b'--pick', b'--force-pick',
                     b'--new-tag', b'--replace', b'--unnamed'):
            (ref,), remaining = require_n_args_or_die(1, remaining)
            opt.target_specs.append(Spec(method=arg[2:].decode('ascii'),
                                         src=ref, dest=None))
        elif arg in (b'--ff:', b'--append:', b'--pick:', b'--force-pick:',
                     b'--new-tag:', b'--replace:'):
            (ref, dest), remaining = require_n_args_or_die(2, remaining)
            opt.target_specs.append(Spec(method=arg[2:-1].decode('ascii'),
                                         src=ref, dest=dest))
        elif arg in (b'-s', b'--source'):
            (opt.source,), remaining = require_n_args_or_die(1, remaining)
        elif arg in (b'-r', b'--remote'):
            (opt.remote,), remaining = require_n_args_or_die(1, remaining)
        elif arg in (b'-c', b'--print-commits'):
            opt.print_commits, remaining = True, remaining[1:]
        elif arg in (b'-t', b'--print-trees'):
            opt.print_trees, remaining = True, remaining[1:]
        elif arg == b'--print-tags':
            opt.print_tags, remaining = True, remaining[1:]
        elif arg in (b'-0', b'-1', b'-2', b'-3', b'-4', b'-5', b'-6', b'-7',
                     b'-8', b'-9'):
            opt.compress = int(arg[1:])
            remaining = remaining[1:]
        elif arg == b'--compress':
            (opt.compress,), remaining = require_n_args_or_die(1, remaining)
            opt.compress = int(opt.compress)
        elif arg == b'--bwlimit':
            # Fixed: keep the raw value; it's converted downstream by
            # parse_num(), which supports suffixed values like b'1m'.
            # (The previous long() call doesn't exist in Python 3 and
            # would have rejected suffixed values anyway.)
            (opt.bwlimit,), remaining = require_n_args_or_die(1, remaining)
        elif arg.startswith(b'-') and len(arg) > 2 and arg[1:2] != b'-':
            # Try to interpret this as -xyz, i.e. "-xyz -> -x -y -z".
            # We do this last so that --foo -bar is valid if --foo
            # requires a value.  Fixed: must slice (arg[1:2]), not index
            # (arg[1]) -- indexing bytes yields an int in Python 3, so
            # the old "arg[1] != b'-'" was always true and unknown
            # --long options were exploded instead of reported.
            remaining[0:1] = (b'-' + bytes_from_byte(c) for c in arg[1:])
            # FIXME
            continue
        else:
            misuse()
    return opt
+
+# FIXME: client error handling (remote exceptions, etc.)
+
+# FIXME: walk_object in in git.py doesn't support opt.verbose.  Do we
+# need to adjust for that here?
def get_random_item(name, hash, repo, writer, opt):
    """Copy the object graph rooted at hash (hex oid) from repo into
    writer, pruning traversal at any object the destination already
    has.  name and opt are currently unused here (see the FIXME above
    about walk_object and verbosity)."""
    def already_seen(oid):
        return writer.exists(unhexlify(oid))
    for item in walk_object(repo.cat, hash, stop_at=already_seen,
                            include_data=True):
        # already_seen ensures that writer.exists(id) is false.
        # Otherwise, just_write() would fail.
        writer.just_write(item.oid, item.type, item.data)
+
+
def append_commit(name, hash, parent, src_repo, writer, opt):
    """Copy the tree of source commit hash (hex oid) into writer and
    create a new commit on top of parent, preserving the original
    author/date/message but with the current user and time as the
    committer.  Returns (new_commit_oid, tree_oid), both binary."""
    now = time.time()
    items = parse_commit(get_cat_data(src_repo.cat(hash), b'commit'))
    tree = unhexlify(items.tree)
    author = b'%s <%s>' % (items.author_name, items.author_mail)
    # FIXME: author_time is currently unused; the sec/offset values are
    # passed to new_commit() directly below.
    author_time = (items.author_sec, items.author_offset)
    committer = b'%s <%s@%s>' % (userfullname(), username(), hostname())
    get_random_item(name, hexlify(tree), src_repo, writer, opt)
    c = writer.new_commit(tree, parent,
                          author, items.author_sec, items.author_offset,
                          committer, now, None,
                          items.message)
    return c, tree
+
+
def append_commits(commits, src_name, dest_hash, src_repo, writer, opt):
    """Re-create each commit in commits (oldest first) on top of
    dest_hash and return (tip_commit_oid, tip_tree_oid).  commits must
    be non-empty."""
    tip, tip_tree = dest_hash, None
    for commit in commits:
        tip, tip_tree = append_commit(src_name, commit, tip,
                                      src_repo, writer, opt)
    assert tip_tree is not None
    return tip, tip_tree
+
+Loc = namedtuple('Loc', ['type', 'hash', 'path'])
+default_loc = Loc(None, None, None)
+
def find_vfs_item(name, repo):
    """Resolve name in repo's VFS and classify the result.

    Returns a Loc(type, hash, path), where type is one of 'root',
    'tags', 'branch', 'save', 'commit', 'tree', or 'blob', hash is the
    binary oid when one is available (commit oid preferred over tree
    oid), and path is the resolved VFS path.  Returns None if name
    doesn't resolve to an existing item."""
    res = repo.resolve(name, follow=False, want_meta=False)
    leaf_name, leaf_item = res[-1]
    if not leaf_item:
        return None
    kind = type(leaf_item)
    if kind == vfs.Root:
        kind = 'root'
    elif kind == vfs.Tags:
        kind = 'tags'
    elif kind == vfs.RevList:
        kind = 'branch'
    elif kind == vfs.Commit:
        # A commit whose parent dir is a branch (RevList) is a save;
        # anywhere else (e.g. under /.tag/) it's a plain commit.
        if len(res) > 1 and type(res[-2][1]) == vfs.RevList:
            kind = 'save'
        else:
            kind = 'commit'
    elif kind == vfs.Item:
        if S_ISDIR(vfs.item_mode(leaf_item)):
            kind = 'tree'
        else:
            kind = 'blob'
    elif kind == vfs.Chunky:
        kind = 'tree'
    elif kind == vfs.FakeLink:
        # Don't have to worry about ELOOP, excepting malicious
        # remotes, since "latest" is the only FakeLink.
        assert leaf_name == b'latest'
        # Re-resolve "latest" to the save it points at.
        res = repo.resolve(leaf_item.target, parent=res[:-1],
                           follow=False, want_meta=False)
        leaf_name, leaf_item = res[-1]
        assert leaf_item
        assert type(leaf_item) == vfs.Commit
        name = b'/'.join(x[0] for x in res)
        kind = 'save'
    else:
        raise Exception('unexpected resolution for %s: %r'
                        % (path_msg(name), res))
    path = b'/'.join(name for name, item in res)
    # Prefer the commit oid (coid) when the item has one.
    if hasattr(leaf_item, 'coid'):
        result = Loc(type=kind, hash=leaf_item.coid, path=path)
    elif hasattr(leaf_item, 'oid'):
        result = Loc(type=kind, hash=leaf_item.oid, path=path)
    else:
        result = Loc(type=kind, hash=None, path=path)
    return result
+
+
Target = namedtuple('Target', ['spec', 'src', 'dest'])

def loc_desc(loc):
    """Return a debugging repr of a Loc (or None), hex-encoding the
    hash when present."""
    if not (loc and loc.hash):
        return repr(loc)
    return repr(loc._replace(hash=hexlify(loc.hash)))
+
+
+# FIXME: see if resolve() means we can drop the vfs path cleanup
+
def cleanup_vfs_path(p):
    """Normalize the VFS path p (bytes) and make sure it's absolute."""
    normalized = os.path.normpath(p)
    return normalized if normalized.startswith(b'/') else b'/' + normalized
+
+
def validate_vfs_path(p):
    """Return p unchanged unless it's an unsupported destination path
    (anything under /. other than /.tag/), in which case die with a
    usage error."""
    if p.startswith(b'/.') \
       and not p.startswith(b'/.tag/'):
        # Fixed: the original message referenced dest.path and spec,
        # neither of which exists in this scope, so hitting this path
        # raised NameError instead of printing the diagnostic.
        misuse('unsupported destination path %s' % path_msg(p))
    return p
+
+
def resolve_src(spec, src_repo):
    """Resolve spec.src in src_repo to a Loc, dying unless it exists
    and is something fetchable (not the repo root or the entire /.tag
    directory)."""
    src = find_vfs_item(spec.src, src_repo)
    spec_args = spec_msg(spec)
    if not src:
        misuse('cannot find source for %s' % spec_args)
    if src.type == 'root':
        misuse('cannot fetch entire repository for %s' % spec_args)
    if src.type == 'tags':
        misuse('cannot fetch entire /.tag directory for %s' % spec_args)
    debug1('src: %s\n' % loc_desc(src))
    return src
+
+
def get_save_branch(repo, path):
    """Given the VFS path of a save (e.g. /branch/latest), return the
    VFS path of the branch containing it (e.g. /branch)."""
    res = repo.resolve(path, follow=False, want_meta=False)
    leaf_name, leaf_item = res[-1]
    if not leaf_item:
        misuse('error: cannot access %r in %r' % (leaf_name, path))
    assert len(res) == 3  # expected resolution: (root, branch, save)
    res_path = b'/'.join(name for name, item in res[:-1])
    return res_path
+
+
def resolve_branch_dest(spec, src, src_repo, dest_repo):
    """Determine and validate the destination branch for spec,
    defaulting it from src when it wasn't given explicitly.  Returns
    (possibly-updated spec, dest Loc); the destination must already be
    a branch, or not exist yet."""
    # Resulting dest must be treeish, or not exist.
    if not spec.dest:
        # Pick a default dest.
        if src.type == 'branch':
            spec = spec._replace(dest=spec.src)
        elif src.type == 'save':
            spec = spec._replace(dest=get_save_branch(src_repo, spec.src))
        elif src.path.startswith(b'/.tag/'):  # Dest defaults to the same.
            spec = spec._replace(dest=spec.src)

    spec_args = spec_msg(spec)
    if not spec.dest:
        # Fixed: misuse() takes a single message; the original passed
        # spec_args as a second positional argument (TypeError).
        misuse('no destination (implicit or explicit) for %s' % spec_args)

    dest = find_vfs_item(spec.dest, dest_repo)
    if dest:
        if dest.type == 'commit':
            misuse('destination for %s is a tagged commit, not a branch'
                  % spec_args)
        if dest.type != 'branch':
            misuse('destination for %s is a %s, not a branch'
                  % (spec_args, dest.type))
    else:
        dest = default_loc._replace(path=cleanup_vfs_path(spec.dest))

    if dest.path.startswith(b'/.'):
        misuse('destination for %s must be a valid branch name' % spec_args)

    debug1('dest: %s\n' % loc_desc(dest))
    return spec, dest
+
+
def resolve_ff(spec, src_repo, dest_repo):
    """Resolve a --ff/--ff: spec to a Target.  The source must be a
    branch, save, or commit; the destination is validated as a branch
    by resolve_branch_dest()."""
    src = resolve_src(spec, src_repo)
    spec_args = spec_msg(spec)
    if src.type == 'tree':
        misuse('%s is impossible; can only --append a tree to a branch'
              % spec_args)
    if src.type not in ('branch', 'save', 'commit'):
        misuse('source for %s must be a branch, save, or commit, not %s'
              % (spec_args, src.type))
    spec, dest = resolve_branch_dest(spec, src, src_repo, dest_repo)
    return Target(spec=spec, src=src, dest=dest)
+
+
def handle_ff(item, src_repo, writer, opt):
    """Fast-forward the destination to the source commit, copying any
    missing objects.  Returns (commit_oid, tree_oid); dies if the
    current destination isn't an ancestor of the source."""
    assert item.spec.method == 'ff'
    assert item.src.type in ('branch', 'save', 'commit')
    src_oidx = hexlify(item.src.hash)
    dest_oidx = hexlify(item.dest.hash) if item.dest.hash else None
    if not dest_oidx or dest_oidx in src_repo.rev_list(src_oidx):
        # Can fast forward.
        get_random_item(item.spec.src, src_oidx, src_repo, writer, opt)
        commit_items = parse_commit(get_cat_data(src_repo.cat(src_oidx), b'commit'))
        return item.src.hash, unhexlify(commit_items.tree)
    misuse('destination is not an ancestor of source for %s'
           % spec_msg(item.spec))
+
+
def resolve_append(spec, src_repo, dest_repo):
    """Resolve an --append/--append: spec to a Target.  The source must
    be a branch, save, commit, or tree; the destination is validated by
    resolve_branch_dest()."""
    src = resolve_src(spec, src_repo)
    if src.type not in ('branch', 'save', 'commit', 'tree'):
        misuse('source for %s must be a branch, save, commit, or tree, not %s'
              % (spec_msg(spec), src.type))
    spec, dest = resolve_branch_dest(spec, src, src_repo, dest_repo)
    return Target(spec=spec, src=src, dest=dest)
+
+
def handle_append(item, src_repo, writer, opt):
    """Append the source to the destination branch and return
    (tip_commit_oid, tip_tree_oid).  A tree source is wrapped in a
    single synthesized "bup save" commit; other sources have their
    entire commit chain re-created on top of the destination tip."""
    assert item.spec.method == 'append'
    assert item.src.type in ('branch', 'save', 'commit', 'tree')
    assert item.dest.type == 'branch' or not item.dest.type
    src_oidx = hexlify(item.src.hash)
    if item.src.type == 'tree':
        get_random_item(item.spec.src, src_oidx, src_repo, writer, opt)
        parent = item.dest.hash
        msg = b'bup save\n\nGenerated by command:\n%r\n' % compat.get_argvb()
        userline = b'%s <%s@%s>' % (userfullname(), username(), hostname())
        now = time.time()
        commit = writer.new_commit(item.src.hash, parent,
                                   userline, now, None,
                                   userline, now, None, msg)
        return commit, item.src.hash
    commits = list(src_repo.rev_list(src_oidx))
    # rev_list yields newest first; append oldest first.
    commits.reverse()
    return append_commits(commits, item.spec.src, item.dest.hash,
                          src_repo, writer, opt)
+
+
def resolve_pick(spec, src_repo, dest_repo):
    """Resolve a --pick/--force-pick spec to a Target.  The source must
    be a commit or save; the destination must be (or default to) a
    branch or a /.tag/ path, and plain --pick refuses to overwrite an
    existing tag."""
    src = resolve_src(spec, src_repo)
    spec_args = spec_msg(spec)
    if src.type == 'tree':
        misuse('%s is impossible; can only --append a tree' % spec_args)
    if src.type not in ('commit', 'save'):
        misuse('%s impossible; can only pick a commit or save, not %s'
              % (spec_args, src.type))
    if not spec.dest:
        if src.path.startswith(b'/.tag/'):
            spec = spec._replace(dest=spec.src)
        elif src.type == 'save':
            spec = spec._replace(dest=get_save_branch(src_repo, spec.src))
    if not spec.dest:
        # Fixed: was misuse('...%s', spec_args) -- a TypeError, since
        # misuse() only takes a single message argument.
        misuse('no destination provided for %s' % spec_args)
    dest = find_vfs_item(spec.dest, dest_repo)
    if not dest:
        cp = validate_vfs_path(cleanup_vfs_path(spec.dest))
        dest = default_loc._replace(path=cp)
    else:
        if not dest.type == 'branch' and not dest.path.startswith(b'/.tag/'):
            misuse('%s destination is not a tag or branch' % spec_args)
        if spec.method == 'pick' \
           and dest.hash and dest.path.startswith(b'/.tag/'):
            misuse('cannot overwrite existing tag for %s (requires --force-pick)'
                  % spec_args)
    return Target(spec=spec, src=src, dest=dest)
+
+
def handle_pick(item, src_repo, writer, opt):
    """Copy the single picked commit into writer, parenting it on the
    existing destination tip when there is one.  Returns
    (commit_oid, tree_oid)."""
    assert item.spec.method in ('pick', 'force-pick')
    assert item.src.type in ('save', 'commit')
    src_oidx = hexlify(item.src.hash)
    parent = item.dest.hash if item.dest.hash else None
    return append_commit(item.spec.src, src_oidx, parent,
                         src_repo, writer, opt)
+
+
def resolve_new_tag(spec, src_repo, dest_repo):
    """Resolve a --new-tag spec to a Target.  The destination must be
    (or default to) a /.tag/ path that doesn't exist yet; overwriting
    requires --replace."""
    src = resolve_src(spec, src_repo)
    spec_args = spec_msg(spec)
    if not spec.dest and src.path.startswith(b'/.tag/'):
        spec = spec._replace(dest=src.path)
    if not spec.dest:
        # Fixed: was misuse('...%s', spec_args) -- a TypeError, since
        # misuse() only takes a single message argument.
        misuse('no destination (implicit or explicit) for %s' % spec_args)
    dest = find_vfs_item(spec.dest, dest_repo)
    if not dest:
        dest = default_loc._replace(path=cleanup_vfs_path(spec.dest))
    if not dest.path.startswith(b'/.tag/'):
        misuse('destination for %s must be a VFS tag' % spec_args)
    if dest.hash:
        misuse('cannot overwrite existing tag for %s (requires --replace)'
              % spec_args)
    return Target(spec=spec, src=src, dest=dest)
+
+
def handle_new_tag(item, src_repo, writer, opt):
    """Copy the tagged object graph into writer; no new commit is
    created.  Returns a 1-tuple of the source oid."""
    assert item.spec.method == 'new-tag'
    assert item.dest.path.startswith(b'/.tag/')
    oidx = hexlify(item.src.hash)
    get_random_item(item.spec.src, oidx, src_repo, writer, opt)
    return (item.src.hash,)
+
+
def resolve_replace(spec, src_repo, dest_repo):
    """Resolve a --replace/--replace: spec to a Target.  The
    destination must be (or default to) a branch or /.tag/ path, and
    only a branch, save, or commit may replace a branch."""
    src = resolve_src(spec, src_repo)
    spec_args = spec_msg(spec)
    if not spec.dest:
        if src.path.startswith(b'/.tag/') or src.type == 'branch':
            spec = spec._replace(dest=spec.src)
    if not spec.dest:
        # Fixed: was misuse('...%s', spec_args) -- a TypeError, since
        # misuse() only takes a single message argument.
        misuse('no destination provided for %s' % spec_args)
    dest = find_vfs_item(spec.dest, dest_repo)
    if dest:
        if not dest.type == 'branch' and not dest.path.startswith(b'/.tag/'):
            misuse('%s impossible; can only overwrite branch or tag'
                  % spec_args)
    else:
        cp = validate_vfs_path(cleanup_vfs_path(spec.dest))
        dest = default_loc._replace(path=cp)
    if not dest.path.startswith(b'/.tag/') \
       and not src.type in ('branch', 'save', 'commit'):
        misuse('cannot overwrite branch with %s for %s' % (src.type, spec_args))
    return Target(spec=spec, src=src, dest=dest)
+
+
def handle_replace(item, src_repo, writer, opt):
    """Overwrite the destination tag or branch with the source.
    Returns a 1-tuple (oid,) for tag destinations, or
    (commit_oid, tree_oid) for branch destinations."""
    assert(item.spec.method == 'replace')
    if item.dest.path.startswith(b'/.tag/'):
        get_random_item(item.spec.src, hexlify(item.src.hash),
                        src_repo, writer, opt)
        return (item.src.hash,)
    assert(item.dest.type == 'branch' or not item.dest.type)
    src_oidx = hexlify(item.src.hash)
    get_random_item(item.spec.src, src_oidx, src_repo, writer, opt)
    commit_items = parse_commit(get_cat_data(src_repo.cat(src_oidx), b'commit'))
    return item.src.hash, unhexlify(commit_items.tree)
+
+
def resolve_unnamed(spec, src_repo, dest_repo):
    """Resolve an --unnamed spec (which must not name a destination)
    to a Target with dest=None."""
    if spec.dest:
        misuse('destination name given for %s' % spec_msg(spec))
    return Target(spec=spec, src=resolve_src(spec, src_repo), dest=None)
+
+
def handle_unnamed(item, src_repo, writer, opt):
    """Fetch the object graph anonymously; no ref will be updated, so
    return (None,)."""
    oidx = hexlify(item.src.hash)
    get_random_item(item.spec.src, oidx, src_repo, writer, opt)
    return (None,)
+
+
def resolve_targets(specs, src_repo, dest_repo):
    """Resolve every Spec into a Target up-front (so we can fail before
    anything is written) and reject duplicate tag destinations that
    aren't explicitly allowed to overwrite."""
    resolvers = {'ff': resolve_ff,
                 'append': resolve_append,
                 'pick': resolve_pick,
                 'force-pick': resolve_pick,
                 'new-tag': resolve_new_tag,
                 'replace': resolve_replace,
                 'unnamed': resolve_unnamed}
    resolved_items = []
    for spec in specs:
        debug1('initial-spec: %r\n' % (spec,))
        resolve = resolvers.get(spec.method)
        # Should be impossible -- prevented by the option parser.
        assert resolve is not None
        resolved_items.append(resolve(spec, src_repo, dest_repo))

    # FIXME: check for prefix overlap?  i.e.:
    #   bup get --ff foo --ff: baz foo/bar
    #   bup get --new-tag .tag/foo --new-tag: bar .tag/foo/bar

    # Now that we have all the items, check for duplicate tags.
    tags_targeted = set()
    for item in resolved_items:
        dest_path = item.dest and item.dest.path
        if not dest_path:
            continue
        assert dest_path.startswith(b'/')
        if not dest_path.startswith(b'/.tag/'):
            continue
        if dest_path in tags_targeted:
            if item.spec.method not in ('replace', 'force-pick'):
                misuse('cannot overwrite tag %s via %s'
                       % (path_msg(dest_path), spec_msg(item.spec)))
        else:
            tags_targeted.add(dest_path)
    return resolved_items
+
+
def log_item(name, type, opt, tree=None, commit=None, tag=None):
    """Print any ids requested via --print-tags/--print-trees/
    --print-commits, and, when verbose, the item's source path (with a
    trailing / for directory-like types)."""
    if tag and opt.print_tags:
        print(hexstr(tag))
    if tree and opt.print_trees:
        print(hexstr(tree))
    if commit and opt.print_commits:
        print(hexstr(commit))
    if opt.verbose:
        last = ''
        if type in ('root', 'branch', 'save', 'commit', 'tree'):
            if not name.endswith(b'/'):
                last = '/'
        log('%s%s\n' % (path_msg(name), last))
+
def main(argv):
    """Entry point for "bup get": transfer the requested refs/objects
    from the source repository to the (possibly remote) destination
    repository, updating destination refs only after all data has been
    written successfully."""
    is_reverse = environ.get(b'BUP_SERVER_REVERSE')
    opt = parse_args(argv)
    git.check_repo_or_die()
    if opt.source:
        opt.source = argv_bytes(opt.source)
    if opt.bwlimit:
        # Fixed: parse_num was referenced unqualified but never
        # imported, so --bwlimit always died with a NameError; use the
        # helpers module (already imported) instead.  parse_num accepts
        # suffixed values like b'1m'.
        client.bwlimit = helpers.parse_num(opt.bwlimit)
    if is_reverse and opt.remote:
        misuse("don't use -r in reverse mode; it's automatic")
    if opt.remote or is_reverse:
        dest_repo = RemoteRepo(opt.remote)
    else:
        dest_repo = LocalRepo()

    with dest_repo as dest_repo:
        with LocalRepo(repo_dir=opt.source) as src_repo:
            with dest_repo.new_packwriter(compression_level=opt.compress) as writer:
                # Resolve and validate all sources and destinations,
                # implicit or explicit, and do it up-front, so we can
                # fail before we start writing (for any obviously
                # broken cases).
                target_items = resolve_targets(opt.target_specs,
                                               src_repo, dest_repo)

                updated_refs = {}  # ref_name -> (original_ref, tip_commit(bin))
                no_ref_info = (None, None)

                handlers = {'ff': handle_ff,
                            'append': handle_append,
                            'force-pick': handle_pick,
                            'pick': handle_pick,
                            'new-tag': handle_new_tag,
                            'replace': handle_replace,
                            'unnamed': handle_unnamed}

                for item in target_items:
                    debug1('get-spec: %r\n' % (item.spec,))
                    debug1('get-src: %s\n' % loc_desc(item.src))
                    debug1('get-dest: %s\n' % loc_desc(item.dest))
                    dest_path = item.dest and item.dest.path
                    if dest_path:
                        if dest_path.startswith(b'/.tag/'):
                            dest_ref = b'refs/tags/%s' % dest_path[6:]
                        else:
                            dest_ref = b'refs/heads/%s' % dest_path[1:]
                    else:
                        dest_ref = None

                    dest_hash = item.dest and item.dest.hash
                    orig_ref, cur_ref = updated_refs.get(dest_ref, no_ref_info)
                    orig_ref = orig_ref or dest_hash
                    cur_ref = cur_ref or dest_hash

                    handler = handlers[item.spec.method]
                    item_result = handler(item, src_repo, writer, opt)
                    # Handlers return either (oid,) or (commit, tree).
                    # Fixed: always (re)bind tree here so a stale value
                    # from a previous iteration (or an unbound name on
                    # the first) can't leak into log_item() below.
                    if len(item_result) > 1:
                        new_id, tree = item_result
                    else:
                        new_id, tree = item_result[0], None

                    if not dest_ref:
                        log_item(item.spec.src, item.src.type, opt)
                    else:
                        updated_refs[dest_ref] = (orig_ref, new_id)
                        if dest_ref.startswith(b'refs/tags/'):
                            log_item(item.spec.src, item.src.type, opt, tag=new_id)
                        else:
                            log_item(item.spec.src, item.src.type, opt,
                                     tree=tree, commit=new_id)

        # Only update the refs at the very end, once the writer is
        # closed, so that if something goes wrong above, the old refs
        # will be undisturbed.
        for ref_name, info in items(updated_refs):
            orig_ref, new_ref = info
            try:
                dest_repo.update_ref(ref_name, new_ref, orig_ref)
                if opt.verbose:
                    new_hex = hexlify(new_ref)
                    if orig_ref:
                        orig_hex = hexlify(orig_ref)
                        log('updated %r (%s -> %s)\n' % (ref_name, orig_hex, new_hex))
                    else:
                        log('updated %r (%s)\n' % (ref_name, new_hex))
            except (git.GitError, client.ClientError) as ex:
                add_error('unable to update ref %r: %s' % (ref_name, ex))

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))
        sys.exit(1)
diff --git a/lib/bup/cmd/help.py b/lib/bup/cmd/help.py
new file mode 100755 (executable)
index 0000000..10b3d36
--- /dev/null
@@ -0,0 +1,34 @@
+
+from __future__ import absolute_import
+import os, glob, sys
+
+from bup import options, path
+from bup.compat import argv_bytes
+
+
+optspec = """
+bup help <command>
+"""
+
def main(argv):
    """Entry point for "bup help": show the man page for a bup command,
    or re-exec bup itself (which prints the general usage) when no
    command is given."""
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    nargs = len(extra)
    if nargs == 0:
        # the wrapper program provides the default usage string
        os.execvp(path.exe(), [path.exe()])
    elif nargs == 1:
        if extra[0] == 'bup':
            docname = b'bup'
        else:
            docname = b'bup-%s' % argv_bytes(extra[0])
        manpath = os.path.join(path.exedir(),
                               b'../../Documentation/' + docname + b'.[1-9]')
        matches = glob.glob(manpath)
        try:
            if matches:
                # Prefer the locally built man page from a source tree.
                os.execvp('man', ['man', '-l', matches[0]])
            else:
                os.execvp('man', ['man', docname])
        except OSError as e:
            sys.stderr.write('Unable to run man command: %s\n' % e)
            sys.exit(1)
    else:
        o.fatal("exactly one command name expected")
diff --git a/lib/bup/cmd/import_duplicity.py b/lib/bup/cmd/import_duplicity.py
new file mode 100755 (executable)
index 0000000..b2b011a
--- /dev/null
@@ -0,0 +1,103 @@
+
+from __future__ import absolute_import
+from calendar import timegm
+from pipes import quote
+from subprocess import check_call
+from time import strftime, strptime
+import os, sys, tempfile
+
+from bup import git, helpers, options
+from bup.compat import argv_bytes, str_type
+from bup.helpers import (handle_ctrl_c,
+                         log,
+                         readpipe,
+                         shstr,
+                         saved_errors,
+                         unlink)
+import bup.path
+
+
+optspec = """
+bup import-duplicity [-n] <duplicity-source-url> <bup-save-name>
+--
+n,dry-run  don't do anything; just print what would be done
+"""
+
+dry_run = False
+
def logcmd(cmd):
    # Log a command line (a list of bytes) to stderr in shell-quoted form.
    log(shstr(cmd).decode(errors='backslashreplace') + '\n')
+
def exc(cmd, shell=False):
    """Log cmd and run it via check_call; does nothing but log when the
    module-level dry_run flag is set."""
    logcmd(cmd)
    if not dry_run:
        check_call(cmd, shell=shell)
+
def exo(cmd, shell=False, preexec_fn=None, close_fds=True):
    """Log cmd and run it, returning its captured stdout; returns None
    in dry-run mode (when nothing is executed)."""
    logcmd(cmd)
    if not dry_run:
        return helpers.exo(cmd, shell=shell, preexec_fn=preexec_fn,
                           close_fds=close_fds)[0]
+
def redirect_dup_output():
    # Runs in the child process (as a subprocess preexec_fn) just
    # before exec: duplicate the child's stdout onto fds 3 and 2 so
    # that duplicity's --log-fd=3 output (and its stderr) end up on the
    # stream exo() captures.  NOTE(review): this assumes fd 1 already
    # points at exo's capture pipe when the preexec_fn runs -- confirm.
    os.dup2(1, 3)
    os.dup2(1, 2)
+
+
def main(argv):
    # Entry point for "bup import-duplicity": restore every snapshot in
    # a duplicity backup chain and import each one into bup as a save
    # (dated with the snapshot's original timestamp).
    global dry_run

    log('\nbup: import-duplicity is EXPERIMENTAL (proceed with caution)\n\n')

    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])
    dry_run = opt.dry_run

    if len(extra) < 1 or not extra[0]:
        o.fatal('duplicity source URL required')
    if len(extra) < 2 or not extra[1]:
        o.fatal('bup destination save name required')
    if len(extra) > 2:
        o.fatal('too many arguments')

    source_url, save_name = extra
    source_url = argv_bytes(source_url)
    save_name = argv_bytes(save_name)
    bup_path = bup.path.exe()

    git.check_repo_or_die()

    # All scratch space (duplicity cache, restore area, bup index) lives
    # in one temp dir that's removed in the finally block below.
    tmpdir = tempfile.mkdtemp(prefix=b'bup-import-dup-')
    try:
        dup = [b'duplicity', b'--archive-dir', tmpdir + b'/dup-cache']
        restoredir = tmpdir + b'/restore'
        tmpidx = tmpdir + b'/index'

        collection_status = \
            exo(dup + [b'collection-status', b'--log-fd=3', source_url],
                close_fds=False, preexec_fn=redirect_dup_output)  # i.e. 3>&1 1>&2
        # Duplicity output lines of interest look like this (one leading space):
        #  full 20150222T073111Z 1 noenc
        #  inc 20150222T073233Z 1 noenc
        dup_timestamps = []
        for line in collection_status.splitlines():
            if line.startswith(b' inc '):
                assert(len(line) >= len(b' inc 20150222T073233Z'))
                dup_timestamps.append(line[5:21])
            elif line.startswith(b' full '):
                assert(len(line) >= len(b' full 20150222T073233Z'))
                dup_timestamps.append(line[6:22])
        # Restore each snapshot in turn and save it into bup, using the
        # snapshot's timestamp as the save date.
        for i, dup_ts in enumerate(dup_timestamps):
            tm = strptime(dup_ts.decode('ascii'), '%Y%m%dT%H%M%SZ')
            exc([b'rm', b'-rf', restoredir])
            exc(dup + [b'restore', b'-t', dup_ts, source_url, restoredir])
            exc([bup_path, b'index', b'-uxf', tmpidx, restoredir])
            exc([bup_path, b'save', b'--strip', b'--date', b'%d' % timegm(tm),
                 b'-f', tmpidx, b'-n', save_name, restoredir])
        sys.stderr.flush()
    finally:
        exc([b'rm', b'-rf', tmpdir])

    if saved_errors:
        log('warning: %d errors encountered\n' % len(saved_errors))
        sys.exit(1)
diff --git a/lib/bup/cmd/index.py b/lib/bup/cmd/index.py
new file mode 100755 (executable)
index 0000000..8633585
--- /dev/null
@@ -0,0 +1,323 @@
+from __future__ import absolute_import, print_function
+
+from binascii import hexlify
+import errno, os, stat, sys, time
+
+from bup import compat, metadata, options, git, index, drecurse, hlinkdb
+from bup.compat import argv_bytes
+from bup.drecurse import recursive_dirlist
+from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
+from bup.helpers import (add_error, handle_ctrl_c, log, parse_excludes, parse_rx_excludes,
+                         progress, qprogress, saved_errors)
+from bup.io import byte_stream, path_msg
+
+
class IterHelper:
    """Wrap an iterable, exposing the most recent item as .cur.

    .cur holds the current item, or None once the underlying iterator
    is exhausted.  Calling next()/__next__() advances the iterator and
    returns (and caches) the new current item; exhaustion yields None
    rather than raising StopIteration.
    """
    def __init__(self, l):
        self.i = iter(l)
        self.cur = None
        self.next()

    def __next__(self):
        result = next(self.i, None)
        self.cur = result
        return result

    # Python 2 style alias; callers also invoke rig.next() explicitly.
    next = __next__
+
def check_index(reader, verbose):
    """Sanity-check an index.Reader by walking it in both orders.

    First iterates in forward (on-disk) order, verifying child-offset
    bookkeeping and the hash/mode invariants, then in normal order,
    verifying that names are strictly reverse-sorted.  Fails via
    assert on the first inconsistency, logging the offending entry.
    """
    try:
        log('check: checking forward iteration...\n')
        e = None
        d = {}
        for e in reader.forward_iter():
            if e.children_n:
                if verbose:
                    log('%08x+%-4d %r\n' % (e.children_ofs, e.children_n,
                                            path_msg(e.name)))
                assert(e.children_ofs)
                # only directories (names ending in /) may have children
                assert e.name.endswith(b'/')
                # each children_ofs must be claimed by at most one entry
                assert(not d.get(e.children_ofs))
                d[e.children_ofs] = 1
            if e.flags & index.IX_HASHVALID:
                # a valid hash implies a real sha and a git mode
                assert(e.sha != index.EMPTY_SHA)
                assert(e.gitmode)
        assert not e or bytes(e.name) == b'/'  # last entry is *always* /
        log('check: checking normal iteration...\n')
        last = None
        for e in reader:
            if last:
                # normal iteration is reverse-sorted by name
                assert(last > e.name)
            last = e.name
    except:
        # e is the entry being examined when the check failed
        log('index error! at %r\n' % e)
        raise
    log('check: passed.\n')
+
+
def clear_index(indexfile, verbose):
    """Remove indexfile and its .meta and .hlink companion files.

    Files that don't exist are silently skipped; any other OSError
    propagates.  When verbose is true, log each file removed.
    """
    for path in (indexfile, indexfile + b'.meta', indexfile + b'.hlink'):
        try:
            os.remove(path)
            if verbose:
                log('clear: removed %s\n' % path_msg(path))
        except OSError as ex:
            if ex.errno != errno.ENOENT:
                raise
+
+
def update_index(top, excluded_paths, exclude_rxs, indexfile,
                 check=False, check_device=True,
                 xdev=False, xdev_exceptions=frozenset(),
                 fake_valid=False, fake_invalid=False,
                 out=None, verbose=0):
    """Rescan the filesystem under top and update indexfile to match.

    Walks top via recursive_dirlist and merges the results with the
    existing index: entries whose stat data went stale are refreshed,
    paths no longer on disk are marked deleted, and new paths are
    added.  The .meta and .hlink companion stores are kept in sync.
    When check is true, both old and new indexes are validated before
    the merge.  Progress is reported via qprogress/progress; verbose
    paths are written to out (a binary stream).
    """
    # tmax must be epoch nanoseconds.
    tmax = (time.time() - 1) * 10**9
    ri = index.Reader(indexfile)
    msw = index.MetaStoreWriter(indexfile + b'.meta')
    wi = index.Writer(indexfile, msw, tmax)
    rig = IterHelper(ri.iter(name=top))

    hlinks = hlinkdb.HLinkDB(indexfile + b'.hlink')

    fake_hash = None
    if fake_valid:
        def fake_hash(name):
            return (GIT_MODE_FILE, index.FAKE_SHA)

    total = 0
    bup_dir = os.path.abspath(git.repo())
    index_start = time.time()
    for path, pst in recursive_dirlist([top],
                                       xdev=xdev,
                                       bup_dir=bup_dir,
                                       excluded_paths=excluded_paths,
                                       exclude_rxs=exclude_rxs,
                                       xdev_exceptions=xdev_exceptions):
        if verbose>=2 or (verbose == 1 and stat.S_ISDIR(pst.st_mode)):
            out.write(b'%s\n' % path)
            out.flush()
            elapsed = time.time() - index_start
            paths_per_sec = total / elapsed if elapsed else 0
            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
        elif not (total % 128):
            elapsed = time.time() - index_start
            paths_per_sec = total / elapsed if elapsed else 0
            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
        total += 1

        # rig walks the old index; the `>` comparison below implies the
        # index and the dirlist walk share the same (reverse-sorted)
        # order, so entries rig passes without a path match are gone
        # from disk.
        while rig.cur and rig.cur.name > path:  # deleted paths
            if rig.cur.exists():
                rig.cur.set_deleted()
                rig.cur.repack()
                if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
                    hlinks.del_path(rig.cur.name)
            rig.next()

        if rig.cur and rig.cur.name == path:    # paths that already existed
            need_repack = False
            if(rig.cur.stale(pst, check_device=check_device)):
                try:
                    meta = metadata.from_path(path, statinfo=pst)
                except (OSError, IOError) as e:
                    add_error(e)
                    rig.next()
                    continue
                if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
                    hlinks.del_path(rig.cur.name)
                if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                    hlinks.add_path(path, pst.st_dev, pst.st_ino)
                # Clear these so they don't bloat the store -- they're
                # already in the index (since they vary a lot and they're
                # fixed length).  If you've noticed "tmax", you might
                # wonder why it's OK to do this, since that code may
                # adjust (mangle) the index mtime and ctime -- producing
                # fake values which must not end up in a .bupm.  However,
                # it looks like that shouldn't be possible:  (1) When
                # "save" validates the index entry, it always reads the
                # metadata from the filesystem. (2) Metadata is only
                # read/used from the index if hashvalid is true. (3)
                # "faked" entries will be stale(), and so we'll invalidate
                # them below.
                meta.ctime = meta.mtime = meta.atime = 0
                meta_ofs = msw.store(meta)
                rig.cur.update_from_stat(pst, meta_ofs)
                rig.cur.invalidate()
                need_repack = True
            if not (rig.cur.flags & index.IX_HASHVALID):
                if fake_hash:
                    if rig.cur.sha == index.EMPTY_SHA:
                        rig.cur.gitmode, rig.cur.sha = fake_hash(path)
                    rig.cur.flags |= index.IX_HASHVALID
                    need_repack = True
            if fake_invalid:
                rig.cur.invalidate()
                need_repack = True
            if need_repack:
                rig.cur.repack()
            rig.next()
        else:  # new paths
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError) as e:
                add_error(e)
                continue
            # See same assignment to 0, above, for rationale.
            meta.atime = meta.mtime = meta.ctime = 0
            meta_ofs = msw.store(meta)
            wi.add(path, pst, meta_ofs, hashgen=fake_hash)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)

    elapsed = time.time() - index_start
    paths_per_sec = total / elapsed if elapsed else 0
    progress('Indexing: %d, done (%d paths/s).\n' % (total, paths_per_sec))

    hlinks.prepare_save()

    if ri.exists():
        # there was a previous index: merge the new entries into it
        ri.save()
        wi.flush()
        if wi.count:
            wr = wi.new_reader()
            if check:
                log('check: before merging: oldfile\n')
                check_index(ri, verbose)
                log('check: before merging: newfile\n')
                check_index(wr, verbose)
            mi = index.Writer(indexfile, msw, tmax)

            for e in index.merge(ri, wr):
                # FIXME: shouldn't we remove deleted entries eventually?  When?
                mi.add_ixentry(e)

            ri.close()
            mi.close()
            wr.close()
        wi.abort()
    else:
        # no previous index: the writer's output becomes the index
        wi.close()

    msw.close()
    hlinks.commit_save()
+
+
optspec = """
bup index <-p|-m|-s|-u|--clear|--check> [options...] <filenames...>
--
 Modes:
p,print    print the index entries for the given names (also works with -u)
m,modified print only added/deleted/modified files (implies -p)
s,status   print each filename with a status char (A/M/D) (implies -p)
u,update   recursively update the index entries for the given file/dir names (default if no mode is specified)
check      carefully check index file integrity
clear      clear the default index
 Options:
H,hash     print the hash for each object next to its name
l,long     print more information about each file
no-check-device don't invalidate an entry if the containing device changes
fake-valid mark all index entries as up-to-date even if they aren't
fake-invalid mark all index entries as invalid
f,indexfile=  the name of the index file (normally BUP_DIR/bupindex)
exclude= a path to exclude from the backup (may be repeated)
exclude-from= skip --exclude paths in file (may be repeated)
exclude-rx= skip paths matching the unanchored regex (may be repeated)
exclude-rx-from= skip --exclude-rx patterns in file (may be repeated)
v,verbose  increase log output (can be used more than once)
x,xdev,one-file-system  don't cross filesystem boundaries
"""

def main(argv):
    """CLI entry point for 'bup index'.

    Validates the option combination, then runs the requested mix of
    check/clear/update/print actions against the index file.  Exits 1
    if any errors were accumulated in saved_errors.
    """
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    # -u (update) is the default when no mode option was given
    if not (opt.modified or \
            opt['print'] or \
            opt.status or \
            opt.update or \
            opt.check or \
            opt.clear):
        opt.update = 1
    if (opt.fake_valid or opt.fake_invalid) and not opt.update:
        o.fatal('--fake-{in,}valid are meaningless without -u')
    if opt.fake_valid and opt.fake_invalid:
        o.fatal('--fake-valid is incompatible with --fake-invalid')
    if opt.clear and opt.indexfile:
        o.fatal('cannot clear an external index (via -f)')

    # FIXME: remove this once we account for timestamp races, i.e. index;
    # touch new-file; index.  It's possible for this to happen quickly
    # enough that new-file ends up with the same timestamp as the first
    # index, and then bup will ignore it.
    tick_start = time.time()
    time.sleep(1 - (tick_start - int(tick_start)))

    git.check_repo_or_die()

    handle_ctrl_c()

    if opt.verbose is None:
        opt.verbose = 0

    if opt.indexfile:
        indexfile = argv_bytes(opt.indexfile)
    else:
        indexfile = git.repo(b'bupindex')

    if opt.check:
        log('check: starting initial check.\n')
        check_index(index.Reader(indexfile), opt.verbose)

    if opt.clear:
        log('clear: clearing index.\n')
        clear_index(indexfile, opt.verbose)

    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    if opt.update:
        if not extra:
            o.fatal('update mode (-u) requested but no paths given')
        extra = [argv_bytes(x) for x in extra]
        excluded_paths = parse_excludes(flags, o.fatal)
        exclude_rxs = parse_rx_excludes(flags, o.fatal)
        xexcept = index.unique_resolved_paths(extra)
        for rp, path in index.reduce_paths(extra):
            update_index(rp, excluded_paths, exclude_rxs, indexfile,
                         check=opt.check, check_device=opt.check_device,
                         xdev=opt.xdev, xdev_exceptions=xexcept,
                         fake_valid=opt.fake_valid,
                         fake_invalid=opt.fake_invalid,
                         out=out, verbose=opt.verbose)

    if opt['print'] or opt.status or opt.modified:
        extra = [argv_bytes(x) for x in extra]
        for name, ent in index.Reader(indexfile).filter(extra or [b'']):
            if (opt.modified
                and (ent.is_valid() or ent.is_deleted() or not ent.mode)):
                continue
            line = b''
            if opt.status:
                # D=deleted, A=added (no hash yet), M=modified, '  '=clean
                if ent.is_deleted():
                    line += b'D '
                elif not ent.is_valid():
                    if ent.sha == index.EMPTY_SHA:
                        line += b'A '
                    else:
                        line += b'M '
                else:
                    line += b'  '
            if opt.hash:
                line += hexlify(ent.sha) + b' '
            if opt.long:
                line += b'%7s %7s ' % (oct(ent.mode).encode('ascii'),
                                       oct(ent.gitmode).encode('ascii'))
            out.write(line + (name or b'./') + b'\n')

    if opt.check and (opt['print'] or opt.status or opt.modified or opt.update):
        log('check: starting final check.\n')
        check_index(index.Reader(indexfile), opt.verbose)

    if saved_errors:
        log('WARNING: %d errors encountered.\n' % len(saved_errors))
        sys.exit(1)
diff --git a/lib/bup/cmd/init.py b/lib/bup/cmd/init.py
new file mode 100755 (executable)
index 0000000..c354584
--- /dev/null
@@ -0,0 +1,32 @@
+
+from __future__ import absolute_import
+import sys
+
+from bup import git, options, client
+from bup.helpers import log, saved_errors
+from bup.compat import argv_bytes
+
+
optspec = """
[BUP_DIR=...] bup init [-r host:path]
--
r,remote=  remote repository path
"""

def main(argv):
    """Initialize the local bup repository, and optionally a remote one.

    Exits 1 with a message on stderr if local initialization fails.
    With -r, verifies the local repository and then connects to (and
    creates) the remote one.
    """
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    if extra:
        o.fatal("no arguments expected")

    try:
        git.init_repo()  # local repo
    except git.GitError as e:
        # fix: the message previously lacked a trailing newline (every
        # other log() call in these commands ends with one), leaving
        # stderr output glued to whatever follows
        log("bup: error: could not init repository: %s\n" % e)
        sys.exit(1)

    if opt.remote:
        git.check_repo_or_die()
        cli = client.Client(argv_bytes(opt.remote), create=True)
        cli.close()
diff --git a/lib/bup/cmd/join.py b/lib/bup/cmd/join.py
new file mode 100755 (executable)
index 0000000..caf524a
--- /dev/null
@@ -0,0 +1,51 @@
+
+from __future__ import absolute_import
+
+import sys
+
+from bup import git, options
+from bup.compat import argv_bytes
+from bup.helpers import linereader, log
+from bup.io import byte_stream
+from bup.repo import LocalRepo, RemoteRepo
+
+
optspec = """
bup join [-r host:path] [refs or hashes...]
--
r,remote=  remote repository path
o=         output filename
"""

def main(argv):
    """Concatenate the contents of the given refs/hashes to stdout or -o FILE.

    With no refs on the command line, read them one per line from
    stdin.  Exits non-zero if any ref cannot be joined.
    """
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])
    if opt.remote:
        opt.remote = argv_bytes(opt.remote)

    git.check_repo_or_die()

    stdin = byte_stream(sys.stdin)

    if not extra:
        extra = linereader(stdin)

    ret = 0
    repo = RemoteRepo(opt.remote) if opt.remote else LocalRepo()

    if opt.o:
        outfile = open(opt.o, 'wb')
        close_out = True   # we own this handle; close it before exiting
    else:
        sys.stdout.flush()
        outfile = byte_stream(sys.stdout)
        close_out = False  # never close the process-wide stdout stream

    try:
        for ref in [argv_bytes(x) for x in extra]:
            try:
                for blob in repo.join(ref):
                    outfile.write(blob)
            except KeyError as e:
                outfile.flush()
                log('error: %s\n' % e)
                ret = 1
    finally:
        # fix: the -o file was never closed, relying on interpreter
        # shutdown to flush it; close explicitly so buffered data and
        # write errors (e.g. ENOSPC) surface before we exit
        if close_out:
            outfile.close()

    sys.exit(ret)
diff --git a/lib/bup/cmd/list_idx.py b/lib/bup/cmd/list_idx.py
new file mode 100755 (executable)
index 0000000..8bf940e
--- /dev/null
@@ -0,0 +1,65 @@
+
+from __future__ import absolute_import, print_function
+from binascii import hexlify, unhexlify
+import os, sys
+
+from bup import git, options
+from bup.compat import argv_bytes
+from bup.helpers import add_error, handle_ctrl_c, log, qprogress, saved_errors
+from bup.io import byte_stream
+
optspec = """
bup list-idx [--find=<prefix>] <idxfilenames...>
--
find=   display only objects that start with <prefix>
"""

def main(argv):
    """List (or search) the object ids contained in git .idx/.midx files.

    With --find, print only objects whose hex id starts with the given
    prefix; otherwise print every object id in each file.  Exits 1 if
    any file could not be opened.
    """
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    handle_ctrl_c()

    opt.find = argv_bytes(opt.find) if opt.find else b''

    if not extra:
        o.fatal('you must provide at least one filename')

    if len(opt.find) > 40:
        o.fatal('--find parameter must be <= 40 chars long')
    else:
        if len(opt.find) % 2:
            # pad odd-length prefixes to a whole number of hex bytes
            s = opt.find + b'0'
        else:
            s = opt.find
        try:
            bin = unhexlify(s)
        except (TypeError, ValueError):
            # fix: Python 2's unhexlify raised TypeError for bad input,
            # but Python 3 raises binascii.Error, a ValueError subclass;
            # catching only TypeError let invalid prefixes crash instead
            # of producing this fatal message
            o.fatal('--find parameter is not a valid hex string')

    sys.stdout.flush()
    out = byte_stream(sys.stdout)
    find = opt.find.lower()
    count = 0
    idxfiles = [argv_bytes(x) for x in extra]
    for name in idxfiles:
        try:
            ix = git.open_idx(name)
        except git.GitError as e:
            add_error('%r: %s' % (name, e))
            continue
        if len(opt.find) == 40:
            # full-length id: a single existence check suffices
            if ix.exists(bin):
                out.write(b'%s %s\n' % (name, find))
        else:
            # slow, exhaustive search
            for _i in ix:
                i = hexlify(_i)
                if i.startswith(find):
                    out.write(b'%s %s\n' % (name, i))
                qprogress('Searching: %d\r' % count)
                count += 1

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))
        sys.exit(1)
diff --git a/lib/bup/cmd/ls.py b/lib/bup/cmd/ls.py
new file mode 100755 (executable)
index 0000000..a7fc6db
--- /dev/null
@@ -0,0 +1,15 @@
+
+from __future__ import absolute_import, print_function
+import os.path, sys
+
+from bup import compat, git, ls
+from bup.io import byte_stream
+
def main(argv):
    """Entry point for 'bup ls': delegate to ls.via_cmdline.

    The option spec lives in lib/bup/ls.py.
    """
    git.check_repo_or_die()
    sys.stdout.flush()
    byte_out = byte_stream(sys.stdout)
    sys.exit(ls.via_cmdline(argv[1:], out=byte_out))
diff --git a/lib/bup/cmd/margin.py b/lib/bup/cmd/margin.py
new file mode 100755 (executable)
index 0000000..07f2b0f
--- /dev/null
@@ -0,0 +1,75 @@
+
+from __future__ import absolute_import
+import math, struct, sys
+
+from bup import options, git, _helpers
+from bup.helpers import log
+from bup.io import byte_stream
+
# Used only for the closing back-of-the-envelope collision estimate.
POPULATION_OF_EARTH=6.7e9  # as of September, 2010

optspec = """
bup margin
--
predict    Guess object offsets and report the maximum deviation
ignore-midx  Don't use midx files; use only plain pack idx files.
"""

def main(argv):
    """Report the longest matching SHA-1 prefix (in bits) between any
    two objects in the repository -- i.e. how much safety 'margin'
    remains before ids would collide -- or, with --predict, how well
    object ranks can be interpolated from their id prefixes."""
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    if extra:
        o.fatal("no arguments expected")

    git.check_repo_or_die()

    mi = git.PackIdxList(git.repo(b'objects/pack'), ignore_midx=opt.ignore_midx)

    def do_predict(ix, out):
        # If ids were perfectly uniform, an object's rank would equal
        # prefix/2^64 of the total; report the worst observed deviation.
        total = len(ix)
        maxdiff = 0
        for count,i in enumerate(ix):
            prefix = struct.unpack('!Q', i[:8])[0]
            expected = prefix * total // (1 << 64)
            diff = count - expected
            maxdiff = max(maxdiff, abs(diff))
        out.write(b'%d of %d (%.3f%%) '
                  % (maxdiff, len(ix), maxdiff * 100.0 / len(ix)))
        out.flush()
        assert(count+1 == len(ix))

    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    if opt.predict:
        if opt.ignore_midx:
            for pack in mi.packs:
                do_predict(pack, out)
        else:
            do_predict(mi, out)
    else:
        # default mode: find longest matching prefix
        last = b'\0'*20
        longmatch = 0
        for i in mi:
            if i == last:
                continue
            #assert(str(i) >= last)
            pm = _helpers.bitmatch(last, i)
            longmatch = max(longmatch, pm)
            last = i
        out.write(b'%d\n' % longmatch)
        log('%d matching prefix bits\n' % longmatch)
        # extrapolation: matching prefix bits grow roughly linearly
        # with log2(object count)
        doublings = math.log(len(mi), 2)
        bpd = longmatch / doublings
        log('%.2f bits per doubling\n' % bpd)
        remain = 160 - longmatch
        rdoublings = remain / bpd
        log('%d bits (%.2f doublings) remaining\n' % (remain, rdoublings))
        larger = 2**rdoublings
        log('%g times larger is possible\n' % larger)
        perperson = larger/POPULATION_OF_EARTH
        log('\nEveryone on earth could have %d data sets like yours, all in one\n'
            'repository, and we would expect 1 object collision.\n'
            % int(perperson))
diff --git a/lib/bup/cmd/memtest.py b/lib/bup/cmd/memtest.py
new file mode 100755 (executable)
index 0000000..f62c64f
--- /dev/null
@@ -0,0 +1,125 @@
+
+from __future__ import absolute_import, print_function
+import re, resource, sys, time
+
+from bup import git, bloom, midx, options, _helpers
+from bup.compat import range
+from bup.helpers import handle_ctrl_c
+from bup.io import byte_stream
+
+
+_linux_warned = 0
+def linux_memstat():
+    global _linux_warned
+    #fields = ['VmSize', 'VmRSS', 'VmData', 'VmStk', 'ms']
+    d = {}
+    try:
+        f = open(b'/proc/self/status', 'rb')
+    except IOError as e:
+        if not _linux_warned:
+            log('Warning: %s\n' % e)
+            _linux_warned = 1
+        return {}
+    for line in f:
+        # Note that on Solaris, this file exists but is binary.  If that
+        # happens, this split() might not return two elements.  We don't
+        # really need to care about the binary format since this output
+        # isn't used for much and report() can deal with missing entries.
+        t = re.split(br':\s*', line.strip(), 1)
+        if len(t) == 2:
+            k,v = t
+            d[k] = v
+    return d
+
+
last = last_u = last_s = start = 0
def report(count, out):
    """Write one row of memory/CPU usage statistics to out (a binary stream).

    A negative count prints the column headers instead and records the
    start time; user/sys/wall columns are millisecond deltas since the
    previous call (tracked in module globals).
    """
    global last, last_u, last_s, start
    headers = ['RSS', 'MajFlt', 'user', 'sys', 'ms']
    ru = resource.getrusage(resource.RUSAGE_SELF)
    now = time.time()
    rss = int(ru.ru_maxrss // 1024)
    if not rss:
        # ru_maxrss can be 0 on some platforms; fall back to /proc
        rss = linux_memstat().get(b'VmRSS', b'??')
    fields = [rss,
              ru.ru_majflt,
              int((ru.ru_utime - last_u) * 1000),
              int((ru.ru_stime - last_s) * 1000),
              int((now - last) * 1000)]
    fmt = '%9s  ' + ('%10s ' * len(fields))
    if count >= 0:
        row = fmt % tuple([count] + fields)
    else:
        start = now
        row = fmt % tuple([''] + headers)
    out.write(row.encode('ascii') + b'\n')
    out.flush()

    # don't include time to run report() in usage counts
    ru = resource.getrusage(resource.RUSAGE_SELF)
    last_u = ru.ru_utime
    last_s = ru.ru_stime
    last = time.time()
+
+
optspec = """
bup memtest [-n elements] [-c cycles]
--
n,number=  number of objects per cycle [10000]
c,cycles=  number of cycles to run [100]
ignore-midx  ignore .midx files, use only .idx files
existing   test with existing objects instead of fake ones
"""

def main(argv):
    """Measure the memory/CPU cost of repeated object-existence lookups.

    Performs cycles * number exists() probes against the repository's
    pack indexes (random ids by default, real ones with --existing),
    printing a usage report after each cycle and search statistics at
    the end."""
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    if extra:
        o.fatal('no arguments expected')

    git.check_repo_or_die()
    m = git.PackIdxList(git.repo(b'objects/pack'), ignore_midx=opt.ignore_midx)

    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    # NOTE(review): the random_sha() call between the two reports
    # presumably warms up the helper before the count-0 baseline row --
    # confirm before relying on it.
    report(-1, out)
    _helpers.random_sha()
    report(0, out)

    if opt.existing:
        def foreverit(mi):
            # cycle over the index contents indefinitely
            while 1:
                for e in mi:
                    yield e
        objit = iter(foreverit(m))

    for c in range(opt.cycles):
        for n in range(opt.number):
            if opt.existing:
                bin = next(objit)
                assert(m.exists(bin))
            else:
                bin = _helpers.random_sha()

                # technically, a randomly generated object id might exist.
                # but the likelihood of that is the likelihood of finding
                # a collision in sha-1 by accident, which is so unlikely that
                # we don't care.
                assert(not m.exists(bin))
        report((c+1)*opt.number, out)

    if bloom._total_searches:
        out.write(b'bloom: %d objects searched in %d steps: avg %.3f steps/object\n'
                  % (bloom._total_searches, bloom._total_steps,
                     bloom._total_steps*1.0/bloom._total_searches))
    if midx._total_searches:
        out.write(b'midx: %d objects searched in %d steps: avg %.3f steps/object\n'
                  % (midx._total_searches, midx._total_steps,
                     midx._total_steps*1.0/midx._total_searches))
    if git._total_searches:
        out.write(b'idx: %d objects searched in %d steps: avg %.3f steps/object\n'
                  % (git._total_searches, git._total_steps,
                     git._total_steps*1.0/git._total_searches))
    # 'start' was recorded by report(-1, ...) above
    out.write(b'Total time: %.3fs\n' % (time.time() - start))
diff --git a/lib/bup/cmd/meta.py b/lib/bup/cmd/meta.py
new file mode 100755 (executable)
index 0000000..eab9101
--- /dev/null
@@ -0,0 +1,164 @@
+# Copyright (C) 2010 Rob Browning
+#
+# This code is covered under the terms of the GNU Library General
+# Public License as described in the bup LICENSE file.
+
+# TODO: Add tar-like -C option.
+
+from __future__ import absolute_import
+import sys
+
+from bup import compat, metadata
+from bup import options
+from bup.compat import argv_bytes
+from bup.io import byte_stream
+from bup.helpers import handle_ctrl_c, log, saved_errors
+
+
def open_input(name):
    """Open name for binary reading; a falsy name or b'-' means stdin."""
    use_stdin = not name or name == b'-'
    if use_stdin:
        return byte_stream(sys.stdin)
    return open(name, 'rb')
+
+
def open_output(name):
    """Open name for binary writing; a falsy name or b'-' means stdout.

    The text-mode stdout is flushed first so bytes written afterwards
    can't be reordered ahead of earlier text output.
    """
    if name and name != b'-':
        return open(name, 'wb')
    sys.stdout.flush()
    return byte_stream(sys.stdout)
+
+
optspec = """
bup meta --create [OPTION ...] <PATH ...>
bup meta --list [OPTION ...]
bup meta --extract [OPTION ...]
bup meta --start-extract [OPTION ...]
bup meta --finish-extract [OPTION ...]
bup meta --edit [OPTION ...] <PATH ...>
--
c,create       write metadata for PATHs to stdout (or --file)
t,list         display metadata
x,extract      perform --start-extract followed by --finish-extract
start-extract  build tree matching metadata provided on standard input (or --file)
finish-extract finish applying standard input (or --file) metadata to filesystem
edit           alter metadata; write to stdout (or --file)
f,file=        specify source or destination file
R,recurse      recurse into subdirectories
xdev,one-file-system  don't cross filesystem boundaries
numeric-ids    apply numeric IDs (user, group, etc.) rather than names
symlinks       handle symbolic links (default is true)
paths          include paths in metadata (default is true)
set-uid=       set metadata uid (via --edit)
set-gid=       set metadata gid (via --edit)
set-user=      set metadata user (via --edit)
unset-user     remove metadata user (via --edit)
set-group=     set metadata group (via --edit)
unset-group    remove metadata group (via --edit)
v,verbose      increase log output (can be used more than once)
q,quiet        don't show progress meter
"""

def main(argv):
    """CLI entry point for 'bup meta'.

    Dispatches to exactly one of --create/--list/--extract/
    --start-extract/--finish-extract/--edit, reading from or writing
    to --file (default stdin/stdout).  Exits 1 if any errors were
    accumulated in saved_errors, else 0.
    """
    o = options.Options(optspec)
    # --paths, --symlinks, and --recurse are prepended so they default
    # to enabled; command-line flags can still override them
    opt, flags, remainder = o.parse_bytes([b'--paths', b'--symlinks', b'--recurse']
                                          + argv[1:])

    opt.verbose = opt.verbose or 0
    opt.quiet = opt.quiet or 0
    metadata.verbose = opt.verbose - opt.quiet
    opt.file = argv_bytes(opt.file) if opt.file else None

    action_count = sum([bool(x) for x in [opt.create, opt.list, opt.extract,
                                          opt.start_extract, opt.finish_extract,
                                          opt.edit]])
    if action_count > 1:
        o.fatal("bup: only one action permitted: --create --list --extract --edit")
    if action_count == 0:
        o.fatal("bup: no action specified")

    if opt.create:
        if len(remainder) < 1:
            o.fatal("no paths specified for create")
        output_file = open_output(opt.file)
        metadata.save_tree(output_file,
                           [argv_bytes(r) for r in remainder],
                           recurse=opt.recurse,
                           write_paths=opt.paths,
                           save_symlinks=opt.symlinks,
                           xdev=opt.xdev)
    elif opt.list:
        if len(remainder) > 0:
            o.fatal("cannot specify paths for --list")
        src = open_input(opt.file)
        metadata.display_archive(src, open_output(b'-'))
    elif opt.start_extract:
        if len(remainder) > 0:
            o.fatal("cannot specify paths for --start-extract")
        src = open_input(opt.file)
        metadata.start_extract(src, create_symlinks=opt.symlinks)
    elif opt.finish_extract:
        if len(remainder) > 0:
            o.fatal("cannot specify paths for --finish-extract")
        src = open_input(opt.file)
        metadata.finish_extract(src, restore_numeric_ids=opt.numeric_ids)
    elif opt.extract:
        if len(remainder) > 0:
            o.fatal("cannot specify paths for --extract")
        src = open_input(opt.file)
        metadata.extract(src,
                         restore_numeric_ids=opt.numeric_ids,
                         create_symlinks=opt.symlinks)
    elif opt.edit:
        if len(remainder) < 1:
            o.fatal("no paths specified for edit")
        output_file = open_output(opt.file)

        # Scan the raw flag list so that, of --set-user/--unset-user
        # (and the group pair), whichever appeared *last* wins.
        unset_user = False # True if --unset-user was the last relevant option.
        unset_group = False # True if --unset-group was the last relevant option.
        for flag in flags:
            if flag[0] == '--set-user':
                unset_user = False
            elif flag[0] == '--unset-user':
                unset_user = True
            elif flag[0] == '--set-group':
                unset_group = False
            elif flag[0] == '--unset-group':
                unset_group = True

        for path in remainder:
            f = open(argv_bytes(path), 'rb')
            try:
                # apply every requested edit to each record in the archive
                for m in metadata._ArchiveIterator(f):
                    if opt.set_uid is not None:
                        try:
                            m.uid = int(opt.set_uid)
                        except ValueError:
                            o.fatal("uid must be an integer")

                    if opt.set_gid is not None:
                        try:
                            m.gid = int(opt.set_gid)
                        except ValueError:
                            o.fatal("gid must be an integer")

                    if unset_user:
                        m.user = b''
                    elif opt.set_user is not None:
                        m.user = argv_bytes(opt.set_user)

                    if unset_group:
                        m.group = b''
                    elif opt.set_group is not None:
                        m.group = argv_bytes(opt.set_group)

                    m.write(output_file)
            finally:
                f.close()


    if saved_errors:
        log('WARNING: %d errors encountered.\n' % len(saved_errors))
        sys.exit(1)
    else:
        sys.exit(0)
diff --git a/lib/bup/cmd/midx.py b/lib/bup/cmd/midx.py
new file mode 100755 (executable)
index 0000000..644cea8
--- /dev/null
@@ -0,0 +1,299 @@
+
+from __future__ import absolute_import, print_function
+from binascii import hexlify
+import glob, os, math, resource, struct, sys, tempfile
+
+from bup import options, git, midx, _helpers, xstat
+from bup.compat import argv_bytes, hexstr, range
+from bup.helpers import (Sha1, add_error, atomically_replaced_file, debug1, fdatasync,
+                         handle_ctrl_c, log, mmap_readwrite, qprogress,
+                         saved_errors, unlink)
+from bup.io import byte_stream, path_msg
+
+
+PAGE_SIZE=4096
+SHA_PER_PAGE=PAGE_SIZE/20.
+
+optspec = """
+bup midx [options...] <idxnames...>
+--
+o,output=  output midx filename (default: auto-generated)
+a,auto     automatically use all existing .midx/.idx files as input
+f,force    merge produce exactly one .midx containing all objects
+p,print    print names of generated midx files
+check      validate contents of the given midx files (with -a, all midx files)
+max-files= maximum number of idx files to open at once [-1]
+d,dir=     directory containing idx/midx files
+"""
+
+merge_into = _helpers.merge_into
+
+
+def _group(l, count):
+    # Yield successive slices of l, each at most count items long.
+    for i in range(0, len(l), count):
+        yield l[i:i+count]
+
+
+def max_files():
+    # Return a conservative cap on how many idx files we may open at
+    # once: the soft RLIMIT_NOFILE (min of the soft/hard pair) minus a
+    # safety margin for stdio, sockets, etc.
+    mf = min(resource.getrlimit(resource.RLIMIT_NOFILE))
+    if mf > 32:
+        mf -= 20  # just a safety margin
+    else:
+        mf -= 6   # minimum safety margin
+    return mf
+
+
+def check_midx(name):
+    nicename = git.repo_rel(name)
+    log('Checking %s.\n' % path_msg(nicename))
+    try:
+        ix = git.open_idx(name)
+    except git.GitError as e:
+        add_error('%s: %s' % (pathmsg(name), e))
+        return
+    for count,subname in enumerate(ix.idxnames):
+        sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
+        for ecount,e in enumerate(sub):
+            if not (ecount % 1234):
+                qprogress('  %d/%d: %s %d/%d\r' 
+                          % (count, len(ix.idxnames),
+                             git.shorten_hash(subname).decode('ascii'),
+                             ecount, len(sub)))
+            if not sub.exists(e):
+                add_error("%s: %s: %s missing from idx"
+                          % (path_msg(nicename),
+                             git.shorten_hash(subname).decode('ascii'),
+                             hexstr(e)))
+            if not ix.exists(e):
+                add_error("%s: %s: %s missing from midx"
+                          % (path_msg(nicename),
+                             git.shorten_hash(subname).decode('ascii'),
+                             hexstr(e)))
+    prev = None
+    for ecount,e in enumerate(ix):
+        if not (ecount % 1234):
+            qprogress('  Ordering: %d/%d\r' % (ecount, len(ix)))
+        if e and prev and not e >= prev:
+            add_error('%s: ordering error: %s < %s'
+                      % (nicename, hexstr(e), hexstr(prev)))
+        prev = e
+
+
+_first = None  # first output dir seen; later dirs get a repo-rel prefix in logs
+def _do_midx(outdir, outfilename, infilenames, prefixstr,
+             auto=False, force=False):
+    """Merge the given idx/midx files into a single .midx.  Return
+    (total_object_count, outfilename) on success, or None when the
+    auto/force heuristics decide there is nothing to do."""
+    global _first
+    if not outfilename:
+        assert(outdir)
+        # Output name is derived from a hash of the input names, so the
+        # same inputs always produce the same midx filename.
+        sum = hexlify(Sha1(b'\0'.join(infilenames)).digest())
+        outfilename = b'%s/midx-%s.midx' % (outdir, sum)
+    
+    inp = []
+    total = 0
+    allfilenames = []
+    midxs = []
+    try:
+        for name in infilenames:
+            ix = git.open_idx(name)
+            midxs.append(ix)
+            # Tuple layout consumed by _helpers.merge_into:
+            # (map, object count, sha offset, which_ofs or 0, name offset)
+            inp.append((
+                ix.map,
+                len(ix),
+                ix.sha_ofs,
+                isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
+                len(allfilenames),
+            ))
+            for n in ix.idxnames:
+                allfilenames.append(os.path.basename(n))
+            total += len(ix)
+        # Sort inputs by their first sha (bytes at sha_ofs), descending.
+        inp.sort(reverse=True, key=lambda x: x[0][x[2] : x[2] + 20])
+
+        if not _first: _first = outdir
+        dirprefix = (_first != outdir) and git.repo_rel(outdir) + b': ' or b''
+        debug1('midx: %s%screating from %d files (%d objects).\n'
+               % (dirprefix, prefixstr, len(infilenames), total))
+        if (auto and (total < 1024 and len(infilenames) < 3)) \
+           or ((auto or force) and len(infilenames) < 2) \
+           or (force and not total):
+            debug1('midx: nothing to do.\n')
+            return
+
+        pages = int(total/SHA_PER_PAGE) or 1
+        bits = int(math.ceil(math.log(pages, 2)))
+        entries = 2**bits
+        debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))
+
+        unlink(outfilename)
+        with atomically_replaced_file(outfilename, 'wb') as f:
+            f.write(b'MIDX')
+            f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
+            assert(f.tell() == 12)
+
+            # Pre-size the file: header + fanout table + shas + which-idx.
+            f.truncate(12 + 4*entries + 20*total + 4*total)
+            f.flush()
+            fdatasync(f.fileno())
+
+            fmap = mmap_readwrite(f, close=False)
+            count = merge_into(fmap, bits, total, inp)
+            del fmap # Assume this calls msync() now.
+            f.seek(0, os.SEEK_END)
+            f.write(b'\0'.join(allfilenames))
+    finally:
+        for ix in midxs:
+            if isinstance(ix, midx.PackMidx):
+                ix.close()
+        midxs = None
+        inp = None
+
+
+    # This is just for testing (if you enable this, don't clear inp above)
+    if 0:
+        # NOTE(review): this disabled block looks bit-rotted -- 'pi' and
+        # 'i' are undefined, and the for-target iterates a 2-tuple of
+        # iterables rather than zipping them; fix before ever enabling.
+        p = midx.PackMidx(outfilename)
+        assert(len(p.idxnames) == len(infilenames))
+        log(repr(p.idxnames) + '\n')
+        assert(len(p) == total)
+        for pe, e in p, git.idxmerge(inp, final_progress=False):
+            pin = next(pi)
+            assert(i == pin)
+            assert(p.exists(i))
+
+    return total, outfilename
+
+
+def do_midx(outdir, outfilename, infilenames, prefixstr, prout,
+            auto=False, force=False, print_names=False):
+    # Wrapper around _do_midx() that optionally prints the generated
+    # midx filename to the prout byte stream.
+    rv = _do_midx(outdir, outfilename, infilenames, prefixstr,
+                  auto=auto, force=force)
+    if rv and print_names:
+        prout.write(rv[1] + b'\n')
+
+
+def do_midx_dir(path, outfilename, prout, auto=False, force=False,
+                max_files=-1, print_names=False):
+    """Repeatedly merge the idx/midx files in directory PATH until no
+    more than DESIRED_HWM indexes remain, deleting redundant midx files
+    along the way.  New midx names are written to prout if print_names."""
+    already = {}
+    sizes = {}
+    if force and not auto:
+        midxs = []   # don't use existing midx files
+    else:
+        midxs = glob.glob(b'%s/*.midx' % path)
+        contents = {}
+        for mname in midxs:
+            m = git.open_idx(mname)
+            contents[mname] = [(b'%s/%s' % (path,i)) for i in m.idxnames]
+            sizes[mname] = len(m)
+                    
+        # sort the biggest+newest midxes first, so that we can eliminate
+        # smaller (or older) redundant ones that come later in the list
+        midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))
+        
+        for mname in midxs:
+            any = 0
+            for iname in contents[mname]:
+                if not already.get(iname):
+                    already[iname] = 1
+                    any = 1
+            if not any:
+                # Every idx this midx covers is already covered: delete it
+                # and mark it so it drops out of the candidate list below.
+                debug1('%r is redundant\n' % mname)
+                unlink(mname)
+                already[mname] = 1
+
+    midxs = [k for k in midxs if not already.get(k)]
+    idxs = [k for k in glob.glob(b'%s/*.idx' % path) if not already.get(k)]
+
+    for iname in idxs:
+        i = git.open_idx(iname)
+        sizes[iname] = len(i)
+
+    all = [(sizes[n],n) for n in (midxs + idxs)]
+    
+    # FIXME: what are the optimal values?  Does this make sense?
+    DESIRED_HWM = force and 1 or 5
+    DESIRED_LWM = force and 1 or 2
+    existed = dict((name,1) for sz,name in all)
+    debug1('midx: %d indexes; want no more than %d.\n' 
+           % (len(all), DESIRED_HWM))
+    if len(all) <= DESIRED_HWM:
+        debug1('midx: nothing to do.\n')
+    while len(all) > DESIRED_HWM:
+        all.sort()
+        # Merge the smallest indexes, keeping the DESIRED_LWM-1 largest.
+        part1 = [name for sz,name in all[:len(all)-DESIRED_LWM+1]]
+        part2 = all[len(all)-DESIRED_LWM+1:]
+        all = list(do_midx_group(path, outfilename, part1,
+                                 auto=auto, force=force, max_files=max_files)) \
+                                 + part2
+        if len(all) > DESIRED_HWM:
+            debug1('\nStill too many indexes (%d > %d).  Merging again.\n'
+                   % (len(all), DESIRED_HWM))
+
+    if print_names:
+        # Only report midxes created by this run, not pre-existing ones.
+        for sz,name in all:
+            if not existed.get(name):
+                prout.write(name + b'\n')
+
+
+def do_midx_group(outdir, outfilename, infiles, auto=False, force=False,
+                  max_files=-1):
+    # Merge infiles in batches of at most max_files (the open-fd limit),
+    # yielding each (total, outfilename) result from _do_midx().
+    groups = list(_group(infiles, max_files))
+    gprefix = ''
+    for n,sublist in enumerate(groups):
+        if len(groups) != 1:
+            gprefix = 'Group %d: ' % (n+1)
+        rv = _do_midx(outdir, outfilename, sublist, gprefix,
+                      auto=auto, force=force)
+        if rv:
+            yield rv
+
+
+def main(argv):
+    # Entry point for 'bup midx': either --check existing midx files or
+    # (re)generate midx files from idx inputs / whole pack directories.
+    o = options.Options(optspec)
+    opt, flags, extra = o.parse_bytes(argv[1:])
+    opt.dir = argv_bytes(opt.dir) if opt.dir else None
+    opt.output = argv_bytes(opt.output) if opt.output else None
+
+    if extra and (opt.auto or opt.force):
+        o.fatal("you can't use -f/-a and also provide filenames")
+    if opt.check and (not extra and not opt.auto):
+        o.fatal("if using --check, you must provide filenames or -a")
+
+    git.check_repo_or_die()
+
+    if opt.max_files < 0:
+        opt.max_files = max_files()
+    assert(opt.max_files >= 5)
+
+    extra = [argv_bytes(x) for x in extra]
+
+    if opt.check:
+        # check existing midx files
+        if extra:
+            midxes = extra
+        else:
+            midxes = []
+            paths = opt.dir and [opt.dir] or git.all_packdirs()
+            for path in paths:
+                # NOTE(review): the non-check branch below uses
+                # path_msg(path); this one formats the raw bytes --
+                # confirm the inconsistency is intended.
+                debug1('midx: scanning %s\n' % path)
+                midxes += glob.glob(os.path.join(path, b'*.midx'))
+        for name in midxes:
+            check_midx(name)
+        if not saved_errors:
+            log('All tests passed.\n')
+    else:
+        if extra:
+            sys.stdout.flush()
+            do_midx(git.repo(b'objects/pack'), opt.output, extra, b'',
+                    byte_stream(sys.stdout), auto=opt.auto, force=opt.force,
+                    print_names=opt.print)
+        elif opt.auto or opt.force:
+            sys.stdout.flush()
+            paths = opt.dir and [opt.dir] or git.all_packdirs()
+            for path in paths:
+                debug1('midx: scanning %s\n' % path_msg(path))
+                do_midx_dir(path, opt.output, byte_stream(sys.stdout),
+                            auto=opt.auto, force=opt.force,
+                            max_files=opt.max_files)
+        else:
+            o.fatal("you must use -f or -a or provide input filenames")
+
+    if saved_errors:
+        log('WARNING: %d errors encountered.\n' % len(saved_errors))
+        sys.exit(1)
diff --git a/lib/bup/cmd/mux.py b/lib/bup/cmd/mux.py
new file mode 100755 (executable)
index 0000000..606f027
--- /dev/null
@@ -0,0 +1,55 @@
+
+from __future__ import absolute_import
+import os, struct, subprocess, sys
+
+from bup import options
+from bup.helpers import debug1, debug2, mux
+from bup.io import byte_stream
+
+
+optspec = """
+bup mux command [arguments...]
+--
+"""
+
+def main(argv):
+    # Entry point for 'bup mux': run a subcommand with its stdout and
+    # stderr multiplexed onto our stdout after a b'BUPMUX' preamble.
+    # Give the subcommand exclusive access to stdin.
+    orig_stdin = os.dup(0)
+    devnull = os.open(os.devnull, os.O_RDONLY)
+    os.dup2(devnull, 0)
+    os.close(devnull)
+
+    o = options.Options(optspec)
+    opt, flags, extra = o.parse_bytes(argv[1:])
+    if len(extra) < 1:
+        o.fatal('command is required')
+
+    subcmd = extra
+
+    debug2('bup mux: starting %r\n' % (extra,))
+
+    outr, outw = os.pipe()
+    errr, errw = os.pipe()
+    def close_fds():
+        # Runs in the child between fork and exec: drop our read ends so
+        # the child doesn't hold the pipes open.
+        os.close(outr)
+        os.close(errr)
+
+    p = subprocess.Popen(subcmd, stdin=orig_stdin, stdout=outw, stderr=errw,
+                         close_fds=False, preexec_fn=close_fds)
+    os.close(outw)
+    os.close(errw)
+    sys.stdout.flush()
+    out = byte_stream(sys.stdout)
+    out.write(b'BUPMUX')
+    out.flush()
+    mux(p, out.fileno(), outr, errr)
+    os.close(outr)
+    os.close(errr)
+    prv = p.wait()
+
+    if prv:
+        debug1('%s exited with code %d\n' % (extra[0], prv))
+
+    debug1('bup mux: done\n')
+
+    sys.exit(prv)
diff --git a/lib/bup/cmd/on.py b/lib/bup/cmd/on.py
new file mode 100755 (executable)
index 0000000..bb5e3f3
--- /dev/null
@@ -0,0 +1,80 @@
+from __future__ import absolute_import
+from subprocess import PIPE
+import getopt, os, signal, struct, subprocess, sys
+
+from bup import options, ssh, path
+from bup.compat import argv_bytes
+from bup.helpers import DemuxConn, log
+from bup.io import byte_stream
+
+
+optspec = """
+bup on <hostname> index ...
+bup on <hostname> save ...
+bup on <hostname> split ...
+bup on <hostname> get ...
+"""
+
+def main(argv):
+    # Entry point for 'bup on': run an index/save/split/get command on a
+    # remote host over ssh, pairing the remote client with a local
+    # 'bup server' across the same connection.
+    o = options.Options(optspec, optfunc=getopt.getopt)
+    opt, flags, extra = o.parse_bytes(argv[1:])
+    if len(extra) < 2:
+        o.fatal('arguments expected')
+
+    class SigException(Exception):
+        def __init__(self, signum):
+            self.signum = signum
+            Exception.__init__(self, 'signal %d received' % signum)
+    def handler(signum, frame):
+        raise SigException(signum)
+
+    signal.signal(signal.SIGTERM, handler)
+    signal.signal(signal.SIGINT, handler)
+
+    sys.stdout.flush()
+    out = byte_stream(sys.stdout)
+
+    # NOTE(review): if a signal arrives after the try starts but before
+    # 'ret = 99' executes, the outer except references an unbound 'ret'
+    # (NameError) -- consider initializing ret before the try.
+    try:
+        sp = None
+        p = None
+        ret = 99
+
+        hp = argv_bytes(extra[0]).split(b':')
+        if len(hp) == 1:
+            (hostname, port) = (hp[0], None)
+        else:
+            (hostname, port) = hp
+        argv = [argv_bytes(x) for x in extra[1:]]
+        p = ssh.connect(hostname, port, b'on--server', stderr=PIPE)
+
+        try:
+            # Send the remote side its argv as a length-prefixed blob
+            # (safer than letting ssh's shell mangle the command line).
+            argvs = b'\0'.join([b'bup'] + argv)
+            p.stdin.write(struct.pack('!I', len(argvs)) + argvs)
+            p.stdin.flush()
+            sp = subprocess.Popen([path.exe(), b'server'],
+                                  stdin=p.stdout, stdout=p.stdin)
+            p.stdin.close()
+            p.stdout.close()
+            # Demultiplex remote client's stderr (back to stdout/stderr).
+            dmc = DemuxConn(p.stderr.fileno(), open(os.devnull, "wb"))
+            for line in iter(dmc.readline, b''):
+                out.write(line)
+        finally:
+            while 1:
+                # if we get a signal while waiting, we have to keep waiting, just
+                # in case our child doesn't die.
+                try:
+                    ret = p.wait()
+                    if sp:
+                        sp.wait()
+                    break
+                except SigException as e:
+                    log('\nbup on: %s\n' % e)
+                    os.kill(p.pid, e.signum)
+                    ret = 84
+    except SigException as e:
+        if ret == 0:
+            ret = 99
+        log('\nbup on: %s\n' % e)
+
+    sys.exit(ret)
diff --git a/lib/bup/cmd/on__server.py b/lib/bup/cmd/on__server.py
new file mode 100755 (executable)
index 0000000..c04b3b0
--- /dev/null
@@ -0,0 +1,61 @@
+
+from __future__ import absolute_import
+import os, struct, sys
+
+from bup import options, helpers, path
+from bup.compat import environ, py_maj
+from bup.io import byte_stream
+
+optspec = """
+bup on--server
+--
+    This command is run automatically by 'bup on'
+"""
+
+def main(argv):
+    # Entry point for 'bup on--server' (started remotely by 'bup on'):
+    # read the real subcommand argv from stdin, then exec it under
+    # 'bup mux' so its stdout/stderr can share the ssh channel.
+    o = options.Options(optspec)
+    opt, flags, extra = o.parse_bytes(argv[1:])
+    if extra:
+        o.fatal('no arguments expected')
+
+    # get the subcommand's argv.
+    # Normally we could just pass this on the command line, but since we'll often
+    # be getting called on the other end of an ssh pipe, which tends to mangle
+    # argv (by sending it via the shell), this way is much safer.
+
+    stdin = byte_stream(sys.stdin)
+    buf = stdin.read(4)
+    sz = struct.unpack('!I', buf)[0]
+    assert(sz > 0)
+    assert(sz < 1000000)
+    buf = stdin.read(sz)
+    assert(len(buf) == sz)
+    argv = buf.split(b'\0')
+    argv[0] = path.exe()
+    # Result is [bup-exe, b'mux', b'--', bup-exe, subcmd, ...]: mux
+    # re-runs bup itself with the requested subcommand.
+    argv = [argv[0], b'mux', b'--'] + argv
+
+
+    # stdin/stdout are supposedly connected to 'bup server' that the caller
+    # started for us (often on the other end of an ssh tunnel), so we don't want
+    # to misuse them.  Move them out of the way, then replace stdout with
+    # a pointer to stderr in case our subcommand wants to do something with it.
+    #
+    # It might be nice to do the same with stdin, but my experiments showed that
+    # ssh seems to make its child's stderr a readable-but-never-reads-anything
+    # socket.  They really should have used shutdown(SHUT_WR) on the other end
+    # of it, but probably didn't.  Anyway, it's too messy, so let's just make sure
+    # anyone reading from stdin is disappointed.
+    #
+    # (You can't just leave stdin/stdout "not open" by closing the file
+    # descriptors.  Then the next file that opens is automatically assigned 0 or 1,
+    # and people *trying* to read/write stdin/stdout get screwed.)
+    os.dup2(0, 3)
+    os.dup2(1, 4)
+    os.dup2(2, 1)
+    fd = os.open(os.devnull, os.O_RDONLY)
+    os.dup2(fd, 0)
+    os.close(fd)
+
+    environ[b'BUP_SERVER_REVERSE'] = helpers.hostname()
+    os.execvp(argv[0], argv)
+    # Only reached if execvp() itself fails.
+    sys.exit(99)
diff --git a/lib/bup/cmd/prune_older.py b/lib/bup/cmd/prune_older.py
new file mode 100755 (executable)
index 0000000..b3df6e9
--- /dev/null
@@ -0,0 +1,165 @@
+
+from __future__ import absolute_import, print_function
+from binascii import hexlify, unhexlify
+from collections import defaultdict
+from itertools import groupby
+from sys import stderr
+from time import localtime, strftime, time
+import sys
+
+from bup import git, options
+from bup.compat import argv_bytes, int_types
+from bup.gc import bup_gc
+from bup.helpers import die_if_errors, log, partition, period_as_secs
+from bup.io import byte_stream
+from bup.repo import LocalRepo
+from bup.rm import bup_rm
+
+
+def branches(refnames=tuple()):
+    # Yield (branch_name, hex_id) for each requested branch head (all
+    # heads when refnames is empty); name[11:] strips b'refs/heads/'.
+    return ((name[11:], hexlify(sha)) for (name,sha)
+            in git.list_refs(patterns=(b'refs/heads/' + n for n in refnames),
+                             limit_to_heads=True))
+
+def save_name(branch, utc):
+    # Render a save path like BRANCH/YYYY-MM-DD-HHMMSS in local time.
+    return branch + b'/' \
+            + strftime('%Y-%m-%d-%H%M%S', localtime(utc)).encode('ascii')
+
+def classify_saves(saves, period_start):
+    """For each (utc, id) in saves, yield (True, (utc, id)) if the save
+    should be kept and (False, (utc, id)) if the save should be removed.
+    The ids are binary hashes.  saves must be sorted by decreasing utc
+    (see the caller and the comment below).
+    """
+
+    def retain_newest_in_region(region):
+        # region is newest-first, so region[0] is the save to keep.
+        for save in region[0:1]:
+            yield True, save
+        for save in region[1:]:
+            yield False, save
+
+    matches, rest = partition(lambda s: s[0] >= period_start['all'], saves)
+    for save in matches:
+        yield True, save
+
+    tm_ranges = ((period_start['dailies'], lambda s: localtime(s[0]).tm_yday),
+                 (period_start['monthlies'], lambda s: localtime(s[0]).tm_mon),
+                 (period_start['yearlies'], lambda s: localtime(s[0]).tm_year))
+
+    # Break the decreasing utc sorted saves up into the respective
+    # period ranges (dailies, monthlies, ...).  Within each range,
+    # group the saves by the period scale (days, months, ...), and
+    # then yield a "keep" action (True, utc) for the newest save in
+    # each group, and a "drop" action (False, utc) for the rest.
+    for pstart, time_region_id in tm_ranges:
+        matches, rest = partition(lambda s: s[0] >= pstart, rest)
+        for region_id, region_saves in groupby(matches, time_region_id):
+            for action in retain_newest_in_region(list(region_saves)):
+                yield action
+
+    # Finally, drop any saves older than the specified periods
+    for save in rest:
+        yield False, save
+
+
+optspec = """
+bup prune-older [options...] [BRANCH...]
+--
+keep-all-for=       retain all saves within the PERIOD
+keep-dailies-for=   retain the newest save per day within the PERIOD
+keep-monthlies-for= retain the newest save per month within the PERIOD
+keep-yearlies-for=  retain the newest save per year within the PERIOD
+wrt=                end all periods at this number of seconds since the epoch
+pretend       don't prune, just report intended actions to standard output
+gc            collect garbage after removals [1]
+gc-threshold= only rewrite a packfile if it's over this percent garbage [10]
+#,compress=   set compression level to # (0-9, 9 is highest) [1]
+v,verbose     increase log output (can be used more than once)
+unsafe        use the command even though it may be DANGEROUS
+"""
+
+def main(argv):
+    o = options.Options(optspec)
+    opt, flags, roots = o.parse_bytes(argv[1:])
+    roots = [argv_bytes(x) for x in roots]
+
+    if not opt.unsafe:
+        o.fatal('refusing to run dangerous, experimental command without --unsafe')
+
+    now = int(time()) if opt.wrt is None else opt.wrt
+    if not isinstance(now, int_types):
+        o.fatal('--wrt value ' + str(now) + ' is not an integer')
+
+    period_start = {}
+    for period, extent in (('all', opt.keep_all_for),
+                           ('dailies', opt.keep_dailies_for),
+                           ('monthlies', opt.keep_monthlies_for),
+                           ('yearlies', opt.keep_yearlies_for)):
+        if extent:
+            secs = period_as_secs(extent.encode('ascii'))
+            if not secs:
+                o.fatal('%r is not a valid period' % extent)
+            period_start[period] = now - secs
+
+    if not period_start:
+        o.fatal('at least one keep argument is required')
+
+    period_start = defaultdict(lambda: float('inf'), period_start)
+
+    if opt.verbose:
+        epoch_ymd = strftime('%Y-%m-%d-%H%M%S', localtime(0))
+        for kind in ['all', 'dailies', 'monthlies', 'yearlies']:
+            period_utc = period_start[kind]
+            if period_utc != float('inf'):
+                if not (period_utc > float('-inf')):
+                    log('keeping all ' + kind)
+                else:
+                    try:
+                        when = strftime('%Y-%m-%d-%H%M%S', localtime(period_utc))
+                        log('keeping ' + kind + ' since ' + when + '\n')
+                    except ValueError as ex:
+                        if period_utc < 0:
+                            log('keeping %s since %d seconds before %s\n'
+                                %(kind, abs(period_utc), epoch_ymd))
+                        elif period_utc > 0:
+                            log('keeping %s since %d seconds after %s\n'
+                                %(kind, period_utc, epoch_ymd))
+                        else:
+                            log('keeping %s since %s\n' % (kind, epoch_ymd))
+
+    git.check_repo_or_die()
+
+    # This could be more efficient, but for now just build the whole list
+    # in memory and let bup_rm() do some redundant work.
+
+    def parse_info(f):
+        author_secs = f.readline().strip()
+        return int(author_secs)
+
+    sys.stdout.flush()
+    out = byte_stream(sys.stdout)
+
+    removals = []
+    for branch, branch_id in branches(roots):
+        die_if_errors()
+        saves = ((utc, unhexlify(oidx)) for (oidx, utc) in
+                 git.rev_list(branch_id, format=b'%at', parse=parse_info))
+        for keep_save, (utc, id) in classify_saves(saves, period_start):
+            assert(keep_save in (False, True))
+            # FIXME: base removals on hashes
+            if opt.pretend:
+                out.write((b'+ ' if keep_save else b'- ')
+                          + save_name(branch, utc) + b'\n')
+            elif not keep_save:
+                removals.append(save_name(branch, utc))
+
+    if not opt.pretend:
+        die_if_errors()
+        repo = LocalRepo()
+        bup_rm(repo, removals, compression=opt.compress, verbosity=opt.verbose)
+        if opt.gc:
+            die_if_errors()
+            bup_gc(threshold=opt.gc_threshold,
+                   compression=opt.compress,
+                   verbosity=opt.verbose)
+
+    die_if_errors()
diff --git a/lib/bup/cmd/random.py b/lib/bup/cmd/random.py
new file mode 100755 (executable)
index 0000000..c4b234e
--- /dev/null
@@ -0,0 +1,34 @@
+
+from __future__ import absolute_import
+import os, sys
+
+from bup import options, _helpers
+from bup.helpers import handle_ctrl_c, log, parse_num
+
+
+optspec = """
+bup random [-S seed] <numbytes>
+--
+S,seed=   optional random number seed [1]
+f,force   print random data to stdout even if it's a tty
+v,verbose print byte counter to stderr
+"""
+
+def main(argv):
+    # Entry point for 'bup random': write <numbytes> of deterministic
+    # pseudo-random data to stdout, refusing a tty unless forced.
+    o = options.Options(optspec)
+    opt, flags, extra = o.parse_bytes(argv[1:])
+
+    if len(extra) != 1:
+        o.fatal("exactly one argument expected")
+
+    total = parse_num(extra[0])
+
+    handle_ctrl_c()
+
+    # BUP_FORCE_TTY bit 1 makes stdout count as a tty for this check.
+    if opt.force or (not os.isatty(1) and
+                     not int(os.environ.get('BUP_FORCE_TTY', 0)) & 1):
+        _helpers.write_random(sys.stdout.fileno(), total, opt.seed,
+                              opt.verbose and 1 or 0)
+    else:
+        log('error: not writing binary data to a terminal. Use -f to force.\n')
+        sys.exit(1)
diff --git a/lib/bup/cmd/restore.py b/lib/bup/cmd/restore.py
new file mode 100755 (executable)
index 0000000..0764355
--- /dev/null
@@ -0,0 +1,302 @@
+
+from __future__ import absolute_import
+from stat import S_ISDIR
+import copy, errno, os, re, stat, sys
+
+from bup import options, git, metadata, vfs
+from bup._helpers import write_sparsely
+from bup.compat import argv_bytes, fsencode, wrap_main
+from bup.helpers import (add_error, chunkyreader, die_if_errors, handle_ctrl_c,
+                         log, mkdirp, parse_rx_excludes, progress, qprogress,
+                         saved_errors, should_rx_exclude_path, unlink)
+from bup.io import byte_stream
+from bup.repo import LocalRepo, RemoteRepo
+
+
+optspec = """
+bup restore [-r host:path] [-C outdir] </branch/revision/path/to/dir ...>
+--
+r,remote=   remote repository path
+C,outdir=   change to given outdir before extracting files
+numeric-ids restore numeric IDs (user, group, etc.) rather than names
+exclude-rx= skip paths matching the unanchored regex (may be repeated)
+exclude-rx-from= skip --exclude-rx patterns in file (may be repeated)
+sparse      create sparse files
+v,verbose   increase log output (can be used more than once)
+map-user=   given OLD=NEW, restore OLD user as NEW user
+map-group=  given OLD=NEW, restore OLD group as NEW group
+map-uid=    given OLD=NEW, restore OLD uid as NEW uid
+map-gid=    given OLD=NEW, restore OLD gid as NEW gid
+q,quiet     don't show progress meter
+"""
+
+total_restored = 0  # running count of restored items, for progress output
+
+# stdout should be flushed after each line, even when not connected to a tty
+stdoutfd = sys.stdout.fileno()
+sys.stdout.flush()
+sys.stdout = os.fdopen(stdoutfd, 'w', 1)  # buffering=1: line-buffered
+out = byte_stream(sys.stdout)
+
+def valid_restore_path(path):
+    # True only if path still contains a b'/' after normalization and
+    # stripping one leading slash, i.e. it names something below a
+    # top-level component; otherwise falls through returning None (falsy).
+    path = os.path.normpath(path)
+    if path.startswith(b'/'):
+        path = path[1:]
+    if b'/' in path:
+        return True
+
+def parse_owner_mappings(type, options, fatal):
+    """Traverse the options and parse all --map-TYPEs, or call Option.fatal()."""
+    opt_name = '--map-' + type
+    if type in ('uid', 'gid'):
+        # Numeric ids: both sides must be (possibly negative) integers.
+        value_rx = re.compile(br'^(-?[0-9]+)=(-?[0-9]+)$')
+    else:
+        # Names: OLD is any non-empty string without '=', NEW may be empty.
+        value_rx = re.compile(br'^([^=]+)=([^=]*)$')
+    owner_map = {}
+    for flag in options:
+        (option, parameter) = flag
+        if option != opt_name:
+            continue
+        parameter = argv_bytes(parameter)
+        match = value_rx.match(parameter)
+        if not match:
+            raise fatal("couldn't parse %r as %s mapping" % (parameter, type))
+        old_id, new_id = match.groups()
+        if type in ('uid', 'gid'):
+            old_id = int(old_id)
+            new_id = int(new_id)
+        owner_map[old_id] = new_id
+    return owner_map
+
+def apply_metadata(meta, name, restore_numeric_ids, owner_map):
+    # Apply a copy of meta to the path name, after rewriting its
+    # user/group/uid/gid through the --map-* owner maps.
+    m = copy.deepcopy(meta)
+    m.user = owner_map['user'].get(m.user, m.user)
+    m.group = owner_map['group'].get(m.group, m.group)
+    m.uid = owner_map['uid'].get(m.uid, m.uid)
+    m.gid = owner_map['gid'].get(m.gid, m.gid)
+    m.apply_to_path(name, restore_numeric_ids = restore_numeric_ids)
+    
+def hardlink_compatible(prev_path, prev_item, new_item, top):
+    # Return True if the previously-restored prev_item (at top +
+    # prev_path on disk) can serve as the hardlink target for new_item:
+    # same oid and same recorded mtime/ctime/mode/etc.
+    prev_candidate = top + prev_path
+    if not os.path.exists(prev_candidate):
+        return False
+    prev_meta, new_meta = prev_item.meta, new_item.meta
+    if new_item.oid != prev_item.oid \
+            or new_meta.mtime != prev_meta.mtime \
+            or new_meta.ctime != prev_meta.ctime \
+            or new_meta.mode != prev_meta.mode:
+        return False
+    # FIXME: should we be checking the path on disk, or the recorded metadata?
+    # The exists() above might seem to suggest the former.
+    if not new_meta.same_file(prev_meta):
+        return False
+    return True
+
+def hardlink_if_possible(fullname, item, top, hardlinks):
+    """Find a suitable hardlink target, link to it, and return true,
+    otherwise return false."""
+    # The cwd will be dirname(fullname), and fullname will be
+    # absolute, i.e. /foo/bar, and the caller is expected to handle
+    # restoring the metadata if hardlinking isn't possible.
+
+    # FIXME: we can probably replace the target_vfs_path with the
+    # relevant vfs item
+    
+    # hardlinks tracks a list of (restore_path, vfs_path, meta)
+    # triples for each path we've written for a given hardlink_target.
+    # This allows us to handle the case where we restore a set of
+    # hardlinks out of order (with respect to the original save
+    # call(s)) -- i.e. when we don't restore the hardlink_target path
+    # first.  This data also allows us to attempt to handle other
+    # situations like hardlink sets that change on disk during a save,
+    # or between index and save.
+
+    target = item.meta.hardlink_target
+    assert(target)
+    assert(fullname.startswith(b'/'))
+    target_versions = hardlinks.get(target)
+    if target_versions:
+        # Check every path in the set that we've written so far for a match.
+        for prev_path, prev_item in target_versions:
+            if hardlink_compatible(prev_path, prev_item, item, top):
+                try:
+                    os.link(top + prev_path, top + fullname)
+                    return True
+                except OSError as e:
+                    # EXDEV (cross-device link): try the next candidate.
+                    if e.errno != errno.EXDEV:
+                        raise
+    else:
+        target_versions = []
+        hardlinks[target] = target_versions
+    target_versions.append((fullname, item))
+    return False
+
+def write_file_content(repo, dest_path, vfs_file):
+    # Copy vfs_file's content from the repo to dest_path in chunks.
+    with vfs.fopen(repo, vfs_file) as inf:
+        with open(dest_path, 'wb') as outf:
+            for b in chunkyreader(inf):
+                outf.write(b)
+
+def write_file_content_sparsely(repo, dest_path, vfs_file):
+    # Like write_file_content(), but use write_sparsely() to leave holes
+    # for runs of zeros (512-byte granularity), then truncate away any
+    # trailing zero run so the file ends at the right length.
+    with vfs.fopen(repo, vfs_file) as inf:
+        outfd = os.open(dest_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
+        try:
+            trailing_zeros = 0;
+            for b in chunkyreader(inf):
+                trailing_zeros = write_sparsely(outfd, b, 512, trailing_zeros)
+            pos = os.lseek(outfd, trailing_zeros, os.SEEK_END)
+            os.ftruncate(outfd, pos)
+        finally:
+            os.close(outfd)
+            
+def restore(repo, parent_path, name, item, top, sparse, numeric_ids, owner_map,
+            exclude_rxs, verbosity, hardlinks):
+    """Restore item as parent_path/name relative to the current directory,
+    recursing into directories.
+
+    Honors --exclude-rx patterns, writes regular files sparsely when
+    sparse is true, reuses hardlink targets recorded in hardlinks when
+    possible, and applies the saved metadata.  Increments the
+    module-level total_restored counter for progress reporting.
+    Directories are entered via os.chdir(), so the caller's CWD is
+    saved and restored in the finally block.
+    """
+    global total_restored
+    mode = vfs.item_mode(item)
+    treeish = S_ISDIR(mode)
+    fullname = parent_path + b'/' + name
+    # Match behavior of index --exclude-rx with respect to paths.
+    if should_rx_exclude_path(fullname + (b'/' if treeish else b''),
+                              exclude_rxs):
+        return
+
+    if not treeish:
+        # Do this now so we'll have meta.symlink_target for verbose output
+        item = vfs.augment_item_meta(repo, item, include_size=True)
+        meta = item.meta
+        assert(meta.mode == mode)
+
+    if stat.S_ISDIR(mode):
+        if verbosity >= 1:
+            out.write(b'%s/\n' % fullname)
+    elif stat.S_ISLNK(mode):
+        # A symlink is never treeish, so meta was assigned above.
+        assert(meta.symlink_target)
+        if verbosity >= 2:
+            out.write(b'%s@ -> %s\n' % (fullname, meta.symlink_target))
+    else:
+        if verbosity >= 2:
+            out.write(fullname + b'\n')
+
+    orig_cwd = os.getcwd()
+    try:
+        if treeish:
+            # Assumes contents() returns '.' with the full metadata first
+            sub_items = vfs.contents(repo, item, want_meta=True)
+            dot, item = next(sub_items, None)
+            # NOTE(review): next(..., None) returns a bare None on an empty
+            # iterator, which would raise TypeError on unpacking before the
+            # assert fires -- confirm contents() always yields at least '.'.
+            assert(dot == b'.')
+            item = vfs.augment_item_meta(repo, item, include_size=True)
+            meta = item.meta
+            meta.create_path(name)
+            os.chdir(name)
+            total_restored += 1
+            if verbosity >= 0:
+                qprogress('Restoring: %d\r' % total_restored)
+            for sub_name, sub_item in sub_items:
+                restore(repo, fullname, sub_name, sub_item, top, sparse,
+                        numeric_ids, owner_map, exclude_rxs, verbosity,
+                        hardlinks)
+            os.chdir(b'..')
+            # Apply the directory's own metadata only after its contents
+            # exist (e.g. so restored mtimes aren't clobbered).
+            apply_metadata(meta, name, numeric_ids, owner_map)
+        else:
+            created_hardlink = False
+            if meta.hardlink_target:
+                created_hardlink = hardlink_if_possible(fullname, item, top,
+                                                        hardlinks)
+            if not created_hardlink:
+                meta.create_path(name)
+                if stat.S_ISREG(meta.mode):
+                    if sparse:
+                        write_file_content_sparsely(repo, name, item)
+                    else:
+                        write_file_content(repo, name, item)
+            total_restored += 1
+            if verbosity >= 0:
+                qprogress('Restoring: %d\r' % total_restored)
+            if not created_hardlink:
+                apply_metadata(meta, name, numeric_ids, owner_map)
+    finally:
+        os.chdir(orig_cwd)
+
+def main(argv):
+    """Entry point for 'bup restore'.
+
+    Parses options, resolves each requested /branch/revision/... path in
+    the (local or remote) repository VFS, and restores it under the
+    current directory (or --outdir).  Exits non-zero via die_if_errors()
+    if any path failed.
+    """
+    o = options.Options(optspec)
+    opt, flags, extra = o.parse_bytes(argv[1:])
+    verbosity = (opt.verbose or 0) if not opt.quiet else -1
+    if opt.remote:
+        opt.remote = argv_bytes(opt.remote)
+    if opt.outdir:
+        opt.outdir = argv_bytes(opt.outdir)
+    
+    git.check_repo_or_die()
+
+    if not extra:
+        o.fatal('must specify at least one filename to restore')
+
+    exclude_rxs = parse_rx_excludes(flags, o.fatal)
+
+    owner_map = {}
+    for map_type in ('user', 'group', 'uid', 'gid'):
+        owner_map[map_type] = parse_owner_mappings(map_type, flags, o.fatal)
+
+    if opt.outdir:
+        mkdirp(opt.outdir)
+        os.chdir(opt.outdir)
+
+    repo = RemoteRepo(opt.remote) if opt.remote else LocalRepo()
+    top = fsencode(os.getcwd())
+    hardlinks = {}
+    for path in [argv_bytes(x) for x in extra]:
+        if not valid_restore_path(path):
+            add_error("path %r doesn't include a branch and revision" % path)
+            continue
+        try:
+            resolved = vfs.resolve(repo, path, want_meta=True, follow=False)
+        except vfs.IOError as e:
+            add_error(e)
+            continue
+        if len(resolved) == 3 and resolved[2][0] == b'latest':
+            # Follow latest symlink to the actual save
+            try:
+                resolved = vfs.resolve(repo, b'latest', parent=resolved[:-1],
+                                       want_meta=True)
+            except vfs.IOError as e:
+                add_error(e)
+                continue
+            # Rename it back to 'latest'
+            resolved = tuple(elt if i != 2 else (b'latest',) + elt[1:]
+                             for i, elt in enumerate(resolved))
+        path_parent, path_name = os.path.split(path)
+        leaf_name, leaf_item = resolved[-1]
+        if not leaf_item:
+            add_error('error: cannot access %r in %r'
+                      % (b'/'.join(name for name, item in resolved),
+                         path))
+            continue
+        if not path_name or path_name == b'.':
+            # Source is /foo/what/ever/ or /foo/what/ever/. -- extract
+            # what/ever/* to the current directory, and if name == '.'
+            # (i.e. /foo/what/ever/.), then also restore what/ever's
+            # metadata to the current directory.
+            # NOTE(review): despite the name, this holds the raw mode from
+            # item_mode(), not an S_ISDIR() result -- confirm a falsy value
+            # here really means "not a directory".
+            treeish = vfs.item_mode(leaf_item)
+            if not treeish:
+                add_error('%r cannot be restored as a directory' % path)
+            else:
+                items = vfs.contents(repo, leaf_item, want_meta=True)
+                dot, leaf_item = next(items, None)
+                assert dot == b'.'
+                for sub_name, sub_item in items:
+                    restore(repo, b'', sub_name, sub_item, top,
+                            opt.sparse, opt.numeric_ids, owner_map,
+                            exclude_rxs, verbosity, hardlinks)
+                if path_name == b'.':
+                    leaf_item = vfs.augment_item_meta(repo, leaf_item,
+                                                      include_size=True)
+                    apply_metadata(leaf_item.meta, b'.',
+                                   opt.numeric_ids, owner_map)
+        else:
+            restore(repo, b'', leaf_name, leaf_item, top,
+                    opt.sparse, opt.numeric_ids, owner_map,
+                    exclude_rxs, verbosity, hardlinks)
+
+    if verbosity >= 0:
+        progress('Restoring: %d, done.\n' % total_restored)
+    die_if_errors()
diff --git a/lib/bup/cmd/rm.py b/lib/bup/cmd/rm.py
new file mode 100755 (executable)
index 0000000..75d35a2
--- /dev/null
@@ -0,0 +1,33 @@
+
+from __future__ import absolute_import
+
+from bup.compat import argv_bytes
+from bup.git import check_repo_or_die
+from bup.options import Options
+from bup.helpers import die_if_errors, handle_ctrl_c, log
+from bup.repo import LocalRepo
+from bup.rm import bup_rm
+
+# Command-line specification parsed by bup.options.Options.
+optspec = """
+bup rm <branch|save...>
+--
+#,compress=  set compression level to # (0-9, 9 is highest) [6]
+v,verbose    increase verbosity (can be specified multiple times)
+unsafe       use the command even though it may be DANGEROUS
+"""
+
+def main(argv):
+    """Entry point for 'bup rm': remove the named branches/saves.
+
+    Refuses to run without --unsafe, then delegates the actual removal
+    to bup.rm.bup_rm and exits non-zero if any errors accumulated.
+    """
+    o = Options(optspec)
+    opt, flags, extra = o.parse_bytes(argv[1:])
+
+    if not opt.unsafe:
+        o.fatal('refusing to run dangerous, experimental command without --unsafe')
+
+    if len(extra) < 1:
+        o.fatal('no paths specified')
+
+    check_repo_or_die()
+    repo = LocalRepo()
+    bup_rm(repo, [argv_bytes(x) for x in extra],
+           compression=opt.compress, verbosity=opt.verbose)
+    die_if_errors()
diff --git a/lib/bup/cmd/save.py b/lib/bup/cmd/save.py
new file mode 100755 (executable)
index 0000000..e0897dd
--- /dev/null
@@ -0,0 +1,509 @@
+
+from __future__ import absolute_import, print_function
+from binascii import hexlify
+from errno import EACCES
+from io import BytesIO
+import math, os, stat, sys, time
+
+from bup import compat, hashsplit, git, options, index, client, metadata
+from bup import hlinkdb
+from bup.compat import argv_bytes, environ
+from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE, GIT_MODE_SYMLINK
+from bup.helpers import (add_error, grafted_path_components, handle_ctrl_c,
+                         hostname, istty2, log, parse_date_or_fatal, parse_num,
+                         path_components, progress, qprogress, resolve_parent,
+                         saved_errors, stripped_path_components,
+                         valid_save_name)
+from bup.io import byte_stream, path_msg
+from bup.pwdgrp import userfullname, username
+
+
+# Command-line specification parsed by bup.options.Options.
+optspec = """
+bup save [-tc] [-n name] <filenames...>
+--
+r,remote=  hostname:/path/to/repo of remote repository
+t,tree     output a tree id
+c,commit   output a commit id
+n,name=    name of backup set to update (if any)
+d,date=    date for the commit (seconds since the epoch)
+v,verbose  increase log output (can be used more than once)
+q,quiet    don't show progress meter
+smaller=   only back up files smaller than n bytes
+bwlimit=   maximum bytes/sec to transmit to server
+f,indexfile=  the name of the index file (normally BUP_DIR/bupindex)
+strip      strips the path to every filename given
+strip-path= path-prefix to be stripped when saving
+graft=     a graft point *old_path*=*new_path* (can be used more than once)
+#,compress=  set compression level to # (0-9, 9 is highest) [1]
+"""
+
+def main(argv):
+
+    # Hack around lack of nonlocal vars in python 2
+    _nonlocal = {}
+
+    o = options.Options(optspec)
+    opt, flags, extra = o.parse_bytes(argv[1:])
+
+    if opt.indexfile:
+        opt.indexfile = argv_bytes(opt.indexfile)
+    if opt.name:
+        opt.name = argv_bytes(opt.name)
+    if opt.remote:
+        opt.remote = argv_bytes(opt.remote)
+    if opt.strip_path:
+        opt.strip_path = argv_bytes(opt.strip_path)
+
+    git.check_repo_or_die()
+    if not (opt.tree or opt.commit or opt.name):
+        o.fatal("use one or more of -t, -c, -n")
+    if not extra:
+        o.fatal("no filenames given")
+
+    extra = [argv_bytes(x) for x in extra]
+
+    opt.progress = (istty2 and not opt.quiet)
+    opt.smaller = parse_num(opt.smaller or 0)
+    if opt.bwlimit:
+        client.bwlimit = parse_num(opt.bwlimit)
+
+    if opt.date:
+        date = parse_date_or_fatal(opt.date, o.fatal)
+    else:
+        date = time.time()
+
+    if opt.strip and opt.strip_path:
+        o.fatal("--strip is incompatible with --strip-path")
+
+    graft_points = []
+    if opt.graft:
+        if opt.strip:
+            o.fatal("--strip is incompatible with --graft")
+
+        if opt.strip_path:
+            o.fatal("--strip-path is incompatible with --graft")
+
+        for (option, parameter) in flags:
+            if option == "--graft":
+                parameter = argv_bytes(parameter)
+                splitted_parameter = parameter.split(b'=')
+                if len(splitted_parameter) != 2:
+                    o.fatal("a graft point must be of the form old_path=new_path")
+                old_path, new_path = splitted_parameter
+                if not (old_path and new_path):
+                    o.fatal("a graft point cannot be empty")
+                graft_points.append((resolve_parent(old_path),
+                                     resolve_parent(new_path)))
+
+    is_reverse = environ.get(b'BUP_SERVER_REVERSE')
+    if is_reverse and opt.remote:
+        o.fatal("don't use -r in reverse mode; it's automatic")
+
+    name = opt.name
+    if name and not valid_save_name(name):
+        o.fatal("'%s' is not a valid branch name" % path_msg(name))
+    refname = name and b'refs/heads/%s' % name or None
+    if opt.remote or is_reverse:
+        try:
+            cli = client.Client(opt.remote)
+        except client.ClientError as e:
+            log('error: %s' % e)
+            sys.exit(1)
+        oldref = refname and cli.read_ref(refname) or None
+        w = cli.new_packwriter(compression_level=opt.compress)
+    else:
+        cli = None
+        oldref = refname and git.read_ref(refname) or None
+        w = git.PackWriter(compression_level=opt.compress)
+
+    handle_ctrl_c()
+
+
+    # Metadata is stored in a file named .bupm in each directory.  The
+    # first metadata entry will be the metadata for the current directory.
+    # The remaining entries will be for each of the other directory
+    # elements, in the order they're listed in the index.
+    #
+    # Since the git tree elements are sorted according to
+    # git.shalist_item_sort_key, the metalist items are accumulated as
+    # (sort_key, metadata) tuples, and then sorted when the .bupm file is
+    # created.  The sort_key should have been computed using the element's
+    # mangled name and git mode (after hashsplitting), but the code isn't
+    # actually doing that but rather uses the element's real name and mode.
+    # This makes things a bit more difficult when reading it back, see
+    # vfs.ordered_tree_entries().
+
+    # Maintain a stack of information representing the current location in
+    # the archive being constructed.  The current path is recorded in
+    # parts, which will be something like ['', 'home', 'someuser'], and
+    # the accumulated content and metadata for of the dirs in parts is
+    # stored in parallel stacks in shalists and metalists.
+
+    parts = [] # Current archive position (stack of dir names).
+    shalists = [] # Hashes for each dir in paths.
+    metalists = [] # Metadata for each dir in paths.
+
+
+    def _push(part, metadata):
+        # Enter a new archive directory -- make it the current directory.
+        parts.append(part)
+        shalists.append([])
+        metalists.append([(b'', metadata)]) # This dir's metadata (no name).
+
+
+    def _pop(force_tree, dir_metadata=None):
+        # Leave the current archive directory and add its tree to its parent.
+        assert(len(parts) >= 1)
+        part = parts.pop()
+        shalist = shalists.pop()
+        metalist = metalists.pop()
+        # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
+        if force_tree:
+            tree = force_tree
+        else:
+            names_seen = set()
+            clean_list = []
+            metaidx = 1 # entry at 0 is for the dir
+            for x in shalist:
+                name = x[1]
+                if name in names_seen:
+                    parent_path = b'/'.join(parts) + b'/'
+                    add_error('error: ignoring duplicate path %s in %s'
+                              % (path_msg(name), path_msg(parent_path)))
+                    if not stat.S_ISDIR(x[0]):
+                        del metalist[metaidx]
+                else:
+                    names_seen.add(name)
+                    clean_list.append(x)
+                    if not stat.S_ISDIR(x[0]):
+                        metaidx += 1
+
+            if dir_metadata: # Override the original metadata pushed for this dir.
+                metalist = [(b'', dir_metadata)] + metalist[1:]
+            sorted_metalist = sorted(metalist, key = lambda x : x[0])
+            metadata = b''.join([m[1].encode() for m in sorted_metalist])
+            metadata_f = BytesIO(metadata)
+            mode, id = hashsplit.split_to_blob_or_tree(w.new_blob, w.new_tree,
+                                                       [metadata_f],
+                                                       keep_boundaries=False)
+            clean_list.append((mode, b'.bupm', id))
+
+            tree = w.new_tree(clean_list)
+        if shalists:
+            shalists[-1].append((GIT_MODE_TREE,
+                                 git.mangle_name(part,
+                                                 GIT_MODE_TREE, GIT_MODE_TREE),
+                                 tree))
+        return tree
+
+
+    _nonlocal['count'] = 0
+    _nonlocal['subcount'] = 0
+    _nonlocal['lastremain'] = None
+
+    def progress_report(n):
+        _nonlocal['subcount'] += n
+        cc = _nonlocal['count'] + _nonlocal['subcount']
+        pct = total and (cc*100.0/total) or 0
+        now = time.time()
+        elapsed = now - tstart
+        kps = elapsed and int(cc/1024./elapsed)
+        kps_frac = 10 ** int(math.log(kps+1, 10) - 1)
+        kps = int(kps/kps_frac)*kps_frac
+        if cc:
+            remain = elapsed*1.0/cc * (total-cc)
+        else:
+            remain = 0.0
+        if (_nonlocal['lastremain'] and (remain > _nonlocal['lastremain'])
+              and ((remain - _nonlocal['lastremain'])/_nonlocal['lastremain'] < 0.05)):
+            remain = _nonlocal['lastremain']
+        else:
+            _nonlocal['lastremain'] = remain
+        hours = int(remain/60/60)
+        mins = int(remain/60 - hours*60)
+        secs = int(remain - hours*60*60 - mins*60)
+        if elapsed < 30:
+            remainstr = ''
+            kpsstr = ''
+        else:
+            kpsstr = '%dk/s' % kps
+            if hours:
+                remainstr = '%dh%dm' % (hours, mins)
+            elif mins:
+                remainstr = '%dm%d' % (mins, secs)
+            else:
+                remainstr = '%ds' % secs
+        qprogress('Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r'
+                  % (pct, cc/1024, total/1024, fcount, ftotal,
+                     remainstr, kpsstr))
+
+
+    indexfile = opt.indexfile or git.repo(b'bupindex')
+    r = index.Reader(indexfile)
+    try:
+        msr = index.MetaStoreReader(indexfile + b'.meta')
+    except IOError as ex:
+        if ex.errno != EACCES:
+            raise
+        log('error: cannot access %r; have you run bup index?'
+            % path_msg(indexfile))
+        sys.exit(1)
+    hlink_db = hlinkdb.HLinkDB(indexfile + b'.hlink')
+
+    def already_saved(ent):
+        return ent.is_valid() and w.exists(ent.sha) and ent.sha
+
+    def wantrecurse_pre(ent):
+        return not already_saved(ent)
+
+    def wantrecurse_during(ent):
+        return not already_saved(ent) or ent.sha_missing()
+
+    def find_hardlink_target(hlink_db, ent):
+        if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
+            link_paths = hlink_db.node_paths(ent.dev, ent.ino)
+            if link_paths:
+                return link_paths[0]
+
+    total = ftotal = 0
+    if opt.progress:
+        for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_pre):
+            if not (ftotal % 10024):
+                qprogress('Reading index: %d\r' % ftotal)
+            exists = ent.exists()
+            hashvalid = already_saved(ent)
+            ent.set_sha_missing(not hashvalid)
+            if not opt.smaller or ent.size < opt.smaller:
+                if exists and not hashvalid:
+                    total += ent.size
+            ftotal += 1
+        progress('Reading index: %d, done.\n' % ftotal)
+        hashsplit.progress_callback = progress_report
+
+    # Root collisions occur when strip or graft options map more than one
+    # path to the same directory (paths which originally had separate
+    # parents).  When that situation is detected, use empty metadata for
+    # the parent.  Otherwise, use the metadata for the common parent.
+    # Collision example: "bup save ... --strip /foo /foo/bar /bar".
+
+    # FIXME: Add collision tests, or handle collisions some other way.
+
+    # FIXME: Detect/handle strip/graft name collisions (other than root),
+    # i.e. if '/foo/bar' and '/bar' both map to '/'.
+
+    first_root = None
+    root_collision = None
+    tstart = time.time()
+    fcount = 0
+    lastskip_name = None
+    lastdir = b''
+    for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during):
+        (dir, file) = os.path.split(ent.name)
+        exists = (ent.flags & index.IX_EXISTS)
+        hashvalid = already_saved(ent)
+        wasmissing = ent.sha_missing()
+        oldsize = ent.size
+        if opt.verbose:
+            if not exists:
+                status = 'D'
+            elif not hashvalid:
+                if ent.sha == index.EMPTY_SHA:
+                    status = 'A'
+                else:
+                    status = 'M'
+            else:
+                status = ' '
+            if opt.verbose >= 2:
+                log('%s %-70s\n' % (status, path_msg(ent.name)))
+            elif not stat.S_ISDIR(ent.mode) and lastdir != dir:
+                if not lastdir.startswith(dir):
+                    log('%s %-70s\n' % (status, path_msg(os.path.join(dir, b''))))
+                lastdir = dir
+
+        if opt.progress:
+            progress_report(0)
+        fcount += 1
+
+        if not exists:
+            continue
+        if opt.smaller and ent.size >= opt.smaller:
+            if exists and not hashvalid:
+                if opt.verbose:
+                    log('skipping large file "%s"\n' % path_msg(ent.name))
+                lastskip_name = ent.name
+            continue
+
+        assert(dir.startswith(b'/'))
+        if opt.strip:
+            dirp = stripped_path_components(dir, extra)
+        elif opt.strip_path:
+            dirp = stripped_path_components(dir, [opt.strip_path])
+        elif graft_points:
+            dirp = grafted_path_components(graft_points, dir)
+        else:
+            dirp = path_components(dir)
+
+        # At this point, dirp contains a representation of the archive
+        # path that looks like [(archive_dir_name, real_fs_path), ...].
+        # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp
+        # might look like this at some point:
+        #   [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...].
+
+        # This dual representation supports stripping/grafting, where the
+        # archive path may not have a direct correspondence with the
+        # filesystem.  The root directory is represented by an initial
+        # component named '', and any component that doesn't have a
+        # corresponding filesystem directory (due to grafting, for
+        # example) will have a real_fs_path of None, i.e. [('', None),
+        # ...].
+
+        if first_root == None:
+            first_root = dirp[0]
+        elif first_root != dirp[0]:
+            root_collision = True
+
+        # If switching to a new sub-tree, finish the current sub-tree.
+        while parts > [x[0] for x in dirp]:
+            _pop(force_tree = None)
+
+        # If switching to a new sub-tree, start a new sub-tree.
+        for path_component in dirp[len(parts):]:
+            dir_name, fs_path = path_component
+            # Not indexed, so just grab the FS metadata or use empty metadata.
+            try:
+                meta = metadata.from_path(fs_path, normalized=True) \
+                    if fs_path else metadata.Metadata()
+            except (OSError, IOError) as e:
+                add_error(e)
+                lastskip_name = dir_name
+                meta = metadata.Metadata()
+            _push(dir_name, meta)
+
+        if not file:
+            if len(parts) == 1:
+                continue # We're at the top level -- keep the current root dir
+            # Since there's no filename, this is a subdir -- finish it.
+            oldtree = already_saved(ent) # may be None
+            newtree = _pop(force_tree = oldtree)
+            if not oldtree:
+                if lastskip_name and lastskip_name.startswith(ent.name):
+                    ent.invalidate()
+                else:
+                    ent.validate(GIT_MODE_TREE, newtree)
+                ent.repack()
+            if exists and wasmissing:
+                _nonlocal['count'] += oldsize
+            continue
+
+        # it's not a directory
+        if hashvalid:
+            id = ent.sha
+            git_name = git.mangle_name(file, ent.mode, ent.gitmode)
+            git_info = (ent.gitmode, git_name, id)
+            shalists[-1].append(git_info)
+            sort_key = git.shalist_item_sort_key((ent.mode, file, id))
+            meta = msr.metadata_at(ent.meta_ofs)
+            meta.hardlink_target = find_hardlink_target(hlink_db, ent)
+            # Restore the times that were cleared to 0 in the metastore.
+            (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime, ent.ctime)
+            metalists[-1].append((sort_key, meta))
+        else:
+            id = None
+            if stat.S_ISREG(ent.mode):
+                try:
+                    with hashsplit.open_noatime(ent.name) as f:
+                        (mode, id) = hashsplit.split_to_blob_or_tree(
+                                                w.new_blob, w.new_tree, [f],
+                                                keep_boundaries=False)
+                except (IOError, OSError) as e:
+                    add_error('%s: %s' % (ent.name, e))
+                    lastskip_name = ent.name
+            elif stat.S_ISDIR(ent.mode):
+                assert(0)  # handled above
+            elif stat.S_ISLNK(ent.mode):
+                try:
+                    rl = os.readlink(ent.name)
+                except (OSError, IOError) as e:
+                    add_error(e)
+                    lastskip_name = ent.name
+                else:
+                    (mode, id) = (GIT_MODE_SYMLINK, w.new_blob(rl))
+            else:
+                # Everything else should be fully described by its
+                # metadata, so just record an empty blob, so the paths
+                # in the tree and .bupm will match up.
+                (mode, id) = (GIT_MODE_FILE, w.new_blob(b''))
+
+            if id:
+                ent.validate(mode, id)
+                ent.repack()
+                git_name = git.mangle_name(file, ent.mode, ent.gitmode)
+                git_info = (mode, git_name, id)
+                shalists[-1].append(git_info)
+                sort_key = git.shalist_item_sort_key((ent.mode, file, id))
+                hlink = find_hardlink_target(hlink_db, ent)
+                try:
+                    meta = metadata.from_path(ent.name, hardlink_target=hlink,
+                                              normalized=True)
+                except (OSError, IOError) as e:
+                    add_error(e)
+                    lastskip_name = ent.name
+                    meta = metadata.Metadata()
+                metalists[-1].append((sort_key, meta))
+
+        if exists and wasmissing:
+            _nonlocal['count'] += oldsize
+            _nonlocal['subcount'] = 0
+
+
+    if opt.progress:
+        pct = total and _nonlocal['count']*100.0/total or 100
+        progress('Saving: %.2f%% (%d/%dk, %d/%d files), done.    \n'
+                 % (pct, _nonlocal['count']/1024, total/1024, fcount, ftotal))
+
+    while len(parts) > 1: # _pop() all the parts above the root
+        _pop(force_tree = None)
+    assert(len(shalists) == 1)
+    assert(len(metalists) == 1)
+
+    # Finish the root directory.
+    tree = _pop(force_tree = None,
+                # When there's a collision, use empty metadata for the root.
+                dir_metadata = metadata.Metadata() if root_collision else None)
+
+    sys.stdout.flush()
+    out = byte_stream(sys.stdout)
+
+    if opt.tree:
+        out.write(hexlify(tree))
+        out.write(b'\n')
+    if opt.commit or name:
+        if compat.py_maj > 2:
+            # Strip b prefix from python 3 bytes reprs to preserve previous format
+             msgcmd = b'[%s]' % b', '.join([repr(argv_bytes(x))[1:].encode('ascii')
+                                           for x in argv])
+        else:
+            msgcmd = repr(argv)
+        msg = b'bup save\n\nGenerated by command:\n%s\n' % msgcmd
+        userline = (b'%s <%s@%s>' % (userfullname(), username(), hostname()))
+        commit = w.new_commit(tree, oldref, userline, date, None,
+                              userline, date, None, msg)
+        if opt.commit:
+            out.write(hexlify(commit))
+            out.write(b'\n')
+
+    msr.close()
+    w.close()  # must close before we can update the ref
+
+    if opt.name:
+        if cli:
+            cli.update_ref(refname, commit, oldref)
+        else:
+            git.update_ref(refname, commit, oldref)
+
+    if cli:
+        cli.close()
+
+    if saved_errors:
+        log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))
+        sys.exit(1)
diff --git a/lib/bup/cmd/server.py b/lib/bup/cmd/server.py
new file mode 100755 (executable)
index 0000000..3ff8a19
--- /dev/null
@@ -0,0 +1,312 @@
+
+from __future__ import absolute_import
+from binascii import hexlify, unhexlify
+import os, struct, subprocess, sys
+
+from bup import options, git, vfs, vint
+from bup.compat import environ, hexstr
+from bup.git import MissingObject
+from bup.helpers import (Conn, debug1, debug2, linereader, lines_until_sentinel,
+                         log)
+from bup.io import byte_stream, path_msg
+from bup.repo import LocalRepo
+
+
# Module-level session state, (re)initialized by _init_session().
suspended_w = None        # PackWriter suspended mid receive-objects-v2, if any
dumb_server_mode = False  # True when the repo contains a bup-dumb-server marker
repo = None               # LocalRepo for the current session
+
+
def do_help(conn, junk):
    """Send the sorted list of supported wire commands to the client."""
    # 'commands' is the module-level dispatch table defined below.
    names = b'\n    '.join(sorted(commands))
    conn.write(b'Commands:\n    %s\n' % names)
    conn.ok()
+
+
def _set_mode():
    """Select dumb vs. smart serving based on the repo's marker file.

    Dumb mode is enabled when a 'bup-dumb-server' file exists in the
    repository; the chosen mode is logged.
    """
    global dumb_server_mode
    dumb_server_mode = os.path.exists(git.repo(b'bup-dumb-server'))
    mode_name = 'dumb' if dumb_server_mode else 'smart'
    debug1('bup server: serving in %s mode\n' % mode_name)
+
+
def _init_session(reinit_with_new_repopath=None):
    """(Re)establish the session's repository.

    With no argument and git.repodir already configured, just ensure
    the LocalRepo wrapper exists.  Otherwise validate the (new) path,
    replace any existing repo object, and export BUP_DIR so child
    processes operate on the same repository.
    """
    global repo
    if reinit_with_new_repopath is not None or not git.repodir:
        git.check_repo_or_die(reinit_with_new_repopath)
        if repo:
            repo.close()
        repo = LocalRepo()
        # The path is now known to be a proper repository; record it in
        # the environment so subprocesses inherit the location.
        environ[b'BUP_DIR'] = git.repodir
        debug1('bup server: bupdir is %s\n' % path_msg(git.repodir))
        _set_mode()
        return
    if not repo:
        repo = LocalRepo()
+
+
def init_dir(conn, arg):
    """Create a new repository at arg and make it the session repo."""
    git.init_repo(arg)
    debug1('bup server: bupdir initialized: %s\n' % path_msg(git.repodir))
    _init_session(arg)
    conn.ok()
+
+
def set_dir(conn, arg):
    """Switch the session to the repository at arg."""
    _init_session(arg)
    conn.ok()
+
+    
def list_indexes(conn, junk):
    """Send every *.idx name in the repo's pack dir, one per line.

    In dumb-server mode each name is suffixed with ' load' to tell the
    client it must fetch the index rather than rely on suggestions.
    """
    _init_session()
    suffix = b' load' if dumb_server_mode else b''
    for entry in os.listdir(git.repo(b'objects/pack')):
        if entry.endswith(b'.idx'):
            conn.write(b'%s%s\n' % (entry, suffix))
    conn.ok()
+
+
def send_index(conn, name):
    """Send the named pack index: a !I byte count, then the raw bytes."""
    _init_session()
    # Only bare .idx filenames are allowed -- no path components.
    assert b'/' not in name
    assert name.endswith(b'.idx')
    idx = git.open_idx(git.repo(b'objects/pack/%s' % name))
    conn.write(struct.pack('!I', len(idx.map)))
    conn.write(idx.map)
    conn.ok()
+
+
def receive_objects_v2(conn, junk):
    """Receive a stream of git objects from the client into a pack.

    Wire format per object: a !I total length n, a 20-byte sha, a !I
    crc32, then n-24 bytes of packed data.  A length of zero ends the
    stream (close the pack, reply with the new .idx name); 0xffffffff
    suspends the open pack so a later receive-objects-v2 can resume it.
    In smart mode, objects we already have are not stored; instead the
    owning index name is suggested to the client for deduplication.
    """
    global suspended_w
    _init_session()
    suggested = set()
    # Resume a pack suspended by an earlier 0xffffffff marker, if any.
    if suspended_w:
        w = suspended_w
        suspended_w = None
    else:
        if dumb_server_mode:
            # Dumb mode: no object cache, so no existence checks below.
            w = git.PackWriter(objcache_maker=None)
        else:
            w = git.PackWriter()
    while 1:
        ns = conn.read(4)
        if not ns:
            w.abort()
            raise Exception('object read: expected length header, got EOF\n')
        n = struct.unpack('!I', ns)[0]
        #debug2('expecting %d bytes\n' % n)
        if not n:
            # Zero length: client is done.  Close the pack and report the
            # resulting index name (if any objects were actually written).
            debug1('bup server: received %d object%s.\n' 
                % (w.count, w.count!=1 and "s" or ''))
            fullpath = w.close(run_midx=not dumb_server_mode)
            if fullpath:
                (dir, name) = os.path.split(fullpath)
                conn.write(b'%s.idx\n' % name)
            conn.ok()
            return
        elif n == 0xffffffff:
            # Keep the pack open so a subsequent receive-objects-v2 can
            # continue appending to it.
            debug2('bup server: receive-objects suspended.\n')
            suspended_w = w
            conn.ok()
            return

        shar = conn.read(20)
        crcr = struct.unpack('!I', conn.read(4))[0]
        n -= 20 + 4
        buf = conn.read(n)  # object sizes in bup are reasonably small
        #debug2('read %d bytes\n' % n)
        _check(w, n, len(buf), 'object read: expected %d bytes, got %d\n')
        if not dumb_server_mode:
            # If the object already exists, skip storing it and tell the
            # client which index proves that (once per index).
            oldpack = w.exists(shar, want_source=True)
            if oldpack:
                assert(not oldpack == True)
                assert(oldpack.endswith(b'.idx'))
                (dir,name) = os.path.split(oldpack)
                if not (name in suggested):
                    debug1("bup server: suggesting index %s\n"
                           % git.shorten_hash(name).decode('ascii'))
                    debug1("bup server:   because of object %s\n"
                           % hexstr(shar))
                    conn.write(b'index %s\n' % name)
                    suggested.add(name)
                continue
        nw, crc = w._raw_write((buf,), sha=shar)
        _check(w, crcr, crc, 'object read: expected crc %d, got %d\n')
    # NOTREACHED
+    
+
+def _check(w, expected, actual, msg):
+    if expected != actual:
+        w.abort()
+        raise Exception(msg % (expected, actual))
+
+
def read_ref(conn, refname):
    """Send the hex oid of refname followed by '\\n', or nothing if unset."""
    _init_session()
    oid = git.read_ref(refname)
    reply = b'%s\n' % hexlify(oid) if oid else b''
    conn.write(reply)
    conn.ok()
+
+
def update_ref(conn, refname):
    """Update refname; the client sends the new then the old hex oid."""
    _init_session()
    new_hex = conn.readline().strip()
    old_hex = conn.readline().strip()
    git.update_ref(refname, unhexlify(new_hex), unhexlify(old_hex))
    conn.ok()
+
def join(conn, id):
    """Stream the joined content of object id to the client.

    Each chunk is preceded by its !I length; four zero bytes terminate
    the stream.  A missing object reports conn.error() instead of ok().
    """
    _init_session()
    try:
        for chunk in git.cp().join(id):
            conn.write(struct.pack('!I', len(chunk)))
            conn.write(chunk)
    except KeyError as ex:
        log('server: error: %s\n' % ex)
        conn.write(b'\0\0\0\0')
        conn.error(ex)
    else:
        conn.write(b'\0\0\0\0')
        conn.ok()
+
def cat_batch(conn, dummy):
    """Stream several objects; the client sends one ref per line, ending
    with a blank-line sentinel.

    Reply per ref: 'missing\\n' for unknown refs, otherwise an
    '<oid> <type> <size>\\n' header followed by the raw content.
    """
    _init_session()
    cat_pipe = git.cp()
    # For now, avoid potential deadlock by just reading them all
    requested = tuple(lines_until_sentinel(conn, b'\n', Exception))
    for ref in requested:
        ref = ref[:-1]  # strip the trailing newline
        it = cat_pipe.get(ref)
        info = next(it)
        if not info[0]:
            conn.write(b'missing\n')
            continue
        conn.write(b'%s %s %d\n' % info)
        for buf in it:
            conn.write(buf)
    conn.ok()
+
def refs(conn, args):
    """List refs matching client-sent patterns.

    args is b'<limit_to_heads> <limit_to_tags>' (each b'0' or b'1');
    the patterns follow on the connection, terminated by a blank line.
    Replies with '<hex oid> <name>' lines and a blank terminator.
    """
    heads_flag, tags_flag = args.split()
    assert heads_flag in (b'0', b'1')
    assert tags_flag in (b'0', b'1')
    limit_to_heads = int(heads_flag)
    limit_to_tags = int(tags_flag)
    _init_session()
    patterns = tuple(x[:-1] for x in lines_until_sentinel(conn, b'\n', Exception))
    for name, oid in git.list_refs(patterns=patterns,
                                   limit_to_heads=limit_to_heads,
                                   limit_to_tags=limit_to_tags):
        assert b'\n' not in name
        conn.write(b'%s %s\n' % (hexlify(oid), name))
    conn.write(b'\n')
    conn.ok()
+
def rev_list(conn, _):
    """Run git rev-list for client-supplied refs and stream the output.

    Protocol: a count line (currently must be blank, i.e. no count), a
    format line (blank selects the default format), then refs, each on
    its own line, terminated by a blank line.  The raw rev-list output
    is streamed back followed by a blank line; a rev-list failure is
    reported via conn.error() and raised.
    """
    _init_session()
    count = conn.readline()
    if not count:
        raise Exception('Unexpected EOF while reading rev-list count')
    assert count == b'\n'
    count = None
    fmt = conn.readline()
    if not fmt:
        raise Exception('Unexpected EOF while reading rev-list format')
    fmt = None if fmt == b'\n' else fmt[:-1]
    refs = tuple(x[:-1] for x in lines_until_sentinel(conn, b'\n', Exception))
    args = git.rev_list_invocation(refs, format=fmt)
    p = subprocess.Popen(args, env=git._gitenv(git.repodir),
                         stdout=subprocess.PIPE)
    while True:
        out = p.stdout.read(64 * 1024)
        if not out:
            break
        conn.write(out)
    conn.write(b'\n')
    rv = p.wait()  # not fatal
    if rv:
        msg = 'git rev-list returned error %d' % rv
        conn.error(msg)
        # FIX: GitError was referenced unqualified but never imported
        # (only MissingObject is imported from bup.git above), so a
        # rev-list failure raised NameError instead of the real error.
        raise git.GitError(msg)
    conn.ok()
+
def resolve(conn, args):
    """Resolve a VFS path for the client.

    args is a single integer flag word: bit 0 = want_meta, bit 1 =
    follow symlinks, bit 2 = a serialized parent resolution precedes
    the path on the connection.  Replies with a 0x00 byte plus a
    serialized IOError on failure, or a 0x01 byte plus the serialized
    resolution on success.
    """
    _init_session()
    (flags,) = args.split()
    flags = int(flags)
    want_meta = bool(flags & 1)
    follow = bool(flags & 2)
    have_parent = bool(flags & 4)
    parent = vfs.read_resolution(conn) if have_parent else None
    path = vint.read_bvec(conn)
    if not len(path):
        raise Exception('Empty resolve path')
    try:
        res = list(vfs.resolve(repo, path, parent=parent, want_meta=want_meta,
                               follow=follow))
    except vfs.IOError as ex:
        res = ex
    if isinstance(res, vfs.IOError):
        conn.write(b'\x00')  # error
        vfs.write_ioerror(conn, res)
    else:
        conn.write(b'\x01')  # success
        vfs.write_resolution(conn, res)
    conn.ok()
+
+optspec = """
+bup server
+"""
+
# Dispatch table mapping wire command names to handler functions; each
# handler is called as handler(conn, rest) where rest is the remainder
# of the command line.  b'quit' is special-cased in main() and never
# dispatched, hence the None.
commands = {
    b'quit': None,
    b'help': do_help,
    b'init-dir': init_dir,
    b'set-dir': set_dir,
    b'list-indexes': list_indexes,
    b'send-index': send_index,
    b'receive-objects-v2': receive_objects_v2,
    b'read-ref': read_ref,
    b'update-ref': update_ref,
    b'join': join,
    b'cat': join,  # apocryphal alias
    b'cat-batch' : cat_batch,
    b'refs': refs,
    b'rev-list': rev_list,
    b'resolve': resolve
}
+
def main(argv):
    # Speak the bup server protocol over stdin/stdout: read one command
    # line at a time and dispatch through 'commands' until EOF or 'quit'.
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    if extra:
        o.fatal('no arguments expected')

    debug2('bup server: reading from stdin.\n')

    # FIXME: this protocol is totally lame and not at all future-proof.
    # (Especially since we abort completely as soon as *anything* bad happens)
    sys.stdout.flush()
    conn = Conn(byte_stream(sys.stdin), byte_stream(sys.stdout))
    lr = linereader(conn)
    for _line in lr:
        line = _line.strip()
        if not line:
            continue
        debug1('bup server: command: %r\n' % line)
        # Split into the command word and the (optional) argument rest.
        words = line.split(b' ', 1)
        cmd = words[0]
        rest = len(words)>1 and words[1] or b''
        if cmd == b'quit':
            break
        else:
            cmd = commands.get(cmd)
            if cmd:
                cmd(conn, rest)
            else:
                raise Exception('unknown server command: %r\n' % line)

    debug1('bup server: done\n')
diff --git a/lib/bup/cmd/split.py b/lib/bup/cmd/split.py
new file mode 100755 (executable)
index 0000000..87ad887
--- /dev/null
@@ -0,0 +1,236 @@
+
+from __future__ import absolute_import, division, print_function
+from binascii import hexlify
+import sys, time
+
+from bup import compat, hashsplit, git, options, client
+from bup.compat import argv_bytes, environ
+from bup.helpers import (add_error, handle_ctrl_c, hostname, log, parse_num,
+                         qprogress, reprogress, saved_errors,
+                         valid_save_name,
+                         parse_date_or_fatal)
+from bup.io import byte_stream
+from bup.pwdgrp import userfullname, username
+
+
+optspec = """
+bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...]
+bup split -b OPTIONS [--git-ids | filenames...]
+bup split --copy OPTIONS [--git-ids | filenames...]
+bup split --noop [-b|-t] OPTIONS [--git-ids | filenames...]
+--
+ Modes:
+b,blobs    output a series of blob ids.  Implies --fanout=0.
+t,tree     output a tree id
+c,commit   output a commit id
+n,name=    save the result under the given name
+noop       split the input, but throw away the result
+copy       split the input, copy it to stdout, don't save to repo
+ Options:
+r,remote=  remote repository path
+d,date=    date for the commit (seconds since the epoch)
+q,quiet    don't print progress messages
+v,verbose  increase log output (can be used more than once)
+git-ids    read a list of git object ids from stdin and split their contents
+keep-boundaries  don't let one chunk span two input files
+bench      print benchmark timings to stderr
+max-pack-size=  maximum bytes in a single pack
+max-pack-objects=  maximum number of objects in a single pack
+fanout=    average number of blobs in a single tree
+bwlimit=   maximum bytes/sec to transmit to server
+#,compress=  set compression level to # (0-9, 9 is highest) [1]
+"""
+
def main(argv):
    """Entry point for 'bup split': hashsplit stdin (or files, or git
    object ids with --git-ids) and optionally store the result.

    The mode flags (-b/-t/-c/-n/--noop/--copy) select what is printed
    and whether anything is saved; see optspec above.
    """
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])
    if opt.name: opt.name = argv_bytes(opt.name)
    if opt.remote: opt.remote = argv_bytes(opt.remote)
    if opt.verbose is None: opt.verbose = 0

    if not (opt.blobs or opt.tree or opt.commit or opt.name or
            opt.noop or opt.copy):
        o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
    if opt.copy and (opt.blobs or opt.tree):
        o.fatal('--copy is incompatible with -b, -t')
    if (opt.noop or opt.copy) and (opt.commit or opt.name):
        o.fatal('--noop and --copy are incompatible with -c, -n')
    if opt.blobs and (opt.tree or opt.commit or opt.name):
        o.fatal('-b is incompatible with -t, -c, -n')
    if extra and opt.git_ids:
        o.fatal("don't provide filenames when using --git-ids")

    if opt.verbose >= 2:
        git.verbose = opt.verbose - 1
        opt.bench = 1

    max_pack_size = None
    if opt.max_pack_size:
        max_pack_size = parse_num(opt.max_pack_size)
    max_pack_objects = None
    if opt.max_pack_objects:
        max_pack_objects = parse_num(opt.max_pack_objects)

    if opt.fanout:
        hashsplit.fanout = parse_num(opt.fanout)
    if opt.blobs:
        hashsplit.fanout = 0  # --blobs implies no tree fanout at all
    if opt.bwlimit:
        client.bwlimit = parse_num(opt.bwlimit)
    if opt.date:
        date = parse_date_or_fatal(opt.date, o.fatal)
    else:
        date = time.time()

    # Hack around lack of nonlocal vars in python 2
    total_bytes = [0]
    def prog(filenum, nbytes):
        # Progress callback handed to the hashsplit routines.
        total_bytes[0] += nbytes
        if filenum > 0:
            qprogress('Splitting: file #%d, %d kbytes\r'
                      % (filenum+1, total_bytes[0] // 1024))
        else:
            qprogress('Splitting: %d kbytes\r' % (total_bytes[0] // 1024))

    is_reverse = environ.get(b'BUP_SERVER_REVERSE')
    if is_reverse and opt.remote:
        o.fatal("don't use -r in reverse mode; it's automatic")
    start_time = time.time()

    if opt.name and not valid_save_name(opt.name):
        o.fatal("'%r' is not a valid branch name." % opt.name)
    refname = opt.name and b'refs/heads/%s' % opt.name or None

    # Select the destination: none (--noop/--copy), a remote server, or
    # the local repository.
    if opt.noop or opt.copy:
        cli = pack_writer = oldref = None
    elif opt.remote or is_reverse:
        git.check_repo_or_die()
        cli = client.Client(opt.remote)
        oldref = refname and cli.read_ref(refname) or None
        pack_writer = cli.new_packwriter(compression_level=opt.compress,
                                         max_pack_size=max_pack_size,
                                         max_pack_objects=max_pack_objects)
    else:
        git.check_repo_or_die()
        cli = None
        oldref = refname and git.read_ref(refname) or None
        pack_writer = git.PackWriter(compression_level=opt.compress,
                                     max_pack_size=max_pack_size,
                                     max_pack_objects=max_pack_objects)

    input = byte_stream(sys.stdin)

    if opt.git_ids:
        # the input is actually a series of git object ids that we should retrieve
        # and split.
        #
        # This is a bit messy, but basically it converts from a series of
        # CatPipe.get() iterators into a series of file-type objects.
        # It would be less ugly if either CatPipe.get() returned a file-like object
        # (not very efficient), or split_to_shalist() expected an iterator instead
        # of a file.
        cp = git.CatPipe()
        class IterToFile:
            """Minimal file-like wrapper over an iterator of byte chunks."""
            def __init__(self, it):
                self.it = iter(it)
            def read(self, size):
                # 'size' is ignored; return the next chunk (or b'' at EOF).
                v = next(self.it, None)
                return v or b''
        def read_ids():
            while 1:
                line = input.readline()
                if not line:
                    break
                if line:
                    line = line.strip()
                try:
                    it = cp.get(line.strip())
                    next(it, None)  # skip the file info
                except KeyError as e:
                    add_error('error: %s' % e)
                    continue
                yield IterToFile(it)
        files = read_ids()
    else:
        # the input either comes from a series of files or from stdin.
        files = extra and (open(argv_bytes(fn), 'rb') for fn in extra) or [input]

    if pack_writer:
        new_blob = pack_writer.new_blob
        new_tree = pack_writer.new_tree
    elif opt.blobs or opt.tree:
        # --noop mode: compute ids without writing anything.
        new_blob = lambda content: git.calc_hash(b'blob', content)
        new_tree = lambda shalist: git.calc_hash(b'tree', git.tree_encode(shalist))

    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    if opt.blobs:
        shalist = hashsplit.split_to_blobs(new_blob, files,
                                           keep_boundaries=opt.keep_boundaries,
                                           progress=prog)
        for (sha, size, level) in shalist:
            out.write(hexlify(sha) + b'\n')
            reprogress()
    elif opt.tree or opt.commit or opt.name:
        if opt.name: # insert dummy_name which may be used as a restore target
            mode, sha = \
                hashsplit.split_to_blob_or_tree(new_blob, new_tree, files,
                                                keep_boundaries=opt.keep_boundaries,
                                                progress=prog)
            splitfile_name = git.mangle_name(b'data', hashsplit.GIT_MODE_FILE, mode)
            shalist = [(mode, splitfile_name, sha)]
        else:
            shalist = hashsplit.split_to_shalist(
                          new_blob, new_tree, files,
                          keep_boundaries=opt.keep_boundaries, progress=prog)
        tree = new_tree(shalist)
    else:
        last = 0
        it = hashsplit.hashsplit_iter(files,
                                      keep_boundaries=opt.keep_boundaries,
                                      progress=prog)
        for (blob, level) in it:
            hashsplit.total_split += len(blob)
            if opt.copy:
                # FIX: write the raw chunk to the binary stdout stream;
                # sys.stdout.write(str(blob)) emitted a "b'...'" repr
                # under Python 3 instead of the data itself.
                out.write(blob)
            megs = hashsplit.total_split // 1024 // 1024
            if not opt.quiet and last != megs:
                last = megs

    if opt.verbose:
        log('\n')
    if opt.tree:
        out.write(hexlify(tree) + b'\n')
    if opt.commit or opt.name:
        msg = b'bup split\n\nGenerated by command:\n%r\n' % compat.get_argvb()
        ref = opt.name and (b'refs/heads/%s' % opt.name) or None
        userline = b'%s <%s@%s>' % (userfullname(), username(), hostname())
        commit = pack_writer.new_commit(tree, oldref, userline, date, None,
                                        userline, date, None, msg)
        if opt.commit:
            out.write(hexlify(commit) + b'\n')

    if pack_writer:
        pack_writer.close()  # must close before we can update the ref

    if opt.name:
        if cli:
            cli.update_ref(refname, commit, oldref)
        else:
            git.update_ref(refname, commit, oldref)

    if cli:
        cli.close()

    secs = time.time() - start_time
    size = hashsplit.total_split
    if opt.bench:
        log('bup: %.2f kbytes in %.2f secs = %.2f kbytes/sec\n'
            % (size / 1024, secs, size / 1024 / secs))

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))
        sys.exit(1)
diff --git a/lib/bup/cmd/tag.py b/lib/bup/cmd/tag.py
new file mode 100755 (executable)
index 0000000..aaff755
--- /dev/null
@@ -0,0 +1,90 @@
+
+from __future__ import absolute_import
+from binascii import hexlify
+import os, sys
+
+from bup import git, options
+from bup.compat import argv_bytes
+from bup.helpers import debug1, handle_ctrl_c, log
+from bup.io import byte_stream, path_msg
+
+
+# FIXME: review for safe writes.
+
+optspec = """
+bup tag
+bup tag [-f] <tag name> <commit>
+bup tag [-f] -d <tag name>
+--
+d,delete=   Delete a tag
+f,force     Overwrite existing tag, or ignore missing tag when deleting
+"""
+
def main(argv):
    """Entry point for 'bup tag': list tags (no args), delete a tag
    (-d NAME), or create one (NAME COMMIT)."""
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    git.check_repo_or_die()

    tags = [t for sublist in git.tags().values() for t in sublist]

    if opt.delete:
        # git.delete_ref() doesn't complain if a ref doesn't exist.  We
        # could implement this verification but we'd need to read in the
        # contents of the tag file and pass the hash, and we already know
        # about the tag's existence via "tags".
        tag_name = argv_bytes(opt.delete)
        if not opt.force and tag_name not in tags:
            log("error: tag '%s' doesn't exist\n" % path_msg(tag_name))
            sys.exit(1)
        tag_file = b'refs/tags/%s' % tag_name
        git.delete_ref(tag_file)
        sys.exit(0)

    if not extra:
        # No arguments: just list the existing tags.
        sys.stdout.flush()
        out = byte_stream(sys.stdout)
        for t in tags:
            out.write(t)
            out.write(b'\n')
        sys.exit(0)
    elif len(extra) != 2:
        o.fatal('expected commit ref and hash')

    tag_name, commit = map(argv_bytes, extra[:2])
    if not tag_name:
        o.fatal("tag name must not be empty.")
    debug1("args: tag name = %s; commit = %s\n"
           % (path_msg(tag_name), commit.decode('ascii')))

    if tag_name in tags and not opt.force:
        log("bup: error: tag '%s' already exists\n" % path_msg(tag_name))
        sys.exit(1)

    if tag_name.startswith(b'.'):
        o.fatal("'%s' is not a valid tag name." % path_msg(tag_name))

    try:
        oid = git.rev_parse(commit)
    except git.GitError as e:
        # FIX: the message previously lacked its trailing newline.
        log("bup: error: %s\n" % e)
        sys.exit(2)

    if not oid:
        log("bup: error: commit %s not found.\n" % commit.decode('ascii'))
        sys.exit(2)

    pL = git.PackIdxList(git.repo(b'objects/pack'))
    if not pL.exists(oid):
        log("bup: error: commit %s not found.\n" % commit.decode('ascii'))
        sys.exit(2)

    tag_file = git.repo(b'refs/tags/' + tag_name)
    try:
        # FIX: also catch IOError -- under Python 2 open() raises
        # IOError, which is not an OSError subclass there.  (On py3
        # IOError is just an alias of OSError, so this is harmless.)
        tag_fh = open(tag_file, 'wb')
    except (IOError, OSError) as e:
        # FIX: message previously lacked its trailing newline.
        log("bup: error: could not create tag '%s': %s\n"
            % (path_msg(tag_name), e))
        sys.exit(3)
    with tag_fh:
        tag_fh.write(hexlify(oid))
        tag_fh.write(b'\n')
diff --git a/lib/bup/cmd/tick.py b/lib/bup/cmd/tick.py
new file mode 100755 (executable)
index 0000000..e2dfa39
--- /dev/null
@@ -0,0 +1,21 @@
+
+from __future__ import absolute_import
+import os, sys, time
+
+from bup import options
+
+
+optspec = """
+bup tick
+"""
+
def main(argv):
    """Entry point for 'bup tick': sleep until the next whole second."""
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    if extra:
        o.fatal("no arguments expected")

    now = time.time()
    # Sleep for the remaining fraction of the current second.
    time.sleep(1 - (now - int(now)))
diff --git a/lib/bup/cmd/version.py b/lib/bup/cmd/version.py
new file mode 100755 (executable)
index 0000000..853fe4f
--- /dev/null
@@ -0,0 +1,34 @@
+
+from __future__ import absolute_import, print_function
+import re, sys
+
+from bup import compat, options, version
+from bup.io import byte_stream
+
+version_rx = re.compile(r'^[0-9]+\.[0-9]+(\.[0-9]+)?(-[0-9]+-g[0-9abcdef]+)?$')
+
+optspec = """
+bup version [--date|--commit]
+--
+date    display the date this version of bup was created
+commit  display the git commit id of this version of bup
+"""
+
def main(argv):
    """Entry point for 'bup version': print the version, its date, or
    its commit id (mutually exclusive options)."""
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    if (opt.date or 0) + (opt.commit or 0) > 1:
        o.fatal('at most one option expected')

    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    if opt.date:
        # version.date looks like b'<date> <time> ...'; keep the date part.
        out.write(version.date.split(b' ')[0] + b'\n')
    elif opt.commit:
        out.write(version.commit + b'\n')
    else:
        out.write(version.version + b'\n')
diff --git a/lib/bup/cmd/web.py b/lib/bup/cmd/web.py
new file mode 100755 (executable)
index 0000000..83c7ca6
--- /dev/null
@@ -0,0 +1,316 @@
+
+from __future__ import absolute_import, print_function
+from collections import namedtuple
+import mimetypes, os, posixpath, signal, stat, sys, time, urllib, webbrowser
+from binascii import hexlify
+
+
+from bup import options, git, vfs
+from bup.helpers import (chunkyreader, debug1, format_filesize, handle_ctrl_c,
+                         log, saved_errors)
+from bup.metadata import Metadata
+from bup.path import resource_path
+from bup.repo import LocalRepo
+from bup.io import path_msg
+
+try:
+    from tornado import gen
+    from tornado.httpserver import HTTPServer
+    from tornado.ioloop import IOLoop
+    from tornado.netutil import bind_unix_socket
+    import tornado.web
+except ImportError:
+    log('error: cannot find the python "tornado" module; please install it\n')
+    sys.exit(1)
+
+
+# FIXME: right now the way hidden files are handled causes every
+# directory to be traversed twice.
+
+
def http_date_from_utc_ns(utc_ns):
    """Return an RFC 7231 IMF-fixdate string for a UTC timestamp given
    in nanoseconds since the epoch (used for Last-Modified headers)."""
    # FIX: HTTP-date requires an explicit 'GMT' zone designator, which
    # was missing; integer division also avoids float rounding on very
    # large nanosecond values.
    return time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                         time.gmtime(utc_ns // 10**9))
+
+
+def _compute_breadcrumbs(path, show_hidden=False):
+    """Returns a list of breadcrumb objects for a path."""
+    breadcrumbs = []
+    breadcrumbs.append((b'[root]', b'/'))
+    path_parts = path.split(b'/')[1:-1]
+    full_path = b'/'
+    for part in path_parts:
+        full_path += part + b"/"
+        url_append = b""
+        if show_hidden:
+            url_append = b'?hidden=1'
+        breadcrumbs.append((part, full_path+url_append))
+    return breadcrumbs
+
+
def _contains_hidden_files(repo, dir_item):
    """Return True if the directory contains items with names other than
    '.' and '..' that begin with '.'

    """
    return any(name.startswith(b'.')
               for name, item in vfs.contents(repo, dir_item, want_meta=False)
               if name not in (b'.', b'..'))
+
+
def _dir_contents(repo, resolution, show_hidden=False):
    """Yield the display information for the contents of dir_item.

    resolution is a vfs resolution list whose last element is the
    directory; each yielded value is a (display_name, link,
    display_size) triple.
    """

    url_query = b'?hidden=1' if show_hidden else b''

    def display_info(name, item, resolved_item, display_name=None):
        # NOTE(review): 'opt' is the module-level options object that
        # main() assigns; this relies on main() running first.
        global opt
        # link should be based on fully resolved type to avoid extra
        # HTTP redirect.
        link = tornado.escape.url_escape(name, plus=False)
        if stat.S_ISDIR(vfs.item_mode(resolved_item)):
            link += '/'
        link = link.encode('ascii')

        size = vfs.item_size(repo, item)
        if opt.human_readable:
            display_size = format_filesize(size)
        else:
            display_size = size

        # Decorate directories with '/' and symlinks with '@' (ls -F style).
        if not display_name:
            mode = vfs.item_mode(item)
            if stat.S_ISDIR(mode):
                display_name = name + b'/'
            elif stat.S_ISLNK(mode):
                display_name = name + b'@'
            else:
                display_name = name

        return display_name, link + url_query, display_size

    dir_item = resolution[-1][1]    
    for name, item in vfs.contents(repo, dir_item):
        if not show_hidden:
            if (name not in (b'.', b'..')) and name.startswith(b'.'):
                continue
        if name == b'.':
            # When we see '.', also emit '..' using the parent element of
            # the resolution (or the directory itself at the root).
            yield display_info(name, item, item, b'.')
            parent_item = resolution[-2][1] if len(resolution) > 1 else dir_item
            yield display_info(b'..', parent_item, parent_item, b'..')
            continue
        res_item = vfs.ensure_item_has_metadata(repo, item, include_size=True)
        yield display_info(name, item, res_item)
+
+
class BupRequestHandler(tornado.web.RequestHandler):
    """Serve directory listings and file contents out of the bup VFS."""

    def initialize(self, repo=None):
        # Tornado calls this with the kwargs registered in Application().
        self.repo = repo

    def decode_argument(self, value, name=None):
        # Keep 'path' as raw bytes; everything else gets the default
        # unicode decoding.
        if name == 'path':
            return value
        return super(BupRequestHandler, self).decode_argument(value, name)

    def get(self, path):
        return self._process_request(path)

    def head(self, path):
        return self._process_request(path)

    def _process_request(self, path):
        """Dispatch path to a directory listing or a file download."""
        print('Handling request for %s' % path)
        sys.stdout.flush()
        # Set want_meta because dir metadata won't be fetched, and if
        # it's not a dir, then we're going to want the metadata.
        res = vfs.resolve(self.repo, path, want_meta=True)
        leaf_name, leaf_item = res[-1]
        if not leaf_item:
            self.send_error(404)
            return
        mode = vfs.item_mode(leaf_item)
        if stat.S_ISDIR(mode):
            self._list_directory(path, res)
        else:
            self._get_file(self.repo, path, res)

    def _list_directory(self, path, resolution):
        """Helper to produce a directory listing.

        Return value is either a file object, or None (indicating an
        error).  In either case, the headers are sent.
        """
        if not path.endswith(b'/') and len(path) > 0:
            print('Redirecting from %s to %s' % (path_msg(path), path_msg(path + b'/')))
            return self.redirect(path + b'/', permanent=True)

        hidden_arg = self.request.arguments.get('hidden', [0])[-1]
        try:
            show_hidden = int(hidden_arg)
        except ValueError as e:
            show_hidden = False

        self.render(
            'list-directory.html',
            path=path,
            breadcrumbs=_compute_breadcrumbs(path, show_hidden),
            files_hidden=_contains_hidden_files(self.repo, resolution[-1][1]),
            hidden_shown=show_hidden,
            dir_contents=_dir_contents(self.repo, resolution,
                                       show_hidden=show_hidden))

    @gen.coroutine
    def _get_file(self, repo, path, resolved):
        """Process a request on a file.

        Return value is either a file object, or None (indicating an error).
        In either case, the headers are sent.
        """
        file_item = resolved[-1][1]
        file_item = vfs.augment_item_meta(repo, file_item, include_size=True)
        meta = file_item.meta
        ctype = self._guess_type(path)
        self.set_header("Last-Modified", http_date_from_utc_ns(meta.mtime))
        self.set_header("Content-Type", ctype)
        self.set_header("Content-Length", str(meta.size))
        assert len(file_item.oid) == 20
        self.set_header("Etag", hexlify(file_item.oid))
        if self.request.method != 'HEAD':
            with vfs.fopen(self.repo, file_item) as f:
                # FIX: dropped an unused duplicate 'it = chunkyreader(f)'
                # that was created here and never consumed.
                for blob in chunkyreader(f):
                    self.write(blob)
        raise gen.Return()

    def _guess_type(self, path):
        """Guess the type of a file.

        Argument is a PATH (a filename).

        Return value is a string of the form type/subtype,
        usable for a MIME Content-type header.

        The default implementation looks the file's extension
        up in the table self.extensions_map, using application/octet-stream
        as a default; however it would be permissible (if
        slow) to look inside the data to make a better guess.
        """
        base, ext = posixpath.splitext(path)
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        ext = ext.lower()
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        else:
            return self.extensions_map['']

    if not mimetypes.inited:
        mimetypes.init() # try to read system mime.types
    extensions_map = mimetypes.types_map.copy()
    extensions_map.update({
        '': 'text/plain', # Default
        '.py': 'text/plain',
        '.c': 'text/plain',
        '.h': 'text/plain',
        })
+
+
+io_loop = None
+
def handle_sigterm(signum, frame):
    """SIGTERM handler: stop the running IOLoop, or exit if none yet."""
    global io_loop
    debug1('\nbup-web: signal %d received\n' % signum)
    log('Shutdown requested\n')
    if io_loop:
        io_loop.stop()
    else:
        sys.exit(0)
+
+
+optspec = """
+bup web [[hostname]:port]
+bup web unix://path
+--
+human-readable    display human readable file sizes (i.e. 3.9K, 4.7M)
+browser           show repository in default browser (incompatible with unix://)
+"""
+
+opt = None
+
def main(argv):
    """Entry point for 'bup web': serve the repository over HTTP on an
    inet or unix-domain socket."""
    global io_loop
    global opt
    signal.signal(signal.SIGTERM, handle_sigterm)

    UnixAddress = namedtuple('UnixAddress', ['path'])
    InetAddress = namedtuple('InetAddress', ['host', 'port'])

    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    if len(extra) > 1:
        o.fatal("at most one argument expected")

    if len(extra) == 0:
        address = InetAddress(host='127.0.0.1', port=8080)
    else:
        bind_url = extra[0]
        if bind_url.startswith('unix://'):
            address = UnixAddress(path=bind_url[len('unix://'):])
        else:
            addr_parts = extra[0].split(':', 1)
            if len(addr_parts) == 1:
                host = '127.0.0.1'
                port = addr_parts[0]
            else:
                host, port = addr_parts
            try:
                port = int(port)
            except (TypeError, ValueError) as ex:
                o.fatal('port must be an integer, not %r' % port)
            address = InetAddress(host=host, port=port)

    git.check_repo_or_die()

    settings = dict(
        debug = 1,
        template_path = resource_path(b'web').decode('utf-8'),
        static_path = resource_path(b'web/static').decode('utf-8'),
    )

    # Disable buffering on stdout, for debug messages
    try:
        sys.stdout._line_buffering = True
    except AttributeError:
        sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

    application = tornado.web.Application([
        (r"(?P<path>/.*)", BupRequestHandler, dict(repo=LocalRepo())),
    ], **settings)

    http_server = HTTPServer(application)
    io_loop_pending = IOLoop.instance()

    if isinstance(address, InetAddress):
        sockets = tornado.netutil.bind_sockets(address.port, address.host)
        http_server.add_sockets(sockets)
        print('Serving HTTP on %s:%d...' % sockets[0].getsockname()[0:2])
        if opt.browser:
            browser_addr = 'http://' + address[0] + ':' + str(address[1])
            io_loop_pending.add_callback(lambda : webbrowser.open(browser_addr))
    elif isinstance(address, UnixAddress):
        unix_socket = bind_unix_socket(address.path)
        http_server.add_socket(unix_socket)
        print('Serving HTTP on filesystem socket %r' % address.path)
    else:
        # FIX: log() takes one preformatted string, not printf-style args.
        log('error: unexpected address %r\n' % (address,))
        sys.exit(1)

    # FIX: without 'global io_loop' above, this assignment bound a local
    # variable, so handle_sigterm() never saw the running loop and a
    # SIGTERM could not stop the server cleanly.
    io_loop = io_loop_pending
    io_loop.start()

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))
        sys.exit(1)
diff --git a/lib/bup/cmd/xstat.py b/lib/bup/cmd/xstat.py
new file mode 100755 (executable)
index 0000000..0cf8bdc
--- /dev/null
@@ -0,0 +1,112 @@
+# Copyright (C) 2010 Rob Browning
+#
+# This code is covered under the terms of the GNU Library General
+# Public License as described in the bup LICENSE file.
+
+from __future__ import absolute_import, print_function
+
+import errno, stat, sys
+
+from bup import compat, metadata, options, xstat
+from bup.compat import argv_bytes
+from bup.helpers import add_error, handle_ctrl_c, parse_timestamp, saved_errors, \
+    log
+from bup.io import byte_stream
+
+
+def parse_timestamp_arg(o, field, value):
+    res = str(value) # Undo autoconversion.
+    try:
+        res = parse_timestamp(res)
+    except ValueError as ex:
+        if ex.args:
+            o.fatal('unable to parse %s resolution "%s" (%s)'
+                    % (field, value, ex))
+        else:
+            o.fatal('unable to parse %s resolution "%s"' % (field, value))
+
+    if res != 1 and res % 10:
+        o.fatal('%s resolution "%s" must be a power of 10' % (field, value))
+    return res
+
+
+optspec = """
+bup xstat pathinfo [OPTION ...] <PATH ...>
+--
+v,verbose       increase log output (can be used more than once)
+q,quiet         don't show progress meter
+exclude-fields= exclude comma-separated fields
+include-fields= include comma-separated fields (definitive if first)
+atime-resolution=  limit s, ms, us, ns, 10ns (value must be a power of 10) [ns]
+mtime-resolution=  limit s, ms, us, ns, 10ns (value must be a power of 10) [ns]
+ctime-resolution=  limit s, ms, us, ns, 10ns (value must be a power of 10) [ns]
+"""
+
+def main(argv):
+
+    target_filename = b''
+    active_fields = metadata.all_fields
+
+    o = options.Options(optspec)
+    (opt, flags, remainder) = o.parse_bytes(argv[1:])
+
+    atime_resolution = parse_timestamp_arg(o, 'atime', opt.atime_resolution)
+    mtime_resolution = parse_timestamp_arg(o, 'mtime', opt.mtime_resolution)
+    ctime_resolution = parse_timestamp_arg(o, 'ctime', opt.ctime_resolution)
+
+    treat_include_fields_as_definitive = True
+    for flag, value in flags:
+        if flag == '--exclude-fields':
+            exclude_fields = frozenset(value.split(','))
+            for f in exclude_fields:
+                if not f in metadata.all_fields:
+                    o.fatal(f + ' is not a valid field name')
+            active_fields = active_fields - exclude_fields
+            treat_include_fields_as_definitive = False
+        elif flag == '--include-fields':
+            include_fields = frozenset(value.split(','))
+            for f in include_fields:
+                if not f in metadata.all_fields:
+                    o.fatal(f + ' is not a valid field name')
+            if treat_include_fields_as_definitive:
+                active_fields = include_fields
+                treat_include_fields_as_definitive = False
+            else:
+                active_fields = active_fields | include_fields
+
+    opt.verbose = opt.verbose or 0
+    opt.quiet = opt.quiet or 0
+    metadata.verbose = opt.verbose - opt.quiet
+
+    sys.stdout.flush()
+    out = byte_stream(sys.stdout)
+
+    first_path = True
+    for path in remainder:
+        path = argv_bytes(path)
+        try:
+            m = metadata.from_path(path, archive_path = path)
+        except (OSError,IOError) as e:
+            if e.errno == errno.ENOENT:
+                add_error(e)
+                continue
+            else:
+                raise
+        if metadata.verbose >= 0:
+            if not first_path:
+                out.write(b'\n')
+            if atime_resolution != 1:
+                m.atime = (m.atime / atime_resolution) * atime_resolution
+            if mtime_resolution != 1:
+                m.mtime = (m.mtime / mtime_resolution) * mtime_resolution
+            if ctime_resolution != 1:
+                m.ctime = (m.ctime / ctime_resolution) * ctime_resolution
+            out.write(metadata.detailed_bytes(m, active_fields))
+            out.write(b'\n')
+            first_path = False
+
+    if saved_errors:
+        log('WARNING: %d errors encountered.\n' % len(saved_errors))
+        sys.exit(1)
+    else:
+        sys.exit(0)
index 2cd6fbaf8a3cbb01074b325ec55be62119ac7880..a06ffe8ee581d9cab13e1764740241768d2d2210 100644 (file)
@@ -169,41 +169,27 @@ else:  # Python 2
 
     buffer = buffer
 
-
-argv = None
-argvb = None
-
-def _configure_argv():
-    global argv, argvb
-    assert not argv
-    assert not argvb
-    if len(sys.argv) > 1:
-        if environ.get(b'BUP_ARGV_0'):
-            print('error: BUP_ARGV* set and sys.argv not empty', file=sys.stderr)
-            sys.exit(2)
-        argv = sys.argv
-        argvb = [argv_bytes(x) for x in argv]
-        return
-    args = []
-    i = 0
-    arg = environ.get(b'BUP_ARGV_%d' % i)
-    while arg is not None:
-        args.append(arg)
-        i += 1
-        arg = environ.get(b'BUP_ARGV_%d' % i)
-    i -= 1
-    while i >= 0:
-        del environ[b'BUP_ARGV_%d' % i]
-        i -= 1
-    argvb = args
-    # System encoding?
+try:
+    import bup_main
+except ModuleNotFoundError:
+    bup_main = None
+
+if bup_main:
+    def get_argvb():
+        "Return a new list containing the current process argv bytes."
+        return bup_main.argv()
     if py3:
-        argv = [x.decode(errors='surrogateescape') for x in args]
+        def get_argv():
+            "Return a new list containing the current process argv strings."
+            return [x.decode(errors='surrogateescape') for x in bup_main.argv()]
     else:
-        argv = argvb
-
-_configure_argv()
-
+        def get_argv():
+            return bup_main.argv()
+else:
+    def get_argvb():
+        raise Exception('get_argvb requires the bup_main module')
+    def get_argv():
+        raise Exception('get_argv requires the bup_main module')
 
 def wrap_main(main):
     """Run main() and raise a SystemExit with the return value if it
diff --git a/lib/bup/csetup.py b/lib/bup/csetup.py
deleted file mode 100644 (file)
index 9dbb4a7..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-
-from __future__ import absolute_import, print_function
-
-import shlex, sys
-from distutils.core import setup, Extension
-import os
-
-if len(sys.argv) != 4:
-    print('Usage: csetup.py CFLAGS LDFLAGS', file=sys.stderr)
-    sys.exit(2)
-_helpers_cflags = shlex.split(sys.argv[2])
-_helpers_ldflags = shlex.split(sys.argv[3])
-sys.argv = sys.argv[:2]
-
-_helpers_mod = Extension('_helpers',
-                         sources=['_helpers.c', 'bupsplit.c'],
-                         depends=['../../config/config.h', 'bupsplit.h'],
-                         extra_compile_args=_helpers_cflags,
-                         extra_link_args=_helpers_ldflags)
-
-setup(name='_helpers',
-      version='0.1',
-      description='accelerator library for bup',
-      ext_modules=[_helpers_mod])
index c0ac91f4360452edbedb29945f0bc6051fc1b1d5..691a5552dd66ae776b988e64d639f1a7a3ee7d3c 100644 (file)
@@ -69,7 +69,8 @@ def _git_exo(cmd, **kwargs):
 def git_config_get(option, repo_dir=None):
     cmd = (b'git', b'config', b'--get', option)
     p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
-                         env=_gitenv(repo_dir=repo_dir))
+                         env=_gitenv(repo_dir=repo_dir),
+                         close_fds=True)
     r = p.stdout.read()
     rc = p.wait()
     if rc == 0:
@@ -994,7 +995,8 @@ def list_refs(patterns=None, repo_dir=None,
     argv.append(b'--')
     if patterns:
         argv.extend(patterns)
-    p = subprocess.Popen(argv, env=_gitenv(repo_dir), stdout=subprocess.PIPE)
+    p = subprocess.Popen(argv, env=_gitenv(repo_dir), stdout=subprocess.PIPE,
+                         close_fds=True)
     out = p.stdout.read().strip()
     rv = p.wait()  # not fatal
     if rv:
@@ -1045,7 +1047,8 @@ def rev_list(ref_or_refs, parse=None, format=None, repo_dir=None):
     p = subprocess.Popen(rev_list_invocation(ref_or_refs,
                                              format=format),
                          env=_gitenv(repo_dir),
-                         stdout = subprocess.PIPE)
+                         stdout = subprocess.PIPE,
+                         close_fds=True)
     if not format:
         for line in p.stdout:
             yield line.strip()
@@ -1111,7 +1114,8 @@ def update_ref(refname, newval, oldval, repo_dir=None):
         or refname.startswith(b'refs/tags/')
     p = subprocess.Popen([b'git', b'update-ref', refname,
                           hexlify(newval), hexlify(oldval)],
-                         env=_gitenv(repo_dir))
+                         env=_gitenv(repo_dir),
+                         close_fds=True)
     _git_wait(b'git update-ref', p)
 
 
@@ -1120,7 +1124,8 @@ def delete_ref(refname, oldvalue=None):
     assert refname.startswith(b'refs/')
     oldvalue = [] if not oldvalue else [oldvalue]
     p = subprocess.Popen([b'git', b'update-ref', b'-d', refname] + oldvalue,
-                         env=_gitenv())
+                         env=_gitenv(),
+                         close_fds=True)
     _git_wait('git update-ref', p)
 
 
@@ -1151,16 +1156,17 @@ def init_repo(path=None):
     if os.path.exists(d) and not os.path.isdir(os.path.join(d, b'.')):
         raise GitError('"%s" exists but is not a directory\n' % path_msg(d))
     p = subprocess.Popen([b'git', b'--bare', b'init'], stdout=sys.stderr,
-                         env=_gitenv())
+                         env=_gitenv(),
+                         close_fds=True)
     _git_wait('git init', p)
     # Force the index version configuration in order to ensure bup works
     # regardless of the version of the installed Git binary.
     p = subprocess.Popen([b'git', b'config', b'pack.indexVersion', '2'],
-                         stdout=sys.stderr, env=_gitenv())
+                         stdout=sys.stderr, env=_gitenv(), close_fds=True)
     _git_wait('git config', p)
     # Enable the reflog
     p = subprocess.Popen([b'git', b'config', b'core.logAllRefUpdates', b'true'],
-                         stdout=sys.stderr, env=_gitenv())
+                         stdout=sys.stderr, env=_gitenv(), close_fds=True)
     _git_wait('git config', p)
 
 
@@ -1270,15 +1276,19 @@ class CatPipe:
         self.repo_dir = repo_dir
         self.p = self.inprogress = None
 
-    def _abort(self):
-        if self.p:
-            self.p.stdout.close()
-            self.p.stdin.close()
+    def close(self, wait=False):
+        p = self.p
+        if p:
+            p.stdout.close()
+            p.stdin.close()
         self.p = None
         self.inprogress = None
+        if wait:
+            p.wait()
+            return p.returncode
 
     def restart(self):
-        self._abort()
+        self.close()
         self.p = subprocess.Popen([b'git', b'cat-file', b'--batch'],
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
@@ -1316,7 +1326,7 @@ class CatPipe:
         oidx, typ, size = info
         size = int(size)
         it = _AbortableIter(chunkyreader(self.p.stdout, size),
-                            onabort=self._abort)
+                            onabort=self.close)
         try:
             yield oidx, typ, size
             for blob in it:
@@ -1372,6 +1382,13 @@ def cp(repo_dir=None):
     return cp
 
 
+def close_catpipes():
+    # FIXME: chain exceptions
+    while _cp:
+        _, cp = _cp.popitem()
+        cp.close(wait=True)
+
+
 def tags(repo_dir = None):
     """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
     tags = {}
index 8117bd76506eb66da342b4eb5dc9fe458c8e1fb8..9ea313352c9a4d0a618e3448947826af25dfb3e7 100644 (file)
@@ -78,9 +78,9 @@ def opts_from_cmdline(args, onabort=None):
 
     """
     if onabort:
-        opt, flags, extra = Options(optspec, onabort=onabort).parse(args)
+        opt, flags, extra = Options(optspec, onabort=onabort).parse_bytes(args)
     else:
-        opt, flags, extra = Options(optspec).parse(args)
+        opt, flags, extra = Options(optspec).parse_bytes(args)
 
     opt.paths = [argv_bytes(x) for x in extra] or (b'/',)
     opt.long_listing = opt.l
diff --git a/lib/bup/main.py b/lib/bup/main.py
new file mode 100755 (executable)
index 0000000..928f5a6
--- /dev/null
@@ -0,0 +1,409 @@
+
+from __future__ import absolute_import, print_function
+
+import bup_main, os, sys
+if bup_main.env_pythonpath:
+    if sys.version_info[0] < 3:
+        os.environ['PYTHONPATH'] = bup_main.env_pythonpath
+    else:
+        os.environb[b'PYTHONPATH'] = bup_main.env_pythonpath
+else:
+    del os.environ['PYTHONPATH']
+
+from importlib import import_module
+from pkgutil import iter_modules
+from subprocess import PIPE
+from threading import Thread
+import errno, re, select, signal, subprocess
+
+from bup import compat, path, helpers
+from bup.compat import (
+    ModuleNotFoundError,
+    add_ex_ctx,
+    add_ex_tb,
+    argv_bytes,
+    environ,
+    fsdecode,
+    int_types,
+    wrap_main
+)
+
+from bup.helpers import (
+    columnate,
+    debug1,
+    handle_ctrl_c,
+    log,
+    merge_dict,
+    tty_width
+)
+from bup.git import close_catpipes
+from bup.io import byte_stream, path_msg
+from bup.options import _tty_width
+import bup.cmd
+
+def maybe_import_early(argv):
+    """Scan argv and import any modules specified by --import-py-module."""
+    while argv:
+        if argv[0] != '--import-py-module':
+            argv = argv[1:]
+            continue
+        if len(argv) < 2:
+            log("bup: --import-py-module must have an argument\n")
+            exit(2)
+        mod = argv[1]
+        import_module(mod)
+        argv = argv[2:]
+
+maybe_import_early(compat.get_argv())
+
+handle_ctrl_c()
+
+cmdpath = path.cmddir()
+
+# We manipulate the subcmds here as strings, but they must be ASCII
+# compatible, since we're going to be looking for exactly
+# b'bup-SUBCMD' to exec.
+
+def usage(msg=""):
+    log('Usage: bup [-?|--help] [-d BUP_DIR] [--debug] [--profile] '
+        '<command> [options...]\n\n')
+    common = dict(
+        ftp = 'Browse backup sets using an ftp-like client',
+        fsck = 'Check backup sets for damage and add redundancy information',
+        fuse = 'Mount your backup sets as a filesystem',
+        help = 'Print detailed help for the given command',
+        index = 'Create or display the index of files to back up',
+        on = 'Backup a remote machine to the local one',
+        restore = 'Extract files from a backup set',
+        save = 'Save files into a backup set (note: run "bup index" first)',
+        tag = 'Tag commits for easier access',
+        web = 'Launch a web server to examine backup sets',
+    )
+
+    log('Common commands:\n')
+    for cmd,synopsis in sorted(common.items()):
+        log('    %-10s %s\n' % (cmd, synopsis))
+    log('\n')
+    
+    log('Other available commands:\n')
+    cmds = set()
+    for c in sorted(os.listdir(cmdpath)):
+        if c.startswith(b'bup-') and c.find(b'.') < 0:
+            cname = fsdecode(c[4:])
+            if cname not in common:
+                cmds.add(c[4:].decode(errors='backslashreplace'))
+    # built-in commands take precedence
+    for _, name, _ in iter_modules(path=bup.cmd.__path__):
+        name = name.replace('_','-')
+        if name not in common:
+            cmds.add(name)
+
+    log(columnate(sorted(cmds), '    '))
+    log('\n')
+    
+    log("See 'bup help COMMAND' for more information on " +
+        "a specific command.\n")
+    if msg:
+        log("\n%s\n" % msg)
+    sys.exit(99)
+
+args = compat.get_argvb()
+if len(args) < 2:
+    usage()
+
+## Parse global options
+help_requested = None
+do_profile = False
+bup_dir = None
+args = args[1:]
+while args:
+    arg = args[0]
+    if arg in (b'-?', b'--help'):
+        help_requested = True
+        args = args[1:]
+    elif arg in (b'-V', b'--version'):
+        args = [b'version'] + args[1:]
+        break
+    elif arg in (b'-D', b'--debug'):
+        helpers.buglvl += 1
+        environ[b'BUP_DEBUG'] = b'%d' % helpers.buglvl
+        args = args[1:]
+    elif arg == b'--profile':
+        do_profile = True
+        args = args[1:]
+    elif arg in (b'-d', b'--bup-dir'):
+        if len(args) < 2:
+            usage('error: no path provided for %s option' % arg)
+        bup_dir = args[1]
+        args = args[2:]
+    elif arg == b'--import-py-module':
+        args = args[2:]
+    elif arg.startswith(b'-'):
+        usage('error: unexpected option "%s"'
+              % arg.decode('ascii', 'backslashreplace'))
+    else:
+        break
+
+subcmd = args
+
+# Make BUP_DIR absolute, so we aren't affected by chdir (i.e. save -C, etc.).
+if bup_dir:
+    environ[b'BUP_DIR'] = os.path.abspath(bup_dir)
+
+if len(subcmd) == 0:
+    if help_requested:
+        subcmd = [b'help']
+    else:
+        usage()
+
+if help_requested and subcmd[0] != b'help':
+    subcmd = [b'help'] + subcmd
+
+if len(subcmd) > 1 and subcmd[1] == b'--help' and subcmd[0] != b'help':
+    subcmd = [b'help', subcmd[0]] + subcmd[2:]
+
+subcmd_name = subcmd[0]
+if not subcmd_name:
+    usage()
+
+try:
+    cmd_module = import_module('bup.cmd.'
+                               + subcmd_name.decode('ascii').replace('-', '_'))
+except ModuleNotFoundError as ex:
+    cmd_module = None
+
+if not cmd_module:
+    subcmd[0] = os.path.join(cmdpath, b'bup-' + subcmd_name)
+    if not os.path.exists(subcmd[0]):
+        usage('error: unknown command "%s"' % path_msg(subcmd_name))
+
+already_fixed = int(environ.get(b'BUP_FORCE_TTY', 0))
+if subcmd_name in [b'mux', b'ftp', b'help']:
+    already_fixed = True
+fix_stdout = not already_fixed and os.isatty(1)
+fix_stderr = not already_fixed and os.isatty(2)
+
+if fix_stdout or fix_stderr:
+    tty_env = merge_dict(environ,
+                         {b'BUP_FORCE_TTY': (b'%d'
+                                             % ((fix_stdout and 1 or 0)
+                                                + (fix_stderr and 2 or 0))),
+                          b'BUP_TTY_WIDTH': b'%d' % _tty_width(), })
+else:
+    tty_env = environ
+
+
+sep_rx = re.compile(br'([\r\n])')
+
+def print_clean_line(dest, content, width, sep=None):
+    """Write some or all of content, followed by sep, to the dest fd after
+    padding the content with enough spaces to fill the current
+    terminal width or truncating it to the terminal width if sep is a
+    carriage return."""
+    global sep_rx
+    assert sep in (b'\r', b'\n', None)
+    if not content:
+        if sep:
+            os.write(dest, sep)
+        return
+    for x in content:
+        assert not sep_rx.match(x)
+    content = b''.join(content)
+    if sep == b'\r' and len(content) > width:
+        content = content[width:]
+    os.write(dest, content)
+    if len(content) < width:
+        os.write(dest, b' ' * (width - len(content)))
+    if sep:
+        os.write(dest, sep)
+
+def filter_output(srcs, dests):
+    """Transfer data from file descriptors in srcs to the corresponding
+    file descriptors in dests print_clean_line until all of the srcs
+    have closed.
+
+    """
+    global sep_rx
+    assert all(type(x) in int_types for x in srcs)
+    assert all(type(x) in int_types for x in dests)
+    assert len(srcs) == len(dests)
+    srcs = tuple(srcs)
+    dest_for = dict(zip(srcs, dests))
+    pending = {}
+    pending_ex = None
+    try:
+        while srcs:
+            ready_fds, _, _ = select.select(srcs, [], [])
+            width = tty_width()
+            for fd in ready_fds:
+                buf = os.read(fd, 4096)
+                dest = dest_for[fd]
+                if not buf:
+                    srcs = tuple([x for x in srcs if x is not fd])
+                    print_clean_line(dest, pending.pop(fd, []), width)
+                else:
+                    split = sep_rx.split(buf)
+                    while len(split) > 1:
+                        content, sep = split[:2]
+                        split = split[2:]
+                        print_clean_line(dest,
+                                         pending.pop(fd, []) + [content],
+                                         width,
+                                         sep)
+                    assert len(split) == 1
+                    if split[0]:
+                        pending.setdefault(fd, []).extend(split)
+    except BaseException as ex:
+        pending_ex = add_ex_ctx(add_ex_tb(ex), pending_ex)
+    try:
+        # Try to finish each of the streams
+        for fd, pending_items in compat.items(pending):
+            dest = dest_for[fd]
+            width = tty_width()
+            try:
+                print_clean_line(dest, pending_items, width)
+            except (EnvironmentError, EOFError) as ex:
+                pending_ex = add_ex_ctx(add_ex_tb(ex), pending_ex)
+    except BaseException as ex:
+        pending_ex = add_ex_ctx(add_ex_tb(ex), pending_ex)
+    if pending_ex:
+        raise pending_ex
+
+
+def import_and_run_main(module, args):
+    if do_profile:
+        import cProfile
+        f = compile('module.main(args)', __file__, 'exec')
+        cProfile.runctx(f, globals(), locals())
+    else:
+        module.main(args)
+
+
+def run_module_cmd(module, args):
+    if not (fix_stdout or fix_stderr):
+        import_and_run_main(module, args)
+        return
+    # Interpose filter_output between all attempts to write to the
+    # stdout/stderr and the real stdout/stderr (e.g. the fds that
+    # connect directly to the terminal) via a thread that runs
+    # filter_output in a pipeline.
+    srcs = []
+    dests = []
+    real_out_fd = real_err_fd = stdout_pipe = stderr_pipe = None
+    filter_thread = filter_thread_started = None
+    pending_ex = None
+    try:
+        if fix_stdout:
+            sys.stdout.flush()
+            stdout_pipe = os.pipe()  # monitored_by_filter, stdout_everyone_uses
+            real_out_fd = os.dup(sys.stdout.fileno())
+            os.dup2(stdout_pipe[1], sys.stdout.fileno())
+            srcs.append(stdout_pipe[0])
+            dests.append(real_out_fd)
+        if fix_stderr:
+            sys.stderr.flush()
+            stderr_pipe = os.pipe()  # monitored_by_filter, stderr_everyone_uses
+            real_err_fd = os.dup(sys.stderr.fileno())
+            os.dup2(stderr_pipe[1], sys.stderr.fileno())
+            srcs.append(stderr_pipe[0])
+            dests.append(real_err_fd)
+
+        filter_thread = Thread(name='output filter',
+                               target=lambda : filter_output(srcs, dests))
+        filter_thread.start()
+        filter_thread_started = True
+        import_and_run_main(module, args)
+    except Exception as ex:
+        add_ex_tb(ex)
+        pending_ex = ex
+        raise
+    finally:
+        # Try to make sure that whatever else happens, we restore
+        # stdout and stderr here, if that's possible, so that we don't
+        # risk just losing some output.
+        try:
+            real_out_fd is not None and os.dup2(real_out_fd, sys.stdout.fileno())
+        except Exception as ex:
+            add_ex_tb(ex)
+            add_ex_ctx(ex, pending_ex)
+        try:
+            real_err_fd is not None and os.dup2(real_err_fd, sys.stderr.fileno())
+        except Exception as ex:
+            add_ex_tb(ex)
+            add_ex_ctx(ex, pending_ex)
+        # Kick filter loose
+        try:
+            stdout_pipe is not None and os.close(stdout_pipe[1])
+        except Exception as ex:
+            add_ex_tb(ex)
+            add_ex_ctx(ex, pending_ex)
+        try:
+            stderr_pipe is not None and os.close(stderr_pipe[1])
+        except Exception as ex:
+            add_ex_tb(ex)
+            add_ex_ctx(ex, pending_ex)
+        try:
+            close_catpipes()
+        except Exception as ex:
+            add_ex_tb(ex)
+            add_ex_ctx(ex, pending_ex)
+    if pending_ex:
+        raise pending_ex
+    # There's no point in trying to join unless we finished the finally block.
+    if filter_thread_started:
+        filter_thread.join()
+
+
+def run_subproc_cmd(args):
+
+    c = (do_profile and [sys.executable, b'-m', b'cProfile'] or []) + args
+    if not (fix_stdout or fix_stderr):
+        os.execvp(c[0], c)
+
+    sys.stdout.flush()
+    sys.stderr.flush()
+    out = byte_stream(sys.stdout)
+    err = byte_stream(sys.stderr)
+    p = None
+    try:
+        p = subprocess.Popen(c,
+                             stdout=PIPE if fix_stdout else out,
+                             stderr=PIPE if fix_stderr else err,
+                             env=tty_env, bufsize=4096, close_fds=True)
+        # Assume p will receive these signals and quit, which will
+        # then cause us to quit.
+        for sig in (signal.SIGINT, signal.SIGTERM, signal.SIGQUIT):
+            signal.signal(sig, signal.SIG_IGN)
+
+        srcs = []
+        dests = []
+        if fix_stdout:
+            srcs.append(p.stdout.fileno())
+            dests.append(out.fileno())
+        if fix_stderr:
+            srcs.append(p.stderr.fileno())
+            dests.append(err.fileno())
+        filter_output(srcs, dests)
+        return p.wait()
+    except BaseException as ex:
+        add_ex_tb(ex)
+        try:
+            if p and p.poll() is None:
+                os.kill(p.pid, signal.SIGTERM)
+                p.wait()
+        except BaseException as kill_ex:
+            raise add_ex_ctx(add_ex_tb(kill_ex), ex)
+        raise ex
+
+
+def run_subcmd(module, args):
+    if module:
+        run_module_cmd(module, args)
+    else:
+        run_subproc_cmd(args)
+
+def main():
+    wrap_main(lambda : run_subcmd(cmd_module, subcmd))
+
+if __name__ == "__main__":
+    main()
index 83159ebc648431c868109cb68289ced191bc040b..6f1f162d2b2683cc4384a94a8efcb2439b8cd3a2 100644 (file)
@@ -285,3 +285,8 @@ class Options:
                     v = _intify(v)
             opt[k] = _invert(v, invert)
         return (opt,flags,extra)
+
+    def parse_bytes(self, args):
+        if sys.version_info[0] > 2:
+            args = [x.decode(errors='surrogateescape') for x in args]
+        return self.parse(args)
diff --git a/lib/cmd/bloom-cmd.py b/lib/cmd/bloom-cmd.py
deleted file mode 100755 (executable)
index 850382e..0000000
+++ /dev/null
@@ -1,189 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import
-import glob, os, sys, tempfile
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, options, git, bloom
-from bup.compat import argv_bytes, hexstr
-from bup.helpers import (add_error, debug1, handle_ctrl_c, log, progress, qprogress,
-                         saved_errors)
-from bup.io import path_msg
-
-
-optspec = """
-bup bloom [options...]
---
-ruin       ruin the specified bloom file (clearing the bitfield)
-f,force    ignore existing bloom file and regenerate it from scratch
-o,output=  output bloom filename (default: auto)
-d,dir=     input directory to look for idx files (default: auto)
-k,hashes=  number of hash functions to use (4 or 5) (default: auto)
-c,check=   check the given .idx file against the bloom filter
-"""
-
-
-def ruin_bloom(bloomfilename):
-    rbloomfilename = git.repo_rel(bloomfilename)
-    if not os.path.exists(bloomfilename):
-        log(path_msg(bloomfilename) + '\n')
-        add_error('bloom: %s not found to ruin\n' % path_msg(rbloomfilename))
-        return
-    b = bloom.ShaBloom(bloomfilename, readwrite=True, expected=1)
-    b.map[16 : 16 + 2**b.bits] = b'\0' * 2**b.bits
-
-
-def check_bloom(path, bloomfilename, idx):
-    rbloomfilename = git.repo_rel(bloomfilename)
-    ridx = git.repo_rel(idx)
-    if not os.path.exists(bloomfilename):
-        log('bloom: %s: does not exist.\n' % path_msg(rbloomfilename))
-        return
-    b = bloom.ShaBloom(bloomfilename)
-    if not b.valid():
-        add_error('bloom: %r is invalid.\n' % path_msg(rbloomfilename))
-        return
-    base = os.path.basename(idx)
-    if base not in b.idxnames:
-        log('bloom: %s does not contain the idx.\n' % path_msg(rbloomfilename))
-        return
-    if base == idx:
-        idx = os.path.join(path, idx)
-    log('bloom: bloom file: %s\n' % path_msg(rbloomfilename))
-    log('bloom:   checking %s\n' % path_msg(ridx))
-    for objsha in git.open_idx(idx):
-        if not b.exists(objsha):
-            add_error('bloom: ERROR: object %s missing' % hexstr(objsha))
-
-
-_first = None
-def do_bloom(path, outfilename, k):
-    global _first
-    assert k in (None, 4, 5)
-    b = None
-    if os.path.exists(outfilename) and not opt.force:
-        b = bloom.ShaBloom(outfilename)
-        if not b.valid():
-            debug1("bloom: Existing invalid bloom found, regenerating.\n")
-            b = None
-
-    add = []
-    rest = []
-    add_count = 0
-    rest_count = 0
-    for i, name in enumerate(glob.glob(b'%s/*.idx' % path)):
-        progress('bloom: counting: %d\r' % i)
-        ix = git.open_idx(name)
-        ixbase = os.path.basename(name)
-        if b and (ixbase in b.idxnames):
-            rest.append(name)
-            rest_count += len(ix)
-        else:
-            add.append(name)
-            add_count += len(ix)
-
-    if not add:
-        debug1("bloom: nothing to do.\n")
-        return
-
-    if b:
-        if len(b) != rest_count:
-            debug1("bloom: size %d != idx total %d, regenerating\n"
-                   % (len(b), rest_count))
-            b = None
-        elif k is not None and k != b.k:
-            debug1("bloom: new k %d != existing k %d, regenerating\n"
-                   % (k, b.k))
-            b = None
-        elif (b.bits < bloom.MAX_BLOOM_BITS[b.k] and
-              b.pfalse_positive(add_count) > bloom.MAX_PFALSE_POSITIVE):
-            debug1("bloom: regenerating: adding %d entries gives "
-                   "%.2f%% false positives.\n"
-                   % (add_count, b.pfalse_positive(add_count)))
-            b = None
-        else:
-            b = bloom.ShaBloom(outfilename, readwrite=True, expected=add_count)
-    if not b: # Need all idxs to build from scratch
-        add += rest
-        add_count += rest_count
-    del rest
-    del rest_count
-
-    msg = b is None and 'creating from' or 'adding'
-    if not _first: _first = path
-    dirprefix = (_first != path) and git.repo_rel(path) + b': ' or b''
-    progress('bloom: %s%s %d file%s (%d object%s).\r'
-        % (path_msg(dirprefix), msg,
-           len(add), len(add)!=1 and 's' or '',
-           add_count, add_count!=1 and 's' or ''))
-
-    tfname = None
-    if b is None:
-        tfname = os.path.join(path, b'bup.tmp.bloom')
-        b = bloom.create(tfname, expected=add_count, k=k)
-    count = 0
-    icount = 0
-    for name in add:
-        ix = git.open_idx(name)
-        qprogress('bloom: writing %.2f%% (%d/%d objects)\r' 
-                  % (icount*100.0/add_count, icount, add_count))
-        b.add_idx(ix)
-        count += 1
-        icount += len(ix)
-
-    # Currently, there's an open file object for tfname inside b.
-    # Make sure it's closed before rename.
-    b.close()
-
-    if tfname:
-        os.rename(tfname, outfilename)
-
-
-handle_ctrl_c()
-
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-
-if extra:
-    o.fatal('no positional parameters expected')
-
-if not opt.check and opt.k and opt.k not in (4,5):
-    o.fatal('only k values of 4 and 5 are supported')
-
-if opt.check:
-    opt.check = argv_bytes(opt.check)
-
-git.check_repo_or_die()
-
-output = argv_bytes(opt.output) if opt.output else None
-paths = opt.dir and [argv_bytes(opt.dir)] or git.all_packdirs()
-for path in paths:
-    debug1('bloom: scanning %s\n' % path_msg(path))
-    outfilename = output or os.path.join(path, b'bup.bloom')
-    if opt.check:
-        check_bloom(path, outfilename, opt.check)
-    elif opt.ruin:
-        ruin_bloom(outfilename)
-    else:
-        do_bloom(path, outfilename, opt.k)
-
-if saved_errors:
-    log('WARNING: %d errors encountered during bloom.\n' % len(saved_errors))
-    sys.exit(1)
-elif opt.check:
-    log('All tests passed.\n')
diff --git a/lib/cmd/bup b/lib/cmd/bup
deleted file mode 100755 (executable)
index ee029aa..0000000
+++ /dev/null
@@ -1,275 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-set -e
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-# Find our directory
-top="$(pwd)"
-cmdpath="$0"
-# loop because macos doesn't have recursive readlink/realpath utils
-while test -L "$cmdpath"; do
-    link="$(readlink "$cmdpath")"
-    cd "$(dirname "$cmdpath")"
-    cmdpath="$link"
-done
-script_home="$(cd "$(dirname "$cmdpath")" && pwd -P)"
-cd "$top"
-exec "$script_home/../../config/bin/python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import, print_function
-
-import os, sys
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-import errno, getopt, os, re, select, signal, subprocess, sys
-from subprocess import PIPE
-
-from bup.compat import environ, fsdecode
-from bup.io import path_msg
-from bup import compat, path, helpers
-from bup.compat import add_ex_tb, add_ex_ctx, argv_bytes, wrap_main
-from bup.helpers import columnate, debug1, log, merge_dict, tty_width
-from bup.io import byte_stream, path_msg
-from bup.options import _tty_width
-
-cmdpath = path.cmddir()
-
-# We manipulate the subcmds here as strings, but they must be ASCII
-# compatible, since we're going to be looking for exactly
-# b'bup-SUBCMD' to exec.
-
-def usage(msg=""):
-    log('Usage: bup [-?|--help] [-d BUP_DIR] [--debug] [--profile] '
-        '<command> [options...]\n\n')
-    common = dict(
-        ftp = 'Browse backup sets using an ftp-like client',
-        fsck = 'Check backup sets for damage and add redundancy information',
-        fuse = 'Mount your backup sets as a filesystem',
-        help = 'Print detailed help for the given command',
-        index = 'Create or display the index of files to back up',
-        on = 'Backup a remote machine to the local one',
-        restore = 'Extract files from a backup set',
-        save = 'Save files into a backup set (note: run "bup index" first)',
-        tag = 'Tag commits for easier access',
-        web = 'Launch a web server to examine backup sets',
-    )
-
-    log('Common commands:\n')
-    for cmd,synopsis in sorted(common.items()):
-        log('    %-10s %s\n' % (cmd, synopsis))
-    log('\n')
-    
-    log('Other available commands:\n')
-    cmds = []
-    for c in sorted(os.listdir(cmdpath)):
-        if c.startswith(b'bup-') and c.find(b'.') < 0:
-            cname = fsdecode(c[4:])
-            if cname not in common:
-                cmds.append(c[4:].decode(errors='backslashreplace'))
-    log(columnate(cmds, '    '))
-    log('\n')
-    
-    log("See 'bup help COMMAND' for more information on " +
-        "a specific command.\n")
-    if msg:
-        log("\n%s\n" % msg)
-    sys.exit(99)
-
-argv = compat.argv
-if len(argv) < 2:
-    usage()
-
-# Handle global options.
-try:
-    optspec = ['help', 'version', 'debug', 'profile', 'bup-dir=']
-    global_args, subcmd = getopt.getopt(argv[1:], '?VDd:', optspec)
-except getopt.GetoptError as ex:
-    usage('error: %s' % ex.msg)
-
-subcmd = [argv_bytes(x) for x in subcmd]
-help_requested = None
-do_profile = False
-bup_dir = None
-
-for opt in global_args:
-    if opt[0] in ['-?', '--help']:
-        help_requested = True
-    elif opt[0] in ['-V', '--version']:
-        subcmd = [b'version']
-    elif opt[0] in ['-D', '--debug']:
-        helpers.buglvl += 1
-        environ[b'BUP_DEBUG'] = b'%d' % helpers.buglvl
-    elif opt[0] in ['--profile']:
-        do_profile = True
-    elif opt[0] in ['-d', '--bup-dir']:
-        bup_dir = argv_bytes(opt[1])
-    else:
-        usage('error: unexpected option "%s"' % opt[0])
-
-if bup_dir:
-    bup_dir = argv_bytes(bup_dir)
-
-# Make BUP_DIR absolute, so we aren't affected by chdir (i.e. save -C, etc.).
-if bup_dir:
-    environ[b'BUP_DIR'] = os.path.abspath(bup_dir)
-
-if len(subcmd) == 0:
-    if help_requested:
-        subcmd = [b'help']
-    else:
-        usage()
-
-if help_requested and subcmd[0] != b'help':
-    subcmd = [b'help'] + subcmd
-
-if len(subcmd) > 1 and subcmd[1] == b'--help' and subcmd[0] != b'help':
-    subcmd = [b'help', subcmd[0]] + subcmd[2:]
-
-subcmd_name = subcmd[0]
-if not subcmd_name:
-    usage()
-
-def subpath(subcmd):
-    return os.path.join(cmdpath, b'bup-' + subcmd)
-
-subcmd[0] = subpath(subcmd_name)
-if not os.path.exists(subcmd[0]):
-    usage('error: unknown command "%s"' % path_msg(subcmd_name))
-
-already_fixed = int(environ.get(b'BUP_FORCE_TTY', 0))
-if subcmd_name in [b'mux', b'ftp', b'help']:
-    already_fixed = True
-fix_stdout = not already_fixed and os.isatty(1)
-fix_stderr = not already_fixed and os.isatty(2)
-
-if fix_stdout or fix_stderr:
-    tty_env = merge_dict(environ,
-                         {b'BUP_FORCE_TTY': (b'%d'
-                                             % ((fix_stdout and 1 or 0)
-                                                + (fix_stderr and 2 or 0))),
-                          b'BUP_TTY_WIDTH': b'%d' % _tty_width(), })
-else:
-    tty_env = environ
-
-
-sep_rx = re.compile(br'([\r\n])')
-
-def print_clean_line(dest, content, width, sep=None):
-    """Write some or all of content, followed by sep, to the dest fd after
-    padding the content with enough spaces to fill the current
-    terminal width or truncating it to the terminal width if sep is a
-    carriage return."""
-    global sep_rx
-    assert sep in (b'\r', b'\n', None)
-    if not content:
-        if sep:
-            os.write(dest, sep)
-        return
-    for x in content:
-        assert not sep_rx.match(x)
-    content = b''.join(content)
-    if sep == b'\r' and len(content) > width:
-        content = content[width:]
-    os.write(dest, content)
-    if len(content) < width:
-        os.write(dest, b' ' * (width - len(content)))
-    if sep:
-        os.write(dest, sep)
-
-def filter_output(src_out, src_err, dest_out, dest_err):
-    """Transfer data from src_out to dest_out and src_err to dest_err via
-    print_clean_line until src_out and src_err close."""
-    global sep_rx
-    assert not isinstance(src_out, bool)
-    assert not isinstance(src_err, bool)
-    assert not isinstance(dest_out, bool)
-    assert not isinstance(dest_err, bool)
-    assert src_out is not None or src_err is not None
-    assert (src_out is None) == (dest_out is None)
-    assert (src_err is None) == (dest_err is None)
-    pending = {}
-    pending_ex = None
-    try:
-        fds = tuple([x for x in (src_out, src_err) if x is not None])
-        while fds:
-            ready_fds, _, _ = select.select(fds, [], [])
-            width = tty_width()
-            for fd in ready_fds:
-                buf = os.read(fd, 4096)
-                dest = dest_out if fd == src_out else dest_err
-                if not buf:
-                    fds = tuple([x for x in fds if x is not fd])
-                    print_clean_line(dest, pending.pop(fd, []), width)
-                else:
-                    split = sep_rx.split(buf)
-                    while len(split) > 1:
-                        content, sep = split[:2]
-                        split = split[2:]
-                        print_clean_line(dest,
-                                         pending.pop(fd, []) + [content],
-                                         width,
-                                         sep)
-                    assert(len(split) == 1)
-                    if split[0]:
-                        pending.setdefault(fd, []).extend(split)
-    except BaseException as ex:
-        pending_ex = add_ex_ctx(add_ex_tb(ex), pending_ex)
-    try:
-        # Try to finish each of the streams
-        for fd, pending_items in compat.items(pending):
-            dest = dest_out if fd == src_out else dest_err
-            try:
-                print_clean_line(dest, pending_items, width)
-            except (EnvironmentError, EOFError) as ex:
-                pending_ex = add_ex_ctx(add_ex_tb(ex), pending_ex)
-    except BaseException as ex:
-        pending_ex = add_ex_ctx(add_ex_tb(ex), pending_ex)
-    if pending_ex:
-        raise pending_ex
-
-def run_subcmd(subcmd):
-
-    c = (do_profile and [sys.executable, b'-m', b'cProfile'] or []) + subcmd
-    if not (fix_stdout or fix_stderr):
-        os.execvp(c[0], c)
-
-    sys.stdout.flush()
-    sys.stderr.flush()
-    out = byte_stream(sys.stdout)
-    err = byte_stream(sys.stderr)
-    p = None
-    try:
-        p = subprocess.Popen(c,
-                             stdout=PIPE if fix_stdout else out,
-                             stderr=PIPE if fix_stderr else err,
-                             env=tty_env, bufsize=4096, close_fds=True)
-        # Assume p will receive these signals and quit, which will
-        # then cause us to quit.
-        for sig in (signal.SIGINT, signal.SIGTERM, signal.SIGQUIT):
-            signal.signal(sig, signal.SIG_IGN)
-
-        filter_output(fix_stdout and p.stdout.fileno() or None,
-                      fix_stderr and p.stderr.fileno() or None,
-                      fix_stdout and out.fileno() or None,
-                      fix_stderr and err.fileno() or None)
-        return p.wait()
-    except BaseException as ex:
-        add_ex_tb(ex)
-        try:
-            if p and p.poll() == None:
-                os.kill(p.pid, signal.SIGTERM)
-                p.wait()
-        except BaseException as kill_ex:
-            raise add_ex_ctx(add_ex_tb(kill_ex), ex)
-        raise ex
-        
-wrap_main(lambda : run_subcmd(subcmd))
diff --git a/lib/cmd/bup-import-rdiff-backup b/lib/cmd/bup-import-rdiff-backup
new file mode 100755 (executable)
index 0000000..0bbf327
--- /dev/null
@@ -0,0 +1,80 @@
+#!/usr/bin/env bash
+
+cmd_dir="$(cd "$(dirname "$0")" && pwd)" || exit $?
+
+set -o pipefail
+
+must() {
+    local file=${BASH_SOURCE[0]}
+    local line=${BASH_LINENO[0]}
+    "$@"
+    local rc=$?
+    if test $rc -ne 0; then
+        echo "Failed at line $line in $file" 1>&2
+        exit $rc
+    fi
+}
+
+usage() {
+    echo "Usage: bup import-rdiff-backup [-n]" \
+        "<path to rdiff-backup root> <backup name>"
+    echo "-n,--dry-run: just print what would be done"
+    exit 1
+}
+
+control_c() {
+    echo "bup import-rdiff-backup: signal 2 received" 1>&2
+    exit 128
+}
+
+must trap control_c INT
+
+dry_run=
+while [ "$1" = "-n" -o "$1" = "--dry-run" ]; do
+    dry_run=echo
+    shift
+done
+
+bup()
+{
+    $dry_run "$cmd_dir/bup" "$@"
+}
+
+snapshot_root="$1"
+branch="$2"
+
+[ -n "$snapshot_root" -a "$#" = 2 ] || usage
+
+if [ ! -e "$snapshot_root/." ]; then
+    echo "'$snapshot_root' isn't a directory!"
+    exit 1
+fi
+
+
+backups=$(must rdiff-backup --list-increments --parsable-output "$snapshot_root") \
+    || exit $?
+backups_count=$(echo "$backups" | must wc -l) || exit $?
+counter=1
+echo "$backups" |
+while read timestamp type; do
+    tmpdir=$(must mktemp -d import-rdiff-backup-XXXXXXX) || exit $?
+
+    echo "Importing backup from $(date -d @$timestamp +%c) " \
+        "($counter / $backups_count)" 1>&2
+    echo 1>&2
+
+    echo "Restoring from rdiff-backup..." 1>&2
+    must rdiff-backup -r $timestamp "$snapshot_root" "$tmpdir"
+    echo 1>&2
+
+    echo "Importing into bup..." 1>&2
+    tmpidx=$(must mktemp -u import-rdiff-backup-idx-XXXXXXX) || exit $?
+    must bup index -ux -f "$tmpidx" "$tmpdir"
+    must bup save --strip --date="$timestamp" -f "$tmpidx" -n "$branch" "$tmpdir"
+    must rm -f "$tmpidx"
+
+    must rm -rf "$tmpdir"
+    counter=$((counter+1))
+    echo 1>&2
+    echo 1>&2
+done
diff --git a/lib/cmd/bup-import-rsnapshot b/lib/cmd/bup-import-rsnapshot
new file mode 100755 (executable)
index 0000000..91f711e
--- /dev/null
@@ -0,0 +1,59 @@
+#!/bin/sh
+# Does an import of an rsnapshot archive.
+
+cmd_dir="$(cd "$(dirname "$0")" && pwd)" || exit $?
+
+usage() {
+    echo "Usage: bup import-rsnapshot [-n]" \
+        "<path to snapshot_root> [<backuptarget>]"
+    echo "-n,--dry-run: just print what would be done"
+    exit 1
+}
+
+DRY_RUN=
+while [ "$1" = "-n" -o "$1" = "--dry-run" ]; do
+    DRY_RUN=echo
+    shift
+done
+
+bup()
+{
+    $DRY_RUN "$cmd_dir/bup" "$@"
+}
+
+SNAPSHOT_ROOT=$1
+TARGET=$2
+
+[ -n "$SNAPSHOT_ROOT" -a "$#" -le 2 ] || usage
+
+if [ ! -e "$SNAPSHOT_ROOT/." ]; then
+    echo "'$SNAPSHOT_ROOT' isn't a directory!"
+    exit 1
+fi
+
+
+cd "$SNAPSHOT_ROOT" || exit 2
+
+for SNAPSHOT in *; do
+    [ -e "$SNAPSHOT/." ] || continue
+    echo "snapshot='$SNAPSHOT'" >&2
+    for BRANCH_PATH in "$SNAPSHOT/"*; do
+        BRANCH=$(basename "$BRANCH_PATH") || exit $?
+        [ -e "$BRANCH_PATH/." ] || continue
+        [ -z "$TARGET" -o "$TARGET" = "$BRANCH" ] || continue
+        
+        echo "snapshot='$SNAPSHOT' branch='$BRANCH'" >&2
+
+        # Get the snapshot's ctime
+        DATE=$(perl -e '@a=stat($ARGV[0]) or die "$ARGV[0]: $!";
+                        print $a[10];' "$BRANCH_PATH")
+       [ -n "$DATE" ] || exit 3
+
+        TMPIDX=bupindex.$BRANCH.tmp
+        bup index -ux -f "$TMPIDX" "$BRANCH_PATH/" || exit $?
+        bup save --strip --date="$DATE" \
+            -f "$TMPIDX" -n "$BRANCH" \
+            "$BRANCH_PATH/" || exit $?
+        rm "$TMPIDX" || exit $?
+    done
+done
diff --git a/lib/cmd/bup.c b/lib/cmd/bup.c
new file mode 100644 (file)
index 0000000..f85029a
--- /dev/null
@@ -0,0 +1,387 @@
+
+#define PY_SSIZE_T_CLEAN
+#define _GNU_SOURCE  1 // asprintf
+#undef NDEBUG
+
+// According to Python, its header has to go first:
+//   http://docs.python.org/2/c-api/intro.html#include-files
+//   http://docs.python.org/3/c-api/intro.html#include-files
+#include <Python.h>
+
+#include <libgen.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+# include <sys/sysctl.h>
+#endif
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "bup/compat.h"
+#include "bup/io.h"
+
+static int prog_argc = 0;
+static char **prog_argv = NULL;
+static char *orig_env_pythonpath = NULL;
+
+static PyObject*
+get_argv(PyObject *self, PyObject *args)
+{
+    if (!PyArg_ParseTuple(args, ""))
+       return NULL;
+
+    PyObject *result = PyList_New(prog_argc);
+    for (int i = 0; i < prog_argc; i++) {
+        PyObject *s = PyBytes_FromString(prog_argv[i]);
+        if (!s)
+            die(2, "cannot convert argument to bytes: %s\n", prog_argv[i]);
+        PyList_SET_ITEM(result, i, s);
+    }
+    return result;
+}
+
+static PyMethodDef bup_main_methods[] = {
+    {"argv", get_argv, METH_VARARGS,
+     "Return the program's current argv array as a list of byte strings." },
+    {NULL, NULL, 0, NULL}
+};
+
+static int setup_module(PyObject *mod)
+{
+    if (!orig_env_pythonpath) {
+        PyObject_SetAttrString(mod, "env_pythonpath", Py_None);
+    } else {
+        PyObject *py_p = PyBytes_FromString(orig_env_pythonpath);
+        if (!py_p)
+            die(2, "cannot convert PYTHONPATH to bytes: %s\n",
+                orig_env_pythonpath);
+        PyObject_SetAttrString(mod, "env_pythonpath", py_p);
+        Py_DECREF(py_p);
+    }
+    return 1;
+}
+
+#if PY_MAJOR_VERSION >= 3
+
+static struct PyModuleDef bup_main_module_def = {
+    .m_base = PyModuleDef_HEAD_INIT,
+    .m_name = "bup_main",
+    .m_doc = "Built-in bup module providing direct access to argv.",
+    .m_size = -1,
+    .m_methods = bup_main_methods
+};
+
+PyObject *
+PyInit_bup_main(void) {
+    PyObject *mod =  PyModule_Create(&bup_main_module_def);
+    if (!setup_module(mod))
+    {
+        Py_DECREF(mod);
+        return NULL;
+    }
+    return mod;
+}
+
+#else // PY_MAJOR_VERSION < 3
+
+void PyInit_bup_main(void)
+{
+    PyObject *mod = Py_InitModule("bup_main", bup_main_methods);
+    if (mod == NULL) {
+        PyErr_SetString(PyExc_RuntimeError, "bup._helpers init failed");
+        return;
+    }
+    if (!setup_module(mod))
+    {
+        PyErr_SetString(PyExc_RuntimeError, "bup._helpers set up failed");
+        Py_DECREF(mod);
+        return;
+    }
+}
+
+#endif // PY_MAJOR_VERSION < 3
+
+static void
+setup_bup_main_module(void) {
+
+    char *path = getenv("PYTHONPATH");
+    if (path)
+        orig_env_pythonpath = strdup(path);
+
+    if (PyImport_AppendInittab("bup_main", PyInit_bup_main) == -1)
+        die(2, "unable to register bup_main module\n");
+}
+
+#if defined(__APPLE__) && defined(__MACH__)
+
+static char *exe_parent_dir(const char * const argv_0) {
+    char *mpath = NULL;
+    char spath[2048];
+    uint32_t size = sizeof(spath);
+    int rc = _NSGetExecutablePath(spath, &size);
+    if (rc == -1) {
+        mpath = malloc(size);
+        if (!mpath) die(2, "unable to allocate memory for executable path\n");
+        rc = _NSGetExecutablePath(mpath, &size);
+    }
+    if(rc != 0) die(2, "unable to find executable path\n");
+    char *path = mpath ? mpath : spath;
+    char *abs_exe = realpath(path, NULL);
+    if (!abs_exe)
+        die(2, "cannot resolve path (%s): %s\n", strerror(errno), path);
+    char * const abs_parent = strdup(dirname(abs_exe));
+    assert(abs_parent);
+    if (mpath) free(mpath);
+    free(abs_exe);
+    return abs_parent;
+}
+
+#elif defined(__FreeBSD__) || defined(__NetBSD__)
+
+static char *exe_path ()
+{
+    const int mib[] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
+    size_t path_len;
+    int rc = sysctl (mib, 4, NULL, &path_len, NULL, 0);
+    if (rc != 0) die(2, "unable to determine executable path length\n");
+    char *path = malloc (path_len);
+    if (!path) die(2, "unable to allocate memory for executable path\n");
+    rc = sysctl (mib, 4, path, &path_len, NULL, 0);
+    if (rc != 0) die(2, "unable to determine executable path via sysctl\n");
+    return path;
+}
+
+static char *exe_parent_dir(const char * const argv_0)
+{
+    char * const exe = exe_path();
+    if (!exe) die(2, "unable to determine executable path\n");
+    char * const parent = strdup(dirname(exe));
+    if (!parent) die(2, "unable to determine parent directory of executable\n");
+    free(exe);
+    return parent;
+}
+
+#else // not defined(__FreeBSD__) || defined(__NetBSD__)
+
+/// Use /proc if possible, and if all else fails, search in the PATH
+
+#if defined(__linux__)
+# define PROC_SELF_EXE "/proc/self/exe"
+#elif defined(__sun) || defined (sun)
+# define PROC_SELF_EXE "/proc/self/path/a.out"
+#else
+# define PROC_SELF_EXE NULL
+#endif
+
+static char *find_in_path(const char * const name, const char * const path)
+{
+    char *result = NULL;
+    char *tmp_path = strdup(path);
+    assert(tmp_path);
+    const char *elt;
+    char *tok_path = tmp_path;
+    while ((elt = strtok(tok_path, ":")) != NULL) {
+        tok_path = NULL;
+        char *candidate;
+        int rc = asprintf(&candidate, "%s/%s", elt, name);
+        assert(rc >= 0);
+        struct stat st;
+        rc = stat(candidate, &st);
+        if (rc != 0) {
+            switch (errno) {
+                case EACCES: case ELOOP: case ENOENT: case ENAMETOOLONG:
+                case ENOTDIR:
+                    break;
+                default:
+                    die(2, "cannot stat %s: %s\n", candidate, strerror(errno));
+                    break;
+            }
+        } else if (S_ISREG(st.st_mode)) {
+            if (access(candidate, X_OK) == 0) {
+                result = candidate;
+                break;
+            }
+            switch (errno) {
+                case EACCES: case ELOOP: case ENOENT: case ENAMETOOLONG:
+                case ENOTDIR:
+                    break;
+                default:
+                    die(2, "cannot determine executability of %s: %s\n",
+                        candidate, strerror(errno));
+                    break;
+            }
+        }
+        free(candidate);
+    }
+    free(tmp_path);
+    return result;
+}
+
+static char *find_exe_parent(const char * const argv_0)
+{
+    char *candidate = NULL;
+    const char * const slash = index(argv_0, '/');
+    if (slash) {
+        candidate = strdup(argv_0);
+        assert(candidate);
+    } else {
+        const char * const env_path = getenv("PATH");
+        if (!env_path)
+            die(2, "no PATH and executable isn't relative or absolute: %s\n",
+                argv_0);
+        char *path_exe = find_in_path(argv_0, env_path);
+        if (path_exe) {
+            char * abs_exe = realpath(path_exe, NULL);
+            if (!abs_exe)
+                die(2, "cannot resolve path (%s): %s\n",
+                    strerror(errno), path_exe);
+            free(path_exe);
+            candidate = abs_exe;
+        }
+    }
+    if (!candidate)
+        return NULL;
+
+    char * const abs_exe = realpath(candidate, NULL);
+    if (!abs_exe)
+        die(2, "cannot resolve path (%s): %s\n", strerror(errno), candidate);
+    free(candidate);
+    char * const abs_parent = strdup(dirname(abs_exe));
+    assert(abs_parent);
+    free(abs_exe);
+    return abs_parent;
+}
+
+static char *exe_parent_dir(const char * const argv_0)
+{
+    if (PROC_SELF_EXE != NULL) {
+        char sbuf[2048];
+        char *path = sbuf;
+        size_t path_n = sizeof(sbuf);
+        ssize_t len;
+        while (1) {
+            len = readlink(PROC_SELF_EXE, path, path_n);
+            if (len == -1 || (size_t) len != path_n)
+                break;
+            path_n *= 2;
+            if (path != sbuf) free(path);
+            path = malloc(path_n);
+            if (!path)
+                die(2, "unable to allocate memory for executable path\n");
+        }
+        if (len != -1) {
+            path[len] = '\0';
+            char *result = strdup(dirname(path));
+            if (path != sbuf)
+                free(path);
+            return result;
+        }
+        switch (errno) {
+        case ENOENT: case EACCES: case EINVAL: case ELOOP: case ENOTDIR:
+        case ENAMETOOLONG:
+            break;
+        default:
+            die(2, "cannot resolve %s: %s\n", path, strerror(errno));
+            break;
+        }
+        if (path != sbuf)
+            free(path);
+    }
+    return find_exe_parent(argv_0);
+}
+
+#endif // use /proc if possible, and if all else fails, search in the PATH
+
+static void
+setenv_or_die(const char *name, const char *value)
+{
+    int rc = setenv(name, value, 1);
+    if (rc != 0)
+        die(2, "setenv %s=%s failed (%s)\n", name, value, strerror(errno));
+}
+
+static void
+prepend_lib_to_pythonpath(const char * const exec_path,
+                          const char * const relative_path)
+{
+    char *parent = exe_parent_dir(exec_path);
+    assert(parent);
+    char *bupmodpath;
+    int rc = asprintf(&bupmodpath, "%s/%s", parent, relative_path);
+    assert(rc >= 0);
+    struct stat st;
+    rc = stat(bupmodpath, &st);
+    if (rc != 0)
+        die(2, "unable find lib dir (%s): %s\n", strerror(errno), bupmodpath);
+    if (!S_ISDIR(st.st_mode))
+        die(2, "lib path is not dir: %s\n", bupmodpath);
+    char *curpypath = getenv("PYTHONPATH");
+    if (curpypath) {
+        char *path;
+        int rc = asprintf(&path, "%s:%s", bupmodpath, curpypath);
+        assert(rc >= 0);
+        setenv_or_die("PYTHONPATH", path);
+        free(path);
+    } else {
+        setenv_or_die("PYTHONPATH", bupmodpath);
+    }
+
+    free(bupmodpath);
+    free(parent);
+}
+
+#if PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 8
+# define bup_py_main bup_py_bytes_main
+#elif PY_MAJOR_VERSION > 2
+# define bup_py_main Py_BytesMain
+#else
+# define bup_py_main Py_Main
+#endif
+
+#if defined(BUP_DEV_BUP_PYTHON) && defined(BUP_DEV_BUP_EXEC)
+# error "Both BUP_DEV_BUP_PYTHON and BUP_DEV_BUP_EXEC are defined"
+#endif
+
+#ifdef BUP_DEV_BUP_PYTHON
+
+int main(int argc, char **argv)
+{
+    prog_argc = argc;
+    prog_argv = argv;
+    setup_bup_main_module();
+    prepend_lib_to_pythonpath(argv[0], "../lib");
+    return bup_py_main (argc, argv);
+}
+
+#elif defined(BUP_DEV_BUP_EXEC)
+
+int main(int argc, char **argv)
+{
+    prog_argc = argc - 1;
+    prog_argv = argv + 1;
+    setup_bup_main_module();
+    prepend_lib_to_pythonpath(argv[0], "../lib");
+    if (argc == 1)
+        return bup_py_main (1, argv);
+    // This can't handle a script with a name like "-c", but that's
+    // python's problem, not ours.
+    return bup_py_main (2, argv);
+}
+
+#else // normal bup command
+
+int main(int argc, char **argv)
+{
+    prog_argc = argc;
+    prog_argv = argv;
+    setup_bup_main_module();
+    prepend_lib_to_pythonpath(argv[0], "..");
+    char *bup_argv[] = { argv[0], "-m", "bup.main" };
+    return bup_py_main (3, bup_argv);
+}
+
+#endif // normal bup command
diff --git a/lib/cmd/cat-file-cmd.py b/lib/cmd/cat-file-cmd.py
deleted file mode 100755 (executable)
index 388ca03..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import
-import os.path, re, stat, sys
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, options, git, vfs
-from bup.compat import argv_bytes
-from bup.helpers import chunkyreader, handle_ctrl_c, log, saved_errors
-from bup.io import byte_stream
-from bup.repo import LocalRepo
-
-optspec = """
-bup cat-file [--meta|--bupm] /branch/revision/[path]
---
-meta        print the target's metadata entry (decoded then reencoded) to stdout
-bupm        print the target directory's .bupm file directly to stdout
-"""
-
-handle_ctrl_c()
-
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-
-git.check_repo_or_die()
-
-if not extra:
-    o.fatal('must specify a target')
-if len(extra) > 1:
-    o.fatal('only one target file allowed')
-if opt.bupm and opt.meta:
-    o.fatal('--meta and --bupm are incompatible')
-    
-target = argv_bytes(extra[0])
-
-if not re.match(br'/*[^/]+/[^/]+', target):
-    o.fatal("path %r doesn't include a branch and revision" % target)
-
-repo = LocalRepo()
-resolved = vfs.resolve(repo, target, follow=False)
-leaf_name, leaf_item = resolved[-1]
-if not leaf_item:
-    log('error: cannot access %r in %r\n'
-        % ('/'.join(name for name, item in resolved), path))
-    sys.exit(1)
-
-mode = vfs.item_mode(leaf_item)
-
-sys.stdout.flush()
-out = byte_stream(sys.stdout)
-
-if opt.bupm:
-    if not stat.S_ISDIR(mode):
-        o.fatal('%r is not a directory' % target)
-    _, bupm_oid = vfs.tree_data_and_bupm(repo, leaf_item.oid)
-    if bupm_oid:
-        with vfs.tree_data_reader(repo, bupm_oid) as meta_stream:
-            out.write(meta_stream.read())
-elif opt.meta:
-    augmented = vfs.augment_item_meta(repo, leaf_item, include_size=True)
-    out.write(augmented.meta.encode())
-else:
-    if stat.S_ISREG(mode):
-        with vfs.fopen(repo, leaf_item) as f:
-            for b in chunkyreader(f):
-                out.write(b)
-    else:
-        o.fatal('%r is not a plain file' % target)
-
-if saved_errors:
-    log('warning: %d errors encountered\n' % len(saved_errors))
-    sys.exit(1)
diff --git a/lib/cmd/daemon-cmd.py b/lib/cmd/daemon-cmd.py
deleted file mode 100755 (executable)
index ffe79dd..0000000
+++ /dev/null
@@ -1,88 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import
-import os, sys, getopt, socket, subprocess, fcntl
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, options, path
-from bup.helpers import *
-
-optspec = """
-bup daemon [options...] -- [bup-server options...]
---
-l,listen  ip address to listen on, defaults to *
-p,port    port to listen on, defaults to 1982
-"""
-o = options.Options(optspec, optfunc=getopt.getopt)
-opt, flags, extra = o.parse(compat.argv[1:])
-
-host = opt.listen
-port = opt.port and int(opt.port) or 1982
-
-import socket
-import sys
-
-socks = []
-e = None
-for res in socket.getaddrinfo(host, port, socket.AF_UNSPEC,
-                              socket.SOCK_STREAM, 0, socket.AI_PASSIVE):
-    af, socktype, proto, canonname, sa = res
-    try:
-        s = socket.socket(af, socktype, proto)
-    except socket.error as e:
-        continue
-    try:
-        if af == socket.AF_INET6:
-            log("bup daemon: listening on [%s]:%s\n" % sa[:2])
-        else:
-            log("bup daemon: listening on %s:%s\n" % sa[:2])
-        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-        s.bind(sa)
-        s.listen(1)
-        fcntl.fcntl(s.fileno(), fcntl.F_SETFD, fcntl.FD_CLOEXEC)
-    except socket.error as e:
-        s.close()
-        continue
-    socks.append(s)
-
-if not socks:
-    log('bup daemon: listen socket: %s\n' % e.args[1])
-    sys.exit(1)
-
-try:
-    while True:
-        [rl,wl,xl] = select.select(socks, [], [], 60)
-        for l in rl:
-            s, src = l.accept()
-            try:
-                log("Socket accepted connection from %s\n" % (src,))
-                fd1 = os.dup(s.fileno())
-                fd2 = os.dup(s.fileno())
-                s.close()
-                sp = subprocess.Popen([path.exe(), 'mux', '--',
-                                       path.exe(), 'server']
-                                      + extra, stdin=fd1, stdout=fd2)
-            finally:
-                os.close(fd1)
-                os.close(fd2)
-finally:
-    for l in socks:
-        l.shutdown(socket.SHUT_RDWR)
-        l.close()
-
-debug1("bup daemon: done")
diff --git a/lib/cmd/damage-cmd.py b/lib/cmd/damage-cmd.py
deleted file mode 100755 (executable)
index 85995ed..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import
-import os, random, sys
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, options
-from bup.compat import argv_bytes, bytes_from_uint, range
-from bup.helpers import log
-from bup.io import path_msg
-
-
-def randblock(n):
-    return b''.join(bytes_from_uint(random.randrange(0,256)) for i in range(n))
-
-
-optspec = """
-bup damage [-n count] [-s maxsize] [-S seed] <filenames...>
---
-   WARNING: THIS COMMAND IS EXTREMELY DANGEROUS
-n,num=   number of blocks to damage
-s,size=  maximum size of each damaged block
-percent= maximum size of each damaged block (as a percent of entire file)
-equal    spread damage evenly throughout the file
-S,seed=  random number seed (for repeatable tests)
-"""
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-
-if not extra:
-    o.fatal('filenames expected')
-
-if opt.seed != None:
-    random.seed(opt.seed)
-
-for name in extra:
-    name = argv_bytes(name)
-    log('Damaging "%s"...\n' % path_msg(name))
-    with open(name, 'r+b') as f:
-        st = os.fstat(f.fileno())
-        size = st.st_size
-        if opt.percent or opt.size:
-            ms1 = int(float(opt.percent or 0)/100.0*size) or size
-            ms2 = opt.size or size
-            maxsize = min(ms1, ms2)
-        else:
-            maxsize = 1
-        chunks = opt.num or 10
-        chunksize = size // chunks
-        for r in range(chunks):
-            sz = random.randrange(1, maxsize+1)
-            if sz > size:
-                sz = size
-            if opt.equal:
-                ofs = r*chunksize
-            else:
-                ofs = random.randrange(0, size - sz + 1)
-            log('  %6d bytes at %d\n' % (sz, ofs))
-            f.seek(ofs)
-            f.write(randblock(sz))
diff --git a/lib/cmd/drecurse-cmd.py b/lib/cmd/drecurse-cmd.py
deleted file mode 100755 (executable)
index 1c67918..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import, print_function
-from os.path import relpath
-import os.path, sys
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, options, drecurse
-from bup.compat import argv_bytes
-from bup.helpers import log, parse_excludes, parse_rx_excludes, saved_errors
-from bup.io import byte_stream
-
-
-optspec = """
-bup drecurse <path>
---
-x,xdev,one-file-system   don't cross filesystem boundaries
-exclude= a path to exclude from the backup (can be used more than once)
-exclude-from= a file that contains exclude paths (can be used more than once)
-exclude-rx= skip paths matching the unanchored regex (may be repeated)
-exclude-rx-from= skip --exclude-rx patterns in file (may be repeated)
-q,quiet  don't actually print filenames
-profile  run under the python profiler
-"""
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-
-if len(extra) != 1:
-    o.fatal("exactly one filename expected")
-
-drecurse_top = argv_bytes(extra[0])
-excluded_paths = parse_excludes(flags, o.fatal)
-if not drecurse_top.startswith(b'/'):
-    excluded_paths = [relpath(x) for x in excluded_paths]
-exclude_rxs = parse_rx_excludes(flags, o.fatal)
-it = drecurse.recursive_dirlist([drecurse_top], opt.xdev,
-                                excluded_paths=excluded_paths,
-                                exclude_rxs=exclude_rxs)
-if opt.profile:
-    import cProfile
-    def do_it():
-        for i in it:
-            pass
-    cProfile.run('do_it()')
-else:
-    if opt.quiet:
-        for i in it:
-            pass
-    else:
-        sys.stdout.flush()
-        out = byte_stream(sys.stdout)
-        for (name,st) in it:
-            out.write(name + b'\n')
-
-if saved_errors:
-    log('WARNING: %d errors encountered.\n' % len(saved_errors))
-    sys.exit(1)
diff --git a/lib/cmd/features-cmd.py b/lib/cmd/features-cmd.py
deleted file mode 100755 (executable)
index 3a5911d..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import, print_function
-import os.path, sys, platform
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import _helpers, compat, metadata, options, version
-from bup.io import byte_stream
-
-out = None
-
-def show_support(out, bool_opt, what):
-    out.write(b'    %s: %s\n' % (what, b'yes' if bool_opt else b'no'))
-
-optspec = """
-bup features
-"""
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-
-sys.stdout.flush()
-out = byte_stream(sys.stdout)
-
-out.write(b'bup %s\n' % version.version)
-out.write(b'Source %s %s\n' % (version.commit, version.date))
-
-have_readline = getattr(_helpers, 'readline', None)
-have_libacl = getattr(_helpers, 'read_acl', None)
-have_xattr = metadata.xattr
-
-out.write(b'    Python: %s\n' % platform.python_version().encode('ascii'))
-show_support(out, have_readline, b'Command line editing (e.g. bup ftp)')
-show_support(out, have_libacl, b'Saving and restoring POSIX ACLs')
-show_support(out, have_xattr, b'Saving and restoring extended attributes (xattrs)')
diff --git a/lib/cmd/fsck-cmd.py b/lib/cmd/fsck-cmd.py
deleted file mode 100755 (executable)
index 54b91f4..0000000
+++ /dev/null
@@ -1,278 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import, print_function
-import os, glob, subprocess, sys
-from shutil import rmtree
-from subprocess import PIPE, Popen
-from tempfile import mkdtemp
-from binascii import hexlify
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, options, git
-from bup.compat import argv_bytes
-from bup.helpers import Sha1, chunkyreader, istty2, log, progress
-from bup.io import byte_stream
-
-
-par2_ok = 0
-nullf = open(os.devnull, 'wb+')
-
-def debug(s):
-    if opt.verbose > 1:
-        log(s)
-
-def run(argv):
-    # at least in python 2.5, using "stdout=2" or "stdout=sys.stderr" below
-    # doesn't actually work, because subprocess closes fd #2 right before
-    # execing for some reason.  So we work around it by duplicating the fd
-    # first.
-    fd = os.dup(2)  # copy stderr
-    try:
-        p = subprocess.Popen(argv, stdout=fd, close_fds=False)
-        return p.wait()
-    finally:
-        os.close(fd)
-
-def par2_setup():
-    global par2_ok
-    rv = 1
-    try:
-        p = subprocess.Popen([b'par2', b'--help'],
-                             stdout=nullf, stderr=nullf, stdin=nullf)
-        rv = p.wait()
-    except OSError:
-        log('fsck: warning: par2 not found; disabling recovery features.\n')
-    else:
-        par2_ok = 1
-
-def is_par2_parallel():
-    # A true result means it definitely allows -t1; a false result is
-    # technically inconclusive, but likely means no.
-    tmpdir = mkdtemp(prefix=b'bup-fsck')
-    try:
-        canary = tmpdir + b'/canary'
-        with open(canary, 'wb') as f:
-            f.write(b'canary\n')
-        p = subprocess.Popen((b'par2', b'create', b'-qq', b'-t1', canary),
-                             stderr=PIPE, stdin=nullf)
-        _, err = p.communicate()
-        parallel = p.returncode == 0
-        if opt.verbose:
-            if len(err) > 0 and err != b'Invalid option specified: -t1\n':
-                log('Unexpected par2 error output\n')
-                log(repr(err) + '\n')
-            if parallel:
-                log('Assuming par2 supports parallel processing\n')
-            else:
-                log('Assuming par2 does not support parallel processing\n')
-        return parallel
-    finally:
-        rmtree(tmpdir)
-
-_par2_parallel = None
-
-def par2(action, args, verb_floor=0):
-    global _par2_parallel
-    if _par2_parallel is None:
-        _par2_parallel = is_par2_parallel()
-    cmd = [b'par2', action]
-    if opt.verbose >= verb_floor and not istty2:
-        cmd.append(b'-q')
-    else:
-        cmd.append(b'-qq')
-    if _par2_parallel:
-        cmd.append(b'-t1')
-    cmd.extend(args)
-    return run(cmd)
-
-def par2_generate(base):
-    return par2(b'create',
-                [b'-n1', b'-c200', b'--', base, base + b'.pack', base + b'.idx'],
-                verb_floor=2)
-
-def par2_verify(base):
-    return par2(b'verify', [b'--', base], verb_floor=3)
-
-def par2_repair(base):
-    return par2(b'repair', [b'--', base], verb_floor=2)
-
-def quick_verify(base):
-    f = open(base + b'.pack', 'rb')
-    f.seek(-20, 2)
-    wantsum = f.read(20)
-    assert(len(wantsum) == 20)
-    f.seek(0)
-    sum = Sha1()
-    for b in chunkyreader(f, os.fstat(f.fileno()).st_size - 20):
-        sum.update(b)
-    if sum.digest() != wantsum:
-        raise ValueError('expected %r, got %r' % (hexlify(wantsum),
-                                                  sum.hexdigest()))
-        
-
-def git_verify(base):
-    if opt.quick:
-        try:
-            quick_verify(base)
-        except Exception as e:
-            log('error: %s\n' % e)
-            return 1
-        return 0
-    else:
-        return run([b'git', b'verify-pack', b'--', base])
-    
-    
-def do_pack(base, last, par2_exists, out):
-    code = 0
-    if par2_ok and par2_exists and (opt.repair or not opt.generate):
-        vresult = par2_verify(base)
-        if vresult != 0:
-            if opt.repair:
-                rresult = par2_repair(base)
-                if rresult != 0:
-                    action_result = b'failed'
-                    log('%s par2 repair: failed (%d)\n' % (last, rresult))
-                    code = rresult
-                else:
-                    action_result = b'repaired'
-                    log('%s par2 repair: succeeded (0)\n' % last)
-                    code = 100
-            else:
-                action_result = b'failed'
-                log('%s par2 verify: failed (%d)\n' % (last, vresult))
-                code = vresult
-        else:
-            action_result = b'ok'
-    elif not opt.generate or (par2_ok and not par2_exists):
-        gresult = git_verify(base)
-        if gresult != 0:
-            action_result = b'failed'
-            log('%s git verify: failed (%d)\n' % (last, gresult))
-            code = gresult
-        else:
-            if par2_ok and opt.generate:
-                presult = par2_generate(base)
-                if presult != 0:
-                    action_result = b'failed'
-                    log('%s par2 create: failed (%d)\n' % (last, presult))
-                    code = presult
-                else:
-                    action_result = b'generated'
-            else:
-                action_result = b'ok'
-    else:
-        assert(opt.generate and (not par2_ok or par2_exists))
-        action_result = b'exists' if par2_exists else b'skipped'
-    if opt.verbose:
-        out.write(last + b' ' +  action_result + b'\n')
-    return code
-
-
-optspec = """
-bup fsck [options...] [filenames...]
---
-r,repair    attempt to repair errors using par2 (dangerous!)
-g,generate  generate auto-repair information using par2
-v,verbose   increase verbosity (can be used more than once)
-quick       just check pack sha1sum, don't use git verify-pack
-j,jobs=     run 'n' jobs in parallel
-par2-ok     immediately return 0 if par2 is ok, 1 if not
-disable-par2  ignore par2 even if it is available
-"""
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-opt.verbose = opt.verbose or 0
-
-par2_setup()
-if opt.par2_ok:
-    if par2_ok:
-        sys.exit(0)  # 'true' in sh
-    else:
-        sys.exit(1)
-if opt.disable_par2:
-    par2_ok = 0
-
-git.check_repo_or_die()
-
-if extra:
-    extra = [argv_bytes(x) for x in extra]
-else:
-    debug('fsck: No filenames given: checking all packs.\n')
-    extra = glob.glob(git.repo(b'objects/pack/*.pack'))
-
-sys.stdout.flush()
-out = byte_stream(sys.stdout)
-code = 0
-count = 0
-outstanding = {}
-for name in extra:
-    if name.endswith(b'.pack'):
-        base = name[:-5]
-    elif name.endswith(b'.idx'):
-        base = name[:-4]
-    elif name.endswith(b'.par2'):
-        base = name[:-5]
-    elif os.path.exists(name + b'.pack'):
-        base = name
-    else:
-        raise Exception('%r is not a pack file!' % name)
-    (dir,last) = os.path.split(base)
-    par2_exists = os.path.exists(base + b'.par2')
-    if par2_exists and os.stat(base + b'.par2').st_size == 0:
-        par2_exists = 0
-    sys.stdout.flush()  # Not sure we still need this, but it'll flush out too
-    debug('fsck: checking %r (%s)\n'
-          % (last, par2_ok and par2_exists and 'par2' or 'git'))
-    if not opt.verbose:
-        progress('fsck (%d/%d)\r' % (count, len(extra)))
-    
-    if not opt.jobs:
-        nc = do_pack(base, last, par2_exists, out)
-        code = code or nc
-        count += 1
-    else:
-        while len(outstanding) >= opt.jobs:
-            (pid,nc) = os.wait()
-            nc >>= 8
-            if pid in outstanding:
-                del outstanding[pid]
-                code = code or nc
-                count += 1
-        pid = os.fork()
-        if pid:  # parent
-            outstanding[pid] = 1
-        else: # child
-            try:
-                sys.exit(do_pack(base, last, par2_exists, out))
-            except Exception as e:
-                log('exception: %r\n' % e)
-                sys.exit(99)
-                
-while len(outstanding):
-    (pid,nc) = os.wait()
-    nc >>= 8
-    if pid in outstanding:
-        del outstanding[pid]
-        code = code or nc
-        count += 1
-    if not opt.verbose:
-        progress('fsck (%d/%d)\r' % (count, len(extra)))
-
-if istty2:
-    debug('fsck done.           \n')
-sys.exit(code)
diff --git a/lib/cmd/ftp-cmd.py b/lib/cmd/ftp-cmd.py
deleted file mode 100755 (executable)
index 02a6fec..0000000
+++ /dev/null
@@ -1,258 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-# For now, this completely relies on the assumption that the current
-# encoding (LC_CTYPE, etc.) is ASCII compatible, and that it returns
-# the exact same bytes from a decode/encode round-trip (or the reverse
-# (e.g. ISO-8859-1).
-
-from __future__ import absolute_import, print_function
-import os, fnmatch, stat, sys
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import _helpers, compat, options, git, shquote, ls, vfs
-from bup.compat import argv_bytes, fsdecode
-from bup.helpers import chunkyreader, handle_ctrl_c, log
-from bup.io import byte_stream, path_msg
-from bup.repo import LocalRepo
-
-handle_ctrl_c()
-
-
-class OptionError(Exception):
-    pass
-
-
-def do_ls(repo, args, out):
-    try:
-        opt = ls.opts_from_cmdline([fsdecode(arg) for arg in args],
-                                   onabort=OptionError)
-    except OptionError as e:
-        log('error: %s' % e)
-        return
-    return ls.within_repo(repo, opt, out)
-
-
-def write_to_file(inf, outf):
-    for blob in chunkyreader(inf):
-        outf.write(blob)
-
-
-def inputiter():
-    if os.isatty(stdin.fileno()):
-        while 1:
-            if hasattr(_helpers, 'readline'):
-                try:
-                    yield _helpers.readline(b'bup> ')
-                except EOFError:
-                    print()  # Clear the line for the terminal's next prompt
-                    break
-            else:
-                out.write(b'bup> ')
-                out.flush()
-                read_line = stdin.readline()
-                if not read_line:
-                    print('')
-                    break
-                yield read_line
-    else:
-        for line in stdin:
-            yield line
-
-
-def _completer_get_subs(repo, line):
-    (qtype, lastword) = shquote.unfinished_word(line)
-    dir, name = os.path.split(lastword)
-    dir_path = vfs.resolve(repo, dir or b'/')
-    _, dir_item = dir_path[-1]
-    if not dir_item:
-        subs = tuple()
-    else:
-        subs = tuple(dir_path + (entry,)
-                     for entry in vfs.contents(repo, dir_item)
-                     if (entry[0] != b'.' and entry[0].startswith(name)))
-    return qtype, lastword, subs
-
-
-_attempt_start = None
-_attempt_end = None
-def attempt_completion(text, start, end):
-    global _attempt_start, _attempt_end
-    _attempt_start = start
-    _attempt_end = end
-    return None
-
-_last_line = None
-_last_res = None
-def enter_completion(text, iteration):
-    global repo
-    global _attempt_end
-    global _last_line
-    global _last_res
-    try:
-        line = _helpers.get_line_buffer()[:_attempt_end]
-        if _last_line != line:
-            _last_res = _completer_get_subs(repo, line)
-            _last_line = line
-        qtype, lastword, subs = _last_res
-        if iteration < len(subs):
-            path = subs[iteration]
-            leaf_name, leaf_item = path[-1]
-            res = vfs.try_resolve(repo, leaf_name, parent=path[:-1])
-            leaf_name, leaf_item = res[-1]
-            fullname = os.path.join(*(name for name, item in res))
-            if stat.S_ISDIR(vfs.item_mode(leaf_item)):
-                ret = shquote.what_to_add(qtype, lastword, fullname + b'/',
-                                          terminate=False)
-            else:
-                ret = shquote.what_to_add(qtype, lastword, fullname,
-                                          terminate=True) + b' '
-            return text + ret
-    except Exception as e:
-        log('\n')
-        try:
-            import traceback
-            traceback.print_tb(sys.exc_traceback)
-        except Exception as e2:
-            log('Error printing traceback: %s\n' % e2)
-        log('\nError in completion: %s\n' % e)
-
-
-optspec = """
-bup ftp [commands...]
-"""
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-
-git.check_repo_or_die()
-
-sys.stdout.flush()
-out = byte_stream(sys.stdout)
-stdin = byte_stream(sys.stdin)
-repo = LocalRepo()
-pwd = vfs.resolve(repo, b'/')
-rv = 0
-
-
-
-if extra:
-    lines = (argv_bytes(arg) for arg in extra)
-else:
-    if hasattr(_helpers, 'readline'):
-        _helpers.set_completer_word_break_characters(b' \t\n\r/')
-        _helpers.set_attempted_completion_function(attempt_completion)
-        _helpers.set_completion_entry_function(enter_completion)
-        if sys.platform.startswith('darwin'):
-            # MacOS uses a slightly incompatible clone of libreadline
-            _helpers.parse_and_bind(b'bind ^I rl_complete')
-        _helpers.parse_and_bind(b'tab: complete')
-    lines = inputiter()
-
-for line in lines:
-    if not line.strip():
-        continue
-    words = [word for (wordstart,word) in shquote.quotesplit(line)]
-    cmd = words[0].lower()
-    #log('execute: %r %r\n' % (cmd, parm))
-    try:
-        if cmd == b'ls':
-            # FIXME: respect pwd (perhaps via ls accepting resolve path/parent)
-            do_ls(repo, words[1:], out)
-            out.flush()
-        elif cmd == b'cd':
-            np = pwd
-            for parm in words[1:]:
-                res = vfs.resolve(repo, parm, parent=np)
-                _, leaf_item = res[-1]
-                if not leaf_item:
-                    raise Exception('%s does not exist'
-                                    % path_msg(b'/'.join(name for name, item
-                                                         in res)))
-                if not stat.S_ISDIR(vfs.item_mode(leaf_item)):
-                    raise Exception('%s is not a directory' % path_msg(parm))
-                np = res
-            pwd = np
-        elif cmd == b'pwd':
-            if len(pwd) == 1:
-                out.write(b'/')
-            out.write(b'/'.join(name for name, item in pwd) + b'\n')
-            out.flush()
-        elif cmd == b'cat':
-            for parm in words[1:]:
-                res = vfs.resolve(repo, parm, parent=pwd)
-                _, leaf_item = res[-1]
-                if not leaf_item:
-                    raise Exception('%s does not exist' %
-                                    path_msg(b'/'.join(name for name, item
-                                                       in res)))
-                with vfs.fopen(repo, leaf_item) as srcfile:
-                    write_to_file(srcfile, out)
-            out.flush()
-        elif cmd == b'get':
-            if len(words) not in [2,3]:
-                rv = 1
-                raise Exception('Usage: get <filename> [localname]')
-            rname = words[1]
-            (dir,base) = os.path.split(rname)
-            lname = len(words) > 2 and words[2] or base
-            res = vfs.resolve(repo, rname, parent=pwd)
-            _, leaf_item = res[-1]
-            if not leaf_item:
-                raise Exception('%s does not exist' %
-                                path_msg(b'/'.join(name for name, item in res)))
-            with vfs.fopen(repo, leaf_item) as srcfile:
-                with open(lname, 'wb') as destfile:
-                    log('Saving %s\n' % path_msg(lname))
-                    write_to_file(srcfile, destfile)
-        elif cmd == b'mget':
-            for parm in words[1:]:
-                dir, base = os.path.split(parm)
-
-                res = vfs.resolve(repo, dir, parent=pwd)
-                _, dir_item = res[-1]
-                if not dir_item:
-                    raise Exception('%s does not exist' % path_msg(dir))
-                for name, item in vfs.contents(repo, dir_item):
-                    if name == b'.':
-                        continue
-                    if fnmatch.fnmatch(name, base):
-                        if stat.S_ISLNK(vfs.item_mode(item)):
-                            deref = vfs.resolve(repo, name, parent=res)
-                            deref_name, deref_item = deref[-1]
-                            if not deref_item:
-                                raise Exception('%s does not exist' %
-                                                path_msg('/'.join(name for name, item
-                                                                  in deref)))
-                            item = deref_item
-                        with vfs.fopen(repo, item) as srcfile:
-                            with open(name, 'wb') as destfile:
-                                log('Saving %s\n' % path_msg(name))
-                                write_to_file(srcfile, destfile)
-        elif cmd == b'help' or cmd == b'?':
-            out.write(b'Commands: ls cd pwd cat get mget help quit\n')
-            out.flush()
-        elif cmd in (b'quit', b'exit', b'bye'):
-            break
-        else:
-            rv = 1
-            raise Exception('no such command %r' % cmd)
-    except Exception as e:
-        rv = 1
-        log('error: %s\n' % e)
-        raise
-
-sys.exit(rv)
diff --git a/lib/cmd/fuse-cmd.py b/lib/cmd/fuse-cmd.py
deleted file mode 100755 (executable)
index 1b8bd9c..0000000
+++ /dev/null
@@ -1,178 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import, print_function
-import errno, os, sys
-
-try:
-    import fuse
-except ImportError:
-    print('error: cannot find the python "fuse" module; please install it',
-          file=sys.stderr)
-    sys.exit(2)
-if not hasattr(fuse, '__version__'):
-    print('error: fuse module is too old for fuse.__version__', file=sys.stderr)
-    sys.exit(2)
-fuse.fuse_python_api = (0, 2)
-
-if sys.version_info[0] > 2:
-    try:
-        fuse_ver = fuse.__version__.split('.')
-        fuse_ver_maj = int(fuse_ver[0])
-    except:
-        log('error: cannot determine the fuse major version; please report',
-            file=sys.stderr)
-        sys.exit(2)
-    if len(fuse_ver) < 3 or fuse_ver_maj < 1:
-        print("error: fuse module can't handle binary data; please upgrade to 1.0+\n",
-              file=sys.stderr)
-        sys.exit(2)
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, options, git, vfs, xstat
-from bup.compat import argv_bytes, fsdecode, py_maj
-from bup.helpers import log
-from bup.repo import LocalRepo
-
-
-# FIXME: self.meta and want_meta?
-
-# The path handling is just wrong, but the current fuse module can't
-# handle bytes paths.
-
-class BupFs(fuse.Fuse):
-    def __init__(self, repo, verbose=0, fake_metadata=False):
-        fuse.Fuse.__init__(self)
-        self.repo = repo
-        self.verbose = verbose
-        self.fake_metadata = fake_metadata
-    
-    def getattr(self, path):
-        path = argv_bytes(path)
-        global opt
-        if self.verbose > 0:
-            log('--getattr(%r)\n' % path)
-        res = vfs.resolve(self.repo, path, want_meta=(not self.fake_metadata),
-                          follow=False)
-        name, item = res[-1]
-        if not item:
-            return -errno.ENOENT
-        if self.fake_metadata:
-            item = vfs.augment_item_meta(self.repo, item, include_size=True)
-        else:
-            item = vfs.ensure_item_has_metadata(self.repo, item,
-                                                include_size=True)
-        meta = item.meta
-        # FIXME: do we want/need to do anything more with nlink?
-        st = fuse.Stat(st_mode=meta.mode, st_nlink=1, st_size=meta.size)
-        st.st_mode = meta.mode
-        st.st_uid = meta.uid or 0
-        st.st_gid = meta.gid or 0
-        st.st_atime = max(0, xstat.fstime_floor_secs(meta.atime))
-        st.st_mtime = max(0, xstat.fstime_floor_secs(meta.mtime))
-        st.st_ctime = max(0, xstat.fstime_floor_secs(meta.ctime))
-        return st
-
-    def readdir(self, path, offset):
-        path = argv_bytes(path)
-        assert not offset  # We don't return offsets, so offset should be unused
-        res = vfs.resolve(self.repo, path, follow=False)
-        dir_name, dir_item = res[-1]
-        if not dir_item:
-            yield -errno.ENOENT
-        yield fuse.Direntry('..')
-        # FIXME: make sure want_meta=False is being completely respected
-        for ent_name, ent_item in vfs.contents(repo, dir_item, want_meta=False):
-            fusename = fsdecode(ent_name.replace(b'/', b'-'))
-            yield fuse.Direntry(fusename)
-
-    def readlink(self, path):
-        path = argv_bytes(path)
-        if self.verbose > 0:
-            log('--readlink(%r)\n' % path)
-        res = vfs.resolve(self.repo, path, follow=False)
-        name, item = res[-1]
-        if not item:
-            return -errno.ENOENT
-        return fsdecode(vfs.readlink(repo, item))
-
-    def open(self, path, flags):
-        path = argv_bytes(path)
-        if self.verbose > 0:
-            log('--open(%r)\n' % path)
-        res = vfs.resolve(self.repo, path, follow=False)
-        name, item = res[-1]
-        if not item:
-            return -errno.ENOENT
-        accmode = os.O_RDONLY | os.O_WRONLY | os.O_RDWR
-        if (flags & accmode) != os.O_RDONLY:
-            return -errno.EACCES
-        # Return None since read doesn't need the file atm...
-        # If we *do* return the file, it'll show up as the last argument
-        #return vfs.fopen(repo, item)
-
-    def read(self, path, size, offset):
-        path = argv_bytes(path)
-        if self.verbose > 0:
-            log('--read(%r)\n' % path)
-        res = vfs.resolve(self.repo, path, follow=False)
-        name, item = res[-1]
-        if not item:
-            return -errno.ENOENT
-        with vfs.fopen(repo, item) as f:
-            f.seek(offset)
-            return f.read(size)
-
-
-optspec = """
-bup fuse [-d] [-f] <mountpoint>
---
-f,foreground  run in foreground
-d,debug       run in the foreground and display FUSE debug information
-o,allow-other allow other users to access the filesystem
-meta          report original metadata for paths when available
-v,verbose     increase log output (can be used more than once)
-"""
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-if not opt.verbose:
-    opt.verbose = 0
-
-# Set stderr to be line buffered, even if it's not connected to the console
-# so that we'll be able to see diagnostics in a timely fashion.
-errfd = sys.stderr.fileno()
-sys.stderr.flush()
-sys.stderr = os.fdopen(errfd, 'w', 1)
-
-if len(extra) != 1:
-    o.fatal('only one mount point argument expected')
-
-git.check_repo_or_die()
-repo = LocalRepo()
-f = BupFs(repo=repo, verbose=opt.verbose, fake_metadata=(not opt.meta))
-
-# This is likely wrong, but the fuse module doesn't currently accept bytes
-f.fuse_args.mountpoint = extra[0]
-
-if opt.debug:
-    f.fuse_args.add('debug')
-if opt.foreground:
-    f.fuse_args.setmod('foreground')
-f.multithreaded = False
-if opt.allow_other:
-    f.fuse_args.add('allow_other')
-f.main()
diff --git a/lib/cmd/gc-cmd.py b/lib/cmd/gc-cmd.py
deleted file mode 100755 (executable)
index 71e4e75..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import
-import os.path, sys
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, git, options
-from bup.gc import bup_gc
-from bup.helpers import die_if_errors, handle_ctrl_c, log
-
-
-optspec = """
-bup gc [options...]
---
-v,verbose   increase log output (can be used more than once)
-threshold=  only rewrite a packfile if it's over this percent garbage [10]
-#,compress= set compression level to # (0-9, 9 is highest) [1]
-unsafe      use the command even though it may be DANGEROUS
-"""
-
-# FIXME: server mode?
-# FIXME: make sure client handles server-side changes reasonably
-
-handle_ctrl_c()
-
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-
-if not opt.unsafe:
-    o.fatal('refusing to run dangerous, experimental command without --unsafe')
-
-if extra:
-    o.fatal('no positional parameters expected')
-
-if opt.threshold:
-    try:
-        opt.threshold = int(opt.threshold)
-    except ValueError:
-        o.fatal('threshold must be an integer percentage value')
-    if opt.threshold < 0 or opt.threshold > 100:
-        o.fatal('threshold must be an integer percentage value')
-
-git.check_repo_or_die()
-
-bup_gc(threshold=opt.threshold,
-       compression=opt.compress,
-       verbosity=opt.verbose)
-
-die_if_errors()
diff --git a/lib/cmd/get-cmd.py b/lib/cmd/get-cmd.py
deleted file mode 100755 (executable)
index 7fc741e..0000000
+++ /dev/null
@@ -1,678 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import, print_function
-import os, re, stat, sys, textwrap, time
-from binascii import hexlify, unhexlify
-from collections import namedtuple
-from functools import partial
-from stat import S_ISDIR
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, git, client, helpers, vfs
-from bup.compat import argv_bytes, environ, hexstr, items, wrap_main
-from bup.git import get_cat_data, parse_commit, walk_object
-from bup.helpers import add_error, debug1, handle_ctrl_c, log, saved_errors
-from bup.helpers import hostname, shstr, tty_width
-from bup.io import path_msg
-from bup.pwdgrp import userfullname, username
-from bup.repo import LocalRepo, RemoteRepo
-
-argspec = (
-    "usage: bup get [-s source] [-r remote] (<--ff|--append|...> REF [DEST])...",
-
-    """Transfer data from a source repository to a destination repository
-    according to the methods specified (--ff, --ff:, --append, etc.).
-    Both repositories default to BUP_DIR.  A remote destination may be
-    specified with -r, and data may be pulled from a remote repository
-    with the related "bup on HOST get ..." command.""",
-
-    ('optional arguments:',
-     (('-h, --help', 'show this help message and exit'),
-      ('-v, --verbose',
-       'increase log output (can be specified more than once)'),
-      ('-q, --quiet', "don't show progress meter"),
-      ('-s SOURCE, --source SOURCE',
-       'path to the source repository (defaults to BUP_DIR)'),
-      ('-r REMOTE, --remote REMOTE',
-       'hostname:/path/to/repo of remote destination repository'),
-      ('-t --print-trees', 'output a tree id for each ref set'),
-      ('-c, --print-commits', 'output a commit id for each ref set'),
-      ('--print-tags', 'output an id for each tag'),
-      ('--bwlimit BWLIMIT', 'maximum bytes/sec to transmit to server'),
-      ('-0, -1, -2, -3, -4, -5, -6, -7, -8, -9, --compress LEVEL',
-       'set compression LEVEL (default: 1)'))),
-
-    ('transfer methods:',
-     (('--ff REF, --ff: REF DEST',
-       'fast-forward dest REF (or DEST) to match source REF'),
-      ('--append REF, --append: REF DEST',
-       'append REF (treeish or committish) to dest REF (or DEST)'),
-      ('--pick REF, --pick: REF DEST',
-       'append single source REF commit to dest REF (or DEST)'),
-      ('--force-pick REF, --force-pick: REF DEST',
-       '--pick, overwriting REF (or DEST)'),
-      ('--new-tag REF, --new-tag: REF DEST',
-       'tag source ref REF as REF (or DEST) in dest unless it already exists'),
-      ('--replace, --replace: REF DEST',
-       'overwrite REF (or DEST) in dest with source REF'),
-      ('--unnamed REF',
-       'fetch REF anonymously (without destination ref)'))))
-
-def render_opts(opts, width=None):
-    if not width:
-        width = tty_width()
-    result = []
-    for args, desc in opts:
-        result.append(textwrap.fill(args, width=width,
-                                    initial_indent=(' ' * 2),
-                                    subsequent_indent=(' ' * 4)))
-        result.append('\n')
-        result.append(textwrap.fill(desc, width=width,
-                                    initial_indent=(' ' * 6),
-                                    subsequent_indent=(' ' * 6)))
-        result.append('\n')
-    return result
-
-def usage(argspec, width=None):
-    if not width:
-        width = tty_width()
-    usage, preamble, groups = argspec[0], argspec[1], argspec[2:]
-    msg = []
-    msg.append(textwrap.fill(usage, width=width, subsequent_indent='  '))
-    msg.append('\n\n')
-    msg.append(textwrap.fill(preamble.replace('\n', ' '), width=width))
-    msg.append('\n')
-    for group_name, group_args in groups:
-        msg.extend(['\n', group_name, '\n'])
-        msg.extend(render_opts(group_args, width=width))
-    return ''.join(msg)
-
-def misuse(message=None):
-    sys.stderr.write(usage(argspec))
-    if message:
-        sys.stderr.write('\nerror: ')
-        sys.stderr.write(message)
-        sys.stderr.write('\n')
-    sys.exit(1)
-
-def require_n_args_or_die(n, args):
-    if len(args) < n + 1:
-        misuse('%s argument requires %d %s'
-               % (n, 'values' if n == 1 else 'value'))
-    result = args[1:1+n], args[1+n:]
-    assert len(result[0]) == n
-    return result
-
-Spec = namedtuple('Spec', ('method', 'src', 'dest'))
-
-def spec_msg(s):
-    if not s.dest:
-        return '--%s %s' % (s.method, path_msg(s.src))
-    return '--%s: %s %s' % (s.method, path_msg(s.src), path_msg(s.dest))
-
-def parse_args(args):
-    class GetOpts:
-        pass
-    opt = GetOpts()
-    opt.help = False
-    opt.verbose = 0
-    opt.quiet = False
-    opt.print_commits = opt.print_trees = opt.print_tags = False
-    opt.bwlimit = None
-    opt.compress = 1
-    opt.source = opt.remote = None
-    opt.target_specs = []
-
-    remaining = args[1:]  # Skip argv[0]
-    while remaining:
-        arg = remaining[0]
-        if arg in ('-h', '--help'):
-            sys.stdout.write(usage(argspec))
-            sys.exit(0)
-        elif arg in ('-v', '--verbose'):
-            opt.verbose += 1
-            remaining = remaining[1:]
-        elif arg in ('--ff', '--append', '--pick', '--force-pick',
-                     '--new-tag', '--replace', '--unnamed'):
-            (ref,), remaining = require_n_args_or_die(1, remaining)
-            ref = argv_bytes(ref)
-            opt.target_specs.append(Spec(method=arg[2:], src=ref, dest=None))
-        elif arg in ('--ff:', '--append:', '--pick:', '--force-pick:',
-                     '--new-tag:', '--replace:'):
-            (ref, dest), remaining = require_n_args_or_die(2, remaining)
-            ref, dest = argv_bytes(ref), argv_bytes(dest)
-            opt.target_specs.append(Spec(method=arg[2:-1], src=ref, dest=dest))
-        elif arg in ('-s', '--source'):
-            (opt.source,), remaining = require_n_args_or_die(1, remaining)
-        elif arg in ('-r', '--remote'):
-            (opt.remote,), remaining = require_n_args_or_die(1, remaining)
-        elif arg in ('-c', '--print-commits'):
-            opt.print_commits, remaining = True, remaining[1:]
-        elif arg in ('-t', '--print-trees'):
-            opt.print_trees, remaining = True, remaining[1:]
-        elif arg == '--print-tags':
-            opt.print_tags, remaining = True, remaining[1:]
-        elif arg in ('-0', '-1', '-2', '-3', '-4', '-5', '-6', '-7', '-8', '-9'):
-            opt.compress = int(arg[1:])
-            remaining = remaining[1:]
-        elif arg == '--compress':
-            (opt.compress,), remaining = require_n_args_or_die(1, remaining)
-            opt.compress = int(opt.compress)
-        elif arg == '--bwlimit':
-            (opt.bwlimit,), remaining = require_n_args_or_die(1, remaining)
-            opt.bwlimit = long(opt.bwlimit)
-        elif arg.startswith('-') and len(arg) > 2 and arg[1] != '-':
-            # Try to interpret this as -xyz, i.e. "-xyz -> -x -y -z".
-            # We do this last so that --foo -bar is valid if --foo
-            # requires a value.
-            remaining[0:1] = ('-' + c for c in arg[1:])
-            # FIXME
-            continue
-        else:
-            misuse()
-    return opt
-
-# FIXME: client error handling (remote exceptions, etc.)
-
-# FIXME: walk_object in in git.py doesn't support opt.verbose.  Do we
-# need to adjust for that here?
-def get_random_item(name, hash, repo, writer, opt):
-    def already_seen(oid):
-        return writer.exists(unhexlify(oid))
-    for item in walk_object(repo.cat, hash, stop_at=already_seen,
-                            include_data=True):
-        # already_seen ensures that writer.exists(id) is false.
-        # Otherwise, just_write() would fail.
-        writer.just_write(item.oid, item.type, item.data)
-
-
-def append_commit(name, hash, parent, src_repo, writer, opt):
-    now = time.time()
-    items = parse_commit(get_cat_data(src_repo.cat(hash), b'commit'))
-    tree = unhexlify(items.tree)
-    author = b'%s <%s>' % (items.author_name, items.author_mail)
-    author_time = (items.author_sec, items.author_offset)
-    committer = b'%s <%s@%s>' % (userfullname(), username(), hostname())
-    get_random_item(name, hexlify(tree), src_repo, writer, opt)
-    c = writer.new_commit(tree, parent,
-                          author, items.author_sec, items.author_offset,
-                          committer, now, None,
-                          items.message)
-    return c, tree
-
-
-def append_commits(commits, src_name, dest_hash, src_repo, writer, opt):
-    last_c, tree = dest_hash, None
-    for commit in commits:
-        last_c, tree = append_commit(src_name, commit, last_c,
-                                     src_repo, writer, opt)
-    assert(tree is not None)
-    return last_c, tree
-
-Loc = namedtuple('Loc', ['type', 'hash', 'path'])
-default_loc = Loc(None, None, None)
-
-def find_vfs_item(name, repo):
-    res = repo.resolve(name, follow=False, want_meta=False)
-    leaf_name, leaf_item = res[-1]
-    if not leaf_item:
-        return None
-    kind = type(leaf_item)
-    if kind == vfs.Root:
-        kind = 'root'
-    elif kind == vfs.Tags:
-        kind = 'tags'
-    elif kind == vfs.RevList:
-        kind = 'branch'
-    elif kind == vfs.Commit:
-        if len(res) > 1 and type(res[-2][1]) == vfs.RevList:
-            kind = 'save'
-        else:
-            kind = 'commit'
-    elif kind == vfs.Item:
-        if S_ISDIR(vfs.item_mode(leaf_item)):
-            kind = 'tree'
-        else:
-            kind = 'blob'
-    elif kind == vfs.Chunky:
-        kind = 'tree'
-    elif kind == vfs.FakeLink:
-        # Don't have to worry about ELOOP, excepting malicious
-        # remotes, since "latest" is the only FakeLink.
-        assert leaf_name == b'latest'
-        res = repo.resolve(leaf_item.target, parent=res[:-1],
-                           follow=False, want_meta=False)
-        leaf_name, leaf_item = res[-1]
-        assert leaf_item
-        assert type(leaf_item) == vfs.Commit
-        name = b'/'.join(x[0] for x in res)
-        kind = 'save'
-    else:
-        raise Exception('unexpected resolution for %s: %r'
-                        % (path_msg(name), res))
-    path = b'/'.join(name for name, item in res)
-    if hasattr(leaf_item, 'coid'):
-        result = Loc(type=kind, hash=leaf_item.coid, path=path)
-    elif hasattr(leaf_item, 'oid'):
-        result = Loc(type=kind, hash=leaf_item.oid, path=path)
-    else:
-        result = Loc(type=kind, hash=None, path=path)
-    return result
-
-
-Target = namedtuple('Target', ['spec', 'src', 'dest'])
-
-def loc_desc(loc):
-    if loc and loc.hash:
-        loc = loc._replace(hash=hexlify(loc.hash))
-    return repr(loc)
-
-
-# FIXME: see if resolve() means we can drop the vfs path cleanup
-
-def cleanup_vfs_path(p):
-    result = os.path.normpath(p)
-    if result.startswith(b'/'):
-        return result
-    return b'/' + result
-
-
-def validate_vfs_path(p):
-    if p.startswith(b'/.') \
-       and not p.startswith(b'/.tag/'):
-        misuse('unsupported destination path %s in %s'
-               % (path_msg(dest.path), spec_msg(spec)))
-    return p
-
-
-def resolve_src(spec, src_repo):
-    src = find_vfs_item(spec.src, src_repo)
-    spec_args = spec_msg(spec)
-    if not src:
-        misuse('cannot find source for %s' % spec_args)
-    if src.type == 'root':
-        misuse('cannot fetch entire repository for %s' % spec_args)
-    if src.type == 'tags':
-        misuse('cannot fetch entire /.tag directory for %s' % spec_args)
-    debug1('src: %s\n' % loc_desc(src))
-    return src
-
-
-def get_save_branch(repo, path):
-    res = repo.resolve(path, follow=False, want_meta=False)
-    leaf_name, leaf_item = res[-1]
-    if not leaf_item:
-        misuse('error: cannot access %r in %r' % (leaf_name, path))
-    assert len(res) == 3
-    res_path = b'/'.join(name for name, item in res[:-1])
-    return res_path
-
-
-def resolve_branch_dest(spec, src, src_repo, dest_repo):
-    # Resulting dest must be treeish, or not exist.
-    if not spec.dest:
-        # Pick a default dest.
-        if src.type == 'branch':
-            spec = spec._replace(dest=spec.src)
-        elif src.type == 'save':
-            spec = spec._replace(dest=get_save_branch(src_repo, spec.src))
-        elif src.path.startswith(b'/.tag/'):  # Dest defaults to the same.
-            spec = spec._replace(dest=spec.src)
-
-    spec_args = spec_msg(spec)
-    if not spec.dest:
-        misuse('no destination (implicit or explicit) for %s', spec_args)
-
-    dest = find_vfs_item(spec.dest, dest_repo)
-    if dest:
-        if dest.type == 'commit':
-            misuse('destination for %s is a tagged commit, not a branch'
-                  % spec_args)
-        if dest.type != 'branch':
-            misuse('destination for %s is a %s, not a branch'
-                  % (spec_args, dest.type))
-    else:
-        dest = default_loc._replace(path=cleanup_vfs_path(spec.dest))
-
-    if dest.path.startswith(b'/.'):
-        misuse('destination for %s must be a valid branch name' % spec_args)
-
-    debug1('dest: %s\n' % loc_desc(dest))
-    return spec, dest
-
-
-def resolve_ff(spec, src_repo, dest_repo):
-    src = resolve_src(spec, src_repo)
-    spec_args = spec_msg(spec)
-    if src.type == 'tree':
-        misuse('%s is impossible; can only --append a tree to a branch'
-              % spec_args)
-    if src.type not in ('branch', 'save', 'commit'):
-        misuse('source for %s must be a branch, save, or commit, not %s'
-              % (spec_args, src.type))
-    spec, dest = resolve_branch_dest(spec, src, src_repo, dest_repo)
-    return Target(spec=spec, src=src, dest=dest)
-
-
-def handle_ff(item, src_repo, writer, opt):
-    assert item.spec.method == 'ff'
-    assert item.src.type in ('branch', 'save', 'commit')
-    src_oidx = hexlify(item.src.hash)
-    dest_oidx = hexlify(item.dest.hash) if item.dest.hash else None
-    if not dest_oidx or dest_oidx in src_repo.rev_list(src_oidx):
-        # Can fast forward.
-        get_random_item(item.spec.src, src_oidx, src_repo, writer, opt)
-        commit_items = parse_commit(get_cat_data(src_repo.cat(src_oidx), b'commit'))
-        return item.src.hash, unhexlify(commit_items.tree)
-    misuse('destination is not an ancestor of source for %s'
-           % spec_msg(item.spec))
-
-
-def resolve_append(spec, src_repo, dest_repo):
-    src = resolve_src(spec, src_repo)
-    if src.type not in ('branch', 'save', 'commit', 'tree'):
-        misuse('source for %s must be a branch, save, commit, or tree, not %s'
-              % (spec_msg(spec), src.type))
-    spec, dest = resolve_branch_dest(spec, src, src_repo, dest_repo)
-    return Target(spec=spec, src=src, dest=dest)
-
-
-def handle_append(item, src_repo, writer, opt):
-    assert item.spec.method == 'append'
-    assert item.src.type in ('branch', 'save', 'commit', 'tree')
-    assert item.dest.type == 'branch' or not item.dest.type
-    src_oidx = hexlify(item.src.hash)
-    if item.src.type == 'tree':
-        get_random_item(item.spec.src, src_oidx, src_repo, writer, opt)
-        parent = item.dest.hash
-        msg = b'bup save\n\nGenerated by command:\n%r\n' % compat.argvb
-        userline = b'%s <%s@%s>' % (userfullname(), username(), hostname())
-        now = time.time()
-        commit = writer.new_commit(item.src.hash, parent,
-                                   userline, now, None,
-                                   userline, now, None, msg)
-        return commit, item.src.hash
-    commits = list(src_repo.rev_list(src_oidx))
-    commits.reverse()
-    return append_commits(commits, item.spec.src, item.dest.hash,
-                          src_repo, writer, opt)
-
-
-def resolve_pick(spec, src_repo, dest_repo):
-    src = resolve_src(spec, src_repo)
-    spec_args = spec_msg(spec)
-    if src.type == 'tree':
-        misuse('%s is impossible; can only --append a tree' % spec_args)
-    if src.type not in ('commit', 'save'):
-        misuse('%s impossible; can only pick a commit or save, not %s'
-              % (spec_args, src.type))
-    if not spec.dest:
-        if src.path.startswith(b'/.tag/'):
-            spec = spec._replace(dest=spec.src)
-        elif src.type == 'save':
-            spec = spec._replace(dest=get_save_branch(src_repo, spec.src))
-    if not spec.dest:
-        misuse('no destination provided for %s', spec_args)
-    dest = find_vfs_item(spec.dest, dest_repo)
-    if not dest:
-        cp = validate_vfs_path(cleanup_vfs_path(spec.dest))
-        dest = default_loc._replace(path=cp)
-    else:
-        if not dest.type == 'branch' and not dest.path.startswith(b'/.tag/'):
-            misuse('%s destination is not a tag or branch' % spec_args)
-        if spec.method == 'pick' \
-           and dest.hash and dest.path.startswith(b'/.tag/'):
-            misuse('cannot overwrite existing tag for %s (requires --force-pick)'
-                  % spec_args)
-    return Target(spec=spec, src=src, dest=dest)
-
-
-def handle_pick(item, src_repo, writer, opt):
-    assert item.spec.method in ('pick', 'force-pick')
-    assert item.src.type in ('save', 'commit')
-    src_oidx = hexlify(item.src.hash)
-    if item.dest.hash:
-        return append_commit(item.spec.src, src_oidx, item.dest.hash,
-                             src_repo, writer, opt)
-    return append_commit(item.spec.src, src_oidx, None, src_repo, writer, opt)
-
-
-def resolve_new_tag(spec, src_repo, dest_repo):
-    src = resolve_src(spec, src_repo)
-    spec_args = spec_msg(spec)
-    if not spec.dest and src.path.startswith(b'/.tag/'):
-        spec = spec._replace(dest=src.path)
-    if not spec.dest:
-        misuse('no destination (implicit or explicit) for %s', spec_args)
-    dest = find_vfs_item(spec.dest, dest_repo)
-    if not dest:
-        dest = default_loc._replace(path=cleanup_vfs_path(spec.dest))
-    if not dest.path.startswith(b'/.tag/'):
-        misuse('destination for %s must be a VFS tag' % spec_args)
-    if dest.hash:
-        misuse('cannot overwrite existing tag for %s (requires --replace)'
-              % spec_args)
-    return Target(spec=spec, src=src, dest=dest)
-
-
-def handle_new_tag(item, src_repo, writer, opt):
-    assert item.spec.method == 'new-tag'
-    assert item.dest.path.startswith(b'/.tag/')
-    get_random_item(item.spec.src, hexlify(item.src.hash),
-                    src_repo, writer, opt)
-    return (item.src.hash,)
-
-
-def resolve_replace(spec, src_repo, dest_repo):
-    src = resolve_src(spec, src_repo)
-    spec_args = spec_msg(spec)
-    if not spec.dest:
-        if src.path.startswith(b'/.tag/') or src.type == 'branch':
-            spec = spec._replace(dest=spec.src)
-    if not spec.dest:
-        misuse('no destination provided for %s', spec_args)
-    dest = find_vfs_item(spec.dest, dest_repo)
-    if dest:
-        if not dest.type == 'branch' and not dest.path.startswith(b'/.tag/'):
-            misuse('%s impossible; can only overwrite branch or tag'
-                  % spec_args)
-    else:
-        cp = validate_vfs_path(cleanup_vfs_path(spec.dest))
-        dest = default_loc._replace(path=cp)
-    if not dest.path.startswith(b'/.tag/') \
-       and not src.type in ('branch', 'save', 'commit'):
-        misuse('cannot overwrite branch with %s for %s' % (src.type, spec_args))
-    return Target(spec=spec, src=src, dest=dest)
-
-
-def handle_replace(item, src_repo, writer, opt):
-    assert(item.spec.method == 'replace')
-    if item.dest.path.startswith(b'/.tag/'):
-        get_random_item(item.spec.src, hexlify(item.src.hash),
-                        src_repo, writer, opt)
-        return (item.src.hash,)
-    assert(item.dest.type == 'branch' or not item.dest.type)
-    src_oidx = hexlify(item.src.hash)
-    get_random_item(item.spec.src, src_oidx, src_repo, writer, opt)
-    commit_items = parse_commit(get_cat_data(src_repo.cat(src_oidx), b'commit'))
-    return item.src.hash, unhexlify(commit_items.tree)
-
-
-def resolve_unnamed(spec, src_repo, dest_repo):
-    if spec.dest:
-        misuse('destination name given for %s' % spec_msg(spec))
-    src = resolve_src(spec, src_repo)
-    return Target(spec=spec, src=src, dest=None)
-
-
-def handle_unnamed(item, src_repo, writer, opt):
-    get_random_item(item.spec.src, hexlify(item.src.hash),
-                    src_repo, writer, opt)
-    return (None,)
-
-
-def resolve_targets(specs, src_repo, dest_repo):
-    resolved_items = []
-    common_args = src_repo, dest_repo
-    for spec in specs:
-        debug1('initial-spec: %r\n' % (spec,))
-        if spec.method == 'ff':
-            resolved_items.append(resolve_ff(spec, *common_args))
-        elif spec.method == 'append':
-            resolved_items.append(resolve_append(spec, *common_args))
-        elif spec.method in ('pick', 'force-pick'):
-            resolved_items.append(resolve_pick(spec, *common_args))
-        elif spec.method == 'new-tag':
-            resolved_items.append(resolve_new_tag(spec, *common_args))
-        elif spec.method == 'replace':
-            resolved_items.append(resolve_replace(spec, *common_args))
-        elif spec.method == 'unnamed':
-            resolved_items.append(resolve_unnamed(spec, *common_args))
-        else: # Should be impossible -- prevented by the option parser.
-            assert(False)
-
-    # FIXME: check for prefix overlap?  i.e.:
-    #   bup get --ff foo --ff: baz foo/bar
-    #   bup get --new-tag .tag/foo --new-tag: bar .tag/foo/bar
-
-    # Now that we have all the items, check for duplicate tags.
-    tags_targeted = set()
-    for item in resolved_items:
-        dest_path = item.dest and item.dest.path
-        if dest_path:
-            assert(dest_path.startswith(b'/'))
-            if dest_path.startswith(b'/.tag/'):
-                if dest_path in tags_targeted:
-                    if item.spec.method not in ('replace', 'force-pick'):
-                        misuse('cannot overwrite tag %s via %s' \
-                              % (path_msg(dest_path), spec_msg(item.spec)))
-                else:
-                    tags_targeted.add(dest_path)
-    return resolved_items
-
-
-def log_item(name, type, opt, tree=None, commit=None, tag=None):
-    if tag and opt.print_tags:
-        print(hexstr(tag))
-    if tree and opt.print_trees:
-        print(hexstr(tree))
-    if commit and opt.print_commits:
-        print(hexstr(commit))
-    if opt.verbose:
-        last = ''
-        if type in ('root', 'branch', 'save', 'commit', 'tree'):
-            if not name.endswith(b'/'):
-                last = '/'
-        log('%s%s\n' % (path_msg(name), last))
-
-def main():
-    handle_ctrl_c()
-    is_reverse = environ.get(b'BUP_SERVER_REVERSE')
-    opt = parse_args(compat.argv)
-    git.check_repo_or_die()
-    if opt.source:
-        opt.source = argv_bytes(opt.source)
-    if opt.bwlimit:
-        client.bwlimit = parse_num(opt.bwlimit)
-    if is_reverse and opt.remote:
-        misuse("don't use -r in reverse mode; it's automatic")
-    if opt.remote:
-        opt.remote = argv_bytes(opt.remote)
-    if opt.remote or is_reverse:
-        dest_repo = RemoteRepo(opt.remote)
-    else:
-        dest_repo = LocalRepo()
-
-    with dest_repo as dest_repo:
-        with LocalRepo(repo_dir=opt.source) as src_repo:
-            with dest_repo.new_packwriter(compression_level=opt.compress) as writer:
-                # Resolve and validate all sources and destinations,
-                # implicit or explicit, and do it up-front, so we can
-                # fail before we start writing (for any obviously
-                # broken cases).
-                target_items = resolve_targets(opt.target_specs,
-                                               src_repo, dest_repo)
-
-                updated_refs = {}  # ref_name -> (original_ref, tip_commit(bin))
-                no_ref_info = (None, None)
-
-                handlers = {'ff': handle_ff,
-                            'append': handle_append,
-                            'force-pick': handle_pick,
-                            'pick': handle_pick,
-                            'new-tag': handle_new_tag,
-                            'replace': handle_replace,
-                            'unnamed': handle_unnamed}
-
-                for item in target_items:
-                    debug1('get-spec: %r\n' % (item.spec,))
-                    debug1('get-src: %s\n' % loc_desc(item.src))
-                    debug1('get-dest: %s\n' % loc_desc(item.dest))
-                    dest_path = item.dest and item.dest.path
-                    if dest_path:
-                        if dest_path.startswith(b'/.tag/'):
-                            dest_ref = b'refs/tags/%s' % dest_path[6:]
-                        else:
-                            dest_ref = b'refs/heads/%s' % dest_path[1:]
-                    else:
-                        dest_ref = None
-
-                    dest_hash = item.dest and item.dest.hash
-                    orig_ref, cur_ref = updated_refs.get(dest_ref, no_ref_info)
-                    orig_ref = orig_ref or dest_hash
-                    cur_ref = cur_ref or dest_hash
-
-                    handler = handlers[item.spec.method]
-                    item_result = handler(item, src_repo, writer, opt)
-                    if len(item_result) > 1:
-                        new_id, tree = item_result
-                    else:
-                        new_id = item_result[0]
-
-                    if not dest_ref:
-                        log_item(item.spec.src, item.src.type, opt)
-                    else:
-                        updated_refs[dest_ref] = (orig_ref, new_id)
-                        if dest_ref.startswith(b'refs/tags/'):
-                            log_item(item.spec.src, item.src.type, opt, tag=new_id)
-                        else:
-                            log_item(item.spec.src, item.src.type, opt,
-                                     tree=tree, commit=new_id)
-
-        # Only update the refs at the very end, once the writer is
-        # closed, so that if something goes wrong above, the old refs
-        # will be undisturbed.
-        for ref_name, info in items(updated_refs):
-            orig_ref, new_ref = info
-            try:
-                dest_repo.update_ref(ref_name, new_ref, orig_ref)
-                if opt.verbose:
-                    new_hex = hexlify(new_ref)
-                    if orig_ref:
-                        orig_hex = hexlify(orig_ref)
-                        log('updated %r (%s -> %s)\n' % (ref_name, orig_hex, new_hex))
-                    else:
-                        log('updated %r (%s)\n' % (ref_name, new_hex))
-            except (git.GitError, client.ClientError) as ex:
-                add_error('unable to update ref %r: %s' % (ref_name, ex))
-
-    if saved_errors:
-        log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))
-        sys.exit(1)
-
-wrap_main(main)
diff --git a/lib/cmd/help-cmd.py b/lib/cmd/help-cmd.py
deleted file mode 100755 (executable)
index 684df72..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import
-import os, glob, sys
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, options, path
-from bup.compat import argv_bytes
-
-
-optspec = """
-bup help <command>
-"""
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-
-if len(extra) == 0:
-    # the wrapper program provides the default usage string
-    os.execvp(path.exe(), [b'bup'])
-elif len(extra) == 1:
-    docname = (extra[0]=='bup' and b'bup' or (b'bup-%s' % argv_bytes(extra[0])))
-    manpath = os.path.join(path.exedir(),
-                           b'../../Documentation/' + docname + b'.[1-9]')
-    g = glob.glob(manpath)
-    try:
-        if g:
-            os.execvp('man', ['man', '-l', g[0]])
-        else:
-            os.execvp('man', ['man', docname])
-    except OSError as e:
-        sys.stderr.write('Unable to run man command: %s\n' % e)
-        sys.exit(1)
-else:
-    o.fatal("exactly one command name expected")
diff --git a/lib/cmd/import-duplicity-cmd.py b/lib/cmd/import-duplicity-cmd.py
deleted file mode 100755 (executable)
index 348be8c..0000000
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import
-from calendar import timegm
-from pipes import quote
-from subprocess import check_call
-from time import strftime, strptime
-import os
-import os.path
-import sys
-import tempfile
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, git, helpers, options
-from bup.compat import argv_bytes, str_type
-from bup.helpers import (handle_ctrl_c,
-                         log,
-                         readpipe,
-                         shstr,
-                         saved_errors,
-                         unlink)
-import bup.path
-
-optspec = """
-bup import-duplicity [-n] <duplicity-source-url> <bup-save-name>
---
-n,dry-run  don't do anything; just print what would be done
-"""
-
-def logcmd(cmd):
-    log(shstr(cmd).decode(errors='backslashreplace') + '\n')
-
-def exc(cmd, shell=False):
-    global opt
-    logcmd(cmd)
-    if not opt.dry_run:
-        check_call(cmd, shell=shell)
-
-def exo(cmd, shell=False, preexec_fn=None, close_fds=True):
-    global opt
-    logcmd(cmd)
-    if not opt.dry_run:
-        return helpers.exo(cmd, shell=shell, preexec_fn=preexec_fn,
-                           close_fds=close_fds)[0]
-
-def redirect_dup_output():
-    os.dup2(1, 3)
-    os.dup2(1, 2)
-
-
-handle_ctrl_c()
-
-log('\nbup: import-duplicity is EXPERIMENTAL (proceed with caution)\n\n')
-
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-
-if len(extra) < 1 or not extra[0]:
-    o.fatal('duplicity source URL required')
-if len(extra) < 2 or not extra[1]:
-    o.fatal('bup destination save name required')
-if len(extra) > 2:
-    o.fatal('too many arguments')
-
-source_url, save_name = extra
-source_url = argv_bytes(source_url)
-save_name = argv_bytes(save_name)
-bup = bup.path.exe()
-
-git.check_repo_or_die()
-
-tmpdir = tempfile.mkdtemp(prefix=b'bup-import-dup-')
-try:
-    dup = [b'duplicity', b'--archive-dir', tmpdir + b'/dup-cache']
-    restoredir = tmpdir + b'/restore'
-    tmpidx = tmpdir + b'/index'
-
-    collection_status = \
-        exo(dup + [b'collection-status', b'--log-fd=3', source_url],
-            close_fds=False, preexec_fn=redirect_dup_output)  # i.e. 3>&1 1>&2
-    # Duplicity output lines of interest look like this (one leading space):
-    #  full 20150222T073111Z 1 noenc
-    #  inc 20150222T073233Z 1 noenc
-    dup_timestamps = []
-    for line in collection_status.splitlines():
-        if line.startswith(b' inc '):
-            assert(len(line) >= len(b' inc 20150222T073233Z'))
-            dup_timestamps.append(line[5:21])
-        elif line.startswith(b' full '):
-            assert(len(line) >= len(b' full 20150222T073233Z'))
-            dup_timestamps.append(line[6:22])
-    for i, dup_ts in enumerate(dup_timestamps):
-        tm = strptime(dup_ts.decode('ascii'), '%Y%m%dT%H%M%SZ')
-        exc([b'rm', b'-rf', restoredir])
-        exc(dup + [b'restore', b'-t', dup_ts, source_url, restoredir])
-        exc([bup, b'index', b'-uxf', tmpidx, restoredir])
-        exc([bup, b'save', b'--strip', b'--date', b'%d' % timegm(tm),
-             b'-f', tmpidx, b'-n', save_name, restoredir])
-    sys.stderr.flush()
-finally:
-    exc([b'rm', b'-rf', tmpdir])
-
-if saved_errors:
-    log('warning: %d errors encountered\n' % len(saved_errors))
-    sys.exit(1)
diff --git a/lib/cmd/import-rdiff-backup-cmd.sh b/lib/cmd/import-rdiff-backup-cmd.sh
deleted file mode 100755 (executable)
index 0bbf327..0000000
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/usr/bin/env bash
-
-cmd_dir="$(cd "$(dirname "$0")" && pwd)" || exit $?
-
-set -o pipefail
-
-must() {
-    local file=${BASH_SOURCE[0]}
-    local line=${BASH_LINENO[0]}
-    "$@"
-    local rc=$?
-    if test $rc -ne 0; then
-        echo "Failed at line $line in $file" 1>&2
-        exit $rc
-    fi
-}
-
-usage() {
-    echo "Usage: bup import-rdiff-backup [-n]" \
-        "<path to rdiff-backup root> <backup name>"
-    echo "-n,--dry-run: just print what would be done"
-    exit 1
-}
-
-control_c() {
-    echo "bup import-rdiff-backup: signal 2 received" 1>&2
-    exit 128
-}
-
-must trap control_c INT
-
-dry_run=
-while [ "$1" = "-n" -o "$1" = "--dry-run" ]; do
-    dry_run=echo
-    shift
-done
-
-bup()
-{
-    $dry_run "$cmd_dir/bup" "$@"
-}
-
-snapshot_root="$1"
-branch="$2"
-
-[ -n "$snapshot_root" -a "$#" = 2 ] || usage
-
-if [ ! -e "$snapshot_root/." ]; then
-    echo "'$snapshot_root' isn't a directory!"
-    exit 1
-fi
-
-
-backups=$(must rdiff-backup --list-increments --parsable-output "$snapshot_root") \
-    || exit $?
-backups_count=$(echo "$backups" | must wc -l) || exit $?
-counter=1
-echo "$backups" |
-while read timestamp type; do
-    tmpdir=$(must mktemp -d import-rdiff-backup-XXXXXXX) || exit $?
-
-    echo "Importing backup from $(date -d @$timestamp +%c) " \
-        "($counter / $backups_count)" 1>&2
-    echo 1>&2
-
-    echo "Restoring from rdiff-backup..." 1>&2
-    must rdiff-backup -r $timestamp "$snapshot_root" "$tmpdir"
-    echo 1>&2
-
-    echo "Importing into bup..." 1>&2
-    tmpidx=$(must mktemp -u import-rdiff-backup-idx-XXXXXXX) || exit $?
-    must bup index -ux -f "$tmpidx" "$tmpdir"
-    must bup save --strip --date="$timestamp" -f "$tmpidx" -n "$branch" "$tmpdir"
-    must rm -f "$tmpidx"
-
-    must rm -rf "$tmpdir"
-    counter=$((counter+1))
-    echo 1>&2
-    echo 1>&2
-done
diff --git a/lib/cmd/import-rsnapshot-cmd.sh b/lib/cmd/import-rsnapshot-cmd.sh
deleted file mode 100755 (executable)
index 91f711e..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/bin/sh
-# Does an import of a rsnapshot archive.
-
-cmd_dir="$(cd "$(dirname "$0")" && pwd)" || exit $?
-
-usage() {
-    echo "Usage: bup import-rsnapshot [-n]" \
-        "<path to snapshot_root> [<backuptarget>]"
-    echo "-n,--dry-run: just print what would be done"
-    exit 1
-}
-
-DRY_RUN=
-while [ "$1" = "-n" -o "$1" = "--dry-run" ]; do
-    DRY_RUN=echo
-    shift
-done
-
-bup()
-{
-    $DRY_RUN "$cmd_dir/bup" "$@"
-}
-
-SNAPSHOT_ROOT=$1
-TARGET=$2
-
-[ -n "$SNAPSHOT_ROOT" -a "$#" -le 2 ] || usage
-
-if [ ! -e "$SNAPSHOT_ROOT/." ]; then
-    echo "'$SNAPSHOT_ROOT' isn't a directory!"
-    exit 1
-fi
-
-
-cd "$SNAPSHOT_ROOT" || exit 2
-
-for SNAPSHOT in *; do
-    [ -e "$SNAPSHOT/." ] || continue
-    echo "snapshot='$SNAPSHOT'" >&2
-    for BRANCH_PATH in "$SNAPSHOT/"*; do
-        BRANCH=$(basename "$BRANCH_PATH") || exit $?
-        [ -e "$BRANCH_PATH/." ] || continue
-        [ -z "$TARGET" -o "$TARGET" = "$BRANCH" ] || continue
-        
-        echo "snapshot='$SNAPSHOT' branch='$BRANCH'" >&2
-
-        # Get the snapshot's ctime
-        DATE=$(perl -e '@a=stat($ARGV[0]) or die "$ARGV[0]: $!";
-                        print $a[10];' "$BRANCH_PATH")
-       [ -n "$DATE" ] || exit 3
-
-        TMPIDX=bupindex.$BRANCH.tmp
-        bup index -ux -f "$TMPIDX" "$BRANCH_PATH/" || exit $?
-        bup save --strip --date="$DATE" \
-            -f "$TMPIDX" -n "$BRANCH" \
-            "$BRANCH_PATH/" || exit $?
-        rm "$TMPIDX" || exit $?
-    done
-done
diff --git a/lib/cmd/index-cmd.py b/lib/cmd/index-cmd.py
deleted file mode 100755 (executable)
index 8941ecb..0000000
+++ /dev/null
@@ -1,330 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import, print_function
-from binascii import hexlify
-import errno, os, re, stat, sys, time
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, metadata, options, git, index, drecurse, hlinkdb
-from bup.compat import argv_bytes
-from bup.drecurse import recursive_dirlist
-from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
-from bup.helpers import (add_error, handle_ctrl_c, log, parse_excludes, parse_rx_excludes,
-                         progress, qprogress, saved_errors)
-from bup.io import byte_stream, path_msg
-
-
-class IterHelper:
-    def __init__(self, l):
-        self.i = iter(l)
-        self.cur = None
-        self.next()
-
-    def __next__(self):
-        self.cur = next(self.i, None)
-        return self.cur
-
-    next = __next__
-
-def check_index(reader):
-    try:
-        log('check: checking forward iteration...\n')
-        e = None
-        d = {}
-        for e in reader.forward_iter():
-            if e.children_n:
-                if opt.verbose:
-                    log('%08x+%-4d %r\n' % (e.children_ofs, e.children_n,
-                                            path_msg(e.name)))
-                assert(e.children_ofs)
-                assert e.name.endswith(b'/')
-                assert(not d.get(e.children_ofs))
-                d[e.children_ofs] = 1
-            if e.flags & index.IX_HASHVALID:
-                assert(e.sha != index.EMPTY_SHA)
-                assert(e.gitmode)
-        assert not e or bytes(e.name) == b'/'  # last entry is *always* /
-        log('check: checking normal iteration...\n')
-        last = None
-        for e in reader:
-            if last:
-                assert(last > e.name)
-            last = e.name
-    except:
-        log('index error! at %r\n' % e)
-        raise
-    log('check: passed.\n')
-
-
-def clear_index(indexfile):
-    indexfiles = [indexfile, indexfile + b'.meta', indexfile + b'.hlink']
-    for indexfile in indexfiles:
-        try:
-            os.remove(indexfile)
-            if opt.verbose:
-                log('clear: removed %s\n' % path_msg(indexfile))
-        except OSError as e:
-            if e.errno != errno.ENOENT:
-                raise
-
-
-def update_index(top, excluded_paths, exclude_rxs, xdev_exceptions, out=None):
-    # tmax must be epoch nanoseconds.
-    tmax = (time.time() - 1) * 10**9
-    ri = index.Reader(indexfile)
-    msw = index.MetaStoreWriter(indexfile + b'.meta')
-    wi = index.Writer(indexfile, msw, tmax)
-    rig = IterHelper(ri.iter(name=top))
-
-    hlinks = hlinkdb.HLinkDB(indexfile + b'.hlink')
-
-    fake_hash = None
-    if opt.fake_valid:
-        def fake_hash(name):
-            return (GIT_MODE_FILE, index.FAKE_SHA)
-
-    total = 0
-    bup_dir = os.path.abspath(git.repo())
-    index_start = time.time()
-    for path, pst in recursive_dirlist([top],
-                                       xdev=opt.xdev,
-                                       bup_dir=bup_dir,
-                                       excluded_paths=excluded_paths,
-                                       exclude_rxs=exclude_rxs,
-                                       xdev_exceptions=xdev_exceptions):
-        if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)):
-            out.write(b'%s\n' % path)
-            out.flush()
-            elapsed = time.time() - index_start
-            paths_per_sec = total / elapsed if elapsed else 0
-            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
-        elif not (total % 128):
-            elapsed = time.time() - index_start
-            paths_per_sec = total / elapsed if elapsed else 0
-            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
-        total += 1
-
-        while rig.cur and rig.cur.name > path:  # deleted paths
-            if rig.cur.exists():
-                rig.cur.set_deleted()
-                rig.cur.repack()
-                if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
-                    hlinks.del_path(rig.cur.name)
-            rig.next()
-
-        if rig.cur and rig.cur.name == path:    # paths that already existed
-            need_repack = False
-            if(rig.cur.stale(pst, check_device=opt.check_device)):
-                try:
-                    meta = metadata.from_path(path, statinfo=pst)
-                except (OSError, IOError) as e:
-                    add_error(e)
-                    rig.next()
-                    continue
-                if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
-                    hlinks.del_path(rig.cur.name)
-                if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
-                    hlinks.add_path(path, pst.st_dev, pst.st_ino)
-                # Clear these so they don't bloat the store -- they're
-                # already in the index (since they vary a lot and they're
-                # fixed length).  If you've noticed "tmax", you might
-                # wonder why it's OK to do this, since that code may
-                # adjust (mangle) the index mtime and ctime -- producing
-                # fake values which must not end up in a .bupm.  However,
-                # it looks like that shouldn't be possible:  (1) When
-                # "save" validates the index entry, it always reads the
-                # metadata from the filesytem. (2) Metadata is only
-                # read/used from the index if hashvalid is true. (3)
-                # "faked" entries will be stale(), and so we'll invalidate
-                # them below.
-                meta.ctime = meta.mtime = meta.atime = 0
-                meta_ofs = msw.store(meta)
-                rig.cur.update_from_stat(pst, meta_ofs)
-                rig.cur.invalidate()
-                need_repack = True
-            if not (rig.cur.flags & index.IX_HASHVALID):
-                if fake_hash:
-                    if rig.cur.sha == index.EMPTY_SHA:
-                        rig.cur.gitmode, rig.cur.sha = fake_hash(path)
-                    rig.cur.flags |= index.IX_HASHVALID
-                    need_repack = True
-            if opt.fake_invalid:
-                rig.cur.invalidate()
-                need_repack = True
-            if need_repack:
-                rig.cur.repack()
-            rig.next()
-        else:  # new paths
-            try:
-                meta = metadata.from_path(path, statinfo=pst)
-            except (OSError, IOError) as e:
-                add_error(e)
-                continue
-            # See same assignment to 0, above, for rationale.
-            meta.atime = meta.mtime = meta.ctime = 0
-            meta_ofs = msw.store(meta)
-            wi.add(path, pst, meta_ofs, hashgen=fake_hash)
-            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
-                hlinks.add_path(path, pst.st_dev, pst.st_ino)
-
-    elapsed = time.time() - index_start
-    paths_per_sec = total / elapsed if elapsed else 0
-    progress('Indexing: %d, done (%d paths/s).\n' % (total, paths_per_sec))
-
-    hlinks.prepare_save()
-
-    if ri.exists():
-        ri.save()
-        wi.flush()
-        if wi.count:
-            wr = wi.new_reader()
-            if opt.check:
-                log('check: before merging: oldfile\n')
-                check_index(ri)
-                log('check: before merging: newfile\n')
-                check_index(wr)
-            mi = index.Writer(indexfile, msw, tmax)
-
-            for e in index.merge(ri, wr):
-                # FIXME: shouldn't we remove deleted entries eventually?  When?
-                mi.add_ixentry(e)
-
-            ri.close()
-            mi.close()
-            wr.close()
-        wi.abort()
-    else:
-        wi.close()
-
-    msw.close()
-    hlinks.commit_save()
-
-
-optspec = """
-bup index <-p|-m|-s|-u|--clear|--check> [options...] <filenames...>
---
- Modes:
-p,print    print the index entries for the given names (also works with -u)
-m,modified print only added/deleted/modified files (implies -p)
-s,status   print each filename with a status char (A/M/D) (implies -p)
-u,update   recursively update the index entries for the given file/dir names (default if no mode is specified)
-check      carefully check index file integrity
-clear      clear the default index
- Options:
-H,hash     print the hash for each object next to its name
-l,long     print more information about each file
-no-check-device don't invalidate an entry if the containing device changes
-fake-valid mark all index entries as up-to-date even if they aren't
-fake-invalid mark all index entries as invalid
-f,indexfile=  the name of the index file (normally BUP_DIR/bupindex)
-exclude= a path to exclude from the backup (may be repeated)
-exclude-from= skip --exclude paths in file (may be repeated)
-exclude-rx= skip paths matching the unanchored regex (may be repeated)
-exclude-rx-from= skip --exclude-rx patterns in file (may be repeated)
-v,verbose  increase log output (can be used more than once)
-x,xdev,one-file-system  don't cross filesystem boundaries
-"""
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-
-if not (opt.modified or \
-        opt['print'] or \
-        opt.status or \
-        opt.update or \
-        opt.check or \
-        opt.clear):
-    opt.update = 1
-if (opt.fake_valid or opt.fake_invalid) and not opt.update:
-    o.fatal('--fake-{in,}valid are meaningless without -u')
-if opt.fake_valid and opt.fake_invalid:
-    o.fatal('--fake-valid is incompatible with --fake-invalid')
-if opt.clear and opt.indexfile:
-    o.fatal('cannot clear an external index (via -f)')
-
-# FIXME: remove this once we account for timestamp races, i.e. index;
-# touch new-file; index.  It's possible for this to happen quickly
-# enough that new-file ends up with the same timestamp as the first
-# index, and then bup will ignore it.
-tick_start = time.time()
-time.sleep(1 - (tick_start - int(tick_start)))
-
-git.check_repo_or_die()
-
-handle_ctrl_c()
-
-if opt.verbose is None:
-    opt.verbose = 0
-
-if opt.indexfile:
-    indexfile = argv_bytes(opt.indexfile)
-else:
-    indexfile = git.repo(b'bupindex')
-
-if opt.check:
-    log('check: starting initial check.\n')
-    check_index(index.Reader(indexfile))
-
-if opt.clear:
-    log('clear: clearing index.\n')
-    clear_index(indexfile)
-
-sys.stdout.flush()
-out = byte_stream(sys.stdout)
-
-if opt.update:
-    if not extra:
-        o.fatal('update mode (-u) requested but no paths given')
-    extra = [argv_bytes(x) for x in extra]
-    excluded_paths = parse_excludes(flags, o.fatal)
-    exclude_rxs = parse_rx_excludes(flags, o.fatal)
-    xexcept = index.unique_resolved_paths(extra)
-    for rp, path in index.reduce_paths(extra):
-        update_index(rp, excluded_paths, exclude_rxs, xdev_exceptions=xexcept,
-                     out=out)
-
-if opt['print'] or opt.status or opt.modified:
-    extra = [argv_bytes(x) for x in extra]
-    for name, ent in index.Reader(indexfile).filter(extra or [b'']):
-        if (opt.modified 
-            and (ent.is_valid() or ent.is_deleted() or not ent.mode)):
-            continue
-        line = b''
-        if opt.status:
-            if ent.is_deleted():
-                line += b'D '
-            elif not ent.is_valid():
-                if ent.sha == index.EMPTY_SHA:
-                    line += b'A '
-                else:
-                    line += b'M '
-            else:
-                line += b'  '
-        if opt.hash:
-            line += hexlify(ent.sha) + b' '
-        if opt.long:
-            line += b'%7s %7s ' % (oct(ent.mode).encode('ascii'),
-                                   oct(ent.gitmode).encode('ascii'))
-        out.write(line + (name or b'./') + b'\n')
-
-if opt.check and (opt['print'] or opt.status or opt.modified or opt.update):
-    log('check: starting final check.\n')
-    check_index(index.Reader(indexfile))
-
-if saved_errors:
-    log('WARNING: %d errors encountered.\n' % len(saved_errors))
-    sys.exit(1)
diff --git a/lib/cmd/init-cmd.py b/lib/cmd/init-cmd.py
deleted file mode 100755 (executable)
index b0b0399..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import
-import os.path, sys
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, git, options, client
-from bup.helpers import log, saved_errors
-from bup.compat import argv_bytes
-
-
-optspec = """
-[BUP_DIR=...] bup init [-r host:path]
---
-r,remote=  remote repository path
-"""
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-
-if extra:
-    o.fatal("no arguments expected")
-
-
-try:
-    git.init_repo()  # local repo
-except git.GitError as e:
-    log("bup: error: could not init repository: %s" % e)
-    sys.exit(1)
-
-if opt.remote:
-    git.check_repo_or_die()
-    cli = client.Client(argv_bytes(opt.remote), create=True)
-    cli.close()
diff --git a/lib/cmd/join-cmd.py b/lib/cmd/join-cmd.py
deleted file mode 100755 (executable)
index 0cf6ef8..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import
-import os.path, sys
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, git, options
-from bup.compat import argv_bytes
-from bup.helpers import linereader, log
-from bup.io import byte_stream
-from bup.repo import LocalRepo, RemoteRepo
-
-
-optspec = """
-bup join [-r host:path] [refs or hashes...]
---
-r,remote=  remote repository path
-o=         output filename
-"""
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-if opt.remote:
-    opt.remote = argv_bytes(opt.remote)
-
-git.check_repo_or_die()
-
-stdin = byte_stream(sys.stdin)
-
-if not extra:
-    extra = linereader(stdin)
-
-ret = 0
-repo = RemoteRepo(opt.remote) if opt.remote else LocalRepo()
-
-if opt.o:
-    outfile = open(opt.o, 'wb')
-else:
-    sys.stdout.flush()
-    outfile = byte_stream(sys.stdout)
-
-for ref in [argv_bytes(x) for x in extra]:
-    try:
-        for blob in repo.join(ref):
-            outfile.write(blob)
-    except KeyError as e:
-        outfile.flush()
-        log('error: %s\n' % e)
-        ret = 1
-
-sys.exit(ret)
diff --git a/lib/cmd/list-idx-cmd.py b/lib/cmd/list-idx-cmd.py
deleted file mode 100755 (executable)
index e7e7600..0000000
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import, print_function
-from binascii import hexlify, unhexlify
-import os, sys
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, git, options
-from bup.compat import argv_bytes
-from bup.helpers import add_error, handle_ctrl_c, log, qprogress, saved_errors
-from bup.io import byte_stream
-
-optspec = """
-bup list-idx [--find=<prefix>] <idxfilenames...>
---
-find=   display only objects that start with <prefix>
-"""
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-
-handle_ctrl_c()
-
-opt.find = argv_bytes(opt.find) if opt.find else b''
-
-if not extra:
-    o.fatal('you must provide at least one filename')
-
-if len(opt.find) > 40:
-    o.fatal('--find parameter must be <= 40 chars long')
-else:
-    if len(opt.find) % 2:
-        s = opt.find + b'0'
-    else:
-        s = opt.find
-    try:
-        bin = unhexlify(s)
-    except TypeError:
-        o.fatal('--find parameter is not a valid hex string')
-
-sys.stdout.flush()
-out = byte_stream(sys.stdout)
-find = opt.find.lower()
-count = 0
-idxfiles = [argv_bytes(x) for x in extra]
-for name in idxfiles:
-    try:
-        ix = git.open_idx(name)
-    except git.GitError as e:
-        add_error('%r: %s' % (name, e))
-        continue
-    if len(opt.find) == 40:
-        if ix.exists(bin):
-            out.write(b'%s %s\n' % (name, find))
-    else:
-        # slow, exhaustive search
-        for _i in ix:
-            i = hexlify(_i)
-            if i.startswith(find):
-                out.write(b'%s %s\n' % (name, i))
-            qprogress('Searching: %d\r' % count)
-            count += 1
-
-if saved_errors:
-    log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))
-    sys.exit(1)
diff --git a/lib/cmd/ls-cmd.py b/lib/cmd/ls-cmd.py
deleted file mode 100755 (executable)
index f034c09..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import
-import os.path, sys
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, git, ls
-from bup.io import byte_stream
-
-
-git.check_repo_or_die()
-
-sys.stdout.flush()
-out = byte_stream(sys.stdout)
-# Check out lib/bup/ls.py for the opt spec
-rc = ls.via_cmdline(compat.argv[1:], out=out)
-sys.exit(rc)
diff --git a/lib/cmd/margin-cmd.py b/lib/cmd/margin-cmd.py
deleted file mode 100755 (executable)
index 09411bc..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import
-import math, os.path, struct, sys
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, options, git, _helpers
-from bup.helpers import log
-from bup.io import byte_stream
-
-POPULATION_OF_EARTH=6.7e9  # as of September, 2010
-
-optspec = """
-bup margin
---
-predict    Guess object offsets and report the maximum deviation
-ignore-midx  Don't use midx files; use only plain pack idx files.
-"""
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-
-if extra:
-    o.fatal("no arguments expected")
-
-git.check_repo_or_die()
-
-mi = git.PackIdxList(git.repo(b'objects/pack'), ignore_midx=opt.ignore_midx)
-
-def do_predict(ix, out):
-    total = len(ix)
-    maxdiff = 0
-    for count,i in enumerate(ix):
-        prefix = struct.unpack('!Q', i[:8])[0]
-        expected = prefix * total // (1 << 64)
-        diff = count - expected
-        maxdiff = max(maxdiff, abs(diff))
-    out.write(b'%d of %d (%.3f%%) '
-              % (maxdiff, len(ix), maxdiff * 100.0 / len(ix)))
-    out.flush()
-    assert(count+1 == len(ix))
-
-sys.stdout.flush()
-out = byte_stream(sys.stdout)
-
-if opt.predict:
-    if opt.ignore_midx:
-        for pack in mi.packs:
-            do_predict(pack, out)
-    else:
-        do_predict(mi, out)
-else:
-    # default mode: find longest matching prefix
-    last = b'\0'*20
-    longmatch = 0
-    for i in mi:
-        if i == last:
-            continue
-        #assert(str(i) >= last)
-        pm = _helpers.bitmatch(last, i)
-        longmatch = max(longmatch, pm)
-        last = i
-    out.write(b'%d\n' % longmatch)
-    log('%d matching prefix bits\n' % longmatch)
-    doublings = math.log(len(mi), 2)
-    bpd = longmatch / doublings
-    log('%.2f bits per doubling\n' % bpd)
-    remain = 160 - longmatch
-    rdoublings = remain / bpd
-    log('%d bits (%.2f doublings) remaining\n' % (remain, rdoublings))
-    larger = 2**rdoublings
-    log('%g times larger is possible\n' % larger)
-    perperson = larger/POPULATION_OF_EARTH
-    log('\nEveryone on earth could have %d data sets like yours, all in one\n'
-        'repository, and we would expect 1 object collision.\n'
-        % int(perperson))
diff --git a/lib/cmd/memtest-cmd.py b/lib/cmd/memtest-cmd.py
deleted file mode 100755 (executable)
index c5d8677..0000000
+++ /dev/null
@@ -1,143 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import, print_function
-import os.path, re, resource, struct, sys, time
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, git, bloom, midx, options, _helpers
-from bup.compat import range
-from bup.helpers import handle_ctrl_c
-from bup.io import byte_stream
-
-
-handle_ctrl_c()
-
-
-_linux_warned = 0
-def linux_memstat():
-    global _linux_warned
-    #fields = ['VmSize', 'VmRSS', 'VmData', 'VmStk', 'ms']
-    d = {}
-    try:
-        f = open(b'/proc/self/status', 'rb')
-    except IOError as e:
-        if not _linux_warned:
-            log('Warning: %s\n' % e)
-            _linux_warned = 1
-        return {}
-    for line in f:
-        # Note that on Solaris, this file exists but is binary.  If that
-        # happens, this split() might not return two elements.  We don't
-        # really need to care about the binary format since this output
-        # isn't used for much and report() can deal with missing entries.
-        t = re.split(br':\s*', line.strip(), 1)
-        if len(t) == 2:
-            k,v = t
-            d[k] = v
-    return d
-
-
-last = last_u = last_s = start = 0
-def report(count, out):
-    global last, last_u, last_s, start
-    headers = ['RSS', 'MajFlt', 'user', 'sys', 'ms']
-    ru = resource.getrusage(resource.RUSAGE_SELF)
-    now = time.time()
-    rss = int(ru.ru_maxrss // 1024)
-    if not rss:
-        rss = linux_memstat().get(b'VmRSS', b'??')
-    fields = [rss,
-              ru.ru_majflt,
-              int((ru.ru_utime - last_u) * 1000),
-              int((ru.ru_stime - last_s) * 1000),
-              int((now - last) * 1000)]
-    fmt = '%9s  ' + ('%10s ' * len(fields))
-    if count >= 0:
-        line = fmt % tuple([count] + fields)
-        out.write(line.encode('ascii') + b'\n')
-    else:
-        start = now
-        out.write((fmt % tuple([''] + headers)).encode('ascii') + b'\n')
-    out.flush()
-
-    # don't include time to run report() in usage counts
-    ru = resource.getrusage(resource.RUSAGE_SELF)
-    last_u = ru.ru_utime
-    last_s = ru.ru_stime
-    last = time.time()
-
-
-optspec = """
-bup memtest [-n elements] [-c cycles]
---
-n,number=  number of objects per cycle [10000]
-c,cycles=  number of cycles to run [100]
-ignore-midx  ignore .midx files, use only .idx files
-existing   test with existing objects instead of fake ones
-"""
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-
-if extra:
-    o.fatal('no arguments expected')
-
-git.check_repo_or_die()
-m = git.PackIdxList(git.repo(b'objects/pack'), ignore_midx=opt.ignore_midx)
-
-sys.stdout.flush()
-out = byte_stream(sys.stdout)
-
-report(-1, out)
-_helpers.random_sha()
-report(0, out)
-
-if opt.existing:
-    def foreverit(mi):
-        while 1:
-            for e in mi:
-                yield e
-    objit = iter(foreverit(m))
-
-for c in range(opt.cycles):
-    for n in range(opt.number):
-        if opt.existing:
-            bin = next(objit)
-            assert(m.exists(bin))
-        else:
-            bin = _helpers.random_sha()
-
-            # technically, a randomly generated object id might exist.
-            # but the likelihood of that is the likelihood of finding
-            # a collision in sha-1 by accident, which is so unlikely that
-            # we don't care.
-            assert(not m.exists(bin))
-    report((c+1)*opt.number, out)
-
-if bloom._total_searches:
-    out.write(b'bloom: %d objects searched in %d steps: avg %.3f steps/object\n'
-              % (bloom._total_searches, bloom._total_steps,
-                 bloom._total_steps*1.0/bloom._total_searches))
-if midx._total_searches:
-    out.write(b'midx: %d objects searched in %d steps: avg %.3f steps/object\n'
-              % (midx._total_searches, midx._total_steps,
-                 midx._total_steps*1.0/midx._total_searches))
-if git._total_searches:
-    out.write(b'idx: %d objects searched in %d steps: avg %.3f steps/object\n'
-              % (git._total_searches, git._total_steps,
-                 git._total_steps*1.0/git._total_searches))
-out.write(b'Total time: %.3fs\n' % (time.time() - start))
diff --git a/lib/cmd/meta-cmd.py b/lib/cmd/meta-cmd.py
deleted file mode 100755 (executable)
index e9a3d61..0000000
+++ /dev/null
@@ -1,182 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-# Copyright (C) 2010 Rob Browning
-#
-# This code is covered under the terms of the GNU Library General
-# Public License as described in the bup LICENSE file.
-
-# TODO: Add tar-like -C option.
-
-from __future__ import absolute_import
-import os, sys
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, metadata
-from bup import options
-from bup.compat import argv_bytes
-from bup.io import byte_stream
-from bup.helpers import handle_ctrl_c, log, saved_errors
-
-
-def open_input(name):
-    if not name or name == b'-':
-        return byte_stream(sys.stdin)
-    return open(name, 'rb')
-
-
-def open_output(name):
-    if not name or name == b'-':
-        sys.stdout.flush()
-        return byte_stream(sys.stdout)
-    return open(name, 'wb')
-
-
-optspec = """
-bup meta --create [OPTION ...] <PATH ...>
-bup meta --list [OPTION ...]
-bup meta --extract [OPTION ...]
-bup meta --start-extract [OPTION ...]
-bup meta --finish-extract [OPTION ...]
-bup meta --edit [OPTION ...] <PATH ...>
---
-c,create       write metadata for PATHs to stdout (or --file)
-t,list         display metadata
-x,extract      perform --start-extract followed by --finish-extract
-start-extract  build tree matching metadata provided on standard input (or --file)
-finish-extract finish applying standard input (or --file) metadata to filesystem
-edit           alter metadata; write to stdout (or --file)
-f,file=        specify source or destination file
-R,recurse      recurse into subdirectories
-xdev,one-file-system  don't cross filesystem boundaries
-numeric-ids    apply numeric IDs (user, group, etc.) rather than names
-symlinks       handle symbolic links (default is true)
-paths          include paths in metadata (default is true)
-set-uid=       set metadata uid (via --edit)
-set-gid=       set metadata gid (via --edit)
-set-user=      set metadata user (via --edit)
-unset-user     remove metadata user (via --edit)
-set-group=     set metadata group (via --edit)
-unset-group    remove metadata group (via --edit)
-v,verbose      increase log output (can be used more than once)
-q,quiet        don't show progress meter
-"""
-
-handle_ctrl_c()
-
-o = options.Options(optspec)
-(opt, flags, remainder) = o.parse(['--paths', '--symlinks', '--recurse']
-                                  + compat.argv[1:])
-
-opt.verbose = opt.verbose or 0
-opt.quiet = opt.quiet or 0
-metadata.verbose = opt.verbose - opt.quiet
-opt.file = argv_bytes(opt.file) if opt.file else None
-
-action_count = sum([bool(x) for x in [opt.create, opt.list, opt.extract,
-                                      opt.start_extract, opt.finish_extract,
-                                      opt.edit]])
-if action_count > 1:
-    o.fatal("bup: only one action permitted: --create --list --extract --edit")
-if action_count == 0:
-    o.fatal("bup: no action specified")
-
-if opt.create:
-    if len(remainder) < 1:
-        o.fatal("no paths specified for create")
-    output_file = open_output(opt.file)
-    metadata.save_tree(output_file,
-                       [argv_bytes(r) for r in remainder],
-                       recurse=opt.recurse,
-                       write_paths=opt.paths,
-                       save_symlinks=opt.symlinks,
-                       xdev=opt.xdev)
-elif opt.list:
-    if len(remainder) > 0:
-        o.fatal("cannot specify paths for --list")
-    src = open_input(opt.file)
-    metadata.display_archive(src, open_output(b'-'))
-elif opt.start_extract:
-    if len(remainder) > 0:
-        o.fatal("cannot specify paths for --start-extract")
-    src = open_input(opt.file)
-    metadata.start_extract(src, create_symlinks=opt.symlinks)
-elif opt.finish_extract:
-    if len(remainder) > 0:
-        o.fatal("cannot specify paths for --finish-extract")
-    src = open_input(opt.file)
-    metadata.finish_extract(src, restore_numeric_ids=opt.numeric_ids)
-elif opt.extract:
-    if len(remainder) > 0:
-        o.fatal("cannot specify paths for --extract")
-    src = open_input(opt.file)
-    metadata.extract(src,
-                     restore_numeric_ids=opt.numeric_ids,
-                     create_symlinks=opt.symlinks)
-elif opt.edit:
-    if len(remainder) < 1:
-        o.fatal("no paths specified for edit")
-    output_file = open_output(opt.file)
-
-    unset_user = False # True if --unset-user was the last relevant option.
-    unset_group = False # True if --unset-group was the last relevant option.
-    for flag in flags:
-        if flag[0] == '--set-user':
-            unset_user = False
-        elif flag[0] == '--unset-user':
-            unset_user = True
-        elif flag[0] == '--set-group':
-            unset_group = False
-        elif flag[0] == '--unset-group':
-            unset_group = True
-
-    for path in remainder:
-        f = open(argv_bytes(path), 'rb')
-        try:
-            for m in metadata._ArchiveIterator(f):
-                if opt.set_uid is not None:
-                    try:
-                        m.uid = int(opt.set_uid)
-                    except ValueError:
-                        o.fatal("uid must be an integer")
-
-                if opt.set_gid is not None:
-                    try:
-                        m.gid = int(opt.set_gid)
-                    except ValueError:
-                        o.fatal("gid must be an integer")
-
-                if unset_user:
-                    m.user = b''
-                elif opt.set_user is not None:
-                    m.user = argv_bytes(opt.set_user)
-
-                if unset_group:
-                    m.group = b''
-                elif opt.set_group is not None:
-                    m.group = argv_bytes(opt.set_group)
-
-                m.write(output_file)
-        finally:
-            f.close()
-
-
-if saved_errors:
-    log('WARNING: %d errors encountered.\n' % len(saved_errors))
-    sys.exit(1)
-else:
-    sys.exit(0)
diff --git a/lib/cmd/midx-cmd.py b/lib/cmd/midx-cmd.py
deleted file mode 100755 (executable)
index 35e2bc2..0000000
+++ /dev/null
@@ -1,306 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import, print_function
-from binascii import hexlify
-import glob, math, os, resource, struct, sys, tempfile
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, options, git, midx, _helpers, xstat
-from bup.compat import argv_bytes, hexstr, range
-from bup.helpers import (Sha1, add_error, atomically_replaced_file, debug1, fdatasync,
-                         handle_ctrl_c, log, mmap_readwrite, qprogress,
-                         saved_errors, unlink)
-from bup.io import byte_stream, path_msg
-
-
-PAGE_SIZE=4096
-SHA_PER_PAGE=PAGE_SIZE/20.
-
-optspec = """
-bup midx [options...] <idxnames...>
---
-o,output=  output midx filename (default: auto-generated)
-a,auto     automatically use all existing .midx/.idx files as input
-f,force    merge produce exactly one .midx containing all objects
-p,print    print names of generated midx files
-check      validate contents of the given midx files (with -a, all midx files)
-max-files= maximum number of idx files to open at once [-1]
-d,dir=     directory containing idx/midx files
-"""
-
-merge_into = _helpers.merge_into
-
-
-def _group(l, count):
-    for i in range(0, len(l), count):
-        yield l[i:i+count]
-
-
-def max_files():
-    mf = min(resource.getrlimit(resource.RLIMIT_NOFILE))
-    if mf > 32:
-        mf -= 20  # just a safety margin
-    else:
-        mf -= 6   # minimum safety margin
-    return mf
-
-
-def check_midx(name):
-    nicename = git.repo_rel(name)
-    log('Checking %s.\n' % path_msg(nicename))
-    try:
-        ix = git.open_idx(name)
-    except git.GitError as e:
-        add_error('%s: %s' % (pathmsg(name), e))
-        return
-    for count,subname in enumerate(ix.idxnames):
-        sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
-        for ecount,e in enumerate(sub):
-            if not (ecount % 1234):
-                qprogress('  %d/%d: %s %d/%d\r' 
-                          % (count, len(ix.idxnames),
-                             git.shorten_hash(subname).decode('ascii'),
-                             ecount, len(sub)))
-            if not sub.exists(e):
-                add_error("%s: %s: %s missing from idx"
-                          % (path_msg(nicename),
-                             git.shorten_hash(subname).decode('ascii'),
-                             hexstr(e)))
-            if not ix.exists(e):
-                add_error("%s: %s: %s missing from midx"
-                          % (path_msg(nicename),
-                             git.shorten_hash(subname).decode('ascii'),
-                             hexstr(e)))
-    prev = None
-    for ecount,e in enumerate(ix):
-        if not (ecount % 1234):
-            qprogress('  Ordering: %d/%d\r' % (ecount, len(ix)))
-        if e and prev and not e >= prev:
-            add_error('%s: ordering error: %s < %s'
-                      % (nicename, hexstr(e), hexstr(prev)))
-        prev = e
-
-
-_first = None
-def _do_midx(outdir, outfilename, infilenames, prefixstr):
-    global _first
-    if not outfilename:
-        assert(outdir)
-        sum = hexlify(Sha1(b'\0'.join(infilenames)).digest())
-        outfilename = b'%s/midx-%s.midx' % (outdir, sum)
-    
-    inp = []
-    total = 0
-    allfilenames = []
-    midxs = []
-    try:
-        for name in infilenames:
-            ix = git.open_idx(name)
-            midxs.append(ix)
-            inp.append((
-                ix.map,
-                len(ix),
-                ix.sha_ofs,
-                isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
-                len(allfilenames),
-            ))
-            for n in ix.idxnames:
-                allfilenames.append(os.path.basename(n))
-            total += len(ix)
-        inp.sort(reverse=True, key=lambda x: x[0][x[2] : x[2] + 20])
-
-        if not _first: _first = outdir
-        dirprefix = (_first != outdir) and git.repo_rel(outdir) + b': ' or b''
-        debug1('midx: %s%screating from %d files (%d objects).\n'
-               % (dirprefix, prefixstr, len(infilenames), total))
-        if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
-           or ((opt.auto or opt.force) and len(infilenames) < 2) \
-           or (opt.force and not total):
-            debug1('midx: nothing to do.\n')
-            return
-
-        pages = int(total/SHA_PER_PAGE) or 1
-        bits = int(math.ceil(math.log(pages, 2)))
-        entries = 2**bits
-        debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))
-
-        unlink(outfilename)
-        with atomically_replaced_file(outfilename, 'wb') as f:
-            f.write(b'MIDX')
-            f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
-            assert(f.tell() == 12)
-
-            f.truncate(12 + 4*entries + 20*total + 4*total)
-            f.flush()
-            fdatasync(f.fileno())
-
-            fmap = mmap_readwrite(f, close=False)
-            count = merge_into(fmap, bits, total, inp)
-            del fmap # Assume this calls msync() now.
-            f.seek(0, os.SEEK_END)
-            f.write(b'\0'.join(allfilenames))
-    finally:
-        for ix in midxs:
-            if isinstance(ix, midx.PackMidx):
-                ix.close()
-        midxs = None
-        inp = None
-
-
-    # This is just for testing (if you enable this, don't clear inp above)
-    if 0:
-        p = midx.PackMidx(outfilename)
-        assert(len(p.idxnames) == len(infilenames))
-        log(repr(p.idxnames) + '\n')
-        assert(len(p) == total)
-        for pe, e in p, git.idxmerge(inp, final_progress=False):
-            pin = next(pi)
-            assert(i == pin)
-            assert(p.exists(i))
-
-    return total, outfilename
-
-
-def do_midx(outdir, outfilename, infilenames, prefixstr, prout):
-    rv = _do_midx(outdir, outfilename, infilenames, prefixstr)
-    if rv and opt['print']:
-        prout.write(rv[1] + b'\n')
-
-
-def do_midx_dir(path, outfilename, prout):
-    already = {}
-    sizes = {}
-    if opt.force and not opt.auto:
-        midxs = []   # don't use existing midx files
-    else:
-        midxs = glob.glob(b'%s/*.midx' % path)
-        contents = {}
-        for mname in midxs:
-            m = git.open_idx(mname)
-            contents[mname] = [(b'%s/%s' % (path,i)) for i in m.idxnames]
-            sizes[mname] = len(m)
-                    
-        # sort the biggest+newest midxes first, so that we can eliminate
-        # smaller (or older) redundant ones that come later in the list
-        midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))
-        
-        for mname in midxs:
-            any = 0
-            for iname in contents[mname]:
-                if not already.get(iname):
-                    already[iname] = 1
-                    any = 1
-            if not any:
-                debug1('%r is redundant\n' % mname)
-                unlink(mname)
-                already[mname] = 1
-
-    midxs = [k for k in midxs if not already.get(k)]
-    idxs = [k for k in glob.glob(b'%s/*.idx' % path) if not already.get(k)]
-
-    for iname in idxs:
-        i = git.open_idx(iname)
-        sizes[iname] = len(i)
-
-    all = [(sizes[n],n) for n in (midxs + idxs)]
-    
-    # FIXME: what are the optimal values?  Does this make sense?
-    DESIRED_HWM = opt.force and 1 or 5
-    DESIRED_LWM = opt.force and 1 or 2
-    existed = dict((name,1) for sz,name in all)
-    debug1('midx: %d indexes; want no more than %d.\n' 
-           % (len(all), DESIRED_HWM))
-    if len(all) <= DESIRED_HWM:
-        debug1('midx: nothing to do.\n')
-    while len(all) > DESIRED_HWM:
-        all.sort()
-        part1 = [name for sz,name in all[:len(all)-DESIRED_LWM+1]]
-        part2 = all[len(all)-DESIRED_LWM+1:]
-        all = list(do_midx_group(path, outfilename, part1)) + part2
-        if len(all) > DESIRED_HWM:
-            debug1('\nStill too many indexes (%d > %d).  Merging again.\n'
-                   % (len(all), DESIRED_HWM))
-
-    if opt['print']:
-        for sz,name in all:
-            if not existed.get(name):
-                prout.write(name + b'\n')
-
-
-def do_midx_group(outdir, outfilename, infiles):
-    groups = list(_group(infiles, opt.max_files))
-    gprefix = ''
-    for n,sublist in enumerate(groups):
-        if len(groups) != 1:
-            gprefix = 'Group %d: ' % (n+1)
-        rv = _do_midx(outdir, outfilename, sublist, gprefix)
-        if rv:
-            yield rv
-
-
-handle_ctrl_c()
-
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-opt.dir = argv_bytes(opt.dir) if opt.dir else None
-opt.output = argv_bytes(opt.output) if opt.output else None
-
-if extra and (opt.auto or opt.force):
-    o.fatal("you can't use -f/-a and also provide filenames")
-if opt.check and (not extra and not opt.auto):
-    o.fatal("if using --check, you must provide filenames or -a")
-
-git.check_repo_or_die()
-
-if opt.max_files < 0:
-    opt.max_files = max_files()
-assert(opt.max_files >= 5)
-
-extra = [argv_bytes(x) for x in extra]
-
-if opt.check:
-    # check existing midx files
-    if extra:
-        midxes = extra
-    else:
-        midxes = []
-        paths = opt.dir and [opt.dir] or git.all_packdirs()
-        for path in paths:
-            debug1('midx: scanning %s\n' % path)
-            midxes += glob.glob(os.path.join(path, b'*.midx'))
-    for name in midxes:
-        check_midx(name)
-    if not saved_errors:
-        log('All tests passed.\n')
-else:
-    if extra:
-        sys.stdout.flush()
-        do_midx(git.repo(b'objects/pack'), opt.output, extra, b'',
-                byte_stream(sys.stdout))
-    elif opt.auto or opt.force:
-        sys.stdout.flush()
-        paths = opt.dir and [opt.dir] or git.all_packdirs()
-        for path in paths:
-            debug1('midx: scanning %s\n' % path_msg(path))
-            do_midx_dir(path, opt.output, byte_stream(sys.stdout))
-    else:
-        o.fatal("you must use -f or -a or provide input filenames")
-
-if saved_errors:
-    log('WARNING: %d errors encountered.\n' % len(saved_errors))
-    sys.exit(1)
diff --git a/lib/cmd/mux-cmd.py b/lib/cmd/mux-cmd.py
deleted file mode 100755 (executable)
index a3b4d57..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import
-import os, struct, subprocess, sys
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, options
-from bup.helpers import debug1, debug2, mux
-from bup.io import byte_stream
-
-# Give the subcommand exclusive access to stdin.
-orig_stdin = os.dup(0)
-devnull = os.open(os.devnull, os.O_RDONLY)
-os.dup2(devnull, 0)
-os.close(devnull)
-
-optspec = """
-bup mux command [arguments...]
---
-"""
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-if len(extra) < 1:
-    o.fatal('command is required')
-
-subcmd = extra
-
-debug2('bup mux: starting %r\n' % (extra,))
-
-outr, outw = os.pipe()
-errr, errw = os.pipe()
-def close_fds():
-    os.close(outr)
-    os.close(errr)
-
-p = subprocess.Popen(subcmd, stdin=orig_stdin, stdout=outw, stderr=errw,
-                     close_fds=False, preexec_fn=close_fds)
-os.close(outw)
-os.close(errw)
-sys.stdout.flush()
-out = byte_stream(sys.stdout)
-out.write(b'BUPMUX')
-out.flush()
-mux(p, out.fileno(), outr, errr)
-os.close(outr)
-os.close(errr)
-prv = p.wait()
-
-if prv:
-    debug1('%s exited with code %d\n' % (extra[0], prv))
-
-debug1('bup mux: done\n')
-
-sys.exit(prv)
diff --git a/lib/cmd/on--server-cmd.py b/lib/cmd/on--server-cmd.py
deleted file mode 100755 (executable)
index 0c151b0..0000000
+++ /dev/null
@@ -1,76 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import
-import os, struct, sys
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, options, helpers, path
-from bup.compat import environ, py_maj
-from bup.io import byte_stream
-
-optspec = """
-bup on--server
---
-    This command is run automatically by 'bup on'
-"""
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-if extra:
-    o.fatal('no arguments expected')
-
-# get the subcommand's argv.
-# Normally we could just pass this on the command line, but since we'll often
-# be getting called on the other end of an ssh pipe, which tends to mangle
-# argv (by sending it via the shell), this way is much safer.
-
-stdin = byte_stream(sys.stdin)
-buf = stdin.read(4)
-sz = struct.unpack('!I', buf)[0]
-assert(sz > 0)
-assert(sz < 1000000)
-buf = stdin.read(sz)
-assert(len(buf) == sz)
-argv = buf.split(b'\0')
-argv[0] = path.exe()
-argv = [argv[0], b'mux', b'--'] + argv
-
-
-# stdin/stdout are supposedly connected to 'bup server' that the caller
-# started for us (often on the other end of an ssh tunnel), so we don't want
-# to misuse them.  Move them out of the way, then replace stdout with
-# a pointer to stderr in case our subcommand wants to do something with it.
-#
-# It might be nice to do the same with stdin, but my experiments showed that
-# ssh seems to make its child's stderr a readable-but-never-reads-anything
-# socket.  They really should have used shutdown(SHUT_WR) on the other end
-# of it, but probably didn't.  Anyway, it's too messy, so let's just make sure
-# anyone reading from stdin is disappointed.
-#
-# (You can't just leave stdin/stdout "not open" by closing the file
-# descriptors.  Then the next file that opens is automatically assigned 0 or 1,
-# and people *trying* to read/write stdin/stdout get screwed.)
-os.dup2(0, 3)
-os.dup2(1, 4)
-os.dup2(2, 1)
-fd = os.open(os.devnull, os.O_RDONLY)
-os.dup2(fd, 0)
-os.close(fd)
-
-environ[b'BUP_SERVER_REVERSE'] = helpers.hostname()
-os.execvp(argv[0], argv)
-sys.exit(99)
diff --git a/lib/cmd/on-cmd.py b/lib/cmd/on-cmd.py
deleted file mode 100755 (executable)
index 9eaabef..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import
-from subprocess import PIPE
-import getopt, os, signal, struct, subprocess, sys
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, options, ssh, path
-from bup.compat import argv_bytes
-from bup.helpers import DemuxConn, log
-from bup.io import byte_stream
-
-
-optspec = """
-bup on <hostname> index ...
-bup on <hostname> save ...
-bup on <hostname> split ...
-bup on <hostname> get ...
-"""
-o = options.Options(optspec, optfunc=getopt.getopt)
-opt, flags, extra = o.parse(compat.argv[1:])
-if len(extra) < 2:
-    o.fatal('arguments expected')
-
-class SigException(Exception):
-    def __init__(self, signum):
-        self.signum = signum
-        Exception.__init__(self, 'signal %d received' % signum)
-def handler(signum, frame):
-    raise SigException(signum)
-
-signal.signal(signal.SIGTERM, handler)
-signal.signal(signal.SIGINT, handler)
-
-sys.stdout.flush()
-out = byte_stream(sys.stdout)
-
-try:
-    sp = None
-    p = None
-    ret = 99
-
-    hp = argv_bytes(extra[0]).split(b':')
-    if len(hp) == 1:
-        (hostname, port) = (hp[0], None)
-    else:
-        (hostname, port) = hp
-    argv = [argv_bytes(x) for x in extra[1:]]
-    p = ssh.connect(hostname, port, b'on--server', stderr=PIPE)
-
-    try:
-        argvs = b'\0'.join([b'bup'] + argv)
-        p.stdin.write(struct.pack('!I', len(argvs)) + argvs)
-        p.stdin.flush()
-        sp = subprocess.Popen([path.exe(), b'server'],
-                              stdin=p.stdout, stdout=p.stdin)
-        p.stdin.close()
-        p.stdout.close()
-        # Demultiplex remote client's stderr (back to stdout/stderr).
-        dmc = DemuxConn(p.stderr.fileno(), open(os.devnull, "wb"))
-        for line in iter(dmc.readline, b''):
-            out.write(line)
-    finally:
-        while 1:
-            # if we get a signal while waiting, we have to keep waiting, just
-            # in case our child doesn't die.
-            try:
-                ret = p.wait()
-                if sp:
-                    sp.wait()
-                break
-            except SigException as e:
-                log('\nbup on: %s\n' % e)
-                os.kill(p.pid, e.signum)
-                ret = 84
-except SigException as e:
-    if ret == 0:
-        ret = 99
-    log('\nbup on: %s\n' % e)
-
-sys.exit(ret)
diff --git a/lib/cmd/prune-older-cmd.py b/lib/cmd/prune-older-cmd.py
deleted file mode 100755 (executable)
index aa09d41..0000000
+++ /dev/null
@@ -1,181 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import, print_function
-from binascii import hexlify, unhexlify
-from collections import defaultdict
-from itertools import groupby
-from sys import stderr
-from time import localtime, strftime, time
-import os.path, re, sys
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, git, options
-from bup.compat import argv_bytes, int_types
-from bup.gc import bup_gc
-from bup.helpers import die_if_errors, log, partition, period_as_secs
-from bup.io import byte_stream
-from bup.repo import LocalRepo
-from bup.rm import bup_rm
-
-
-def branches(refnames=tuple()):
-    return ((name[11:], hexlify(sha)) for (name,sha)
-            in git.list_refs(patterns=(b'refs/heads/' + n for n in refnames),
-                             limit_to_heads=True))
-
-def save_name(branch, utc):
-    return branch + b'/' \
-            + strftime('%Y-%m-%d-%H%M%S', localtime(utc)).encode('ascii')
-
-def classify_saves(saves, period_start):
-    """For each (utc, id) in saves, yield (True, (utc, id)) if the save
-    should be kept and (False, (utc, id)) if the save should be removed.
-    The ids are binary hashes.
-    """
-
-    def retain_newest_in_region(region):
-        for save in region[0:1]:
-            yield True, save
-        for save in region[1:]:
-            yield False, save
-
-    matches, rest = partition(lambda s: s[0] >= period_start['all'], saves)
-    for save in matches:
-        yield True, save
-
-    tm_ranges = ((period_start['dailies'], lambda s: localtime(s[0]).tm_yday),
-                 (period_start['monthlies'], lambda s: localtime(s[0]).tm_mon),
-                 (period_start['yearlies'], lambda s: localtime(s[0]).tm_year))
-
-    # Break the decreasing utc sorted saves up into the respective
-    # period ranges (dailies, monthlies, ...).  Within each range,
-    # group the saves by the period scale (days, months, ...), and
-    # then yield a "keep" action (True, utc) for the newest save in
-    # each group, and a "drop" action (False, utc) for the rest.
-    for pstart, time_region_id in tm_ranges:
-        matches, rest = partition(lambda s: s[0] >= pstart, rest)
-        for region_id, region_saves in groupby(matches, time_region_id):
-            for action in retain_newest_in_region(list(region_saves)):
-                yield action
-
-    # Finally, drop any saves older than the specified periods
-    for save in rest:
-        yield False, save
-
-
-optspec = """
-bup prune-older [options...] [BRANCH...]
---
-keep-all-for=       retain all saves within the PERIOD
-keep-dailies-for=   retain the newest save per day within the PERIOD
-keep-monthlies-for= retain the newest save per month within the PERIOD
-keep-yearlies-for=  retain the newest save per year within the PERIOD
-wrt=                end all periods at this number of seconds since the epoch
-pretend       don't prune, just report intended actions to standard output
-gc            collect garbage after removals [1]
-gc-threshold= only rewrite a packfile if it's over this percent garbage [10]
-#,compress=   set compression level to # (0-9, 9 is highest) [1]
-v,verbose     increase log output (can be used more than once)
-unsafe        use the command even though it may be DANGEROUS
-"""
-
-o = options.Options(optspec)
-opt, flags, roots = o.parse(compat.argv[1:])
-roots = [argv_bytes(x) for x in roots]
-
-if not opt.unsafe:
-    o.fatal('refusing to run dangerous, experimental command without --unsafe')
-
-now = int(time()) if opt.wrt is None else opt.wrt
-if not isinstance(now, int_types):
-    o.fatal('--wrt value ' + str(now) + ' is not an integer')
-
-period_start = {}
-for period, extent in (('all', opt.keep_all_for),
-                       ('dailies', opt.keep_dailies_for),
-                       ('monthlies', opt.keep_monthlies_for),
-                       ('yearlies', opt.keep_yearlies_for)):
-    if extent:
-        secs = period_as_secs(extent.encode('ascii'))
-        if not secs:
-            o.fatal('%r is not a valid period' % extent)
-        period_start[period] = now - secs
-
-if not period_start:
-    o.fatal('at least one keep argument is required')
-
-period_start = defaultdict(lambda: float('inf'), period_start)
-
-if opt.verbose:
-    epoch_ymd = strftime('%Y-%m-%d-%H%M%S', localtime(0))
-    for kind in ['all', 'dailies', 'monthlies', 'yearlies']:
-        period_utc = period_start[kind]
-        if period_utc != float('inf'):
-            if not (period_utc > float('-inf')):
-                log('keeping all ' + kind)
-            else:
-                try:
-                    when = strftime('%Y-%m-%d-%H%M%S', localtime(period_utc))
-                    log('keeping ' + kind + ' since ' + when + '\n')
-                except ValueError as ex:
-                    if period_utc < 0:
-                        log('keeping %s since %d seconds before %s\n'
-                            %(kind, abs(period_utc), epoch_ymd))
-                    elif period_utc > 0:
-                        log('keeping %s since %d seconds after %s\n'
-                            %(kind, period_utc, epoch_ymd))
-                    else:
-                        log('keeping %s since %s\n' % (kind, epoch_ymd))
-
-git.check_repo_or_die()
-
-# This could be more efficient, but for now just build the whole list
-# in memory and let bup_rm() do some redundant work.
-
-def parse_info(f):
-    author_secs = f.readline().strip()
-    return int(author_secs)
-
-sys.stdout.flush()
-out = byte_stream(sys.stdout)
-
-removals = []
-for branch, branch_id in branches(roots):
-    die_if_errors()
-    saves = ((utc, unhexlify(oidx)) for (oidx, utc) in
-             git.rev_list(branch_id, format=b'%at', parse=parse_info))
-    for keep_save, (utc, id) in classify_saves(saves, period_start):
-        assert(keep_save in (False, True))
-        # FIXME: base removals on hashes
-        if opt.pretend:
-            out.write((b'+ ' if keep_save else b'- ')
-                      + save_name(branch, utc) + b'\n')
-        elif not keep_save:
-            removals.append(save_name(branch, utc))
-
-if not opt.pretend:
-    die_if_errors()
-    repo = LocalRepo()
-    bup_rm(repo, removals, compression=opt.compress, verbosity=opt.verbose)
-    if opt.gc:
-        die_if_errors()
-        bup_gc(threshold=opt.gc_threshold,
-               compression=opt.compress,
-               verbosity=opt.verbose)
-
-die_if_errors()
diff --git a/lib/cmd/random-cmd.py b/lib/cmd/random-cmd.py
deleted file mode 100755 (executable)
index 62889a2..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import
-import os, sys
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, options, _helpers
-from bup.helpers import handle_ctrl_c, log, parse_num
-
-
-optspec = """
-bup random [-S seed] <numbytes>
---
-S,seed=   optional random number seed [1]
-f,force   print random data to stdout even if it's a tty
-v,verbose print byte counter to stderr
-"""
-o = options.Options(optspec)
-(opt, flags, extra) = o.parse(compat.argv[1:])
-
-if len(extra) != 1:
-    o.fatal("exactly one argument expected")
-
-total = parse_num(extra[0])
-
-handle_ctrl_c()
-
-if opt.force or (not os.isatty(1) and
-                 not int(os.environ.get('BUP_FORCE_TTY', 0)) & 1):
-    _helpers.write_random(sys.stdout.fileno(), total, opt.seed,
-                          opt.verbose and 1 or 0)
-else:
-    log('error: not writing binary data to a terminal. Use -f to force.\n')
-    sys.exit(1)
diff --git a/lib/cmd/restore-cmd.py b/lib/cmd/restore-cmd.py
deleted file mode 100755 (executable)
index 09fb9ff..0000000
+++ /dev/null
@@ -1,321 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import
-from stat import S_ISDIR
-import copy, errno, os, re, stat, sys
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, options, git, metadata, vfs
-from bup._helpers import write_sparsely
-from bup.compat import argv_bytes, fsencode, wrap_main
-from bup.helpers import (add_error, chunkyreader, die_if_errors, handle_ctrl_c,
-                         log, mkdirp, parse_rx_excludes, progress, qprogress,
-                         saved_errors, should_rx_exclude_path, unlink)
-from bup.io import byte_stream
-from bup.repo import LocalRepo, RemoteRepo
-
-
-optspec = """
-bup restore [-r host:path] [-C outdir] </branch/revision/path/to/dir ...>
---
-r,remote=   remote repository path
-C,outdir=   change to given outdir before extracting files
-numeric-ids restore numeric IDs (user, group, etc.) rather than names
-exclude-rx= skip paths matching the unanchored regex (may be repeated)
-exclude-rx-from= skip --exclude-rx patterns in file (may be repeated)
-sparse      create sparse files
-v,verbose   increase log output (can be used more than once)
-map-user=   given OLD=NEW, restore OLD user as NEW user
-map-group=  given OLD=NEW, restore OLD group as NEW group
-map-uid=    given OLD=NEW, restore OLD uid as NEW uid
-map-gid=    given OLD=NEW, restore OLD gid as NEW gid
-q,quiet     don't show progress meter
-"""
-
-total_restored = 0
-
-# stdout should be flushed after each line, even when not connected to a tty
-stdoutfd = sys.stdout.fileno()
-sys.stdout.flush()
-sys.stdout = os.fdopen(stdoutfd, 'w', 1)
-out = byte_stream(sys.stdout)
-
-def valid_restore_path(path):
-    path = os.path.normpath(path)
-    if path.startswith(b'/'):
-        path = path[1:]
-    if b'/' in path:
-        return True
-
-def parse_owner_mappings(type, options, fatal):
-    """Traverse the options and parse all --map-TYPEs, or call Option.fatal()."""
-    opt_name = '--map-' + type
-    if type in ('uid', 'gid'):
-        value_rx = re.compile(br'^(-?[0-9]+)=(-?[0-9]+)$')
-    else:
-        value_rx = re.compile(br'^([^=]+)=([^=]*)$')
-    owner_map = {}
-    for flag in options:
-        (option, parameter) = flag
-        if option != opt_name:
-            continue
-        parameter = argv_bytes(parameter)
-        match = value_rx.match(parameter)
-        if not match:
-            raise fatal("couldn't parse %r as %s mapping" % (parameter, type))
-        old_id, new_id = match.groups()
-        if type in ('uid', 'gid'):
-            old_id = int(old_id)
-            new_id = int(new_id)
-        owner_map[old_id] = new_id
-    return owner_map
-
-def apply_metadata(meta, name, restore_numeric_ids, owner_map):
-    m = copy.deepcopy(meta)
-    m.user = owner_map['user'].get(m.user, m.user)
-    m.group = owner_map['group'].get(m.group, m.group)
-    m.uid = owner_map['uid'].get(m.uid, m.uid)
-    m.gid = owner_map['gid'].get(m.gid, m.gid)
-    m.apply_to_path(name, restore_numeric_ids = restore_numeric_ids)
-    
-def hardlink_compatible(prev_path, prev_item, new_item, top):
-    prev_candidate = top + prev_path
-    if not os.path.exists(prev_candidate):
-        return False
-    prev_meta, new_meta = prev_item.meta, new_item.meta
-    if new_item.oid != prev_item.oid \
-            or new_meta.mtime != prev_meta.mtime \
-            or new_meta.ctime != prev_meta.ctime \
-            or new_meta.mode != prev_meta.mode:
-        return False
-    # FIXME: should we be checking the path on disk, or the recorded metadata?
-    # The exists() above might seem to suggest the former.
-    if not new_meta.same_file(prev_meta):
-        return False
-    return True
-
-def hardlink_if_possible(fullname, item, top, hardlinks):
-    """Find a suitable hardlink target, link to it, and return true,
-    otherwise return false."""
-    # The cwd will be dirname(fullname), and fullname will be
-    # absolute, i.e. /foo/bar, and the caller is expected to handle
-    # restoring the metadata if hardlinking isn't possible.
-
-    # FIXME: we can probably replace the target_vfs_path with the
-    # relevant vfs item
-    
-    # hardlinks tracks a list of (restore_path, vfs_path, meta)
-    # triples for each path we've written for a given hardlink_target.
-    # This allows us to handle the case where we restore a set of
-    # hardlinks out of order (with respect to the original save
-    # call(s)) -- i.e. when we don't restore the hardlink_target path
-    # first.  This data also allows us to attempt to handle other
-    # situations like hardlink sets that change on disk during a save,
-    # or between index and save.
-
-    target = item.meta.hardlink_target
-    assert(target)
-    assert(fullname.startswith(b'/'))
-    target_versions = hardlinks.get(target)
-    if target_versions:
-        # Check every path in the set that we've written so far for a match.
-        for prev_path, prev_item in target_versions:
-            if hardlink_compatible(prev_path, prev_item, item, top):
-                try:
-                    os.link(top + prev_path, top + fullname)
-                    return True
-                except OSError as e:
-                    if e.errno != errno.EXDEV:
-                        raise
-    else:
-        target_versions = []
-        hardlinks[target] = target_versions
-    target_versions.append((fullname, item))
-    return False
-
-def write_file_content(repo, dest_path, vfs_file):
-    with vfs.fopen(repo, vfs_file) as inf:
-        with open(dest_path, 'wb') as outf:
-            for b in chunkyreader(inf):
-                outf.write(b)
-
-def write_file_content_sparsely(repo, dest_path, vfs_file):
-    with vfs.fopen(repo, vfs_file) as inf:
-        outfd = os.open(dest_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
-        try:
-            trailing_zeros = 0;
-            for b in chunkyreader(inf):
-                trailing_zeros = write_sparsely(outfd, b, 512, trailing_zeros)
-            pos = os.lseek(outfd, trailing_zeros, os.SEEK_END)
-            os.ftruncate(outfd, pos)
-        finally:
-            os.close(outfd)
-            
-def restore(repo, parent_path, name, item, top, sparse, numeric_ids, owner_map,
-            exclude_rxs, verbosity, hardlinks):
-    global total_restored
-    mode = vfs.item_mode(item)
-    treeish = S_ISDIR(mode)
-    fullname = parent_path + b'/' + name
-    # Match behavior of index --exclude-rx with respect to paths.
-    if should_rx_exclude_path(fullname + (b'/' if treeish else b''),
-                              exclude_rxs):
-        return
-
-    if not treeish:
-        # Do this now so we'll have meta.symlink_target for verbose output
-        item = vfs.augment_item_meta(repo, item, include_size=True)
-        meta = item.meta
-        assert(meta.mode == mode)
-
-    if stat.S_ISDIR(mode):
-        if verbosity >= 1:
-            out.write(b'%s/\n' % fullname)
-    elif stat.S_ISLNK(mode):
-        assert(meta.symlink_target)
-        if verbosity >= 2:
-            out.write(b'%s@ -> %s\n' % (fullname, meta.symlink_target))
-    else:
-        if verbosity >= 2:
-            out.write(fullname + b'\n')
-
-    orig_cwd = os.getcwd()
-    try:
-        if treeish:
-            # Assumes contents() returns '.' with the full metadata first
-            sub_items = vfs.contents(repo, item, want_meta=True)
-            dot, item = next(sub_items, None)
-            assert(dot == b'.')
-            item = vfs.augment_item_meta(repo, item, include_size=True)
-            meta = item.meta
-            meta.create_path(name)
-            os.chdir(name)
-            total_restored += 1
-            if verbosity >= 0:
-                qprogress('Restoring: %d\r' % total_restored)
-            for sub_name, sub_item in sub_items:
-                restore(repo, fullname, sub_name, sub_item, top, sparse,
-                        numeric_ids, owner_map, exclude_rxs, verbosity,
-                        hardlinks)
-            os.chdir(b'..')
-            apply_metadata(meta, name, numeric_ids, owner_map)
-        else:
-            created_hardlink = False
-            if meta.hardlink_target:
-                created_hardlink = hardlink_if_possible(fullname, item, top,
-                                                        hardlinks)
-            if not created_hardlink:
-                meta.create_path(name)
-                if stat.S_ISREG(meta.mode):
-                    if sparse:
-                        write_file_content_sparsely(repo, name, item)
-                    else:
-                        write_file_content(repo, name, item)
-            total_restored += 1
-            if verbosity >= 0:
-                qprogress('Restoring: %d\r' % total_restored)
-            if not created_hardlink:
-                apply_metadata(meta, name, numeric_ids, owner_map)
-    finally:
-        os.chdir(orig_cwd)
-
-def main():
-    o = options.Options(optspec)
-    opt, flags, extra = o.parse(compat.argv[1:])
-    verbosity = (opt.verbose or 0) if not opt.quiet else -1
-    if opt.remote:
-        opt.remote = argv_bytes(opt.remote)
-    if opt.outdir:
-        opt.outdir = argv_bytes(opt.outdir)
-    
-    git.check_repo_or_die()
-
-    if not extra:
-        o.fatal('must specify at least one filename to restore')
-
-    exclude_rxs = parse_rx_excludes(flags, o.fatal)
-
-    owner_map = {}
-    for map_type in ('user', 'group', 'uid', 'gid'):
-        owner_map[map_type] = parse_owner_mappings(map_type, flags, o.fatal)
-
-    if opt.outdir:
-        mkdirp(opt.outdir)
-        os.chdir(opt.outdir)
-
-    repo = RemoteRepo(opt.remote) if opt.remote else LocalRepo()
-    top = fsencode(os.getcwd())
-    hardlinks = {}
-    for path in [argv_bytes(x) for x in extra]:
-        if not valid_restore_path(path):
-            add_error("path %r doesn't include a branch and revision" % path)
-            continue
-        try:
-            resolved = vfs.resolve(repo, path, want_meta=True, follow=False)
-        except vfs.IOError as e:
-            add_error(e)
-            continue
-        if len(resolved) == 3 and resolved[2][0] == b'latest':
-            # Follow latest symlink to the actual save
-            try:
-                resolved = vfs.resolve(repo, b'latest', parent=resolved[:-1],
-                                       want_meta=True)
-            except vfs.IOError as e:
-                add_error(e)
-                continue
-            # Rename it back to 'latest'
-            resolved = tuple(elt if i != 2 else (b'latest',) + elt[1:]
-                             for i, elt in enumerate(resolved))
-        path_parent, path_name = os.path.split(path)
-        leaf_name, leaf_item = resolved[-1]
-        if not leaf_item:
-            add_error('error: cannot access %r in %r'
-                      % (b'/'.join(name for name, item in resolved),
-                         path))
-            continue
-        if not path_name or path_name == b'.':
-            # Source is /foo/what/ever/ or /foo/what/ever/. -- extract
-            # what/ever/* to the current directory, and if name == '.'
-            # (i.e. /foo/what/ever/.), then also restore what/ever's
-            # metadata to the current directory.
-            treeish = vfs.item_mode(leaf_item)
-            if not treeish:
-                add_error('%r cannot be restored as a directory' % path)
-            else:
-                items = vfs.contents(repo, leaf_item, want_meta=True)
-                dot, leaf_item = next(items, None)
-                assert dot == b'.'
-                for sub_name, sub_item in items:
-                    restore(repo, b'', sub_name, sub_item, top,
-                            opt.sparse, opt.numeric_ids, owner_map,
-                            exclude_rxs, verbosity, hardlinks)
-                if path_name == b'.':
-                    leaf_item = vfs.augment_item_meta(repo, leaf_item,
-                                                      include_size=True)
-                    apply_metadata(leaf_item.meta, b'.',
-                                   opt.numeric_ids, owner_map)
-        else:
-            restore(repo, b'', leaf_name, leaf_item, top,
-                    opt.sparse, opt.numeric_ids, owner_map,
-                    exclude_rxs, verbosity, hardlinks)
-
-    if verbosity >= 0:
-        progress('Restoring: %d, done.\n' % total_restored)
-    die_if_errors()
-
-wrap_main(main)
diff --git a/lib/cmd/rm-cmd.py b/lib/cmd/rm-cmd.py
deleted file mode 100755 (executable)
index 1af1e59..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import
-import os.path, sys
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat
-from bup.compat import argv_bytes
-from bup.git import check_repo_or_die
-from bup.options import Options
-from bup.helpers import die_if_errors, handle_ctrl_c, log
-from bup.repo import LocalRepo
-from bup.rm import bup_rm
-
-optspec = """
-bup rm <branch|save...>
---
-#,compress=  set compression level to # (0-9, 9 is highest) [6]
-v,verbose    increase verbosity (can be specified multiple times)
-unsafe       use the command even though it may be DANGEROUS
-"""
-
-handle_ctrl_c()
-
-o = Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-
-if not opt.unsafe:
-    o.fatal('refusing to run dangerous, experimental command without --unsafe')
-
-if len(extra) < 1:
-    o.fatal('no paths specified')
-
-check_repo_or_die()
-repo = LocalRepo()
-bup_rm(repo, [argv_bytes(x) for x in extra],
-       compression=opt.compress, verbosity=opt.verbose)
-die_if_errors()
diff --git a/lib/cmd/save-cmd.py b/lib/cmd/save-cmd.py
deleted file mode 100755 (executable)
index 37155cb..0000000
+++ /dev/null
@@ -1,518 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import, print_function
-from binascii import hexlify
-from errno import EACCES
-from io import BytesIO
-import math, os, stat, sys, time
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, hashsplit, git, options, index, client, metadata
-from bup import hlinkdb
-from bup.compat import argv_bytes, environ
-from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE, GIT_MODE_SYMLINK
-from bup.helpers import (add_error, grafted_path_components, handle_ctrl_c,
-                         hostname, istty2, log, parse_date_or_fatal, parse_num,
-                         path_components, progress, qprogress, resolve_parent,
-                         saved_errors, stripped_path_components,
-                         valid_save_name)
-from bup.io import byte_stream, path_msg
-from bup.pwdgrp import userfullname, username
-
-
-optspec = """
-bup save [-tc] [-n name] <filenames...>
---
-r,remote=  hostname:/path/to/repo of remote repository
-t,tree     output a tree id
-c,commit   output a commit id
-n,name=    name of backup set to update (if any)
-d,date=    date for the commit (seconds since the epoch)
-v,verbose  increase log output (can be used more than once)
-q,quiet    don't show progress meter
-smaller=   only back up files smaller than n bytes
-bwlimit=   maximum bytes/sec to transmit to server
-f,indexfile=  the name of the index file (normally BUP_DIR/bupindex)
-strip      strips the path to every filename given
-strip-path= path-prefix to be stripped when saving
-graft=     a graft point *old_path*=*new_path* (can be used more than once)
-#,compress=  set compression level to # (0-9, 9 is highest) [1]
-"""
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-
-if opt.indexfile:
-    opt.indexfile = argv_bytes(opt.indexfile)
-if opt.name:
-    opt.name = argv_bytes(opt.name)
-if opt.remote:
-    opt.remote = argv_bytes(opt.remote)
-if opt.strip_path:
-    opt.strip_path = argv_bytes(opt.strip_path)
-
-git.check_repo_or_die()
-if not (opt.tree or opt.commit or opt.name):
-    o.fatal("use one or more of -t, -c, -n")
-if not extra:
-    o.fatal("no filenames given")
-
-extra = [argv_bytes(x) for x in extra]
-
-opt.progress = (istty2 and not opt.quiet)
-opt.smaller = parse_num(opt.smaller or 0)
-if opt.bwlimit:
-    client.bwlimit = parse_num(opt.bwlimit)
-
-if opt.date:
-    date = parse_date_or_fatal(opt.date, o.fatal)
-else:
-    date = time.time()
-
-if opt.strip and opt.strip_path:
-    o.fatal("--strip is incompatible with --strip-path")
-
-graft_points = []
-if opt.graft:
-    if opt.strip:
-        o.fatal("--strip is incompatible with --graft")
-
-    if opt.strip_path:
-        o.fatal("--strip-path is incompatible with --graft")
-
-    for (option, parameter) in flags:
-        if option == "--graft":
-            parameter = argv_bytes(parameter)
-            splitted_parameter = parameter.split(b'=')
-            if len(splitted_parameter) != 2:
-                o.fatal("a graft point must be of the form old_path=new_path")
-            old_path, new_path = splitted_parameter
-            if not (old_path and new_path):
-                o.fatal("a graft point cannot be empty")
-            graft_points.append((resolve_parent(old_path),
-                                 resolve_parent(new_path)))
-
-is_reverse = environ.get(b'BUP_SERVER_REVERSE')
-if is_reverse and opt.remote:
-    o.fatal("don't use -r in reverse mode; it's automatic")
-
-name = opt.name
-if name and not valid_save_name(name):
-    o.fatal("'%s' is not a valid branch name" % path_msg(name))
-refname = name and b'refs/heads/%s' % name or None
-if opt.remote or is_reverse:
-    try:
-        cli = client.Client(opt.remote)
-    except client.ClientError as e:
-        log('error: %s' % e)
-        sys.exit(1)
-    oldref = refname and cli.read_ref(refname) or None
-    w = cli.new_packwriter(compression_level=opt.compress)
-else:
-    cli = None
-    oldref = refname and git.read_ref(refname) or None
-    w = git.PackWriter(compression_level=opt.compress)
-
-handle_ctrl_c()
-
-
-# Metadata is stored in a file named .bupm in each directory.  The
-# first metadata entry will be the metadata for the current directory.
-# The remaining entries will be for each of the other directory
-# elements, in the order they're listed in the index.
-#
-# Since the git tree elements are sorted according to
-# git.shalist_item_sort_key, the metalist items are accumulated as
-# (sort_key, metadata) tuples, and then sorted when the .bupm file is
-# created.  The sort_key should have been computed using the element's
-# mangled name and git mode (after hashsplitting), but the code isn't
-# actually doing that but rather uses the element's real name and mode.
-# This makes things a bit more difficult when reading it back, see
-# vfs.ordered_tree_entries().
-
-# Maintain a stack of information representing the current location in
-# the archive being constructed.  The current path is recorded in
-# parts, which will be something like ['', 'home', 'someuser'], and
-# the accumulated content and metadata for of the dirs in parts is
-# stored in parallel stacks in shalists and metalists.
-
-parts = [] # Current archive position (stack of dir names).
-shalists = [] # Hashes for each dir in paths.
-metalists = [] # Metadata for each dir in paths.
-
-
-def _push(part, metadata):
-    # Enter a new archive directory -- make it the current directory.
-    parts.append(part)
-    shalists.append([])
-    metalists.append([(b'', metadata)]) # This dir's metadata (no name).
-
-
-def _pop(force_tree, dir_metadata=None):
-    # Leave the current archive directory and add its tree to its parent.
-    assert(len(parts) >= 1)
-    part = parts.pop()
-    shalist = shalists.pop()
-    metalist = metalists.pop()
-    # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
-    if force_tree:
-        tree = force_tree
-    else:
-        names_seen = set()
-        clean_list = []
-        metaidx = 1 # entry at 0 is for the dir
-        for x in shalist:
-            name = x[1]
-            if name in names_seen:
-                parent_path = b'/'.join(parts) + b'/'
-                add_error('error: ignoring duplicate path %s in %s'
-                          % (path_msg(name), path_msg(parent_path)))
-                if not stat.S_ISDIR(x[0]):
-                    del metalist[metaidx]
-            else:
-                names_seen.add(name)
-                clean_list.append(x)
-                if not stat.S_ISDIR(x[0]):
-                    metaidx += 1
-
-        if dir_metadata: # Override the original metadata pushed for this dir.
-            metalist = [(b'', dir_metadata)] + metalist[1:]
-        sorted_metalist = sorted(metalist, key = lambda x : x[0])
-        metadata = b''.join([m[1].encode() for m in sorted_metalist])
-        metadata_f = BytesIO(metadata)
-        mode, id = hashsplit.split_to_blob_or_tree(w.new_blob, w.new_tree,
-                                                   [metadata_f],
-                                                   keep_boundaries=False)
-        clean_list.append((mode, b'.bupm', id))
-
-        tree = w.new_tree(clean_list)
-    if shalists:
-        shalists[-1].append((GIT_MODE_TREE,
-                             git.mangle_name(part,
-                                             GIT_MODE_TREE, GIT_MODE_TREE),
-                             tree))
-    return tree
-
-
-lastremain = None
-def progress_report(n):
-    global count, subcount, lastremain
-    subcount += n
-    cc = count + subcount
-    pct = total and (cc*100.0/total) or 0
-    now = time.time()
-    elapsed = now - tstart
-    kps = elapsed and int(cc/1024./elapsed)
-    kps_frac = 10 ** int(math.log(kps+1, 10) - 1)
-    kps = int(kps/kps_frac)*kps_frac
-    if cc:
-        remain = elapsed*1.0/cc * (total-cc)
-    else:
-        remain = 0.0
-    if (lastremain and (remain > lastremain)
-          and ((remain - lastremain)/lastremain < 0.05)):
-        remain = lastremain
-    else:
-        lastremain = remain
-    hours = int(remain/60/60)
-    mins = int(remain/60 - hours*60)
-    secs = int(remain - hours*60*60 - mins*60)
-    if elapsed < 30:
-        remainstr = ''
-        kpsstr = ''
-    else:
-        kpsstr = '%dk/s' % kps
-        if hours:
-            remainstr = '%dh%dm' % (hours, mins)
-        elif mins:
-            remainstr = '%dm%d' % (mins, secs)
-        else:
-            remainstr = '%ds' % secs
-    qprogress('Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r'
-              % (pct, cc/1024, total/1024, fcount, ftotal,
-                 remainstr, kpsstr))
-
-
-indexfile = opt.indexfile or git.repo(b'bupindex')
-r = index.Reader(indexfile)
-try:
-    msr = index.MetaStoreReader(indexfile + b'.meta')
-except IOError as ex:
-    if ex.errno != EACCES:
-        raise
-    log('error: cannot access %r; have you run bup index?'
-        % path_msg(indexfile))
-    sys.exit(1)
-hlink_db = hlinkdb.HLinkDB(indexfile + b'.hlink')
-
-def already_saved(ent):
-    return ent.is_valid() and w.exists(ent.sha) and ent.sha
-
-def wantrecurse_pre(ent):
-    return not already_saved(ent)
-
-def wantrecurse_during(ent):
-    return not already_saved(ent) or ent.sha_missing()
-
-def find_hardlink_target(hlink_db, ent):
-    if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
-        link_paths = hlink_db.node_paths(ent.dev, ent.ino)
-        if link_paths:
-            return link_paths[0]
-
-total = ftotal = 0
-if opt.progress:
-    for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_pre):
-        if not (ftotal % 10024):
-            qprogress('Reading index: %d\r' % ftotal)
-        exists = ent.exists()
-        hashvalid = already_saved(ent)
-        ent.set_sha_missing(not hashvalid)
-        if not opt.smaller or ent.size < opt.smaller:
-            if exists and not hashvalid:
-                total += ent.size
-        ftotal += 1
-    progress('Reading index: %d, done.\n' % ftotal)
-    hashsplit.progress_callback = progress_report
-
-# Root collisions occur when strip or graft options map more than one
-# path to the same directory (paths which originally had separate
-# parents).  When that situation is detected, use empty metadata for
-# the parent.  Otherwise, use the metadata for the common parent.
-# Collision example: "bup save ... --strip /foo /foo/bar /bar".
-
-# FIXME: Add collision tests, or handle collisions some other way.
-
-# FIXME: Detect/handle strip/graft name collisions (other than root),
-# i.e. if '/foo/bar' and '/bar' both map to '/'.
-
-first_root = None
-root_collision = None
-tstart = time.time()
-count = subcount = fcount = 0
-lastskip_name = None
-lastdir = b''
-for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during):
-    (dir, file) = os.path.split(ent.name)
-    exists = (ent.flags & index.IX_EXISTS)
-    hashvalid = already_saved(ent)
-    wasmissing = ent.sha_missing()
-    oldsize = ent.size
-    if opt.verbose:
-        if not exists:
-            status = 'D'
-        elif not hashvalid:
-            if ent.sha == index.EMPTY_SHA:
-                status = 'A'
-            else:
-                status = 'M'
-        else:
-            status = ' '
-        if opt.verbose >= 2:
-            log('%s %-70s\n' % (status, path_msg(ent.name)))
-        elif not stat.S_ISDIR(ent.mode) and lastdir != dir:
-            if not lastdir.startswith(dir):
-                log('%s %-70s\n' % (status, path_msg(os.path.join(dir, b''))))
-            lastdir = dir
-
-    if opt.progress:
-        progress_report(0)
-    fcount += 1
-    
-    if not exists:
-        continue
-    if opt.smaller and ent.size >= opt.smaller:
-        if exists and not hashvalid:
-            if opt.verbose:
-                log('skipping large file "%s"\n' % path_msg(ent.name))
-            lastskip_name = ent.name
-        continue
-
-    assert(dir.startswith(b'/'))
-    if opt.strip:
-        dirp = stripped_path_components(dir, extra)
-    elif opt.strip_path:
-        dirp = stripped_path_components(dir, [opt.strip_path])
-    elif graft_points:
-        dirp = grafted_path_components(graft_points, dir)
-    else:
-        dirp = path_components(dir)
-
-    # At this point, dirp contains a representation of the archive
-    # path that looks like [(archive_dir_name, real_fs_path), ...].
-    # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp
-    # might look like this at some point:
-    #   [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...].
-
-    # This dual representation supports stripping/grafting, where the
-    # archive path may not have a direct correspondence with the
-    # filesystem.  The root directory is represented by an initial
-    # component named '', and any component that doesn't have a
-    # corresponding filesystem directory (due to grafting, for
-    # example) will have a real_fs_path of None, i.e. [('', None),
-    # ...].
-
-    if first_root == None:
-        first_root = dirp[0]
-    elif first_root != dirp[0]:
-        root_collision = True
-
-    # If switching to a new sub-tree, finish the current sub-tree.
-    while parts > [x[0] for x in dirp]:
-        _pop(force_tree = None)
-
-    # If switching to a new sub-tree, start a new sub-tree.
-    for path_component in dirp[len(parts):]:
-        dir_name, fs_path = path_component
-        # Not indexed, so just grab the FS metadata or use empty metadata.
-        try:
-            meta = metadata.from_path(fs_path, normalized=True) \
-                if fs_path else metadata.Metadata()
-        except (OSError, IOError) as e:
-            add_error(e)
-            lastskip_name = dir_name
-            meta = metadata.Metadata()
-        _push(dir_name, meta)
-
-    if not file:
-        if len(parts) == 1:
-            continue # We're at the top level -- keep the current root dir
-        # Since there's no filename, this is a subdir -- finish it.
-        oldtree = already_saved(ent) # may be None
-        newtree = _pop(force_tree = oldtree)
-        if not oldtree:
-            if lastskip_name and lastskip_name.startswith(ent.name):
-                ent.invalidate()
-            else:
-                ent.validate(GIT_MODE_TREE, newtree)
-            ent.repack()
-        if exists and wasmissing:
-            count += oldsize
-        continue
-
-    # it's not a directory
-    if hashvalid:
-        id = ent.sha
-        git_name = git.mangle_name(file, ent.mode, ent.gitmode)
-        git_info = (ent.gitmode, git_name, id)
-        shalists[-1].append(git_info)
-        sort_key = git.shalist_item_sort_key((ent.mode, file, id))
-        meta = msr.metadata_at(ent.meta_ofs)
-        meta.hardlink_target = find_hardlink_target(hlink_db, ent)
-        # Restore the times that were cleared to 0 in the metastore.
-        (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime, ent.ctime)
-        metalists[-1].append((sort_key, meta))
-    else:
-        id = None
-        if stat.S_ISREG(ent.mode):
-            try:
-                with hashsplit.open_noatime(ent.name) as f:
-                    (mode, id) = hashsplit.split_to_blob_or_tree(
-                                            w.new_blob, w.new_tree, [f],
-                                            keep_boundaries=False)
-            except (IOError, OSError) as e:
-                add_error('%s: %s' % (ent.name, e))
-                lastskip_name = ent.name
-        elif stat.S_ISDIR(ent.mode):
-            assert(0)  # handled above
-        elif stat.S_ISLNK(ent.mode):
-            try:
-                rl = os.readlink(ent.name)
-            except (OSError, IOError) as e:
-                add_error(e)
-                lastskip_name = ent.name
-            else:
-                (mode, id) = (GIT_MODE_SYMLINK, w.new_blob(rl))
-        else:
-            # Everything else should be fully described by its
-            # metadata, so just record an empty blob, so the paths
-            # in the tree and .bupm will match up.
-            (mode, id) = (GIT_MODE_FILE, w.new_blob(b''))
-
-        if id:
-            ent.validate(mode, id)
-            ent.repack()
-            git_name = git.mangle_name(file, ent.mode, ent.gitmode)
-            git_info = (mode, git_name, id)
-            shalists[-1].append(git_info)
-            sort_key = git.shalist_item_sort_key((ent.mode, file, id))
-            hlink = find_hardlink_target(hlink_db, ent)
-            try:
-                meta = metadata.from_path(ent.name, hardlink_target=hlink,
-                                          normalized=True)
-            except (OSError, IOError) as e:
-                add_error(e)
-                lastskip_name = ent.name
-                meta = metadata.Metadata()
-            metalists[-1].append((sort_key, meta))
-
-    if exists and wasmissing:
-        count += oldsize
-        subcount = 0
-
-
-if opt.progress:
-    pct = total and count*100.0/total or 100
-    progress('Saving: %.2f%% (%d/%dk, %d/%d files), done.    \n'
-             % (pct, count/1024, total/1024, fcount, ftotal))
-
-while len(parts) > 1: # _pop() all the parts above the root
-    _pop(force_tree = None)
-assert(len(shalists) == 1)
-assert(len(metalists) == 1)
-
-# Finish the root directory.
-tree = _pop(force_tree = None,
-            # When there's a collision, use empty metadata for the root.
-            dir_metadata = metadata.Metadata() if root_collision else None)
-
-sys.stdout.flush()
-out = byte_stream(sys.stdout)
-
-if opt.tree:
-    out.write(hexlify(tree))
-    out.write(b'\n')
-if opt.commit or name:
-    if compat.py_maj > 2:
-        # Strip b prefix from python 3 bytes reprs to preserve previous format
-         msgcmd = b'[%s]' % b', '.join([repr(argv_bytes(x))[1:].encode('ascii')
-                                       for x in compat.argv])
-    else:
-        msgcmd = repr(compat.argv)
-    msg = b'bup save\n\nGenerated by command:\n%s\n' % msgcmd
-    userline = (b'%s <%s@%s>' % (userfullname(), username(), hostname()))
-    commit = w.new_commit(tree, oldref, userline, date, None,
-                          userline, date, None, msg)
-    if opt.commit:
-        out.write(hexlify(commit))
-        out.write(b'\n')
-
-msr.close()
-w.close()  # must close before we can update the ref
-        
-if opt.name:
-    if cli:
-        cli.update_ref(refname, commit, oldref)
-    else:
-        git.update_ref(refname, commit, oldref)
-
-if cli:
-    cli.close()
-
-if saved_errors:
-    log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))
-    sys.exit(1)
diff --git a/lib/cmd/server-cmd.py b/lib/cmd/server-cmd.py
deleted file mode 100755 (executable)
index ada92d2..0000000
+++ /dev/null
@@ -1,326 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import
-from binascii import hexlify, unhexlify
-import os, struct, subprocess, sys
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, options, git, vfs, vint
-from bup.compat import environ, hexstr
-from bup.git import MissingObject
-from bup.helpers import (Conn, debug1, debug2, linereader, lines_until_sentinel,
-                         log)
-from bup.io import byte_stream, path_msg
-from bup.repo import LocalRepo
-
-
-suspended_w = None
-dumb_server_mode = False
-repo = None
-
-def do_help(conn, junk):
-    conn.write(b'Commands:\n    %s\n' % b'\n    '.join(sorted(commands)))
-    conn.ok()
-
-
-def _set_mode():
-    global dumb_server_mode
-    dumb_server_mode = os.path.exists(git.repo(b'bup-dumb-server'))
-    debug1('bup server: serving in %s mode\n' 
-           % (dumb_server_mode and 'dumb' or 'smart'))
-
-
-def _init_session(reinit_with_new_repopath=None):
-    global repo
-    if reinit_with_new_repopath is None and git.repodir:
-        if not repo:
-            repo = LocalRepo()
-        return
-    git.check_repo_or_die(reinit_with_new_repopath)
-    if repo:
-        repo.close()
-    repo = LocalRepo()
-    # OK. we now know the path is a proper repository. Record this path in the
-    # environment so that subprocesses inherit it and know where to operate.
-    environ[b'BUP_DIR'] = git.repodir
-    debug1('bup server: bupdir is %s\n' % path_msg(git.repodir))
-    _set_mode()
-
-
-def init_dir(conn, arg):
-    git.init_repo(arg)
-    debug1('bup server: bupdir initialized: %s\n' % path_msg(git.repodir))
-    _init_session(arg)
-    conn.ok()
-
-
-def set_dir(conn, arg):
-    _init_session(arg)
-    conn.ok()
-
-    
-def list_indexes(conn, junk):
-    _init_session()
-    suffix = b''
-    if dumb_server_mode:
-        suffix = b' load'
-    for f in os.listdir(git.repo(b'objects/pack')):
-        if f.endswith(b'.idx'):
-            conn.write(b'%s%s\n' % (f, suffix))
-    conn.ok()
-
-
-def send_index(conn, name):
-    _init_session()
-    assert name.find(b'/') < 0
-    assert name.endswith(b'.idx')
-    idx = git.open_idx(git.repo(b'objects/pack/%s' % name))
-    conn.write(struct.pack('!I', len(idx.map)))
-    conn.write(idx.map)
-    conn.ok()
-
-
-def receive_objects_v2(conn, junk):
-    global suspended_w
-    _init_session()
-    suggested = set()
-    if suspended_w:
-        w = suspended_w
-        suspended_w = None
-    else:
-        if dumb_server_mode:
-            w = git.PackWriter(objcache_maker=None)
-        else:
-            w = git.PackWriter()
-    while 1:
-        ns = conn.read(4)
-        if not ns:
-            w.abort()
-            raise Exception('object read: expected length header, got EOF\n')
-        n = struct.unpack('!I', ns)[0]
-        #debug2('expecting %d bytes\n' % n)
-        if not n:
-            debug1('bup server: received %d object%s.\n' 
-                % (w.count, w.count!=1 and "s" or ''))
-            fullpath = w.close(run_midx=not dumb_server_mode)
-            if fullpath:
-                (dir, name) = os.path.split(fullpath)
-                conn.write(b'%s.idx\n' % name)
-            conn.ok()
-            return
-        elif n == 0xffffffff:
-            debug2('bup server: receive-objects suspended.\n')
-            suspended_w = w
-            conn.ok()
-            return
-            
-        shar = conn.read(20)
-        crcr = struct.unpack('!I', conn.read(4))[0]
-        n -= 20 + 4
-        buf = conn.read(n)  # object sizes in bup are reasonably small
-        #debug2('read %d bytes\n' % n)
-        _check(w, n, len(buf), 'object read: expected %d bytes, got %d\n')
-        if not dumb_server_mode:
-            oldpack = w.exists(shar, want_source=True)
-            if oldpack:
-                assert(not oldpack == True)
-                assert(oldpack.endswith(b'.idx'))
-                (dir,name) = os.path.split(oldpack)
-                if not (name in suggested):
-                    debug1("bup server: suggesting index %s\n"
-                           % git.shorten_hash(name).decode('ascii'))
-                    debug1("bup server:   because of object %s\n"
-                           % hexstr(shar))
-                    conn.write(b'index %s\n' % name)
-                    suggested.add(name)
-                continue
-        nw, crc = w._raw_write((buf,), sha=shar)
-        _check(w, crcr, crc, 'object read: expected crc %d, got %d\n')
-    # NOTREACHED
-    
-
-def _check(w, expected, actual, msg):
-    if expected != actual:
-        w.abort()
-        raise Exception(msg % (expected, actual))
-
-
-def read_ref(conn, refname):
-    _init_session()
-    r = git.read_ref(refname)
-    conn.write(b'%s\n' % hexlify(r) if r else b'')
-    conn.ok()
-
-
-def update_ref(conn, refname):
-    _init_session()
-    newval = conn.readline().strip()
-    oldval = conn.readline().strip()
-    git.update_ref(refname, unhexlify(newval), unhexlify(oldval))
-    conn.ok()
-
-def join(conn, id):
-    _init_session()
-    try:
-        for blob in git.cp().join(id):
-            conn.write(struct.pack('!I', len(blob)))
-            conn.write(blob)
-    except KeyError as e:
-        log('server: error: %s\n' % e)
-        conn.write(b'\0\0\0\0')
-        conn.error(e)
-    else:
-        conn.write(b'\0\0\0\0')
-        conn.ok()
-
-def cat_batch(conn, dummy):
-    _init_session()
-    cat_pipe = git.cp()
-    # For now, avoid potential deadlock by just reading them all
-    for ref in tuple(lines_until_sentinel(conn, b'\n', Exception)):
-        ref = ref[:-1]
-        it = cat_pipe.get(ref)
-        info = next(it)
-        if not info[0]:
-            conn.write(b'missing\n')
-            continue
-        conn.write(b'%s %s %d\n' % info)
-        for buf in it:
-            conn.write(buf)
-    conn.ok()
-
-def refs(conn, args):
-    limit_to_heads, limit_to_tags = args.split()
-    assert limit_to_heads in (b'0', b'1')
-    assert limit_to_tags in (b'0', b'1')
-    limit_to_heads = int(limit_to_heads)
-    limit_to_tags = int(limit_to_tags)
-    _init_session()
-    patterns = tuple(x[:-1] for x in lines_until_sentinel(conn, b'\n', Exception))
-    for name, oid in git.list_refs(patterns=patterns,
-                                   limit_to_heads=limit_to_heads,
-                                   limit_to_tags=limit_to_tags):
-        assert b'\n' not in name
-        conn.write(b'%s %s\n' % (hexlify(oid), name))
-    conn.write(b'\n')
-    conn.ok()
-
-def rev_list(conn, _):
-    _init_session()
-    count = conn.readline()
-    if not count:
-        raise Exception('Unexpected EOF while reading rev-list count')
-    assert count == b'\n'
-    count = None
-    fmt = conn.readline()
-    if not fmt:
-        raise Exception('Unexpected EOF while reading rev-list format')
-    fmt = None if fmt == b'\n' else fmt[:-1]
-    refs = tuple(x[:-1] for x in lines_until_sentinel(conn, b'\n', Exception))
-    args = git.rev_list_invocation(refs, format=fmt)
-    p = subprocess.Popen(args, env=git._gitenv(git.repodir),
-                         stdout=subprocess.PIPE)
-    while True:
-        out = p.stdout.read(64 * 1024)
-        if not out:
-            break
-        conn.write(out)
-    conn.write(b'\n')
-    rv = p.wait()  # not fatal
-    if rv:
-        msg = 'git rev-list returned error %d' % rv
-        conn.error(msg)
-        raise GitError(msg)
-    conn.ok()
-
-def resolve(conn, args):
-    _init_session()
-    (flags,) = args.split()
-    flags = int(flags)
-    want_meta = bool(flags & 1)
-    follow = bool(flags & 2)
-    have_parent = bool(flags & 4)
-    parent = vfs.read_resolution(conn) if have_parent else None
-    path = vint.read_bvec(conn)
-    if not len(path):
-        raise Exception('Empty resolve path')
-    try:
-        res = list(vfs.resolve(repo, path, parent=parent, want_meta=want_meta,
-                               follow=follow))
-    except vfs.IOError as ex:
-        res = ex
-    if isinstance(res, vfs.IOError):
-        conn.write(b'\x00')  # error
-        vfs.write_ioerror(conn, res)
-    else:
-        conn.write(b'\x01')  # success
-        vfs.write_resolution(conn, res)
-    conn.ok()
-
-optspec = """
-bup server
-"""
-o = options.Options(optspec)
-(opt, flags, extra) = o.parse(compat.argv[1:])
-
-if extra:
-    o.fatal('no arguments expected')
-
-debug2('bup server: reading from stdin.\n')
-
-commands = {
-    b'quit': None,
-    b'help': do_help,
-    b'init-dir': init_dir,
-    b'set-dir': set_dir,
-    b'list-indexes': list_indexes,
-    b'send-index': send_index,
-    b'receive-objects-v2': receive_objects_v2,
-    b'read-ref': read_ref,
-    b'update-ref': update_ref,
-    b'join': join,
-    b'cat': join,  # apocryphal alias
-    b'cat-batch' : cat_batch,
-    b'refs': refs,
-    b'rev-list': rev_list,
-    b'resolve': resolve
-}
-
-# FIXME: this protocol is totally lame and not at all future-proof.
-# (Especially since we abort completely as soon as *anything* bad happens)
-sys.stdout.flush()
-conn = Conn(byte_stream(sys.stdin), byte_stream(sys.stdout))
-lr = linereader(conn)
-for _line in lr:
-    line = _line.strip()
-    if not line:
-        continue
-    debug1('bup server: command: %r\n' % line)
-    words = line.split(b' ', 1)
-    cmd = words[0]
-    rest = len(words)>1 and words[1] or b''
-    if cmd == b'quit':
-        break
-    else:
-        cmd = commands.get(cmd)
-        if cmd:
-            cmd(conn, rest)
-        else:
-            raise Exception('unknown server command: %r\n' % line)
-
-debug1('bup server: done\n')
diff --git a/lib/cmd/split-cmd.py b/lib/cmd/split-cmd.py
deleted file mode 100755 (executable)
index 3105eb8..0000000
+++ /dev/null
@@ -1,253 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import, division, print_function
-from binascii import hexlify
-import os, sys, time
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, hashsplit, git, options, client
-from bup.compat import argv_bytes, environ
-from bup.helpers import (add_error, handle_ctrl_c, hostname, log, parse_num,
-                         qprogress, reprogress, saved_errors,
-                         valid_save_name,
-                         parse_date_or_fatal)
-from bup.io import byte_stream
-from bup.pwdgrp import userfullname, username
-
-
-optspec = """
-bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...]
-bup split -b OPTIONS [--git-ids | filenames...]
-bup split --copy OPTIONS [--git-ids | filenames...]
-bup split --noop [-b|-t] OPTIONS [--git-ids | filenames...]
---
- Modes:
-b,blobs    output a series of blob ids.  Implies --fanout=0.
-t,tree     output a tree id
-c,commit   output a commit id
-n,name=    save the result under the given name
-noop       split the input, but throw away the result
-copy       split the input, copy it to stdout, don't save to repo
- Options:
-r,remote=  remote repository path
-d,date=    date for the commit (seconds since the epoch)
-q,quiet    don't print progress messages
-v,verbose  increase log output (can be used more than once)
-git-ids    read a list of git object ids from stdin and split their contents
-keep-boundaries  don't let one chunk span two input files
-bench      print benchmark timings to stderr
-max-pack-size=  maximum bytes in a single pack
-max-pack-objects=  maximum number of objects in a single pack
-fanout=    average number of blobs in a single tree
-bwlimit=   maximum bytes/sec to transmit to server
-#,compress=  set compression level to # (0-9, 9 is highest) [1]
-"""
-handle_ctrl_c()
-
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-if opt.name: opt.name = argv_bytes(opt.name)
-if opt.remote: opt.remote = argv_bytes(opt.remote)
-if opt.verbose is None: opt.verbose = 0
-
-if not (opt.blobs or opt.tree or opt.commit or opt.name or
-        opt.noop or opt.copy):
-    o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
-if opt.copy and (opt.blobs or opt.tree):
-    o.fatal('--copy is incompatible with -b, -t')
-if (opt.noop or opt.copy) and (opt.commit or opt.name):
-    o.fatal('--noop and --copy are incompatible with -c, -n')
-if opt.blobs and (opt.tree or opt.commit or opt.name):
-    o.fatal('-b is incompatible with -t, -c, -n')
-if extra and opt.git_ids:
-    o.fatal("don't provide filenames when using --git-ids")
-
-if opt.verbose >= 2:
-    git.verbose = opt.verbose - 1
-    opt.bench = 1
-
-max_pack_size = None
-if opt.max_pack_size:
-    max_pack_size = parse_num(opt.max_pack_size)
-max_pack_objects = None
-if opt.max_pack_objects:
-    max_pack_objects = parse_num(opt.max_pack_objects)
-
-if opt.fanout:
-    hashsplit.fanout = parse_num(opt.fanout)
-if opt.blobs:
-    hashsplit.fanout = 0
-if opt.bwlimit:
-    client.bwlimit = parse_num(opt.bwlimit)
-if opt.date:
-    date = parse_date_or_fatal(opt.date, o.fatal)
-else:
-    date = time.time()
-
-total_bytes = 0
-def prog(filenum, nbytes):
-    global total_bytes
-    total_bytes += nbytes
-    if filenum > 0:
-        qprogress('Splitting: file #%d, %d kbytes\r'
-                  % (filenum+1, total_bytes // 1024))
-    else:
-        qprogress('Splitting: %d kbytes\r' % (total_bytes // 1024))
-
-
-is_reverse = environ.get(b'BUP_SERVER_REVERSE')
-if is_reverse and opt.remote:
-    o.fatal("don't use -r in reverse mode; it's automatic")
-start_time = time.time()
-
-if opt.name and not valid_save_name(opt.name):
-    o.fatal("'%r' is not a valid branch name." % opt.name)
-refname = opt.name and b'refs/heads/%s' % opt.name or None
-
-if opt.noop or opt.copy:
-    cli = pack_writer = oldref = None
-elif opt.remote or is_reverse:
-    git.check_repo_or_die()
-    cli = client.Client(opt.remote)
-    oldref = refname and cli.read_ref(refname) or None
-    pack_writer = cli.new_packwriter(compression_level=opt.compress,
-                                     max_pack_size=max_pack_size,
-                                     max_pack_objects=max_pack_objects)
-else:
-    git.check_repo_or_die()
-    cli = None
-    oldref = refname and git.read_ref(refname) or None
-    pack_writer = git.PackWriter(compression_level=opt.compress,
-                                 max_pack_size=max_pack_size,
-                                 max_pack_objects=max_pack_objects)
-
-input = byte_stream(sys.stdin)
-
-if opt.git_ids:
-    # the input is actually a series of git object ids that we should retrieve
-    # and split.
-    #
-    # This is a bit messy, but basically it converts from a series of
-    # CatPipe.get() iterators into a series of file-type objects.
-    # It would be less ugly if either CatPipe.get() returned a file-like object
-    # (not very efficient), or split_to_shalist() expected an iterator instead
-    # of a file.
-    cp = git.CatPipe()
-    class IterToFile:
-        def __init__(self, it):
-            self.it = iter(it)
-        def read(self, size):
-            v = next(self.it, None)
-            return v or b''
-    def read_ids():
-        while 1:
-            line = input.readline()
-            if not line:
-                break
-            if line:
-                line = line.strip()
-            try:
-                it = cp.get(line.strip())
-                next(it, None)  # skip the file info
-            except KeyError as e:
-                add_error('error: %s' % e)
-                continue
-            yield IterToFile(it)
-    files = read_ids()
-else:
-    # the input either comes from a series of files or from stdin.
-    files = extra and (open(argv_bytes(fn), 'rb') for fn in extra) or [input]
-
-if pack_writer:
-    new_blob = pack_writer.new_blob
-    new_tree = pack_writer.new_tree
-elif opt.blobs or opt.tree:
-    # --noop mode
-    new_blob = lambda content: git.calc_hash(b'blob', content)
-    new_tree = lambda shalist: git.calc_hash(b'tree', git.tree_encode(shalist))
-
-sys.stdout.flush()
-out = byte_stream(sys.stdout)
-
-if opt.blobs:
-    shalist = hashsplit.split_to_blobs(new_blob, files,
-                                       keep_boundaries=opt.keep_boundaries,
-                                       progress=prog)
-    for (sha, size, level) in shalist:
-        out.write(hexlify(sha) + b'\n')
-        reprogress()
-elif opt.tree or opt.commit or opt.name:
-    if opt.name: # insert dummy_name which may be used as a restore target
-        mode, sha = \
-            hashsplit.split_to_blob_or_tree(new_blob, new_tree, files,
-                                            keep_boundaries=opt.keep_boundaries,
-                                            progress=prog)
-        splitfile_name = git.mangle_name(b'data', hashsplit.GIT_MODE_FILE, mode)
-        shalist = [(mode, splitfile_name, sha)]
-    else:
-        shalist = hashsplit.split_to_shalist(
-                      new_blob, new_tree, files,
-                      keep_boundaries=opt.keep_boundaries, progress=prog)
-    tree = new_tree(shalist)
-else:
-    last = 0
-    it = hashsplit.hashsplit_iter(files,
-                                  keep_boundaries=opt.keep_boundaries,
-                                  progress=prog)
-    for (blob, level) in it:
-        hashsplit.total_split += len(blob)
-        if opt.copy:
-            sys.stdout.write(str(blob))
-        megs = hashsplit.total_split // 1024 // 1024
-        if not opt.quiet and last != megs:
-            last = megs
-
-if opt.verbose:
-    log('\n')
-if opt.tree:
-    out.write(hexlify(tree) + b'\n')
-if opt.commit or opt.name:
-    msg = b'bup split\n\nGenerated by command:\n%r\n' % compat.argvb
-    ref = opt.name and (b'refs/heads/%s' % opt.name) or None
-    userline = b'%s <%s@%s>' % (userfullname(), username(), hostname())
-    commit = pack_writer.new_commit(tree, oldref, userline, date, None,
-                                    userline, date, None, msg)
-    if opt.commit:
-        out.write(hexlify(commit) + b'\n')
-
-if pack_writer:
-    pack_writer.close()  # must close before we can update the ref
-
-if opt.name:
-    if cli:
-        cli.update_ref(refname, commit, oldref)
-    else:
-        git.update_ref(refname, commit, oldref)
-
-if cli:
-    cli.close()
-
-secs = time.time() - start_time
-size = hashsplit.total_split
-if opt.bench:
-    log('bup: %.2f kbytes in %.2f secs = %.2f kbytes/sec\n'
-        % (size / 1024, secs, size / 1024 / secs))
-
-if saved_errors:
-    log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))
-    sys.exit(1)
diff --git a/lib/cmd/tag-cmd.py b/lib/cmd/tag-cmd.py
deleted file mode 100755 (executable)
index 0d52677..0000000
+++ /dev/null
@@ -1,107 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import
-from binascii import hexlify
-import os, sys
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, git, options
-from bup.compat import argv_bytes
-from bup.helpers import debug1, handle_ctrl_c, log
-from bup.io import byte_stream, path_msg
-
-# FIXME: review for safe writes.
-
-handle_ctrl_c()
-
-optspec = """
-bup tag
-bup tag [-f] <tag name> <commit>
-bup tag [-f] -d <tag name>
---
-d,delete=   Delete a tag
-f,force     Overwrite existing tag, or ignore missing tag when deleting
-"""
-
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-
-git.check_repo_or_die()
-
-tags = [t for sublist in git.tags().values() for t in sublist]
-
-if opt.delete:
-    # git.delete_ref() doesn't complain if a ref doesn't exist.  We
-    # could implement this verification but we'd need to read in the
-    # contents of the tag file and pass the hash, and we already know
-    # about the tag's existance via "tags".
-    tag_name = argv_bytes(opt.delete)
-    if not opt.force and tag_name not in tags:
-        log("error: tag '%s' doesn't exist\n" % path_msg(tag_name))
-        sys.exit(1)
-    tag_file = b'refs/tags/%s' % tag_name
-    git.delete_ref(tag_file)
-    sys.exit(0)
-
-if not extra:
-    for t in tags:
-        sys.stdout.flush()
-        out = byte_stream(sys.stdout)
-        out.write(t)
-        out.write(b'\n')
-    sys.exit(0)
-elif len(extra) != 2:
-    o.fatal('expected commit ref and hash')
-
-tag_name, commit = map(argv_bytes, extra[:2])
-if not tag_name:
-    o.fatal("tag name must not be empty.")
-debug1("args: tag name = %s; commit = %s\n"
-       % (path_msg(tag_name), commit.decode('ascii')))
-
-if tag_name in tags and not opt.force:
-    log("bup: error: tag '%s' already exists\n" % path_msg(tag_name))
-    sys.exit(1)
-
-if tag_name.startswith(b'.'):
-    o.fatal("'%s' is not a valid tag name." % path_msg(tag_name))
-
-try:
-    hash = git.rev_parse(commit)
-except git.GitError as e:
-    log("bup: error: %s" % e)
-    sys.exit(2)
-
-if not hash:
-    log("bup: error: commit %s not found.\n" % commit.decode('ascii'))
-    sys.exit(2)
-
-pL = git.PackIdxList(git.repo(b'objects/pack'))
-if not pL.exists(hash):
-    log("bup: error: commit %s not found.\n" % commit.decode('ascii'))
-    sys.exit(2)
-
-tag_file = git.repo(b'refs/tags/' + tag_name)
-try:
-    tag = open(tag_file, 'wb')
-except OSError as e:
-    log("bup: error: could not create tag '%s': %s" % (path_msg(tag_name), e))
-    sys.exit(3)
-with tag as tag:
-    tag.write(hexlify(hash))
-    tag.write(b'\n')
diff --git a/lib/cmd/tick-cmd.py b/lib/cmd/tick-cmd.py
deleted file mode 100755 (executable)
index 697057e..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import
-import os, sys, time
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, options
-
-
-optspec = """
-bup tick
-"""
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-
-if extra:
-    o.fatal("no arguments expected")
-
-t = time.time()
-tleft = 1 - (t - int(t))
-time.sleep(tleft)
diff --git a/lib/cmd/version-cmd.py b/lib/cmd/version-cmd.py
deleted file mode 100755 (executable)
index bc1329c..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import, print_function
-import os.path, re, sys
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, options, version
-from bup.io import byte_stream
-
-version_rx = re.compile(r'^[0-9]+\.[0-9]+(\.[0-9]+)?(-[0-9]+-g[0-9abcdef]+)?$')
-
-optspec = """
-bup version [--date|--commit]
---
-date    display the date this version of bup was created
-commit  display the git commit id of this version of bup
-"""
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-
-
-total = (opt.date or 0) + (opt.commit or 0)
-if total > 1:
-    o.fatal('at most one option expected')
-
-sys.stdout.flush()
-out = byte_stream(sys.stdout)
-
-if opt.date:
-    out.write(version.date.split(b' ')[0] + b'\n')
-elif opt.commit:
-    out.write(version.commit + b'\n')
-else:
-    out.write(version.version + b'\n')
diff --git a/lib/cmd/web-cmd.py b/lib/cmd/web-cmd.py
deleted file mode 100755 (executable)
index 77e5de0..0000000
+++ /dev/null
@@ -1,328 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-from __future__ import absolute_import, print_function
-from collections import namedtuple
-import mimetypes, os, posixpath, signal, stat, sys, time, urllib, webbrowser
-from binascii import hexlify
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, options, git, vfs
-from bup.helpers import (chunkyreader, debug1, format_filesize, handle_ctrl_c,
-                         log, saved_errors)
-from bup.metadata import Metadata
-from bup.path import resource_path
-from bup.repo import LocalRepo
-from bup.io import path_msg
-
-try:
-    from tornado import gen
-    from tornado.httpserver import HTTPServer
-    from tornado.ioloop import IOLoop
-    from tornado.netutil import bind_unix_socket
-    import tornado.web
-except ImportError:
-    log('error: cannot find the python "tornado" module; please install it\n')
-    sys.exit(1)
-
-
-# FIXME: right now the way hidden files are handled causes every
-# directory to be traversed twice.
-
-handle_ctrl_c()
-
-
-def http_date_from_utc_ns(utc_ns):
-    return time.strftime('%a, %d %b %Y %H:%M:%S', time.gmtime(utc_ns / 10**9))
-
-
-def _compute_breadcrumbs(path, show_hidden=False):
-    """Returns a list of breadcrumb objects for a path."""
-    breadcrumbs = []
-    breadcrumbs.append((b'[root]', b'/'))
-    path_parts = path.split(b'/')[1:-1]
-    full_path = b'/'
-    for part in path_parts:
-        full_path += part + b"/"
-        url_append = b""
-        if show_hidden:
-            url_append = b'?hidden=1'
-        breadcrumbs.append((part, full_path+url_append))
-    return breadcrumbs
-
-
-def _contains_hidden_files(repo, dir_item):
-    """Return true if the directory contains items with names other than
-    '.' and '..' that begin with '.'
-
-    """
-    for name, item in vfs.contents(repo, dir_item, want_meta=False):
-        if name in (b'.', b'..'):
-            continue
-        if name.startswith(b'.'):
-            return True
-    return False
-
-
-def _dir_contents(repo, resolution, show_hidden=False):
-    """Yield the display information for the contents of dir_item."""
-
-    url_query = b'?hidden=1' if show_hidden else b''
-
-    def display_info(name, item, resolved_item, display_name=None):
-        # link should be based on fully resolved type to avoid extra
-        # HTTP redirect.
-        link = tornado.escape.url_escape(name, plus=False)
-        if stat.S_ISDIR(vfs.item_mode(resolved_item)):
-            link += '/'
-        link = link.encode('ascii')
-
-        size = vfs.item_size(repo, item)
-        if opt.human_readable:
-            display_size = format_filesize(size)
-        else:
-            display_size = size
-
-        if not display_name:
-            mode = vfs.item_mode(item)
-            if stat.S_ISDIR(mode):
-                display_name = name + b'/'
-            elif stat.S_ISLNK(mode):
-                display_name = name + b'@'
-            else:
-                display_name = name
-
-        return display_name, link + url_query, display_size
-
-    dir_item = resolution[-1][1]    
-    for name, item in vfs.contents(repo, dir_item):
-        if not show_hidden:
-            if (name not in (b'.', b'..')) and name.startswith(b'.'):
-                continue
-        if name == b'.':
-            yield display_info(name, item, item, b'.')
-            parent_item = resolution[-2][1] if len(resolution) > 1 else dir_item
-            yield display_info(b'..', parent_item, parent_item, b'..')
-            continue
-        res_item = vfs.ensure_item_has_metadata(repo, item, include_size=True)
-        yield display_info(name, item, res_item)
-
-
-class BupRequestHandler(tornado.web.RequestHandler):
-
-    def initialize(self, repo=None):
-        self.repo = repo
-
-    def decode_argument(self, value, name=None):
-        if name == 'path':
-            return value
-        return super(BupRequestHandler, self).decode_argument(value, name)
-
-    def get(self, path):
-        return self._process_request(path)
-
-    def head(self, path):
-        return self._process_request(path)
-    
-    def _process_request(self, path):
-        print('Handling request for %s' % path)
-        sys.stdout.flush()
-        # Set want_meta because dir metadata won't be fetched, and if
-        # it's not a dir, then we're going to want the metadata.
-        res = vfs.resolve(self.repo, path, want_meta=True)
-        leaf_name, leaf_item = res[-1]
-        if not leaf_item:
-            self.send_error(404)
-            return
-        mode = vfs.item_mode(leaf_item)
-        if stat.S_ISDIR(mode):
-            self._list_directory(path, res)
-        else:
-            self._get_file(self.repo, path, res)
-
-    def _list_directory(self, path, resolution):
-        """Helper to produce a directory listing.
-
-        Return value is either a file object, or None (indicating an
-        error).  In either case, the headers are sent.
-        """
-        if not path.endswith(b'/') and len(path) > 0:
-            print('Redirecting from %s to %s' % (path_msg(path), path_msg(path + b'/')))
-            return self.redirect(path + b'/', permanent=True)
-
-        hidden_arg = self.request.arguments.get('hidden', [0])[-1]
-        try:
-            show_hidden = int(hidden_arg)
-        except ValueError as e:
-            show_hidden = False
-
-        self.render(
-            'list-directory.html',
-            path=path,
-            breadcrumbs=_compute_breadcrumbs(path, show_hidden),
-            files_hidden=_contains_hidden_files(self.repo, resolution[-1][1]),
-            hidden_shown=show_hidden,
-            dir_contents=_dir_contents(self.repo, resolution,
-                                       show_hidden=show_hidden))
-
-    @gen.coroutine
-    def _get_file(self, repo, path, resolved):
-        """Process a request on a file.
-
-        Return value is either a file object, or None (indicating an error).
-        In either case, the headers are sent.
-        """
-        file_item = resolved[-1][1]
-        file_item = vfs.augment_item_meta(repo, file_item, include_size=True)
-        meta = file_item.meta
-        ctype = self._guess_type(path)
-        self.set_header("Last-Modified", http_date_from_utc_ns(meta.mtime))
-        self.set_header("Content-Type", ctype)
-        
-        self.set_header("Content-Length", str(meta.size))
-        assert len(file_item.oid) == 20
-        self.set_header("Etag", hexlify(file_item.oid))
-        if self.request.method != 'HEAD':
-            with vfs.fopen(self.repo, file_item) as f:
-                it = chunkyreader(f)
-                for blob in chunkyreader(f):
-                    self.write(blob)
-        raise gen.Return()
-
-    def _guess_type(self, path):
-        """Guess the type of a file.
-
-        Argument is a PATH (a filename).
-
-        Return value is a string of the form type/subtype,
-        usable for a MIME Content-type header.
-
-        The default implementation looks the file's extension
-        up in the table self.extensions_map, using application/octet-stream
-        as a default; however it would be permissible (if
-        slow) to look inside the data to make a better guess.
-        """
-        base, ext = posixpath.splitext(path)
-        if ext in self.extensions_map:
-            return self.extensions_map[ext]
-        ext = ext.lower()
-        if ext in self.extensions_map:
-            return self.extensions_map[ext]
-        else:
-            return self.extensions_map['']
-
-    if not mimetypes.inited:
-        mimetypes.init() # try to read system mime.types
-    extensions_map = mimetypes.types_map.copy()
-    extensions_map.update({
-        '': 'text/plain', # Default
-        '.py': 'text/plain',
-        '.c': 'text/plain',
-        '.h': 'text/plain',
-        })
-
-
-io_loop = None
-
-def handle_sigterm(signum, frame):
-    global io_loop
-    debug1('\nbup-web: signal %d received\n' % signum)
-    log('Shutdown requested\n')
-    if not io_loop:
-        sys.exit(0)
-    io_loop.stop()
-
-
-signal.signal(signal.SIGTERM, handle_sigterm)
-
-UnixAddress = namedtuple('UnixAddress', ['path'])
-InetAddress = namedtuple('InetAddress', ['host', 'port'])
-
-optspec = """
-bup web [[hostname]:port]
-bup web unix://path
---
-human-readable    display human readable file sizes (i.e. 3.9K, 4.7M)
-browser           show repository in default browser (incompatible with unix://)
-"""
-o = options.Options(optspec)
-opt, flags, extra = o.parse(compat.argv[1:])
-
-if len(extra) > 1:
-    o.fatal("at most one argument expected")
-
-if len(extra) == 0:
-    address = InetAddress(host='127.0.0.1', port=8080)
-else:
-    bind_url = extra[0]
-    if bind_url.startswith('unix://'):
-        address = UnixAddress(path=bind_url[len('unix://'):])
-    else:
-        addr_parts = extra[0].split(':', 1)
-        if len(addr_parts) == 1:
-            host = '127.0.0.1'
-            port = addr_parts[0]
-        else:
-            host, port = addr_parts
-        try:
-            port = int(port)
-        except (TypeError, ValueError) as ex:
-            o.fatal('port must be an integer, not %r' % port)
-        address = InetAddress(host=host, port=port)
-
-git.check_repo_or_die()
-
-settings = dict(
-    debug = 1,
-    template_path = resource_path(b'web').decode('utf-8'),
-    static_path = resource_path(b'web/static').decode('utf-8'),
-)
-
-# Disable buffering on stdout, for debug messages
-try:
-    sys.stdout._line_buffering = True
-except AttributeError:
-    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
-
-application = tornado.web.Application([
-    (r"(?P<path>/.*)", BupRequestHandler, dict(repo=LocalRepo())),
-], **settings)
-
-http_server = HTTPServer(application)
-io_loop_pending = IOLoop.instance()
-
-if isinstance(address, InetAddress):
-    sockets = tornado.netutil.bind_sockets(address.port, address.host)
-    http_server.add_sockets(sockets)
-    print('Serving HTTP on %s:%d...' % sockets[0].getsockname()[0:2])
-    if opt.browser:
-        browser_addr = 'http://' + address[0] + ':' + str(address[1])
-        io_loop_pending.add_callback(lambda : webbrowser.open(browser_addr))
-elif isinstance(address, UnixAddress):
-    unix_socket = bind_unix_socket(address.path)
-    http_server.add_socket(unix_socket)
-    print('Serving HTTP on filesystem socket %r' % address.path)
-else:
-    log('error: unexpected address %r', address)
-    sys.exit(1)
-
-io_loop = io_loop_pending
-io_loop.start()
-
-if saved_errors:
-    log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))
-    sys.exit(1)
diff --git a/lib/cmd/xstat-cmd.py b/lib/cmd/xstat-cmd.py
deleted file mode 100755 (executable)
index 9935b07..0000000
+++ /dev/null
@@ -1,129 +0,0 @@
-#!/bin/sh
-"""": # -*-python-*-
-# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
-export "BUP_ARGV_0"="$0"
-arg_i=1
-for arg in "$@"; do
-    export "BUP_ARGV_${arg_i}"="$arg"
-    shift
-    arg_i=$((arg_i + 1))
-done
-# Here to end of preamble replaced during install
-bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
-exec "$bup_python" "$0"
-"""
-# end of bup preamble
-
-# Copyright (C) 2010 Rob Browning
-#
-# This code is covered under the terms of the GNU Library General
-# Public License as described in the bup LICENSE file.
-
-from __future__ import absolute_import, print_function
-import errno, os.path, sys, stat
-
-sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
-
-from bup import compat, metadata, options, xstat
-from bup.compat import argv_bytes
-from bup.helpers import add_error, handle_ctrl_c, parse_timestamp, saved_errors, \
-    add_error, log
-from bup.io import byte_stream
-
-
-def parse_timestamp_arg(field, value):
-    res = str(value) # Undo autoconversion.
-    try:
-        res = parse_timestamp(res)
-    except ValueError as ex:
-        if ex.args:
-            o.fatal('unable to parse %s resolution "%s" (%s)'
-                    % (field, value, ex))
-        else:
-            o.fatal('unable to parse %s resolution "%s"' % (field, value))
-
-    if res != 1 and res % 10:
-        o.fatal('%s resolution "%s" must be a power of 10' % (field, value))
-    return res
-
-
-optspec = """
-bup xstat pathinfo [OPTION ...] <PATH ...>
---
-v,verbose       increase log output (can be used more than once)
-q,quiet         don't show progress meter
-exclude-fields= exclude comma-separated fields
-include-fields= include comma-separated fields (definitive if first)
-atime-resolution=  limit s, ms, us, ns, 10ns (value must be a power of 10) [ns]
-mtime-resolution=  limit s, ms, us, ns, 10ns (value must be a power of 10) [ns]
-ctime-resolution=  limit s, ms, us, ns, 10ns (value must be a power of 10) [ns]
-"""
-
-target_filename = b''
-active_fields = metadata.all_fields
-
-handle_ctrl_c()
-
-o = options.Options(optspec)
-(opt, flags, remainder) = o.parse(compat.argv[1:])
-
-atime_resolution = parse_timestamp_arg('atime', opt.atime_resolution)
-mtime_resolution = parse_timestamp_arg('mtime', opt.mtime_resolution)
-ctime_resolution = parse_timestamp_arg('ctime', opt.ctime_resolution)
-
-treat_include_fields_as_definitive = True
-for flag, value in flags:
-    if flag == '--exclude-fields':
-        exclude_fields = frozenset(value.split(','))
-        for f in exclude_fields:
-            if not f in metadata.all_fields:
-                o.fatal(f + ' is not a valid field name')
-        active_fields = active_fields - exclude_fields
-        treat_include_fields_as_definitive = False
-    elif flag == '--include-fields':
-        include_fields = frozenset(value.split(','))
-        for f in include_fields:
-            if not f in metadata.all_fields:
-                o.fatal(f + ' is not a valid field name')
-        if treat_include_fields_as_definitive:
-            active_fields = include_fields
-            treat_include_fields_as_definitive = False
-        else:
-            active_fields = active_fields | include_fields
-
-opt.verbose = opt.verbose or 0
-opt.quiet = opt.quiet or 0
-metadata.verbose = opt.verbose - opt.quiet
-
-sys.stdout.flush()
-out = byte_stream(sys.stdout)
-
-first_path = True
-for path in remainder:
-    path = argv_bytes(path)
-    try:
-        m = metadata.from_path(path, archive_path = path)
-    except (OSError,IOError) as e:
-        if e.errno == errno.ENOENT:
-            add_error(e)
-            continue
-        else:
-            raise
-    if metadata.verbose >= 0:
-        if not first_path:
-            out.write(b'\n')
-        if atime_resolution != 1:
-            m.atime = (m.atime / atime_resolution) * atime_resolution
-        if mtime_resolution != 1:
-            m.mtime = (m.mtime / mtime_resolution) * mtime_resolution
-        if ctime_resolution != 1:
-            m.ctime = (m.ctime / ctime_resolution) * ctime_resolution
-        out.write(metadata.detailed_bytes(m, active_fields))
-        out.write(b'\n')
-        first_path = False
-
-if saved_errors:
-    log('WARNING: %d errors encountered.\n' % len(saved_errors))
-    sys.exit(1)
-else:
-    sys.exit(0)
diff --git a/src/bup/compat.c b/src/bup/compat.c
new file mode 100644 (file)
index 0000000..a1ae8ee
--- /dev/null
@@ -0,0 +1,44 @@
+
+#define PY_SSIZE_T_CLEAN
+#define _GNU_SOURCE  1 // asprintf
+#undef NDEBUG
+
+// According to Python, its header has to go first:
+//   http://docs.python.org/2/c-api/intro.html#include-files
+//   http://docs.python.org/3/c-api/intro.html#include-files
+#include <Python.h>
+
+#include "bup/compat.h"
+#include "bup/io.h"
+
+#if PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 8
+
+int bup_py_bytes_main(int argc, char **argv)
+{
+    wchar_t **wargv = PyMem_RawMalloc(argc * sizeof(wchar_t *));
+    if (!wargv)
+        die(2, "memory insufficient to decode command line arguments");
+    int i;
+    for (i = 0; i < argc; i++) {
+        size_t wargn;
+        wargv[i] = Py_DecodeLocale(argv[i], &wargn);
+        if (!wargv[i]) {
+            switch (wargn) {
+            case (size_t) -1:
+                die(2, "too little memory to decode command line argument %d\n",
+                    i);
+                break;
+            case (size_t) -2:
+                die(2, "unable to decode command line argument %d\n", i);
+                break;
+            default:
+                die(2, "unexpected error from Py_DecodeLocale(): %zu\n", wargn);
+                break;
+            }
+            exit(2);
+        }
+    }
+    return Py_Main(argc, wargv);
+}
+
+#endif // PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 8
diff --git a/src/bup/compat.h b/src/bup/compat.h
new file mode 100644 (file)
index 0000000..2e3797c
--- /dev/null
@@ -0,0 +1,3 @@
+#pragma once
+
+int bup_py_bytes_main(int argc, char **argv);
diff --git a/src/bup/io.c b/src/bup/io.c
new file mode 100644 (file)
index 0000000..5a0bae1
--- /dev/null
@@ -0,0 +1,35 @@
+
+#define _GNU_SOURCE  1
+#undef NDEBUG
+
+#include <stdarg.h>
+#include <stdlib.h>
+
+#include "bup/io.h"
+
+__attribute__ ((format(printf, 2, 3)))
+void
+msg(FILE* f, const char * const msg, ...)
+{
+    if (fputs("bup: ", f) == EOF)
+        exit(3);
+    va_list ap;
+    va_start(ap, msg);
+    if (vfprintf(f, msg, ap) < 0)
+        exit(3);
+    va_end(ap);
+}
+
+__attribute__ ((format(printf, 2, 3)))
+void
+die(int exit_status, const char * const msg, ...)
+{
+    if (fputs("bup: ", stderr) == EOF)
+        exit(3);
+    va_list ap;
+    va_start(ap, msg);
+    if (vfprintf(stderr, msg, ap) < 0)
+        exit(3);
+    va_end(ap);
+    exit(exit_status);
+}
diff --git a/src/bup/io.h b/src/bup/io.h
new file mode 100644 (file)
index 0000000..d355e6b
--- /dev/null
@@ -0,0 +1,6 @@
+#pragma once
+
+#include <stdio.h>
+
+void msg(FILE* f, const char * const msg, ...);
+void die(int exit_status, const char * const msg, ...);
index 0f8bb6054c8303bf39842bd466786e8fb46e4a07..a20748b207092518c886a3c1dc85355ac08e95ea 100755 (executable)
@@ -160,8 +160,8 @@ WVSTART 'metadata save/restore (general)'
     # Test a deeper subdir/ to make sure top-level non-dir metadata is
     # restored correctly.  We need at least one dir and one non-dir at
     # the "top-level".
-    WVPASS test -d src/var/cmd
-    WVPASS test -f src/var/cmd/save-cmd.py
+    WVPASS test -d src/var/lib/bup
+    WVPASS test -f src/var/lib/bup/git.py
     WVPASS rm -rf "$BUP_DIR"
     WVPASS bup init
     WVPASS touch -t 201111111111 src-restore # Make sure the top won't match.
index 5fb96be353e716116298ad69fd2e9923eadbb0e4..6f3b2f377512ddd8708b0e0cac3b2d1c641f64ed 100755 (executable)
@@ -123,10 +123,18 @@ c/"
 
 
 WVSTART features
-expect_py_ver=$(LC_CTYPE=C "$top/config/bin/python" \
+expect_py_ver=$(LC_CTYPE=C "$top/dev/python" \
                         -c 'import platform; print(platform.python_version())') \
     || exit $?
 actual_py_ver=$(bup features | grep Python: | sed -Ee 's/ +Python: //') || exit $?
 WVPASSEQ "$expect_py_ver" "$actual_py_ver"
 
+
+WVSTART id-other-than
+result=$("$top/dev/id-other-than" --user 0) ||  exit $?
+WVPASS echo "$result" | WVPASS grep -qE '.*:[0-9]+$'
+result=$("$top/dev/id-other-than" --group 0) ||  exit $?
+WVPASS echo "$result" | WVPASS grep -qE '.*:[0-9]+$'
+
+
 WVPASS rm -rf "$tmpdir"
index 7b65f16e2319752b448b824b43a2f81b7ad2a6ec..78daeb3d01ef6cd98f5bec23df1eb2f5a89a5f97 100755 (executable)
@@ -29,11 +29,10 @@ touch -t 199901010000 "$tmpdir/save/a/1"
 WVSTART "metadata read error for a file"
 WVPASS bup index "$tmpdir/save"
 
-# now do a hack to inject save errors while reading metadata
-# essentially, we create a bup-save command for ourselves
-# that gets an error for the .../5 file in metadata.from_path()
-cat > "$tmpdir/bup-save" << EOF
-#!/usr/bin/env $top/dev/bup-python
+# Inject save errors while reading metadata via --import-py-module.
+WVPASS rm -rf "$tmpdir/mod"
+WVPASS mkdir -p "$tmpdir/mod"
+cat > "$tmpdir/mod/bup_fail_on_5.py" << EOF
 from bup import metadata
 
 orig_from_path = metadata.from_path
@@ -42,13 +41,10 @@ def from_path(path, *args, **kw):
         raise IOError('intentionally failing metadata read for .../5')
     return orig_from_path(path, *args, **kw)
 metadata.from_path = from_path
-
-exec(open("$top/lib/cmd/bup-save", "rb").read())
 EOF
-chmod +x "$tmpdir/bup-save"
 
-# use it to save the data
-"$tmpdir/bup-save" -n test "$tmpdir/save"
+PYTHONPATH="$tmpdir/mod" \
+          bup --import-py-module bup_fail_on_5 save -n test "$tmpdir/save"
 
 # this should work anyway
 WVPASS bup ls -l "test/latest/$tmpdir/save"
@@ -69,8 +65,10 @@ WVSTART "metadata read error for a folder"
 WVPASS bup index --clear
 WVPASS bup index "$tmpdir/save"
 
-cat > "$tmpdir/bup-save" << EOF
-#!/usr/bin/env $top/dev/bup-python
+# Inject save errors while reading metadata via --import-py-module.
+WVPASS rm -rf "$tmpdir/mod"
+WVPASS mkdir -p "$tmpdir/mod"
+cat > "$tmpdir/mod/bup_fail_on_a.py" << EOF
 from bup import metadata
 
 orig_from_path = metadata.from_path
@@ -79,13 +77,10 @@ def from_path(path, *args, **kw):
         raise IOError('intentionally failing metadata read for .../a')
     return orig_from_path(path, *args, **kw)
 metadata.from_path = from_path
-
-exec(open("$top/lib/cmd/bup-save", "rb").read())
 EOF
-chmod +x "$tmpdir/bup-save"
 
-# use it to save the data
-"$tmpdir/bup-save" -n test "$tmpdir/save"
+PYTHONPATH="$tmpdir/mod" \
+          bup --import-py-module bup_fail_on_a save -n test "$tmpdir/save"
 
 # this should work anyway
 WVPASS bup ls -l "test/latest/$tmpdir/save"
@@ -102,8 +97,10 @@ WVSTART "duplicate entries"
 WVPASS bup index --clear
 WVPASS bup index "$tmpdir/save"
 
-cat > "$tmpdir/bup-save" << EOF
-#!/usr/bin/env $top/dev/bup-python
+# Inject save errors while reading metadata via --import-py-module.
+WVPASS rm -rf "$tmpdir/mod"
+WVPASS mkdir -p "$tmpdir/mod"
+cat > "$tmpdir/mod/bup_dup_reader_path.py" << EOF
 from bup import index
 
 Reader = index.Reader
@@ -115,13 +112,10 @@ class DupReader(index.Reader):
                 yield transname, ent
             yield transname, ent
 index.Reader = DupReader
-
-exec(open("$top/lib/cmd/bup-save", "rb").read())
 EOF
-chmod +x "$tmpdir/bup-save"
 
-# use it to save the data
-"$tmpdir/bup-save" -n test "$tmpdir/save"
+PYTHONPATH="$tmpdir/mod" \
+          bup --import-py-module bup_dup_reader_path save -n test "$tmpdir/save"
 
 # this should work
 WVPASS bup ls -l "test/latest/$tmpdir/save"
index dafd9cd568a23126b70dfc012cd3410ca9eb4023..b53eb5cfd7a0b0f8ef219134098cb89be5d40448 100755 (executable)
@@ -56,7 +56,7 @@ WVPASS bup save -n '¡excitement!' --strip src
 "$TOP/bup" web unix://socket </dev/null >bup-web.log 2>&1 &
 web_pid=$!
 # output the log if something fails
-trap cat bup-web.log EXIT
+trap 'cat bup-web.log' EXIT
 wait-for-server-start
 
 WVPASS curl --unix-socket ./socket \
index b024fe17d7882513256f322129af131f1e8d1f13..f5cf907fc528fd23212649e6ed88d3cbab670338 100644 (file)
@@ -5,7 +5,7 @@
 
 . ./wvtest.sh
 
-_wvtop="$(pwd)"
+_wvtop="$(pwd -P)"
 
 wvmktempdir ()
 {