X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=Documentation%2Fbup-split.md;h=be083a64cde06fee1eae4a0465514208f2f70d0f;hb=27e6ee3680585fd445ce04955333a4bdf585db30;hp=41a5731db6f94a2add2023ca9dcca10b4c5f157e;hpb=d05d9df50c50ac944c81338a274b775b9972100f;p=bup.git

diff --git a/Documentation/bup-split.md b/Documentation/bup-split.md
index 41a5731..be083a6 100644
--- a/Documentation/bup-split.md
+++ b/Documentation/bup-split.md
@@ -8,9 +8,19 @@ bup-split - save individual files to bup backup sets
 
 # SYNOPSIS
 
-bup split [-r *host*:*path*] <-b|-t|-c|-n *name*> [-v] [-q]
-    [--bench] [--max-pack-size=*bytes*]
-    [--max-pack-objects=*n*] [--fanout=*count] [filenames...]
+bup split \[-t\] \[-c\] \[-n *name*\] COMMON\_OPTIONS
+
+bup split -b COMMON\_OPTIONS
+
+bup split --copy COMMON\_OPTIONS
+
+bup split --noop \[-t|-b\] COMMON\_OPTIONS
+
+COMMON\_OPTIONS
+  ~ \[-r *host*:*path*\] \[-v\] \[-q\] \[-d *seconds-since-epoch*\] \[\--bench\]
+    \[\--max-pack-size=*bytes*\] \[-#\] \[\--bwlimit=*bytes*\]
+    \[\--max-pack-objects=*n*\] \[\--fanout=*count*\]
+    \[\--keep-boundaries\] \[\--git-ids | filenames...\]
 
 # DESCRIPTION
 
@@ -19,7 +29,7 @@
 the content into chunks of around 8k using a rolling
 checksum algorithm, and saves the chunks into a bup
 repository.  Chunks which have previously been stored are
-not stored again (ie. they are "deduplicated").
+not stored again (ie. they are 'deduplicated').
 
 Because of the way the rolling checksum works, chunks
 tend to be very stable across changes to a given file,
@@ -40,59 +50,118 @@ accomplish this, however.)
 
 To get the data back, use `bup-join`(1).
 
-# OPTIONS
+# MODES
 
--r, --remote=*host*:*path*
-:   save the backup set to the given remote server.  If
-    *path* is omitted, uses the default path on the remote
-    server (you still need to include the ':')
-
--b, --blobs
-:   output a series of git blob ids that correspond to the
-    chunks in the dataset.
+These options select the primary behavior of the command, with -n
+being the most likely choice.
 
--t, --tree
-:   output the git tree id of the resulting dataset.
-
--c, --commit
-:   output the git commit id of the resulting dataset.
-
--n, --name=*name*
+-n, \--name=*name*
 :   after creating the dataset, create a git branch
     named *name* so that it can be accessed using
     that name.  If *name* already exists, the new dataset
-    will be considered a descendant of the old *name*. 
+    will be considered a descendant of the old *name*.
     (Thus, you can continually create new datasets with
     the same name, and later view the history of that
-    dataset to see how it has changed over time.)
-
--v, --verbose
-:   increase verbosity (can be used more than once).
+    dataset to see how it has changed over time.)  The original data
+    will also be available as a top-level file named "data" in the VFS,
+    accessible via `bup fuse`, `bup ftp`, etc.
+
+-t, \--tree
+:   output the git tree id of the resulting dataset.
+
+-c, \--commit
+:   output the git commit id of the resulting dataset.
+
+-b, \--blobs
+:   output a series of git blob ids that correspond to the chunks in
+    the dataset.  Incompatible with -n, -t, and -c.
+
+\--noop
+:   read the data and split it into blocks based on the "bupsplit"
+    rolling checksum algorithm, but don't store anything in the repo.
+    Can be combined with -b or -t to compute (but not store) the git
+    blobs or tree ids for the dataset.  This is mostly useful for
+    benchmarking and validating the bupsplit algorithm.  Incompatible
+    with -n and -c.
+
+\--copy
+:   like `--noop`, but also write the data to stdout.  This can be
+    useful for benchmarking the speed of read+bupsplit+write for large
+    amounts of data.  Incompatible with -n, -t, -c, and -b.
+
+# OPTIONS
+
+-r, \--remote=*host*:*path*
+:   save the backup set to the given remote server.  If *path* is
+    omitted, uses the default path on the remote server (you still
+    need to include the ':').  The connection to the remote server is
+    made with SSH.  If you'd like to specify which port, user or
+    private key to use for the SSH connection, we recommend you use
+    the `~/.ssh/config` file.  Even though the destination is remote,
+    a local bup repository is still required.
+
+-d, \--date=*seconds-since-epoch*
+:   specify the date inscribed in the commit (seconds since 1970-01-01).
 
--q, --quiet
+-q, \--quiet
 :   disable progress messages.
 
---bench
+-v, \--verbose
+:   increase verbosity (can be used more than once).
+
+\--git-ids
+:   stdin is a list of git object ids instead of raw data.
+    `bup split` will read the contents of each named git
+    object (if it exists in the bup repository) and split
+    it.  This might be useful for converting a git
+    repository with large binary files to use bup-style
+    hashsplitting instead.  This option is probably most
+    useful when combined with `--keep-boundaries`.
+
+\--keep-boundaries
+:   if multiple filenames are given on the command line,
+    they are normally concatenated together as if the
+    content all came from a single file.  That is, the
+    set of blobs/trees produced is identical to what it
+    would have been if there had been a single input file.
+    However, if you use `--keep-boundaries`, each file is
+    split separately.  You still only get a single tree or
+    commit or series of blobs, but each blob comes from
+    only one of the files; the end of one of the input
+    files always ends a blob.
+
+\--bench
 :   print benchmark timings to stderr.
 
---max-pack-size=*bytes*
+\--max-pack-size=*bytes*
 :   never create git packfiles larger than the given number
     of bytes.  Default is 1 billion bytes.  Usually there
     is no reason to change this.
 
---max-pack-objects=*numobjs*
+\--max-pack-objects=*numobjs*
 :   never create git packfiles with more than the given
     number of objects.  Default is 200 thousand objects.
     Usually there is no reason to change this.
 
---fanout=*numobjs*
-:   when splitting very large files, never put more than
-    this number of git blobs in a single git tree.  Instead,
-    generate a new tree and link to that.  Default is
-    4096 objects per tree.
+\--fanout=*numobjs*
+:   when splitting very large files, try and keep the number
+    of elements in trees to an average of *numobjs*.
+
+\--bwlimit=*bytes/sec*
+:   don't transmit more than *bytes/sec* bytes per second
+    to the server.  This is good for making your backups
+    not suck up all your network bandwidth.  Use a suffix
+    like k, M, or G to specify multiples of 1024,
+    1024*1024, 1024*1024*1024 respectively.
+
+-*#*, \--compress=*#*
+:   set the compression level to # (a value from 0-9, where
+    9 is the highest and 0 is no compression).  The default
+    is 1 (fast, loose compression)
+
+
+# EXAMPLES
 
-# EXAMPLE
-
     $ tar -cf - /etc | bup split -r myserver: -n mybackup-tar
     tar: Removing leading /' from member names
     Indexing objects: 100% (196/196), done.
 
@@ -103,7 +172,7 @@ To get the data back, use `bup-join`(1).
 
 # SEE ALSO
 
-`bup-join`(1), `bup-index`(1), `bup-save`(1)
+`bup-join`(1), `bup-index`(1), `bup-save`(1), `bup-on`(1), `ssh_config`(5)
 
 # BUP
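
For a rough sense of how the modes documented in the patched page fit
together, here is an illustrative session; the branch name, input paths,
and restore command are placeholders rather than anything prescribed by
the patch:

    # Store a tarball as the branch "mybackup-etc" and print the commit id:
    $ tar -cf - /etc | bup split -c -n mybackup-etc

    # Benchmark the bupsplit algorithm on the same data without writing
    # anything to the repository; -t prints the tree id it would produce:
    $ tar -cf - /etc | bup split --noop -t --bench

    # Later, reassemble the stored stream and list its contents:
    $ bup join mybackup-etc | tar -tf -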
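
In the same spirit, a sketch of the remote and tuning options described
above; the host alias "myserver", the branch and file names, and the
particular bandwidth, compression, and date values are made up for
illustration:

    # Save to the default repository path on "myserver" over SSH, capping
    # upload bandwidth at 1 MiB/s and using maximum compression:
    $ tar -cf - /etc | bup split -r myserver: -n etc-backup --bwlimit=1M -9

    # Split two images separately (--keep-boundaries) so no blob spans both
    # files, and stamp the commit with an explicit date:
    $ bup split -n images --keep-boundaries -d 1388531600 disk1.img disk2.img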