Update bup-split(1); document -d, clarify the split "modes", and reorganize.

[bup.git] / Documentation / bup-split.md
diff --git a/Documentation/bup-split.md b/Documentation/bup-split.md

index eba57ea0cf3dad3518c0316d58e6b85cc37f2368..12e866d72f2de1546473f291862ebd5b632ec6ad 100644 (file)
--- a/Documentation/bup-split.md
+++ b/Documentation/bup-split.md
@@ -8,10 +8,17 @@ bup-split - save individual files to bup backup sets
  
  # SYNOPSIS
  
-bup split [-r *host*:*path*] <-b|-t|-c|-n *name*> [-v] [-q]
-  [--bench] [--max-pack-size=*bytes*] [-#]
-  [--max-pack-objects=*n*] [--fanout=*count]
-  [--git-ids] [--keep-boundaries] [filenames...]
+bup split \[-t\] \[-c\] \[-n *name*\] COMMON\_OPTIONS
+
+bup split -b COMMON\_OPTIONS
+
+bup split \<\--noop \[\--copy\]|\--copy\> COMMON\_OPTIONS
+
+COMMON\_OPTIONS
+  ~ \[-r *host*:*path*\] \[-v\] \[-q\] \[-d *seconds-since-epoch*\] \[\--bench\]
+    \[\--max-pack-size=*bytes*\] \[-#\] \[\--bwlimit=*bytes/sec*\]
+    \[\--max-pack-objects=*numobjs*\] \[\--fanout=*numobjs*\]
+    \[\--keep-boundaries\] \[\--git-ids | filenames...\]
  
  # DESCRIPTION
  
@@ -41,9 +48,44 @@ accomplish this, however.)
  
  To get the data back, use `bup-join`(1).
  
+# MODES
+
+These options select the primary behavior of the command, with -n
+being the most likely choice.
+
+-n, \--name=*name*
+:   after creating the dataset, create a git branch
+    named *name* so that it can be accessed using
+    that name.  If *name* already exists, the new dataset
+    will be considered a descendant of the old *name*.
+    (Thus, you can continually create new datasets with
+    the same name, and later view the history of that
+    dataset to see how it has changed over time.)
+
+-t, \--tree
+:   output the git tree id of the resulting dataset.
+
+-c, \--commit
+:   output the git commit id of the resulting dataset.
+
+-b, \--blobs
+:   output a series of git blob ids that correspond to the chunks in
+    the dataset.  Incompatible with -n, -t, and -c.
+
+\--noop
+:   read the data and split it into blocks based on the "bupsplit"
+    rolling checksum algorithm, but don't do anything with the blocks.
+    This is mostly useful for benchmarking.  Incompatible with -n, -t,
+    -c, and -b.
+
+\--copy
+:   like `--noop`, but also write the data to stdout.  This can be
+    useful for benchmarking the speed of read+bupsplit+write for large
+    amounts of data.  Incompatible with -n, -t, -c, and -b.
+
  # OPTIONS
  
--r, --remote=*host*:*path*
+-r, \--remote=*host*:*path*
  :   save the backup set to the given remote server.  If
      *path* is omitted, uses the default path on the remote
      server (you still need to include the ':').  The connection to the
@@ -51,32 +93,16 @@ To get the data back, use `bup-join`(1).
      or private key to use for the SSH connection, we recommend you use the
      `~/.ssh/config` file.
  
--b, --blobs
-:   output a series of git blob ids that correspond to the
-    chunks in the dataset.
+-d, \--date=*seconds-since-epoch*
+:   specify the date inscribed in the commit (seconds since 1970-01-01).
  
--t, --tree
-:   output the git tree id of the resulting dataset.
-    
--c, --commit
-:   output the git commit id of the resulting dataset.
-
--n, --name=*name*
-:   after creating the dataset, create a git branch
-    named *name* so that it can be accessed using
-    that name.  If *name* already exists, the new dataset
-    will be considered a descendant of the old *name*. 
-    (Thus, you can continually create new datasets with
-    the same name, and later view the history of that
-    dataset to see how it has changed over time.)
-    
--q, --quiet
+-q, \--quiet
  :   disable progress messages.
  
--v, --verbose
+-v, \--verbose
  :   increase verbosity (can be used more than once).
  
---git-ids
+\--git-ids
  :   stdin is a list of git object ids instead of raw data.
      `bup split` will read the contents of each named git
      object (if it exists in the bup repository) and split
@@ -85,7 +111,7 @@ To get the data back, use `bup-join`(1).
      hashsplitting instead.  This option is probably most
      useful when combined with `--keep-boundaries`.
  
---keep-boundaries
+\--keep-boundaries
  :   if multiple filenames are given on the command line,
      they are normally concatenated together as if the
      content all came from a single file.  That is, the
@@ -97,43 +123,31 @@ To get the data back, use `bup-join`(1).
      only one of the files; the end of one of the input
      files always ends a blob.
  
---noop
-:   read the data and split it into blocks based on the "bupsplit"
-    rolling checksum algorithm, but don't do anything with
-    the blocks.  This is mostly useful for benchmarking.
-
---copy
-:   like --noop, but also write the data to stdout.  This
-    can be useful for benchmarking the speed of read+bupsplit+write
-    for large amounts of data.
-
---bench
+\--bench
  :   print benchmark timings to stderr.
  
---max-pack-size=*bytes*
+\--max-pack-size=*bytes*
  :   never create git packfiles larger than the given number
      of bytes.  Default is 1 billion bytes.  Usually there
      is no reason to change this.
  
---max-pack-objects=*numobjs*
+\--max-pack-objects=*numobjs*
  :   never create git packfiles with more than the given
      number of objects.  Default is 200 thousand objects. 
      Usually there is no reason to change this.
      
---fanout=*numobjs*
-:   when splitting very large files, never put more than
-    this number of git blobs in a single git tree.  Instead,
-    generate a new tree and link to that.  Default is
-    4096 objects per tree.
+\--fanout=*numobjs*
+:   when splitting very large files, try to keep the number
+    of elements in trees to an average of *numobjs*.
  
---bwlimit=*bytes/sec*
+\--bwlimit=*bytes/sec*
  :   don't transmit more than *bytes/sec* bytes per second
      to the server.  This is good for making your backups
      not suck up all your network bandwidth.  Use a suffix
      like k, M, or G to specify multiples of 1024,
      1024\*1024, 1024\*1024\*1024 respectively.
  
--*#*, --compress=*#*
+-*#*, \--compress=*#*
  :   set the compression level to # (a value from 0-9, where
      9 is the highest and 0 is no compression).  The default
      is 1 (fast, loose compression).