.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.40)
.\"
.\" Standard preamble:
.\" ========================================================================
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.de Ve \" End verbatim text
.ft R
.fi
..
.\" Set up some character translations and predefined strings.  \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
.\" nothing in troff, for use with C<>.
.tr \(*W-
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
.    ds -- \(*W-
.    ds PI pi
.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
.    ds L" ""
.    ds R" ""
.    ds C` ""
.    ds C' ""
'br\}
.el\{\
.    ds -- \|\(em\|
.    ds PI \(*p
.    ds L" ``
.    ds R" ''
.    ds C`
.    ds C'
'br\}
.\"
.\" Escape single quotes in literal strings from groff's Unicode transform.
.ie \n(.g .ds Aq \(aq
.el       .ds Aq '
.\"
.\" If the F register is >0, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD.  Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.\"
.\" Avoid warning from groff about undefined register 'F'.
.de IX
..
.nr rF 0
.if \n(.g .if rF .nr rF 1
.if (\n(rF:(\n(.g==0)) \{\
.    if \nF \{\
.        de IX
.        tm Index:\\$1\t\\n%\t"\\$2"
..
.        if !\nF==2 \{\
.            nr % 0
.            nr F 2
.        \}
.    \}
.\}
.rr rF
.\"
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
.    \" fudge factors for nroff and troff
.if n \{\
.    ds #H 0
.    ds #V .8m
.    ds #F .3m
.    ds #[ \f1
.    ds #] \fP
.\}
.if t \{\
.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
.    ds #V .6m
.    ds #F 0
.    ds #[ \&
.    ds #] \&
.\}
.    \" simple accents for nroff and troff
.if n \{\
.    ds ' \&
.    ds ` \&
.    ds ^ \&
.    ds , \&
.    ds ~ ~
.    ds /
.\}
.if t \{\
.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
.\}
.    \" troff and (daisy-wheel) nroff accents
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
.ds ae a\h'-(\w'a'u*4/10)'e
.ds Ae A\h'-(\w'A'u*4/10)'E
.    \" corrections for vroff
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
.    \" for low resolution devices (crt and lpr)
.if \n(.H>23 .if \n(.V>19 \
\{\
.    ds : e
.    ds 8 ss
.    ds o a
.    ds d- d\h'-1'\(ga
.    ds D- D\h'-1'\(hy
.    ds th \o'bp'
.    ds Th \o'LP'
.    ds ae ae
.    ds Ae AE
.\}
.rm #[ #] #H #V #F C
.\" ========================================================================
.\"
.IX Title "ZPAQ 1"
.TH ZPAQ 1 "2021-01-05" "perl v5.32.0" "User Contributed Perl Documentation"
.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
.nh
.SH "NAME"
zpaq \- Journaling archiver for incremental backups.
.SH "SYNOPSIS"
.IX Header "SYNOPSIS"
zpaq \fIcommand\fR \fIarchive\fR[\f(CW\*(C`.zpaq\*(C'\fR] [\fIfiles\fR]... [\-\fIoptions\fR]...
.SH "DESCRIPTION"
.IX Header "DESCRIPTION"
\&\fIzpaq\fR manages journaling archives for incremental user-level
local or remote backups
that conform to \fIThe \s-1ZPAQ\s0 Open Standard Format for Highly Compressed Data\fR
(see \fI\s-1AVAILABILITY\s0\fR). The format supports encrypted, deduplicated, and
compressed single or multi-part archives with rollback capability.
It supports archives as large as 1000 times available memory or up to
250 \s-1TB\s0 and 4 billion files, interoperable between Windows
and Unix/Linux/OS X.
.SH "COMMANDS"
.IX Header "COMMANDS"
\&\fIcommand\fR is one of \f(CW\*(C`add\*(C'\fR, \f(CW\*(C`extract\*(C'\fR, or \f(CW\*(C`list\*(C'\fR.
Commands may be abbreviated to \f(CW\*(C`a\*(C'\fR, \f(CW\*(C`x\*(C'\fR, or \f(CW\*(C`l\*(C'\fR respectively.
\&\fIarchive\fR is assumed to have a \f(CW\*(C`.zpaq\*(C'\fR extension if no extension is
specified.
.PP
If \fIarchive\fR contains wildcards \f(CW\*(C`*\*(C'\fR or \f(CW\*(C`?\*(C'\fR, then the archive is
in multiple parts where \f(CW\*(C`*\*(C'\fR matches the part number and \f(CW\*(C`?\*(C'\fR matches
single digits. zpaq will consider the concatenation of the parts in
numerical order starting with 1 to be equivalent to a single archive.
For example, \f(CW\*(C`arc??\*(C'\fR would match the concatenation of \f(CW\*(C`arc01.zpaq\*(C'\fR,
\&\f(CW\*(C`arc02.zpaq\*(C'\fR, etc. up to the last existing part.
.IP "a" 4
.IX Item "a"
.PD 0
.IP "add" 4
.IX Item "add"
.PD
Append changes in \fIfiles\fR to \fIarchive\fR, or create \fIarchive\fR if it does not
exist. \fIfiles\fR is a list of file and directory names separated by spaces. If a
name is a directory, then it recursively includes all
files and subdirectories within. In Windows, \fIfiles\fR may contain
wildcards \f(CW\*(C`*\*(C'\fR and \f(CW\*(C`?\*(C'\fR in the last component of the path (after the last slash).
\&\f(CW\*(C`*\*(C'\fR matches any string and \f(CW\*(C`?\*(C'\fR matches any character. In Unix/Linux, wildcards
are expanded by the shell, which has the same effect.
.Sp
A change is an addition, update, or deletion of any file or directory in
\&\fIfiles\fR or any of its subdirectories to any depth. A file or directory is
considered changed if its size or last-modified date (with 1 second resolution),
or Windows attributes or Unix/Linux permissions (if saved)
differ between the internal
and external versions. File contents are not compared. If the attributes
but not the date has changed, then the attributes are updated in the
archive with the assumption that the file contents have not changed.
.Sp
Files are added by splitting them into fragments along content-dependent
boundaries, computing their \s-1SHA\-1\s0 hashes, and comparing with hashes already
stored in the archive. If the hash matches, it is assumed that the fragments
are identical and only a pointer to the previous compressed fragment is
saved. Unmatched fragments are packed into blocks, compressed, and appended
to the archive.
.Sp
For each added or updated file or directory, the following information is saved
in the archive: the compressed contents, fragment hashes, the file or directory
name as it appears in \fIfiles\fR plus any trailing path, the last-modified
date with 1 second resolution, and the Unix/Linux permissions or Windows
attributes. Other metadata such as owner, group, ACLs,
last access time, etc. are not saved. Symbolic links are not saved or followed.
Hard links are followed as if they were ordinary files. Special file types
such as devices, named pipes, and named sockets are not saved.
The 64 bit Windows version will save alternate data streams.
.Sp
If any file cannot be read (e.g. permission denied), then it is skipped and
a warning is reported. However, other files are still added and the update
is still valid.
.Sp
If \fIarchive\fR is \f(CW""\fR (a quoted empty string), then zpaq compresses
\&\fIfiles\fR as if creating a new archive, but discards the output without
writing to disk.
.Sp
If \fIarchive\fR is multi-part, then zpaq will create a new part using
the next available part number. For example:
.Sp
.Vb 4
\&    zpaq add "arc??" files   (creates arc01.zpaq)
\&    zpaq add "arc??" files   (creates arc02.zpaq)
\&    zpaq add "arc??" files   (creates arc03.zpaq)
\&    zpaq extract "arc??"     (extracts all parts)
.Ve
.Sp
Updates are transacted. If zpaq is interrupted before completing
the update, then the partially appended data is ignored and overwritten on the
next update. This is accomplished by first appending a temporary update header,
appending the compressed data and index, then updating the header as the
last step.
.Sp
As the archive is updated, the program will report the percent complete, estimated
time remaining, the name and size of the file preceded by \f(CW\*(C`+\*(C'\fR if the file
is being added, \f(CW\*(C`#\*(C'\fR if updated, or \f(CW\*(C`\-\*(C'\fR if deleted. If the file
is deduplicated, then the new size after deduplication but before
compression is shown.
.IP "x" 4
.IX Item "x"
.PD 0
.IP "extract" 4
.IX Item "extract"
.PD
Extract \fIfiles\fR (including the contents of directories), or extract
the whole archive contents if \fIfiles\fR is omitted.
The file names, last-modified date,
and permissions or attributes are restored as saved in the archive.
If there are multiple versions of a file stored, then only the latest
version is extracted. If a stored file has been marked as deleted,
then it is not extracted.
.Sp
Existing files are skipped without being overwritten. (Use \f(CW\*(C`\-force\*(C'\fR
to overwrite).
.Sp
As files are extracted, the fragment \s-1SHA\-1\s0 hashes are computed and compared
with the stored hashes. The program reports an error in case of mismatches.
Blocks are only decompressed up to the last used fragment.
If the archive is damaged, then zpaq will extract as much as possible
from the undamaged blocks.
.Sp
As files are extracted, the program reports the percent completed,
estimated time remaining, and the name of the file preceded by \*(L">\*(R"
if the file is created or overwritten (with \f(CW\*(C`\-force\*(C'\fR), \f(CW\*(C`?\*(C'\fR if
the file is skipped because it already exists, or \f(CW\*(C`=\*(C'\fR if decompression is
skipped with \f(CW\*(C`\-force\*(C'\fR because the contents were compared and
found to be identical. The date and attributes are still
extracted in this case.
.IP "l" 4
.IX Item "l"
.PD 0
.IP "list" 4
.IX Item "list"
.PD
List the archive contents. With \fIfiles\fR, list only the specified
files and directories and compare them with the same files on disk.
For each file or directory, show the comparison result,
last modified date, uncompressed size,
Windows attributes or Unix/Linux permissions,
and the saved name. If the internal and external
versions of the file differ, then show both.
.Sp
The comparison result is reported in the first column as \f(CW\*(C`=\*(C'\fR if the
last-modified date, attributes (if saved), and size are identical,
\&\f(CW\*(C`#\*(C'\fR if different, \f(CW\*(C`\-\*(C'\fR if the external file does not exist, or
\&\f(CW\*(C`+\*(C'\fR if the internal file does not exist. With \f(CW\*(C`\-force\*(C'\fR, the
contents are compared, but not the dates or attributes. Contents
are compared by reading the files, computing \s-1SHA\-1\s0 hashes and comparing
with the stored hashes. In either
case, replacing \f(CW\*(C`list\*(C'\fR with \f(CW\*(C`add\*(C'\fR will show exactly what changes
would be made to the archive.
.Sp
In Unix/Linux, permissions are listed as a file type \f(CW\*(C`d\*(C'\fR for directory
or blank for a regular file, followed by a 4 digit octal number as
per \f(CWchmod(1)\fR. In Windows, attributes are listed from the set
\&\f(CW\*(C`RHS DAdFTprCoIEivs\*(C'\fR where the character is present if
the corresponding bit 0..17 is set as returned by \fBGetFileAttributes()\fR.
The meanings are as follows: \f(CW\*(C`R\*(C'\fRead-only, \f(CW\*(C`H\*(C'\fRidden,
\&\f(CW\*(C`S\*(C'\fRystem, unused (blank), \f(CW\*(C`D\*(C'\fRirectory, \f(CW\*(C`A\*(C'\fRchive, \f(CW\*(C`d\*(C'\fRevice,
normal \f(CW\*(C`F\*(C'\fRile, \f(CW\*(C`T\*(C'\fRemporary, s\f(CW\*(C`p\*(C'\fRarse file, \f(CW\*(C`r\*(C'\fReparse point,
\&\f(CW\*(C`C\*(C'\fRompressed, \f(CW\*(C`o\*(C'\fRffline, not content \f(CW\*(C`I\*(C'\fRndexed, \f(CW\*(C`E\*(C'\fRncrypted,
\&\f(CW\*(C`i\*(C'\fRntegrity stream, \f(CW\*(C`v\*(C'\fRirtual, no \f(CW\*(C`s\*(C'\fRcrub data.
.Sp
\&\fIarchive\fR may be "", which is equivalent to comparing with an empty
archive.
.SH "OPTIONS"
.IX Header "OPTIONS"
.IP "\-all [\fIN\fR]" 4
.IX Item "-all [N]"
With \f(CW\*(C`list\*(C'\fR, list all saved versions and not just the latest version,
including versions where the file is marked as deleted. Each version
is shown in a separate numbered directory beginning with \f(CW\*(C`0001/\*(C'\fR.
Absolute paths are first converted to relative paths. In Windows, the \f(CW\*(C`:\*(C'\fR
on the drive letter is removed. For example, \f(CW\*(C`foo\*(C'\fR and \f(CW\*(C`/foo\*(C'\fR are
shown as \f(CW\*(C`0001/foo\*(C'\fR. \f(CW\*(C`C:/foo\*(C'\fR and \f(CW\*(C`C:foo\*(C'\fR are shown as \f(CW\*(C`0001/C/foo\*(C'\fR.
.Sp
The date shown on the root directory of each version is the date of the
update. The root directory listing also shows the number of updates
and deletions in that version and the compressed size.
.Sp
When a file is deleted, it is shown with the dates and attributes
blank with size 0.
.Sp
With \f(CW\*(C`extract\*(C'\fR, extract the files in each version as shown with \f(CW\*(C`list \-all\*(C'\fR.
.Sp
\&\fIN\fR selects the number of digits in the directory name. The default is 4.
More digits will be used when necessary. For example:
.Sp
.Vb 1
\&    zpaq list archive \-all 2 \-not "??/?*"
.Ve
.Sp
will show the dates when the archive was updated as \f(CW\*(C`01/\*(C'\fR, \f(CW\*(C`02/\*(C'\fR,
etc. but not their contents.
.IP "\-f" 4
.IX Item "-f"
.PD 0
.IP "\-force" 4
.IX Item "-force"
.PD
With \f(CW\*(C`add\*(C'\fR, attempt to add files even if the last-modified date has
not changed. Files are added only if they really are different, based
on comparing the computed and stored \s-1SHA\-1\s0 hashes.
.Sp
With \f(CW\*(C`extract\*(C'\fR, overwrite existing output files. If the
contents differ (tested by comparing \s-1SHA\-1\s0 hashes), then the file is
decompressed and extracted. If the dates or attributes/permissions
differ, then they are set to match those stored in the archive.
.Sp
With \f(CW\*(C`list\*(C'\fR \fIfiles\fR, compare files by computing \s-1SHA\-1\s0 fragment hashes
and comparing with stored hashes. Ignore differences in dates and
attributes.
.IP "\-fragment \fIN\fR" 4
.IX Item "-fragment N"
Set the dedupe fragment size range from 64 x 2^\fIN\fR to 8128 x 2^\fIN\fR
bytes with an average size of 1024 x 2^\fIN\fR bytes. The default is 6
(range 4096..520192, average 65536). Smaller fragment sizes can
improve compression through deduplication of similar files, but
require more memory and more overhead. Each fragment adds about 28 bytes
to the archive and requires about 40 bytes of memory. For the default,
this is less than 0.1% of the archive size.
.Sp
Values other than 6 conform to the \s-1ZPAQ\s0 specification and will be decompressed
correctly by all versions, but do not conform to the recommendation
for best deduplication. Adding identical files with different values
of \fIN\fR will not deduplicate because the fragment boundaries will differ.
\&\f(CW\*(C`list \-summary\*(C'\fR will not identify these files as identical for
the same reason.
.IP "\-index \fIindexfile\fR" 4
.IX Item "-index indexfile"
With \f(CW\*(C`add\*(C'\fR, create \fIarchive\fR\f(CW\*(C`.zpaq\*(C'\fR as a suffix to append to a remote
archive which is assumed to be identical to \fIindexfile\fR except that
\&\fIindexfile\fR contains no compressed file contents (D blocks).
Then update \fIindexfile\fR by appending a copy of \fIarchive\fR\f(CW\*(C`.zpaq\*(C'\fR
without the D blocks. With \f(CW\*(C`extract\*(C'\fR, specify the index to create
for \fIarchive\fR\f(CW\*(C`.zpaq\*(C'\fR and do not extract any files.
.Sp
The purpose is to maintain a backup offsite without using much
local disk space. The normal usage is to append the suffix at the
remote site and delete it locally, keeping only the much smaller index.
For example:
.Sp
.Vb 3
\&    zpaq add part files \-index index.zpaq
\&    cat part.zpaq >> remote.zpaq
\&    rm part.zpaq
.Ve
.Sp
\&\fIindexfile\fR has no default extension. However, with a \f(CW\*(C`.zpaq\*(C'\fR
extension it can be listed to show the contents of the remote archive
or compare with local files. It cannot be extracted or updated as
a regular archive. Thus, the following should produce identical output:
.Sp
.Vb 2
\&    zpaq list remote.zpaq
\&    zpaq list index.zpaq
.Ve
.Sp
If \fIarchive\fR is multi-part (contains \f(CW\*(C`*\*(C'\fR or \f(CW\*(C`?\*(C'\fR), then zpaq will
substitute a part number equal to 1 plus the number of previous updates.
The parts may then be accessed as a multi-part archive without
appending or renaming.
.Sp
With \f(CW\*(C`add\*(C'\fR, it is an error if the \fIarchive\fR to be created
already exists, or if \fIindexfile\fR is a regular archive. \f(CW\*(C`\-index\*(C'\fR
cannot be used with \f(CW\*(C`\-until\*(C'\fR or a streaming archive \f(CW\*(C`\-method s...\*(C'\fR.
With \f(CW\*(C`extract\*(C'\fR, it is an error if \fIindexfile\fR exists and \f(CW\*(C`\-force\*(C'\fR
is not used to overwrite.
.IP "\-key \fIpassword\fR" 4
.IX Item "-key password"
This option is required for all commands operating on an encrypted archive.
When creating a new archive with \f(CW\*(C`add\*(C'\fR, the new archive will be encrypted
with \fIpassword\fR and all subsequent operations will require the same
password.
.Sp
An archive is encrypted with \s-1AES\-256\s0 in \s-1CTR\s0 mode. The password is
strengthened using Scrypt(\s-1SHA\-256\s0(password), salt, N=16384, r=8, p=1),
which would require 208M operations and 16 \s-1MB\s0 memory per test in a
brute force key search.
When creating a new archive, a 32 byte salt is generated
using \fBCryptGenRandom()\fR in Windows or from /dev/urandom in Unix/Linux,
such that the first byte is different from the normal header
of an unencrypted archive (\f(CW\*(C`z\*(C'\fR or \f(CW7\fR). A multi-part archive
is encrypted with a single keystream as if the parts were concatenated.
An index is encrypted with the same password, where the first byte
of the salt is modified by \s-1XOR\s0 with ('z' \s-1XOR\s0 '7').
.Sp
Encryption provides secrecy but not authentication. An attacker
who knows or can guess any bits of the plaintext can set them without
knowing the key.
.IP "\-m\fItype\fR[\fIBlocksize\fR[.\fIpre\fR[.\fIarg\fR][\fIcomp\fR[.\fIarg\fR]]...]]" 4
.IX Item "-mtype[Blocksize[.pre[.arg][comp[.arg]]...]]"
.PD 0
.IP "\-method \fItype\fR[\fIBlocksize\fR[.\fIpre\fR[.\fIarg\fR][\fIcomp\fR[.\fIarg\fR]]...]]" 4
.IX Item "-method type[Blocksize[.pre[.arg][comp[.arg]]...]]"
.PD
With \f(CW\*(C`add\*(C'\fR, select a compression method. \fItype\fR may be 0, 1, 2, 3, 4,
5, \f(CW\*(C`x\*(C'\fR, or \f(CW\*(C`s\*(C'\fR. The optional \fIBlocksize\fR may be 0..11, written with
no space after the type, like \f(CW\*(C`\-m10\*(C'\fR or \f(CW\*(C`\-method 511\*(C'\fR. The remaining
arguments, separated by periods or commas without spaces, are only allowed for
types \f(CW\*(C`x\*(C'\fR or \f(CW\*(C`s\*(C'\fR, for example \f(CW\*(C`\-mx4.3ci1\*(C'\fR.
.Sp
If \fItype\fR is numeric, then higher numbers compress better but are slower.
The default is \f(CW\*(C`\-m1\*(C'\fR. It is recommended for backups. \f(CW\*(C`\-m2\*(C'\fR compresses
slower but decompresses just as fast as 1. It is recommended for
archives to be compressed once and decompressed many times, such as
downloads. \f(CW\*(C`\-m0\*(C'\fR stores with deduplication but no further compression.
.Sp
\&\fIBlocksize\fR says
to pack fragments into blocks up to 2^\fIBlocksize\fR MiB. Using larger
blocks can improve compression but require more memory and may be slower
because each block is compressed or decompressed by a separate thread.
The memory requirement is up to 8 times the block size per thread
for levels up to 4 and 16 times block size per thread for level 5.
The default \fIBlocksize\fR is 4 (16 MiB) for types 0 and 1, and 6 (64 MiB)
otherwise.
.Sp
Types \f(CW\*(C`x\*(C'\fR and \f(CW\*(C`s\*(C'\fR are for experimental use. Normally, zpaq selects
different methods depending on the compression level
and an analysis of the data (text, executable, or other binary,
and degree of compressibility).
\&\fItype\fR selects journaling or streaming format.
\&\fIpre\fR is 0..7 selecting a preprocessing step (\s-1LZ77, BWT, E8E9\s0),
\&\fIcomp\fR is a series of context modeling components from the
set {c,i,a,w,m,s,t} selecting a \s-1CM\s0 or \s-1ICM, ISSE\s0 chain, \s-1MATCH,\s0
word model, \s-1MIX, SSE,\s0 or \s-1MIX2\s0 respectively. \fIpre\fR and \fIcomp\fR may be followed
by a list of numeric arguments (\fIarg\fR) separated by periods or commas.
For example:
.Sp
.Vb 1
\&    \-method x6.3ci1
.Ve
.Sp
selects a journaling archive (x), block size 2^6 = 64 MiB, \s-1BWT\s0 transform (3),
an order 0 \s-1ICM\s0 (c), and order 1 \s-1ISSE\s0 (i1). (zpaq normally selects this method
for level 3 text compression). \fItype\fR is as follows.
.RS 4
.IP "x" 4
.IX Item "x"
Selects normal (journaling) mode. Files are split into fragments, deduplicated,
packed into blocks, and compressed by the method described. The compressed
blocks are preceded by a transaction header giving the date of the update.
The blocks are followed by a list of fragment hashes and sizes and a list
of files added, updated, or deleted. Each added or updated file lists
the last-modified date, attributes, and a list of fragment IDs.
.IP "s" 4
.IX Item "s"
Selects streaming mode for single-pass extraction and compatibility with
zpaq versions prior to 6.00 (2012). Streaming archives do not support
deduplication or rollback. Files are split into fragments of size
2^\fIblocksize\fR MiB \- 4 KiB. Each file or fragment is compressed in a
separate block with no attempt at deduplication. The file name, date,
and attributes are stored in the header of the first fragment. The hashes
are stored in the trailers of each block. There is no transaction block
to allow rollback. Files are added to the previously dated update.
Streaming mode with \f(CW\*(C`\-index\*(C'\fR is an error.
.IP "\fIpre\fR[.\fImin1\fR.\fImin2\fR.\fIdepth\fR.\fIsize\fR[.\fIlookahead\fR]]" 4
.IX Item "pre[.min1.min2.depth.size[.lookahead]]"
\&\fIpre\fR selects a pre/post processing step before context modeling as follows.
.Sp
.Vb 8
\&    0 = no preprocessing
\&    1 = Packed LZ77
\&    2 = Byte aligned LZ77
\&    3 = BWT (Burrows\-Wheeler Transform)
\&    4 = E8E9
\&    5 = E8E9 + packed LZ77 
\&    6 = E8E9 + byte aligned LZ77
\&    7 = E8E9 + BWT
.Ve
.Sp
The E8E9 transform (4..7) improves the compression of x86 executable
files (.exe or .dll). The transform scans backward for 5 byte patterns of
the form (E8|E9 xx xx xx 00|FF) hex and adds the block offset to the three
middle bytes. The E8 and E9 opcodes are \s-1CALL\s0 and \s-1JMP,\s0 respectively. The
transform replaces relative addresses with absolute addresses. The transform
is applied prior to \s-1LZ77\s0 or \s-1BWT.\s0 Decompression reverses the transforms
in the opposite order.
.Sp
\&\s-1LZ77\s0 (1, 2, 5, 6) compresses by searching for matching strings using a
hash table or suffix array and replacing them with pointers to the previous
match. Types 1 and 2 select variable bit length coding or byte aligned coding
respectively.
Variable bit length encoding compresses better by itself, but byte aligned
coding allows for further compression using a context model.
Types 5 and 6 are the same as 1 and 2 respectively, except that the
block is E8E9 transformed first.
.Sp
\&\s-1BWT\s0 (Burrows\-Wheeler Transform, 3 or 7) sorts the input block by
context, which brings bytes with similar contexts together. It does not
compress by itself, but makes the input suited to compression
with a fast adapting low order context model.
.Sp
The remaining arguments apply only to \s-1LZ77.\s0
\&\fImin1\fR selects the minimum match length, which must be at least 4 for
packed \s-1LZ77\s0 or 1 for byte aligned \s-1LZ77.\s0 \fImin2\fR selects a longer minimum
match length to try first, or is 0 to skip this step. The block is encoded
by testing 2^\fIdepth\fR locations indexed by a hash table of
2^\fIsize\fR elements indexed by hashes of the next \fImin2\fR and then \fImin1\fR
characters. If \fIlookahead\fR is specified and greater than 0, then the
search is repeated \fIlookahead\fR + 1 times to consider coding the next
0 to \fIlookahead\fR bytes as literals to find a longer match.
.Sp
If \fIsize\fR = \fIblocksize\fR + 21, then matches are found using a suffix
array instead of a hash table, scanning forward and backward 2^\fIdepth\fR
elements to find the longest past match. \fImin2\fR has no effect.
A suffix array requires 4.5 x 2^\fIblocksize\fR MiB memory. A hash table requires
4 x 2^\fIsize\fR bytes memory. For example:
.Sp
.Vb 1
\&    \-method x6.1.4.0.5.27.1
.Ve
.Sp
specifies 64 MiB blocks (6), variable length \s-1LZ77\s0 without E8E9 (1), minimum
match length 4, no secondary search (0), search depth 2^5 = 32 in each
direction in the suffix array (27 = 6 + 21), and 1 byte lookahead.
.RE
.RS 4
.Sp
\&\fIcomp\fR specifies a component of a context model. If this section is
empty, then no further compression is performed. Otherwise the block
is compressed by an array of components. Each component takes a context
and possibly the outputs of earlier components, and outputs
a prediction, a probability that the next bit of input is a 1.
The final prediction is used to arithmetic code the bit.
Components normally allocate memory equal to the block size, or less for
smaller contexts as needed. Components are as follows:
.IP "c[.\fImaxcount\fR[.\fIoffset\fR[.\fImask\fR]...]]" 4
.IX Item "c[.maxcount[.offset[.mask]...]]"
Specifies a context model (\s-1CM\s0), or indirect context model (\s-1ICM\s0). A \s-1CM\s0
maps a context hash to a prediction by looking up the context in a table,
and then adjusts the prediction to reduce the coding error by 1/count,
where count is bounded by \fImaxcount\fR x 4, and \fImaxcount\fR is in 1..255.
.Sp
If \fImaxcount\fR is 0, then specify an \s-1ICM.\s0 An \s-1ICM\s0 maps a context to
a state representing two bit counts and the most recent bit. That state
is mapped to a prediction and updated at a fixed rate. An \s-1ICM\s0 adapts faster
to changing statistics. A \s-1CM\s0 with a high count compresses stationary
data better. The default is 0 (\s-1ICM\s0).
.Sp
If \fImaxcount\fR has the form 1000\fIm\fR + n, then the effect is the same
as \fImaxcount\fR = n while reducing memory to 1/2^m of block size.
.Sp
The remaining arguments represent contexts, all of which are hashed
together. If \fIoffset\fR is 1..255, then the block offset mod \fIoffset\fR
is hashed in. If \fIoffset\fR is 1000..1255, then the distance to the last
occurrence of \fIoffset\fR \- 1000 is hashed in. For example, \f(CW\*(C`c0.1010\*(C'\fR
specifies an \s-1ICM\s0 taking the text column number (distance back to the last
linefeed = 10) as context. The default is 0 (no context).
.Sp
Each \fImask\fR is ANDed with previous bytes. For example, \f(CW\*(C`c0.0.255.255.255\*(C'\fR
is an \s-1ICM\s0 with order 3 context. A value in 256..511 specifies a context
of \fImask\fR \- 256 hashed together with the byte aligned \s-1LZ77\s0 parse state
(whether a literal or match code is expected). For example,
\&\f(CW\*(C`\-method x6.2.12.0.8.27c0.0.511.255\*(C'\fR specifies block size 2^6 MiB,
byte aligned \s-1LZ77\s0 (2), minimum match length 12, search depth 2^8,
suffix array search (27 = 6 + 21), an \s-1ICM\s0 (c0), no offset context (0),
and order 2 context plus \s-1LZ77\s0 state (511.255).
.Sp
A mask greater than 1000 is shorthand for \fImask\fR \- 1000 zeros. For example,
the sparse context \f(CW\*(C`c0.0.255.1003.255\*(C'\fR is equivalent to \f(CW\*(C`c0.0.255.0.0.0.255\*(C'\fR.
.IP "m[\fIsize\fR[.\fIrate\fR]]" 4
.IX Item "m[size[.rate]]"
Specifies a \s-1MIX\s0 (mixer). A \s-1MIX\s0 computes a weighted average of the predictions
of all previous components. (The averaging is in the logistic domain:
log(p / (1 \- p))). The weights are then adjusted in proportion to \fIrate\fR
(0..255) to reduce the prediction error. A \fIsize\fR bit context can be
used to select a set of weights to be used. The first 8 bits of context
are the previously coded bits of the current byte. The default is \f(CW\*(C`m8.24\*(C'\fR.
A \s-1MIX\s0 with n inputs requires 4n x 2^\fIsize\fR bytes of memory.
.IP "t[\fIsize\fR[.\fIrate\fR]]" 4
.IX Item "t[size[.rate]]"
Specifies a \s-1MIX2. A MIX2\s0 is like a \s-1MIX\s0 except that it takes only the
last 2 components as input, and its weights are constrained to add to 1.
A \s-1MIX2\s0 requires 4 x 2^\fIsize\fR bytes of memory. The default is \f(CW\*(C`t8.24\*(C'\fR.
.IP "s[\fIsize\fR[.\fImincount\fR[.\fImaxcount\fR]]]" 4
.IX Item "s[size[.mincount[.maxcount]]]"
Specifies a \s-1SSE\s0 (secondary symbol estimator). A \s-1SSE\s0 takes the last \fIsize\fR
bits of context and the quantized and interpolated prediction of the
previous component as input to output an adjusted prediction. The output
is adjusted to reduce the prediction error by 1/count, where the count
is constrained between \fImincount\fR and 4 x \fImaxcount\fR. The default
is \f(CW\*(C`s8.32.255\*(C'\fR.
.IP "i\fIorder\fR[.\fIincrement\fR]..." 4
.IX Item "iorder[.increment]..."
Specifies an \s-1ISSE\s0 (indirect secondary symbol estimator) chain. An \s-1ISSE\s0 adjusts
the prediction of the previous component by mixing it with a constant 1.
The pair of mixing weights is selected by a bit history state (like an \s-1ICM\s0).
The bit history is selected by a hash of the last \fIorder\fR bytes hashed
together with the context of the previous component. Each \fIincrement\fR
specifies an additional \s-1ISSE\s0 whose context order is increased
by \fIincrement\fR. For example, \f(CW\*(C`ci1.1.2\*(C'\fR specifies an order 0 \s-1ICM\s0
and order 1, 2, and 4 ISSEs.
.IP "w[\fIorder\fR[.\fIA\fR[.\fIZ\fR[.\fIcap\fR[.\fImul\fR[.\fImem\fR]]]]]]" 4
.IX Item "w[order[.A[.Z[.cap[.mul[.mem]]]]]]"
Specifies an ICM-ISSE chain of length \fIorder\fR taking as contexts the
hashes of the last 1, 2, 3..., \fIorder\fR whole words. A word is defined
as a sequence of characters in the range \fIA\fR to \fIA\fR + \fIZ\fR \- 1, ANDed
with \fIcap\fR before hashing. The hash H is updated by byte c as
H := (H x \fImul\fR + c) (mod 2^(\fIblocksize\fR + 24 \- \fImem\fR)).
Each component requires 2^(\fIblocksize\fR
\&\- \fImem\fR) MiB. The default is \f(CW\*(C`w1.65.26.223.20.0\*(C'\fR, which defines a
word as 65..90 (A..Z). ANDing with 223 converts to upper case before
hashing. \fImul\fR = 20 has the effect of shifting 2 bits left. For typical
block sizes (28 or 30 bit H), the word hash depends on the last
14 or 15 letters.
.IP "a[\fImul\fR[.\fIbmem\fR[.\fIhmem\fR]]]" 4
.IX Item "a[mul[.bmem[.hmem]]]"
Specifies a \s-1MATCH. A MATCH\s0 searches for a past matching context and predicts
whatever bit came next. The search is done by updating a context hash H
with byte c by H := H x \fImul\fR + c (mod 2^(\fIblocksize\fR + 18 \- \fIhmem\fR)).
A \s-1MATCH\s0 uses 2^(\fIblocksize\fR \- \fIbmem\fR) MiB history buffer and a
2^(\fIblocksize\fR \- \fIhmem\fR) MiB hash table. The default is \f(CW\*(C`a24.0.0\*(C'\fR.
If \fIblocksize\fR is 6, then H is 24 bits. \fImul\fR = 24 shifts 4 bits
left, making the context hash effectively order 6.
.RE
.RS 4
.RE
.IP "\-noattributes" 4
.IX Item "-noattributes"
With \f(CW\*(C`add\*(C'\fR, do not save Windows attributes or Unix/Linux permissions
to the archive. With \f(CW\*(C`extract\*(C'\fR, ignore the saved values and extract using
default values. With \f(CW\*(C`list\*(C'\fR, do not list or compare attributes.
.IP "\-not [\fIfile\fR]..." 4
.IX Item "-not [file]..."
.PD 0
.IP "\-not =[#+\-?^]..." 4
.IX Item "-not =[#+-?^]..."
.PD
In the first form, do not add, extract, or list files that match any \fIfile\fR
by name. \fIfile\fR may contain
wildcards \f(CW\*(C`*\*(C'\fR and \f(CW\*(C`?\*(C'\fR that match any string or character respectively,
including \f(CW\*(C`/\*(C'\fR. A match to a directory also matches all of
its contents. In Windows, matches are not case sensitive, and \f(CW\*(C`\e\*(C'\fR
matches \f(CW\*(C`/\*(C'\fR. In Unix/Linux, arguments with wildcards must be quoted
to protect them from the shell.
.Sp
When comparing with \f(CW\*(C`list\*(C'\fR \fIfiles\fR, \f(CW\*(C`\-not =\*(C'\fR means do not list identical
files. Additionally it is possible to suppress listing of
differences with \f(CW\*(C`#\*(C'\fR, missing external files with \f(CW\*(C`\-\*(C'\fR, missing
internal files with \f(CW\*(C`+\*(C'\fR, and duplicates (\f(CW\*(C`list \-summary\*(C'\fR) with \f(CW\*(C`^\*(C'\fR.
.IP "\-only \fIfile\fR..." 4
.IX Item "-only file..."
Do not add, extract, or list any files unless they match
at least one argument. The rules for matching wildcards are the
same as \f(CW\*(C`\-not\*(C'\fR. The default is \f(CW\*(C`*\*(C'\fR which matches everything.
.Sp
If a file matches an argument to both \f(CW\*(C`\-only\*(C'\fR and \f(CW\*(C`\-not\*(C'\fR, then
\&\f(CW\*(C`\-not\*(C'\fR takes precedence.
.IP "\-repack \fInew_archive\fR [\fInew_password\fR]" 4
.IX Item "-repack new_archive [new_password]"
With \f(CW\*(C`extract\*(C'\fR, store the extracted files in \fInew_archive\fR instead
of writing them individually to disk. If \fInew_password\fR is specified,
then the output is encrypted with this password. Otherwise the
output is not encrypted, even if the input is.
.Sp
It is an error if \fInew_archive\fR
exists unless \f(CW\*(C`\-force\*(C'\fR is used to allow it to be overwritten.
\&\fInew_archive\fR does not automatically get a \f(CW\*(C`.zpaq\*(C'\fR extension.
.Sp
Repacking is implemented by copying those D blocks (compressed file contents)
which are referenced by at least one selected file. This can result in
a larger archive than a new one because unreferenced fragments in the
same block are also copied.
.Sp
The repacked archive block dates range from the first to last
update of the input archive. Using \f(CW\*(C`add \-until\*(C'\fR with a date between these
two dates will result in the date being adjusted to 1 second after the
last update.
.Sp
With \f(CW\*(C`\-all\*(C'\fR, the input archive is simply copied without modification
except to decrypt and encrypt. Thus, the input may be any file, not
just an archive. \fIfiles\fR and the options \f(CW\*(C`\-to\*(C'\fR, \f(CW\*(C`\-not\*(C'\fR, \f(CW\*(C`\-only\*(C'\fR,
\&\f(CW\*(C`\-until\*(C'\fR, \f(CW\*(C`\-noattributes\*(C'\fR, and \f(CW\*(C`\-method\*(C'\fR are not valid with \f(CW\*(C`\-repack \-all\*(C'\fR.
.IP "\-s\fIN\fR" 4
.IX Item "-sN"
.PD 0
.IP "\-summary \fIN\fR" 4
.IX Item "-summary N"
.PD
With \f(CW\*(C`list\*(C'\fR, sort by decreasing size and show only the \fIN\fR
largest files and directories. Label duplicates of the previous
file with \f(CW\*(C`^\*(C'\fR. A file is a duplicate if its contents are identical
(based on stored hashes)
although the name, dates, and attributes may differ. If \fIfiles\fR
is specified, then these are included in the listing but not compared
with internal files or each other.
Internal and external files are labeled with \f(CW\*(C`\-\*(C'\fR and \f(CW\*(C`+\*(C'\fR respectively.
.Sp
If \fIN\fR is negative as in \f(CW\*(C`\-s\-1\*(C'\fR then list normally but show
fragment IDs after each file name. Files with identical fragment IDs have
identical contents.
.Sp
With \f(CW\*(C`add\*(C'\fR and \f(CW\*(C`extract\*(C'\fR, when \fIN\fR > 0, do not list files as they
are added or extracted. Show only percent completed and estimated
time remaining on a 1 line display.
.IP "\-test" 4
.IX Item "-test"
With \f(CW\*(C`extract\*(C'\fR, do not write to disk, but perform all
other operations normally. \f(CW\*(C`extract\*(C'\fR will decompress, compute
the \s-1SHA\-1\s0 hashes of the output, report if it differs from the stored
value, but not compare, create or update any files. With \f(CW\*(C`\-index\*(C'\fR,
test for errors but do not create an index file.
.IP "\-t\fIN\fR" 4
.IX Item "-tN"
.PD 0
.IP "\-threads \fIN\fR" 4
.IX Item "-threads N"
.PD
Add or extract at most \fIN\fR blocks in parallel. The default is 0, which
uses the number of processor cores, except not more than 2 when zpaq
is compiled to 32\-bit code. Selecting fewer threads will reduce memory
usage but run slower. Selecting more threads than cores does not help.
.IP "\-to \fIname\fR..." 4
.IX Item "-to name..."
With \f(CW\*(C`add\*(C'\fR and \f(CW\*(C`list\*(C'\fR, rename external \fIfiles\fR to respective
internal \fInames\fR. With \f(CW\*(C`extract\*(C'\fR, rename internal \fIfiles\fR
to external \fInames\fR. When \fIfiles\fR is empty, prefix the extracted
files with the first name in \fInames\fR, inserting \f(CW\*(C`/\*(C'\fR if needed
and removing \f(CW\*(C`:\*(C'\fR from drive letters. For example:
.Sp
.Vb 1
\&    zpaq extract archive file dir \-to newfile newdir
.Ve
.Sp
extracts \f(CW\*(C`file\*(C'\fR as \f(CW\*(C`newfile\*(C'\fR and \f(CW\*(C`dir\*(C'\fR as \f(CW\*(C`newdir\*(C'\fR.
.Sp
.Vb 1
\&    zpaq extract archive \-to tmp
.Ve
.Sp
will extract \f(CW\*(C`foo\*(C'\fR or \f(CW\*(C`/foo\*(C'\fR as \f(CW\*(C`tmp/foo\*(C'\fR and extract \f(CW\*(C`C:/foo\*(C'\fR
or \f(CW\*(C`C:foo\*(C'\fR as \f(CW\*(C`tmp/C/foo\*(C'\fR.
.Sp
.Vb 1
\&    zpaq add archive dir \-to newdir
.Ve
.Sp
will save \f(CW\*(C`dir/file\*(C'\fR as \f(CW\*(C`newdir/file\*(C'\fR, and so on.
.Sp
.Vb 1
\&    zpaq list archive dir \-to newdir
.Ve
.Sp
will compare external \f(CW\*(C`dir\*(C'\fR with internal \f(CW\*(C`newdir\*(C'\fR.
.Sp
The \f(CW\*(C`\-only\*(C'\fR and \f(CW\*(C`\-not\*(C'\fR options apply prior to renaming.
.IP "\-until \fIdate\fR | [\-]\fIversion\fR" 4
.IX Item "-until date | [-]version"
Ignore any part of the archive updated after \fIdate\fR, or after the first
\fIversion\fR updates, or, with a leading \f(CW\*(C`\-\*(C'\fR, ignore the last \fIversion\fR updates.
Additionally, \f(CW\*(C`add\*(C'\fR will truncate the archive at
this point before appending the next update. When a date is specified,
the update will be timestamped with \fIdate\fR rather than the current date.
.Sp
A date is specified as a 4 digit year (1900 to 2999), 2 digit month (01 to 12),
2 digit day (01 to 31), optional 2 digit hour (00 to 23, default 23),
optional 2 digit minute (00 to 59, default 59), and optional 2 digit
seconds (00 to 59, default 59). Dates and times are always universal
time zone (\s-1UT\s0), not local time. Numbers up to 9999999 are interpreted
as version numbers rather than dates. Dates may contain spaces and
punctuation characters for readability; these are ignored. For example:
.Sp
.Vb 1
\&    zpaq list backup \-until 3
.Ve
.Sp
shows the archive as it existed after the first 3 updates.
.Sp
.Vb 1
\&    zpaq add backup files \-until 2014/04/30 11:30
.Ve
.Sp
truncates any data added after April 30, 2014 at 11:30:59 universal time,
then appends the update as if this were the current time. (It does
not matter if any files are dated in the future).
.Sp
.Vb 1
\&    zpaq add backup files \-until 0
.Ve
.Sp
deletes backup.zpaq and creates a new archive.
.Sp
\&\f(CW\*(C`add \-until\*(C'\fR is an error on multi-part archives or with an index.
A multi-part archive can be rolled back by deleting the highest numbered
parts.
.Sp
Truncating and appending an encrypted archive with \f(CW\*(C`add \-until\*(C'\fR
(even \f(CW\*(C`\-until 0\*(C'\fR)
does not change the salt or keystream. Thus, it is possible for an attacker
with the old and new versions to obtain the \s-1XOR\s0 of the trailing
plaintexts without a password.
.SH "EXIT STATUS"
.IX Header "EXIT STATUS"
Returns 0 if successful, 1 in case of warnings, or 2 in case of an error.
.SH "ENVIRONMENT"
.IX Header "ENVIRONMENT"
In Windows, the default number of threads (set by \f(CW\*(C`\-threads\*(C'\fR) is
\&\f(CW%NUMBER_OF_PROCESSORS%\fR. In Linux, the number of lines of the
form \*(L"Processor : 0\*(R", \*(L"Processor : 1\*(R",... in \fI/proc/cpuinfo\fR
is used instead.
.SH "STANDARDS"
.IX Header "STANDARDS"
The archive format is described in
\&\fIThe \s-1ZPAQ\s0 Open Standard Format for Highly Compressed Data\fR
(see \fI\s-1AVAILABILITY\s0\fR).
.SH "AVAILABILITY"
.IX Header "AVAILABILITY"
http://mattmahoney.net/zpaq/
.SH "BUGS"
.IX Header "BUGS"
There is no \s-1GUI.\s0
.PP
The archive format does not save sufficient information for backing
up and restoring the operating system.
.SH "SEE ALSO"
.IX Header "SEE ALSO"
\&\f(CWbzip2(1)\fR,
\&\f(CWgzip(1)\fR,
\&\f(CWlrzip(1)\fR,
\&\f(CWlzop(1)\fR,
\&\f(CWlzma(1)\fR,
\&\f(CWp7zip(1)\fR,
\&\f(CWrzip(1)\fR,
\&\f(CWunace(1)\fR,
\&\f(CWunrar(1)\fR,
\&\f(CWunzip(1)\fR,
\&\f(CWzip(1)\fR
.SH "AUTHORS"
.IX Header "AUTHORS"
\&\f(CW\*(C`zpaq\*(C'\fR and \f(CW\*(C`libzpaq\*(C'\fR are written by Matt Mahoney and released to the
public domain in 2015\-2016. \f(CW\*(C`libzpaq\*(C'\fR contains
\&\fIlibdivsufsort-lite\fR v2.01, copyright (C) 2003\-2008,
Yuta Mori. It is licensed under the \s-1MIT\s0 license. See the source
code for license text. The \s-1AES\s0 code is
modified from libtomcrypt by Tom St Denis (public domain).
The salsa20/8 code in \fBScrypt()\fR is by D. J. Bernstein (public domain).