.\" Automatically generated by Pandoc 2.17.1.1
.\"
.\" Define V font for inline verbatim, using C font in formats
.\" that render this, and otherwise B font.
.ie "\f[CB]x\f[]"x" \{\
. ftr V B
. ftr VI BI
. ftr VB B
. ftr VBI BI
.\}
.el \{\
. ftr V CR
. ftr VI CI
. ftr VB CB
. ftr VBI CBI
.\}
.TH "alignment-thin" "1" "Feb 2018" "" ""
.hy
.SH NAME
.PP
\f[B]alignment-thin\f[R] - Remove sequences or columns from an
alignment.
.SH SYNOPSIS
.PP
\f[B]alignment-thin\f[R] \f[I]alignment-file\f[R] [OPTIONS]
.SH DESCRIPTION
.PP
Remove sequences or columns from an alignment.
.SH GENERAL OPTIONS:
.TP
\f[B]-h\f[R], \f[B]--help\f[R]
Print usage information.
.TP
\f[B]-V\f[R], \f[B]--verbose\f[R]
Output more log messages on stderr.
.SH SEQUENCE FILTERING OPTIONS:
.TP
\f[B]-p\f[R] \f[I]arg\f[R], \f[B]--protect\f[R] \f[I]arg\f[R]
Sequences that cannot be removed (comma-separated).
.TP
\f[B]-k\f[R] \f[I]arg\f[R], \f[B]--keep\f[R] \f[I]arg\f[R]
Remove sequences not in comma-separated list \f[I]arg\f[R].
.TP
\f[B]-r\f[R] \f[I]arg\f[R], \f[B]--remove\f[R] \f[I]arg\f[R]
Remove sequences in comma-separated list \f[I]arg\f[R].
.TP
\f[B]-l\f[R] \f[I]arg\f[R], \f[B]--longer-than\f[R] \f[I]arg\f[R]
Remove sequences not longer than \f[I]arg\f[R].
.TP
\f[B]-s\f[R] \f[I]arg\f[R], \f[B]--shorter-than\f[R] \f[I]arg\f[R]
Remove sequences not shorter than \f[I]arg\f[R].
.TP
\f[B]-c\f[R] \f[I]arg\f[R], \f[B]--cutoff\f[R] \f[I]arg\f[R]
Remove similar sequences with #mismatches < cutoff.
.TP
\f[B]-d\f[R] \f[I]arg\f[R], \f[B]--down-to\f[R] \f[I]arg\f[R]
Remove similar sequences down to \f[I]arg\f[R] sequences.
.TP
\f[B]--remove-crazy\f[R] \f[I]arg\f[R]
Remove \f[I]arg\f[R] outlier sequences -- defined as sequences that are
missing too many conserved sites.
.TP
\f[B]--conserved\f[R] \f[I]arg\f[R] (=0.75)
Fraction of sequences that must contain a letter for it to be considered
conserved.
.SH COLUMN FILTERING OPTIONS:
.TP
\f[B]-K\f[R] \f[I]arg\f[R], \f[B]--keep-columns\f[R] \f[I]arg\f[R]
Keep columns from this sequence.
.TP
\f[B]-m\f[R] \f[I]arg\f[R], \f[B]--min-letters\f[R] \f[I]arg\f[R]
Remove columns with fewer than \f[I]arg\f[R] letters.
.TP
\f[B]-u\f[R] \f[I]arg\f[R], \f[B]--remove-unique\f[R] \f[I]arg\f[R]
Remove insertions in a single sequence if longer than \f[I]arg\f[R]
letters.
.TP
\f[B]-e\f[R], \f[B]--erase-empty-columns\f[R]
Remove columns with no characters (all gaps).
.SH OUTPUT OPTIONS:
.TP
\f[B]-S\f[R], \f[B]--sort\f[R]
Sort partially ordered columns to group similar gaps.
.TP
\f[B]-L\f[R], \f[B]--show-lengths\f[R]
Just print out sequence lengths.
.TP
\f[B]-N\f[R], \f[B]--show-names\f[R]
Just print out sequence names.
.TP
\f[B]-F\f[R] \f[I]arg\f[R], \f[B]--find-dups\f[R] \f[I]arg\f[R]
For each sequence, find the closest other sequence.
.SH EXAMPLES:
.PP
Remove columns without a minimum number of letters:
.IP
.nf
\f[C]
% alignment-thin --min-letters=5 file.fasta > file-thinned.fasta
\f[R]
.fi
.PP
Remove sequences by name:
.IP
.nf
\f[C]
% alignment-thin --remove=seq1,seq2 file.fasta > file2.fasta
\f[R]
.fi
.IP
.nf
\f[C]
% alignment-thin --keep=seq1,seq2   file.fasta > file2.fasta
\f[R]
.fi
.PP
Remove short sequences:
.IP
.nf
\f[C]
% alignment-thin --longer-than=250 file.fasta > file-long.fasta
\f[R]
.fi
.PP
Remove similar sequences with <= 5 differences from the closest other
sequence:
.IP
.nf
\f[C]
% alignment-thin --cutoff=5 file.fasta > more-than-5-differences.fasta
\f[R]
.fi
.PP
Remove similar sequences until we have the right number of sequences:
.IP
.nf
\f[C]
% alignment-thin --down-to=30 file.fasta > file-30taxa.fasta
\f[R]
.fi
.PP
Remove dissimilar sequences that are missing conserved columns:
.IP
.nf
\f[C]
% alignment-thin --remove-crazy=10 file.fasta > file2.fasta
\f[R]
.fi
.PP
Protect some sequences from being removed:
.IP
.nf
\f[C]
% alignment-thin --down-to=30 file.fasta --protect=seq1,seq2 > file2.fasta
\f[R]
.fi
.IP
.nf
\f[C]
% alignment-thin --down-to=30 file.fasta --protect=\[at]filename > file2.fasta
\f[R]
.fi
.SH REPORTING BUGS:
.PP
BAli-Phy online help: <http://www.bali-phy.org/docs.php>.
.PP
Please send bug reports to <bali-phy-users@googlegroups.com>.
.SH AUTHORS
Benjamin Redelings.