.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.40)
.\"
.\" Standard preamble:
.\" ========================================================================
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.de Ve \" End verbatim text
.ft R
.fi
..
.\" Set up some character translations and predefined strings.  \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
.\" nothing in troff, for use with C<>.
.tr \(*W-
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
.    ds -- \(*W-
.    ds PI pi
.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
.    ds L" ""
.    ds R" ""
.    ds C` ""
.    ds C' ""
'br\}
.el\{\
.    ds -- \|\(em\|
.    ds PI \(*p
.    ds L" ``
.    ds R" ''
.    ds C`
.    ds C'
'br\}
.\"
.\" Escape single quotes in literal strings from groff's Unicode transform.
.ie \n(.g .ds Aq \(aq
.el       .ds Aq '
.\"
.\" If the F register is >0, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD.  Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.\"
.\" Avoid warning from groff about undefined register 'F'.
.de IX
..
.nr rF 0
.if \n(.g .if rF .nr rF 1
.if (\n(rF:(\n(.g==0)) \{\
.    if \nF \{\
.        de IX
.        tm Index:\\$1\t\\n%\t"\\$2"
..
.        if !\nF==2 \{\
.            nr % 0
.            nr F 2
.        \}
.    \}
.\}
.rr rF
.\" ========================================================================
.\"
.IX Title "Grinder::KmerCollection 3pm"
.TH Grinder::KmerCollection 3pm "2020-12-03" "perl v5.32.0" "User Contributed Perl Documentation"
.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
.nh
.SH "NAME"
Grinder::KmerCollection \- A collection of kmers from sequences
.SH "SYNOPSIS"
.IX Header "SYNOPSIS"
.Vb 2
\&  my $col = Grinder::KmerCollection\->new( \-k    => 10,
\&                                          \-file => \*(Aqseqs.fa\*(Aq );
.Ve
.SH "DESCRIPTION"
.IX Header "DESCRIPTION"
Manage a collection of kmers found in various sequences. Store information about
what sequence a kmer was found in and its starting position on the sequence.
.SH "AUTHOR"
.IX Header "AUTHOR"
Florent Angly <florent.angly@gmail.com>
.SH "APPENDIX"
.IX Header "APPENDIX"
The rest of the documentation details each of the object
methods. Internal methods are usually prefixed with an underscore (_).
.SS "new"
.IX Subsection "new"
.Vb 10
\& Title   : new
\& Usage   : my $col = Grinder::KmerCollection\->new( \-k => 10, \-file => \*(Aqseqs.fa\*(Aq, \-revcom => 1 );
\& Function: Build a new kmer collection
\& Args    : \-k        set the kmer length (default: 10 bp)
\&           \-revcom   count kmers before and after reverse\-complementing sequences
\&                     (default: 0)
\&           \-seqs     count kmers in the provided arrayref of sequences (Bio::Seq
\&                     or Bio::SeqFeature objects)
\&           \-ids      if specified, index the sequences provided to \-seqs using
\&                     the IDs in this arrayref instead of the IDs returned by the
\&                     $seq\->id() method of each sequence
\&           \-file     count kmers in the provided file of sequences
\&           \-weights  if specified, assign the abundance of each sequence from the
\&                     values in this arrayref
\&
\& Returns : Grinder::KmerCollection object
.Ve
.SS "k"
.IX Subsection "k"
.Vb 4
\& Usage   : $col\->k;
\& Function: Get the length of the kmers
\& Args    : None
\& Returns : Positive integer
.Ve
.SS "weights"
.IX Subsection "weights"
.Vb 6
\& Usage   : $col\->weights({\*(Aqseq1\*(Aq => 3, \*(Aqseq10\*(Aq => 0.45});
\& Function: Get or set the weight of each sequence. Each sequence is given a
\&           weight of 1 by default.
\& Args    : hashref where the keys are sequence IDs and the values are the weight
\&           of the corresponding sequence (e.g. its relative abundance)
\& Returns : Grinder::KmerCollection object
.Ve
.SS "collection_by_kmer"
.IX Subsection "collection_by_kmer"
.Vb 5
\& Usage   : $col\->collection_by_kmer;
\& Function: Get the collection of kmers, indexed by kmer
\& Args    : None
\& Returns : A hashref of hashref of arrayref:
\&              hash\->{kmer}\->{ID of sequences with this kmer}\->[starts of kmer on sequence]
.Ve
.SS "collection_by_seq"
.IX Subsection "collection_by_seq"
.Vb 5
\& Usage   : $col\->collection_by_seq;
\& Function: Get the collection of kmers, indexed by sequence ID
\& Args    : None
\& Returns : A hashref of hashref of arrayref:
\&              hash\->{ID of sequences with this kmer}\->{kmer}\->[starts of kmer on sequence]
.Ve
.SS "add_file"
.IX Subsection "add_file"
.Vb 4
\& Usage   : $col\->add_file(\*(Aqseqs.fa\*(Aq);
\& Function: Process the kmers in the given file of sequences.
\& Args    : filename
\& Returns : Grinder::KmerCollection object
.Ve
.SS "add_seqs"
.IX Subsection "add_seqs"
.Vb 5
\& Usage   : $col\->add_seqs([$seq1, $seq2]);
\& Function: Process the kmers in the given sequences.
\& Args    : * arrayref of Bio::Seq or Bio::SeqFeature objects
\&           * arrayref of IDs to use for the indexing of the sequences
\& Returns : Grinder::KmerCollection object
.Ve
.SS "filter_rare"
.IX Subsection "filter_rare"
.Vb 4
\& Usage   : $col\->filter_rare( 2 );
\& Function: Remove kmers occurring at less than the (weighted) abundance specified
\& Args    : integer
\& Returns : Grinder::KmerCollection object
.Ve
.SS "filter_shared"
.IX Subsection "filter_shared"
.Vb 4
\& Usage   : $col\->filter_shared( 2 );
\& Function: Remove kmers occurring in fewer than the number of sequences specified
\& Args    : integer
\& Returns : Grinder::KmerCollection object
.Ve
.SS "counts"
.IX Subsection "counts"
.Vb 8
\& Usage   : $col\->counts
\& Function: Calculate the total count of each kmer. Counts are affected by the
\&           weights given to the sequences.
\& Args    : * restrict the search to the sequence with the specified ID (optional)
\&           * starting position from which counting should start (optional)
\&           * 0 to report counts (default), 1 to report frequencies (normalize to 1)
\& Returns : * arrayref of the different kmers
\&           * arrayref of the corresponding total counts
.Ve
.SS "sources"
.IX Subsection "sources"
.Vb 8
\& Usage   : $col\->sources()
\& Function: Return the sources of a kmer and their (weighted) abundance.
\& Args    : * kmer to get the sources of
\&           * sources to exclude from the results (optional)
\&           * 0 to report counts (default), 1 to report frequencies (normalize to 1)
\& Returns : * arrayref of the different sources
\&           * arrayref of the corresponding total counts
\&           If the kmer requested does not exist, the arrays will be empty.
.Ve
.SS "kmers"
.IX Subsection "kmers"
.Vb 8
\& Usage   : $col\->kmers(\*(Aqseq1\*(Aq);
\& Function: This is the inverse of sources(). Return the kmers found in a sequence
\&           (given its ID) and their (weighted) abundance.
\& Args    : * sequence ID to get the kmers of
\&           * 0 to report counts (default), 1 to report frequencies (normalize to 1)
\& Returns : * arrayref of the different kmers
\&           * arrayref of the corresponding total counts
\&           If the sequence ID requested does not exist, the arrays will be empty.
.Ve
.SS "positions"
.IX Subsection "positions"
.Vb 7
\& Usage   : $col\->positions()
\& Function: Return the positions of the given kmer on a given sequence. An error
\&           is reported if the kmer requested does not exist
\& Args    : * desired kmer
\&           * desired sequence with this kmer
\& Returns : Arrayref of the different positions. The arrayref will be empty if the
\&           desired combination of kmer and sequence was not found.
.Ve