.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.43)
.\"
.\" Standard preamble:
.\" ========================================================================
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.de Ve \" End verbatim text
.ft R
.fi
..
.\" Set up some character translations and predefined strings.  \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
.\" nothing in troff, for use with C<>.
.tr \(*W-
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
.    ds -- \(*W-
.    ds PI pi
.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
.    ds L" ""
.    ds R" ""
.    ds C` ""
.    ds C' ""
'br\}
.el\{\
.    ds -- \|\(em\|
.    ds PI \(*p
.    ds L" ``
.    ds R" ''
.    ds C`
.    ds C'
'br\}
.\"
.\" Escape single quotes in literal strings from groff's Unicode transform.
.ie \n(.g .ds Aq \(aq
.el       .ds Aq '
.\"
.\" If the F register is >0, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD.  Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.\"
.\" Avoid warning from groff about undefined register 'F'.
.de IX
..
.nr rF 0
.if \n(.g .if rF .nr rF 1
.if (\n(rF:(\n(.g==0)) \{\
.    if \nF \{\
.        de IX
.        tm Index:\\$1\t\\n%\t"\\$2"
..
.        if !\nF==2 \{\
.            nr % 0
.            nr F 2
.        \}
.    \}
.\}
.rr rF
.\" ========================================================================
.\"
.IX Title "MCE::Grep 3pm"
.TH MCE::Grep 3pm "2023-09-29" "perl v5.36.0" "User Contributed Perl Documentation"
.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
.nh
.SH "NAME"
MCE::Grep \- Parallel grep model similar to the native grep function
.SH "VERSION"
.IX Header "VERSION"
This document describes MCE::Grep version 1.889
.SH "SYNOPSIS"
.IX Header "SYNOPSIS"
.Vb 2
\& ## Exports mce_grep, mce_grep_f, and mce_grep_s
\& use MCE::Grep;
\&
\& ## Array or array_ref
\& my @a = mce_grep { $_ % 5 == 0 } 1..10000;
\& my @b = mce_grep { $_ % 5 == 0 } \e@list;
\&
\& ## Important; pass an array_ref for deeply nested input data
\& my @c = mce_grep { $_\->[1] % 2 == 0 } [ [ 0, 1 ], [ 0, 2 ], ... ];
\& my @d = mce_grep { $_\->[1] % 2 == 0 } \e@deeply_list;
\&
\& ## File path, glob ref, IO::All::{ File, Pipe, STDIO } obj, or scalar ref
\& ## Workers read directly and do not involve the manager process
\& my @e = mce_grep_f { /pattern/ } "/path/to/file"; # efficient
\&
\& ## Involves the manager process, therefore slower
\& my @f = mce_grep_f { /pattern/ } $file_handle;
\& my @g = mce_grep_f { /pattern/ } $io;
\& my @h = mce_grep_f { /pattern/ } \e$scalar;
\&
\& ## Sequence of numbers (begin, end [, step, format])
\& my @i = mce_grep_s { $_ % 3 == 0 } 1, 10000, 5;
\& my @j = mce_grep_s { $_ % 3 == 0 } [ 1, 10000, 5 ];
\&
\& my @k = mce_grep_s { $_ % 3 == 0 } {
\&    begin => 1, end => 10000, step => 5, format => undef
\& };
.Ve
.SH "DESCRIPTION"
.IX Header "DESCRIPTION"
This module provides a parallel grep implementation via Many-Core Engine.
\&\s-1MCE\s0 incurs a small overhead due to passing of data. A fast code block will
run faster natively. However, the overhead will likely diminish as the
complexity increases for the code.
.PP
.Vb 2
\& my @m1 =     grep { $_ % 5 == 0 } 1..1000000;          ## 0.065 secs
\& my @m2 = mce_grep { $_ % 5 == 0 } 1..1000000;          ## 0.194 secs
.Ve
.PP
Chunking, enabled by default, greatly reduces the overhead behind the scenes.
The time for mce_grep below also includes the time for data exchanges between
the manager and worker processes. More parallelization will be seen when the
code incurs additional \s-1CPU\s0 time.
.PP
.Vb 2
\& my @m1 =     grep { /[2357][1468][9]/ } 1..1000000;    ## 0.353 secs
\& my @m2 = mce_grep { /[2357][1468][9]/ } 1..1000000;    ## 0.218 secs
.Ve
.PP
Even faster is mce_grep_s; useful when input data is a range of numbers.
Workers generate sequences mathematically among themselves without any
interaction from the manager process. Two arguments are required for
mce_grep_s (begin, end). Step defaults to 1 if begin is smaller than end,
otherwise \-1.
.PP
.Vb 1
\& my @m3 = mce_grep_s { /[2357][1468][9]/ } 1, 1000000;  ## 0.165 secs
.Ve
.PP
Although this document is about MCE::Grep, the MCE::Stream module can write
results immediately without waiting for all chunks to complete. This is made
possible by passing the reference to an array (in this case \f(CW@m4\fR and \f(CW@m5\fR).
.PP
.Vb 1
\& use MCE::Stream default_mode => \*(Aqgrep\*(Aq;
\&
\& my @m4; mce_stream \e@m4, sub { /[2357][1468][9]/ }, 1..1000000;
\&
\&    ## Completed in 0.203 secs. This is amazing considering the
\&    ## overhead for passing data between the manager and workers.
\&
\& my @m5; mce_stream_s \e@m5, sub { /[2357][1468][9]/ }, 1, 1000000;
\&
\&    ## Completed in 0.120 secs. Like with mce_grep_s, providing a
\&    ## sequence specification turns out to be faster due to less
\&    ## overhead for the manager process.
.Ve
.PP
A common scenario is grepping for pattern(s) inside a massive log file.
Notice how parallelism increases as complexity increases for the pattern.
Testing was done against a 300 \s-1MB\s0 file containing 250k lines.
.PP
.Vb 1
\& use MCE::Grep;
\&
\& my @m; open my $LOG, "<", "/path/to/log/file" or die "$!\en";
\&
\& @m = grep { /pattern/ } <$LOG>;                      ##  0.756 secs
\& @m = grep { /foobar|[2357][1468][9]/ } <$LOG>;       ## 24.681 secs
\&
\& ## Parallelism with mce_grep. This involves the manager process
\& ## due to processing a file handle.
\&
\& @m = mce_grep { /pattern/ } <$LOG>;                  ##  0.997 secs
\& @m = mce_grep { /foobar|[2357][1468][9]/ } <$LOG>;   ##  7.439 secs
\&
\& ## Even faster with mce_grep_f. Workers access the file directly
\& ## with zero interaction from the manager process.
\&
\& my $LOG = "/path/to/file";
\& @m = mce_grep_f { /pattern/ } $LOG;                  ##  0.112 secs
\& @m = mce_grep_f { /foobar|[2357][1468][9]/ } $LOG;   ##  6.840 secs
.Ve
.SH "PARSING HUGE FILES"
.IX Header "PARSING HUGE FILES"
The MCE::Grep module cannot quickly determine whether a chunk contains a match,
because it does not know the pattern used inside the code block. Use the following
snippet as a template to achieve better performance. Also, take a look at
examples/egrep.pl, included with the distribution.
.PP
.Vb 1
\& use MCE::Loop;
\&
\& MCE::Loop\->init(
\&    max_workers => 8, use_slurpio => 1
\& );
\&
\& my $pattern  = \*(Aqkarl\*(Aq;
\& my $hugefile = \*(Aqvery_huge.file\*(Aq;
\&
\& my @result = mce_loop_f {
\&    my ($mce, $slurp_ref, $chunk_id) = @_;
\&
\&    ## Quickly determine if a match is found.
\&    ## Process slurped chunk only if true.
\&
\&    if ($$slurp_ref =~ /$pattern/m) {
\&       my @matches;
\&
\&       ## The following is fast on Unix. Performance degrades
\&       ## drastically on Windows beyond 4 workers.
\&
\&       open my $MEM_FH, \*(Aq<\*(Aq, $slurp_ref;
\&       binmode $MEM_FH, \*(Aq:raw\*(Aq;
\&       while (<$MEM_FH>) { push @matches, $_ if (/$pattern/); }
\&       close   $MEM_FH;
\&
\&       ## Therefore, use the following construct on Windows.
\&
\&       while ( $$slurp_ref =~ /([^\en]+\en)/mg ) {
\&          my $line = $1; # save $1 to not lose the value
\&          push @matches, $line if ($line =~ /$pattern/);
\&       }
\&
\&       ## Gather matched lines.
\&
\&       MCE\->gather(@matches);
\&    }
\&
\& } $hugefile;
\&
\& print join(\*(Aq\*(Aq, @result);
.Ve
.SH "OVERRIDING DEFAULTS"
.IX Header "OVERRIDING DEFAULTS"
The following lists options which may be overridden when loading the module.
.PP
.Vb 3
\& use Sereal qw( encode_sereal decode_sereal );
\& use CBOR::XS qw( encode_cbor decode_cbor );
\& use JSON::XS qw( encode_json decode_json );
\&
\& use MCE::Grep
\&     max_workers => 4,                # Default \*(Aqauto\*(Aq
\&     chunk_size => 100,               # Default \*(Aqauto\*(Aq
\&     tmp_dir => "/path/to/app/tmp",   # $MCE::Signal::tmp_dir
\&     freeze => \e&encode_sereal,       # \e&Storable::freeze
\&     thaw => \e&decode_sereal,         # \e&Storable::thaw
\&     init_relay => 0,                 # Default undef; MCE 1.882+
\&     use_threads => 0,                # Default undef; MCE 1.882+
\& ;
.Ve
.PP
From \s-1MCE 1.8\s0 onwards, Sereal 3.015+ is loaded automatically if available.
Specify \f(CW\*(C`Sereal => 0\*(C'\fR to use Storable instead.
.PP
.Vb 1
\& use MCE::Grep Sereal => 0;
.Ve
.SH "CUSTOMIZING MCE"
.IX Header "CUSTOMIZING MCE"
.IP "MCE::Grep\->init ( options )" 3
.IX Item "MCE::Grep->init ( options )"
.PD 0
.IP "MCE::Grep::init { options }" 3
.IX Item "MCE::Grep::init { options }"
.PD
.PP
The init function accepts a hash of \s-1MCE\s0 options. The gather option, if
specified, is ignored due to being used internally by the module.
.PP
.Vb 1
\& use MCE::Grep;
\&
\& MCE::Grep\->init(
\&    chunk_size => 1, max_workers => 4,
\&
\&    user_begin => sub {
\&       print "## ", MCE\->wid, " started\en";
\&    },
\&
\&    user_end => sub {
\&       print "## ", MCE\->wid, " completed\en";
\&    }
\& );
\&
\& my @a = mce_grep { $_ % 5 == 0 } 1..100;
\&
\& print "\en", "@a", "\en";
\&
\& \-\- Output
\&
\& ## 2 started
\& ## 3 started
\& ## 1 started
\& ## 4 started
\& ## 3 completed
\& ## 4 completed
\& ## 1 completed
\& ## 2 completed
\&
\& 5 10 15 20 25 30 35 40 45 50 55 60 65 70 75 80 85 90 95 100
.Ve
.SH "API DOCUMENTATION"
.IX Header "API DOCUMENTATION"
.IP "MCE::Grep\->run ( sub { code }, list )" 3
.IX Item "MCE::Grep->run ( sub { code }, list )"
.PD 0
.IP "mce_grep { code } list" 3
.IX Item "mce_grep { code } list"
.PD
.PP
Input data may be defined using a list or an array reference. Unlike MCE::Loop,
MCE::Flow, and MCE::Step, specifying a hash reference as input data isn't allowed.
.PP
.Vb 3
\& ## Array or array_ref
\& my @a = mce_grep { /[2357]/ } 1..1000;
\& my @b = mce_grep { /[2357]/ } \e@list;
\&
\& ## Important; pass an array_ref for deeply nested input data
\& my @c = mce_grep { $_\->[1] =~ /[2357]/ } [ [ 0, 1 ], [ 0, 2 ], ... ];
\& my @d = mce_grep { $_\->[1] =~ /[2357]/ } \e@deeply_list;
\&
\& ## Not supported
\& my @z = mce_grep { ... } \e%hash;
.Ve
.IP "MCE::Grep\->run_file ( sub { code }, file )" 3
.IX Item "MCE::Grep->run_file ( sub { code }, file )"
.PD 0
.IP "mce_grep_f { code } file" 3
.IX Item "mce_grep_f { code } file"
.PD
.PP
The fastest of these is the file path (/path/to/file). Workers communicate the next offset
position among themselves with zero interaction by the manager process.
.PP
\&\f(CW\*(C`IO::All\*(C'\fR { File, Pipe, \s-1STDIO\s0 } is supported since \s-1MCE 1.845.\s0
.PP
.Vb 4
\& my @c = mce_grep_f { /pattern/ } "/path/to/file";  # faster
\& my @d = mce_grep_f { /pattern/ } $file_handle;
\& my @e = mce_grep_f { /pattern/ } $io;              # IO::All
\& my @f = mce_grep_f { /pattern/ } \e$scalar;
.Ve
.ie n .IP "MCE::Grep\->run_seq ( sub { code }, $beg, $end [, $step, $fmt ] )" 3
.el .IP "MCE::Grep\->run_seq ( sub { code }, \f(CW$beg\fR, \f(CW$end\fR [, \f(CW$step\fR, \f(CW$fmt\fR ] )" 3
.IX Item "MCE::Grep->run_seq ( sub { code }, $beg, $end [, $step, $fmt ] )"
.PD 0
.ie n .IP "mce_grep_s { code } $beg, $end [, $step, $fmt ]" 3
.el .IP "mce_grep_s { code } \f(CW$beg\fR, \f(CW$end\fR [, \f(CW$step\fR, \f(CW$fmt\fR ]" 3
.IX Item "mce_grep_s { code } $beg, $end [, $step, $fmt ]"
.PD
.PP
Sequence may be defined as a list, an array reference, or a hash reference.
The functions require both begin and end values to run. Step and format are
optional. The format is passed to sprintf (% may be omitted below).
.PP
.Vb 1
\& my ($beg, $end, $step, $fmt) = (10, 20, 0.1, "%4.1f");
\&
\& my @f = mce_grep_s { /[1234]\e.[5678]/ } $beg, $end, $step, $fmt;
\& my @g = mce_grep_s { /[1234]\e.[5678]/ } [ $beg, $end, $step, $fmt ];
\&
\& my @h = mce_grep_s { /[1234]\e.[5678]/ } {
\&    begin => $beg, end => $end,
\&    step => $step, format => $fmt
\& };
.Ve
.IP "MCE::Grep\->run ( sub { code }, iterator )" 3
.IX Item "MCE::Grep->run ( sub { code }, iterator )"
.PD 0
.IP "mce_grep { code } iterator" 3
.IX Item "mce_grep { code } iterator"
.PD
.PP
An iterator reference may be specified for input_data. Iterators are described
under section \*(L"\s-1SYNTAX\s0 for \s-1INPUT_DATA\*(R"\s0 at MCE::Core.
.PP
.Vb 1
\& my @a = mce_grep { $_ % 3 == 0 } make_iterator(10, 30, 2);
.Ve
.SH "MANUAL SHUTDOWN"
.IX Header "MANUAL SHUTDOWN"
.IP "MCE::Grep\->finish" 3
.IX Item "MCE::Grep->finish"
.PD 0
.IP "MCE::Grep::finish" 3
.IX Item "MCE::Grep::finish"
.PD
.PP
Workers remain persistent as much as possible after running. Shutdown occurs
automatically when the script terminates. Call finish when workers are no
longer needed.
.PP
.Vb 1
\& use MCE::Grep;
\&
\& MCE::Grep\->init(
\&    chunk_size => 20, max_workers => \*(Aqauto\*(Aq
\& );
\&
\& my @a = mce_grep { ... } 1..100;
\&
\& MCE::Grep\->finish;
.Ve
.SH "INDEX"
.IX Header "INDEX"
\&\s-1MCE\s0, MCE::Core
.SH "AUTHOR"
.IX Header "AUTHOR"
Mario E. Roy, <marioeroy AT gmail DOT com>