.\" Automatically generated by Pod::Man 2.28 (Pod::Simple 3.28)
.\"
.\" Standard preamble:
.\" ========================================================================
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.de Ve \" End verbatim text
.ft R
.fi
..
.\" Set up some character translations and predefined strings.  \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
.\" nothing in troff, for use with C<>.
.tr \(*W-
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
.    ds -- \(*W-
.    ds PI pi
.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
.    ds L" ""
.    ds R" ""
.    ds C` ""
.    ds C' ""
'br\}
.el\{\
.    ds -- \|\(em\|
.    ds PI \(*p
.    ds L" ``
.    ds R" ''
.    ds C`
.    ds C'
'br\}
.\"
.\" Escape single quotes in literal strings from groff's Unicode transform.
.ie \n(.g .ds Aq \(aq
.el       .ds Aq '
.\"
.\" If the F register is turned on, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD.  Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.\"
.\" Avoid warning from groff about undefined register 'F'.
.de IX
..
.nr rF 0
.if \n(.g .if rF .nr rF 1
.if (\n(rF:(\n(.g==0)) \{
.    if \nF \{
.        de IX
.        tm Index:\\$1\t\\n%\t"\\$2"
..
.        if !\nF==2 \{
.            nr % 0
.            nr F 2
.        \}
.    \}
.\}
.rr rF
.\"
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
.    \" fudge factors for nroff and troff
.if n \{\
.    ds #H 0
.    ds #V .8m
.    ds #F .3m
.    ds #[ \f1
.    ds #] \fP
.\}
.if t \{\
.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
.    ds #V .6m
.    ds #F 0
.    ds #[ \&
.    ds #] \&
.\}
.    \" simple accents for nroff and troff
.if n \{\
.    ds ' \&
.    ds ` \&
.    ds ^ \&
.    ds , \&
.    ds ~ ~
.    ds /
.\}
.if t \{\
.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
.\}
.    \" troff and (daisy-wheel) nroff accents
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
.ds ae a\h'-(\w'a'u*4/10)'e
.ds Ae A\h'-(\w'A'u*4/10)'E
.    \" corrections for vroff
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
.    \" for low resolution devices (crt and lpr)
.if \n(.H>23 .if \n(.V>19 \
\{\
.    ds : e
.    ds 8 ss
.    ds o a
.    ds d- d\h'-1'\(ga
.    ds D- D\h'-1'\(hy
.    ds th \o'bp'
.    ds Th \o'LP'
.    ds ae ae
.    ds Ae AE
.\}
.rm #[ #] #H #V #F C
.\" ========================================================================
.\"
.IX Title "Lucy::Docs::Cookbook::FastUpdates 3pm"
.TH Lucy::Docs::Cookbook::FastUpdates 3pm "2015-03-06" "perl v5.20.2" "User Contributed Perl Documentation"
.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
.nh
.SH "NAME"
Lucy::Docs::Cookbook::FastUpdates \- Near real\-time index updates.
.SH "ABSTRACT"
.IX Header "ABSTRACT"
While index updates are fast on average, worst-case update performance may be
significantly slower.  To make index updates consistently quick, we must
manually intervene to control the process of index segment consolidation.
.SH "The problem"
.IX Header "The problem"
Ordinarily, modifying an index is cheap. New data is added to new segments,
and the time to write a new segment scales more or less linearly with the
number of documents added during the indexing session.
.PP
Deletions are also cheap most of the time, because we don't remove documents
immediately but instead mark them as deleted, and adding the deletion mark is
cheap.
.PP
However, as new segments are added and the deletion rate for existing segments
increases, search-time performance slowly begins to degrade.  At some point,
it becomes necessary to consolidate existing segments, rewriting their data
into a new segment.
.PP
If the recycled segments are small, the time it takes to rewrite them may not
be significant.  Every once in a while, though, a large amount of data must be
rewritten.
.SH "Procrastinating and playing catch-up"
.IX Header "Procrastinating and playing catch-up"
The simplest way to force fast index updates is to avoid rewriting anything.
.PP
Indexer relies upon IndexManager's
\&\fIrecycle()\fR method to tell it which segments should be consolidated.  If we
subclass IndexManager and override \fIrecycle()\fR so that it always returns an
empty array, we get consistently quick performance:
.PP
.Vb 3
\&    package NoMergeManager;
\&    use base qw( Lucy::Index::IndexManager );
\&    sub recycle { [] }
\&    
\&    package main;
\&    my $indexer = Lucy::Index::Indexer\->new(
\&        index => \*(Aq/path/to/index\*(Aq,
\&        manager => NoMergeManager\->new,
\&    );
\&    ...
\&    $indexer\->commit;
.Ve
.PP
However, we can't procrastinate forever.  Eventually, we'll have to run an
ordinary, uncontrolled indexing session, potentially triggering a large
rewrite of lots of small and/or degraded segments:
.PP
.Vb 6
\&    my $indexer = Lucy::Index::Indexer\->new( 
\&        index => \*(Aq/path/to/index\*(Aq, 
\&        # manager => NoMergeManager\->new,
\&    );
\&    ...
\&    $indexer\->commit;
.Ve
.SH "Acceptable worst-case update time, slower degradation"
.IX Header "Acceptable worst-case update time, slower degradation"
Never merging anything at all in the main indexing process is probably
overkill.  Small segments are relatively cheap to merge; we just need to guard
against the big rewrites.
.PP
Setting a ceiling on the number of documents in the segments to be recycled
allows us to avoid a mass proliferation of tiny, single-document segments,
while still offering decent worst-case update speed:
.PP
.Vb 2
\&    package LightMergeManager;
\&    use base qw( Lucy::Index::IndexManager );
\&    
\&    sub recycle {
\&        my $self = shift;
\&        my $seg_readers = $self\->SUPER::recycle(@_);
\&        @$seg_readers = grep { $_\->doc_max < 10 } @$seg_readers;
\&        return $seg_readers;
\&    }
.Ve
.PP
However, we still have to consolidate every once in a while, and while that
happens, content updates will be locked out.
.SH "Background merging"
.IX Header "Background merging"
If it's not acceptable to lock out updates while the index consolidation
process runs, the alternative is to move the consolidation process out of
band, using Lucy::Index::BackgroundMerger.
.PP
It's never safe to have more than one Indexer attempting to modify the content
of an index at the same time, but a BackgroundMerger and an Indexer can
operate simultaneously:
.PP
.Vb 10
\&    # Indexing process.
\&    use Scalar::Util qw( blessed );
\&    my $retries = 0;
\&    while (1) {
\&        eval {
\&            my $indexer = Lucy::Index::Indexer\->new(
\&                    index => \*(Aq/path/to/index\*(Aq,
\&                    manager => LightMergeManager\->new,
\&                );
\&            $indexer\->add_doc($doc);
\&            $indexer\->commit;
\&        };
\&        last unless $@;
\&        if ( blessed($@) and $@\->isa("Lucy::Store::LockErr") ) {
\&            # Catch LockErr.
\&            warn "Couldn\*(Aqt get lock ($retries retries)";
\&            $retries++;
\&        }
\&        else {
\&            die "Write failed: $@";
\&        }
\&    }
\&
\&    # Background merge process.
\&    my $manager = Lucy::Index::IndexManager\->new;
\&    $manager\->set_write_lock_timeout(60_000);
\&    my $bg_merger = Lucy::Index::BackgroundMerger\->new(
\&        index   => \*(Aq/path/to/index\*(Aq,
\&        manager => $manager,
\&    );
\&    $bg_merger\->commit;
.Ve
.PP
The exception handling code becomes useful once you have more than one index
modification process happening simultaneously.  By default, Indexer tries
several times to acquire a write lock over the span of one second, then holds
it until \fIcommit()\fR completes.  BackgroundMerger handles most of its work
without the write lock, but it does need it briefly once at the beginning and
once again near the end.  Under normal loads, the internal retry logic will
resolve conflicts, but if it's not acceptable to miss an insert, you probably
want to catch LockErr exceptions thrown by Indexer.  In contrast, a LockErr
from BackgroundMerger probably just needs to be logged.