.\" Automatically generated by Pod::Man 4.09 (Pod::Simple 3.35)
.\"
.\" Standard preamble:
.\" ========================================================================
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.de Ve \" End verbatim text
.ft R
.fi
..
.\" Set up some character translations and predefined strings.  \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
.\" nothing in troff, for use with C<>.
.tr \(*W-
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
.    ds -- \(*W-
.    ds PI pi
.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
.    ds L" ""
.    ds R" ""
.    ds C` ""
.    ds C' ""
'br\}
.el\{\
.    ds -- \|\(em\|
.    ds PI \(*p
.    ds L" ``
.    ds R" ''
.    ds C`
.    ds C'
'br\}
.\"
.\" Escape single quotes in literal strings from groff's Unicode transform.
.ie \n(.g .ds Aq \(aq
.el       .ds Aq '
.\"
.\" If the F register is >0, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD.  Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.\"
.\" Avoid warning from groff about undefined register 'F'.
.de IX
..
.if !\nF .nr F 0
.if \nF>0 \{\
.    de IX
.    tm Index:\\$1\t\\n%\t"\\$2"
..
.    if !\nF==2 \{\
.        nr % 0
.        nr F 2
.    \}
.\}
.\"
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
.    \" fudge factors for nroff and troff
.if n \{\
.    ds #H 0
.    ds #V .8m
.    ds #F .3m
.    ds #[ \f1
.    ds #] \fP
.\}
.if t \{\
.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
.    ds #V .6m
.    ds #F 0
.    ds #[ \&
.    ds #] \&
.\}
.    \" simple accents for nroff and troff
.if n \{\
.    ds ' \&
.    ds ` \&
.    ds ^ \&
.    ds , \&
.    ds ~ ~
.    ds /
.\}
.if t \{\
.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
.\}
.    \" troff and (daisy-wheel) nroff accents
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
.ds ae a\h'-(\w'a'u*4/10)'e
.ds Ae A\h'-(\w'A'u*4/10)'E
.    \" corrections for vroff
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
.    \" for low resolution devices (crt and lpr)
.if \n(.H>23 .if \n(.V>19 \
\{\
.    ds : e
.    ds 8 ss
.    ds o a
.    ds d- d\h'-1'\(ga
.    ds D- D\h'-1'\(hy
.    ds th \o'bp'
.    ds Th \o'LP'
.    ds ae ae
.    ds Ae AE
.\}
.rm #[ #] #H #V #F C
.\" ========================================================================
.\"
.IX Title "Lucy::Docs::Cookbook::CustomQuery 3pm"
.TH Lucy::Docs::Cookbook::CustomQuery 3pm "2017-08-02" "perl v5.26.0" "User Contributed Perl Documentation"
.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
.nh
.SH "NAME"
Lucy::Docs::Cookbook::CustomQuery \- Sample subclass of Query.
.SH "ABSTRACT"
.IX Header "ABSTRACT"
Explore Apache Lucy's support for custom query types by creating a
\&\*(L"PrefixQuery\*(R" class to handle trailing wildcards.
.PP
.Vb 6
\&    my $prefix_query = PrefixQuery\->new(
\&        field        => \*(Aqcontent\*(Aq,
\&        query_string => \*(Aqfoo*\*(Aq,
\&    );
\&    my $hits = $searcher\->hits( query => $prefix_query );
\&    ...
.Ve
.SH "Query, Compiler, and Matcher"
.IX Header "Query, Compiler, and Matcher"
To add support for a new query type, we need three classes: a Query, a
Compiler, and a Matcher.
.IP "\(bu" 4
PrefixQuery \- a subclass of Lucy::Search::Query, and the only class
that client code will deal with directly.
.IP "\(bu" 4
PrefixCompiler \- a subclass of Lucy::Search::Compiler, whose primary 
role is to compile a PrefixQuery to a PrefixMatcher.
.IP "\(bu" 4
PrefixMatcher \- a subclass of Lucy::Search::Matcher, which does the
heavy lifting: it applies the query to individual documents and assigns a
score to each match.
.PP
The PrefixQuery class on its own isn't enough because a Query object's role is
limited to expressing an abstract specification for the search.  A Query is
basically nothing but metadata; execution is left to the Query's companion
Compiler and Matcher.
.PP
Here's a simplified sketch illustrating how a Searcher's \fIhits()\fR method ties
together the three classes.
.PP
.Vb 10
\&    sub hits {
\&        my ( $self, $query ) = @_;
\&        my $compiler = $query\->make_compiler( searcher => $self );
\&        my $matcher = $compiler\->make_matcher(
\&            reader     => $self\->get_reader,
\&            need_score => 1,
\&        );
\&        my @hits = $matcher\->capture_hits;
\&        return \e@hits;
\&    }
.Ve
.SS "PrefixQuery"
.IX Subsection "PrefixQuery"
Our PrefixQuery class will have two attributes: a query string and a field
name.
.PP
.Vb 4
\&    package PrefixQuery;
\&    use base qw( Lucy::Search::Query );
\&    use Carp;
\&    use Scalar::Util qw( blessed );
\&    
\&    # Inside\-out member vars and hand\-rolled accessors.
\&    my %query_string;
\&    my %field;
\&    sub get_query_string { my $self = shift; return $query_string{$$self} }
\&    sub get_field        { my $self = shift; return $field{$$self} }
.Ve
.PP
PrefixQuery's constructor collects and validates the attributes.
.PP
.Vb 10
\&    sub new {
\&        my ( $class, %args ) = @_;
\&        my $query_string = delete $args{query_string};
\&        my $field        = delete $args{field};
\&        my $self         = $class\->SUPER::new(%args);
\&        confess("\*(Aqquery_string\*(Aq param is required")
\&            unless defined $query_string;
\&        confess("Invalid query_string: \*(Aq$query_string\*(Aq")
\&            unless $query_string =~ /\e*\es*$/;
\&        confess("\*(Aqfield\*(Aq param is required")
\&            unless defined $field;
\&        $query_string{$$self} = $query_string;
\&        $field{$$self}        = $field;
\&        return $self;
\&    }
.Ve
.PP
Since this is an inside-out class, we'll need a destructor:
.PP
.Vb 6
\&    sub DESTROY {
\&        my $self = shift;
\&        delete $query_string{$$self};
\&        delete $field{$$self};
\&        $self\->SUPER::DESTROY;
\&    }
.Ve
.PP
The \fIequals()\fR method determines whether two Queries are logically equivalent:
.PP
.Vb 8
\&    sub equals {
\&        my ( $self, $other ) = @_;
\&        return 0 unless blessed($other);
\&        return 0 unless $other\->isa("PrefixQuery");
\&        return 0 unless $field{$$self} eq $field{$$other};
\&        return 0 unless $query_string{$$self} eq $query_string{$$other};
\&        return 1;
\&    }
.Ve
.PP
The last thing we'll need is a \fImake_compiler()\fR factory method which kicks out
a subclass of Compiler.
.PP
.Vb 7
\&    sub make_compiler {
\&        my ( $self, %args ) = @_;
\&        my $subordinate = delete $args{subordinate};
\&        my $compiler = PrefixCompiler\->new( %args, parent => $self );
\&        $compiler\->normalize unless $subordinate;
\&        return $compiler;
\&    }
.Ve
.SS "PrefixCompiler"
.IX Subsection "PrefixCompiler"
PrefixQuery's \fImake_compiler()\fR method will be called internally at search-time
by objects which subclass Lucy::Search::Searcher \*(-- such as
IndexSearchers.
.PP
A Searcher is associated with a particular collection of documents.  These
documents may all reside in one index, as with IndexSearcher, or they may be
spread out across multiple indexes on one or more machines, as with
LucyX::Remote::ClusterSearcher.
.PP
Searcher objects have access to certain statistical information about the
collections they represent; for instance, a Searcher can tell you how many
documents are in the collection...
.PP
.Vb 1
\&    my $maximum_number_of_docs_in_collection = $searcher\->doc_max;
.Ve
.PP
\&... or how many documents a specific term appears in:
.PP
.Vb 4
\&    my $term_appears_in_this_many_docs = $searcher\->doc_freq(
\&        field => \*(Aqcontent\*(Aq,
\&        term  => \*(Aqfoo\*(Aq,
\&    );
.Ve
.PP
Such information can be used by sophisticated Compiler implementations to
assign more or less heft to individual queries or sub-queries.  However, we're
not going to bother with weighting for this demo; we'll just assign a fixed
score of 1.0 to each matching document.
.PP
We don't need to write a constructor, as it will suffice to inherit \fInew()\fR from
Lucy::Search::Compiler.  The only method we need to implement for
PrefixCompiler is \fImake_matcher()\fR.
.PP
.Vb 2
\&    package PrefixCompiler;
\&    use base qw( Lucy::Search::Compiler );
\&
\&    sub make_matcher {
\&        my ( $self, %args ) = @_;
\&        my $seg_reader = $args{reader};
\&
\&        # Retrieve low\-level components LexiconReader and PostingListReader.
\&        my $lex_reader
\&            = $seg_reader\->obtain("Lucy::Index::LexiconReader");
\&        my $plist_reader
\&            = $seg_reader\->obtain("Lucy::Index::PostingListReader");
\&        
\&        # Acquire a Lexicon and seek it to our query string.
\&        my $substring = $self\->get_parent\->get_query_string;
\&        $substring =~ s/\e*\es*$//;
\&        my $field = $self\->get_parent\->get_field;
\&        my $lexicon = $lex_reader\->lexicon( field => $field );
\&        return unless $lexicon;
\&        $lexicon\->seek($substring);
\&        
\&        # Accumulate PostingLists for each matching term.
\&        my @posting_lists;
\&        while ( defined( my $term = $lexicon\->get_term ) ) {
\&            last unless $term =~ /^\eQ$substring/;
\&            my $posting_list = $plist_reader\->posting_list(
\&                field => $field,
\&                term  => $term,
\&            );
\&            if ($posting_list) {
\&                push @posting_lists, $posting_list;
\&            }
\&            last unless $lexicon\->next;
\&        }
\&        return unless @posting_lists;
\&        
\&        return PrefixMatcher\->new( posting_lists => \e@posting_lists );
\&    }
.Ve
.PP
PrefixCompiler gets access to a SegReader
object when \fImake_matcher()\fR gets called.  From the SegReader and its
sub-components LexiconReader and
PostingListReader, we acquire a
Lexicon, scan through the Lexicon's unique
terms, and acquire a PostingList for each
term that matches our prefix.
.PP
Each of these PostingList objects represents a set of documents which match
the query.
.SS "PrefixMatcher"
.IX Subsection "PrefixMatcher"
The Matcher subclass is the most involved.
.PP
.Vb 2
\&    package PrefixMatcher;
\&    use base qw( Lucy::Search::Matcher );
\&    
\&    # Inside\-out member vars.
\&    my %doc_ids;
\&    my %tick;
\&    
\&    sub new {
\&        my ( $class, %args ) = @_;
\&        my $posting_lists = delete $args{posting_lists};
\&        my $self          = $class\->SUPER::new(%args);
\&        
\&        # Cheesy but simple way of interleaving PostingList doc sets.
\&        my %all_doc_ids;
\&        for my $posting_list (@$posting_lists) {
\&            while ( my $doc_id = $posting_list\->next ) {
\&                $all_doc_ids{$doc_id} = undef;
\&            }
\&        }
\&        my @doc_ids = sort { $a <=> $b } keys %all_doc_ids;
\&        $doc_ids{$$self} = \e@doc_ids;
\&        
\&        # Track our position within the array of doc ids.
\&        $tick{$$self} = \-1;
\&        
\&        return $self;
\&    }
\&    
\&    sub DESTROY {
\&        my $self = shift;
\&        delete $doc_ids{$$self};
\&        delete $tick{$$self};
\&        $self\->SUPER::DESTROY;
\&    }
.Ve
.PP
The doc ids must be in order, or some will be ignored; hence the \f(CW\*(C`sort\*(C'\fR
above.
.PP
In addition to the constructor and destructor, there are three methods that
must be overridden.
.PP
\&\fInext()\fR advances the Matcher to the next valid matching doc.
.PP
.Vb 7
\&    sub next {
\&        my $self    = shift;
\&        my $doc_ids = $doc_ids{$$self};
\&        my $tick    = ++$tick{$$self};
\&        return 0 if $tick >= scalar @$doc_ids;
\&        return $doc_ids\->[$tick];
\&    }
.Ve
.PP
\&\fIget_doc_id()\fR returns the current document id, or 0 if the Matcher is
exhausted.  (Document numbers start at 1, so 0 is
a sentinel.)
.PP
.Vb 6
\&    sub get_doc_id {
\&        my $self    = shift;
\&        my $tick    = $tick{$$self};
\&        my $doc_ids = $doc_ids{$$self};
\&        return $tick < scalar @$doc_ids ? $doc_ids\->[$tick] : 0;
\&    }
.Ve
.PP
\&\fIscore()\fR conveys the relevance score of the current match.  We'll just return a
fixed score of 1.0:
.PP
.Vb 1
\&    sub score { 1.0 }
.Ve
.SH "Usage"
.IX Header "Usage"
To get a basic feel for PrefixQuery, insert the FlatQueryParser module
described in Lucy::Docs::Cookbook::CustomQueryParser (which supports
PrefixQuery) into the search.cgi sample app.
.PP
.Vb 2
\&    my $parser = FlatQueryParser\->new( schema => $searcher\->get_schema );
\&    my $query  = $parser\->parse($q);
.Ve
.PP
If you're planning on using PrefixQuery in earnest, though, you may want to
change up analyzers to avoid stemming, because stemming \*(-- another approach to
prefix conflation \*(-- is not perfectly compatible with prefix searches.
.PP
.Vb 7
\&    # PolyAnalyzer with no SnowballStemmer.
\&    my $analyzer = Lucy::Analysis::PolyAnalyzer\->new(
\&        analyzers => [
\&            Lucy::Analysis::RegexTokenizer\->new,
\&            Lucy::Analysis::CaseFolder\->new,
\&        ],
\&    );
.Ve