.\" Automatically generated by Pod::Man 2.28 (Pod::Simple 3.28) .\" .\" Standard preamble: .\" ======================================================================== .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' . ds C` . ds C' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is turned on, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .\" .\" Avoid warning from groff about undefined register 'F'. .de IX .. .nr rF 0 .if \n(.g .if rF .nr rF 1 .if (\n(rF:(\n(.g==0)) \{ . if \nF \{ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . if !\nF==2 \{ . nr % 0 . nr F 2 . \} . \} .\} .rr rF .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). .\" Fear. Run. Save yourself. No user-serviceable parts. . \" fudge factors for nroff and troff .if n \{\ . ds #H 0 . ds #V .8m . ds #F .3m . ds #[ \f1 . 
ds #] \fP .\} .if t \{\ . ds #H ((1u-(\\\\n(.fu%2u))*.13m) . ds #V .6m . ds #F 0 . ds #[ \& . ds #] \& .\} . \" simple accents for nroff and troff .if n \{\ . ds ' \& . ds ` \& . ds ^ \& . ds , \& . ds ~ ~ . ds / .\} .if t \{\ . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' .\} . \" troff and (daisy-wheel) nroff accents .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' .ds 8 \h'\*(#H'\(*b\h'-\*(#H' .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] .ds ae a\h'-(\w'a'u*4/10)'e .ds Ae A\h'-(\w'A'u*4/10)'E . \" corrections for vroff .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' . \" for low resolution devices (crt and lpr) .if \n(.H>23 .if \n(.V>19 \ \{\ . ds : e . ds 8 ss . ds o a . ds d- d\h'-1'\(ga . ds D- D\h'-1'\(hy . ds th \o'bp' . ds Th \o'LP' . ds ae ae . ds Ae AE .\} .rm #[ #] #H #V #F C .\" ======================================================================== .\" .IX Title "Lucy::Index::Indexer 3pm" .TH Lucy::Index::Indexer 3pm "2015-03-06" "perl v5.20.2" "User Contributed Perl Documentation" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l .nh .SH "NAME" Lucy::Index::Indexer \- Build inverted indexes. 
.SH "SYNOPSIS" .IX Header "SYNOPSIS" .Vb 12 \& my $indexer = Lucy::Index::Indexer\->new( \& schema => $schema, \& index => \*(Aq/path/to/index\*(Aq, \& create => 1, \& ); \& while ( my ( $title, $content ) = each %source_docs ) { \& $indexer\->add_doc({ \& title => $title, \& content => $content, \& }); \& } \& $indexer\->commit; .Ve .SH "DESCRIPTION" .IX Header "DESCRIPTION" The Indexer class is Apache Lucy's primary tool for managing the content of inverted indexes, which may later be searched using IndexSearcher. .PP In general, only one Indexer at a time may write to an index safely. If a write lock cannot be secured, \fInew()\fR will throw an exception. .PP If an index is located on a shared volume, each writer application must identify itself by supplying an IndexManager with a unique \&\f(CW\*(C`host\*(C'\fR id to Indexer's constructor, or index corruption will occur. See Lucy::Docs::FileLocking for a detailed discussion. .PP Note: at present, \fIdelete_by_term()\fR and \fIdelete_by_query()\fR only affect documents which had been previously committed to the index \*(-- and not any documents added during this indexing session but not yet committed. This may change in a future update. .SH "CONSTRUCTORS" .IX Header "CONSTRUCTORS" .SS "new( \fI[labeled params]\fP )" .IX Subsection "new( [labeled params] )" .Vb 7 \& my $indexer = Lucy::Index::Indexer\->new( \& schema => $schema, # required at index creation \& index => \*(Aq/path/to/index\*(Aq, # required \& create => 1, # default: 0 \& truncate => 1, # default: 0 \& manager => $manager # default: created internally \& ); .Ve .IP "\(bu" 4 \&\fBschema\fR \- A Schema. Required when index is being created; if not supplied, will be extracted from the index folder. .IP "\(bu" 4 \&\fBindex\fR \- Either a filepath to an index or a Folder. .IP "\(bu" 4 \&\fBcreate\fR \- If true and the index directory does not exist, attempt to create it. 
.IP "\(bu" 4 \&\fBtruncate\fR \- If true, proceed with the intention of discarding all previous indexing data. The old data will remain intact and visible until \fIcommit()\fR succeeds. .IP "\(bu" 4 \&\fBmanager\fR \- An IndexManager. .SH "METHODS" .IX Header "METHODS" .SS "add_doc(...)" .IX Subsection "add_doc(...)" .Vb 6 \& $indexer\->add_doc($doc); \& $indexer\->add_doc( { field_name => $field_value } ); \& $indexer\->add_doc( \& doc => { field_name => $field_value }, \& boost => 2.5, # default: 1.0 \& ); .Ve .PP Add a document to the index. Accepts either a single argument or labeled params. .IP "\(bu" 4 \&\fBdoc\fR \- Either a Lucy::Document::Doc object, or a hashref (which will be attached to a Lucy::Document::Doc object internally). .IP "\(bu" 4 \&\fBboost\fR \- A floating point weight which affects how this document scores. .SS "add_index(index)" .IX Subsection "add_index(index)" Absorb an existing index into this one. The two indexes must have matching Schemas. .IP "\(bu" 4 \&\fBindex\fR \- Either an index path name or a Folder. .SS "\fIoptimize()\fP" .IX Subsection "optimize()" Optimize the index for search-time performance. This may take a while, as it can involve rewriting large amounts of data. .SS "\fIcommit()\fP" .IX Subsection "commit()" Commit any changes made to the index. Until this is called, none of the changes made during an indexing session are permanent. .PP Calling \fIcommit()\fR invalidates the Indexer, so if you want to make more changes you'll need a new one. .SS "\fIprepare_commit()\fP" .IX Subsection "prepare_commit()" Perform the expensive setup for \fIcommit()\fR in advance, so that \fIcommit()\fR completes quickly. (If \fIprepare_commit()\fR is not called explicitly by the user, \fIcommit()\fR will call it internally.) 
.SS "delete_by_term( \fI[labeled params]\fP )" .IX Subsection "delete_by_term( [labeled params] )" Mark documents which contain the supplied term as deleted, so that they will be excluded from search results and eventually removed altogether. The change is not apparent to search apps until after \&\fIcommit()\fR succeeds. .IP "\(bu" 4 \&\fBfield\fR \- The name of an indexed field. (If it is not specified as \&\f(CW\*(C`indexed\*(C'\fR, an error will occur.) .IP "\(bu" 4 \&\fBterm\fR \- The term which identifies docs to be marked as deleted. If \&\f(CW\*(C`field\*(C'\fR is associated with an Analyzer, \f(CW\*(C`term\*(C'\fR will be processed automatically (so don't pre-process it yourself). .SS "delete_by_query(query)" .IX Subsection "delete_by_query(query)" Mark documents which match the supplied Query as deleted. .IP "\(bu" 4 \&\fBquery\fR \- A Query. .SH "INHERITANCE" .IX Header "INHERITANCE" Lucy::Index::Indexer isa Lucy::Object::Obj.