.\" Automatically generated by Pod::Man 4.11 (Pod::Simple 3.35) .\" .\" Standard preamble: .\" ======================================================================== .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' . ds C` . ds C' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is >0, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .\" .\" Avoid warning from groff about undefined register 'F'. .de IX .. .nr rF 0 .if \n(.g .if rF .nr rF 1 .if (\n(rF:(\n(.g==0)) \{\ . if \nF \{\ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . if !\nF==2 \{\ . nr % 0 . nr F 2 . \} . \} .\} .rr rF .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). .\" Fear. Run. Save yourself. No user-serviceable parts. . \" fudge factors for nroff and troff .if n \{\ . ds #H 0 . ds #V .8m . ds #F .3m . ds #[ \f1 . 
ds #] \fP .\} .if t \{\ . ds #H ((1u-(\\\\n(.fu%2u))*.13m) . ds #V .6m . ds #F 0 . ds #[ \& . ds #] \& .\} . \" simple accents for nroff and troff .if n \{\ . ds ' \& . ds ` \& . ds ^ \& . ds , \& . ds ~ ~ . ds / .\} .if t \{\ . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' .\} . \" troff and (daisy-wheel) nroff accents .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' .ds 8 \h'\*(#H'\(*b\h'-\*(#H' .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] .ds ae a\h'-(\w'a'u*4/10)'e .ds Ae A\h'-(\w'A'u*4/10)'E . \" corrections for vroff .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' . \" for low resolution devices (crt and lpr) .if \n(.H>23 .if \n(.V>19 \ \{\ . ds : e . ds 8 ss . ds o a . ds d- d\h'-1'\(ga . ds D- D\h'-1'\(hy . ds th \o'bp' . ds Th \o'LP' . ds ae ae . ds Ae AE .\} .rm #[ #] #H #V #F C .\" ======================================================================== .\" .IX Title "Lingua::Stem::EnBroken 3pm" .TH Lingua::Stem::EnBroken 3pm "2020-08-23" "perl v5.30.3" "User Contributed Perl Documentation" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. 
.if n .ad l .nh .SH "NAME" Lingua::Stem::EnBroken \- Porter's stemming algorithm for 'generic' English .SH "SYNOPSIS" .IX Header "SYNOPSIS" .Vb 5 \& use Lingua::Stem::EnBroken; \& my $stems = Lingua::Stem::EnBroken::stem({ \-words => $word_list_reference, \& \-locale => \*(Aqen\*(Aq, \& \-exceptions => $exceptions_hash, \& }); .Ve .SH "DESCRIPTION" .IX Header "DESCRIPTION" This routine MIS-applies the Porter Stemming Algorithm to its parameters, returning the stemmed words. It is an intentionally broken version of Lingua::Stem::En for people needing backwards compatibility with Lingua::Stem 0.30 and Lingua::Stem 0.40. Do not use it if you aren't one of those people. .PP It is derived from the C program \*(L"stemmer.c\*(R" as found in freewais and elsewhere, which contains these notes: .PP .Vb 4 \& Purpose: Implementation of the Porter stemming algorithm documented \& in: Porter, M.F., "An Algorithm For Suffix Stripping," \& Program 14 (3), July 1980, pp. 130\-137. \& Provenance: Written by B. Frakes and C. Cox, 1986. .Ve .PP I have re-interpreted areas that use Frakes and Cox's \*(L"WordSize\*(R" function. My version may misbehave on short words starting with \*(L"y\*(R", but I can't think of any examples. .PP The step numbers correspond to Frakes and Cox, and are probably in Porter's article (which I've not seen). Porter's algorithm still has rough spots (e.g. current/currency, \-ings words), which I've not attempted to cure, although I have added support for the British \-ise suffix. .SH "CHANGES" .IX Header "CHANGES" .Vb 1 \& 2003.09.28 \- Documentation fix \& \& 2000.09.14 \- Forked from the Lingua::Stem::En.pm module to provide \& a backward compatibly broken version for people needing \& consistent behavior with 0.30 and 0.40 more than accurate \& stemming.
.Ve .SH "METHODS" .IX Header "METHODS" .IP "stem({ \-words => \e@words, \-locale => 'en', \-exceptions => \e%exceptions });" 4 .IX Item "stem({ -words => @words, -locale => 'en', -exceptions => %exceptions });" Stems a list of passed words using the rules of \s-1US\s0 English. Returns an anonymous array reference to the stemmed words. .Sp Example: .Sp .Vb 4 \& my $stemmed_words = Lingua::Stem::EnBroken::stem({ \-words => \e@words, \& \-locale => \*(Aqen\*(Aq, \& \-exceptions => \e%exceptions, \& }); .Ve .IP "stem_caching({ \-level => 0|1|2 });" 4 .IX Item "stem_caching({ -level => 0|1|2 });" Sets the level of stem caching. .Sp \&'0' means 'no caching'. This is the default level. .Sp \&'1' means 'cache per run'. This caches stemming results during a single call to 'stem'. .Sp \&'2' means 'cache indefinitely'. This caches stemming results until either the process exits or the 'clear_stem_cache' method is called. .IP "clear_stem_cache;" 4 .IX Item "clear_stem_cache;" Clears the cache of stemmed words. .SH "NOTES" .IX Header "NOTES" This code is almost entirely derived from the Porter 2.1 module written by Jim Richardson. .SH "SEE ALSO" .IX Header "SEE ALSO" .Vb 1 \& Lingua::Stem .Ve .SH "AUTHOR" .IX Header "AUTHOR" .Vb 2 \& Jim Richardson, University of Sydney \& jimr@maths.usyd.edu.au or http://www.maths.usyd.edu.au:8000/jimr.html \& \& Integration in Lingua::Stem by \& Jerilyn Franz, FreeRun Technologies, \& .Ve .SH "COPYRIGHT" .IX Header "COPYRIGHT" Jim Richardson, University of Sydney Jerilyn Franz, FreeRun Technologies .PP This code is freely available under the same terms as Perl. .SH "BUGS" .IX Header "BUGS" .SH "TODO" .IX Header "TODO"