.\" Automatically generated by Pod::Man 2.28 (Pod::Simple 3.28)
.\"
.\" Standard preamble:
.\" ========================================================================
.\" Sp: emit vertical space -- half a line (.5v) when the t (troff) condition
.\" holds, a plain .sp when the n (nroff) condition holds.
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.\" Vb: begin a verbatim block -- select the CW font, switch to no-fill mode,
.\" and hand the macro's first argument to .ne as the amount of vertical
.\" space needed before the block starts.
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.\" Ve: end a verbatim block -- select the R font and turn fill mode back on,
.\" reversing the .ft CW and .nf issued by Vb.
.de Ve \" End verbatim text
.ft R
.fi
..
.\" Set up some character translations and predefined strings.  \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
.\" nothing in troff, for use with C<>.
.\" Translate capital omega to "-" on output, then define the strings listed
.\" above: the .ie n branch holds the nroff definitions (including two diablo
.\" overrides of the -- string) and the .el branch holds the troff ones.
.tr \(*W-
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
.    ds -- \(*W-
.    ds PI pi
.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
.    ds L" ""
.    ds R" ""
.    ds C` ""
.    ds C' ""
'br\}
.el\{\
.    ds -- \|\(em\|
.    ds PI \(*p
.    ds L" ``
.    ds R" ''
.    ds C`
.    ds C'
'br\}
.\"
.\" Escape single quotes in literal strings from groff's Unicode transform.
.\" The .g register is non-zero under groff: define Aq as the aq glyph there,
.\" and as a plain apostrophe for other troff implementations.
.ie \n(.g .ds Aq \(aq
.el       .ds Aq '
.\"
.\" If the F register is turned on, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD.  Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.\"
.\" Avoid warning from groff about undefined register 'F'.
.\" IX starts out as an empty macro, so index calls do nothing by default.
.\" rF is a scratch register: it becomes 1 only under groff when a register
.\" named F exists.  If rF is set, or we are not running under groff, and F is
.\" non-zero, IX is redefined to print "Index:", its first argument, the page
.\" number and its quoted second argument (tab-separated) with .tm.  Unless F
.\" is already 2, the page number register is reset to 0 and F is set to 2.
.\" The scratch register rF is removed at the end.
.de IX
..
.nr rF 0
.if \n(.g .if rF .nr rF 1
.if (\n(rF:(\n(.g==0)) \{
.    if \nF \{
.        de IX
.        tm Index:\\$1\t\\n%\t"\\$2"
..
.        if !\nF==2 \{
.            nr % 0
.            nr F 2
.        \}
.    \}
.\}
.rr rF
.\"
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
.    \" fudge factors for nroff and troff
.\" Helper strings #H, #V, #F, #[ and #] feed the accent definitions that
.\" follow; one set of values is defined for nroff (n), another for troff (t).
.if n \{\
.    ds #H 0
.    ds #V .8m
.    ds #F .3m
.    ds #[ \f1
.    ds #] \fP
.\}
.if t \{\
.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
.    ds #V .6m
.    ds #F 0
.    ds #[ \&
.    ds #] \&
.\}
.    \" simple accents for nroff and troff
.\" Single-character accent strings.  Under nroff most are defined as the
.\" zero-width \& (with ~ as itself and / as empty).  Under troff each one
.\" marks the current horizontal position in register ":", moves back, draws
.\" the accent character and then returns to the marked position.
.if n \{\
.    ds ' \&
.    ds ` \&
.    ds ^ \&
.    ds , \&
.    ds ~ ~
.    ds /
.\}
.if t \{\
.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
.\}
.    \" troff and (daisy-wheel) nroff accents
.\" Remaining accent and special-letter strings (: 8 o d- D- th Th ae Ae),
.\" followed by replacement definitions of ~ and ^ that apply only when the
.\" v condition is true.
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
.ds ae a\h'-(\w'a'u*4/10)'e
.ds Ae A\h'-(\w'A'u*4/10)'E
.    \" corrections for vroff
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
.    \" for low resolution devices (crt and lpr)
.\" When both the .H and .V register tests below succeed, the strings defined
.\" above are replaced with plain-character approximations.
.if \n(.H>23 .if \n(.V>19 \
\{\
.    ds : e
.    ds 8 ss
.    ds o a
.    ds d- d\h'-1'\(ga
.    ds D- D\h'-1'\(hy
.    ds th \o'bp'
.    ds Th \o'LP'
.    ds ae ae
.    ds Ae AE
.\}
.\" Remove the helper strings named on the .rm line now that the accent
.\" strings have been defined.
.rm #[ #] #H #V #F C
.\" ========================================================================
.\"
.IX Title "Unicode::Collate::Locale 3perl"
.TH Unicode::Collate::Locale 3perl "2014-12-27" "perl v5.20.2" "Perl Programmers Reference Guide"
.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
.nh
.SH "NAME"
Unicode::Collate::Locale \- Linguistic tailoring for DUCET via Unicode::Collate
.SH "SYNOPSIS"
.IX Header "SYNOPSIS"
.Vb 1
\&  use Unicode::Collate::Locale;
\&
\&  #construct
\&  $Collator = Unicode::Collate::Locale\->
\&      new(locale => $locale_name, %tailoring);
\&
\&  #sort
\&  @sorted = $Collator\->sort(@not_sorted);
\&
\&  #compare
\&  $result = $Collator\->cmp($a, $b); # returns 1, 0, or \-1.
.Ve
.PP
\&\fBNote:\fR Strings in \f(CW@not_sorted\fR, \f(CW$a\fR and \f(CW$b\fR are interpreted
according to Perl's Unicode support. See perlunicode,
perluniintro, perlunitut, perlunifaq, utf8.
Otherwise you can use \f(CW\*(C`preprocess\*(C'\fR (cf. \f(CW\*(C`Unicode::Collate\*(C'\fR)
or decode them beforehand.
.SH "DESCRIPTION"
.IX Header "DESCRIPTION"
This module provides linguistic tailoring for collation,
taking advantage of \f(CW\*(C`Unicode::Collate\*(C'\fR.
.SS "Constructor"
.IX Subsection "Constructor"
The \f(CW\*(C`new\*(C'\fR method returns a collator object.
.PP
A parameter list for the constructor is a hash, which can include
a special key \f(CW\*(C`locale\*(C'\fR and its value (case-insensitive) standing
for a Unicode base language code (two or three-letter).
For example, \f(CW\*(C`Unicode::Collate::Locale\->new(locale => \*(AqFR\*(Aq)\*(C'\fR
returns a collator tailored for French.
.PP
\&\f(CW$locale_name\fR may be suffixed with a Unicode script code (four-letter),
a Unicode region code, a Unicode language variant code. These codes are
case-insensitive, and separated with \f(CW\*(Aq_\*(Aq\fR or \f(CW\*(Aq\-\*(Aq\fR.
E.g. \f(CW\*(C`en_US\*(C'\fR for English in \s-1USA,
\&\s0\f(CW\*(C`az_Cyrl\*(C'\fR for Azerbaijani in the Cyrillic script,
\&\f(CW\*(C`es_ES_traditional\*(C'\fR for Spanish in Spain (Traditional).
.PP
If \f(CW$locale_name\fR is not available,
fallback is selected in the following order:
.PP
.Vb 5
\&    1. language with a variant code
\&    2. language with a script code
\&    3. language with a region code
\&    4. language
\&    5. default
.Ve
.PP
Tailoring tags provided by \f(CW\*(C`Unicode::Collate\*(C'\fR are allowed as long as
they are not used for \f(CW\*(C`locale\*(C'\fR support.  Esp. the \f(CW\*(C`table\*(C'\fR tag
is always untailorable, since it is reserved for \s-1DUCET.\s0
.PP
However \f(CW\*(C`entry\*(C'\fR is allowed, even if it is used for \f(CW\*(C`locale\*(C'\fR support,
to add or override mappings.
.PP
E.g. a collator for French, which ignores diacritics and case difference
(i.e. level 1), with reversed case ordering and no normalization.
.PP
.Vb 6
\&    Unicode::Collate::Locale\->new(
\&        level => 1,
\&        locale => \*(Aqfr\*(Aq,
\&        upper_before_lower => 1,
\&        normalization => undef
\&    )
.Ve
.PP
Overriding a behavior already tailored by \f(CW\*(C`locale\*(C'\fR is disallowed
if such a tailoring is passed to \f(CW\*(C`new()\*(C'\fR.
.PP
.Vb 4
\&    Unicode::Collate::Locale\->new(
\&        locale => \*(Aqda\*(Aq,
\&        upper_before_lower => 0, # causes error as reserved by \*(Aqda\*(Aq
\&    )
.Ve
.PP
However \f(CW\*(C`change()\*(C'\fR inherited from \f(CW\*(C`Unicode::Collate\*(C'\fR allows
such a tailoring that is reserved by \f(CW\*(C`locale\*(C'\fR. Examples:
.PP
.Vb 3
\&    new(locale => \*(Aqca\*(Aq)\->change(backwards => undef)
\&    new(locale => \*(Aqda\*(Aq)\->change(upper_before_lower => 0)
\&    new(locale => \*(Aqja\*(Aq)\->change(overrideCJK => undef)
.Ve
.SS "Methods"
.IX Subsection "Methods"
\&\f(CW\*(C`Unicode::Collate::Locale\*(C'\fR is a subclass of \f(CW\*(C`Unicode::Collate\*(C'\fR
and methods other than \f(CW\*(C`new\*(C'\fR are inherited from \f(CW\*(C`Unicode::Collate\*(C'\fR.
.PP
Here is a list of additional methods:
.ie n .IP """$Collator\->getlocale""" 4
.el .IP "\f(CW$Collator\->getlocale\fR" 4
.IX Item "$Collator->getlocale"
Returns the language code actually accepted and used for collation.
If linguistic tailoring is not provided for a language code you passed
(intentionally for some languages, or due to the incomplete implementation),
this method returns a string \f(CW\*(Aqdefault\*(Aq\fR meaning no special tailoring.
.ie n .IP """$Collator\->locale_version""" 4
.el .IP "\f(CW$Collator\->locale_version\fR" 4
.IX Item "$Collator->locale_version"
(Since Unicode::Collate::Locale 0.87)
Returns the version number (perhaps \f(CW\*(C`/\ed\e.\ed\ed/\*(C'\fR) of the locale, as that
of \fILocale/*.pl\fR.
.Sp
\&\fBNote:\fR \fILocale/*.pl\fR that a collator uses should be identified by
a combination of return values from \f(CW\*(C`getlocale\*(C'\fR and \f(CW\*(C`locale_version\*(C'\fR.
.SS "A list of tailorable locales"
.IX Subsection "A list of tailorable locales"
.Vb 10
\&      locale name       description
\&    \-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-
\&      af                Afrikaans
\&      ar                Arabic
\&      as                Assamese
\&      az                Azerbaijani (Azeri)
\&      be                Belarusian
\&      bg                Bulgarian
\&      bn                Bengali
\&      bs                Bosnian
\&      bs_Cyrl           Bosnian in Cyrillic (tailored as Serbian)
\&      ca                Catalan
\&      cs                Czech
\&      cy                Welsh
\&      da                Danish
\&      de_\|_phonebook     German (umlaut as \*(Aqae\*(Aq, \*(Aqoe\*(Aq, \*(Aque\*(Aq)
\&      ee                Ewe
\&      eo                Esperanto
\&      es                Spanish
\&      es_\|_traditional   Spanish (\*(Aqch\*(Aq and \*(Aqll\*(Aq as a grapheme)
\&      et                Estonian
\&      fa                Persian
\&      fi                Finnish (v and w are primary equal)
\&      fi_\|_phonebook     Finnish (v and w as separate characters)
\&      fil               Filipino
\&      fo                Faroese
\&      fr                French
\&      gu                Gujarati
\&      ha                Hausa
\&      haw               Hawaiian
\&      hi                Hindi
\&      hr                Croatian
\&      hu                Hungarian
\&      hy                Armenian
\&      ig                Igbo
\&      is                Icelandic
\&      ja                Japanese [1]
\&      kk                Kazakh
\&      kl                Kalaallisut
\&      kn                Kannada
\&      ko                Korean [2]
\&      kok               Konkani
\&      ln                Lingala
\&      lt                Lithuanian
\&      lv                Latvian
\&      mk                Macedonian
\&      ml                Malayalam
\&      mr                Marathi
\&      mt                Maltese
\&      nb                Norwegian Bokmal
\&      nn                Norwegian Nynorsk
\&      nso               Northern Sotho
\&      om                Oromo
\&      or                Oriya
\&      pa                Punjabi
\&      pl                Polish
\&      ro                Romanian
\&      ru                Russian
\&      sa                Sanskrit
\&      se                Northern Sami
\&      si                Sinhala
\&      si_\|_dictionary    Sinhala (U+0DA5 = U+0DA2,0DCA,0DA4)
\&      sk                Slovak
\&      sl                Slovenian
\&      sq                Albanian
\&      sr                Serbian
\&      sr_Latn           Serbian in Latin (tailored as Croatian)
\&      sv                Swedish (v and w are primary equal)
\&      sv_\|_reformed      Swedish (v and w as separate characters)
\&      ta                Tamil
\&      te                Telugu
\&      th                Thai
\&      tn                Tswana
\&      to                Tonga
\&      tr                Turkish
\&      uk                Ukrainian
\&      ur                Urdu
\&      vi                Vietnamese
\&      wae               Walser
\&      wo                Wolof
\&      yo                Yoruba
\&      zh                Chinese
\&      zh_\|_big5han       Chinese (ideographs: big5 order)
\&      zh_\|_gb2312han     Chinese (ideographs: GB\-2312 order)
\&      zh_\|_pinyin        Chinese (ideographs: pinyin order) [3]
\&      zh_\|_stroke        Chinese (ideographs: stroke order) [3]
\&      zh_\|_zhuyin        Chinese (ideographs: zhuyin order) [3]
\&    \-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-
.Ve
.PP
Locales according to the default \s-1UCA\s0 rules include
chr (Cherokee),
de (German),
en (English),
ga (Irish),
id (Indonesian),
it (Italian),
ka (Georgian),
ms (Malay),
nl (Dutch),
pt (Portuguese),
st (Southern Sotho),
sw (Swahili),
xh (Xhosa),
zu (Zulu).
.PP
\&\fBNote\fR
.PP
[1] ja: Ideographs are sorted in \s-1JIS X 0208\s0 order.
Fullwidth and halfwidth forms are identical to their regular form.
The difference between hiragana and katakana is at the 4th level,
the comparison also requires \f(CW\*(C`(variable => \*(AqNon\-ignorable\*(Aq)\*(C'\fR,
and then \f(CW\*(C`katakana_before_hiragana\*(C'\fR has no effect.
.PP
[2] ko: Plenty of ideographs are sorted by their reading. Such
an ideograph is primary (level 1) equal to, and secondary (level 2)
greater than, the corresponding hangul syllable.
.PP
[3] zh_\|_pinyin, zh_\|_stroke and zh_\|_zhuyin: implemented alt='short',
where a smaller number of ideographs are tailored.
.PP
Note: 'pinyin' is in Latin, 'zhuyin' is in Bopomofo.
.SH "INSTALL"
.IX Header "INSTALL"
Installation of \f(CW\*(C`Unicode::Collate::Locale\*(C'\fR requires \fICollate/Locale.pm\fR,
\&\fICollate/Locale/*.pl\fR, \fICollate/CJK/*.pm\fR and \fICollate/allkeys.txt\fR.
On building, \f(CW\*(C`Unicode::Collate::Locale\*(C'\fR doesn't require any of \fIdata/*.txt\fR,
\&\fIgendata/*\fR, and \fImklocale\fR.
Tests for \f(CW\*(C`Unicode::Collate::Locale\*(C'\fR are named \fIt/loc_*.t\fR.
.SH "CAVEAT"
.IX Header "CAVEAT"
.IP "tailoring is not maximum" 4
.IX Item "tailoring is not maximum"
Even if a certain letter is tailored, its equivalents are not always
tailored along with it. For example, even though W is tailored,
fullwidth W (\f(CW\*(C`U+FF37\*(C'\fR), W with acute (\f(CW\*(C`U+1E82\*(C'\fR), etc. are not
tailored. The result may depend on whether source strings are
normalized or not, and whether decomposed or composed.
Thus \f(CW\*(C`(normalization => undef)\*(C'\fR is less preferred.
.SH "AUTHOR"
.IX Header "AUTHOR"
The Unicode::Collate::Locale module for perl was written
by \s-1SADAHIRO\s0 Tomoyuki, <SADAHIRO@cpan.org>.
This module is Copyright(C) 2004\-2013, \s-1SADAHIRO\s0 Tomoyuki. Japan.
All rights reserved.
.PP
This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
.SH "SEE ALSO"
.IX Header "SEE ALSO"
.IP "Unicode Collation Algorithm \- \s-1UTS\s0 #10" 4
.IX Item "Unicode Collation Algorithm - UTS #10"
<http://www.unicode.org/reports/tr10/>
.IP "The Default Unicode Collation Element Table (\s-1DUCET\s0)" 4
.IX Item "The Default Unicode Collation Element Table (DUCET)"
<http://www.unicode.org/Public/UCA/latest/allkeys.txt>
.IP "Unicode Locale Data Markup Language (\s-1LDML\s0) \- \s-1UTS\s0 #35" 4
.IX Item "Unicode Locale Data Markup Language (LDML) - UTS #35"
<http://www.unicode.org/reports/tr35/>
.IP "\s-1CLDR \-\s0 Unicode Common Locale Data Repository" 4
.IX Item "CLDR - Unicode Common Locale Data Repository"
<http://cldr.unicode.org/>
.IP "Unicode::Collate" 4
.IX Item "Unicode::Collate"
.PD 0
.IP "Unicode::Normalize" 4
.IX Item "Unicode::Normalize"