.\" Automatically generated by Pod::Man 2.28 (Pod::Simple 3.28) .\" .\" Standard preamble: .\" ======================================================================== .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' . ds C` . ds C' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is turned on, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .\" .\" Avoid warning from groff about undefined register 'F'. .de IX .. .nr rF 0 .if \n(.g .if rF .nr rF 1 .if (\n(rF:(\n(.g==0)) \{ . if \nF \{ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . if !\nF==2 \{ . nr % 0 . nr F 2 . \} . \} .\} .rr rF .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). .\" Fear. Run. Save yourself. No user-serviceable parts. . \" fudge factors for nroff and troff .if n \{\ . ds #H 0 . ds #V .8m . ds #F .3m . ds #[ \f1 . 
ds #] \fP .\} .if t \{\ . ds #H ((1u-(\\\\n(.fu%2u))*.13m) . ds #V .6m . ds #F 0 . ds #[ \& . ds #] \& .\} . \" simple accents for nroff and troff .if n \{\ . ds ' \& . ds ` \& . ds ^ \& . ds , \& . ds ~ ~ . ds / .\} .if t \{\ . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' .\} . \" troff and (daisy-wheel) nroff accents .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' .ds 8 \h'\*(#H'\(*b\h'-\*(#H' .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] .ds ae a\h'-(\w'a'u*4/10)'e .ds Ae A\h'-(\w'A'u*4/10)'E . \" corrections for vroff .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' . \" for low resolution devices (crt and lpr) .if \n(.H>23 .if \n(.V>19 \ \{\ . ds : e . ds 8 ss . ds o a . ds d- d\h'-1'\(ga . ds D- D\h'-1'\(hy . ds th \o'bp' . ds Th \o'LP' . ds ae ae . ds Ae AE .\} .rm #[ #] #H #V #F C .\" ======================================================================== .\" .IX Title "Unicode::MapUTF8 3pm" .TH Unicode::MapUTF8 3pm "2015-06-04" "perl v5.20.2" "User Contributed Perl Documentation" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. 
.if n .ad l .nh .SH "NAME" Unicode::MapUTF8 \- Conversions to and from arbitrary character sets and UTF8 .SH "SYNOPSIS" .IX Header "SYNOPSIS" .Vb 1 \& use Unicode::MapUTF8 qw(to_utf8 from_utf8 utf8_supported_charset utf8_charset_alias); \& \& # Convert a string in \*(AqISO\-8859\-1\*(Aq to \*(AqUTF8\*(Aq \& my $output = to_utf8({ \-string => \*(AqAn example\*(Aq, \-charset => \*(AqISO\-8859\-1\*(Aq }); \& \& # Convert a string in \*(AqUTF8\*(Aq encoding to encoding \*(AqISO\-8859\-1\*(Aq \& my $other = from_utf8({ \-string => \*(AqOther text\*(Aq, \-charset => \*(AqISO\-8859\-1\*(Aq }); \& \& # List available character set encodings \& my @character_sets = utf8_supported_charset; \& \& # Add a character set alias \& utf8_charset_alias({ \*(Aqms\-japanese\*(Aq => \*(Aqsjis\*(Aq }); \& \& # Convert between two arbitrary (but largely compatible) charset encodings \& # (SJIS to EUC\-JP) \& my $utf8_string = to_utf8({ \-string =>$sjis_string, \-charset => \*(Aqsjis\*(Aq}); \& my $euc_jp_string = from_utf8({ \-string => $utf8_string, \-charset => \*(Aqeuc\-jp\*(Aq }); \& \& # Verify that a specific character set is supported \& if (utf8_supported_charset(\*(AqISO\-8859\-1\*(Aq)) { \& # Yes \& } .Ve .SH "DESCRIPTION" .IX Header "DESCRIPTION" Provides an adapter layer between core routines for converting to and from \s-1UTF8\s0 and other encodings. In essence, a way to give multiple existing Unicode modules a single common interface so you don't have to know the underlying implementations to do simple \s-1UTF8\s0 to-from other character set encoding conversions. As such, it wraps the Unicode::String, Unicode::Map8, Unicode::Map and Jcode modules in a standardized and simple \s-1API.\s0 .PP This also provides general character set conversion operation based on \s-1UTF8 \-\s0 it is possible to convert between any two compatible and supported character sets via a simple two step chaining of conversions.
.PP As with most things Perlish \- if you give it a few big chunks of text to chew on instead of lots of small ones it will handle many more characters per second. .PP By design, it can be easily extended to encompass any new charset encoding conversion modules that arrive on the scene. .PP This module is intended to provide good Unicode support to versions of Perl prior to 5.8. If you are using Perl 5.8.0 or later, you probably want to be using the Encode module instead. This module \fBdoes\fR work with Perl 5.8, but Encode is the preferred method in that environment. .SH "CHANGES" .IX Header "CHANGES" .Vb 4 \& 1.11 2005.10.10 Documentation changes. Addition of Build.PL support. \& Added various build tests, LICENSE, Artistic_License.txt, \& GPL_License.txt. Split documentation into separate \& .pod file. Added Japanese translation of POD. \& \& 1.10 2005.05.22 \- Fixed bug in conversion of ISO\-2022\-JP to UTF\-8. \& Problem and fix found by Masahiro HONMA \& . \& \& Similar bugs in conversions of shift_jis and euc\-jp \& to UTF\-8 fixed as well. \& \& 1.09 2001.08.22 \- Fixed multiple typo occurrences of \*(Aquft\*(Aq \& where \*(Aqutf\*(Aq was meant in code. Problem affected \& utf16 and utf7 encodings. Problem found \& by devon smith \& \& 1.08 2000.11.06 \- Added \*(Aqutf8_charset_alias\*(Aq function to \& allow for runtime setting of character \& set aliases. Added several alternate \& names for \*(Aqsjis\*(Aq (shiftjis, shift\-jis, \& shift_jis, s\-jis, and s_jis). \& \& Corrected \*(Aqcroak\*(Aq messages for \& \*(Aqfrom_utf8\*(Aq functions to appropriate \& function name. \& \& Tightened up initialization encapsulation \& \& Corrected fatal problem in jcode from \& unicode internals. Problem and fix \& found by Brian Wisti . \& \& 1.07 2000.11.01 \- Added \*(Aqcroak\*(Aq to use Carp declaration to \& fix error messages. Problem and fix \& found by Brian Wisti \& .
\& \& 1.06 2000.10.30 \- Fix to handle change in stringification \& of overloaded objects between Perl 5.005 \& and 5.6. Problem noticed by Brian Wisti \& . \& \& 1.05 2000.10.23 \- Error in conversions from UTF8 to \& multibyte encodings corrected \& \& 1.04 2000.10.23 \- Additional diagnostic messages added \& for internal error conditions \& \& 1.03 2000.10.22 \- Bug fix for load time autodetection of \& Unicode::Map8 encodings \& \& 1.02 2000.10.22 \- Added load time autodetection of \& Unicode::Map8 supported character set \& encodings. \& \& Fixed internal calling error for some \& character sets with \*(Aqfrom_utf8\*(Aq. Thanks \& goes to Ilia Lobsanov \& for reporting this \& problem. \& \& 1.01 2000.10.02 \- Fixed handling of empty strings and \& added more identification for error \& messages. \& \& 1.00 2000.09.29 \- Pre\-release version .Ve .SH "FUNCTIONS" .IX Header "FUNCTIONS" .ie n .IP "utf8_charset_alias({ $alias => $charset });" 4 .el .IP "utf8_charset_alias({ \f(CW$alias\fR => \f(CW$charset\fR });" 4 .IX Item "utf8_charset_alias({ $alias => $charset });" Used for runtime assignment of character set aliases. .Sp Called with no parameters, returns a hash of defined aliases and the character sets they map to. .Sp Example: .Sp .Vb 2 \& my $aliases = utf8_charset_alias; \& my @alias_names = keys %$aliases; .Ve .Sp If called with \s-1ONE\s0 parameter, returns the name of the 'real' charset if the alias is defined. Returns undef if it is not found in the aliases. .Sp Example: .Sp .Vb 3 \& if (! utf8_charset_alias(\*(AqVISCII\*(Aq)) { \& # No alias for this \& } .Ve .Sp If called with a list of 'alias' => 'charset' pairs, defines those aliases for use. .Sp Example: .Sp .Vb 1 \& utf8_charset_alias({ \*(Aqjapanese\*(Aq => \*(Aqsjis\*(Aq, \*(Aqjapan\*(Aq => \*(Aqsjis\*(Aq }); .Ve .Sp Note: It will croak if a passed pair does not map to a character set defined in the predefined set of character encodings.
It is \s-1NOT\s0 allowed to alias something to another alias. .Sp Multiple character set aliases can be set with a single call. .Sp To clear an alias, pass a character set mapping of undef. .Sp Example: .Sp .Vb 1 \& utf8_charset_alias({ \*(Aqjapanese\*(Aq => undef }); .Ve .Sp While an alias is set, the 'utf8_supported_charset' function will return the alias as if it were a predefined charset. .Sp Overriding a base defined character encoding with an alias will generate a warning message to \s-1STDERR.\s0 .IP "utf8_supported_charset($charset_name);" 4 .IX Item "utf8_supported_charset($charset_name);" Returns true if the named charset is supported (including user defined aliases). .Sp Returns false if it is not. .Sp Example: .Sp .Vb 3 \& if (! utf8_supported_charset(\*(AqVISCII\*(Aq)) { \& # No support yet \& } .Ve .Sp If called in a list context with no parameters, it will return a list of all supported character set names (including user defined aliases). .Sp Example: .Sp .Vb 1 \& my @charsets = utf8_supported_charset; .Ve .ie n .IP "to_utf8({ \-string => $string, \-charset => $source_charset });" 4 .el .IP "to_utf8({ \-string => \f(CW$string\fR, \-charset => \f(CW$source_charset\fR });" 4 .IX Item "to_utf8({ -string => $string, -charset => $source_charset });" Returns the string converted to \s-1UTF8\s0 from the specified source charset. .ie n .IP "from_utf8({ \-string => $string, \-charset => $target_charset});" 4 .el .IP "from_utf8({ \-string => \f(CW$string\fR, \-charset => \f(CW$target_charset\fR});" 4 .IX Item "from_utf8({ -string => $string, -charset => $target_charset});" Returns the string converted from \s-1UTF8\s0 to the specified target charset. 
.SH "VERSION" .IX Header "VERSION" 1.11 2005.10.10 .SH "TODO" .IX Header "TODO" Regression tests for Jcode, 2\-byte encodings and encoding aliases .SH "SEE ALSO" .IX Header "SEE ALSO" Unicode::String Unicode::Map8 Unicode::Map Jcode Encode .SH "COPYRIGHT" .IX Header "COPYRIGHT" Copyright 2000\-2005, Benjamin Franz. All rights reserved. .SH "AUTHOR" .IX Header "AUTHOR" Benjamin Franz .SH "LICENSE" .IX Header "LICENSE" This program is free software; you can redistribute it and/or modify it under the same terms and conditions as Perl itself. .PP This means that you can, at your option, redistribute it and/or modify it under either the terms of the \s-1GNU\s0 Public License (\s-1GPL\s0) version 1 or later, or under the Perl Artistic License. .PP See http://dev.perl.org/licenses/ .SH "DISCLAIMER" .IX Header "DISCLAIMER" \&\s-1THIS SOFTWARE IS PROVIDED\s0 ``\s-1AS IS\s0'' \s-1AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.\s0 .PP Use of this software in any way or in any form, source or binary, is not allowed in any country which prohibits disclaimers of any implied warranties of merchantability or fitness for a particular purpose or any disclaimers of a similar nature. .PP \&\s-1IN NO EVENT SHALL I BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION \s0(\s-1INCLUDING, BUT NOT LIMITED TO, LOST PROFITS\s0) \s-1EVEN IF I HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE\s0