.\" Automatically generated by Pod::Man 2.28 (Pod::Simple 3.28) .\" .\" Standard preamble: .\" ======================================================================== .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' . ds C` . ds C' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is turned on, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .\" .\" Avoid warning from groff about undefined register 'F'. .de IX .. .nr rF 0 .if \n(.g .if rF .nr rF 1 .if (\n(rF:(\n(.g==0)) \{ . if \nF \{ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . if !\nF==2 \{ . nr % 0 . nr F 2 . \} . \} .\} .rr rF .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). .\" Fear. Run. Save yourself. No user-serviceable parts. . \" fudge factors for nroff and troff .if n \{\ . ds #H 0 . ds #V .8m . ds #F .3m . ds #[ \f1 . 
ds #] \fP .\} .if t \{\ . ds #H ((1u-(\\\\n(.fu%2u))*.13m) . ds #V .6m . ds #F 0 . ds #[ \& . ds #] \& .\} . \" simple accents for nroff and troff .if n \{\ . ds ' \& . ds ` \& . ds ^ \& . ds , \& . ds ~ ~ . ds / .\} .if t \{\ . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' .\} . \" troff and (daisy-wheel) nroff accents .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' .ds 8 \h'\*(#H'\(*b\h'-\*(#H' .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] .ds ae a\h'-(\w'a'u*4/10)'e .ds Ae A\h'-(\w'A'u*4/10)'E . \" corrections for vroff .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' . \" for low resolution devices (crt and lpr) .if \n(.H>23 .if \n(.V>19 \ \{\ . ds : e . ds 8 ss . ds o a . ds d- d\h'-1'\(ga . ds D- D\h'-1'\(hy . ds th \o'bp' . ds Th \o'LP' . ds ae ae . ds Ae AE .\} .rm #[ #] #H #V #F C .\" ======================================================================== .\" .IX Title "URI::Escape::XS 3pm" .TH URI::Escape::XS 3pm "2014-04-16" "perl v5.20.0" "User Contributed Perl Documentation" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. 
.if n .ad l .nh .SH "NAME" URI::Escape::XS \- Drop\-In replacement for URI::Escape .SH "VERSION" .IX Header "VERSION" \&\f(CW$Id:\fR \s-1XS\s0.pm,v 0.12 2014/04/16 10:49:36 dankogai Exp dankogai $ .SH "SYNOPSIS" .IX Header "SYNOPSIS" .Vb 5 \& # use it instead of URI::Escape \& use URI::Escape::XS qw/uri_escape uri_unescape/; \& $safe = uri_escape("10% is enough\en"); \& $verysafe = uri_escape("foo", "\e0\-\e377"); \& $str = uri_unescape($safe); \& \& # or use encodeURIComponent and decodeURIComponent \& use URI::Escape::XS; \& $safe = encodeURIComponent("10% is enough\en"); \& $str = decodeURIComponent("10%25%20is%20enough%0A"); \& \& # if you have Net::IDN::Encode installed \& $safe = encodeURIComponentIDN("http://XXXXXX.jp/dan/"); \& $str = decodeURIComponentIDN("http:%2F%2Fxn\-\-eckwd4c7cu47r2wf.jp%2Fdan%2F"); .Ve .SH "EXPORT" .IX Header "EXPORT" .SS "by default" .IX Subsection "by default" \&\*(L"encodeURIComponent\*(R" and \*(L"decodeURIComponent\*(R" .PP \&\*(L"encodeURIComponentIDN\*(R" and \*(L"decodeURIComponentIDN\*(R" if either Net::LibIDN or Net::IDN::Encode is available .SS "on demand" .IX Subsection "on demand" \&\*(L"uri_escape\*(R" and \*(L"uri_unescape\*(R" .SH "FUNCTIONS" .IX Header "FUNCTIONS" .SS "encodeURIComponent" .IX Subsection "encodeURIComponent" Does what JavaScript's encodeURIComponent does. .PP .Vb 2 \& $uri = encodeURIComponent("http://www.example.com/"); \& # http%3A%2F%2Fwww.example.com%2F .Ve .PP Note you cannot customize characters to escape. If you need to do so, use \*(L"uri_escape\*(R". .SS "decodeURIComponent" .IX Subsection "decodeURIComponent" Does what JavaScript's decodeURIComponent does. .PP .Vb 2 \& $str = decodeURIComponent("http%3A%2F%2Fwww.example.com%2F"); \& # http://www.example.com/ .Ve .PP It decodes not only \f(CW%HH\fR sequences but also \f(CW%uHHHH\fR sequences, with surrogate pairs correctly decoded. 
.PP .Vb 2 \& $str = decodeURIComponent("%uD869%uDEB2%u5F3E%u0061"); \& # \ex{2A6B2}\ex{5F3E}a .Ve .PP This function \s-1UNCONDITIONALLY\s0 returns the decoded string with the utf8 flag off. To get a utf8\-decoded string, use Encode and .PP .Vb 1 \& decode_utf8(decodeURIComponent($uri)); .Ve .PP This is the correct behavior because you cannot tell whether the decoded bytes are actually \s-1UTF\-8\s0; they could just as well be in another encoding such as \s-1ISO\-8859\-1\s0 or Shift_JIS. .SS "encodeURIComponentIDN" .IX Subsection "encodeURIComponentIDN" Same as \*(L"encodeURIComponent\*(R" except that the host part is encoded in punycode. Either Net::LibIDN or Net::IDN::Encode is required to use this function. .PP URIs with Internationalized Domain Names require two encodings: Punycode for the host part and \s-1URI\s0 escape for the rest. .PP Currently only \s-1FULL\s0 URIs with \f(CW\*(C`http:\*(C'\fR or \f(CW\*(C`https:\*(C'\fR are supported. .SS "decodeURIComponentIDN" .IX Subsection "decodeURIComponentIDN" Same as \*(L"decodeURIComponent\*(R" except that the host part is decoded from punycode. Either Net::LibIDN or Net::IDN::Encode is required to use this function. .SS "uri_escape" .IX Subsection "uri_escape" Does exactly the same as URI::Escape::\fIuri_escape()\fR \fBexcept\fR when a utf8\-flagged string is fed. .PP URI::Escape::\fIuri_escape()\fR croaks and urges you to use \&\f(CW\*(C`uri_escape_utf8()\*(C'\fR but that is pointless because a \s-1URI\s0 itself has no such thing as a utf8 flag. The function in this module \s-1ALWAYS TREATS\s0 the string as a byte sequence. That way you can safely use this function without worrying about utf8 flags. .PP Note this function is \s-1NOT EXPORTED\s0 by default. That way you can use URI::Escape and URI::Escape::XS simultaneously. .SS "uri_unescape" .IX Subsection "uri_unescape" Does exactly the same as URI::Escape::\fIuri_unescape()\fR \fBexcept\fR when \f(CW%uHHHH\fR is fed. 
.PP URI::Escape::\fIuri_unescape()\fR simply ignores \f(CW%uHHHH\fR sequences while the function in this module decodes them into the corresponding \&\s-1UTF\-8 \s0\fBbyte sequence\fR. .PP Like uri_escape, this function is \s-1NOT EXPORTED\s0 by default. .ie n .SS "Note on the %uHHHH sequence" .el .SS "Note on the \f(CW%uHHHH\fP sequence" .IX Subsection "Note on the %uHHHH sequence" With this module the resulting strings never have the utf8 flag on. So if you want to decode it to perl utf8, you have to explicitly decode via Encode. Remember: URIs have always been a byte sequence, not \s-1UTF\-8\s0 characters. .PP If the \f(CW%uHHHH\fR sequence had become standard, you could have safely told whether a given \s-1URI\s0 is in Unicode. But more fortunately than unfortunately, the \&\s-1RFC\s0 proposal was rejected so you cannot tell which encoding is used just by looking at the \s-1URI.\s0 .PP .PP I said fortunately because \f(CW%uHHHH\fR can be nasty for non-BMP characters. Since each \f(CW%uHHHH\fR can hold only one 16\-bit value, you need a \fIsurrogate pair\fR to represent a character at U+10000 or above. .PP In spite of that, there are a significant number of URIs with \f(CW%uHHHH\fR escapes. Therefore this module supports decoding only. .SH "SPEED" .IX Header "SPEED" Since this module uses \s-1XS,\s0 it is really fast except for uri_escape(\*(L"noop\*(R"). .PP The regexp used in URI::Escape is really fast when nothing matches but slows down significantly when it has to replace strings. .SS "\s-1BENCHMARK\s0" .IX Subsection "BENCHMARK" On a MacBook Pro 2GHz, Perl 5.8.8. 
.PP .Vb 11 \& http://www.google.co.jp/search?q=%E5%B0%8F%E9%A3%BC%E5%BC%BE \& ============================================================ \& Unescape it \& \-\-\-\-\-\-\-\-\-\-\- \& U::E 58526/s \-\- \-88% \& U::E::XS 486968/s 732% \-\- \& \-\-\-\-\-\-\-\-\-\-\-\-\-\- \& Escape it back \& \-\-\-\-\-\-\-\-\-\-\-\-\-\- \& U::E 30046/s \-\- \-78% \& U::E::XS 136992/s 356% \-\- \& \& www.example.com \& =============== \& Unescape it \& \-\-\-\-\-\-\-\-\-\-\- \& Rate U::E U::E::XS \& U::E 821972/s \-\- \-4% \& U::E::XS 854732/s 4% \-\- \& \-\-\-\-\-\-\-\-\-\-\-\-\-\- \& Escape it back \& \-\-\-\-\-\-\-\-\-\-\-\-\- \& U::E::XS 522969/s \-\- \-7% \& U::E 565112/s 8% \-\- .Ve .SH "AUTHOR" .IX Header "AUTHOR" Dan Kogai, \f(CW\*(C`\*(C'\fR .SH "BUGS" .IX Header "BUGS" Please report any bugs or feature requests to \&\f(CW\*(C`bug\-uri\-escape\-xs at rt.cpan.org\*(C'\fR, or through the web interface at <https://rt.cpan.org/>. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes. .SH "SUPPORT" .IX Header "SUPPORT" You can find documentation for this module with the perldoc command. .PP .Vb 1 \& perldoc URI::Escape::XS .Ve .PP You can also look for information at: .IP "\(bu" 4 AnnoCPAN: Annotated \s-1CPAN\s0 documentation .Sp .IP "\(bu" 4 \&\s-1CPAN\s0 Ratings .Sp .IP "\(bu" 4 \&\s-1RT: CPAN\s0's request tracker .Sp .IP "\(bu" 4 Search \s-1CPAN\s0 .Sp .SH "ACKNOWLEDGEMENTS" .IX Header "ACKNOWLEDGEMENTS" Gisle Aas for URI::Escape .PP Koichi Taniguchi for URI::Escape::JavaScript .PP Thomas Jacob for Net::LibIDN .PP Claus Fa\*:rber for Net::IDN::Encode .SH "COPYRIGHT & LICENSE" .IX Header "COPYRIGHT & LICENSE" Copyright 2007\-2014 Dan Kogai, all rights reserved. .PP This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.