.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.40)
.\"
.\" Standard preamble:
.\" ========================================================================
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.de Ve \" End verbatim text
.ft R
.fi
..
.\" Set up some character translations and predefined strings.  \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
.\" nothing in troff, for use with C<>.
.tr \(*W-
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
.    ds -- \(*W-
.    ds PI pi
.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
.    ds L" ""
.    ds R" ""
.    ds C` ""
.    ds C' ""
'br\}
.el\{\
.    ds -- \|\(em\|
.    ds PI \(*p
.    ds L" ``
.    ds R" ''
.    ds C`
.    ds C'
'br\}
.\"
.\" Escape single quotes in literal strings from groff's Unicode transform.
.ie \n(.g .ds Aq \(aq
.el       .ds Aq '
.\"
.\" If the F register is >0, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD.  Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.\"
.\" Avoid warning from groff about undefined register 'F'.
.de IX
..
.nr rF 0
.if \n(.g .if rF .nr rF 1
.if (\n(rF:(\n(.g==0)) \{\
.    if \nF \{\
.        de IX
.        tm Index:\\$1\t\\n%\t"\\$2"
..
.        if !\nF==2 \{\
.            nr % 0
.            nr F 2
.        \}
.    \}
.\}
.rr rF
.\"
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
.    \" fudge factors for nroff and troff
.if n \{\
.    ds #H 0
.    ds #V .8m
.    ds #F .3m
.    ds #[ \f1
.    ds #] \fP
.\}
.if t \{\
.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
.    ds #V .6m
.    ds #F 0
.    ds #[ \&
.    ds #] \&
.\}
.    \" simple accents for nroff and troff
.if n \{\
.    ds ' \&
.    ds ` \&
.    ds ^ \&
.    ds , \&
.    ds ~ ~
.    ds /
.\}
.if t \{\
.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
.\}
.    \" troff and (daisy-wheel) nroff accents
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
.ds ae a\h'-(\w'a'u*4/10)'e
.ds Ae A\h'-(\w'A'u*4/10)'E
.    \" corrections for vroff
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
.    \" for low resolution devices (crt and lpr)
.if \n(.H>23 .if \n(.V>19 \
\{\
.    ds : e
.    ds 8 ss
.    ds o a
.    ds d- d\h'-1'\(ga
.    ds D- D\h'-1'\(hy
.    ds th \o'bp'
.    ds Th \o'LP'
.    ds ae ae
.    ds Ae AE
.\}
.rm #[ #] #H #V #F C
.\" ========================================================================
.\"
.IX Title "Sympa::Tools::Text 3Sympa"
.TH Sympa::Tools::Text 3Sympa "2020-12-30" "6.2.58" "sympa 6.2.58"
.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
.nh
.SH "NAME"
Sympa::Tools::Text \- Text\-related functions
.SH "DESCRIPTION"
.IX Header "DESCRIPTION"
This package provides some text-related functions.
.SS "Functions"
.IX Subsection "Functions"
.ie n .IP "addrencode ( $addr, [ $phrase, [ $charset, [ $comment ] ] ] )" 4
.el .IP "addrencode ( \f(CW$addr\fR, [ \f(CW$phrase\fR, [ \f(CW$charset\fR, [ \f(CW$comment\fR ] ] ] )" 4
.IX Item "addrencode ( $addr, [ $phrase, [ $charset, [ $comment ] ] ] )"
Returns formatted (and encoded) name-addr as specified in \s-1RFC 5322\s0 section 3.4.
.ie n .IP "canonic_email ( $email )" 4
.el .IP "canonic_email ( \f(CW$email\fR )" 4
.IX Item "canonic_email ( $email )"
\&\fIFunction\fR.
Returns canonical form of e\-mail address.
.Sp
Leading and trailing white spaces are removed.
Latin letters without accents are lower-cased.
.Sp
For malformed inputs returns \f(CW\*(C`undef\*(C'\fR.
.ie n .IP "canonic_message_id ( $message_id )" 4
.el .IP "canonic_message_id ( \f(CW$message_id\fR )" 4
.IX Item "canonic_message_id ( $message_id )"
Returns the canonical form of a message \s-1ID\s0, without leading or trailing
whitespace or the surrounding \f(CW\*(C`<\*(C'\fR and \f(CW\*(C`>\*(C'\fR.
.ie n .IP "canonic_text ( $text )" 4
.el .IP "canonic_text ( \f(CW$text\fR )" 4
.IX Item "canonic_text ( $text )"
Canonicalizes text.
\&\f(CW$text\fR should be a binary string encoded by \s-1UTF\-8\s0 character set or
a Unicode string.
Forbidden sequences in binary string will be replaced by
U+FFFD \s-1REPLACEMENT\s0 CHARACTERs, and Normalization Form C (\s-1NFC\s0) will be applied.
.ie n .IP "decode_filesystem_safe ( $str )" 4
.el .IP "decode_filesystem_safe ( \f(CW$str\fR )" 4
.IX Item "decode_filesystem_safe ( $str )"
\&\fIFunction\fR.
Decodes a string encoded by \fBencode_filesystem_safe()\fR.
.Sp
Parameter:
.RS 4
.ie n .IP "$str" 4
.el .IP "\f(CW$str\fR" 4
.IX Item "$str"
String to be decoded.
.RE
.RS 4
.Sp
Returns:
.Sp
Decoded string, stripped \f(CW\*(C`utf8\*(C'\fR flag if any.
.RE
.ie n .IP "decode_html ( $str )" 4
.el .IP "decode_html ( \f(CW$str\fR )" 4
.IX Item "decode_html ( $str )"
\&\fIFunction\fR.
Decodes \s-1HTML\s0 entities in a string encoded by \s-1UTF\-8\s0 or a Unicode string.
.Sp
Parameter:
.RS 4
.ie n .IP "$str" 4
.el .IP "\f(CW$str\fR" 4
.IX Item "$str"
String to be decoded.
.RE
.RS 4
.Sp
Returns:
.Sp
Decoded string, stripped \f(CW\*(C`utf8\*(C'\fR flag if any.
.RE
.ie n .IP "encode_filesystem_safe ( $str )" 4
.el .IP "encode_filesystem_safe ( \f(CW$str\fR )" 4
.IX Item "encode_filesystem_safe ( $str )"
\&\fIFunction\fR.
Encodes a string \f(CW$str\fR to be suitable for filesystem.
.Sp
Parameter:
.RS 4
.ie n .IP "$str" 4
.el .IP "\f(CW$str\fR" 4
.IX Item "$str"
String to be encoded.
.RE
.RS 4
.Sp
Returns:
.Sp
Encoded string, stripped \f(CW\*(C`utf8\*(C'\fR flag if any.
All bytes except \f(CW\*(Aq\-\*(Aq\fR, \f(CW\*(Aq+\*(Aq\fR, \f(CW\*(Aq.\*(Aq\fR, \f(CW\*(Aq@\*(Aq\fR
and alphanumeric characters are encoded to sequences \f(CW\*(Aq_\*(Aq\fR followed by
two hexdigits.
.Sp
Note that \f(CW\*(Aq/\*(Aq\fR will also be encoded.
.RE
.ie n .IP "encode_html ( $str, [ $additional_unsafe ] )" 4
.el .IP "encode_html ( \f(CW$str\fR, [ \f(CW$additional_unsafe\fR ] )" 4
.IX Item "encode_html ( $str, [ $additional_unsafe ] )"
\&\fIFunction\fR.
Encodes characters in a string \f(CW$str\fR to \s-1HTML\s0 entities.
By default
\&\f(CW\*(Aq<\*(Aq\fR, \f(CW\*(Aq>\*(Aq\fR, \f(CW\*(Aq&\*(Aq\fR and \f(CW\*(Aq"\*(Aq\fR are encoded.
.Sp
Parameter:
.RS 4
.ie n .IP "$str" 4
.el .IP "\f(CW$str\fR" 4
.IX Item "$str"
String to be encoded.
.ie n .IP "$additional_unsafe" 4
.el .IP "\f(CW$additional_unsafe\fR" 4
.IX Item "$additional_unsafe"
Character or range of characters additionally encoded as entity references.
.Sp
This optional parameter was introduced on Sympa 6.2.37b.3.
.RE
.RS 4
.Sp
Returns:
.Sp
Encoded string, \fInot\fR stripping utf8 flag if any.
.RE
.ie n .IP "encode_uri ( $str, [ omit => $chars ] )" 4
.el .IP "encode_uri ( \f(CW$str\fR, [ omit => \f(CW$chars\fR ] )" 4
.IX Item "encode_uri ( $str, [ omit => $chars ] )"
\&\fIFunction\fR.
Encodes potentially unsafe characters in the string using \*(L"percent\*(R" encoding
suitable for URIs.
.Sp
Parameters:
.RS 4
.ie n .IP "$str" 4
.el .IP "\f(CW$str\fR" 4
.IX Item "$str"
String to be encoded.
.ie n .IP "omit => $chars" 4
.el .IP "omit => \f(CW$chars\fR" 4
.IX Item "omit => $chars"
By default, all characters except those defined as \*(L"unreserved\*(R" in \s-1RFC 3986\s0
are encoded, that is, \f(CW\*(C`[^\-A\-Za\-z0\-9._~]\*(C'\fR.
If this parameter is given, it will prevent encoding additional characters.
.RE
.RS 4
.Sp
Returns:
.Sp
Encoded string, stripped \f(CW\*(C`utf8\*(C'\fR flag if any.
.RE
.ie n .IP "escape_chars ( $str )" 4
.el .IP "escape_chars ( \f(CW$str\fR )" 4
.IX Item "escape_chars ( $str )"
Escape weird characters.
.Sp
ToDo: This should be obsoleted in the future release: Would be better to use
\&\*(L"encode_filesystem_safe\*(R".
.ie n .IP "escape_url ( $str )" 4
.el .IP "escape_url ( \f(CW$str\fR )" 4
.IX Item "escape_url ( $str )"
\&\s-1DEPRECATED.\s0
Would be better to use \*(L"encode_uri\*(R" or \*(L"mailtourl\*(R".
.ie n .IP "foldcase ( $str )" 4
.el .IP "foldcase ( \f(CW$str\fR )" 4
.IX Item "foldcase ( $str )"
\&\fIFunction\fR.
Returns \*(L"fold-case\*(R" string suitable for case-insensitive match.
For example, the code below looks for a needle in a haystack regardless of case,
even if they are non-ASCII \s-1UTF\-8\s0 strings.
.Sp
.Vb 5
\&  $haystack = Sympa::Tools::Text::foldcase($HayStack);
\&  $needle   = Sympa::Tools::Text::foldcase($NeedLe);
\&  if (index($haystack, $needle) >= 0) {
\&      ...
\&  }
.Ve
.Sp
Parameter:
.RS 4
.ie n .IP "$str" 4
.el .IP "\f(CW$str\fR" 4
.IX Item "$str"
A string.
.RE
.RS 4
.RE
.ie n .IP "guessed_to_utf8( $text, [ lang, ... ] )" 4
.el .IP "guessed_to_utf8( \f(CW$text\fR, [ lang, ... ] )" 4
.IX Item "guessed_to_utf8( $text, [ lang, ... ] )"
\&\fIFunction\fR.
Guesses text charset considering language context
and returns the text reencoded by \s-1UTF\-8.\s0
.Sp
Parameters:
.RS 4
.ie n .IP "$text" 4
.el .IP "\f(CW$text\fR" 4
.IX Item "$text"
Text to be reencoded.
.IP "lang, ..." 4
.IX Item "lang, ..."
Language tag(s) which may be given by \*(L"implicated_langs\*(R" in Sympa::Language.
.RE
.RS 4
.Sp
Returns:
.Sp
Reencoded text.
If any charsets could not be guessed, \f(CW\*(C`iso\-8859\-1\*(C'\fR will be used
as the last resort, just because it covers full range of 8\-bit.
.RE
.ie n .IP "mailtourl ( $email, [ decode_html => 1 ], [ query => {key => val, ...} ] )" 4
.el .IP "mailtourl ( \f(CW$email\fR, [ decode_html => 1 ], [ query => {key => val, ...} ] )" 4
.IX Item "mailtourl ( $email, [ decode_html => 1 ], [ query => {key => val, ...} ] )"
\&\fIFunction\fR.
Constructs a \f(CW\*(C`mailto:\*(C'\fR \s-1URL\s0 for given e\-mail.
.Sp
Parameters:
.RS 4
.ie n .IP "$email" 4
.el .IP "\f(CW$email\fR" 4
.IX Item "$email"
E\-mail address.
.IP "decode_html => 1" 4
.IX Item "decode_html => 1"
If set, arguments are assumed to include \s-1HTML\s0 entities.
.IP "query => {key => val, ...}" 4
.IX Item "query => {key => val, ...}"
Optional query.
.RE
.RS 4
.Sp
Returns:
.Sp
Constructed \s-1URL.\s0
.RE
.ie n .IP "pad ( $str, $width )" 4
.el .IP "pad ( \f(CW$str\fR, \f(CW$width\fR )" 4
.IX Item "pad ( $str, $width )"
Pads a string with spaces so that the result will not be narrower than the given width.
.Sp
Parameters:
.RS 4
.ie n .IP "$str" 4
.el .IP "\f(CW$str\fR" 4
.IX Item "$str"
A string.
.ie n .IP "$width" 4
.el .IP "\f(CW$width\fR" 4
.IX Item "$width"
If \f(CW$width\fR is a false value, or the width of \f(CW$str\fR is not less than
the absolute value of \f(CW$width\fR, does nothing.
If \f(CW$width\fR is less than \f(CW0\fR, pads on the right.
Otherwise, pads on the left.
.RE
.RS 4
.Sp
Returns:
.Sp
Padded string.
.RE
.ie n .IP "qdecode_filename ( $filename )" 4
.el .IP "qdecode_filename ( \f(CW$filename\fR )" 4
.IX Item "qdecode_filename ( $filename )"
Q\-Decodes web file name.
.Sp
ToDo:
This should be obsoleted in the future release: Would be better to use
\&\*(L"decode_filesystem_safe\*(R".
.ie n .IP "qencode_filename ( $filename )" 4
.el .IP "qencode_filename ( \f(CW$filename\fR )" 4
.IX Item "qencode_filename ( $filename )"
Q\-Encodes web file name.
.Sp
ToDo:
This should be obsoleted in the future release: Would be better to use
\&\*(L"encode_filesystem_safe\*(R".
.ie n .IP "slurp ( $file )" 4
.el .IP "slurp ( \f(CW$file\fR )" 4
.IX Item "slurp ( $file )"
Get entire content of the file.
Normalization by \fBcanonic_text()\fR is applied.
\&\f(CW$file\fR is the path to text file.
.ie n .IP "unescape_chars ( $str )" 4
.el .IP "unescape_chars ( \f(CW$str\fR )" 4
.IX Item "unescape_chars ( $str )"
Unescape weird characters.
.Sp
ToDo: This should be obsoleted in the future release: Would be better to use
\&\*(L"decode_filesystem_safe\*(R".
.ie n .IP "valid_email ( $string )" 4
.el .IP "valid_email ( \f(CW$string\fR )" 4
.IX Item "valid_email ( $string )"
Basic check of an email address.
.ie n .IP "weburl ( $base, \e@paths, [ decode_html => 1 ], [ fragment => $fragment ], [ query => \e%query ] )" 4
.el .IP "weburl ( \f(CW$base\fR, \e@paths, [ decode_html => 1 ], [ fragment => \f(CW$fragment\fR ], [ query => \e%query ] )" 4
.IX Item "weburl ( $base, @paths, [ decode_html => 1 ], [ fragment => $fragment ], [ query => %query ] )"
Constructs a \f(CW\*(C`http:\*(C'\fR or \f(CW\*(C`https:\*(C'\fR \s-1URL\s0 under given base \s-1URI.\s0
.Sp
Parameters:
.RS 4
.ie n .IP "$base" 4
.el .IP "\f(CW$base\fR" 4
.IX Item "$base"
Base \s-1URI.\s0
.IP "\e@paths" 4
.IX Item "@paths"
Additional path components.
.IP "decode_html => 1" 4
.IX Item "decode_html => 1"
If set, arguments are assumed to include \s-1HTML\s0 entities.
The exception is \f(CW$base\fR:
it is assumed not to include entities.
.ie n .IP "fragment => $fragment" 4
.el .IP "fragment => \f(CW$fragment\fR" 4
.IX Item "fragment => $fragment"
Optional fragment.
.IP "query => \e%query" 4
.IX Item "query => %query"
Optional query.
.RE
.RS 4
.Sp
Returns:
.Sp
A \s-1URI.\s0
.RE
.ie n .IP "wrap_text ( $text, [ $init_tab, [ $subsequent_tab, [ $cols ] ] ] )" 4
.el .IP "wrap_text ( \f(CW$text\fR, [ \f(CW$init_tab\fR, [ \f(CW$subsequent_tab\fR, [ \f(CW$cols\fR ] ] ] )" 4
.IX Item "wrap_text ( $text, [ $init_tab, [ $subsequent_tab, [ $cols ] ] ] )"
\&\fIFunction\fR.
Returns line-wrapped text.
.Sp
Parameters:
.RS 4
.ie n .IP "$text" 4
.el .IP "\f(CW$text\fR" 4
.IX Item "$text"
The text to be folded.
.ie n .IP "$init_tab" 4
.el .IP "\f(CW$init_tab\fR" 4
.IX Item "$init_tab"
Indentation prepended to the first line of paragraph.
Default is \f(CW\*(Aq\*(Aq\fR, no indentation.
.ie n .IP "$subsequent_tab" 4
.el .IP "\f(CW$subsequent_tab\fR" 4
.IX Item "$subsequent_tab"
Indentation prepended to each subsequent line of folded paragraph.
Default is \f(CW\*(Aq\*(Aq\fR, no indentation.
.ie n .IP "$cols" 4
.el .IP "\f(CW$cols\fR" 4
.IX Item "$cols"
Max number of columns of folded text.
Default is \f(CW78\fR.
.RE
.RS 4
.RE
.SH "HISTORY"
.IX Header "HISTORY"
Sympa::Tools::Text appeared on Sympa 6.2a.41.
.PP
\&\fBdecode_filesystem_safe()\fR and \fBencode_filesystem_safe()\fR were added
on Sympa 6.2.10.
.PP
\&\fBdecode_html()\fR, \fBencode_html()\fR, \fBencode_uri()\fR and \fBmailtourl()\fR
were added on Sympa 6.2.14, and \fBescape_url()\fR was deprecated.
.PP
\&\fBguessed_to_utf8()\fR and \fBpad()\fR were added on Sympa 6.2.17.
.PP
\&\fBcanonic_text()\fR and \fBslurp()\fR were added on Sympa 6.2.53b.