.\" Automatically generated by Pod::Man 2.23 (Pod::Simple 3.14) .\" .\" Standard preamble: .\" ======================================================================== .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is turned on, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .ie \nF \{\ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . nr % 0 . rr F .\} .el \{\ . de IX .. .\} .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). .\" Fear. Run. Save yourself. No user-serviceable parts. . \" fudge factors for nroff and troff .if n \{\ . ds #H 0 . ds #V .8m . ds #F .3m . ds #[ \f1 . ds #] \fP .\} .if t \{\ . ds #H ((1u-(\\\\n(.fu%2u))*.13m) . ds #V .6m . ds #F 0 . ds #[ \& . ds #] \& .\} . \" simple accents for nroff and troff .if n \{\ . ds ' \& . 
ds ` \& . ds ^ \& . ds , \& . ds ~ ~ . ds / .\} .if t \{\ . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' .\} . \" troff and (daisy-wheel) nroff accents .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' .ds 8 \h'\*(#H'\(*b\h'-\*(#H' .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] .ds ae a\h'-(\w'a'u*4/10)'e .ds Ae A\h'-(\w'A'u*4/10)'E . \" corrections for vroff .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' . \" for low resolution devices (crt and lpr) .if \n(.H>23 .if \n(.V>19 \ \{\ . ds : e . ds 8 ss . ds o a . ds d- d\h'-1'\(ga . ds D- D\h'-1'\(hy . ds th \o'bp' . ds Th \o'LP' . ds ae ae . ds Ae AE .\} .rm #[ #] #H #V #F C .\" ======================================================================== .\" .IX Title "IRC::Utils 3pm" .TH IRC::Utils 3pm "2011-10-07" "perl v5.12.4" "User Contributed Perl Documentation" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. 
.if n .ad l .nh .SH "NAME" IRC::Utils \- Common utilities for IRC\-related tasks .SH "SYNOPSIS" .IX Header "SYNOPSIS" .Vb 2 \& use strict; \& use warnings; \& \& use IRC::Utils \*(Aq:ALL\*(Aq; \& \& my $nickname = \*(Aq^Lame|BOT[moo]\*(Aq; \& my $uppercase_nick = uc_irc($nickname); \& my $lowercase_nick = lc_irc($nickname); \& \& print "They\*(Aqre equivalent\en" if eq_irc($uppercase_nick, $lowercase_nick); \& \& my $mode_line = \*(Aqov+b\-i Bob sue stalin*!*@*\*(Aq; \& my $hashref = parse_mode_line($mode_line); \& \& my $banmask = \*(Aqstalin*\*(Aq; \& my $full_banmask = normalize_mask($banmask); \& \& if (matches_mask($full_banmask, \*(Aqstalin!joe@kremlin.ru\*(Aq)) { \& print "EEK!"; \& } \& \& my $decoded = decode_irc($raw_irc_message); \& print $decoded, "\en"; \& \& if (has_color($message)) { \& print "COLOR CODE ALERT!\en"; \& } \& \& my $results_hashref = matches_mask_array(\e@masks, \e@items_to_match_against); \& \& my $nick = parse_user(\*(Aqstalin!joe@kremlin.ru\*(Aq); \& my ($nick, $user, $host) = parse_user(\*(Aqstalin!joe@kremlin.ru\*(Aq); .Ve .SH "DESCRIPTION" .IX Header "DESCRIPTION" The functions in this module take care of many of the tasks you are faced with when working with \s-1IRC\s0: mode lines, ban masks, message encoding and formatting, etc. .SH "FUNCTIONS" .IX Header "FUNCTIONS" .ie n .SS """uc_irc""" .el .SS "\f(CWuc_irc\fP" .IX Subsection "uc_irc" Takes one mandatory parameter, a string to convert to \s-1IRC\s0 uppercase, and one optional parameter, the casemapping of the ircd (which can be \fB'rfc1459'\fR, \&\fB'strict\-rfc1459'\fR or \fB'ascii'\fR. Default is \fB'rfc1459'\fR). Returns the \s-1IRC\s0 uppercase equivalent of the passed string. .ie n .SS """lc_irc""" .el .SS "\f(CWlc_irc\fP" .IX Subsection "lc_irc" Takes one mandatory parameter, a string to convert to \s-1IRC\s0 lowercase, and one optional parameter, the casemapping of the ircd (which can be \fB'rfc1459'\fR, \&\fB'strict\-rfc1459'\fR or \fB'ascii'\fR.
Default is \fB'rfc1459'\fR). Returns the \s-1IRC\s0 lowercase equivalent of the passed string. .ie n .SS """eq_irc""" .el .SS "\f(CWeq_irc\fP" .IX Subsection "eq_irc" Takes two mandatory parameters, \s-1IRC\s0 strings (channels or nicknames) to compare. A third, optional parameter specifies the casemapping. Returns true if the two strings are equivalent, false otherwise. .PP .Vb 2 \& # long version \& lc_irc($one, $map) eq lc_irc($two, $map) \& \& # short version \& eq_irc($one, $two, $map) .Ve .ie n .SS """parse_mode_line""" .el .SS "\f(CWparse_mode_line\fP" .IX Subsection "parse_mode_line" Takes a list representing an \s-1IRC\s0 mode line. Returns a hashref. Optionally you can also supply an arrayref and a hashref to specify valid channel modes (default: \f(CW\*(C`[qw(beI k l imnpstaqr)]\*(C'\fR) and status modes (default: \&\f(CW\*(C`{o => \*(Aq@\*(Aq, h => \*(Aq%\*(Aq, v => \*(Aq+\*(Aq}\*(C'\fR), respectively. .PP If the modeline couldn't be parsed, the hashref will be empty. On success the following keys will be available in the hashref: .PP \&\fB'modes'\fR, an arrayref of normalised modes; .PP \&\fB'args'\fR, an arrayref of applicable arguments to the modes. .PP Example: .PP .Vb 1 \& my $hashref = parse_mode_line( \*(Aqov+b\-i\*(Aq, \*(AqBob\*(Aq, \*(Aqsue\*(Aq, \*(Aqstalin*!*@*\*(Aq ); \& \& # $hashref will be: \& { \& modes => [ \*(Aq+o\*(Aq, \*(Aq+v\*(Aq, \*(Aq+b\*(Aq, \*(Aq\-i\*(Aq ], \& args => [ \*(AqBob\*(Aq, \*(Aqsue\*(Aq, \*(Aqstalin*!*@*\*(Aq ], \& } .Ve .ie n .SS """normalize_mask""" .el .SS "\f(CWnormalize_mask\fP" .IX Subsection "normalize_mask" Takes one parameter, a string representing an \s-1IRC\s0 mask. Returns a normalised full mask.
.PP Example: .PP .Vb 1 \& $fullbanmask = normalize_mask( \*(Aqstalin*\*(Aq ); \& \& # $fullbanmask will be: \*(Aqstalin*!*@*\*(Aq; .Ve .ie n .SS """matches_mask""" .el .SS "\f(CWmatches_mask\fP" .IX Subsection "matches_mask" Takes two parameters, a string representing an \s-1IRC\s0 mask and something to match against the \s-1IRC\s0 mask, such as a nick!user@hostname string. Returns a true value if they match, a false value otherwise. Optionally, one may pass the casemapping (see \f(CW\*(C`uc_irc\*(C'\fR), as this function uses \&\f(CW\*(C`uc_irc\*(C'\fR internally. .ie n .SS """matches_mask_array""" .el .SS "\f(CWmatches_mask_array\fP" .IX Subsection "matches_mask_array" Takes two array references, the first being a list of strings representing \&\s-1IRC\s0 masks, the second a list of strings to test against the masks. Returns an empty hashref if there are no matches. Otherwise, the keys will be the masks matched, each value being an arrayref of the strings that matched it. Optionally, one may pass the casemapping (see \f(CW\*(C`uc_irc\*(C'\fR), as this function uses \f(CW\*(C`uc_irc\*(C'\fR internally. .ie n .SS """unparse_mode_line""" .el .SS "\f(CWunparse_mode_line\fP" .IX Subsection "unparse_mode_line" Takes one argument, a string representing a number of mode changes. Returns a condensed version of the changes. .PP .Vb 2 \& my $mode_line = unparse_mode_line(\*(Aq+o+o+o\-v+v\*(Aq); \& # $mode_line is now \*(Aq+ooo\-v+v\*(Aq .Ve .ie n .SS """gen_mode_change""" .el .SS "\f(CWgen_mode_change\fP" .IX Subsection "gen_mode_change" Takes two arguments, strings representing a set of \s-1IRC\s0 user modes before and after a change. Returns a string representing what changed. .PP .Vb 2 \& my $mode_change = gen_mode_change(\*(Aqabcde\*(Aq, \*(AqbefmZ\*(Aq); \& # $mode_change is now \*(Aq\-acd+fmZ\*(Aq .Ve .ie n .SS """parse_user""" .el .SS "\f(CWparse_user\fP" .IX Subsection "parse_user" Takes one parameter, a string representing a user in the form nick!user@hostname.
In a scalar context it returns just the nickname. In a list context it returns a list consisting of the nick, user and hostname, respectively. .ie n .SS """is_valid_chan_name""" .el .SS "\f(CWis_valid_chan_name\fP" .IX Subsection "is_valid_chan_name" Takes one argument, a channel name to validate. Returns true if the channel name is valid, false otherwise. You can supply a second argument, an array reference of allowed channel prefix characters. Defaults to \f(CW\*(C`[\*(Aq#\*(Aq, \*(Aq&\*(Aq]\*(C'\fR. .ie n .SS """is_valid_nick_name""" .el .SS "\f(CWis_valid_nick_name\fP" .IX Subsection "is_valid_nick_name" Takes one argument, a nickname to validate. Returns true if the nickname is valid, false otherwise. .ie n .SS """numeric_to_name""" .el .SS "\f(CWnumeric_to_name\fP" .IX Subsection "numeric_to_name" Takes an \s-1IRC\s0 server numerical reply code (e.g. '001') as an argument, and returns the corresponding name (e.g. '\s-1RPL_WELCOME\s0'). .ie n .SS """name_to_numeric""" .el .SS "\f(CWname_to_numeric\fP" .IX Subsection "name_to_numeric" Takes an \s-1IRC\s0 server reply name (e.g. '\s-1RPL_WELCOME\s0') as an argument, and returns the corresponding numerical code (e.g. '001'). .ie n .SS """has_color""" .el .SS "\f(CWhas_color\fP" .IX Subsection "has_color" Takes one parameter, a string of \s-1IRC\s0 text. Returns true if it contains any \s-1IRC\s0 color codes, false otherwise. Useful if you want your bot to kick users for (ab)using colors. :) .ie n .SS """has_formatting""" .el .SS "\f(CWhas_formatting\fP" .IX Subsection "has_formatting" Takes one parameter, a string of \s-1IRC\s0 text. Returns true if it contains any \s-1IRC\s0 formatting codes, false otherwise. .ie n .SS """strip_color""" .el .SS "\f(CWstrip_color\fP" .IX Subsection "strip_color" Takes one parameter, a string of \s-1IRC\s0 text. Returns the string stripped of all \&\s-1IRC\s0 color codes.
.ie n .SS """strip_formatting""" .el .SS "\f(CWstrip_formatting\fP" .IX Subsection "strip_formatting" Takes one parameter, a string of \s-1IRC\s0 text. Returns the string stripped of all \&\s-1IRC\s0 formatting codes. .ie n .SS """decode_irc""" .el .SS "\f(CWdecode_irc\fP" .IX Subsection "decode_irc" This function takes a byte string (i.e. an unmodified \s-1IRC\s0 message) and returns a text string. Since the source encoding might have been \s-1UTF\-8\s0, you should store it with \s-1UTF\-8\s0 or some other Unicode encoding in your file/database/whatever to be safe. For a more detailed discussion, see \&\*(L"\s-1ENCODING\s0\*(R". .PP .Vb 1 \& use IRC::Utils qw(decode_irc); \& \& sub message_handler { \& my ($nick, $channel, $message) = @_; \& \& # not wise, $message is a byte string of unknown encoding \& print $message, "\en"; \& \& $message = decode_irc($message); \& \& # good, $message is a text string \& print $message, "\en"; \& } .Ve .SH "CONSTANTS" .IX Header "CONSTANTS" Use the following constants to add formatting and mIRC color codes to \s-1IRC\s0 messages. .PP Normal text: .PP .Vb 1 \& NORMAL .Ve .PP Formatting: .PP .Vb 5 \& BOLD \& UNDERLINE \& REVERSE \& ITALIC \& FIXED .Ve .PP Colors: .PP .Vb 10 \& WHITE \& BLACK \& BLUE \& GREEN \& RED \& BROWN \& PURPLE \& ORANGE \& YELLOW \& LIGHT_GREEN \& TEAL \& LIGHT_CYAN \& LIGHT_BLUE \& PINK \& GREY \& LIGHT_GREY .Ve .PP Individual non-color formatting codes can be cancelled with their corresponding constant, but you can also cancel all of them at once with \&\f(CW\*(C`NORMAL\*(C'\fR. To cancel the effect of color codes, you must use \f(CW\*(C`NORMAL\*(C'\fR, which of course has the side effect of cancelling all other formatting codes as well.
.PP .Vb 2 \& $msg = \*(AqThis word is \*(Aq.YELLOW.\*(Aqyellow\*(Aq.NORMAL.\*(Aq while this word is \*(Aq.BOLD.\*(Aqbold\*(Aq.BOLD; \& $msg = UNDERLINE.BOLD.\*(AqThis sentence is both underlined and bold.\*(Aq.NORMAL; .Ve .SH "ENCODING" .IX Header "ENCODING" .SS "Messages" .IX Subsection "Messages" The only encoding requirement the \s-1IRC\s0 protocol places on its messages is that they be 8\-bit and ASCII-compatible. This has resulted in most of the Western world settling on ASCII-compatible Latin\-1 (usually Microsoft's \&\s-1CP1252\s0, a Latin\-1 variant) as a convention. Recently, popular \s-1IRC\s0 clients (mIRC, xchat, certain irssi configurations) have begun sending a mixture of \&\s-1CP1252\s0 and \s-1UTF\-8\s0 over the wire to allow more characters without breaking backward compatibility (too much). They send \s-1CP1252\s0\-encoded messages if the characters fit within that encoding, otherwise falling back to \s-1UTF\-8\s0, and likewise autodetect the encoding (\s-1UTF\-8\s0 or \s-1CP1252\s0) of incoming messages. Since writing text with mixed encoding to a file, terminal, or database is not a good idea, you need a way to decode messages from \s-1IRC\s0. \&\f(CW\*(C`decode_irc\*(C'\fR will do that. .SS "Channel names" .IX Subsection "Channel names" The matter is complicated further by the fact that some servers allow non-ASCII characters in channel names. \s-1IRC\s0 modules generally don't explicitly encode or decode any \s-1IRC\s0 traffic, but they do have to concatenate parts of a message (e.g. a channel name and a message) before sending it over the wire. So when you do something like \&\f(CW\*(C`privmsg($channel, \*(Aq\*(ae\*(d-i\*(Aq)\*(C'\fR, where \f(CW$channel\fR is the unmodified channel name (a byte string) you got from an earlier \s-1IRC\s0 message, the channel name will get double-encoded when concatenated with your message (a non-ASCII text string) if the channel name contains non-ASCII bytes.
.PP To prevent this, you can't simply decode the channel name and then use it. \f(CW\*(Aq#\*(ae\*(d-i\*(Aq\fR in \s-1CP1252\s0 is not the same channel as \f(CW\*(Aq#\*(ae\*(d-i\*(Aq\fR in \&\s-1UTF\-8\s0, since they are encoded as different sequences of bytes, and the \s-1IRC\s0 server only cares about the byte representation. Therefore, when using a channel name you got from the server (e.g. when replying to a message), you should use the original byte string (before it has been decoded with \&\f(CW\*(C`decode_irc\*(C'\fR), and encode any other parameters (with \&\f(CW\*(C`encode_utf8\*(C'\fR) so that your message will be concatenated correctly. At some point, you'll probably want to print the channel name, write it to a log file or use it in a filename, so you'll eventually have to decode it, at which point the \s-1UTF\-8\s0 \f(CW\*(C`#\*(ae\*(d-i\*(C'\fR and \s-1CP1252\s0 \f(CW\*(C`#\*(ae\*(d-i\*(C'\fR will have to be considered equivalent. .PP .Vb 1 \& use Encode qw(encode_utf8 encode); \& use IRC::Utils qw(decode_irc); \& \& sub message_handler { \& # these three are all byte strings \& my ($nick, $channel, $message) = @_; \& \& # bad: if $channel has any non\-ASCII bytes, they will get double\-encoded \& privmsg($channel, \*(Aq\*(ae\*(d-i\*(Aq); \& \& # bad: if $message has any non\-ASCII bytes, they will get double\-encoded \& privmsg(\*(Aq#\*(ae\*(d-i\*(Aq, $message); \& \& # good: both are byte strings already, so they will concatenate correctly \& privmsg($channel, $message); \& \& # good: both are text strings (Latin1 as per Perl\*(Aqs default), so \& # they\*(Aqll be concatenated correctly \& privmsg(\*(Aq#\*(ae\*(d-i\*(Aq, \*(Aq\*(ae\*(d-i\*(Aq); \& \& # good: similar to the last one, except now they\*(Aqre using UTF\-8, which \& # means that the channel is actually not the same as above \& use utf8; \& privmsg(\*(Aq#\*(ae\*(d-i\*(Aq, \*(Aq\*(ae\*(d-i\*(Aq); \& \& # good: $channel and $msg_bytes are both byte strings \& my $msg_bytes = encode_utf8(\*(Aq\*(ae\*(d-i\*(Aq); \&
privmsg($channel, $msg_bytes); \& \& # good: $utf8_bytes and $message are both byte strings \& # here we\*(Aqre sending a message to the utf8\-encoded #\*(ae\*(d-i \& my $utf8_bytes = encode_utf8(\*(Aq#\*(ae\*(d-i\*(Aq); \& privmsg($utf8_bytes, $message); \& \& # good: $cp1252_bytes and $message are both byte strings \& # here we\*(Aqre sending a message to the cp1252\-encoded #\*(ae\*(d-i \& my $cp1252_bytes = encode(\*(Aqcp1252\*(Aq, \*(Aq#\*(ae\*(d-i\*(Aq); \& privmsg($cp1252_bytes, $message); \& \& # bad: $channel is in an undetermined encoding \& log_message("Got message from $channel"); \& \& # good: using the decoded version of $channel \& log_message("Got message from ".decode_irc($channel)); \& } .Ve .PP See also Encode, perluniintro, perlunitut, perlunicode, and perlunifaq. .SH "AUTHOR" .IX Header "AUTHOR" Hinrik O\*:rn Sigur\*(d-sson (\f(CW\*(C`Hinrik\*(C'\fR @ irc.perl.org, or \f(CW\*(C`literal\*(C'\fR @ FreeNode). .PP Chris \f(CW\*(C`BinGOs\*(C'\fR Williams .SH "SEE ALSO" .IX Header "SEE ALSO" POE::Component::IRC .PP POE::Component::Server::IRC