.\" Automatically generated by Pod::Man 2.28 (Pod::Simple 3.29) .\" .\" Standard preamble: .\" ======================================================================== .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' . ds C` . ds C' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is turned on, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .\" .\" Avoid warning from groff about undefined register 'F'. .de IX .. .nr rF 0 .if \n(.g .if rF .nr rF 1 .if (\n(rF:(\n(.g==0)) \{ . if \nF \{ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . if !\nF==2 \{ . nr % 0 . nr F 2 . \} . \} .\} .rr rF .\" ======================================================================== .\" .IX Title "HTML::Defang 3pm" .TH HTML::Defang 3pm "2015-12-27" "perl v5.22.1" "User Contributed Perl Documentation" .\" For nroff, turn off justification. 
Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l .nh .SH "NAME" HTML::Defang \- Cleans HTML as well as CSS of scripting and other executable contents, and neutralises XSS attacks. .SH "SYNOPSIS" .IX Header "SYNOPSIS" .Vb 1 \& my $InputHtml = ""; \& \& my $Defang = HTML::Defang\->new( \& context => $Self, \& fix_mismatched_tags => 1, \& tags_to_callback => [ qw(br embed img) ], \& tags_callback => \e&DefangTagsCallback, \& url_callback => \e&DefangUrlCallback, \& css_callback => \e&DefangCssCallback, \& attribs_to_callback => [ qw(border src) ], \& attribs_callback => \e&DefangAttribsCallback \& ); \& \& my $SanitizedHtml = $Defang\->defang($InputHtml); \& \& # Callback for custom handling specific HTML tags \& sub DefangTagsCallback { \& my ($Self, $Defang, $OpenAngle, $lcTag, $IsEndTag, $AttributeHash, $CloseAngle, $HtmlR, $OutR) = @_; \& \& # Explicitly defang this tag, even though safe \& return DEFANG_ALWAYS if $lcTag eq \*(Aqbr\*(Aq; \& \& # Explicitly whitelist this tag, even though unsafe \& return DEFANG_NONE if $lcTag eq \*(Aqembed\*(Aq; \& \& # I am not sure what to do with this tag, so process as HTML::Defang normally would \& return DEFANG_DEFAULT if $lcTag eq \*(Aqimg\*(Aq; \& } \& \& # Callback for custom handling URLs in HTML attributes as well as style tag/attribute declarations \& sub DefangUrlCallback { \& my ($Self, $Defang, $lcTag, $lcAttrKey, $AttrValR, $AttributeHash, $HtmlR) = @_; \& \& # Explicitly allow this URL in tag attributes or stylesheets \& return DEFANG_NONE if $$AttrValR =~ /safesite.com/i; \& \& # Explicitly defang this URL in tag attributes or stylesheets \& return DEFANG_ALWAYS if $$AttrValR =~ /evilsite.com/i; \& } \& \& # Callback for custom handling style tags/attributes \& sub DefangCssCallback { \& my ($Self, $Defang, $Selectors, $SelectorRules, $Tag, $IsAttr) = @_; \& my $i = 0; \& foreach (@$Selectors) { \& my $SelectorRule = $$SelectorRules[$i]; \& foreach my $KeyValueRules 
(@$SelectorRule) { \& foreach my $KeyValueRule (@$KeyValueRules) { \& my ($Key, $Value) = @$KeyValueRule; \& \& # Comment out any \*(Aq!important\*(Aq directive \& $$KeyValueRule[2] = DEFANG_ALWAYS if $Value =~ \*(Aq!important\*(Aq; \& \& # Comment out any \*(Aqposition:fixed;\*(Aq declaration \& $$KeyValueRule[2] = DEFANG_ALWAYS if $Key =~ \*(Aqposition\*(Aq && $Value =~ \*(Aqfixed\*(Aq; \& } \& } \& $i++; \& } \& } \& \& # Callback for custom handling HTML tag attributes \& sub DefangAttribsCallback { \& my ($Self, $Defang, $lcTag, $lcAttrKey, $AttrValR, $HtmlR) = @_; \& \& # Change all \*(Aqborder\*(Aq attribute values to zero. \& $$AttrValR = \*(Aq0\*(Aq if $lcAttrKey eq \*(Aqborder\*(Aq; \& \& # Defang all \*(Aqsrc\*(Aq attributes \& return DEFANG_ALWAYS if $lcAttrKey eq \*(Aqsrc\*(Aq; \& \& return DEFANG_NONE; \& } .Ve .SH "DESCRIPTION" .IX Header "DESCRIPTION" This module accepts an input \s-1HTML\s0 and/or \s-1CSS\s0 string and removes any executable code including scripting, embedded objects, applets, etc., and neutralises any \s-1XSS\s0 attacks. A whitelist based approach is used which means only \s-1HTML\s0 known to be safe is allowed through. .PP HTML::Defang uses a custom html tag parser. The parser has been designed and tested to work with nasty real world html and to try and emulate as closely as possible what browsers actually do with strange looking constructs. The test suite has been built based on examples from a range of sources such as http://ha.ckers.org/xss.html and http://imfo.ru/csstest/css_hacks/import.php to ensure that as many \s-1XSS\s0 attack scenarios as possible have been dealt with. .PP HTML::Defang can make callbacks to client code when it encounters the following: .IP "\(bu" 4 When a specified tag is parsed .IP "\(bu" 4 When a specified attribute is parsed .IP "\(bu" 4 When a \s-1URL\s0 is parsed as part of an \s-1HTML\s0 attribute, or \s-1CSS\s0 property value. 
.IP "\(bu" 4 When style data is parsed, as part of an \s-1HTML\s0 style attribute, or as part of an \s-1HTML\s0