.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.42)
.\"
.\" Standard preamble:
.\" ========================================================================
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.de Ve \" End verbatim text
.ft R
.fi
..
.\" Set up some character translations and predefined strings.  \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
.\" nothing in troff, for use with C<>.
.tr \(*W-
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
.    ds -- \(*W-
.    ds PI pi
.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
.    ds L" ""
.    ds R" ""
.    ds C` ""
.    ds C' ""
'br\}
.el\{\
.    ds -- \|\(em\|
.    ds PI \(*p
.    ds L" ``
.    ds R" ''
.    ds C`
.    ds C'
'br\}
.\"
.\" Escape single quotes in literal strings from groff's Unicode transform.
.ie \n(.g .ds Aq \(aq
.el       .ds Aq '
.\"
.\" If the F register is >0, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD.  Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.\"
.\" Avoid warning from groff about undefined register 'F'.
.de IX
..
.nr rF 0
.if \n(.g .if rF .nr rF 1
.if (\n(rF:(\n(.g==0)) \{\
.    if \nF \{\
.        de IX
.        tm Index:\\$1\t\\n%\t"\\$2"
..
.        if !\nF==2 \{\
.            nr % 0
.            nr F 2
.        \}
.    \}
.\}
.rr rF
.\" ========================================================================
.\"
.IX Title "HTML::HTML5::Sanity 3pm"
.TH HTML::HTML5::Sanity 3pm "2022-07-11" "perl v5.34.0" "User Contributed Perl Documentation"
.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
.nh
.SH "NAME"
HTML::HTML5::Sanity \- make HTML5 DOM trees less insane
.SH "SYNOPSIS"
.IX Header "SYNOPSIS"
.Vb 2
\&  use HTML::HTML5::Parser;
\&  use HTML::HTML5::Sanity;
\&  
\&  my $parser    = HTML::HTML5::Parser\->new;
\&  my $html5_dom = $parser\->parse_file(\*(Aqhttp://example.com/\*(Aq);
\&  my $sane_dom  = fix_document($html5_dom);
.Ve
.SH "DESCRIPTION"
.IX Header "DESCRIPTION"
The Document Object Model (\s-1DOM\s0) generated by HTML::HTML5::Parser meets
the requirements of the \s-1HTML5\s0 spec, but will probably catch a lot of
people by surprise.
.PP
The main oddity is that elements and attributes which appear to be
namespaced are not really. For example, the following element:
.PP
.Vb 1
\&  <div xml:lang="fr">...</div>
.Ve
.PP
Looks like it should be parsed so that it has an attribute \*(L"lang\*(R" in
the \s-1XML\s0 namespace. Not so. It will really be parsed as having the
attribute \*(L"xml:lang\*(R" in the null namespace.
.ie n .IP """fix_document($document)""" 4
.el .IP "\f(CWfix_document($document)\fR" 4
.IX Item "fix_document($document)"
.Vb 1
\&  $sane_dom = fix_document($html5_dom);
.Ve
.Sp
Returns a modified copy of the \s-1DOM\s0, leaving the original \s-1DOM\s0
unmodified.
.ie n .IP """fix_element($element_node, $new_document_node, \e%namespaces)""" 4
.el .IP "\f(CWfix_element($element_node, $new_document_node, \e%namespaces)\fR" 4
.IX Item "fix_element($element_node, $new_document_node, %namespaces)"
Don't use this. Not exported.
.ie n .IP """fix_attribute($attribute_node, $new_element_node, \e%namespaces)""" 4
.el .IP "\f(CWfix_attribute($attribute_node, $new_element_node, \e%namespaces)\fR" 4
.IX Item "fix_attribute($attribute_node, $new_element_node, %namespaces)"
Don't use this. Not exported.
.ie n .IP "$HTML::HTML5::Sanity::FIX_LANG_ATTRIBUTES" 4
.el .IP "\f(CW$HTML::HTML5::Sanity::FIX_LANG_ATTRIBUTES\fR" 4
.IX Item "$HTML::HTML5::Sanity::FIX_LANG_ATTRIBUTES"
.Vb 2
\&  $HTML::HTML5::Sanity::FIX_LANG_ATTRIBUTES = 2;
\&  $sane_dom = fix_document($html5_dom);
.Ve
.Sp
If set to 1 (the default), the package will detect invalid values in
\&\f(CW@lang\fR and \f(CW@xml:lang\fR, and remove the attribute if it is invalid. If set
to 2, it will also attempt to canonicalise the value (e.g. '\s-1EN_GB\s0' will
be converted to 'en\-GB'). If set to 0, then the value of language
attributes is not checked.
.SH "BUGS"
.IX Header "BUGS"
Please report any bugs to <http://rt.cpan.org/>.
.SH "SEE ALSO"
.IX Header "SEE ALSO"
HTML::HTML5::Parser, XML::LibXML, Task::HTML5.
.SH "AUTHOR"
.IX Header "AUTHOR"
Toby Inkster <tobyink@cpan.org>.
.SH "COPYRIGHT AND LICENSE"
.IX Header "COPYRIGHT AND LICENSE"
Copyright (C) 2009\-2014 by Toby Inkster
.PP
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
.SH "DISCLAIMER OF WARRANTIES"
.IX Header "DISCLAIMER OF WARRANTIES"
\&\s-1THIS PACKAGE IS PROVIDED \*(L"AS IS\*(R" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.\s0