.\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.16)
.\"
.\" Standard preamble:
.\" ========================================================================
.\" Sp: insert a vertical gap where a .PP paragraph break cannot be used.
.\" nroff output gets one full blank line; troff output gets half a line.
.de Sp
.if n .sp
.if t .sp .5v
..
.\" Vb N: open a verbatim block.  Selects the CW font, turns filling off so
.\" that input lines are set as written, then passes the macro's first
.\" argument to .ne (callers in this page pass the number of lines in the
.\" block).  The doubled backslash defers \$1 until the macro is called.
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.\" Ve: close a verbatim block opened with Vb.  Switches to the R font and
.\" turns filling back on; takes no arguments.
.de Ve \" End verbatim text
.ft R
.fi
..
.\" Set up some character translations and predefined strings.  \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
.\" nothing in troff, for use with C<>.
.\" Translate \(*W (capital omega) to "-" on output; the -- string below is
.\" built from it.
.tr \(*W-
.\" C+ sets "C++" with the plus signs two points smaller, shifted up by .1v
.\" and tightened horizontally.
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.\" nroff branch: plain-text stand-ins (the two .if lines adjust the dash
.\" for specific device resolutions).  troff branch: em dash, Greek pi and
.\" paired `` '' quotes; C` and C' are left undefined there.
.ie n \{\
.    ds -- \(*W-
.    ds PI pi
.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
.    ds L" ""
.    ds R" ""
.    ds C` ""
.    ds C' ""
'br\}
.el\{\
.    ds -- \|\(em\|
.    ds PI \(*p
.    ds L" ``
.    ds R" ''
'br\}
.\"
.\" Escape single quotes in literal strings from groff's Unicode transform.
.\" Register .g is nonzero when the formatter is groff: Aq is then the \(aq
.\" glyph; in any other roff it is a plain apostrophe.
.ie \n(.g .ds Aq \(aq
.el       .ds Aq '
.\"
.\" If the F register is turned on, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD.  Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.\" IX takes two arguments: the entry kind ($1, e.g. Title, Header, Item)
.\" and the entry text ($2).
.\" When register F is nonzero, each call emits one .tm message of the form
.\"     Index:<kind> \t <current page number> \t "<text>"
.\" and this setup also resets the page-number register % to 0 and removes F.
.\" Otherwise IX is defined with an empty body, so every .IX line in the
.\" page is a no-op.
.ie \nF \{\
.    de IX
.    tm Index:\\$1\t\\n%\t"\\$2"
..
.    nr % 0
.    rr F
.\}
.el \{\
.    de IX
..
.\}
.\"
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
.    \" fudge factors for nroff and troff
.if n \{\
.    ds #H 0
.    ds #V .8m
.    ds #F .3m
.    ds #[ \f1
.    ds #] \fP
.\}
.if t \{\
.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
.    ds #V .6m
.    ds #F 0
.    ds #[ \&
.    ds #] \&
.\}
.    \" simple accents for nroff and troff
.if n \{\
.    ds ' \&
.    ds ` \&
.    ds ^ \&
.    ds , \&
.    ds ~ ~
.    ds /
.\}
.if t \{\
.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
.\}
.    \" troff and (daisy-wheel) nroff accents
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
.ds ae a\h'-(\w'a'u*4/10)'e
.ds Ae A\h'-(\w'A'u*4/10)'E
.    \" corrections for vroff
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
.    \" for low resolution devices (crt and lpr)
.if \n(.H>23 .if \n(.V>19 \
\{\
.    ds : e
.    ds 8 ss
.    ds o a
.    ds d- d\h'-1'\(ga
.    ds D- D\h'-1'\(hy
.    ds th \o'bp'
.    ds Th \o'LP'
.    ds ae ae
.    ds Ae AE
.\}
.rm #[ #] #H #V #F C
.\" ========================================================================
.\"
.IX Title "HTML::HTML5::Writer 3pm"
.TH HTML::HTML5::Writer 3pm "2012-06-12" "perl v5.14.2" "User Contributed Perl Documentation"
.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
.nh
.SH "NAME"
HTML::HTML5::Writer \- output a DOM as HTML5
.SH "SYNOPSIS"
.IX Header "SYNOPSIS"
.Vb 1
\& use HTML::HTML5::Writer;
\& 
\& my $writer = HTML::HTML5::Writer\->new;
\& print $writer\->document($dom);
.Ve
.SH "DESCRIPTION"
.IX Header "DESCRIPTION"
This module outputs XML::LibXML::Node objects as \s-1HTML5\s0 strings.
It works well on \s-1DOM\s0 trees that represent valid \s-1HTML/XHTML\s0
documents; less well on other \s-1DOM\s0 trees.
.SS "Constructor"
.IX Subsection "Constructor"
.ie n .IP """$writer = HTML::HTML5::Writer\->new(%opts)""" 4
.el .IP "\f(CW$writer = HTML::HTML5::Writer\->new(%opts)\fR" 4
.IX Item "$writer = HTML::HTML5::Writer->new(%opts)"
Create a new writer object. Options include:
.RS 4
.IP "\(bu" 4
\&\fBmarkup\fR
.Sp
Choose which serialisation of \s-1HTML5\s0 to use: 'html' or 'xhtml'.
.IP "\(bu" 4
\&\fBpolyglot\fR
.Sp
Set to true in order to attempt to produce output which works as both
\&\s-1XML\s0 and \s-1HTML\s0. Set to false to produce content that might not.
.Sp
If you don't explicitly set it, then it defaults to false for \s-1HTML\s0, and
true for \s-1XHTML\s0.
.IP "\(bu" 4
\&\fBdoctype\fR
.Sp
Set this to a string to choose which <!DOCTYPE> tag to output. Note, this
purely sets the <!DOCTYPE> tag and does not change how the rest of the
document is output. This really is just a plain string literal...
.Sp
.Vb 2
\& # Yes, this works...
\& my $w = HTML::HTML5::Writer\->new(doctype => \*(Aq<!doctype html>\*(Aq);
.Ve
.Sp
The following constants are provided for convenience:
\&\fB\s-1DOCTYPE_HTML2\s0\fR,
\&\fB\s-1DOCTYPE_HTML32\s0\fR,
\&\fB\s-1DOCTYPE_HTML4\s0\fR (latest stable strict \s-1HTML\s0 4.x),
\&\fB\s-1DOCTYPE_HTML4_RDFA\s0\fR (latest stable \s-1HTML\s0 4.x+RDFa),
\&\fB\s-1DOCTYPE_HTML40\s0\fR (strict),
\&\fB\s-1DOCTYPE_HTML40_FRAMESET\s0\fR,
\&\fB\s-1DOCTYPE_HTML40_LOOSE\s0\fR,
\&\fB\s-1DOCTYPE_HTML40_STRICT\s0\fR,
\&\fB\s-1DOCTYPE_HTML401\s0\fR (strict),
\&\fB\s-1DOCTYPE_HTML401_FRAMESET\s0\fR,
\&\fB\s-1DOCTYPE_HTML401_LOOSE\s0\fR,
\&\fB\s-1DOCTYPE_HTML401_RDFA10\s0\fR,
\&\fB\s-1DOCTYPE_HTML401_RDFA11\s0\fR,
\&\fB\s-1DOCTYPE_HTML401_STRICT\s0\fR,
\&\fB\s-1DOCTYPE_HTML5\s0\fR,
\&\fB\s-1DOCTYPE_LEGACY\s0\fR (about:legacy\-compat),
\&\fB\s-1DOCTYPE_NIL\s0\fR (empty string),
\&\fB\s-1DOCTYPE_XHTML1\s0\fR (strict),
\&\fB\s-1DOCTYPE_XHTML1_FRAMESET\s0\fR,
\&\fB\s-1DOCTYPE_XHTML1_LOOSE\s0\fR,
\&\fB\s-1DOCTYPE_XHTML1_STRICT\s0\fR,
\&\fB\s-1DOCTYPE_XHTML11\s0\fR,
\&\fB\s-1DOCTYPE_XHTML_BASIC\s0\fR,
\&\fB\s-1DOCTYPE_XHTML_BASIC_10\s0\fR,
\&\fB\s-1DOCTYPE_XHTML_BASIC_11\s0\fR,
\&\fB\s-1DOCTYPE_XHTML_MATHML_SVG\s0\fR,
\&\fB\s-1DOCTYPE_XHTML_RDFA\s0\fR (latest stable strict XHTML+RDFa),
\&\fB\s-1DOCTYPE_XHTML_RDFA10\s0\fR,
\&\fB\s-1DOCTYPE_XHTML_RDFA11\s0\fR.
.Sp
Defaults to \s-1DOCTYPE_HTML5\s0 for \s-1HTML\s0 and \s-1DOCTYPE_LEGACY\s0 for \s-1XHTML\s0.
.IP "\(bu" 4
\&\fBcharset\fR
.Sp
This module always returns strings in Perl's internal utf8 encoding, but
you can set the 'charset' option to 'ascii' to create output that would
be suitable for re-encoding to \s-1ASCII\s0 (e.g. it will entity-encode characters
which do not exist in \s-1ASCII\s0).
.IP "\(bu" 4
\&\fBquote_attributes\fR
.Sp
Set this to true to force attributes to be quoted. If not explicitly
set, the writer will automatically detect when attributes need quoting.
.IP "\(bu" 4
\&\fBvoids\fR
.Sp
Set this to true to force void elements to always be terminated with '/>'.
If not explicitly set, they'll only be terminated that way in polyglot or
\&\s-1XHTML\s0 documents.
.IP "\(bu" 4
\&\fBstart_tags\fR and \fBend_tags\fR
.Sp
Except in polyglot and \s-1XHTML\s0 documents, some elements allow their
start and/or end tags to be omitted in certain circumstances. By
setting these to true, you can prevent them from being omitted.
.IP "\(bu" 4
\&\fBrefs\fR
.Sp
Special characters that can't be encoded as named entities need
to be encoded as numeric character references instead. These
can be expressed in decimal or hexadecimal. Setting this option to
\&'dec' or 'hex' allows you to choose. The default is 'hex'.
.RE
.RS 4
.RE
.SS "Public Methods"
.IX Subsection "Public Methods"
.ie n .IP """$writer\->document($node)""" 4
.el .IP "\f(CW$writer\->document($node)\fR" 4
.IX Item "$writer->document($node)"
Outputs (i.e. returns a string that is) an XML::LibXML::Document as \s-1HTML\s0.
.ie n .IP """$writer\->element($node)""" 4
.el .IP "\f(CW$writer\->element($node)\fR" 4
.IX Item "$writer->element($node)"
Outputs an XML::LibXML::Element as \s-1HTML\s0.
.ie n .IP """$writer\->attribute($node)""" 4
.el .IP "\f(CW$writer\->attribute($node)\fR" 4
.IX Item "$writer->attribute($node)"
Outputs an XML::LibXML::Attr as \s-1HTML\s0.
.ie n .IP """$writer\->text($node)""" 4
.el .IP "\f(CW$writer\->text($node)\fR" 4
.IX Item "$writer->text($node)"
Outputs an XML::LibXML::Text as \s-1HTML\s0.
.ie n .IP """$writer\->cdata($node)""" 4
.el .IP "\f(CW$writer\->cdata($node)\fR" 4
.IX Item "$writer->cdata($node)"
Outputs an XML::LibXML::CDATASection as \s-1HTML\s0.
.ie n .IP """$writer\->comment($node)""" 4
.el .IP "\f(CW$writer\->comment($node)\fR" 4
.IX Item "$writer->comment($node)"
Outputs an XML::LibXML::Comment as \s-1HTML\s0.
.ie n .IP """$writer\->pi($node)""" 4
.el .IP "\f(CW$writer\->pi($node)\fR" 4
.IX Item "$writer->pi($node)"
Outputs an XML::LibXML::PI as \s-1HTML\s0.
.ie n .IP """$writer\->doctype""" 4
.el .IP "\f(CW$writer\->doctype\fR" 4
.IX Item "$writer->doctype"
Outputs the writer's \s-1DOCTYPE\s0.
.ie n .IP """$writer\->encode_entities($string, characters=>$more)""" 4
.el .IP "\f(CW$writer\->encode_entities($string, characters=>$more)\fR" 4
.IX Item "$writer->encode_entities($string, characters=>$more)"
Takes a string and returns the same string with some special characters
replaced. These special characters do not include any of '&', '<', '>'
or '"', but you can provide a string of additional characters to treat as
special:
.Sp
.Vb 1
\& $encoded = $writer\->encode_entities($raw, characters=>\*(Aq&<>"\*(Aq);
.Ve
.ie n .IP """$writer\->encode_entity($char)""" 4
.el .IP "\f(CW$writer\->encode_entity($char)\fR" 4
.IX Item "$writer->encode_entity($char)"
Returns \f(CW$char\fR entity-encoded. Encoding is done regardless of whether 
\&\f(CW$char\fR is \*(L"special\*(R" or not.
.ie n .IP """$writer\->is_xhtml""" 4
.el .IP "\f(CW$writer\->is_xhtml\fR" 4
.IX Item "$writer->is_xhtml"
Boolean indicating if \f(CW$writer\fR is configured to output \s-1XHTML\s0.
.ie n .IP """$writer\->is_polyglot""" 4
.el .IP "\f(CW$writer\->is_polyglot\fR" 4
.IX Item "$writer->is_polyglot"
Boolean indicating if \f(CW$writer\fR is configured to output polyglot \s-1HTML\s0.
.ie n .IP """$writer\->should_force_start_tags""" 4
.el .IP "\f(CW$writer\->should_force_start_tags\fR" 4
.IX Item "$writer->should_force_start_tags"
.PD 0
.ie n .IP """$writer\->should_force_end_tags""" 4
.el .IP "\f(CW$writer\->should_force_end_tags\fR" 4
.IX Item "$writer->should_force_end_tags"
.PD
Booleans indicating whether optional start and end tags should be forced.
.ie n .IP """$writer\->should_quote_attributes""" 4
.el .IP "\f(CW$writer\->should_quote_attributes\fR" 4
.IX Item "$writer->should_quote_attributes"
Boolean indicating whether attributes need to be quoted.
.ie n .IP """$writer\->should_slash_voids""" 4
.el .IP "\f(CW$writer\->should_slash_voids\fR" 4
.IX Item "$writer->should_slash_voids"
Boolean indicating whether void elements should be closed in the \s-1XHTML\s0 style.
.SH "BUGS AND LIMITATIONS"
.IX Header "BUGS AND LIMITATIONS"
Certain \s-1DOM\s0 constructs cannot be output in non-XML \s-1HTML\s0. For example:
.PP
.Vb 9
\& my $xhtml = <<XHTML;
\& <html xmlns="http://www.w3.org/1999/xhtml">
\&  <head><title>Test</title></head>
\&  <body><hr>This text is within the HR element</hr></body>
\& </html>
\& XHTML
\& my $dom    = XML::LibXML\->new\->parse_string($xhtml);
\& my $writer = HTML::HTML5::Writer\->new(markup=>\*(Aqhtml\*(Aq);
\& print $writer\->document($dom);
.Ve
.PP
There's no way to serialise that properly in \s-1HTML\s0. Right
now this module just outputs that \s-1HR\s0 element with text contained
within it, a la \s-1XHTML\s0. In future versions, it may emit a warning
or throw an error.
.PP
In these cases, the HTML::HTML5::{Parser,Writer} combination is
not round-trippable.
.PP
Outputting elements and attributes in foreign (non-XHTML)
namespaces is implemented pretty naively and not thoroughly
tested. I'd be interested in any feedback people have, especially
on round-trippability of \s-1SVG\s0, MathML and RDFa content in \s-1HTML\s0.
.PP
Please report any bugs to <http://rt.cpan.org/>.
.SH "SEE ALSO"
.IX Header "SEE ALSO"
HTML::HTML5::Parser,
HTML::HTML5::Builder,
HTML::HTML5::ToText,
XML::LibXML.
.SH "AUTHOR"
.IX Header "AUTHOR"
Toby Inkster <tobyink@cpan.org>.
.SH "COPYRIGHT AND LICENSE"
.IX Header "COPYRIGHT AND LICENSE"
Copyright (C) 2010\-2012 by Toby Inkster.
.PP
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.