.\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.16) .\" .\" Standard preamble: .\" ======================================================================== .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is turned on, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .ie \nF \{\ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . nr % 0 . rr F .\} .el \{\ . de IX .. .\} .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). .\" Fear. Run. Save yourself. No user-serviceable parts. . \" fudge factors for nroff and troff .if n \{\ . ds #H 0 . ds #V .8m . ds #F .3m . ds #[ \f1 . ds #] \fP .\} .if t \{\ . ds #H ((1u-(\\\\n(.fu%2u))*.13m) . ds #V .6m . ds #F 0 . ds #[ \& . ds #] \& .\} . \" simple accents for nroff and troff .if n \{\ . ds ' \& . 
ds ` \& . ds ^ \& . ds , \& . ds ~ ~ . ds / .\} .if t \{\ . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' .\} . \" troff and (daisy-wheel) nroff accents .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' .ds 8 \h'\*(#H'\(*b\h'-\*(#H' .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] .ds ae a\h'-(\w'a'u*4/10)'e .ds Ae A\h'-(\w'A'u*4/10)'E . \" corrections for vroff .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' . \" for low resolution devices (crt and lpr) .if \n(.H>23 .if \n(.V>19 \ \{\ . ds : e . ds 8 ss . ds o a . ds d- d\h'-1'\(ga . ds D- D\h'-1'\(hy . ds th \o'bp' . ds Th \o'LP' . ds ae ae . ds Ae AE .\} .rm #[ #] #H #V #F C .\" ======================================================================== .\" .IX Title "HTML::HTML5::Writer 3pm" .TH HTML::HTML5::Writer 3pm "2012-06-12" "perl v5.14.2" "User Contributed Perl Documentation" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l .nh .SH "NAME" HTML::HTML5::Writer \- output a DOM as HTML5 .SH "SYNOPSIS" .IX Header "SYNOPSIS" .Vb 1 \& use HTML::HTML5::Writer; \& \& my $writer = HTML::HTML5::Writer\->new; \& print $writer\->document($dom); .Ve .SH "DESCRIPTION" .IX Header "DESCRIPTION" This module outputs XML::LibXML::Node objects as \s-1HTML5\s0 strings. 
It works well on \s-1DOM\s0 trees that represent valid \s-1HTML/XHTML\s0 documents; less well on other \s-1DOM\s0 trees. .SS "Constructor" .IX Subsection "Constructor" .ie n .IP """$writer = HTML::HTML5::Writer\->new(%opts)""" 4 .el .IP "\f(CW$writer = HTML::HTML5::Writer\->new(%opts)\fR" 4 .IX Item "$writer = HTML::HTML5::Writer->new(%opts)" Create a new writer object. Options include: .RS 4 .IP "\(bu" 4 \&\fBmarkup\fR .Sp Choose which serialisation of \s-1HTML5\s0 to use: 'html' or 'xhtml'. .IP "\(bu" 4 \&\fBpolyglot\fR .Sp Set to true in order to attempt to produce output which works as both \&\s-1XML\s0 and \s-1HTML\s0. Set to false to produce content that might not. .Sp If you don't explicitly set it, then it defaults to false for \s-1HTML\s0, and true for \s-1XHTML\s0. .IP "\(bu" 4 \&\fBdoctype\fR .Sp Set this to a string to choose which <!\s-1DOCTYPE\s0> tag to output. Note, this purely sets the <!\s-1DOCTYPE\s0> tag and does not change how the rest of the document is output. This really is just a plain string literal... .Sp .Vb 2 \& # Yes, this works... 
\& my $w = HTML::HTML5::Writer\->new(doctype => \*(Aq<!doctype html>\*(Aq); .Ve .Sp The following constants are provided for convenience: \&\fB\s-1DOCTYPE_HTML2\s0\fR, \&\fB\s-1DOCTYPE_HTML32\s0\fR, \&\fB\s-1DOCTYPE_HTML4\s0\fR (latest stable strict \s-1HTML\s0 4.x), \&\fB\s-1DOCTYPE_HTML4_RDFA\s0\fR (latest stable \s-1HTML\s0 4.x+RDFa), \&\fB\s-1DOCTYPE_HTML40\s0\fR (strict), \&\fB\s-1DOCTYPE_HTML40_FRAMESET\s0\fR, \&\fB\s-1DOCTYPE_HTML40_LOOSE\s0\fR, \&\fB\s-1DOCTYPE_HTML40_STRICT\s0\fR, \&\fB\s-1DOCTYPE_HTML401\s0\fR (strict), \&\fB\s-1DOCTYPE_HTML401_FRAMESET\s0\fR, \&\fB\s-1DOCTYPE_HTML401_LOOSE\s0\fR, \&\fB\s-1DOCTYPE_HTML401_RDFA10\s0\fR, \&\fB\s-1DOCTYPE_HTML401_RDFA11\s0\fR, \&\fB\s-1DOCTYPE_HTML401_STRICT\s0\fR, \&\fB\s-1DOCTYPE_HTML5\s0\fR, \&\fB\s-1DOCTYPE_LEGACY\s0\fR (about:legacy\-compat), \&\fB\s-1DOCTYPE_NIL\s0\fR (empty string), \&\fB\s-1DOCTYPE_XHTML1\s0\fR (strict), \&\fB\s-1DOCTYPE_XHTML1_FRAMESET\s0\fR, \&\fB\s-1DOCTYPE_XHTML1_LOOSE\s0\fR, \&\fB\s-1DOCTYPE_XHTML1_STRICT\s0\fR, \&\fB\s-1DOCTYPE_XHTML11\s0\fR, \&\fB\s-1DOCTYPE_XHTML_BASIC\s0\fR, \&\fB\s-1DOCTYPE_XHTML_BASIC_10\s0\fR, \&\fB\s-1DOCTYPE_XHTML_BASIC_11\s0\fR, \&\fB\s-1DOCTYPE_XHTML_MATHML_SVG\s0\fR, \&\fB\s-1DOCTYPE_XHTML_RDFA\s0\fR (latest stable strict XHTML+RDFa), \&\fB\s-1DOCTYPE_XHTML_RDFA10\s0\fR, \&\fB\s-1DOCTYPE_XHTML_RDFA11\s0\fR. .Sp Defaults to \s-1DOCTYPE_HTML5\s0 for \s-1HTML\s0 and \s-1DOCTYPE_LEGACY\s0 for \s-1XHTML\s0. .IP "\(bu" 4 \&\fBcharset\fR .Sp This module always returns strings in Perl's internal utf8 encoding, but you can set the 'charset' option to 'ascii' to create output that would be suitable for re-encoding to \s-1ASCII\s0 (e.g. it will entity-encode characters which do not exist in \s-1ASCII\s0). .IP "\(bu" 4 \&\fBquote_attributes\fR .Sp Set this to true to force attributes to be quoted. If not explicitly set, the writer will automatically detect when attributes need quoting. 
.IP "\(bu" 4 \&\fBvoids\fR .Sp Set this to true to force void elements to always be terminated with '/>'. If not explicitly set, they'll only be terminated that way in polyglot or \&\s-1XHTML\s0 documents. .IP "\(bu" 4 \&\fBstart_tags\fR and \fBend_tags\fR .Sp Except in polyglot and \s-1XHTML\s0 documents, some elements allow their start and/or end tags to be omitted in certain circumstances. By setting these to true, you can prevent them from being omitted. .IP "\(bu" 4 \&\fBrefs\fR .Sp Special characters that can't be encoded as named entities need to be encoded as numeric character references instead. These can be expressed in decimal or hexadecimal. Setting this option to \&'dec' or 'hex' allows you to choose. The default is 'hex'. .RE .RS 4 .RE .SS "Public Methods" .IX Subsection "Public Methods" .ie n .IP """$writer\->document($node)""" 4 .el .IP "\f(CW$writer\->document($node)\fR" 4 .IX Item "$writer->document($node)" Outputs (i.e. returns a string that is) an XML::LibXML::Document as \s-1HTML\s0. .ie n .IP """$writer\->element($node)""" 4 .el .IP "\f(CW$writer\->element($node)\fR" 4 .IX Item "$writer->element($node)" Outputs an XML::LibXML::Element as \s-1HTML\s0. .ie n .IP """$writer\->attribute($node)""" 4 .el .IP "\f(CW$writer\->attribute($node)\fR" 4 .IX Item "$writer->attribute($node)" Outputs an XML::LibXML::Attr as \s-1HTML\s0. .ie n .IP """$writer\->text($node)""" 4 .el .IP "\f(CW$writer\->text($node)\fR" 4 .IX Item "$writer->text($node)" Outputs an XML::LibXML::Text as \s-1HTML\s0. .ie n .IP """$writer\->cdata($node)""" 4 .el .IP "\f(CW$writer\->cdata($node)\fR" 4 .IX Item "$writer->cdata($node)" Outputs an XML::LibXML::CDATASection as \s-1HTML\s0. .ie n .IP """$writer\->comment($node)""" 4 .el .IP "\f(CW$writer\->comment($node)\fR" 4 .IX Item "$writer->comment($node)" Outputs an XML::LibXML::Comment as \s-1HTML\s0. 
.ie n .IP """$writer\->pi($node)""" 4 .el .IP "\f(CW$writer\->pi($node)\fR" 4 .IX Item "$writer->pi($node)" Outputs an XML::LibXML::PI as \s-1HTML\s0. .ie n .IP """$writer\->doctype""" 4 .el .IP "\f(CW$writer\->doctype\fR" 4 .IX Item "$writer->doctype" Outputs the writer's \s-1DOCTYPE\s0. .ie n .IP """$writer\->encode_entities($string, characters=>$more)""" 4 .el .IP "\f(CW$writer\->encode_entities($string, characters=>$more)\fR" 4 .IX Item "$writer->encode_entities($string, characters=>$more)" Takes a string and returns the same string with some special characters replaced. These special characters do not include any of '&', '<', '>' or '"', but you can provide a string of additional characters to treat as special: .Sp .Vb 1 \& $encoded = $writer\->encode_entities($raw, characters=>\*(Aq&<>"\*(Aq); .Ve .ie n .IP """$writer\->encode_entity($char)""" 4 .el .IP "\f(CW$writer\->encode_entity($char)\fR" 4 .IX Item "$writer->encode_entity($char)" Returns \f(CW$char\fR entity-encoded. Encoding is done regardless of whether \&\f(CW$char\fR is \*(L"special\*(R" or not. .ie n .IP """$writer\->is_xhtml""" 4 .el .IP "\f(CW$writer\->is_xhtml\fR" 4 .IX Item "$writer->is_xhtml" Boolean indicating if \f(CW$writer\fR is configured to output \s-1XHTML\s0. .ie n .IP """$writer\->is_polyglot""" 4 .el .IP "\f(CW$writer\->is_polyglot\fR" 4 .IX Item "$writer->is_polyglot" Boolean indicating if \f(CW$writer\fR is configured to output polyglot \s-1HTML\s0. .ie n .IP """$writer\->should_force_start_tags""" 4 .el .IP "\f(CW$writer\->should_force_start_tags\fR" 4 .IX Item "$writer->should_force_start_tags" .PD 0 .ie n .IP """$writer\->should_force_end_tags""" 4 .el .IP "\f(CW$writer\->should_force_end_tags\fR" 4 .IX Item "$writer->should_force_end_tags" .PD Booleans indicating whether optional start and end tags should be forced. 
.ie n .IP """$writer\->should_quote_attributes""" 4 .el .IP "\f(CW$writer\->should_quote_attributes\fR" 4 .IX Item "$writer->should_quote_attributes" Boolean indicating whether attributes need to be quoted. .ie n .IP """$writer\->should_slash_voids""" 4 .el .IP "\f(CW$writer\->should_slash_voids\fR" 4 .IX Item "$writer->should_slash_voids" Boolean indicating whether void elements should be closed in the \s-1XHTML\s0 style. .SH "BUGS AND LIMITATIONS" .IX Header "BUGS AND LIMITATIONS" Certain \s-1DOM\s0 constructs cannot be output in non-XML \s-1HTML\s0. e.g. .PP .Vb 9 \& my $xhtml = <<XHTML; \& <html xmlns="http://www.w3.org/1999/xhtml"> \& <head><title>Test</title></head> \&
<body><hr>This text is within the HR element</hr></body> \& </html> \& XHTML \& my $dom = XML::LibXML\->new\->parse_string($xhtml); \& my $writer = HTML::HTML5::Writer\->new(markup=>\*(Aqhtml\*(Aq); \& print $writer\->document($dom); .Ve .PP There's no way to serialise that properly in \s-1HTML\s0. Right now this module just outputs that \s-1HR\s0 element with text contained within it, a la \s-1XHTML\s0. In future versions, it may emit a warning or throw an error. .PP In these cases, the HTML::HTML5::{Parser,Writer} combination is not round-trippable. .PP Outputting elements and attributes in foreign (non-XHTML) namespaces is implemented pretty naively and not thoroughly tested. I'd be interested in any feedback people have, especially on round-trippability of \s-1SVG\s0, MathML and RDFa content in \s-1HTML\s0. .PP Please report any bugs to <http://rt.cpan.org/>. .SH "SEE ALSO" .IX Header "SEE ALSO" HTML::HTML5::Parser, HTML::HTML5::Builder, HTML::HTML5::ToText, XML::LibXML. .SH "AUTHOR" .IX Header "AUTHOR" Toby Inkster <tobyink@cpan.org>. .SH "COPYRIGHT AND LICENSE" .IX Header "COPYRIGHT AND LICENSE" Copyright (C) 2010\-2012 by Toby Inkster. .PP This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.