.\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.16)
.\"
.\" Standard preamble:
.\" ========================================================================
.\" Sp: insert vertical space at a point where .PP is not used.  When the
.\" troff condition (t) holds the gap is .5v; when the nroff condition (n)
.\" holds a bare .sp is issued.  This page calls it between the paragraphs
.\" that follow an .IP item.
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.\" Vb: open a verbatim block.  Selects the CW font, switches to no-fill
.\" mode (.nf), and hands the caller's first argument to .ne.  The doubled
.\" backslash keeps the argument reference from being expanded until the
.\" macro is actually called.  This page invokes it as ".Vb 4" ahead of
.\" the SYNOPSIS listing.
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.\" Ve: close a verbatim block opened with Vb.  Sets the font to R (it does
.\" not restore whichever font was active before Vb) and turns fill mode
.\" back on with .fi.
.de Ve \" End verbatim text
.ft R
.fi
..
.\" Set up some character translations and predefined strings.  \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
.\" nothing in troff, for use with C<>.
.\"
.\" Layout of the definitions below: the .tr request maps capital omega to
.\" a hyphen on output, and C+ is defined unconditionally.  The .ie n
.\" branch then supplies the nroff versions of the strings (with two
.\" Diablo-specific overrides of --, selected by the .H register and the
.\" size of 1m), and the .el branch supplies the troff versions.  The troff
.\" branch defines no C` or C' string at all.
.\" NOTE(review): the nroff branch gives L", R", C` and C' the value "",
.\" which looks like it produces a plain double quote rather than the `'
.\" pair mentioned in the comment above; confirm against the generator's
.\" quote settings.
.tr \(*W-
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
.    ds -- \(*W-
.    ds PI pi
.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
.    ds L" ""
.    ds R" ""
.    ds C` ""
.    ds C' ""
'br\}
.el\{\
.    ds -- \|\(em\|
.    ds PI \(*p
.    ds L" ``
.    ds R" ''
'br\}
.\"
.\" Escape single quotes in literal strings from groff's Unicode transform.
.\" The .g register is tested to detect groff: when it is non-zero Aq is
.\" defined as the aq special character, otherwise as a plain apostrophe.
.ie \n(.g .ds Aq \(aq
.el       .ds Aq '
.\"
.\" If the F register is turned on, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD.  Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.\"
.\" IX is called with two arguments throughout this page (an entry kind
.\" such as Title, Header, Subsection or Item, then the entry text).
.\" With register F non-zero, IX uses .tm to print "Index:" followed by the
.\" first argument, a tab, the current page number, a tab, and the second
.\" argument in double quotes; the page-number register is then set to 0
.\" and F is removed.  With F zero, IX is defined with an empty body, so
.\" every .IX call in the page does nothing.
.ie \nF \{\
.    de IX
.    tm Index:\\$1\t\\n%\t"\\$2"
..
.    nr % 0
.    rr F
.\}
.el \{\
.    de IX
..
.\}
.\"
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
.    \" fudge factors for nroff and troff
.\" #H, #V, #F, #[ and #] are scratch strings: the accent definitions that
.\" follow interpolate them, and the .rm request at the end of the accent
.\" section removes them again.  One set of values is chosen when the
.\" nroff condition holds and another when the troff condition holds.
.if n \{\
.    ds #H 0
.    ds #V .8m
.    ds #F .3m
.    ds #[ \f1
.    ds #] \fP
.\}
.if t \{\
.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
.    ds #V .6m
.    ds #F 0
.    ds #[ \&
.    ds #] \&
.\}
.    \" simple accents for nroff and troff
.\" Under nroff the accent strings for ' ` ^ and , are defined as the
.\" zero-width non-printing character, ~ stays a literal tilde, and / is
.\" defined with an empty value.  Under troff each string marks the current
.\" horizontal position in register ":", moves left by a fraction of the
.\" .w register (adjusted by #H where present), draws the accent, and then
.\" moves back to the marked position.
.\" NOTE(review): the ' definition delimits its final horizontal motion
.\" with double quotes where its siblings use apostrophes, presumably
.\" because the accent drawn is itself an apostrophe; verify before editing.
.if n \{\
.    ds ' \&
.    ds ` \&
.    ds ^ \&
.    ds , \&
.    ds ~ ~
.    ds /
.\}
.if t \{\
.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
.\}
.    \" troff and (daisy-wheel) nroff accents
.\" Strings defined unconditionally here:
.\"   :      two dots raised by #V over the preceding character
.\"   8      the *b special character, offset horizontally by #H
.\"   o      the de special character raised by .3n, wrapped in #[ and #]
.\"   d- D-  the pd character / a capital D, each combined with a raised hy
.\"   th Th  built from the letters I and o with size and position changes
.\"   ae Ae  the two letters with the second moved back over the first
.\" The two trailing requests redefine ~ and ^ only when the v condition
.\" is true.  The #H, #V, #F, #[ and #] references use a single backslash,
.\" so they are interpolated when these .ds lines are read, not later.
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
.ds ae a\h'-(\w'a'u*4/10)'e
.ds Ae A\h'-(\w'A'u*4/10)'E
.    \" corrections for vroff
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
.    \" for low resolution devices (crt and lpr)
.\" When the .H register exceeds 23 and the .V register exceeds 19, the
.\" strings : 8 o d- D- th Th ae and Ae are redefined as plain-letter or
.\" overstruck approximations.  The nested .if is continued onto the next
.\" line with a trailing backslash, so nothing may be inserted between
.\" those two lines.  The closing .rm discards the scratch strings used
.\" while building the accents; it also names C, which is not defined
.\" anywhere in the visible preamble.
.if \n(.H>23 .if \n(.V>19 \
\{\
.    ds : e
.    ds 8 ss
.    ds o a
.    ds d- d\h'-1'\(ga
.    ds D- D\h'-1'\(hy
.    ds th \o'bp'
.    ds Th \o'LP'
.    ds ae ae
.    ds Ae AE
.\}
.rm #[ #] #H #V #F C
.\" ========================================================================
.\"
.IX Title "XML::Easy::Text 3pm"
.TH XML::Easy::Text 3pm "2011-11-16" "perl v5.14.2" "User Contributed Perl Documentation"
.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
.nh
.SH "NAME"
XML::Easy::Text \- XML parsing and serialisation
.SH "SYNOPSIS"
.IX Header "SYNOPSIS"
.Vb 4
\&        use XML::Easy::Text qw(
\&                xml10_read_content_object xml10_read_element
\&                xml10_read_document xml10_read_extparsedent_object
\&        );
\&
\&        $content = xml10_read_content_object($text);
\&        $element = xml10_read_element($text);
\&        $element = xml10_read_document($text);
\&        $content = xml10_read_extparsedent_object($text);
\&
\&        use XML::Easy::Text qw(
\&                xml10_write_content xml10_write_element
\&                xml10_write_document xml10_write_extparsedent
\&        );
\&
\&        $text = xml10_write_content($content);
\&        $text = xml10_write_element($element);
\&        $text = xml10_write_document($element, "UTF\-8");
\&        $text = xml10_write_extparsedent($content, "UTF\-8");
.Ve
.SH "DESCRIPTION"
.IX Header "DESCRIPTION"
This module supplies functions that parse and serialise \s-1XML\s0 data
according to the \s-1XML\s0 1.0 specification.
.PP
This module is oriented towards the use of \s-1XML\s0 to represent data
for interchange purposes, rather than the use of \s-1XML\s0 as markup of
principally textual data.  It does not perform any schema processing,
and does not interpret DTDs or any other kind of schema.  It adheres
strictly to the \s-1XML\s0 specification, in all its awkward details, except
for the aforementioned DTDs.
.PP
\&\s-1XML\s0 data in memory is represented using a tree of
XML::Easy::Content and XML::Easy::Element
objects.  Such a tree encapsulates all the structure and data content
of an \s-1XML\s0 element or document, without any irrelevant detail resulting
from the textual syntax.
These node trees are readily manipulated by the functions
in XML::Easy::NodeBasics.
.PP
The functions of this module are implemented
in C for performance, with a pure Perl backup version (which has good
performance compared to other pure Perl parsers) for systems that can't
handle \s-1XS\s0 modules.
.SH "FUNCTIONS"
.IX Header "FUNCTIONS"
All functions \f(CW\*(C`die\*(C'\fR on error.
.SS "Parsing"
.IX Subsection "Parsing"
These functions take textual \s-1XML\s0 and extract the abstract \s-1XML\s0 content.
In the terminology of the \s-1XML\s0 specification, they constitute a
non-validating processor: they check for well-formedness of the \s-1XML\s0,
but not for adherence of the content to any schema.
.PP
The inputs (to be parsed) for these functions are always character
strings.  \s-1XML\s0 text is frequently encoded using \s-1UTF\-8\s0, or some other
Unicode encoding, so that it can contain characters from the full
Unicode repertoire.  In that case, something must perform \s-1UTF\-8\s0 decoding
(or decoding of some other character encoding) to convert the octets of
a file to the characters on which these functions operate.  A Perl I/O
layer can do the job (see perlio), or it can be performed explicitly
using the \f(CW\*(C`decode\*(C'\fR function in the Encode module.
.IP "xml10_read_content_object(\s-1TEXT\s0)" 4
.IX Item "xml10_read_content_object(TEXT)"
\&\fI\s-1TEXT\s0\fR must be a character string.  It is parsed against the \fBcontent\fR
production of the \s-1XML\s0 1.0 grammar; i.e., as a sequence of the kind of
matter that can appear between the start-tag and end-tag of an element.
Returns a reference to an XML::Easy::Content object.
.Sp
Normally one would not want to use this function directly, but prefer the
higher-level \f(CW\*(C`xml10_read_document\*(C'\fR function.  This function exists for
the construction of custom \s-1XML\s0 parsers in situations that don't match
the full \s-1XML\s0 grammar.
.IP "xml10_read_content_twine(\s-1TEXT\s0)" 4
.IX Item "xml10_read_content_twine(TEXT)"
Performs the same parsing job as \*(L"xml10_read_content_object\*(R",
but returns the resulting content chunk in the form of twine
(see \*(L"Twine\*(R" in XML::Easy::NodeBasics) rather than a content object.
.Sp
The returned array must not be subsequently modified.  If possible,
it will be marked as read-only in order to prevent modification.
.IP "xml10_read_content(\s-1TEXT\s0)" 4
.IX Item "xml10_read_content(TEXT)"
Deprecated alias for \*(L"xml10_read_content_twine\*(R".
.IP "xml10_read_element(\s-1TEXT\s0)" 4
.IX Item "xml10_read_element(TEXT)"
\&\fI\s-1TEXT\s0\fR must be a character string.  It is parsed against the \fBelement\fR
production of the \s-1XML\s0 1.0 grammar; i.e., as an item bracketed by tags
and containing content that may recursively include other elements.
Returns a reference to an XML::Easy::Element object.
.Sp
Normally one would not want to use this function directly, but prefer the
higher-level \f(CW\*(C`xml10_read_document\*(C'\fR function.  This function exists for
the construction of custom \s-1XML\s0 parsers in situations that don't match
the full \s-1XML\s0 grammar.
.IP "xml10_read_document(\s-1TEXT\s0)" 4
.IX Item "xml10_read_document(TEXT)"
\&\fI\s-1TEXT\s0\fR must be a character string.  It is parsed against the \fBdocument\fR
production of the \s-1XML\s0 1.0 grammar; i.e., as a root element (possibly
containing subelements) optionally preceded and followed by non-content
matter, possibly headed by an \s-1XML\s0 declaration.  (A document type
declaration is \fInot\fR accepted; this module does not process schemata.)
Returns a reference to an XML::Easy::Element object which represents
the root element.  Nothing is returned relating to the \s-1XML\s0 declaration
or other non-content matter.
.Sp
This is the most likely function to use to process incoming \s-1XML\s0 data.
Beware that the encoding declaration in the \s-1XML\s0 declaration, if any, does
not affect the interpretation of the input as a sequence of characters.
.IP "xml10_read_extparsedent_object(\s-1TEXT\s0)" 4
.IX Item "xml10_read_extparsedent_object(TEXT)"
\&\fI\s-1TEXT\s0\fR must be a character string.  It is parsed against the
\&\fBextParsedEnt\fR production of the \s-1XML\s0 1.0 grammar; i.e., as a sequence
of content (containing character data and subelements), possibly
headed by a text declaration (which is similar to, but not the same
as, an \s-1XML\s0 declaration).
Returns a reference to an XML::Easy::Content object.
.Sp
This is a relatively obscure part of the \s-1XML\s0 grammar, used when a
subpart of a document is stored in a separate file.  You're more likely
to require the \f(CW\*(C`xml10_read_document\*(C'\fR function.
.IP "xml10_read_extparsedent_twine(\s-1TEXT\s0)" 4
.IX Item "xml10_read_extparsedent_twine(TEXT)"
Performs the same parsing job as \*(L"xml10_read_extparsedent_object\*(R",
but returns the resulting content chunk in the form of twine
(see \*(L"Twine\*(R" in XML::Easy::NodeBasics) rather than a content object.
.Sp
The returned array must not be subsequently modified.  If possible,
it will be marked as read-only in order to prevent modification.
.IP "xml10_read_extparsedent(\s-1TEXT\s0)" 4
.IX Item "xml10_read_extparsedent(TEXT)"
Deprecated alias for \*(L"xml10_read_extparsedent_twine\*(R".
.SS "Serialisation"
.IX Subsection "Serialisation"
These functions take abstract \s-1XML\s0 data and serialise it as textual \s-1XML\s0.
They do not perform indentation, default attribute suppression, or any
other schema-dependent processing.
.PP
The outputs of these functions are always character strings.  \s-1XML\s0 text
is frequently encoded using \s-1UTF\-8\s0, or some other Unicode encoding,
so that it can contain characters from the full Unicode repertoire.
In that case, something must perform \s-1UTF\-8\s0 encoding (or encoding in some
other character encoding) to convert the characters generated by these
functions to the octets of a file.  A Perl I/O layer can do the job
(see perlio), or it can be performed explicitly using the \f(CW\*(C`encode\*(C'\fR
function in the Encode module.
.IP "xml10_write_content(\s-1CONTENT\s0)" 4
.IX Item "xml10_write_content(CONTENT)"
\&\fI\s-1CONTENT\s0\fR must be a reference to either an XML::Easy::Content
object or a twine array (see \*(L"Twine\*(R" in XML::Easy::NodeBasics).
The \s-1XML\s0 1.0 textual representation of that content is returned.
.IP "xml10_write_element(\s-1ELEMENT\s0)" 4
.IX Item "xml10_write_element(ELEMENT)"
\&\fI\s-1ELEMENT\s0\fR must be a reference to an XML::Easy::Element object.
The \s-1XML\s0 1.0 textual representation of that element is returned.
.IP "xml10_write_document(\s-1ELEMENT\s0[, \s-1ENCODING\s0])" 4
.IX Item "xml10_write_document(ELEMENT[, ENCODING])"
\&\fI\s-1ELEMENT\s0\fR must be a reference to an XML::Easy::Element object.
The \s-1XML\s0 1.0 textual form of a document with that element as the root
element is returned.  The document includes an \s-1XML\s0 declaration.
If \fI\s-1ENCODING\s0\fR is supplied, it must be a valid character encoding
name, and the \s-1XML\s0 declaration specifies it in an encoding declaration.
(The returned string consists of unencoded characters regardless of the
encoding specified.)
.IP "xml10_write_extparsedent(\s-1CONTENT\s0[, \s-1ENCODING\s0])" 4
.IX Item "xml10_write_extparsedent(CONTENT[, ENCODING])"
\&\fI\s-1CONTENT\s0\fR must be a reference to either an XML::Easy::Content
object or a twine array (see \*(L"Twine\*(R" in XML::Easy::NodeBasics).
The \s-1XML\s0 1.0 textual form of an external
parsed entity encapsulating that content is returned.  If \fI\s-1ENCODING\s0\fR is
supplied, it must be a valid character encoding name, and the returned
entity includes a text declaration that specifies the encoding name in
an encoding declaration.  (The returned string consists of unencoded
characters regardless of the encoding specified.)
.SH "SEE ALSO"
.IX Header "SEE ALSO"
XML::Easy::NodeBasics,
XML::Easy::Syntax,
<http://www.w3.org/TR/REC\-xml/>
.SH "AUTHOR"
.IX Header "AUTHOR"
Andrew Main (Zefram) <zefram@fysh.org>
.SH "COPYRIGHT"
.IX Header "COPYRIGHT"
Copyright (C) 2008, 2009 PhotoBox Ltd
.PP
Copyright (C) 2009, 2010, 2011 Andrew Main (Zefram) <zefram@fysh.org>
.SH "LICENSE"
.IX Header "LICENSE"
This module is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.