.\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.16) .\" .\" Standard preamble: .\" ======================================================================== .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is turned on, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .ie \nF \{\ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . nr % 0 . rr F .\} .el \{\ . de IX .. .\} .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). .\" Fear. Run. Save yourself. No user-serviceable parts. . \" fudge factors for nroff and troff .if n \{\ . ds #H 0 . ds #V .8m . ds #F .3m . ds #[ \f1 . ds #] \fP .\} .if t \{\ . ds #H ((1u-(\\\\n(.fu%2u))*.13m) . ds #V .6m . ds #F 0 . ds #[ \& . ds #] \& .\} . \" simple accents for nroff and troff .if n \{\ . ds ' \& . 
ds ` \& . ds ^ \& . ds , \& . ds ~ ~ . ds / .\} .if t \{\ . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' .\} . \" troff and (daisy-wheel) nroff accents .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' .ds 8 \h'\*(#H'\(*b\h'-\*(#H' .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] .ds ae a\h'-(\w'a'u*4/10)'e .ds Ae A\h'-(\w'A'u*4/10)'E . \" corrections for vroff .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' . \" for low resolution devices (crt and lpr) .if \n(.H>23 .if \n(.V>19 \ \{\ . ds : e . ds 8 ss . ds o a . ds d- d\h'-1'\(ga . ds D- D\h'-1'\(hy . ds th \o'bp' . ds Th \o'LP' . ds ae ae . ds Ae AE .\} .rm #[ #] #H #V #F C .\" ======================================================================== .\" .IX Title "RDF::RDFa::Parser 3pm" .TH RDF::RDFa::Parser 3pm "2012-06-05" "perl v5.14.2" "User Contributed Perl Documentation" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. 
.if n .ad l .nh .SH "NAME" RDF::RDFa::Parser \- flexible RDFa parser .SH "SYNOPSIS" .IX Header "SYNOPSIS" If you're wanting to work with an RDF::Trine::Model that can be queried with \s-1SPARQL\s0, etc: .PP .Vb 5 \& use RDF::RDFa::Parser; \& my $url = \*(Aqhttp://example.com/document.html\*(Aq; \& my $options = RDF::RDFa::Parser::Config\->new(\*(Aqxhtml\*(Aq, \*(Aq1.1\*(Aq); \& my $rdfa = RDF::RDFa::Parser\->new_from_url($url, $options); \& my $model = $rdfa\->graph; .Ve .PP For dealing with local data: .PP .Vb 5 \& use RDF::RDFa::Parser; \& my $base_url = \*(Aqhttp://example.com/document.html\*(Aq; \& my $options = RDF::RDFa::Parser::Config\->new(\*(Aqxhtml\*(Aq, \*(Aq1.1\*(Aq); \& my $rdfa = RDF::RDFa::Parser\->new($markup, $base_url, $options); \& my $model = $rdfa\->graph; .Ve .PP A simple set of operations for working with Open Graph Protocol data: .PP .Vb 6 \& use RDF::RDFa::Parser; \& my $url = \*(Aqhttp://www.rottentomatoes.com/m/net/\*(Aq; \& my $options = RDF::RDFa::Parser::Config\->tagsoup; \& my $rdfa = RDF::RDFa::Parser\->new_from_url($url, $options); \& print $rdfa\->opengraph(\*(Aqtitle\*(Aq) . "\en"; \& print $rdfa\->opengraph(\*(Aqimage\*(Aq) . "\en"; .Ve .SH "DESCRIPTION" .IX Header "DESCRIPTION" RDF::TrineX::Parser::RDFa provides a saner interface for this module. If you are new to parsing RDFa with Perl, then that's the best place to start. .SS "Forthcoming \s-1API\s0 Changes" .IX Subsection "Forthcoming API Changes" Some of the logic regarding host language and RDFa version guessing is likely to be removed from RDF::RDFa::Parser and RDF::RDFa::Parser::Config, and shifted into RDF::TrineX::Parser::RDFa instead. 
.SS "Constructors" .IX Subsection "Constructors" .ie n .IP """$p = RDF::RDFa::Parser\->new($markup, $base, [$config], [$storage])""" 4 .el .IP "\f(CW$p = RDF::RDFa::Parser\->new($markup, $base, [$config], [$storage])\fR" 4 .IX Item "$p = RDF::RDFa::Parser->new($markup, $base, [$config], [$storage])" This method creates a new RDF::RDFa::Parser object and returns it. .Sp The \f(CW$markup\fR variable may contain an \s-1XHTML/XML\s0 string, or a XML::LibXML::Document. If a string, the document is parsed using XML::LibXML::Parser or HTML::HTML5::Parser, depending on the configuration in \f(CW$config\fR. \s-1XML\s0 well-formedness errors will cause the function to die. .Sp \&\f(CW$base\fR is a \s-1URL\s0 used to resolve relative links found in the document. .Sp \&\f(CW$config\fR optionally holds an RDF::RDFa::Parser::Config object which determines the set of rules used to parse the RDFa. It defaults to XHTML+RDFa 1.1. .Sp \&\fBAdvanced usage note:\fR \f(CW$storage\fR optionally holds an RDF::Trine::Store object. If undef, then a new temporary store is created. .ie n .IP """$p = RDF::RDFa::Parser\->new_from_url($url, [$config], [$storage])""" 4 .el .IP "\f(CW$p = RDF::RDFa::Parser\->new_from_url($url, [$config], [$storage])\fR" 4 .IX Item "$p = RDF::RDFa::Parser->new_from_url($url, [$config], [$storage])" .PD 0 .ie n .IP """$p = RDF::RDFa::Parser\->new_from_uri($url, [$config], [$storage])""" 4 .el .IP "\f(CW$p = RDF::RDFa::Parser\->new_from_uri($url, [$config], [$storage])\fR" 4 .IX Item "$p = RDF::RDFa::Parser->new_from_uri($url, [$config], [$storage])" .PD \&\f(CW$url\fR is a \s-1URL\s0 to fetch and parse, or an HTTP::Response object. .Sp \&\f(CW$config\fR optionally holds an RDF::RDFa::Parser::Config object which determines the set of rules used to parse the RDFa. The default is to determine the configuration by looking at the \s-1HTTP\s0 response Content-Type header; it's probably sensible to keep the default. 
.Sp \&\f(CW$storage\fR optionally holds an RDF::Trine::Store object. If undef, then a new temporary store is created. .Sp This function can also be called as \f(CW\*(C`new_from_url\*(C'\fR or \f(CW\*(C`new_from_uri\*(C'\fR. Same thing. .ie n .IP """$p = RDF::RDFa::Parser\->new_from_response($response, [$config], [$storage])""" 4 .el .IP "\f(CW$p = RDF::RDFa::Parser\->new_from_response($response, [$config], [$storage])\fR" 4 .IX Item "$p = RDF::RDFa::Parser->new_from_response($response, [$config], [$storage])" \&\f(CW$response\fR is an \f(CW\*(C`HTTP::Response\*(C'\fR object. .Sp Otherwise the same as \f(CW\*(C`new_from_url\*(C'\fR. .SS "Public Methods" .IX Subsection "Public Methods" .ie n .IP """$p\->graph""" 4 .el .IP "\f(CW$p\->graph\fR" 4 .IX Item "$p->graph" This will return an RDF::Trine::Model containing all the RDFa data found on the page. .Sp \&\fBAdvanced usage note:\fR If passed a graph \s-1URI\s0 as a parameter, will return a single named graph from within the page. This feature is only useful if you're using named graphs. .ie n .IP """$p\->graphs""" 4 .el .IP "\f(CW$p\->graphs\fR" 4 .IX Item "$p->graphs" \&\fBAdvanced usage only.\fR .Sp Will return a hashref of all named graphs, where the graph name is a key and the value is a RDF::Trine::Model tied to a temporary storage. .Sp This method is only useful if you're using named graphs. .ie n .IP """$p\->opengraph([$property])""" 4 .el .IP "\f(CW$p\->opengraph([$property])\fR" 4 .IX Item "$p->opengraph([$property])" If \f(CW$property\fR is provided, will return the value or list of values (if called in list context) for that Open Graph Protocol property. (In pure \&\s-1RDF\s0 terms, it returns the non-bnode objects of triples where the subject is the document base \s-1URI\s0; and the predicate is \f(CW$property\fR, with non-URI \f(CW$property\fR strings taken as having the implicit prefix \&'http://ogp.me/ns#'. 
There is no distinction between literal and non-literal values; literal datatypes and languages are dropped.) .Sp If \f(CW$property\fR is omitted, returns a list of possible properties. .Sp Example: .Sp .Vb 8 \& foreach my $property (sort $p\->opengraph) \& { \& print "$property :\en"; \& foreach my $val (sort $p\->opengraph($property)) \& { \& print " * $val\en"; \& } \& } .Ve .Sp See also: <http://ogp.me/>. .ie n .IP """$p\->dom""" 4 .el .IP "\f(CW$p\->dom\fR" 4 .IX Item "$p->dom" Returns the parsed XML::LibXML::Document. .ie n .IP """$p\->uri( [$other_uri] )""" 4 .el .IP "\f(CW$p\->uri( [$other_uri] )\fR" 4 .IX Item "$p->uri( [$other_uri] )" Returns the base \s-1URI\s0 of the document being parsed. This will usually be the same as the base \s-1URI\s0 provided to the constructor, but may differ if the document contains an \s-1HTML\s0 <base> element. .Sp Optionally it may be passed a parameter \- an absolute or relative \s-1URI\s0 \- in which case it returns the same \s-1URI\s0 which it was passed as a parameter, but as an absolute \s-1URI\s0, resolved relative to the document's base \s-1URI\s0. .Sp This seems like two unrelated functions, but if you consider the consequence of passing a relative \s-1URI\s0 consisting of a zero-length string, it in fact makes sense. .ie n .IP """$p\->errors""" 4 .el .IP "\f(CW$p\->errors\fR" 4 .IX Item "$p->errors" Returns a list of errors and warnings that occurred during parsing. .ie n .IP """$p\->processor_graph""" 4 .el .IP "\f(CW$p\->processor_graph\fR" 4 .IX Item "$p->processor_graph" As per \f(CW\*(C`$p\->errors\*(C'\fR but returns data as an \s-1RDF\s0 model. .ie n .IP """$p\->output_graph""" 4 .el .IP "\f(CW$p\->output_graph\fR" 4 .IX Item "$p->output_graph" An alias for \f(CW\*(C`graph\*(C'\fR, but does not accept a parameter. .ie n .IP """$p\->processor_and_output_graph""" 4 .el .IP "\f(CW$p\->processor_and_output_graph\fR" 4 .IX Item "$p->processor_and_output_graph" Union of the above two graphs.
.ie n .IP """$p\->consume""" 4 .el .IP "\f(CW$p\->consume\fR" 4 .IX Item "$p->consume" \&\fBAdvanced usage only.\fR .Sp The document is parsed for RDFa. As of RDF::RDFa::Parser 1.09x, this is called automatically when needed; you probably don't need to touch it unless you're doing interesting things with callbacks. .Sp Calling \f(CW\*(C`$p\->consume(survive => 1)\*(C'\fR will avoid crashing (e.g. when the markup provided cannot be parsed), and instead make more errors available in \f(CW\*(C`$p\->errors\*(C'\fR. .ie n .IP """$p\->set_callbacks(\e%callbacks)""" 4 .el .IP "\f(CW$p\->set_callbacks(\e%callbacks)\fR" 4 .IX Item "$p->set_callbacks(%callbacks)" \&\fBAdvanced usage only.\fR .Sp Set callback functions for the parser to call on certain events. These are only necessary if you want to do something especially unusual. .Sp .Vb 6 \& $p\->set_callbacks({ \& \*(Aqpretriple_resource\*(Aq => sub { ... } , \& \*(Aqpretriple_literal\*(Aq => sub { ... } , \& \*(Aqontriple\*(Aq => undef , \& \*(Aqonprefix\*(Aq => \e&some_function , \& }); .Ve .Sp Either of the two pretriple callbacks can be set to the string 'print' instead of a coderef. This enables built-in callbacks for printing Turtle to \s-1STDOUT\s0. .Sp For details of the callback functions, see the section \s-1CALLBACKS\s0. If used, \f(CW\*(C`set_callbacks\*(C'\fR must be called \fIbefore\fR \f(CW\*(C`consume\*(C'\fR. \f(CW\*(C`set_callbacks\*(C'\fR returns a reference to the parser object itself. .ie n .IP """$p\->element_subjects""" 4 .el .IP "\f(CW$p\->element_subjects\fR" 4 .IX Item "$p->element_subjects" \&\fBAdvanced usage only.\fR .Sp Gets/sets a hashref of { xpath => RDF::Trine::Node } mappings. .Sp This is not touched during normal RDFa parsing, only being used by the \f(CW@role\fR and \&\f(CW@cite\fR features where \s-1RDF\s0 resources (i.e. URIs and blank nodes) are needed to represent \s-1XML\s0 elements themselves. .SH "CALLBACKS" .IX Header "CALLBACKS" Several callback functions are provided. 
These may be set using the \f(CW\*(C`set_callbacks\*(C'\fR function, which takes a hashref of keys pointing to coderefs. The keys are named for the event to fire the callback on. .SS "ontriple" .IX Subsection "ontriple" This is called once a triple is ready to be added to the graph. (After the pretriple callbacks.) The parameters passed to the callback function are: .IP "\(bu" 4 A reference to the \f(CW\*(C`RDF::RDFa::Parser\*(C'\fR object .IP "\(bu" 4 A hashref of relevant \f(CW\*(C`XML::LibXML::Element\*(C'\fR objects (subject, predicate, object, graph, current) .IP "\(bu" 4 An RDF::Trine::Statement object. .PP The callback should return 1 to tell the parser to skip this triple (not add it to the graph); return 0 otherwise. The callback may modify the RDF::Trine::Statement object. .SS "onprefix" .IX Subsection "onprefix" This is called when a new \s-1CURIE\s0 prefix is discovered. The parameters passed to the callback function are: .IP "\(bu" 4 A reference to the \f(CW\*(C`RDF::RDFa::Parser\*(C'\fR object .IP "\(bu" 4 A reference to the \f(CW\*(C`XML::LibXML::Element\*(C'\fR being parsed .IP "\(bu" 4 The prefix (string, e.g. \*(L"foaf\*(R") .IP "\(bu" 4 The expanded \s-1URI\s0 (string, e.g. \*(L"http://xmlns.com/foaf/0.1/\*(R") .PP The return value of this callback is currently ignored, but you should return 0 in case future versions of this module assign significance to the return value. .SS "ontoken" .IX Subsection "ontoken" This is called when a \s-1CURIE\s0 or term has been expanded. The parameters are: .IP "\(bu" 4 A reference to the \f(CW\*(C`RDF::RDFa::Parser\*(C'\fR object .IP "\(bu" 4 A reference to the \f(CW\*(C`XML::LibXML::Element\*(C'\fR being parsed .IP "\(bu" 4 The \s-1CURIE\s0 or token as a string (e.g. \*(L"foaf:name\*(R" or \*(L"Stylesheet\*(R") .IP "\(bu" 4 The fully expanded \s-1URI\s0 .PP The callback function must return a fully expanded \s-1URI\s0, or if it wants the \s-1CURIE\s0 to be ignored, undef. 
.SS "onerror" .IX Subsection "onerror" This is called when an error occurs: .IP "\(bu" 4 A reference to the \f(CW\*(C`RDF::RDFa::Parser\*(C'\fR object .IP "\(bu" 4 The error level (RDF::RDFa::Parser::ERR_ERROR or RDF::RDFa::Parser::ERR_WARNING) .IP "\(bu" 4 An error code .IP "\(bu" 4 An error message .IP "\(bu" 4 A hash of other information .PP The return value of this callback is currently ignored, but you should return 0 in case future versions of this module assign significance to the return value. .PP If you do not define an onerror callback, then errors will be output via \s-1STDERR\s0 and warnings will be silent. Either way, you can retrieve errors after parsing using the \f(CW\*(C`errors\*(C'\fR method. .SS "pretriple_resource" .IX Subsection "pretriple_resource" \&\fBThis callback is deprecated \- use ontriple instead.\fR .PP This is called when a triple has been found, but before preparing the triple for adding to the model. It is only called for triples with a non-literal object value. .PP The parameters passed to the callback function are: .IP "\(bu" 4 A reference to the \f(CW\*(C`RDF::RDFa::Parser\*(C'\fR object .IP "\(bu" 4 A reference to the \f(CW\*(C`XML::LibXML::Element\*(C'\fR being parsed .IP "\(bu" 4 Subject \s-1URI\s0 or bnode (string) .IP "\(bu" 4 Predicate \s-1URI\s0 (string) .IP "\(bu" 4 Object \s-1URI\s0 or bnode (string) .IP "\(bu" 4 Graph \s-1URI\s0 or bnode (string or undef) .PP The callback should return 1 to tell the parser to skip this triple (not add it to the graph); return 0 otherwise. .SS "pretriple_literal" .IX Subsection "pretriple_literal" \&\fBThis callback is deprecated \- use ontriple instead.\fR .PP This is the equivalent of pretriple_resource, but is only called for triples with a literal object value. 
.PP The parameters passed to the callback function are: .IP "\(bu" 4 A reference to the \f(CW\*(C`RDF::RDFa::Parser\*(C'\fR object .IP "\(bu" 4 A reference to the \f(CW\*(C`XML::LibXML::Element\*(C'\fR being parsed .IP "\(bu" 4 Subject \s-1URI\s0 or bnode (string) .IP "\(bu" 4 Predicate \s-1URI\s0 (string) .IP "\(bu" 4 Object literal (string) .IP "\(bu" 4 Datatype \s-1URI\s0 (string or undef) .IP "\(bu" 4 Language (string or undef) .IP "\(bu" 4 Graph \s-1URI\s0 or bnode (string or undef) .PP Beware: sometimes both a datatype \fIand\fR a language will be passed. This goes beyond the normal \s-1RDF\s0 data model. .PP The callback should return 1 to tell the parser to skip this triple (not add it to the graph); return 0 otherwise. .SH "FEATURES" .IX Header "FEATURES" Most features are configurable using RDF::RDFa::Parser::Config. .SS "RDFa Versions" .IX Subsection "RDFa Versions" RDF::RDFa::Parser supports RDFa versions 1.0 and 1.1. .PP 1.1 is currently a moving target; support is experimental. .PP 1.1 is the default, but this can be configured using RDF::RDFa::Parser::Config. .SS "Host Languages" .IX Subsection "Host Languages" RDF::RDFa::Parser supports various different RDFa host languages: .IP "\(bu" 4 \&\fB\s-1XHTML\s0\fR .Sp As per the XHTML+RDFa 1.0 and XHTML+RDFa 1.1 specifications. .IP "\(bu" 4 \&\fB\s-1HTML\s0 4\fR .Sp Uses an \s-1HTML5\s0 (sic) parser; uses \f(CW@lang\fR instead of \f(CW@xml:lang\fR; keeps prefixes and terms case-insensitive; recognises the \f(CW@rel\fR relations defined in the \s-1HTML\s0 4 specification. Otherwise the same as \s-1XHTML\s0. .IP "\(bu" 4 \&\fB\s-1HTML5\s0\fR .Sp Uses an \s-1HTML5\s0 parser; uses \f(CW@lang\fR as well as \f(CW@xml:lang\fR; keeps prefixes and terms case-insensitive; recognises the \f(CW@rel\fR relations defined in the \s-1HTML5\s0 draft specification. Otherwise the same as \s-1XHTML\s0. .IP "\(bu" 4 \&\fB\s-1XML\s0\fR .Sp This is implemented as per the RDFa Core 1.1 specification.
There is also support for \*(L"RDFa Core 1.0\*(R", for which no specification exists, but has been reverse-engineered by applying the differences between XHTML+RDFa 1.1 and RDFa Core 1.1 to the XHTML+RDFa 1.0 specification. .Sp Embedded chunks of \s-1RDF/XML\s0 within \s-1XML\s0 are supported. .IP "\(bu" 4 \&\fB\s-1SVG\s0\fR .Sp For now, a synonym for \s-1XML\s0. .IP "\(bu" 4 \&\fBAtom\fR .Sp The <feed> and <entry> elements are treated specially, setting a new subject; IANA-registered rel keywords are recognised. .Sp By passing \f(CW\*(C`atom_parser=>1\*(C'\fR as a Config option, you can also handle Atom's native semantics. (Uses XML::Atom::OWL. If this module is not installed, this option is silently ignored.) .Sp Otherwise, the same as \s-1XML\s0. .IP "\(bu" 4 \&\fBDataRSS\fR .Sp Defines some default prefixes. Otherwise, the same as Atom. .IP "\(bu" 4 \&\fBOpenDocument \s-1XML\s0\fR .Sp That is, \s-1XML\s0 content formatted along the lines of 'content.xml' in OpenDocument files. .Sp Supports OpenDocument bookmarked ranges used as typed or plain object literals (though not \s-1XML\s0 literals); expects RDFa attributes in the \s-1XHTML\s0 namespace instead of in no namespace. Otherwise, the same as \s-1XML\s0. .IP "\(bu" 4 \&\fBOpenDocument\fR .Sp That is, a \s-1ZIP\s0 file containing OpenDocument \s-1XML\s0 files. RDF::RDFa::Parser will do all the unzipping and combining for you, so you don't have to. The unregistered \*(L"jar:\*(R" \s-1URI\s0 scheme is used to refer to files within the \s-1ZIP\s0. .SS "Embedded \s-1RDF/XML\s0" .IX Subsection "Embedded RDF/XML" Though a rarely used feature, \s-1XHTML\s0 allows other \s-1XML\s0 markup languages to be directly embedded into it. In particular, chunks of \s-1RDF/XML\s0 can be included in \s-1XHTML\s0. While this is not common in \s-1XHTML\s0, it's seen quite often in \s-1SVG\s0 and other \s-1XML\s0 markup languages. .PP When RDF::RDFa::Parser encounters a chunk of \s-1RDF/XML\s0 in a document it's parsing (i.e.
an element called '\s-1RDF\s0' with namespace \&'http://www.w3.org/1999/02/22\-rdf\-syntax\-ns#'), there are three different courses of action it can take: .IP "0. Continue straight through it." 4 .IX Item "0. Continue straight through it." This is the behaviour that XHTML+RDFa seems to suggest is the right option. It should mostly not do any harm: triples encoded in \s-1RDF/XML\s0 will be generally ignored (though the chunk itself could theoretically end up as part of an \s-1XML\s0 literal). It will waste a bit of time though. .IP "1. Parse the \s-1RDF/XML\s0." 4 .IX Item "1. Parse the RDF/XML." The parser will parse the \s-1RDF/XML\s0 properly. If named graphs are enabled, any triples will be added to a separate graph. This is the behaviour that \s-1SVG\s0 Tiny 1.2 seems to suggest is the correct thing to do. .IP "2. Skip the chunk." 4 .IX Item "2. Skip the chunk." This will skip over the \s-1RDF\s0 element entirely, and thus save you a bit of time. .PP You can decide which path to take by setting the 'embedded_rdfxml' Config option. For \s-1HTML\s0 and \s-1XHTML\s0, you probably want to set embedded_rdfxml to '0' (the default) or '2' (a little faster). For other \s-1XML\s0 markup languages (e.g. \s-1SVG\s0 or Atom), then you probably want to set it to '1'. .PP (There's also an option '3' which controls how embedded \s-1RDF/XML\s0 interacts with named graphs, but this is only really intended for internal use, parsing OpenDocument.) .SS "Named Graphs" .IX Subsection "Named Graphs" The parser has support for named graphs within a single RDFa document. To switch this on, use the 'graph' Config option. .PP See also . .PP The name of the attribute which indicates graph URIs is by default 'graph', but can be changed using the 'graph_attr' Config option. This option accepts Clark Notation to specify a namespaced attribute. By default, the attribute value is interpreted like the 'about' attribute (i.e.
CURIEs, URIs, etc), but if you set the 'graph_type' Config option to 'id', it will be treated as setting a fragment identifier (like the 'id' attribute). .PP The 'graph_default' Config option allows you to set the default graph URI/bnode identifier. .PP Once you're using named graphs, the \f(CW\*(C`graphs\*(C'\fR method becomes useful: it returns a hashref of { graph_uri => trine_model } pairs. The optional parameter to the \f(CW\*(C`graph\*(C'\fR method also becomes useful. .PP OpenDocument (\s-1ZIP\s0) host language support makes internal use of named graphs, so if you're parsing OpenDocument, tinker with the graph Config options at your own risk! .SS "Auto Config" .IX Subsection "Auto Config" RDF::RDFa::Parser has a lot of different Config options to play with. Sometimes it might be useful to allow the page being parsed to control some of these options. If you switch on the 'auto_config' Config option, pages can do this. .PP A page can set options using a specially crafted <meta> tag: .PP .Vb 2 \& <meta name="http://search.cpan.org/dist/RDF\-RDFa\-Parser/#auto_config" \& content="xhtml_lang=1&amp;xml_lang=0" /> .Ve .PP Note that the \f(CW\*(C`content\*(C'\fR attribute is an application/x\-www\-form\-urlencoded string (which must then be HTML-escaped of course). Semicolons may be used instead of ampersands, as these tend to look nicer: .PP .Vb 2 \& <meta name="http://search.cpan.org/dist/RDF\-RDFa\-Parser/#auto_config" \& content="xhtml_lang=1;xml_lang=0" /> .Ve .PP It's possible to use auto config outside \s-1XHTML\s0 (e.g. in Atom or \&\s-1SVG\s0) using namespaces: .PP .Vb 3 \& <xhtml:meta xmlns:xhtml="http://www.w3.org/1999/xhtml" \& name="http://search.cpan.org/dist/RDF\-RDFa\-Parser/#auto_config" \& content="xhtml_lang=0;xml_base=2;atom_elements=1" /> .Ve .PP Any Config option may be given using auto config, except 'use_rtnlx', 'dom_parser', and of course 'auto_config' itself. .SS "Profiles" .IX Subsection "Profiles" Support for Profiles (an experimental RDFa 1.1 feature) was added in version 1.09_00, but dropped after version 1.096, because the feature was removed from draft specs. .SH "BUGS" .IX Header "BUGS" RDF::RDFa::Parser 0.21 passed all approved tests in the XHTML+RDFa test suite at the time of its release.
.PP RDF::RDFa::Parser 0.22 (used in conjunction with HTML::HTML5::Parser 0.01 and HTML::HTML5::Sanity 0.01) additionally passes all approved tests in the HTML4+RDFa and HTML5+RDFa test suites at the time of its release; except test cases 0113 and 0121, which the author of this module believes mandate incorrect \s-1HTML\s0 parsing. .PP RDF::RDFa::Parser 1.096_01 passes all approved tests on the default graph (not the processor graph) in the RDFa 1.1 test suite for language versions 1.0 and host languages xhtml1, html4 and html5, with the following exceptions which are skipped: .IP "\(bu" 4 \&\fB0140\fR \- wilful violation, pending proof that the test is backed up by the spec. .IP "\(bu" 4 \&\fB0198\fR \- an \s-1XML\s0 canonicalisation test that may be dropped in the future. .IP "\(bu" 4 \&\fB0212\fR \- wilful violation, as passing this test would require regressing on the old RDFa 1.0 test suite. .IP "\(bu" 4 \&\fB0251\fR to \fB0256\fR pass with RDFa 1.1 and are skipped in RDFa 1.0 because they use RDFa\-1.1\-specific syntax. .IP "\(bu" 4 \&\fB0256\fR is additionally skipped in \s-1HTML4\s0 mode, as the author believes xml:lang should be ignored in \s-1HTML\s0 versions prior to \s-1HTML5\s0. .IP "\(bu" 4 \&\fB0303\fR \- wilful violation, as this feature is simply awful. .PP Please report any bugs to <http://rt.cpan.org/>. .PP Common gotchas: .IP "\(bu" 8 Are you using the \s-1XML\s0 catalogue? .Sp RDF::RDFa::Parser maintains a locally cached version of the XHTML+RDFa \&\s-1DTD\s0. This will normally be within your Perl module directory, in a subdirectory named \*(L"auto/share/dist/RDF\-RDFa\-Parser/catalogue/\*(R". If this is missing, the parser should still work, but will be very slow. .SH "SEE ALSO" .IX Header "SEE ALSO" RDF::TrineX::Parser::RDFa provides a saner interface for this module. .PP RDF::RDFa::Parser::Config. .PP XML::LibXML, RDF::Trine, HTML::HTML5::Parser, HTML::HTML5::Sanity, RDF::RDFa::Generator, RDF::RDFa::Linter. .PP <http://www.perlrdf.org/>, <http://rdfa.info>. .SH "AUTHOR" .IX Header "AUTHOR" Toby Inkster <tobyink@cpan.org>.
.SH "ACKNOWLEDGEMENTS" .IX Header "ACKNOWLEDGEMENTS" Kjetil Kjernsmo wrote much of the stuff for building RDF::Trine models. Neubert Joachim taught me to use \s-1XML\s0 catalogues, which massively speeds up parsing of \s-1XHTML\s0 files that have DTDs. .SH "COPYRIGHT AND LICENCE" .IX Header "COPYRIGHT AND LICENCE" Copyright 2008\-2012 Toby Inkster .PP This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. .SH "DISCLAIMER OF WARRANTIES" .IX Header "DISCLAIMER OF WARRANTIES" \&\s-1THIS\s0 \s-1PACKAGE\s0 \s-1IS\s0 \s-1PROVIDED\s0 \*(L"\s-1AS\s0 \s-1IS\s0\*(R" \s-1AND\s0 \s-1WITHOUT\s0 \s-1ANY\s0 \s-1EXPRESS\s0 \s-1OR\s0 \s-1IMPLIED\s0 \&\s-1WARRANTIES\s0, \s-1INCLUDING\s0, \s-1WITHOUT\s0 \s-1LIMITATION\s0, \s-1THE\s0 \s-1IMPLIED\s0 \s-1WARRANTIES\s0 \s-1OF\s0 \&\s-1MERCHANTABILITY\s0 \s-1AND\s0 \s-1FITNESS\s0 \s-1FOR\s0 A \s-1PARTICULAR\s0 \s-1PURPOSE\s0.