.\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.16) .\" .\" Standard preamble: .\" ======================================================================== .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is turned on, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .ie \nF \{\ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . nr % 0 . rr F .\} .el \{\ . de IX .. .\} .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). .\" Fear. Run. Save yourself. No user-serviceable parts. . \" fudge factors for nroff and troff .if n \{\ . ds #H 0 . ds #V .8m . ds #F .3m . ds #[ \f1 . ds #] \fP .\} .if t \{\ . ds #H ((1u-(\\\\n(.fu%2u))*.13m) . ds #V .6m . ds #F 0 . ds #[ \& . ds #] \& .\} . \" simple accents for nroff and troff .if n \{\ . ds ' \& . 
ds ` \& . ds ^ \& . ds , \& . ds ~ ~ . ds / .\} .if t \{\ . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' .\} . \" troff and (daisy-wheel) nroff accents .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' .ds 8 \h'\*(#H'\(*b\h'-\*(#H' .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] .ds ae a\h'-(\w'a'u*4/10)'e .ds Ae A\h'-(\w'A'u*4/10)'E . \" corrections for vroff .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' . \" for low resolution devices (crt and lpr) .if \n(.H>23 .if \n(.V>19 \ \{\ . ds : e . ds 8 ss . ds o a . ds d- d\h'-1'\(ga . ds D- D\h'-1'\(hy . ds th \o'bp' . ds Th \o'LP' . ds ae ae . ds Ae AE .\} .rm #[ #] #H #V #F C .\" ======================================================================== .\" .IX Title "SGML::Parser::OpenSP 3pm" .TH SGML::Parser::OpenSP 3pm "2011-11-15" "perl v5.14.2" "User Contributed Perl Documentation" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. 
.if n .ad l .nh .SH "NAME" SGML::Parser::OpenSP \- Parse SGML documents using OpenSP .SH "SYNOPSIS" .IX Header "SYNOPSIS" .Vb 1 \& use SGML::Parser::OpenSP; \& \& my $p = SGML::Parser::OpenSP\->new; \& my $h = ExampleHandler\->new; \& \& $p\->catalogs(qw(xhtml.soc)); \& $p\->warnings(qw(xml valid)); \& $p\->handler($h); \& \& $p\->parse("example.xhtml"); .Ve .SH "DESCRIPTION" .IX Header "DESCRIPTION" This module provides an interface to the OpenSP \s-1SGML\s0 parser. OpenSP and this module are event based. As the parser recognizes parts of the document (say the start or end of an element), any handlers registered for that type of event are called with suitable parameters. .SH "COMMON METHODS" .IX Header "COMMON METHODS" .IP "\fInew()\fR" 4 .IX Item "new()" Returns a new SGML::Parser::OpenSP object. Takes no arguments. .IP "parse($file)" 4 .IX Item "parse($file)" Parses the file passed as an argument. Note that this must be a filename and not a filehandle. See \*(L"\s-1PROCESSING\s0 \s-1FILES\s0\*(R" below for details. .IP "parse_string($data)" 4 .IX Item "parse_string($data)" Parses the data passed as an argument. See \*(L"\s-1PROCESSING\s0 \s-1FILES\s0\*(R" below for details. .IP "\fIhalt()\fR" 4 .IX Item "halt()" Halts processing before parsing the entire document. Takes no arguments. .IP "\fIsplit_message()\fR" 4 .IX Item "split_message()" Splits OpenSP's error messages into their component parts. See \*(L"POST-PROCESSING \s-1ERROR\s0 \s-1MESSAGES\s0\*(R" below for details. .IP "\fIget_location()\fR" 4 .IX Item "get_location()" See \*(L"\s-1POSITIONING\s0 \s-1INFORMATION\s0\*(R" below for details. .SH "CONFIGURATION" .IX Header "CONFIGURATION" .SS "\s-1BOOLEAN\s0 \s-1OPTIONS\s0" .IX Subsection "BOOLEAN OPTIONS" .ie n .IP "$p\->handler([$handler])" 4 .el .IP "\f(CW$p\fR\->handler([$handler])" 4 .IX Item "$p->handler([$handler])" Report events to the blessed reference \f(CW$handler\fR.
.SS "\s-1ERROR\s0 \s-1MESSAGE\s0 \s-1FORMAT\s0" .IX Subsection "ERROR MESSAGE FORMAT" .ie n .IP "$p\->show_open_entities([$bool])" 4 .el .IP "\f(CW$p\fR\->show_open_entities([$bool])" 4 .IX Item "$p->show_open_entities([$bool])" Describe open entities in error messages. Error messages always include the position of the most recently opened external entity. The default is false. .ie n .IP "$p\->show_open_elements([$bool])" 4 .el .IP "\f(CW$p\fR\->show_open_elements([$bool])" 4 .IX Item "$p->show_open_elements([$bool])" Show the generic identifiers of open elements in error messages. The default is false. .ie n .IP "$p\->show_error_numbers([$bool])" 4 .el .IP "\f(CW$p\fR\->show_error_numbers([$bool])" 4 .IX Item "$p->show_error_numbers([$bool])" Show message numbers in error messages. .SS "\s-1GENERATED\s0 \s-1EVENTS\s0" .IX Subsection "GENERATED EVENTS" .ie n .IP "$p\->output_comment_decls([$bool])" 4 .el .IP "\f(CW$p\fR\->output_comment_decls([$bool])" 4 .IX Item "$p->output_comment_decls([$bool])" Generate \f(CW\*(C`comment_decl\*(C'\fR events. The default is false. .ie n .IP "$p\->output_marked_sections([$bool])" 4 .el .IP "\f(CW$p\fR\->output_marked_sections([$bool])" 4 .IX Item "$p->output_marked_sections([$bool])" Generate marked section events (\f(CW\*(C`marked_section_start\*(C'\fR, \&\f(CW\*(C`marked_section_end\*(C'\fR, \f(CW\*(C`ignored_chars\*(C'\fR). The default is false. .ie n .IP "$p\->output_general_entities([$bool])" 4 .el .IP "\f(CW$p\fR\->output_general_entities([$bool])" 4 .IX Item "$p->output_general_entities([$bool])" Generate \f(CW\*(C`general_entity\*(C'\fR events. The default is false. .SS "\s-1IO\s0 \s-1SETTINGS\s0" .IX Subsection "IO SETTINGS" .ie n .IP "$p\->map_catalog_document([$bool])" 4 .el .IP "\f(CW$p\fR\->map_catalog_document([$bool])" 4 .IX Item "$p->map_catalog_document([$bool])" \&\f(CW\*(C`parse\*(C'\fR arguments specify catalog files rather than the document entity. 
The document entity is specified by the first \s-1DOCUMENT\s0 entry in the catalog files. The default is false. .ie n .IP "$p\->restrict_file_reading([$bool])" 4 .el .IP "\f(CW$p\fR\->restrict_file_reading([$bool])" 4 .IX Item "$p->restrict_file_reading([$bool])" Restrict file reading to the specified directories (see the \f(CW\*(C`search_dirs\*(C'\fR method and the \f(CW\*(C`SGML_SEARCH_PATH\*(C'\fR environment variable). You should turn this option on and configure the search paths accordingly if you intend to process untrusted resources. The default is false. .ie n .IP "$p\->catalogs([@catalogs])" 4 .el .IP "\f(CW$p\fR\->catalogs([@catalogs])" 4 .IX Item "$p->catalogs([@catalogs])" Map public identifiers and entity names to system identifiers using the specified catalog entry files. Multiple catalogs are allowed. If there is a catalog entry file called \f(CW\*(C`catalog\*(C'\fR in the same place as the document entity, it will be searched for immediately after those specified. .ie n .IP "$p\->search_dirs([@search_dirs])" 4 .el .IP "\f(CW$p\fR\->search_dirs([@search_dirs])" 4 .IX Item "$p->search_dirs([@search_dirs])" Search the specified directories for files specified in system identifiers. Multiple directories are allowed. See the description of the osfile storage manager in the OpenSP documentation for more information about file searching. .ie n .IP "$p\->pass_file_descriptor([$bool])" 4 .el .IP "\f(CW$p\fR\->pass_file_descriptor([$bool])" 4 .IX Item "$p->pass_file_descriptor([$bool])" Instruct \f(CW\*(C`parse_string\*(C'\fR to pass the input data down to the guts of OpenSP using the \f(CW\*(C`OSFD\*(C'\fR storage manager (if true) or the \f(CW\*(C`OSFILE\*(C'\fR storage manager (if false). This amounts to the difference between passing a file descriptor and a (temporary) file name. .Sp The default is true except on platforms, such as Win32, which are known to not support passing file descriptors around in this manner.
On platforms which support it you can call this method with a false parameter to force use of temporary file names instead. .Sp In general, this will do the right thing on its own so it's best to consider this an internal method. If your platform is such that you have to force use of the \s-1OSFILE\s0 storage manager, please report it as a bug and include the values of \f(CW$^O\fR, \f(CW$Config{archname}\fR, and a description of the platform (e.g. \*(L"Windows Vista Service Pack 42\*(R"). .SS "\s-1PROCESSING\s0 \s-1OPTIONS\s0" .IX Subsection "PROCESSING OPTIONS" .ie n .IP "$p\->include_params([@include_params])" 4 .el .IP "\f(CW$p\fR\->include_params([@include_params])" 4 .IX Item "$p->include_params([@include_params])" For each name in \f(CW@include_params\fR pretend that .Sp .Vb 1 \& <!ENTITY % name "INCLUDE"> .Ve .Sp occurs at the start of the document type declaration subset in the \s-1SGML\s0 document entity. Since repeated definitions of an entity are ignored, this definition will take precedence over any other definitions of this entity in the document type declaration. Multiple names are allowed. If the \s-1SGML\s0 declaration replaces the reserved name \s-1INCLUDE\s0 then the new reserved name will be the replacement text of the entity. Typically the document type declaration will contain .Sp .Vb 1 \& <!ENTITY % name "IGNORE"> .Ve .Sp and will use \f(CW%name\fR; in the status keyword specification of a marked section declaration. In this case the effect of the option will be to cause the marked section not to be ignored. .ie n .IP "$p\->active_links([@active_links])" 4 .el .IP "\f(CW$p\fR\->active_links([@active_links])" 4 .IX Item "$p->active_links([@active_links])" ??? .SS "\s-1ENABLING\s0 \s-1WARNINGS\s0" .IX Subsection "ENABLING WARNINGS" Additional warnings can be enabled using .PP .Vb 1 \& $p\->warnings([@warnings]) .Ve .PP The following values can be used to enable warnings: .IP "xml" 4 .IX Item "xml" Warn about constructs that are not allowed by \s-1XML\s0.
.IP "mixed" 4 .IX Item "mixed" Warn about mixed content models that do not allow #pcdata anywhere. .IP "sgmldecl" 4 .IX Item "sgmldecl" Warn about various dubious constructions in the \s-1SGML\s0 declaration. .IP "should" 4 .IX Item "should" Warn about various recommendations made in \s-1ISO\s0 8879 that the document does not comply with. (Recommendations are expressed with ``should'', as distinct from requirements which are usually expressed with ``shall''.) .IP "default" 4 .IX Item "default" Warn about defaulted references. .IP "duplicate" 4 .IX Item "duplicate" Warn about duplicate entity declarations. .IP "undefined" 4 .IX Item "undefined" Warn about undefined elements: elements used in the \s-1DTD\s0 but not defined. .IP "unclosed" 4 .IX Item "unclosed" Warn about unclosed start and end-tags. .IP "empty" 4 .IX Item "empty" Warn about empty start and end-tags. .IP "net" 4 .IX Item "net" Warn about net-enabling start-tags and null end-tags. .IP "min-tag" 4 .IX Item "min-tag" Warn about minimized start and end-tags. Equivalent to combination of unclosed, empty and net warnings. .IP "unused-map" 4 .IX Item "unused-map" Warn about unused short reference maps: maps that are declared with a short reference mapping declaration but never used in a short reference use declaration in the \s-1DTD\s0. .IP "unused-param" 4 .IX Item "unused-param" Warn about parameter entities that are defined but not used in a \s-1DTD\s0. Unused internal parameter entities whose text is \f(CW\*(C`INCLUDE\*(C'\fR or \f(CW\*(C`IGNORE\*(C'\fR won't get the warning. .IP "notation-sysid" 4 .IX Item "notation-sysid" Warn about notations for which no system identifier could be generated. .IP "all" 4 .IX Item "all" Warn about conditions that should usually be avoided (in the opinion of the author). 
Equivalent to: \f(CW\*(C`mixed\*(C'\fR, \f(CW\*(C`should\*(C'\fR, \f(CW\*(C`default\*(C'\fR, \f(CW\*(C`undefined\*(C'\fR, \&\f(CW\*(C`sgmldecl\*(C'\fR, \f(CW\*(C`unused\-map\*(C'\fR, \f(CW\*(C`unused\-param\*(C'\fR, \f(CW\*(C`empty\*(C'\fR and \f(CW\*(C`unclosed\*(C'\fR. .SS "\s-1DISABLING\s0 \s-1WARNINGS\s0" .IX Subsection "DISABLING WARNINGS" A warning can be disabled by using its name prefixed with \f(CW\*(C`no\-\*(C'\fR. Thus calling warnings(qw(all no-duplicate)) will enable all warnings except those about duplicate entity declarations. .PP The following values for \f(CW\*(C`warnings()\*(C'\fR disable errors: .IP "no-idref" 4 .IX Item "no-idref" Do not give an error for an \s-1ID\s0 reference value which no element has as its \s-1ID\s0. The effect will be as if each attribute declared as an \s-1ID\s0 reference value had been declared as a name. .IP "no-significant" 4 .IX Item "no-significant" Do not give an error when a character that is not a significant character in the reference concrete syntax occurs in a literal in the \&\s-1SGML\s0 declaration. This may be useful in conjunction with certain buggy test suites. .IP "no-valid" 4 .IX Item "no-valid" Do not require the document to be type-valid. This has the effect of changing the \s-1SGML\s0 declaration to specify \f(CW\*(C`VALIDITY NOASSERT\*(C'\fR and \f(CW\*(C`IMPLYDEF ATTLIST YES ELEMENT YES\*(C'\fR. An option of \f(CW\*(C`valid\*(C'\fR has the effect of changing the \s-1SGML\s0 declaration to specify \f(CW\*(C`VALIDITY TYPE\*(C'\fR and \f(CW\*(C`IMPLYDEF ATTLIST NO ELEMENT NO\*(C'\fR. If neither \f(CW\*(C`valid\*(C'\fR nor \f(CW\*(C`no\-valid\*(C'\fR are specified, then the \&\f(CW\*(C`VALIDITY\*(C'\fR and \f(CW\*(C`IMPLYDEF\*(C'\fR specified in the \s-1SGML\s0 declaration will be used. 
.SS "\s-1XML\s0 \s-1WARNINGS\s0" .IX Subsection "XML WARNINGS" The following warnings are turned on for the \f(CW\*(C`xml\*(C'\fR warning described above: .IP "inclusion" 4 .IX Item "inclusion" Warn about inclusions in element type declarations. .IP "exclusion" 4 .IX Item "exclusion" Warn about exclusions in element type declarations. .IP "rcdata-content" 4 .IX Item "rcdata-content" Warn about \s-1RCDATA\s0 declared content in element type declarations. .IP "cdata-content" 4 .IX Item "cdata-content" Warn about \s-1CDATA\s0 declared content in element type declarations. .IP "ps-comment" 4 .IX Item "ps-comment" Warn about comments in parameter separators. .IP "attlist-group-decl" 4 .IX Item "attlist-group-decl" Warn about name groups in attribute declarations. .IP "element-group-decl" 4 .IX Item "element-group-decl" Warn about name groups in element type declarations. .IP "pi-entity" 4 .IX Item "pi-entity" Warn about \s-1PI\s0 entities. .IP "internal-sdata-entity" 4 .IX Item "internal-sdata-entity" Warn about internal \s-1SDATA\s0 entities. .IP "internal-cdata-entity" 4 .IX Item "internal-cdata-entity" Warn about internal \s-1CDATA\s0 entities. .IP "external-sdata-entity" 4 .IX Item "external-sdata-entity" Warn about external \s-1SDATA\s0 entities. .IP "external-cdata-entity" 4 .IX Item "external-cdata-entity" Warn about external \s-1CDATA\s0 entities. .IP "bracket-entity" 4 .IX Item "bracket-entity" Warn about bracketed text entities. .IP "data-atts" 4 .IX Item "data-atts" Warn about attribute definition list declarations for notations. .IP "missing-system-id" 4 .IX Item "missing-system-id" Warn about external identifiers without system identifiers. .IP "conref" 4 .IX Item "conref" Warn about content reference attributes. .IP "current" 4 .IX Item "current" Warn about current attributes. .IP "nutoken-decl-value" 4 .IX Item "nutoken-decl-value" Warn about attributes with a declared value of \s-1NUTOKEN\s0 or \s-1NUTOKENS\s0. 
.IP "number-decl-value" 4 .IX Item "number-decl-value" Warn about attributes with a declared value of \s-1NUMBER\s0 or \s-1NUMBERS\s0. .IP "name-decl-value" 4 .IX Item "name-decl-value" Warn about attributes with a declared value of \s-1NAME\s0 or \s-1NAMES\s0. .IP "named-char-ref" 4 .IX Item "named-char-ref" Warn about named character references. .IP "refc" 4 .IX Item "refc" Warn about omitted refc delimiters. .IP "temp-ms" 4 .IX Item "temp-ms" Warn about \s-1TEMP\s0 marked sections. .IP "rcdata-ms" 4 .IX Item "rcdata-ms" Warn about \s-1RCDATA\s0 marked sections. .IP "instance-include-ms" 4 .IX Item "instance-include-ms" Warn about \s-1INCLUDE\s0 marked sections in the document instance. .IP "instance-ignore-ms" 4 .IX Item "instance-ignore-ms" Warn about \s-1IGNORE\s0 marked sections in the document instance. .IP "and-group" 4 .IX Item "and-group" Warn about \s-1AND\s0 connectors in model groups. .IP "rank" 4 .IX Item "rank" Warn about ranked elements. .IP "empty-comment-decl" 4 .IX Item "empty-comment-decl" Warn about empty comment declarations. .IP "att-value-not-literal" 4 .IX Item "att-value-not-literal" Warn about attribute values which are not literals. .IP "missing-att-name" 4 .IX Item "missing-att-name" Warn about omitted attribute names in start tags. .IP "comment-decl-s" 4 .IX Item "comment-decl-s" Warn about spaces before the \s-1MDC\s0 in comment declarations. .IP "comment-decl-multiple" 4 .IX Item "comment-decl-multiple" Warn about comment declarations containing multiple comments. .IP "missing-status-keyword" 4 .IX Item "missing-status-keyword" Warn about marked sections without a status keyword. .IP "multiple-status-keyword" 4 .IX Item "multiple-status-keyword" Warn about marked sections with multiple status keywords. .IP "instance-param-entity" 4 .IX Item "instance-param-entity" Warn about parameter entities in the document instance. .IP "min-param" 4 .IX Item "min-param" Warn about minimization parameters in element type declarations. 
.IP "mixed-content-xml" 4 .IX Item "mixed-content-xml" Warn about cases of mixed content which are not allowed in \s-1XML\s0. .IP "name-group-not-or" 4 .IX Item "name-group-not-or" Warn about name groups with a connector different from \s-1OR\s0. .IP "pi-missing-name" 4 .IX Item "pi-missing-name" Warn about processing instructions which don't start with a name. .IP "instance-status-keyword-s" 4 .IX Item "instance-status-keyword-s" Warn about spaces between \s-1DSO\s0 and status keyword in marked sections. .IP "external-data-entity-ref" 4 .IX Item "external-data-entity-ref" Warn about references to external data entities in the content. .IP "att-value-external-entity-ref" 4 .IX Item "att-value-external-entity-ref" Warn about references to external data entities in attribute values. .IP "data-delim" 4 .IX Item "data-delim" Warn about occurrences of `<' and `&' as data. .IP "explicit-sgml-decl" 4 .IX Item "explicit-sgml-decl" Warn about an explicit \s-1SGML\s0 declaration. .IP "internal-subset-ms" 4 .IX Item "internal-subset-ms" Warn about marked sections in the internal subset. .IP "default-entity" 4 .IX Item "default-entity" Warn about a default entity declaration. .IP "non-sgml-char-ref" 4 .IX Item "non-sgml-char-ref" Warn about numeric character references to non-SGML characters. .IP "internal-subset-ps-param-entity" 4 .IX Item "internal-subset-ps-param-entity" Warn about parameter entity references in parameter separators in the internal subset. .IP "internal-subset-ts-param-entity" 4 .IX Item "internal-subset-ts-param-entity" Warn about parameter entity references in token separators in the internal subset. .IP "internal-subset-literal-param-entity" 4 .IX Item "internal-subset-literal-param-entity" Warn about parameter entity references in parameter literals in the internal subset. .SH "PROCESSING FILES" .IX Header "PROCESSING FILES" In order to start processing of a document and receive events, the \&\f(CW\*(C`parse\*(C'\fR method must be called.
It takes one argument specifying the path to a file (not a file handle). You must set an event handler using the \f(CW\*(C`handler\*(C'\fR method prior to using this method. The return value of \f(CW\*(C`parse\*(C'\fR is currently undefined. .SH "EVENT HANDLERS" .IX Header "EVENT HANDLERS" In order to receive data from the parser you need to write an event handler. For example, .PP .Vb 1 \& package ExampleHandler; \& \& sub new { bless {}, shift } \& \& sub start_element \& { \& my ($self, $elem) = @_; \& printf " * %s\en", $elem\->{Name}; \& } .Ve .PP This handler would print all the element names as they are found in the document; for a typical \s-1XHTML\s0 document this might result in something like .PP .Vb 6 \& * html \& * head \& * title \& * body \& * p \& * ... .Ve .PP The events closely match those in the generic interface to OpenSP; see the OpenSP documentation of that interface for more information. .PP The event names have been changed to lowercase with underscores separating words, and property names are capitalized. Arrays are represented as Perl array references. \f(CW\*(C`Position\*(C'\fR information is not passed to the handler but made available through the \f(CW\*(C`get_location\*(C'\fR method which can be called from event handlers. Some redundant information has also been stripped and the generic identifier of an element is stored in the \f(CW\*(C`Name\*(C'\fR hash entry.
.PP For example, for an EndElementEvent the \f(CW\*(C`end_element\*(C'\fR handler gets called with a hash reference .PP .Vb 3 \& { \& Name => \*(Aqgi\*(Aq \& } .Ve .PP The following events are defined: .PP .Vb 10 \& * appinfo \& * processing_instruction \& * start_element \& * end_element \& * data \& * sdata \& * external_data_entity_ref \& * subdoc_entity_ref \& * start_dtd \& * end_dtd \& * end_prolog \& * general_entity # set $p\->output_general_entities(1) \& * comment_decl # set $p\->output_comment_decls(1) \& * marked_section_start # set $p\->output_marked_sections(1) \& * marked_section_end # set $p\->output_marked_sections(1) \& * ignored_chars # set $p\->output_marked_sections(1) \& * error \& * open_entity_change .Ve .PP If the documentation of the generic interface to OpenSP states that certain data is not valid, it will not be available through this interface (i.e., the respective key does not exist in the hash ref). .SH "POSITIONING INFORMATION" .IX Header "POSITIONING INFORMATION" Event handlers can call the \f(CW\*(C`get_location\*(C'\fR method on the parser object to retrieve positioning information; the get_location method will return a hash reference with the following properties: .PP .Vb 6 \& LineNumber => ..., # line number \& ColumnNumber => ..., # column number \& ByteOffset => ..., # number of preceding bytes \& EntityOffset => ..., # number of preceding bit combinations \& EntityName => ..., # name of the external entity \& FileName => ..., # name of the file .Ve .PP These can be \f(CW\*(C`undef\*(C'\fR or an empty string. .SH "POST-PROCESSING ERROR MESSAGES" .IX Header "POST-PROCESSING ERROR MESSAGES" OpenSP returns error messages in the form of a string rather than individual components of the message like line numbers or message text. The \&\f(CW\*(C`split_message\*(C'\fR method on the parser object can be used to post-process these error message strings as reliably as possible. It can be used e.g.
from an error event handler if the parser object is accessible like .PP .Vb 6 \& sub error \& { \& my $self = shift; \& my $erro = shift; \& my $mess = $self\->{parser}\->split_message($erro); \& } .Ve .PP See the documentation of \f(CW\*(C`split_message\*(C'\fR in the SGML::Parser::OpenSP::Tools documentation. .SH "UNICODE SUPPORT" .IX Header "UNICODE SUPPORT" All strings returned from event handlers and helper routines are \s-1UTF\-8\s0 encoded with the \s-1UTF\-8\s0 flag turned on, helper functions like \f(CW\*(C`split_message\*(C'\fR expect (but don't check) that string arguments are \s-1UTF\-8\s0 encoded and have the \s-1UTF\-8\s0 flag turned on. Behavior of helper functions is undefined when you pass unexpected input and should be avoided. .PP \&\f(CW\*(C`parse\*(C'\fR has limited support for binary input, but the binary input must be compatible with OpenSP's generic interface requirements and you must specify the encoding through means available to OpenSP to enable it to properly decode the binary input. Any encoding meta data about such binary input specific to Perl (such as encoding disciplines for file handles when you pass a file descriptor) will be ignored. For more specific information refer to the OpenSP manual. .IP "\(bu" 4 .IP "\(bu" 4 .SH "ENVIRONMENT VARIABLES" .IX Header "ENVIRONMENT VARIABLES" OpenSP supports a number of environment variables to control specific processing aspects such as \f(CW\*(C`SGML_SEARCH_PATH\*(C'\fR or \f(CW\*(C`SP_CHARSET_FIXED\*(C'\fR. Portable applications need to ensure that these are set prior to loading the OpenSP library into memory which happens when the \s-1XS\s0 code is loaded. This means you need to wrap the code into a \f(CW\*(C`BEGIN\*(C'\fR block: .PP .Vb 3 \& BEGIN { $ENV{SP_CHARSET_FIXED} = 1; } \& use SGML::Parser::OpenSP; \& # ... .Ve .PP Otherwise changes to the environment might not propagate to OpenSP. This applies specifically to Win32 systems. 
.IP "\s-1SGML_SEARCH_PATH\s0" 4 .IX Item "SGML_SEARCH_PATH" See the OpenSP documentation on system identifiers. .IP "\s-1SP_HTTP_USER_AGENT\s0" 4 .IX Item "SP_HTTP_USER_AGENT" The \f(CW\*(C`User\-Agent\*(C'\fR header for \s-1HTTP\s0 requests. .IP "\s-1SP_HTTP_ACCEPT\s0" 4 .IX Item "SP_HTTP_ACCEPT" The \f(CW\*(C`Accept\*(C'\fR header for \s-1HTTP\s0 requests. .IP "\s-1SP_MESSAGE_FORMAT\s0" 4 .IX Item "SP_MESSAGE_FORMAT" Enable run time selection of message format. The value is one of \f(CW\*(C`XML\*(C'\fR, \&\f(CW\*(C`NONE\*(C'\fR, \f(CW\*(C`TRADITIONAL\*(C'\fR. Whether this will have an effect depends on a compile time setting which might not be enabled in your OpenSP build. This module assumes that no such support was compiled in. .IP "\s-1SGML_CATALOG_FILES\s0" 4 .IX Item "SGML_CATALOG_FILES" .PD 0 .IP "\s-1SP_USE_DOCUMENT_CATALOG\s0" 4 .IX Item "SP_USE_DOCUMENT_CATALOG" .PD See the OpenSP documentation on catalogs. .IP "\s-1SP_SYSTEM_CHARSET\s0" 4 .IX Item "SP_SYSTEM_CHARSET" .PD 0 .IP "\s-1SP_CHARSET_FIXED\s0" 4 .IX Item "SP_CHARSET_FIXED" .IP "\s-1SP_BCTF\s0" 4 .IX Item "SP_BCTF" .IP "\s-1SP_ENCODING\s0" 4 .IX Item "SP_ENCODING" .PD See the OpenSP documentation on character sets. .PP Note that you can use the \f(CW\*(C`search_dirs\*(C'\fR method instead of using \&\f(CW\*(C`SGML_SEARCH_PATH\*(C'\fR and the \f(CW\*(C`catalogs\*(C'\fR method instead of using \&\f(CW\*(C`SGML_CATALOG_FILES\*(C'\fR and attributes on storage object specifications for \f(CW\*(C`SP_BCTF\*(C'\fR and \f(CW\*(C`SP_ENCODING\*(C'\fR respectively. For example, if \&\f(CW\*(C`SP_CHARSET_FIXED\*(C'\fR is set to \f(CW1\fR you can use .PP .Vb 1 \& $p\->parse("<OSFILE encoding=\*(AqUTF\-8\*(Aq>example.xhtml"); .Ve .PP to process \f(CW\*(C`example.xhtml\*(C'\fR using the \f(CW\*(C`UTF\-8\*(C'\fR character encoding. .SH "KNOWN ISSUES" .IX Header "KNOWN ISSUES" OpenSP must be compiled with \f(CW\*(C`SP_MULTI_BYTE\*(C'\fR \fIdefined\fR and with \&\f(CW\*(C`SP_WIDE_SYSTEM\*(C'\fR \fIundefined\fR; this module will otherwise break at runtime or not compile.
.SH "BUG REPORTS" .IX Header "BUG REPORTS" Please report bugs in this module via http://rt.cpan.org/NoAuth/Bugs.html?Dist=SGML\-Parser\-OpenSP .PP Please report bugs in OpenSP via the OpenJade project's bug tracker. .PP Please send comments and questions to the spo-devel mailing list, see http://lists.sf.net/lists/listinfo/spo\-devel for details. .SH "SEE ALSO" .IX Header "SEE ALSO" .IP "\(bu" 4 .IP "\(bu" 4 .IP "\(bu" 4 .SH "AUTHORS" .IX Header "AUTHORS" .Vb 2 \& Terje Bless wrote version 0.01. \& Bjoern Hoehrmann wrote version 0.02+. .Ve .SH "COPYRIGHT AND LICENSE" .IX Header "COPYRIGHT AND LICENSE" .Vb 2 \& Copyright (c) 2006\-2008 Bjoern Hoehrmann. \& This module is licensed under the same terms as Perl itself. .Ve