.\" Automatically generated by Pod::Man 2.28 (Pod::Simple 3.28) .\" .\" Standard preamble: .\" ======================================================================== .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' . ds C` . ds C' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is turned on, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .\" .\" Avoid warning from groff about undefined register 'F'. .de IX .. .nr rF 0 .if \n(.g .if rF .nr rF 1 .if (\n(rF:(\n(.g==0)) \{ . if \nF \{ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . if !\nF==2 \{ . nr % 0 . nr F 2 . \} . \} .\} .rr rF .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). .\" Fear. Run. Save yourself. No user-serviceable parts. . \" fudge factors for nroff and troff .if n \{\ . ds #H 0 . ds #V .8m . ds #F .3m . ds #[ \f1 . ds #] \fP .\} .if t \{\ . ds #H ((1u-(\\\\n(.fu%2u))*.13m) . ds #V .6m . ds #F 0 . ds #[ \& . ds #] \& .\} . \" simple accents for nroff and troff .if n \{\ . ds ' \& . ds ` \& . ds ^ \& . ds , \& . ds ~ ~ . ds / .\} .if t \{\ . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' .\} . \" troff and (daisy-wheel) nroff accents .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' .ds 8 \h'\*(#H'\(*b\h'-\*(#H' .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] .ds ae a\h'-(\w'a'u*4/10)'e .ds Ae A\h'-(\w'A'u*4/10)'E . \" corrections for vroff .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' . \" for low resolution devices (crt and lpr) .if \n(.H>23 .if \n(.V>19 \ \{\ . ds : e . ds 8 ss . ds o a . ds d- d\h'-1'\(ga . ds D- D\h'-1'\(hy . ds th \o'bp' . ds Th \o'LP' . ds ae ae . ds Ae AE .\} .rm #[ #] #H #V #F C .\" ======================================================================== .\" .IX Title "Marpa::R2::Glade 3pm" .TH Marpa::R2::Glade 3pm "2015-03-06" "perl v5.20.2" "User Contributed Perl Documentation" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l .nh .SH "Name" .IX Header "Name" Marpa::R2::Glade \- Low-level interface to Marpa's Abstract Syntax Forests (\s-1ASF\s0's) .SH "Synopsis" .IX Header "Synopsis" .Vb 11 \& my $grammar = Marpa::R2::Scanless::G\->new( \& { source => \e(<<\*(AqEND_OF_SOURCE\*(Aq), \& :start ::= pair \& pair ::= duple | item item \& duple ::= item item \& item ::= Hesperus | Phosphorus \& Hesperus ::= \*(Aqa\*(Aq \& Phosphorus ::= \*(Aqa\*(Aq \& END_OF_SOURCE \& } \& ); \& \& my $slr = Marpa::R2::Scanless::R\->new( { grammar => $grammar } ); \& $slr\->read( \e\*(Aqaa\*(Aq ); \& my $asf = Marpa::R2::ASF\->new( { slr => $slr } ); \& die \*(AqNo ASF\*(Aq if not defined $asf; \& my $output_as_array = asf_to_basic_tree($asf); \& my $actual_output = array_display($output_as_array); .Ve .PP The code for \f(CW\*(C`asf_to_basic_tree()\*(C'\fR represents a user-supplied call using the interface described below. An full example of \f(CW\*(C`ast_to_basic_tree()\*(C'\fR, which constructs a Perl array \*(L"tree\*(R", is given below. \&\f(CW\*(C`array_display()\*(C'\fR displays the tree in a compact form. The code for it is also given below. The return value of \f(CW\*(C`array_display()\*(C'\fR is as follows: .PP .Vb 10 \& Glade 2 has 2 symches \& Glade 2, Symch 0, pair ::= duple \& Glade 6, duple ::= item item \& Glade 8 has 2 symches \& Glade 8, Symch 0, item ::= Hesperus \& Glade 13, Hesperus ::= \*(Aqa\*(Aq \& Glade 15, Symbol \*(Aqa\*(Aq: "a" \& Glade 8, Symch 1, item ::= Phosphorus \& Glade 1, Phosphorus ::= \*(Aqa\*(Aq \& Glade 17, Symbol \*(Aqa\*(Aq: "a" \& Glade 7 has 2 symches \& Glade 7, Symch 0, item ::= Hesperus \& Glade 22, Hesperus ::= \*(Aqa\*(Aq \& Glade 24, Symbol \*(Aqa\*(Aq: "a" \& Glade 7, Symch 1, item ::= Phosphorus \& Glade 9, Phosphorus ::= \*(Aqa\*(Aq \& Glade 26, Symbol \*(Aqa\*(Aq: "a" \& Glade 2, Symch 1, pair ::= item item \& Glade 8 revisited \& Glade 7 revisited .Ve .SH "This INTERFACE is ALPHA and EXPERIMENTAL" .IX Header "This INTERFACE is ALPHA and EXPERIMENTAL" The interface described in this document is very much a work in progress. It is alpha and experimental. The bad side of this is that it is subject to radical change without notice. The good side is that field is 100% open for users to have feedback into the final interface. .SH "About this document" .IX Header "About this document" This document describes the low-level interface to Marpa's abstract syntax forests (\s-1ASF\s0's). It assumes that you are already familiar with the high-level interface. This low-level interface allows the maximum flexiblity in building the forest, but requires the application to do much of the work. .SH "Ambiguity: factoring versus symches" .IX Header "Ambiguity: factoring versus symches" An abstract syntax forest (\s-1ASF\s0) is similar to an abstract syntax tree (\s-1AST\s0), but it has an additional ability \*(-- it can represent an ambiguous parse. Ambiguity in a parse can come in two forms, and Marpa's \s-1ASF\s0's treat the distinction as important. An ambiguity can be a symbolic choice (a symch), or a factoring. Symbolic choices are the kind of ambiguity that springs first to mind \*(-- a choice between rules, or a choice between a rule and token. Factorings involve only one rule, but the \s-1RHS\s0 symbols of that rule divide the input up (\*(L"factor it\*(R") in different ways. I'll give examples below. .PP Symches and factorings are treated separately, because they behave very differently: .IP "\(bu" 4 Symches are less common than factorings. .IP "\(bu" 4 Factorings are frequently not of interest; symches are almost always of major interest. .IP "\(bu" 4 Symches usually have just a few alternatives; the possible number of factorings easily grows into the thousands. .IP "\(bu" 4 In the worst case, the number of symches is a constant that depends on size of the grammar. In the worst case, the number of factorings grows exponentially with the length of the string being factored. .IP "\(bu" 4 The constant limiting the number of symches will almost always be of manageable size. The number of factorings can grow without limit. .SS "An example of a symch" .IX Subsection "An example of a symch" Hesperus is Venus's traditional name as an evening star, and Phosphorus (aka Lucifer) is its traditional name as a morning star. For the grammar, .PP .Vb 6 \& :start ::= planet \& planet ::= hesperus \& planet ::= phosphorus \& hesperus ::= venus \& phosphorus ::= venus \& venus ~ \*(Aqvenus\*(Aq .Ve .PP and the input string '\f(CW\*(C`venus\*(C'\fR', the forest would look like .PP .Vb 9 \& Symbol #0 planet has 2 symches \& Symch #0.0 \& GL2 Rule 0: planet ::= hesperus \& GL3 Rule 2: hesperus ::= venus \& GL4 Symbol venus: "venus" \& Symch #0.1 \& GL2 Rule 1: planet ::= phosphorus \& GL5 Rule 3: phosphorus ::= venus \& GL6 Symbol venus: "venus" .Ve .PP Notice the tags of the form "\f(CW\*(C`GLn\*(C'\fR", where \fIn\fR is an integer. These identify the glade. Glades will be described in detail below. .PP The rules allow the string '\f(CW\*(C`venus\*(C'\fR' to be parsed as either one of two planets: '\f(CW\*(C`hesperus\*(C'\fR' or '\f(CW\*(C`phosphorus\*(C'\fR', depending on whether rule 0 or rule 1 is used. The choice, at glade 2, between rules 0 and 1, is a symch. .SS "An example of a factoring" .IX Subsection "An example of a factoring" For the grammar, .PP .Vb 5 \& :start ::= top \& top ::= b b \& b ::= a a \& b ::= a \& a ~ \*(Aqa\*(Aq .Ve .PP and the input '\f(CW\*(C`aaa\*(C'\fR', a successful parse will always have two \f(CW\*(C`b\*(C'\fR's. Of these two \f(CW\*(C`b\*(C'\fR's one will always be short, deriving a string of length 1: \&'\f(CW\*(C`a\*(C'\fR'. The other will always be long, deriving a string of length 2: \&'\f(CW\*(C`aa\*(C'\fR'. But they can be in either order, which means that the two \f(CW\*(C`b\*(C'\fR's can divide up the input stream in two different ways: long string first; or short string first. .PP These two different ways of dividing the input stream using the rule .PP .Vb 1 \& top ::= b b .Ve .PP are called a \fBfactoring\fR. Here's Marpa's dump of the forest: .PP .Vb 10 \& GL2 Rule 0: top ::= b b \& Factoring #0 \& GL3 Rule 2: b ::= a \& GL4 Symbol a: "a" \& GL5 Rule 1: b ::= a a \& GL6 Symbol a: "a" \& GL7 Symbol a: "a" \& Factoring #1 \& GL8 Rule 1: b ::= a a \& GL9 Symbol a: "a" \& GL10 Symbol a: "a" \& GL11 Rule 2: b ::= a \& GL12 Symbol a: "a" .Ve .SH "The structure of a forest" .IX Header "The structure of a forest" An \s-1ASF\s0 can be pictured as a forest on a mountain. This mountain forest has glades, and there are paths between the glades. The term \*(L"glade\*(R" comes from the idea of a glade as a distinct place in a forest that is open to light. .PP The paths between glades have a direction \*(-- they are always thought of as running one-way: downhill. If a path connects two glades, the one uphill is called an upglade and the one downhill is called a downglade. .PP There is a glade at the top of mountain called the \*(L"peak\*(R". The peak has no upglades. .SH "The glade hierarchy" .IX Header "The glade hierarchy" Every glade has the same internal structure, which is this hierarchy: .IP "\(bu" 4 Glades contain symches. A symch is either for a rule or for a token. .IP "\(bu" 4 Rule symches contain factorings. .IP "\(bu" 4 Factorings contain factors. .IP "\(bu" 4 A factor is the uphill end of a path which leads to a downglade. That downglade will contain a glade hierarchy of its own. .SS "Glades" .IX Subsection "Glades" Each glade node represents an instance of a symbol in one of the possible parse trees. This means that each glade has a symbol (called the \*(L"glade symbol\*(R"), and an \*(L"input span\*(R". An input span is an input start location, and a length in characters. Because it has a start location and a length, a span also specifies an end location in the input. .SS "Symches" .IX Subsection "Symches" Every glade contains one or more symches. If a glade has only one symch, that symch is said to be \fBtrivial\fR. A symch is either a token symch or a rule symch. For a token symch, the glade symbol is the token symbol. For a rule symch, the glade symbol is the \s-1LHS\s0 of the rule. .PP At most one of the symches in a glade can be a token symch. There can, however, be many rule symches in a glade \*(-- one for every rule with the glade symbol on its \s-1LHS.\s0 .SS "Factorings" .IX Subsection "Factorings" Each rule symch contains one or more factorings. A factoring is a way of dividing up the input span of the glade among its \s-1RHS\s0 symbols, which in this context are called \fBfactors\fR. If a rule symch has only one factoring, that factoring is said to be \fBtrivial\fR. A token symch contains no factorings, which means that token symches are the \fBterminals\fR of an \s-1ASF.\s0 .PP Because the number of factorings can get out of hand, factorings may be omitted. A symch which omits factorings is said to be \fBtruncated\fR. By default, every symch is truncated down to its first 42 factorings. .SS "Factors" .IX Subsection "Factors" Every factoring has one or more factors. Each \*(L"factor\*(R" corresponds to a symbol instance on the \s-1RHS\s0 of the rule. Each such \s-1RHS\s0 factor is also a downglade, one which contains its own symches. .SH "The glade ID" .IX Header "The glade ID" Each glade has a glade \s-1ID.\s0 This can be relied on to be a non-negative integer. A glade \s-1ID\s0 may be zero. Glade \s-1ID\s0's are obtained from the \*(L"\fIpeak()\fR\*(R" and \*(L"\fIfactoring_downglades()\fR\*(R" methods. .SH "Techniques for traversing ASF's" .IX Header "Techniques for traversing ASF's" .SS "Memoization" .IX Subsection "Memoization" When traversing a forest, you should take steps to avoid traversing the same glades twice. You can do this by memoizing the result of each glade, perhaps using its glade \s-1ID\s0 to index an array. When a glade is visited, the array can be checked to see if its result has been memoized. If so, the memoized result should be used. .PP This memoization eliminates the need to revisit the downglades of an already visited glade. It does not eliminate multiple visits to a glade, but it does eliminate retraversal of the glades downhill from it. In practice, the improvement in speed can be stunning. It will often be the difference between an program which is unuseably slow even for very small inputs, and one which is extremely fast even for large inputs. .PP Repeated subtraversals happen when two glades share the same downglades, something that occurs frequently in \s-1ASF\s0's. Additionally, some day the \s-1SLIF\s0 may allow cycles. Memoization will prevent a cycle form causing an infinite loop. .PP The example in this \s-1POD\s0 includes a memoization scheme which is very simple, but adequate for most purposes. The main logic of its memoization is shown here. .PP .Vb 4 \& my ( $asf, $glade, $seen ) = @_; \& return bless ["Glade $glade revisited"], \*(AqMy_Revisit\*(Aq \& if $seen\->[$glade]; \& $seen\->[$glade] = 1; .Ve .PP Putting memoization in one of the very first drafts of your code will save you time and trouble. .SH "Forest method" .IX Header "Forest method" .SS "\fIpeak()\fP" .IX Subsection "peak()" .Vb 1 \& my $peak = $asf\->peak(); .Ve .PP Returns the glade \s-1ID\s0 of the peak. This may be zero. All failures are thrown as exceptions. .SH "Glade methods" .IX Header "Glade methods" .SS "\fIglade_literal()\fP" .IX Subsection "glade_literal()" .Vb 1 \& my $literal = $asf\->glade_literal($glade); .Ve .PP Returns the literal substring of the input associated with the glade. Every glade is associated with a span \*(-- a start location in the input, and a length. On failure, throws an exception. .PP The literal is determined by the range. This works as expected if your application reads the input characters one-by-one in order. (We will call applications which read in this fashion, \fBmonotonic\fR.) Most applications are monotonic, and yours is, unless you've taken special pains to make it otherwise. Computation of literal substrings for non-monotonic applications is addressed in \*(L"Literals and G1 spans\*(R" in Marpa::R2::Scanless::R. .SS "\fIglade_span()\fP" .IX Subsection "glade_span()" .Vb 1 \& my ( $glade_start, $glade_length ) = $asf\->glade_span($glade_id); .Ve .PP Returns the span of the input associated with the glade. Every glade is associated with a span \*(-- a start location in the input, and a length. On failure, throws an exception. .PP The span will be as expected if your application reads the input characters one-by-one in order. (We will call applications which read in this fashion, \fBmonotonic\fR.) Most applications are monotonic, and yours is, unless you've taken special pains to make it otherwise. Computation of literal substrings for non-monotonic applications is addressed in \*(L"Literals and G1 spans\*(R" in Marpa::R2::Scanless::R. .SS "\fIglade_symch_count()\fP" .IX Subsection "glade_symch_count()" .Vb 1 \& my $symch_count = $asf\->glade_symch_count($glade); .Ve .PP Requires a glade \s-1ID\s0 as its only argument. Returns the number of symches contained in the glade specified by the argument. On failure, throws an exception. .SS "\fIglade_symbol_id()\fP" .IX Subsection "glade_symbol_id()" .Vb 2 \& my $symbol_id = $asf\->glade_symbol_id($glade); \& my $display_form = $grammar\->symbol_display_form($symbol_id); .Ve .PP Requires a glade \s-1ID\s0 as its only argument. Returns the symbol \s-1ID\s0 of the \*(L"glade symbol\*(R" for the glade specified by the argument. On failure, throws an exception. .SH "Symch methods" .IX Header "Symch methods" .SS "\fIsymch_rule_id()\fP" .IX Subsection "symch_rule_id()" .Vb 1 \& my $rule_id = $asf\->symch_rule_id( $glade, $symch_ix ); .Ve .PP Requires two arguments: a glade \s-1ID\s0 and a zero-based symch index. These specify a symch. If the symch specified is a rule symch, returns the rule \s-1ID.\s0 If it is a token symch, returns \-1. .PP Returns a Perl undef, if the glade exists, but the symch index is too high. On other failure, throws an exception. .SS "\fIsymch_is_truncated()\fP" .IX Subsection "symch_is_truncated()" [ To be written. ] .SS "\fIsymch_factoring_count()\fP" .IX Subsection "symch_factoring_count()" .Vb 2 \& my $factoring_count = \& $asf\->symch_factoring_count( $glade, $symch_ix ); .Ve .PP Requires two arguments: a glade \s-1ID\s0 and a zero-based symch index. These specify a symch. Returns the count of factorings if the specified symch is a rule symch. This count will always be one or greater. Returns zero if the specified symch is a token symch. .PP Returns a Perl undef, if the glade exists, but the symch index is too high. On other failure, throws an exception. .SH "Factoring methods" .IX Header "Factoring methods" .SS "\fIfactoring_downglades()\fP" .IX Subsection "factoring_downglades()" .Vb 3 \& my $downglades = \& $asf\->factoring_downglades( $glade, $symch_ix, \& $factoring_ix ); .Ve .PP Requires three arguments: a glade \s-1ID,\s0 the zero-based index of a symch and the zero-based index of a factoring. These specify a factoring. On success, returns a reference to an array. The array contains the glade IDs of the the downglades in the factoring specified. .PP Returns a Perl undef, if the glade and symch exist, but the factoring index is too high. On other failure, throws an exception. In particular, exceptions are thrown if the symch is for a token; and if the glade exists, but the symch index is too high. .SH "Methods for reporting ambiguity" .IX Header "Methods for reporting ambiguity" .Vb 4 \& if ( $recce\->ambiguity_metric() > 1 ) { \& my $asf = Marpa::R2::ASF\->new( { slr => $recce } ); \& die \*(AqNo ASF\*(Aq if not defined $asf; \& my $ambiguities = Marpa::R2::Internal::ASF::ambiguities($asf); \& \& # Only report the first two \& my @ambiguities = grep {defined} @{$ambiguities}[ 0 .. 1 ]; \& \& $actual_value = \*(AqApplication grammar is ambiguous\*(Aq; \& $actual_result = \& Marpa::R2::Internal::ASF::ambiguities_show( $asf, \e@ambiguities ); \& last PROCESSING; \& } ## end if ( $recce\->ambiguity_metric() > 1 ) .Ve .SS "\fIambiguities()\fP" .IX Subsection "ambiguities()" .Vb 1 \& my $ambiguities = Marpa::R2::Internal::ASF::ambiguities($asf); .Ve .PP Returns a reference to an array of ambiguity reports in the \s-1ASF.\s0 The first and only argument must be an \s-1ASF\s0 object. The array returned will be be zero length if the parse was not ambiguous. Ambiguity reports are as described below. .PP While the \f(CW\*(C`ambiguities()\*(C'\fR method can be called to determine whether or not ambiguities exist, it is the more expensive way to do it. The \f(CW$slr\fR\->\fIambiguity_metric()\fR method tests an already-existing boolean and is therefore extremely fast. If you are simply testing for ambiguity, or if you can save time when you know that a parse is unambiguous, you will usually want to test for ambiguity with the \f(CW\*(C`ambiguity_metric()\*(C'\fR method before calling the \f(CW\*(C`ambiguities()\*(C'\fR method. .SS "\fIambiguities_show()\fP" .IX Subsection "ambiguities_show()" .Vb 2 \& $actual_result = \& Marpa::R2::Internal::ASF::ambiguities_show( $asf, \e@ambiguities ); .Ve .PP Returns a string which contains a description of the ambiguities in its arguments. Takes two arguments, both required. The first is an \s-1ASF,\s0 and the second is a reference to an array of ambiguities, in the format returned by the \fIambiguities()\fR method. .PP Major applications will often have their own customized ambiguity formatting routine, one which can formulate error messages based, not just on the names of the rules and symbols, but on knowledge of the role that the rules and symbols play in the application. This method is intended for applications which do not have their own customized ambiguity handling. For those which do, it can be used as a fallback for handling those reports that the customized method does not recognize or that do not need special handling. The format of the returned string is subject to change. .SH "Ambiguity reports" .IX Header "Ambiguity reports" The ambiguity reports returned by the \f(CW\*(C`ambiguities()\*(C'\fR method are of two kinds: symch reports and factoring reports. .SS "Symch reports" .IX Subsection "Symch reports" A symch report is issued whenever, in a top-down traversal of the \s-1ASF,\s0 an non-trivial symch is encountered. A symch report takes the form .PP .Vb 1 \& [ \*(Aqsymch\*(Aq, $glade ] .Ve .PP where \f(CW$glade\fR is the \s-1ID\s0 of the glade with the symch ambiguity. With this and the accessor methods in this document, an application can report full details of the symch ambiguity. .PP Typically, when there is more than one kind of ambiguity in an input span, only one is of real interest. Symch ambiguities are usually of more interest than factorings. And if one ambiguity is uphill from another, the downhill ambiguity is usually a side effect of the uphill one and of little interest. .PP Accordingly, if a glade has both a symch ambiguity and a factoring ambiguity, only the symch ambiguity is reported. And if two ambiguities in the \s-1ASF\s0 overlap, only the one closest to the peak is reported. .SS "Factoring reports" .IX Subsection "Factoring reports" A symch report is issued whenever, in a top-down traversal of the \s-1ASF,\s0 an sequence of symbols is found which has more than one factoring. Factoring reports are specific \*(-- they identify not just rules, but the specific sequences within the \s-1RHS\s0 which are differently factored \*(-- \fBmultifactored stretches\fR. Sequence rules especially have long stretches where the symbols are in sync with each other, broken by other stretches where they are out of sync. Marpa reports each of the ambiguous stretches. (A detailed definition of multifactored stretches is below.) .PP A factoring report takes the form .PP .Vb 1 \& [ \*(Aqfactoring\*(Aq, $glade, $symch_ix, $factor_ix1, $factoring_ix2, $factor_ix2 ]; .Ve .PP where \f(CW$glade\fR is the \s-1ID\s0 of the glade with the factoring ambiguity, and \f(CW$symch_ix\fR is the index of the symch involved. The multifactored stretch is described by two \*(L"identifying factors\*(R". Both factors are at the beginning of the stretch, and therefore have the same input start location. They differ in length. .PP The first of the two identifying factors has factoring index of 0, and its factor index is \f(CW$factor_ix1\fR. The second identifying factor has a factoring index of \f(CW$factoring_ix2\fR, and its factor index is \f(CW$factor_ix2\fR. .PP The identifying factors will usually be enough for error reporting, which is the usual application of these reports. Full details of the stretch are not given because they can be extremely large; are usually not of interest; and can be determined by following up on the information in the factoring report using the accessor methods described in this document. .PP Ambiguities in rules and symbols downhill from an ambiguously factored stretch are not reported. If a glade has both a symch ambiguity and a factoring ambiguity, only the symch ambiguity is reported. .SH "The code for the synopsis" .IX Header "The code for the synopsis" .SS "The \fIasf_to_basic_tree()\fP code" .IX Subsection "The asf_to_basic_tree() code" .Vb 5 \& sub asf_to_basic_tree { \& my ( $asf, $glade ) = @_; \& my $peak = $asf\->peak(); \& return glade_to_basic_tree( $asf, $peak, [] ); \& } ## end sub asf_to_basic_tree \& \& sub glade_to_basic_tree { \& my ( $asf, $glade, $seen ) = @_; \& return bless ["Glade $glade revisited"], \*(AqMy_Revisit\*(Aq \& if $seen\->[$glade]; \& $seen\->[$glade] = 1; \& my $grammar = $asf\->grammar(); \& my @symches = (); \& my $symch_count = $asf\->glade_symch_count($glade); \& SYMCH: for ( my $symch_ix = 0; $symch_ix < $symch_count; $symch_ix++ ) { \& my $rule_id = $asf\->symch_rule_id( $glade, $symch_ix ); \& if ( $rule_id < 0 ) { \& my $literal = $asf\->glade_literal($glade); \& my $symbol_id = $asf\->glade_symbol_id($glade); \& my $display_form = $grammar\->symbol_display_form($symbol_id); \& push @symches, \& bless [qq{Glade $glade, Symbol $display_form: "$literal"}], \& \*(AqMy_Token\*(Aq; \& next SYMCH; \& } ## end if ( $rule_id < 0 ) \& \& # ignore any truncation of the factorings \& my $factoring_count = \& $asf\->symch_factoring_count( $glade, $symch_ix ); \& my @symch_description = ("Glade $glade"); \& push @symch_description, "Symch $symch_ix" if $symch_count > 1; \& push @symch_description, $grammar\->rule_show($rule_id); \& my $symch_description = join q{, }, @symch_description; \& \& my @factorings = ($symch_description); \& for ( \& my $factoring_ix = 0; \& $factoring_ix < $factoring_count; \& $factoring_ix++ \& ) \& { \& my $downglades = \& $asf\->factoring_downglades( $glade, $symch_ix, \& $factoring_ix ); \& push @factorings, \& bless [ map { glade_to_basic_tree( $asf, $_, $seen ) } \& @{$downglades} ], \*(AqMy_Rule\*(Aq; \& } ## end for ( my $factoring_ix = 0; $factoring_ix < $factoring_count...) \& if ( $factoring_count > 1 ) { \& push @symches, \& bless [ \& "Glade $glade, symch $symch_ix has $factoring_count factorings", \& @factorings \& ], \& \*(AqMy_Factorings\*(Aq; \& next SYMCH; \& } ## end if ( $factoring_count > 1 ) \& push @symches, bless [ @factorings[ 0, 1 ] ], \*(AqMy_Factorings\*(Aq; \& } ## end SYMCH: for ( my $symch_ix = 0; $symch_ix < $symch_count; ...) \& return bless [ "Glade $glade has $symch_count symches", @symches ], \& \*(AqMy_Symches\*(Aq \& if $symch_count > 1; \& return $symches[0]; \& } ## end sub glade_to_basic_tree .Ve .SS "The \fIarray_display()\fP code" .IX Subsection "The array_display() code" Because of the blessings in this example, a standard dump of the output array is too cluttered for comfortable reading. The following code displays the output from \&\f(CW\*(C`asf_to_basic_tree()\*(C'\fR in a more compact form. Note that this code makes no use of Marpa, and works for all Perl arrays. It is included for completeness, and as a simple example of array traversal. .PP .Vb 11 \& sub array_display { \& my ($array) = @_; \& my ( undef, @lines ) = @{ array_lines_display($array) }; \& my $text = q{}; \& for my $line (@lines) { \& my ( $indent, $body ) = @{$line}; \& $indent \-= 6; \& $text .= ( q{ } x $indent ) . $body . "\en"; \& } \& return $text; \& } ## end sub array_display \& \& sub array_lines_display { \& my ($array) = @_; \& my $reftype = Scalar::Util::reftype($array) // \*(Aq!undef!\*(Aq; \& return [ [ 0, $array ] ] if $reftype ne \*(AqARRAY\*(Aq; \& my @lines = (); \& ELEMENT: for my $element ( @{$array} ) { \& for my $line ( @{ array_lines_display($element) } ) { \& my ( $indent, $body ) = @{$line}; \& push @lines, [ $indent + 2, $body ]; \& } \& } ## end ELEMENT: for my $element ( @{$array} ) \& return \e@lines; \& } ## end sub array_lines_display .Ve .SH "Details" .IX Header "Details" This section contains some elaborations of the above, some of them in mathematical terms. These details are segregated because they are not essential to using this interface, and while some readers find them more helpful than distracting, for many others it is the reverse. .SS "An alternative way of defining glade terminology" .IX Subsection "An alternative way of defining glade terminology" Here's a way of defining some of the above terms which is less intuitive, but more precise. First, define the \fBglade length\fR from glades A to glade B in an \s-1ASF\s0 as the number of glades on the shortest path from A to B, not including glade A. (Recall that paths are directional.) If there is no path between glades A and B, the glade length is undefined. Glade B is a \fBdownglade\fR of glade A, and glade A is an \fBupglade\fR of glade B, if and only if the glade length from A to B is 1. .PP A glade A is \fBuphill\fR with respect to glade B, and a glade B is \fBdownhill\fR with respect to glade A, if and only if the glade length from A to B is defined. .PP A \fBpeak\fR of an \s-1ASF\s0 is a node without upglades. By construction of the \s-1ASF,\s0 there is only one peak. A glade with a token symch is \fBtrivial\fR if it has no rule symches. A glade without a token symch is \fBtrivial\fR if it has exactly one downglade. .PP The \fBdistance-to-peak\fR of a glade \f(CW\*(C`A\*(C'\fR is the glade length from the peak to glade \f(CW\*(C`A\*(C'\fR. Glade \f(CW\*(C`A\*(C'\fR is said to have a higher \fBaltitude\fR than glade \f(CW\*(C`B\*(C'\fR if the distance-to-peak of glade \f(CW\*(C`A\*(C'\fR is less than that of glade \f(CW\*(C`B\*(C'\fR. Glade \f(CW\*(C`A\*(C'\fR has a lower \fBaltitude\fR than glade \f(CW\*(C`B\*(C'\fR if the distance-to-peak of glade \f(CW\*(C`A\*(C'\fR is greater than that of glade \f(CW\*(C`B\*(C'\fR. Glade \f(CW\*(C`A\*(C'\fR has the same \fBaltitude\fR as glade \f(CW\*(C`B\*(C'\fR if the distance-to-peak of glade \f(CW\*(C`A\*(C'\fR is equal to that of glade \f(CW\*(C`B\*(C'\fR. .SS "Cycles" .IX Subsection "Cycles" In the current \s-1SLIF\s0 implementation, a forest is a directed acyclic graph (\s-1DAG\s0). (In the mathematical literature a \s-1DAG\s0 is also called a \*(L"tree\*(R", but that use is confusing in the present context.) The underlying Marpa algorithm allows parse trees with cycles, and someday the \s-1SLIF\s0 probably will as well. When that happens, \s-1ASF\s0's will no longer be \*(L"acyclic\*(R" and therefore will no longer be \s-1DAG\s0's. This document talks about \s-1ASF\s0's as if that day had already come \*(-- it assumes that the \s-1ASF\s0's might contain cycles. .PP In an \s-1ASF\s0 that contains one or more cycles, the concepts of uphill and downhill become much less useful for describing the relative positions of glades. For example, if glade A cycles back to itself through glade B, then .IP "\(bu" 4 Glade A will be uphill from glade B, and .IP "\(bu" 4 Glade B will be uphill from glade A; so that .IP "\(bu" 4 Glade B will be downhill from glade A, and .IP "\(bu" 4 Glade A will be downhill from glade B; and .IP "\(bu" 4 Glade A will be both downhill and uphill from itself; and .IP "\(bu" 4 Glade B will be both downhill and uphill from itself. .PP \&\s-1ASF\s0's will always be constructed so that the peak has no upglades. Because of this, the peak can never be part of a cycle. This means that altitude will always be well defined in the sense that, for any two glades \f(CW\*(C`A\*(C'\fR and \f(CW\*(C`B\*(C'\fR, one and only one of the following statements will be true: .IP "\(bu" 4 Glade \f(CW\*(C`A\*(C'\fR is lower in altitude than glade \f(CW\*(C`B\*(C'\fR. .IP "\(bu" 4 Glade \f(CW\*(C`A\*(C'\fR is higher in altitude than glade \f(CW\*(C`B\*(C'\fR. .IP "\(bu" 4 Glade \f(CW\*(C`A\*(C'\fR is equal in altitude to glade \f(CW\*(C`b\*(C'\fR. .SS "Token symches" .IX Subsection "Token symches" In the current \s-1SLIF\s0 implementation, a symbol is always either a token or the \s-1LHS\s0 of a rule. This means that any glade that contains a token symch cannot contain any rule symches. It also means that any glade that contains a rule symch will not contain a token symch. .PP However, the underlying Marpa algorithm allows \s-1LHS\s0 terminals, and someday the \s-1SLIF\s0 probably will as well. This document is written as if that day has already come, and describes glades as if they could contain both rule symches and a token symch. .SS "Maximum symches per glade" .IX Subsection "Maximum symches per glade" Above, the point is made that the number of symches in a glade, even in the worst case, is a very manageable number. For a particular case, it is not hard to work out the exact maximum. Here are the details. .PP There can be at most one token symch. There can be only rule symch for every rule. In addition, all rules in a glade must have the glade symbol as their \s-1LHS.\s0 Let the number of rules with the glade symbol on their \s-1LHS\s0 be \f(CW\*(C`r\*(C'\fR. The maximum number of symches in a glade is \f(CW\*(C`r+1\*(C'\fR. .SS "Multifactored stretches" .IX Subsection "Multifactored stretches" Marpa locates factoring ambiguities, not just by rule, but by \s-1RHS\s0 symbol. It finds \fBmultifactored stretches\fR, input spans where a sequence of symbols within the \s-1RHS\s0 of a rule have multiple factorings. A multifactored stretch will sometimes encompass the entire \s-1RHS\s0 of a rule. In other cases, the \s-1RHS\s0 of a single rule might contain many multifactored stretches. This is often the case with sequence rules. Sequence rules can have a very long \s-1RHS,\s0 and in those situations narrowing down factoring ambiguities to specific input spans is necessary for precise error reporting. .PP The main body of this document worked with an intuitive \*(L"know one when I see one\*(R" idea of multifactored stretches. The exact definition follows. First we will need a series of preliminary definitions. .PP Consider the case of a arbitrary rule symch. Intuitively, a \fBfactoring position\fR is a location within the factors of one of the factorings of that symch. It can be seen as a duple \f(CW\*(C`\*(C'\fR where \&\f(CW\*(C`\*(C'\fR is the index of a factoring within the symch, and \&\f(CW\*(C`\*(C'\fR is the index of one of the factors of the factoring. .PP Let \f(CW\*(C`SP\*(C'\fR be a function that maps the symch's set of factoring indexes to the non-negative integers, such that for a factoring index \f(CW\*(C`i\*(C'\fR and factor index \f(CW\*(C`j\*(C'\fR, \f(CW\*(C`SP(i)=j\*(C'\fR, \f(CW\*(C`j\*(C'\fR is a valid factor index within the factoring \f(CW\*(C`i\*(C'\fR. The function \f(CW\*(C`SP\*(C'\fR can be called a \fBsymch position\fR. .PP Every \fBsymch position\fR is equivalent to a set of factoring positions. The \fBinitial symch position\fR is the symch position all of whose factoring positions have a factor index of 0. Equivalently, it is the constant function \f(CW\*(C`ISP\*(C'\fR, where \f(CW\*(C`ISP(i)=0\*(C'\fR for all factoring indexes \f(CW\*(C`i\*(C'\fR. .PP The factor with index \f(CW\*(C`factor_ix\*(C'\fR in the factoring with index \f(CW\*(C`factoring_ix\*(C'\fR is said to be the factor at factoring position \f(CW\*(C`\*(C'\fR. A factor is one of the factors of a symch position if and only if it is a factor at one of its factoring positions. .PP An \fBaligned symch position\fR is a factoring position all of whose factors have the same start location. The location of an aligned symch position is that start location. The \fBinitial symch position\fR is always an aligned factoring position. A \fBsynced symch position\fR is an aligned symch position all of whose factors have the same length and symbol \s-1ID. A \s0\fBunsynced symch position\fR is an aligned symch position that is not a synced symch position. .PP We are now in a position to define a \fBmultifactored stretch\fR. Intuitively, a multifactored stretch is a longest possible input span that contains at least one unsynced symch position, but no synced symch positions. More formally, a multifactored stretch of a symch is a span of start locations within that symch, such that: .IP "\(bu" 4 Its first location is the location of unsynced symch position. .IP "\(bu" 4 Its first location is the initial symch position, or the first symch positiion after a synched symch position. .IP "\(bu" 4 Its end location is the end location of the symch, or a synced symch position, whichever occurs first. .PP Note that multifactored stretch are aligned in terms of input locations, but they do not have to be aligned in terms of factor indexes. The factoring positions of a multifactored stretch can have many different factor indexes. This is true of all rules, but it is particularly likely for a sequence rule, where the \s-1RHS\s0 consists of repetitions of a single symbol. .SH "Copyright and License" .IX Header "Copyright and License" .Vb 5 \& Copyright 2014 Jeffrey Kegler \& This file is part of Marpa::R2. Marpa::R2 is free software: you can \& redistribute it and/or modify it under the terms of the GNU Lesser \& General Public License as published by the Free Software Foundation, \& either version 3 of the License, or (at your option) any later version. \& \& Marpa::R2 is distributed in the hope that it will be useful, \& but WITHOUT ANY WARRANTY; without even the implied warranty of \& MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU \& Lesser General Public License for more details. \& \& You should have received a copy of the GNU Lesser \& General Public License along with Marpa::R2. If not, see \& http://www.gnu.org/licenses/. .Ve