.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.40) .\" .\" Standard preamble: .\" ======================================================================== .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' . ds C` . ds C' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is >0, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .\" .\" Avoid warning from groff about undefined register 'F'. .de IX .. .nr rF 0 .if \n(.g .if rF .nr rF 1 .if (\n(rF:(\n(.g==0)) \{\ . if \nF \{\ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . if !\nF==2 \{\ . nr % 0 . nr F 2 . \} . \} .\} .rr rF .\" ======================================================================== .\" .IX Title "JSON::Tokenize 3pm" .TH JSON::Tokenize 3pm "2021-02-20" "perl v5.32.1" "User Contributed Perl Documentation" .\" For nroff, turn off justification. 
Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l .nh .SH "NAME" JSON::Tokenize \- Tokenize JSON .SH "SYNOPSIS" .IX Header "SYNOPSIS" .Vb 1 \& use JSON::Tokenize \*(Aq:all\*(Aq; \& \& my $input = \*(Aq{"tuttie":["fruity", true, 100]}\*(Aq; \& my $token = tokenize_json ($input); \& print_tokens ($token, 0); \& \& sub print_tokens \& { \& my ($token, $depth) = @_; \& while ($token) { \& my $start = tokenize_start ($token); \& my $end = tokenize_end ($token); \& my $type = tokenize_type ($token); \& print " " x $depth; \& my $value = substr ($input, $start, $end \- $start); \& print "\*(Aq$value\*(Aq has type \*(Aq$type\*(Aq.\en"; \& my $child = tokenize_child ($token); \& if ($child) { \& print_tokens ($child, $depth+1); \& } \& my $next = tokenize_next ($token); \& $token = $next; \& } \& } .Ve .PP This outputs .PP .Vb 9 \& \*(Aq{"tuttie":["fruity", true, 100]}\*(Aq has type \*(Aqobject\*(Aq. \& \*(Aq"tuttie"\*(Aq has type \*(Aqstring\*(Aq. \& \*(Aq:\*(Aq has type \*(Aqcolon\*(Aq. \& \*(Aq["fruity", true, 100]\*(Aq has type \*(Aqarray\*(Aq. \& \*(Aq"fruity"\*(Aq has type \*(Aqstring\*(Aq. \& \*(Aq,\*(Aq has type \*(Aqcomma\*(Aq. \& \*(Aqtrue\*(Aq has type \*(Aqliteral\*(Aq. \& \*(Aq,\*(Aq has type \*(Aqcomma\*(Aq. \& \*(Aq100\*(Aq has type \*(Aqnumber\*(Aq. .Ve .SH "VERSION" .IX Header "VERSION" This documents version 0.61 of JSON::Tokenize corresponding to git commit 033269fa8972fdce8626aa65cd11a5394ab50492 released on Thu Feb 11 09:14:04 2021 +0900. .SH "DESCRIPTION" .IX Header "DESCRIPTION" This is a module for tokenizing a \s-1JSON\s0 string. \*(L"Tokenizing\*(R" means breaking the string into individual tokens, without creating any Perl structures. It uses the same underlying code as JSON::Parse. Tokenizing can be used for tasks such as picking out or searching through parts of a large \s-1JSON\s0 structure without storing each part of the entire structure in memory. 
.PP This module is an experimental part of JSON::Parse and its interface is likely to change. The tokenizing functions are currently written in a very primitive way. .SH "FUNCTIONS" .IX Header "FUNCTIONS" .SS "tokenize_child" .IX Subsection "tokenize_child" .Vb 1 \& my $child = tokenize_child ($token); .Ve .PP Walk the tree of tokens. .SS "tokenize_end" .IX Subsection "tokenize_end" .Vb 1 \& my $end = tokenize_end ($token); .Ve .PP Get the end of the token as a byte offset from the start of the string. Note this is a byte offset not a character offset. .SS "tokenize_json" .IX Subsection "tokenize_json" .Vb 1 \& my $token = tokenize_json ($json); .Ve .PP Tokenize the \s-1JSON\s0 in \f(CW$json\fR and return the top-level token, whose tree of child and sibling tokens can then be walked, as shown in \*(L"SYNOPSIS\*(R". .SS "tokenize_next" .IX Subsection "tokenize_next" .Vb 1 \& my $next = tokenize_next ($token); .Ve .PP Walk the tree of tokens. .SS "tokenize_start" .IX Subsection "tokenize_start" .Vb 1 \& my $start = tokenize_start ($token); .Ve .PP Get the start of the token as a byte offset from the start of the string. Note this is a byte offset not a character offset. .SS "tokenize_text" .IX Subsection "tokenize_text" .Vb 1 \& my $text = tokenize_text ($json, $token); .Ve .PP Given a token \f(CW$token\fR from this parsing and the \s-1JSON\s0 in \f(CW$json\fR, return the text which corresponds to the token. This is a convenience function written in Perl which uses \*(L"tokenize_start\*(R" and \&\*(L"tokenize_end\*(R" and \f(CW\*(C`substr\*(C'\fR to get the string from \f(CW$json\fR. .SS "tokenize_type" .IX Subsection "tokenize_type" .Vb 1 \& my $type = tokenize_type ($token); .Ve .PP Get the type of the token as a string. The possible return values are .PP .Vb 10 \& "array", \& "colon", \& "comma", \& "initial state", \& "invalid", \& "literal", \& "number", \& "object", \& "string", \& "unicode escape" .Ve .SH "AUTHOR" .IX Header "AUTHOR" Ben Bullock. .SH "COPYRIGHT & LICENCE" .IX Header "COPYRIGHT & LICENCE" This package and associated files are copyright (C) 2016\-2021 Ben Bullock. 
.PP You can use, copy, modify and redistribute this package and associated files under the Perl Artistic Licence or the \s-1GNU\s0 General Public Licence.