.\" Automatically generated by Pod::Man 4.10 (Pod::Simple 3.35) .\" .\" Standard preamble: .\" ======================================================================== .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' . ds C` . ds C' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is >0, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .\" .\" Avoid warning from groff about undefined register 'F'. .de IX .. .nr rF 0 .if \n(.g .if rF .nr rF 1 .if (\n(rF:(\n(.g==0)) \{\ . if \nF \{\ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . if !\nF==2 \{\ . nr % 0 . nr F 2 . \} . \} .\} .rr rF .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). .\" Fear. Run. Save yourself. No user-serviceable parts. . \" fudge factors for nroff and troff .if n \{\ . ds #H 0 . ds #V .8m . ds #F .3m . ds #[ \f1 . 
ds #] \fP .\} .if t \{\ . ds #H ((1u-(\\\\n(.fu%2u))*.13m) . ds #V .6m . ds #F 0 . ds #[ \& . ds #] \& .\} . \" simple accents for nroff and troff .if n \{\ . ds ' \& . ds ` \& . ds ^ \& . ds , \& . ds ~ ~ . ds / .\} .if t \{\ . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' .\} . \" troff and (daisy-wheel) nroff accents .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' .ds 8 \h'\*(#H'\(*b\h'-\*(#H' .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] .ds ae a\h'-(\w'a'u*4/10)'e .ds Ae A\h'-(\w'A'u*4/10)'E . \" corrections for vroff .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' . \" for low resolution devices (crt and lpr) .if \n(.H>23 .if \n(.V>19 \ \{\ . ds : e . ds 8 ss . ds o a . ds d- d\h'-1'\(ga . ds D- D\h'-1'\(hy . ds th \o'bp' . ds Th \o'LP' . ds ae ae . ds Ae AE .\} .rm #[ #] #H #V #F C .\" ======================================================================== .\" .IX Title "Catmandu::Fix 3pm" .TH Catmandu::Fix 3pm "2019-01-29" "perl v5.28.1" "User Contributed Perl Documentation" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. 
.if n .ad l .nh .SH "NAME" Catmandu::Fix \- a Catmandu class used for data transformations .SH "SYNOPSIS" .IX Header "SYNOPSIS" .Vb 1 \& # From the command line \& \& $ catmandu convert JSON \-\-fix \*(Aqadd_field(foo,bar)\*(Aq < data.json \& $ catmandu convert YAML \-\-fix \*(Aqupcase(job); remove_field(test)\*(Aq < data.yml \& $ catmandu convert CSV \-\-fix \*(Aqsort_field(tags)\*(Aq < data.csv \& $ catmandu run /tmp/myfixes.txt \& $ catmandu convert OAI \-\-url http://biblio.ugent.be/oai \-\-fix /tmp/myfixes.txt \& \& # From Perl \& \& use Catmandu; \& \& my $fixer = Catmandu\->fixer(\*(Aqupcase(job)\*(Aq,\*(Aqremove_field(test)\*(Aq); \& my $fixer = Catmandu\->fixer(\*(Aq/tmp/myfixes.txt\*(Aq); \& \& # Convert data \& my $arr = $fixer\->fix([ ... ]); \& my $hash = $fixer\->fix({ ... }); \& my $importer = Catmandu\->importer(\*(AqYAML\*(Aq, file => \*(Aqdata.yml\*(Aq); \& my $fixed_importer = $fixer\->fix($importer); \& \& # Inline fixes \& use Catmandu::Fix::upcase as => \*(Aqmy_upcase\*(Aq; \& use Catmandu::Fix::remove_field as => \*(Aqmy_remove\*(Aq; \& \& my $hash = { \*(Aqjob\*(Aq => \*(Aqlibrarian\*(Aq , deep => { nested => \*(Aq1\*(Aq} }; \& \& my_upcase($hash,\*(Aqjob\*(Aq); \& my_remove($hash,\*(Aqdeep.nested\*(Aq); .Ve .SH "DESCRIPTION" .IX Header "DESCRIPTION" A Catmandu::Fix is a Perl package that can transform data. These packages are used for easy data manipulation by non-programmers. The main intention is to use fixes on the command line or in Fix scripts. A small \s-1DSL\s0 language is available to execute many Fix commands on a stream of data. .PP When a \f(CW\*(C`fix\*(C'\fR argument is given to a Catmandu::Importer, Catmandu::Exporter or Catmandu::Store then the transformations are executed on every item in the stream. .SH "FIX LANGUAGE" .IX Header "FIX LANGUAGE" A Fix script is a collection of one or more Fix commands. The fixes are executed on every record in the dataset. 
If this command is executed on the command line: .PP .Vb 1 \& $ catmandu convert JSON \-\-fix \*(Aqupcase(title); add_field(deep.nested.field,1)\*(Aq < data.json .Ve .PP then all the title fields will be upcased and a new deeply nested field will be added: .PP .Vb 2 \& { "title":"foo" } \& { "title":"bar" } .Ve .PP becomes: .PP .Vb 2 \& { "title":"FOO" , "deep":{"nested":{"field":1}} } \& { "title":"BAR" , "deep":{"nested":{"field":1}} } .Ve .PP Using the command line, Fix commands need a semicolon (;) as separator. All these commands can also be written into a Fix script where semicolons are not required: .PP .Vb 1 \& $ catmandu convert JSON \-\-fix script.fix < data.json .Ve .PP where \f(CW\*(C`script.fix\*(C'\fR contains: .PP .Vb 2 \& upcase(title) \& add_field(deep.nested.field,1) .Ve .PP Conditionals can be used to provide the logic when to execute fixes: .PP .Vb 3 \& if exists(error) \& set_field(valid, 0) \& end \& \& if exists(error) \& set_field(is_valid, 0) \& elsif exists(warning) \& set_field(is_valid, 1) \& log(...) \& else \& set_field(is_valid, 1) \& end \& \& unless all_match(title, "PERL") \& add_field(is_perl, "noooo") \& end \& \& exists(error) and set_field(is_valid, 0) \& exists(error) && set_field(is_valid, 0) \& \& exists(title) or log(\*(Aqtitle missing\*(Aq) \& exists(title) || log(\*(Aqtitle missing\*(Aq) .Ve .PP Binds are used to manipulate the context in which Fixes are executed. E.g. 
execute a fix on every item in a list: .PP .Vb 8 \& # \*(Aqdemo\*(Aq is an array of hashes \& bind list(path:demo) \& add_field(foo,bar) \& end \& # do is an alias for bind \& do list(path:demo) \& add_field(foo,bar) \& end .Ve .PP To delete records from a stream of data the \f(CW\*(C`reject\*(C'\fR Fix can be used: .PP .Vb 1 \& reject() # Reject all in the stream \& \& if exists(foo) \& reject() # Reject records that contain a \*(Aqfoo\*(Aq field \& end \& \& reject exists(foo) # Reject records that contain a \*(Aqfoo\*(Aq field .Ve .PP The opposite of \f(CW\*(C`reject\*(C'\fR is \f(CW\*(C`select\*(C'\fR: .PP .Vb 1 \& select() # Keep all records in the stream \& \& select exists(foo) # Keep only the records that contain a \*(Aqfoo\*(Aq field .Ve .PP Comments in Fix scripts are all lines (or parts of a line) that start with a hash (#): .PP .Vb 2 \& # This is ignored \& add_field(test,123) # This is also a comment .Ve .PP You can load fixes from another namespace with the \f(CW\*(C`use\*(C'\fR statement: .PP .Vb 4 \& # this will look for fixes in the Foo::Bar namespace and make them \& # available prefixed by fb \& use(foo.bar, as: fb) \& fb.baz() \& \& # this will look for Foo::Bar::Condition::is_baz \& if fb.is_baz() \& ... \& fix() \& ... \& end .Ve .SH "FIX COMMANDS, ARGUMENTS AND OPTIONS" .IX Header "FIX COMMANDS, ARGUMENTS AND OPTIONS" Fix commands manipulate data or in some cases execute side effects. Fix commands have zero or more arguments and zero or more options. Fix command arguments are separated by commas \*(L",\*(R". Fix options are name/value pairs separated by a colon \*(L":\*(R". .PP .Vb 2 \& # A command with zero arguments \& my_command() \& \& # A command with multiple arguments \& my_other_command(foo,bar,test) \& \& # A command with optional arguments \& my_special_command(foo,bar,color:blue,size:12) .Ve .PP All command arguments are treated as strings. These strings can be \s-1FIX\s0 PATHs pointing to values or string literals. 
When command arguments don't contain the special characters comma \*(L",\*(R", equal \*(L"=\*(R", greater than \*(L">\*(R" or colon \*(L":\*(R", then they can be written as-is. Otherwise, the arguments need to be quoted with single or double quotes: .PP .Vb 3 \& # Both commands below have the same effect \& my_other_command(foo,bar,test) \& my_other_command("foo","bar","test") \& \& # Illegal syntax \& my_special_command(foo,http://test.org,color:blue,size:12) # <\- syntax error \& \& # Correct syntax \& my_special_command(foo,"http://test.org",color:blue,size:12) \& \& # Or, alternative \& my_special_command("foo","http://test.org",color:"blue",size:12) .Ve .SH "FIX PATHS" .IX Header "FIX PATHS" Most of the Fix commands use paths to point to values in a data record. E.g. 'foo.2.bar' points to the key 'bar' in the 3\-rd item of the array stored at the key 'foo'. .PP A special case is when you want to point to all items in an array. In this case the wildcard '*' can be used. E.g. 'foo.*' points to all the items in the 'foo' array. .PP For array values there are special wildcards available: .PP .Vb 4 \& * $append \- Add a new item at the end of an array \& * $prepend \- Add a new item at the start of an array \& * $first \- Syntactic sugar for index \*(Aq0\*(Aq (the head of the array) \& * $last \- Syntactic sugar for index \*(Aq\-1\*(Aq (the tail of the array) .Ve .PP E.g. 
.PP .Vb 2 \& # Create { mods => { titleInfo => [ { \*(Aqtitle\*(Aq => \*(Aqa title\*(Aq }] } }; \& add_field(\*(Aqmods.titleInfo.$append.title\*(Aq, \*(Aqa title\*(Aq); \& \& # Create { mods => { titleInfo => [ { \*(Aqtitle\*(Aq => \*(Aqa title\*(Aq } , { \*(Aqtitle\*(Aq => \*(Aqanother title\*(Aq }] } }; \& add_field(\*(Aqmods.titleInfo.$append.title\*(Aq, \*(Aqanother title\*(Aq); \& \& # Create { mods => { titleInfo => [ { \*(Aqtitle\*(Aq => \*(Aqfoo\*(Aq } , { \*(Aqtitle\*(Aq => \*(Aqanother title\*(Aq }] } }; \& add_field(\*(Aqmods.titleInfo.$first.title\*(Aq, \*(Aqfoo\*(Aq); \& \& # Create { mods => { titleInfo => [ { \*(Aqtitle\*(Aq => \*(Aqfoo\*(Aq } , { \*(Aqtitle\*(Aq => \*(Aqbar\*(Aq }] } }; \& add_field(\*(Aqmods.titleInfo.$last.title\*(Aq, \*(Aqbar\*(Aq); .Ve .PP Some Fix commands can implement an alternative path syntax to point to values. See for example Catmandu::MARC: .PP .Vb 2 \& # Copy the MARC 245a field to the my.title field \& marc_map(245a,my.title) .Ve .SH "OPTIONS" .IX Header "OPTIONS" .SS "fixes" .IX Subsection "fixes" An array of fixes. Catmandu::Fix will execute every fix in consecutive order. A fix can be the name of a Catmandu::Fix::* routine, or the path to a plain text file containing all the fixes to be executed. Required. .SS "preprocess" .IX Subsection "preprocess" If set to \f(CW1\fR, fix files or inline fixes will first be preprocessed as a moustache template. See \f(CW\*(C`variables\*(C'\fR below for an example. Default is \f(CW0\fR, no preprocessing. .SS "variables" .IX Subsection "variables" An optional hashref of variables that are used to preprocess the fix files or inline fixes as a moustache template. Setting the \f(CW\*(C`variables\*(C'\fR option also sets \&\f(CW\*(C`preprocess\*(C'\fR to 1. 
.PP .Vb 7 \& my $fixer = Catmandu::Fix\->new( \& variables => {x => \*(Aqfoo\*(Aq, y => \*(Aqbar\*(Aq}, \& fixes => [\*(Aqadd_field({{x}},{{y}})\*(Aq], \& ); \& my $data = {}; \& $fixer\->fix($data); \& # $data is now {foo => \*(Aqbar\*(Aq} .Ve .SH "METHODS" .IX Header "METHODS" .SS "fix(\s-1HASH\s0)" .IX Subsection "fix(HASH)" Execute all the fixes on a \s-1HASH.\s0 Returns the fixed \s-1HASH.\s0 .SS "fix(\s-1ARRAY\s0)" .IX Subsection "fix(ARRAY)" Execute all the fixes on every element in the \s-1ARRAY.\s0 Returns an \s-1ARRAY\s0 of fixed items. .SS "fix(Catmandu::Iterator)" .IX Subsection "fix(Catmandu::Iterator)" Execute all the fixes on every item in a Catmandu::Iterator. Returns a (lazy) iterator over all the fixed items. .SS "fix(sub {})" .IX Subsection "fix(sub {})" Executes all the fixes on a generator function. Returns a new generator with fixed data. .SS "log" .IX Subsection "log" Return the current logger. See Catmandu for activating the logger in your main code. .SH "CODING" .IX Header "CODING" One can extend the Fix language by creating your own custom-made fixes. Two methods are available to create your own Fix function: .PP .Vb 2 \& * Quick and easy: create a class that implements a fix method. \& * Advanced: create a class that emits Perl code that will be evaled by the Fix module. .Ve .PP Both methods will be explained shortly. .SS "Quick and easy" .IX Subsection "Quick and easy" A Fix function is a Perl class in the \f(CW\*(C`Catmandu::Fix\*(C'\fR namespace that implements a \f(CW\*(C`fix\*(C'\fR method. The \f(CW\*(C`fix\*(C'\fR method accepts a Perl hash as input and returns a (fixed) Perl hash as output. As an example, the code below implements the \f(CW\*(C`meow\*(C'\fR Fix which inserts a 'meow' field with value 'purrrrr'. 
.PP .Vb 1 \& package Catmandu::Fix::meow; \& \& use Moo; \& \& sub fix { \& my ($self,$data) = @_; \& $data\->{meow} = \*(Aqpurrrrr\*(Aq; \& $data; \& } \& \& 1; .Ve .PP Given this Perl class, the following fix statement can be used in your application: .PP .Vb 2 \& # Will add \*(Aqmeow\*(Aq = \*(Aqpurrrrr\*(Aq to the data \& meow() .Ve .PP Use the quick and easy method when your fixes are not dependent on reading or writing data from/to a \s-1JSON\s0 path. Your Perl classes need to implement their own logic to read or write data into the given Perl hash. .PP Fix arguments are passed as arguments to the \f(CW\*(C`new\*(C'\fR function of the Perl class. As in: .PP .Vb 2 \& # In the fix file... \& meow(\*(Aqtest123\*(Aq, \-count => 4) \& \& # ...will be translated into this pseudo code \& my $fix = Catmandu::Fix::meow\->new(\*(Aqtest123\*(Aq, \*(Aq\-count\*(Aq, 4); .Ve .PP Using Moo these arguments can be caught with the Catmandu::Fix::Has package: .PP .Vb 1 \& package Catmandu::Fix::meow; \& \& use Catmandu::Sane; \& use Moo; \& use Catmandu::Fix::Has; \& \& has msg => (fix_arg => 1); # required parameter 1 \& has count => (fix_opt => 1, default => sub { 4 }); # optional parameter \*(Aqcount\*(Aq with default value 4 \& \& sub fix { \& my ($self,$data) = @_; \& $data\->{meow} = $self\->msg x $self\->count; \& $data; \& } \& \& 1; .Ve .PP Using this code the fix statement can be used like: .PP .Vb 2 \& # Will add \*(Aqmeow\*(Aq = \*(Aqpurrpurrpurrpurr\*(Aq \& meow(\*(Aqpurr\*(Aq, \-count => 4) .Ve .SS "Advanced" .IX Subsection "Advanced" The advanced method is required when one needs to read or write values from/to deeply nested \s-1JSON\s0 paths. One could parse \s-1JSON\s0 paths using the quick and easy Perl class above, but this would require a lot of inefficient for-while loops. The advanced method emits Perl code that gets compiled. 
This compiled code is evaled against all Perl hashes in the input. The best way to learn this method is by inspecting some example Fix commands. .PP To ease the implementation of Fixes that emit Perl code some helper methods are created. Many Fix functions require a transformation of one or more values on a \s-1JSON\s0 Path. The Catmandu::Fix::SimpleGetValue provides an easy way to create such a script. In the example below we'll set the value at a \s-1JSON\s0 Path to 'purrrrr': .PP .Vb 1 \& package Catmandu::Fix::purrrrr; \& \& use Catmandu::Sane; \& use Moo; \& use Catmandu::Fix::Has; \& \& has path => (fix_arg => 1); \& \& with \*(AqCatmandu::Fix::SimpleGetValue\*(Aq; \& \& sub emit_value { \& my ($self, $var, $fixer) = @_; \& "${var} = \*(Aqpurrrrr\*(Aq;"; \& } \& \& 1; .Ve .PP Run this command as: .PP .Vb 3 \& # Set the value(s) of an existing path to \*(Aqpurrrrr\*(Aq \& purrrrr(my.deep.nested.path) \& purrrrr(all.my.values.*) .Ve .PP Notice how the \f(CW\*(C`emit_value\*(C'\fR of the Catmandu::Fix::purrrrr package returns Perl code and doesn't operate directly on the Perl data. The parameter \f(CW$var\fR contains only the name of a temporary variable that will hold the value of the \s-1JSON\s0 path after compiling the code into Perl. 
.PP Use Catmandu::Fix::Has to add more arguments to this fix: .PP .Vb 1 \& package Catmandu::Fix::purrrrr; \& \& use Catmandu::Sane; \& use Moo; \& use Catmandu::Fix::Has; \& \& has path => (fix_arg => 1); \& has msg => (fix_opt => 1 , default => sub { \*(Aqpurrrrr\*(Aq }); \& \& with \*(AqCatmandu::Fix::SimpleGetValue\*(Aq; \& \& sub emit_value { \& my ($self, $var, $fixer) = @_; \& my $msg = $fixer\->emit_string($self\->msg); \& "${var} = ${msg};"; \& } \& \& 1; .Ve .PP Run this command as: .PP .Vb 3 \& # Set the value(s) of an existing path to \*(Aqokido\*(Aq \& purrrrr(my.deep.nested.path, \-msg => \*(Aqokido\*(Aq) \& purrrrr(all.my.values.*, \-msg => \*(Aqokido\*(Aq) .Ve .PP Notice how the \f(CW\*(C`emit_value\*(C'\fR needs to quote the \f(CW\*(C`msg\*(C'\fR option using the emit_string function. .SH "INTERNAL METHODS" .IX Header "INTERNAL METHODS" This module provides several methods for writing fix packages. Usage can best be understood by reading the code of existing fix packages. 
.IP "capture" 4 .IX Item "capture" .PD 0 .IP "emit_block" 4 .IX Item "emit_block" .IP "emit_clone" 4 .IX Item "emit_clone" .IP "emit_clear_hash_ref" 4 .IX Item "emit_clear_hash_ref" .IP "emit_create_path" 4 .IX Item "emit_create_path" .IP "emit_declare_vars" 4 .IX Item "emit_declare_vars" .IP "emit_delete_key" 4 .IX Item "emit_delete_key" .IP "emit_fix" 4 .IX Item "emit_fix" .IP "emit_fixes" 4 .IX Item "emit_fixes" .IP "emit_foreach" 4 .IX Item "emit_foreach" .IP "emit_foreach_key" 4 .IX Item "emit_foreach_key" .IP "emit_get_key" 4 .IX Item "emit_get_key" .IP "emit_reject" 4 .IX Item "emit_reject" .IP "emit_retain_key" 4 .IX Item "emit_retain_key" .PD this method is \s-1DEPRECATED.\s0 .IP "emit_set_key" 4 .IX Item "emit_set_key" .PD 0 .IP "emit_string" 4 .IX Item "emit_string" .IP "emit_value" 4 .IX Item "emit_value" .IP "emit_walk_path" 4 .IX Item "emit_walk_path" .IP "generate_var" 4 .IX Item "generate_var" .IP "split_path" 4 .IX Item "split_path" .PD .SH "SEE ALSO" .IX Header "SEE ALSO" Catmandu::Fixable, Catmandu::Importer, Catmandu::Exporter, Catmandu::Store, Catmandu::Bag