.\" Automatically generated by Pod::Man 4.11 (Pod::Simple 3.35) .\" .\" Standard preamble: .\" ======================================================================== .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' . ds C` . ds C' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is >0, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .\" .\" Avoid warning from groff about undefined register 'F'. .de IX .. .nr rF 0 .if \n(.g .if rF .nr rF 1 .if (\n(rF:(\n(.g==0)) \{\ . if \nF \{\ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . if !\nF==2 \{\ . nr % 0 . nr F 2 . \} . \} .\} .rr rF .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). .\" Fear. Run. Save yourself. No user-serviceable parts. . \" fudge factors for nroff and troff .if n \{\ . ds #H 0 . ds #V .8m . ds #F .3m . ds #[ \f1 . 
ds #] \fP .\} .if t \{\ . ds #H ((1u-(\\\\n(.fu%2u))*.13m) . ds #V .6m . ds #F 0 . ds #[ \& . ds #] \& .\} . \" simple accents for nroff and troff .if n \{\ . ds ' \& . ds ` \& . ds ^ \& . ds , \& . ds ~ ~ . ds / .\} .if t \{\ . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' .\} . \" troff and (daisy-wheel) nroff accents .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' .ds 8 \h'\*(#H'\(*b\h'-\*(#H' .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] .ds ae a\h'-(\w'a'u*4/10)'e .ds Ae A\h'-(\w'A'u*4/10)'E . \" corrections for vroff .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' . \" for low resolution devices (crt and lpr) .if \n(.H>23 .if \n(.V>19 \ \{\ . ds : e . ds 8 ss . ds o a . ds d- d\h'-1'\(ga . ds D- D\h'-1'\(hy . ds th \o'bp' . ds Th \o'LP' . ds ae ae . ds Ae AE .\} .rm #[ #] #H #V #F C .\" ======================================================================== .\" .IX Title "Statistics::PCA 3pm" .TH Statistics::PCA 3pm "2020-06-15" "perl v5.30.3" "User Contributed Perl Documentation" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l .nh .SH "NAME" Statistics::PCA \- A simple Perl implementation of Principal Component Analysis. 
.SH "VERSION" .IX Header "VERSION" This document describes Statistics::PCA version 0.0.1 .SH "SYNOPSIS" .IX Header "SYNOPSIS" .Vb 1 \& use Statistics::PCA; \& \& # Create new Statistics::PCA object. \& my $pca = Statistics::PCA\->new; \& \& # Var1 Var2 Var3 Var4... \& my @Obs1 = (qw/ 32 26 51 12 /); \& my @Obs2 = (qw/ 17 13 34 35 /); \& my @Obs3 = (qw/ 10 94 83 45 /); \& my @Obs4 = (qw/ 3 72 72 67 /); \& my @Obs5 = (qw/ 10 63 35 34 /); \& \& # Load data. Data is loaded as a LIST\-of\-LISTS (LoL) pointed to by a named argument \*(Aqdata\*(Aq. Requires argument for format (see METHODS). \& $pca\->load_data ( { format => \*(Aqtable\*(Aq, data => [ \e@Obs1, \e@Obs2, \e@Obs3, \e@Obs4, \e@Obs5 ], } ) ; \& \& # Perform the PCA analysis. Takes optional argument \*(Aqeigen\*(Aq (see METHODS). \& #$pca\->pca( { eigen => \*(AqC\*(Aq } ); \& $pca\->pca(); \& \& # Access results. The return value of this method is context\-dependent (see METHODS). To print a report to STDOUT call in VOID\-context. \& $pca\->results(); .Ve .SH "DESCRIPTION" .IX Header "DESCRIPTION" Principal component analysis (\s-1PCA\s0) transforms higher-dimensional data consisting of a number of possibly correlated variables into a smaller number of uncorrelated variables termed principal components (PCs). The higher the ranking of the PCs the greater the amount of variability that the \s-1PC\s0 accounts for. This \s-1PCA\s0 procedure involves the calculation of the eigenvalue decomposition using either the Math::Cephes::Matrix or Math::MatrixReal modules (see \s-1METHODS\s0) from a data covariance matrix after mean centering the data. See http://en.wikipedia.org/wiki/Principal_component_analysis for more details. .SH "METHODS" .IX Header "METHODS" .SS "new" .IX Subsection "new" Create a new Statistics::PCA object. .PP .Vb 1 \& my $pca = Statistics::PCA\->new; .Ve .SS "load_data" .IX Subsection "load_data" Used for loading data into object. 
Data is fed as a reference to a LoL within an anonymous hash using the named argument 'data'. Data may be entered in one of two forms specified by the obligatory named argument 'format'. Data may either be entered in standard 'table' fashion (with rows corresponding to observations and columns corresponding to variables). Thus to enter the following table of data: .PP .Vb 1 \& Var1 Var2 Var3 Var4 \& \& Obs1 32 26 51 12 \& Obs2 17 13 34 35 \& Obs3 10 94 83 45 \& Obs4 3 72 72 67 \& Obs5 10 63 35 34 ... .Ve .PP The data is passed as an LoL with each nested \s-1ARRAY\s0 reference corresponding to a row of observations in the data table and the 'format' argument value 'table' as follows: .PP .Vb 8 \& # Var1 Var2 Var3 Var4 ... \& my $data = [ \& [qw/ 32 26 51 12 /], # Obs1 \& [qw/ 17 13 34 35 /], # Obs2 \& [qw/ 10 94 83 45 /], # Obs3 \& [qw/ 3 72 72 67 /], # Obs4 \& [qw/ 10 63 35 34 /], # Obs5 ... \& ]; \& \& $pca\->load_data ( { format => \*(Aqtable\*(Aq, data => $data, } ); .Ve .PP Alternatively you may enter the data in a variable-centric fashion where each nested \s-1ARRAY\s0 reference corresponds to a single variable within the data (i.e. the transpose of the above table-fashion). To pass the above data in this fashion use the 'format' argument with value 'variable' as follows: .PP .Vb 7 \& # Obs1 Obs2 Obs3 Obs4 Obs5 ... \& my $transpose = [ \& [qw/ 32 17 10 3 10 /], # Var1 \& [qw/ 26 13 94 72 63 /], # Var2 \& [qw/ 51 34 83 72 35 /], # Var3 \& [qw/ 12 35 45 67 34 /], # Var4 ... \& ]; \& \& $pca\->load_data ( { format => \*(Aqvariable\*(Aq, data => $transpose, } ) ; .Ve .SS "pca" .IX Subsection "pca" Performs the \s-1PCA\s0 analysis. This method takes the optional named argument 'eigen' that takes the values 'M' or 'C' to calculate the eigenvalue decomposition using either the Math::MatrixReal or Math::Cephes::Matrix modules respectively (defaults to 'M' without argument). 
.PP .Vb 3 \& $pca\->pca(); \& $pca\->pca( { eigen => \*(AqM\*(Aq } ); \& $pca\->pca( { eigen => \*(AqC\*(Aq } ); .Ve .SS "results" .IX Subsection "results" Used to access the results of the \s-1PCA\s0 analysis. This method is context-dependent and will return a variety of different values depending on whether it is called in \s-1VOID\s0 or \s-1LIST\s0 context and the arguments it is passed. In VOID-context it prints a formatted table of the computed results to \s-1STDOUT.\s0 .PP .Vb 1 \& $pca\->results; .Ve .PP In \s-1LIST\s0 context this method takes an obligatory argument that determines its return values. To return an ordered list (ordered by \s-1PC\s0 ranking) of the proportions of total variance of each \s-1PC\s0 pass 'proportion' to the method. .PP .Vb 2 \& my @list = $pca\->results(\*(Aqproportion\*(Aq); \& print qq{\enOrdered list of individual proportions of variance: @list}; .Ve .PP To return an ordered list of the cumulative variance of the PCs pass argument 'cumulative'. .PP .Vb 2 \& @list = $pca\->results(\*(Aqcumulative\*(Aq); \& print qq{\enOrdered list of cumulative variance of the PCs: @list}; .Ve .PP To return an ordered list of the individual standard deviations of the PCs pass argument 'stdev'. .PP .Vb 2 \& @list = $pca\->results(\*(Aqstdev\*(Aq); \& print qq{\enOrdered list of individual standard deviations of the PCs: @list}; .Ve .PP To return an ordered list of the individual eigenvalues of the PCs pass argument 'eigenvalue'. .PP .Vb 2 \& @list = $pca\->results(\*(Aqeigenvalue\*(Aq); \& print qq{\enOrdered list of individual eigenvalues of the PCs: @list}; .Ve .PP To return an ordered list of \s-1ARRAY\s0 references containing the eigenvectors of the PCs pass argument 'eigenvector'. 
.PP .Vb 4 \& # Returns an ordered list of array references containing the eigenvectors for the components \& @list = $pca\->results(\*(Aqeigenvector\*(Aq); \& use Data::Dumper; \& print Dumper \e@list; .Ve .PP To return an ordered list of \s-1ARRAY\s0 references containing more detailed information about each \s-1PC\s0 use the 'full' argument. Each nested \s-1ARRAY\s0 reference consists of an ordered list of: \s-1PC\s0 rank, \s-1PC\s0 stdev, \s-1PC\s0 proportion of variance, \&\s-1PC\s0 cumulative_variance, \s-1PC\s0 eigenvalue and a further nested \s-1ARRAY\s0 reference containing the \s-1PC\s0 eigenvector. .PP .Vb 8 \& @list = $pca\->results(\*(Aqfull\*(Aq); \& for my $i (@list) { \& print qq{\enPC rank: $i\->[0]} \& . qq{\enPC stdev $i\->[1]} \& . qq{\enPC proportion of variance $i\->[2]} \& . qq{\enPC cumulative variance $i\->[3]} \& . qq{\enPC eigenvalue $i\->[4]} \& } .Ve .PP To return an ordered LoL of the transformed data for each of the PCs pass 'transformed' to the method. .PP .Vb 2 \& @list = $pca\->results(\*(Aqtransformed\*(Aq); \& print qq{\enThe transformed data for \*(Aqthe\*(Aq principal component (first PC): @{$list[0]} }; .Ve .SH "DEPENDENCIES" .IX Header "DEPENDENCIES" \&'version' => '0', \&'Carp' => '1.08', \&'Math::Cephes::Matrix' => '0.47', \&'Math::Cephes' => '0.47', \&'List::Util' => '1.19', \&'Math::MatrixReal' => '2.05', \&'Text::SimpleTable' => '2.0', \&'Contextual::Return' => '0.2.1', .SH "AUTHOR" .IX Header "AUTHOR" Daniel S. T. Hughes \f(CW\*(C`\*(C'\fR .SH "LICENCE AND COPYRIGHT" .IX Header "LICENCE AND COPYRIGHT" Copyright (c) 2009, Daniel S. T. Hughes \f(CW\*(C`\*(C'\fR. All rights reserved. .PP This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. See perlartistic. .SH "DISCLAIMER OF WARRANTY" .IX Header "DISCLAIMER OF WARRANTY" Because this software is licensed free of charge, there is no warranty for the software, to the extent permitted by applicable law. 
Except when otherwise stated in writing the copyright holders and/or other parties provide the software \*(L"as is\*(R" without warranty of any kind, either expressed or implied, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose. The entire risk as to the quality and performance of the software is with you. Should the software prove defective, you assume the cost of all necessary servicing, repair, or correction. .PP In no event unless required by applicable law or agreed to in writing will any copyright holder, or any other party who may modify and/or redistribute the software as permitted by the above licence, be liable to you for damages, including any general, special, incidental, or consequential damages arising out of the use or inability to use the software (including but not limited to loss of data or data being rendered inaccurate or losses sustained by you or third parties or a failure of the software to operate with any other software), even if such holder or other party has been advised of the possibility of such damages.