.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.40)
.\"
.\" Standard preamble:
.\" ========================================================================
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.de Ve \" End verbatim text
.ft R
.fi
..
.\" Set up some character translations and predefined strings.  \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
.\" nothing in troff, for use with C<>.
.tr \(*W-
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
.    ds -- \(*W-
.    ds PI pi
.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
.    ds L" ""
.    ds R" ""
.    ds C` ""
.    ds C' ""
'br\}
.el\{\
.    ds -- \|\(em\|
.    ds PI \(*p
.    ds L" ``
.    ds R" ''
.    ds C`
.    ds C'
'br\}
.\"
.\" Escape single quotes in literal strings from groff's Unicode transform.
.ie \n(.g .ds Aq \(aq
.el       .ds Aq '
.\"
.\" If the F register is >0, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD.  Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.\"
.\" Avoid warning from groff about undefined register 'F'.
.de IX
..
.nr rF 0
.if \n(.g .if rF .nr rF 1
.if (\n(rF:(\n(.g==0)) \{\
.    if \nF \{\
.        de IX
.        tm Index:\\$1\t\\n%\t"\\$2"
..
.        if !\nF==2 \{\
.            nr % 0
.            nr F 2
.        \}
.    \}
.\}
.rr rF
.\"
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
.    \" fudge factors for nroff and troff
.if n \{\
.    ds #H 0
.    ds #V .8m
.    ds #F .3m
.    ds #[ \f1
.    ds #] \fP
.\}
.if t \{\
.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
.    ds #V .6m
.    ds #F 0
.    ds #[ \&
.    ds #] \&
.\}
.    \" simple accents for nroff and troff
.if n \{\
.    ds ' \&
.    ds ` \&
.    ds ^ \&
.    ds , \&
.    ds ~ ~
.    ds /
.\}
.if t \{\
.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
.\}
.    \" troff and (daisy-wheel) nroff accents
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
.ds ae a\h'-(\w'a'u*4/10)'e
.ds Ae A\h'-(\w'A'u*4/10)'E
.    \" corrections for vroff
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
.    \" for low resolution devices (crt and lpr)
.if \n(.H>23 .if \n(.V>19 \
\{\
.    ds : e
.    ds 8 ss
.    ds o a
.    ds d- d\h'-1'\(ga
.    ds D- D\h'-1'\(hy
.    ds th \o'bp'
.    ds Th \o'LP'
.    ds ae ae
.    ds Ae AE
.\}
.rm #[ #] #H #V #F C
.\" ========================================================================
.\"
.IX Title "Statistics::Contingency 3pm"
.TH Statistics::Contingency 3pm "2021-01-09" "perl v5.32.0" "User Contributed Perl Documentation"
.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
.nh
.SH "NAME"
Statistics::Contingency \- Calculate precision, recall, F1, accuracy, etc.
.SH "VERSION"
.IX Header "VERSION"
version 0.09
.SH "SYNOPSIS"
.IX Header "SYNOPSIS"
.Vb 2
\& use Statistics::Contingency;
\& my $s = Statistics::Contingency\->new(categories => \e@all_categories);
\& 
\& while (...something...) {
\&   ...
\&   $s\->add_result($assigned_categories, $correct_categories);
\& }
\& 
\& print "Micro F1: ", $s\->micro_F1, "\en"; # Access a single statistic
\& print $s\->stats_table; # Show several stats in table form
.Ve
.SH "DESCRIPTION"
.IX Header "DESCRIPTION"
The \f(CW\*(C`Statistics::Contingency\*(C'\fR class helps you calculate several
useful statistical measures based on 2x2 \*(L"contingency tables\*(R".  I use
these measures to help judge the results of automatic text
categorization experiments, but they are useful in other situations as
well.
.PP
The general usage flow is to tally a whole bunch of results in the
\&\f(CW\*(C`Statistics::Contingency\*(C'\fR object, then query that object to obtain
the measures you are interested in.  When all results have been
collected, you can get a report on accuracy, precision, recall, F1,
and so on, with both macro-averaging and micro-averaging over
categories.
.SS "Macro vs. Micro Statistics"
.IX Subsection "Macro vs. Micro Statistics"
All of the statistics offered by this module can be calculated for
each category and then averaged, or can be calculated once over all
decisions pooled together.  The former is called macro-averaging
(specifically, macro-averaging with respect to category), and the
latter is called micro-averaging.  The two procedures bias the results
differently \- micro-averaging tends to over-emphasize the performance
on the largest categories, while macro-averaging over-emphasizes the
performance on the smallest.  It's often best to look at both of them
to get a good idea of how your data distributes across categories.
.SS "Statistics available"
.IX Subsection "Statistics available"
All of the statistics are calculated based on a so-called \*(L"contingency
table\*(R", which looks like this:
.PP
.Vb 6
\&              Correct=Y   Correct=N
\&            +\-\-\-\-\-\-\-\-\-\-\-+\-\-\-\-\-\-\-\-\-\-\-+
\& Assigned=Y |     a     |     b     |
\&            +\-\-\-\-\-\-\-\-\-\-\-+\-\-\-\-\-\-\-\-\-\-\-+
\& Assigned=N |     c     |     d     |
\&            +\-\-\-\-\-\-\-\-\-\-\-+\-\-\-\-\-\-\-\-\-\-\-+
.Ve
.PP
a, b, c, and d are counts that reflect how the assigned categories
matched the correct categories.  Depending on whether a
macro-statistic or a micro-statistic is being calculated, these
numbers will be tallied per-category or for the entire result set.
.PP
The following statistics are available:
.IP "\(bu" 4
accuracy
.Sp
This measures the portion of all decisions that were correct
decisions.  It is defined as \f(CW\*(C`(a+d)/(a+b+c+d)\*(C'\fR.  It falls in the
range from 0 to 1, with 1 being the best score.
.Sp
Note that macro-accuracy and micro-accuracy will always give the same
number.
.IP "\(bu" 4
error
.Sp
This measures the portion of all decisions that were incorrect
decisions.  It is defined as \f(CW\*(C`(b+c)/(a+b+c+d)\*(C'\fR.  It falls in the
range from 0 to 1, with 0 being the best score.
.Sp
Note that macro-error and micro-error will always give the same
number.
.IP "\(bu" 4
precision
.Sp
This measures the portion of the assigned categories that were
correct.  It is defined as \f(CW\*(C`a/(a+b)\*(C'\fR.  It falls in the range from 0
to 1, with 1 being the best score.
.IP "\(bu" 4
recall
.Sp
This measures the portion of the correct categories that were
assigned.  It is defined as \f(CW\*(C`a/(a+c)\*(C'\fR.  It falls in the range from 0
to 1, with 1 being the best score.
.IP "\(bu" 4
F1
.Sp
This measures an even combination of precision and recall.  It is
defined as \f(CW\*(C`2*p*r/(p+r)\*(C'\fR.  In terms of a, b, and c, it may be
expressed as \f(CW\*(C`2a/(2a+b+c)\*(C'\fR.  It falls in the range from 0 to 1, with
1 being the best score.
.PP
The F1 measure is often the only simple measure that is worth trying
to maximize on its own \- consider the fact that you can get a perfect
precision score by always assigning zero categories, or a perfect
recall score by always assigning every category.  A truly smart system
will assign the correct categories and only the correct categories,
maximizing precision and recall at the same time, and therefore
maximizing the F1 score.
.PP
Sometimes it's worth trying to maximize the accuracy score, but
accuracy (and its counterpart error) are considered fairly crude
scores that don't give much information about the performance of a
categorizer.
.SH "METHODS"
.IX Header "METHODS"
The general execution flow when using this class is to create a
\&\f(CW\*(C`Statistics::Contingency\*(C'\fR object, add a bunch of results to it, and
then report on the results.
.IP "\(bu" 4
\&\f(CW$e\fR = Statistics::Contingency\->\fBnew()\fR
.Sp
Returns a new \f(CW\*(C`Statistics::Contingency\*(C'\fR object.  Expects a
\&\f(CW\*(C`categories\*(C'\fR parameter specifying the entire set of categories that
may be assigned during this experiment.  Also accepts a \f(CW\*(C`verbose\*(C'\fR
parameter \- if true, some diagnostic status information will be
displayed when certain actions are performed.
.IP "\(bu" 4
\&\f(CW$e\fR\->add_result($assigned_categories, \f(CW$correct_categories\fR, \f(CW$name\fR)
.Sp
Adds a new result to the experiment.  The lists of assigned and
correct categories can be given as a reference to an array of category
names (strings), as a reference to a hash whose keys are the category
names and whose values are anything logically true, or as a single
string if there is only one category.
.Sp
If you've already got the lists in hash form, this will be the fastest
way to pass them.  Otherwise, the current implementation will convert
them to hash form internally in order to make its calculations
efficient.
.Sp
The \f(CW$name\fR parameter is an optional name for this result.  It will
only be used in error messages or debugging/progress output.
.Sp
In the current implementation, we only store the contingency tables
per category, as well as a table for the entire result set.  This
means that you can't recover information about any particular single
result from the \f(CW\*(C`Statistics::Contingency\*(C'\fR object.
.IP "\(bu" 4
\&\f(CW$e\fR\->set_entries($a, \f(CW$b\fR, \f(CW$c\fR, \f(CW$d\fR)
.Sp
If you don't wish to use the \f(CW\*(C`add_result()\*(C'\fR interface, but still want to
take advantage of the calculation methods and the various edge cases they
handle, you can directly set the four elements of the contingency
table with this method.
.IP "\(bu" 4
\&\f(CW$e\fR\->micro_accuracy
.Sp
Returns the micro-averaged accuracy for the data set.
.IP "\(bu" 4
\&\f(CW$e\fR\->micro_error
.Sp
Returns the micro-averaged error for the data set.
.IP "\(bu" 4
\&\f(CW$e\fR\->micro_precision
.Sp
Returns the micro-averaged precision for the data set.
.IP "\(bu" 4
\&\f(CW$e\fR\->micro_recall
.Sp
Returns the micro-averaged recall for the data set.
.IP "\(bu" 4
\&\f(CW$e\fR\->micro_F1
.Sp
Returns the micro-averaged F1 for the data set.
.IP "\(bu" 4
\&\f(CW$e\fR\->macro_accuracy
.Sp
Returns the macro-averaged accuracy for the data set.
.IP "\(bu" 4
\&\f(CW$e\fR\->macro_error
.Sp
Returns the macro-averaged error for the data set.
.IP "\(bu" 4
\&\f(CW$e\fR\->macro_precision
.Sp
Returns the macro-averaged precision for the data set.
.IP "\(bu" 4
\&\f(CW$e\fR\->macro_recall
.Sp
Returns the macro-averaged recall for the data set.
.IP "\(bu" 4
\&\f(CW$e\fR\->macro_F1
.Sp
Returns the macro-averaged F1 for the data set.
.IP "\(bu" 4
\&\f(CW$e\fR\->stats_table
.Sp
Returns a string combining several statistics in one graphic table.
Since accuracy is 1 minus error, we only report error since it takes
less space to print.  An optional argument specifies the number of
significant digits to show in the data \- the default is 3 significant
digits.
.IP "\(bu" 4
\&\f(CW$e\fR\->category_stats
.Sp
Returns a hash reference whose keys are the names of each category,
and whose values contain the various statistical measures (accuracy,
error, precision, recall, and F1) for each category as a hash reference.  For
example, to print a single statistic:
.Sp
.Vb 1
\& print $e\->category_stats\->{sports}{recall}, "\en";
.Ve
.Sp
Or to print certain statistics for all categories:
.Sp
.Vb 7
\& my $stats = $e\->category_stats;
\& while (my ($cat, $value) = each %$stats) {
\&   print "Category \*(Aq$cat\*(Aq: \en";
\&   print "  Accuracy: $value\->{accuracy}\en";
\&   print "  Precision: $value\->{precision}\en";
\&   print "  F1: $value\->{F1}\en";
\& }
.Ve
.SH "AUTHOR"
.IX Header "AUTHOR"
Ken Williams <kwilliams@cpan.org>
.SH "COPYRIGHT"
.IX Header "COPYRIGHT"
Copyright 2002\-2008 Ken Williams.  All rights reserved.
.PP
This distribution is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.