.\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.16) .\" .\" Standard preamble: .\" ======================================================================== .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is turned on, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .ie \nF \{\ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . nr % 0 . rr F .\} .el \{\ . de IX .. .\} .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). .\" Fear. Run. Save yourself. No user-serviceable parts. . \" fudge factors for nroff and troff .if n \{\ . ds #H 0 . ds #V .8m . ds #F .3m . ds #[ \f1 . ds #] \fP .\} .if t \{\ . ds #H ((1u-(\\\\n(.fu%2u))*.13m) . ds #V .6m . ds #F 0 . ds #[ \& . ds #] \& .\} . \" simple accents for nroff and troff .if n \{\ . ds ' \& . ds ` \& . ds ^ \& . ds , \& . ds ~ ~ . ds / .\} .if t \{\ . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' .\} . \" troff and (daisy-wheel) nroff accents .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' .ds 8 \h'\*(#H'\(*b\h'-\*(#H' .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] .ds ae a\h'-(\w'a'u*4/10)'e .ds Ae A\h'-(\w'A'u*4/10)'E . \" corrections for vroff .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' . \" for low resolution devices (crt and lpr) .if \n(.H>23 .if \n(.V>19 \ \{\ . ds : e . ds 8 ss . ds o a . ds d- d\h'-1'\(ga . ds D- D\h'-1'\(hy . ds th \o'bp' . ds Th \o'LP' . ds ae ae . ds Ae AE .\} .rm #[ #] #H #V #F C .\" ======================================================================== .\" .IX Title "DBD::SQLite::Cookbook 3pm" .TH DBD::SQLite::Cookbook 3pm "2012-06-09" "perl v5.14.2" "User Contributed Perl Documentation" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l .nh .SH "NAME" DBD::SQLite::Cookbook \- The DBD::SQLite Cookbook .SH "DESCRIPTION" .IX Header "DESCRIPTION" This is the DBD::SQLite cookbook. .PP It is intended to provide a place to keep a variety of functions and formals for use in callback APIs in DBD::SQLite. .SH "AGGREGATE FUNCTIONS" .IX Header "AGGREGATE FUNCTIONS" .SS "Variance" .IX Subsection "Variance" This is a simple aggregate function which returns a variance. It is adapted from an example implementation in pysqlite. .PP .Vb 1 \& package variance; \& \& sub new { bless [], shift; } \& \& sub step { \& my ( $self, $value ) = @_; \& \& push @$self, $value; \& } \& \& sub finalize { \& my $self = $_[0]; \& \& my $n = @$self; \& \& # Variance is NULL unless there is more than one row \& return undef unless $n || $n == 1; \& \& my $mu = 0; \& foreach my $v ( @$self ) { \& $mu += $v; \& } \& $mu /= $n; \& \& my $sigma = 0; \& foreach my $v ( @$self ) { \& $sigma += ($v \- $mu)**2; \& } \& $sigma = $sigma / ($n \- 1); \& \& return $sigma; \& } \& \& # NOTE: If you use an older DBI (< 1.608), \& # use $dbh\->func(..., "create_aggregate") instead. \& $dbh\->sqlite_create_aggregate( "variance", 1, \*(Aqvariance\*(Aq ); .Ve .PP The function can then be used as: .PP .Vb 3 \& SELECT group_name, variance(score) \& FROM results \& GROUP BY group_name; .Ve .SS "Variance (Memory Efficient)" .IX Subsection "Variance (Memory Efficient)" A more efficient variance function, optimized for memory usage at the expense of precision: .PP .Vb 1 \& package variance2; \& \& sub new { bless {sum => 0, count=>0, hash=> {} }, shift; } \& \& sub step { \& my ( $self, $value ) = @_; \& my $hash = $self\->{hash}; \& \& # by truncating and hashing, we can comsume many more data points \& $value = int($value); # change depending on need for precision \& # use sprintf for arbitrary fp precision \& if (exists $hash\->{$value}) { \& $hash\->{$value}++; \& } else { \& $hash\->{$value} = 1; \& } \& $self\->{sum} += $value; \& $self\->{count}++; \& } \& \& sub finalize { \& my $self = $_[0]; \& \& # Variance is NULL unless there is more than one row \& return undef unless $self\->{count} > 1; \& \& # calculate avg \& my $mu = $self\->{sum} / $self\->{count}; \& \& my $sigma = 0; \& while (my ($h, $v) = each %{$self\->{hash}}) { \& $sigma += (($h \- $mu)**2) * $v; \& } \& $sigma = $sigma / ($self\->{count} \- 1); \& \& return $sigma; \& } .Ve .PP The function can then be used as: .PP .Vb 3 \& SELECT group_name, variance2(score) \& FROM results \& GROUP BY group_name; .Ve .SS "Variance (Highly Scalable)" .IX Subsection "Variance (Highly Scalable)" A third variable implementation, designed for arbitrarily large data sets: .PP .Vb 1 \& package variance3; \& \& sub new { bless {mu=>0, count=>0, S=>0}, shift; } \& \& sub step { \& my ( $self, $value ) = @_; \& $self\->{count}++; \& my $delta = $value \- $self\->{mu}; \& $self\->{mu} += $delta/$self\->{count}; \& $self\->{S} += $delta*($value \- $self\->{mu}); \& } \& \& sub finalize { \& my $self = $_[0]; \& return $self\->{S} / ($self\->{count} \- 1); \& } .Ve .PP The function can then be used as: .PP .Vb 3 \& SELECT group_name, variance3(score) \& FROM results \& GROUP BY group_name; .Ve .SH "FTS3 fulltext indexing" .IX Header "FTS3 fulltext indexing" .SS "Sparing database disk space" .IX Subsection "Sparing database disk space" As explained in , each \&\s-1FTS3\s0 table \f(CW\*(C`\f(CIt\f(CW\*(C'\fR is stored internally within three regular tables \&\f(CW\*(C`\f(CIt\f(CW_content\*(C'\fR, \f(CW\*(C`\f(CIt\f(CW_segments\*(C'\fR and \f(CW\*(C`\f(CIt\f(CW_segdir\*(C'\fR. The last two tables contain the fulltext index. The first table \f(CW\*(C`\f(CIt\f(CW_content\*(C'\fR stores the complete documents being indexed ... but if copies of the same documents are already stored somewhere else, or can be computed from external resources (for example as \s-1HTML\s0 or MsWord files in the filesystem), then this is quite a waste of space. SQLite itself only needs the \f(CW\*(C`\f(CIt\f(CW_content\*(C'\fR table for implementing the \f(CW\*(C`offsets()\*(C'\fR and \&\f(CW\*(C`snippet()\*(C'\fR functions, which are not always usable anyway (in particular when using utf8 characters greater than 255). .PP So an alternative strategy is to use SQLite only for the fulltext index and metadata, and to keep the full documents outside of SQLite : to do so, after each insert or update in the \s-1FTS3\s0 table, do an update in the \f(CW\*(C`\f(CIt\f(CW_content\*(C'\fR table, setting the content column(s) to \&\s-1NULL\s0. Of course your application will need an algorithm for finding the external resource corresponding to any \fIdocid\fR stored within SQLite. Furthermore, SQLite \f(CW\*(C`offsets()\*(C'\fR and \f(CW\*(C`snippet()\*(C'\fR functions cannot be used, so if such functionality is needed, it has to be directly programmed within the Perl application. In short, this strategy is really a hack, because \s-1FTS3\s0 was not originally programmed with that behaviour in mind; however it is workable and has a strong impact on the size of the database file. .SH "SUPPORT" .IX Header "SUPPORT" Bugs should be reported via the \s-1CPAN\s0 bug tracker at .PP http://rt.cpan.org/NoAuth/ReportBug.html?Queue=DBD\-SQLite .SH "TO DO" .IX Header "TO DO" * Add more and varied cookbook recipes, until we have enough to turn them into a separate \s-1CPAN\s0 distribution. .PP * Create a series of tests scripts that validate the cookbook recipies. .SH "AUTHOR" .IX Header "AUTHOR" Adam Kennedy .PP Laurent Dami .SH "COPYRIGHT" .IX Header "COPYRIGHT" Copyright 2009 \- 2012 Adam Kennedy. .PP This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. .PP The full text of the license can be found in the \&\s-1LICENSE\s0 file included with this module.