.\" Automatically generated by Pod::Man 4.10 (Pod::Simple 3.35)
.\"
.\" Standard preamble:
.\" ========================================================================
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.de Ve \" End verbatim text
.ft R
.fi
..
.\" Set up some character translations and predefined strings.  \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
.\" nothing in troff, for use with C<>.
.tr \(*W-
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
.    ds -- \(*W-
.    ds PI pi
.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
.    ds L" ""
.    ds R" ""
.    ds C` ""
.    ds C' ""
'br\}
.el\{\
.    ds -- \|\(em\|
.    ds PI \(*p
.    ds L" ``
.    ds R" ''
.    ds C`
.    ds C'
'br\}
.\"
.\" Escape single quotes in literal strings from groff's Unicode transform.
.ie \n(.g .ds Aq \(aq
.el       .ds Aq '
.\"
.\" If the F register is >0, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD.  Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.\"
.\" Avoid warning from groff about undefined register 'F'.
.de IX
..
.nr rF 0
.if \n(.g .if rF .nr rF 1
.if (\n(rF:(\n(.g==0)) \{\
.    if \nF \{\
.        de IX
.        tm Index:\\$1\t\\n%\t"\\$2"
..
.        if !\nF==2 \{\
.            nr % 0
.            nr F 2
.        \}
.    \}
.\}
.rr rF
.\"
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
.    \" fudge factors for nroff and troff
.if n \{\
.    ds #H 0
.    ds #V .8m
.    ds #F .3m
.    ds #[ \f1
.    ds #] \fP
.\}
.if t \{\
.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
.    ds #V .6m
.    ds #F 0
.    ds #[ \&
.    ds #] \&
.\}
.    \" simple accents for nroff and troff
.if n \{\
.    ds ' \&
.    ds ` \&
.    ds ^ \&
.    ds , \&
.    ds ~ ~
.    ds /
.\}
.if t \{\
.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
.\}
.    \" troff and (daisy-wheel) nroff accents
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
.ds ae a\h'-(\w'a'u*4/10)'e
.ds Ae A\h'-(\w'A'u*4/10)'E
.    \" corrections for vroff
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
.    \" for low resolution devices (crt and lpr)
.if \n(.H>23 .if \n(.V>19 \
\{\
.    ds : e
.    ds 8 ss
.    ds o a
.    ds d- d\h'-1'\(ga
.    ds D- D\h'-1'\(hy
.    ds th \o'bp'
.    ds Th \o'LP'
.    ds ae ae
.    ds Ae AE
.\}
.rm #[ #] #H #V #F C
.\" ========================================================================
.\"
.IX Title "EBook::Tools::Unpack 3pm"
.TH EBook::Tools::Unpack 3pm "2019-08-08" "perl v5.28.1" "User Contributed Perl Documentation"
.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
.nh
.SH "NAME"
EBook::Tools::Unpack \- Object class for unpacking e\-book files into their component parts and metadata
.SH "SYNOPSIS"
.IX Header "SYNOPSIS"
.Vb 10
\& use EBook::Tools::Unpack;
\& my $unpacker = EBook::Tools::Unpack\->new(
\&    \*(Aqfile\*(Aq => $filename,
\&    \*(Aqdir\*(Aq => $dir,
\&    \*(Aqencoding\*(Aq => $encoding,
\&    \*(Aqformat\*(Aq => $format,
\&    \*(Aqraw\*(Aq => $raw,
\&    \*(Aqauthor\*(Aq => $author,
\&    \*(Aqtitle\*(Aq => $title,
\&    \*(Aqopffile\*(Aq => $opffile,
\&    \*(Aqtidy\*(Aq => $tidy,
\&    \*(Aqnosave\*(Aq => $nosave,
\&    );
\& $unpacker\->unpack;
.Ve
.PP
or, more simply:
.PP
.Vb 3
\& use EBook::Tools::Unpack;
\& my $unpacker = EBook::Tools::Unpack\->new(\*(Aqfile\*(Aq => \*(Aqmybook.prc\*(Aq);
\& $unpacker\->unpack;
.Ve
.SH "CONSTRUCTOR"
.IX Header "CONSTRUCTOR"
.ie n .SS """new(%args)"""
.el .SS "\f(CWnew(%args)\fP"
.IX Subsection "new(%args)"
Instantiates a new EBook::Tools::Unpack object.
.PP
\fIArguments\fR
.IX Subsection "Arguments"
.IP "\(bu" 4
\&\f(CW\*(C`file\*(C'\fR
.Sp
The file to unpack.  Specifying this is mandatory.
.IP "\(bu" 4
\&\f(CW\*(C`dir\*(C'\fR
.Sp
The directory to unpack into.  If not specified, defaults to the
basename of the file.
.IP "\(bu" 4
\&\f(CW\*(C`encoding\*(C'\fR
.Sp
If specified, overrides the encoding to use when unpacking.  This is
normally detected from the file and does not need to be specified.
.Sp
Valid values are '1252' (specifying Windows\-1252) and '65001'
(specifying \s-1UTF\-8\s0).
.IP "\(bu" 4
\&\f(CW\*(C`htmlconvert\*(C'\fR
.Sp
If set to true, an attempt will be made to convert non-HTML output
text to \s-1HTML\s0 where possible.
.IP "\(bu" 4
\&\f(CW\*(C`key\*(C'\fR
.Sp
The decryption key to use if necessary (not yet implemented)
.IP "\(bu" 4
\&\f(CW\*(C`keyfile\*(C'\fR
.Sp
The file holding the decryption keys to use if necessary (not yet
implemented)
.IP "\(bu" 4
\&\f(CW\*(C`language\*(C'\fR
.Sp
If specified, overrides the detected language information.
.IP "\(bu" 4
\&\f(CW\*(C`opffile\*(C'\fR
.Sp
The name of the file in which the metadata will be stored.  If not
specified, defaults to \f(CW\*(C`content.opf\*(C'\fR.
.IP "\(bu" 4
\&\f(CW\*(C`raw\*(C'\fR
.Sp
If set to true, this forces no corrections to be done on any extracted
text and a lot of raw, unparsed, unmodified data to be dumped into the
directory along with everything else.  It's useful for debugging
exactly what was in the file being unpacked, and (when combined with
\&\f(CW\*(C`nosave\*(C'\fR) reducing the time needed to extract parsed data from an
ebook container without actually unpacking it.
.IP "\(bu" 4
\&\f(CW\*(C`author\*(C'\fR
.Sp
Overrides the detected author name.
.IP "\(bu" 4
\&\f(CW\*(C`title\*(C'\fR
.Sp
Overrides the detected title.
.IP "\(bu" 4
\&\f(CW\*(C`tidy\*(C'\fR
.Sp
If set to true, the unpacker will run tidy on any \s-1HTML\s0 output files to
convert them to valid \s-1XHTML.\s0  Be warned that this can occasionally
change the formatting, as Tidy isn't very forgiving on certain common
tricks (such as empty
<pre> elements with style elements) that abuse
the standard.
.IP "\(bu" 4
\&\f(CW\*(C`nosave\*(C'\fR
.Sp
If set to true, the unpacker will run through all of the unpacking
steps except those that actually write to the disk.  This is useful
for testing, but also (particularly when combined with \f(CW\*(C`raw\*(C'\fR) can be
used for extracting parsed data from an ebook container without
actually unpacking it.
.SH "ACCESSOR METHODS"
.IX Header "ACCESSOR METHODS"
See \*(L"\fBnew()\fR\*(R" for more details on what some of these mean.  Note
that some values cannot be autodetected until an unpack method
executes.
.ie n .SS """author"""
.el .SS "\f(CWauthor\fP"
.IX Subsection "author"
.ie n .SS """dir"""
.el .SS "\f(CWdir\fP"
.IX Subsection "dir"
.ie n .SS """file"""
.el .SS "\f(CWfile\fP"
.IX Subsection "file"
.ie n .SS """filebase"""
.el .SS "\f(CWfilebase\fP"
.IX Subsection "filebase"
In scalar context, this is the basename of \f(CW\*(C`file\*(C'\fR.  In list context,
it actually returns the basename, directory, and extension as per
\&\f(CW\*(C`fileparse\*(C'\fR from File::Basename.
.ie n .SS """format"""
.el .SS "\f(CWformat\fP"
.IX Subsection "format"
.ie n .SS """key"""
.el .SS "\f(CWkey\fP"
.IX Subsection "key"
.ie n .SS """keyfile"""
.el .SS "\f(CWkeyfile\fP"
.IX Subsection "keyfile"
.ie n .SS """language"""
.el .SS "\f(CWlanguage\fP"
.IX Subsection "language"
This returns the language specified by the user, if any.  It remains
undefined if the user has not requested that a language code be set
even if a language was autodetected.
.ie n .SS """opffile"""
.el .SS "\f(CWopffile\fP"
.IX Subsection "opffile"
.ie n .SS """raw"""
.el .SS "\f(CWraw\fP"
.IX Subsection "raw"
.ie n .SS """title"""
.el .SS "\f(CWtitle\fP"
.IX Subsection "title"
This returns the title specified by the user, if any.  It remains
undefined if the user has not requested a title be set even if a title
was autodetected.
.ie n .SS """detected"""
.el .SS "\f(CWdetected\fP"
.IX Subsection "detected"
This returns a hash containing the autodetected metadata, if any.
.SH "MODIFIER METHODS"
.IX Header "MODIFIER METHODS"
.ie n .SS """detect_format()"""
.el .SS "\f(CWdetect_format()\fP"
.IX Subsection "detect_format()"
Attempts to automatically detect the format of the input file and set
the internal object attributes \f(CW\*(C`$self\->{format}\*(C'\fR and
\&\f(CW\*(C`$self\->{formatinfo}\*(C'\fR, where the former is a one-word string used by
the dispatcher to select the correct unpacking method and the latter
may contain additional detected information (such as a title or
version).
.PP
Croaks if detection fails.
.PP
In scalar context, returns \f(CW\*(C`$self\->{format}\*(C'\fR.  In list context,
returns the two-element list \f(CW\*(C`($self\->{format},$self\->{formatinfo})\*(C'\fR.
.PP
This is automatically called by \*(L"\fBnew()\fR\*(R" if the \f(CW\*(C`format\*(C'\fR argument is
not specified.
.ie n .SS """detect_from_mobi_exth()"""
.el .SS "\f(CWdetect_from_mobi_exth()\fP"
.IX Subsection "detect_from_mobi_exth()"
Detects metadata values from the \s-1MOBI EXTH\s0 headers retrieved via
\&\*(L"\fBunpack_mobi_exth()\fR\*(R" and places them into the \f(CW\*(C`detected\*(C'\fR attribute.
.ie n .SS """gen_opf(%args)"""
.el .SS "\f(CWgen_opf(%args)\fP"
.IX Subsection "gen_opf(%args)"
This generates an \s-1OPF\s0 file from detected and specified metadata.  It
does not honor the \f(CW\*(C`nosave\*(C'\fR flag, and will always write its output.
.PP
Normally this is called automatically from inside the \f(CW\*(C`unpack\*(C'\fR
methods, but it can also be called manually after an unpack to write an
\&\s-1OPF\s0 anyway when the \f(CW\*(C`nosave\*(C'\fR flag was set.
.PP
Returns the filename of the \s-1OPF\s0 file.
.PP
\fIArguments\fR
.IX Subsection "Arguments"
.IP "\(bu" 4
\&\f(CW\*(C`opffile\*(C'\fR (optional)
.Sp
If specified, this overrides the object attribute \f(CW\*(C`opffile\*(C'\fR, and
determines the filename to use for the generated \s-1OPF\s0 file.  If not
specified, and the object attribute \f(CW\*(C`opffile\*(C'\fR has somehow been
cleared (the attribute is set during \*(L"\fBnew()\fR\*(R"), it will be generated
by looking at the \f(CW\*(C`textfile\*(C'\fR argument.  If no value can be found, the
method croaks.  If a value was found somewhere other than the object
attribute \f(CW\*(C`opffile\*(C'\fR, then the object attribute is updated to match.
.IP "\(bu" 4
\&\f(CW\*(C`textfile\*(C'\fR (optional)
.Sp
The file containing the main text of the document.  If specified, the
method will attempt to split metadata out of the file and add whatever
remains to the manifest of the \s-1OPF.\s0
.IP "\(bu" 4
\&\f(CW\*(C`mediatype\*(C'\fR (optional)
.Sp
The media type (mime type) of the document specified via \f(CW\*(C`textfile\*(C'\fR.
If \f(CW\*(C`textfile\*(C'\fR is not specified, this argument is ignored.  If
\&\f(CW\*(C`textfile\*(C'\fR is specified, but \f(CW\*(C`mediatype\*(C'\fR is not, the media type
is autodetected.
.ie n .SS """unpack()"""
.el .SS "\f(CWunpack()\fP"
.IX Subsection "unpack()"
This is a dispatcher for the specific unpacking methods needed to
unpack a particular format.  Unless you feel a need to override the
unpacking method specified or detected during object construction, it
is probably better to call this than the specific unpacking methods.
.ie n .SS """unpack_ereader()"""
.el .SS "\f(CWunpack_ereader()\fP"
.IX Subsection "unpack_ereader()"
Unpacks Fictionwise/PeanutPress eReader (\-er.pdb) files.
.ie n .SS """unpack_imp()"""
.el .SS "\f(CWunpack_imp()\fP"
.IX Subsection "unpack_imp()"
Unpacks SoftBook/GEB/REB/eBookWise (.imp) files.
.ie n .SS """unpack_mobi()"""
.el .SS "\f(CWunpack_mobi()\fP"
.IX Subsection "unpack_mobi()"
Unpacks Mobipocket (.prc / .mobi) files.
.ie n .SS """unpack_msreader()"""
.el .SS "\f(CWunpack_msreader()\fP"
.IX Subsection "unpack_msreader()"
Unpacks Microsoft Reader (.lit) files.
.ie n .SS """unpack_palmdoc()"""
.el .SS "\f(CWunpack_palmdoc()\fP"
.IX Subsection "unpack_palmdoc()"
Unpacks PalmDoc / AportisDoc (.pdb) files.
.ie n .SS """unpack_zip()"""
.el .SS "\f(CWunpack_zip()\fP"
.IX Subsection "unpack_zip()"
Unpacks Zip archives (including ePub files).
.SH "BUGS AND LIMITATIONS"
.IX Header "BUGS AND LIMITATIONS"
.IP "\(bu" 4
\&\s-1DRM\s0 isn't handled.  Infrastructure to support this via an
external plug-in module may eventually be built, but it will never
become part of the main module for legal reasons.
.IP "\(bu" 4
Unit tests are incomplete
.IP "\(bu" 4
Documentation is incomplete.  Accessors in particular could
use some cleaning up.
.IP "\(bu" 4
Need to implement setter methods for object attributes
.IP "\(bu" 4
Import/extraction/unpacking is currently limited to PalmDoc,
Mobipocket, and eReader.  Extraction from Microsoft Reader (.lit) and
ePub is also eventually planned.  Other formats may follow from there.
.SH "AUTHOR"
.IX Header "AUTHOR"
Zed Pobre 
.SH "LICENSE AND COPYRIGHT"
.IX Header "LICENSE AND COPYRIGHT"
Copyright 2008 Zed Pobre
.PP
Licensed to the public under the terms of the \s-1GNU GPL,\s0 version 2.