.\" Automatically generated by Pod::Man 2.28 (Pod::Simple 3.28) .\" .\" Standard preamble: .\" ======================================================================== .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' . ds C` . ds C' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is turned on, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .\" .\" Avoid warning from groff about undefined register 'F'. .de IX .. .nr rF 0 .if \n(.g .if rF .nr rF 1 .if (\n(rF:(\n(.g==0)) \{ . if \nF \{ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . if !\nF==2 \{ . nr % 0 . nr F 2 . \} . \} .\} .rr rF .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). .\" Fear. Run. Save yourself. No user-serviceable parts. . \" fudge factors for nroff and troff .if n \{\ . ds #H 0 . ds #V .8m . ds #F .3m . ds #[ \f1 . 
ds #] \fP .\} .if t \{\ . ds #H ((1u-(\\\\n(.fu%2u))*.13m) . ds #V .6m . ds #F 0 . ds #[ \& . ds #] \& .\} . \" simple accents for nroff and troff .if n \{\ . ds ' \& . ds ` \& . ds ^ \& . ds , \& . ds ~ ~ . ds / .\} .if t \{\ . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' .\} . \" troff and (daisy-wheel) nroff accents .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' .ds 8 \h'\*(#H'\(*b\h'-\*(#H' .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] .ds ae a\h'-(\w'a'u*4/10)'e .ds Ae A\h'-(\w'A'u*4/10)'E . \" corrections for vroff .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' . \" for low resolution devices (crt and lpr) .if \n(.H>23 .if \n(.V>19 \ \{\ . ds : e . ds 8 ss . ds o a . ds d- d\h'-1'\(ga . ds D- D\h'-1'\(hy . ds th \o'bp' . ds Th \o'LP' . ds ae ae . ds Ae AE .\} .rm #[ #] #H #V #F C .\" ======================================================================== .\" .IX Title "PERLOPENTUT 1" .TH PERLOPENTUT 1 "2014-12-27" "perl v5.20.2" "Perl Programmers Reference Guide" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. 
.if n .ad l .nh .SH "NAME" perlopentut \- simple recipes for opening files and pipes in Perl .SH "DESCRIPTION" .IX Header "DESCRIPTION" Whenever you do I/O on a file in Perl, you do so through what in Perl is called a \fBfilehandle\fR. A filehandle is an internal name for an external file. It is the job of the \f(CW\*(C`open\*(C'\fR function to make the association between the internal name and the external name, and it is the job of the \f(CW\*(C`close\*(C'\fR function to break that association. .PP For your convenience, Perl sets up a few special filehandles that are already open when you run. These include \f(CW\*(C`STDIN\*(C'\fR, \f(CW\*(C`STDOUT\*(C'\fR, \f(CW\*(C`STDERR\*(C'\fR, and \f(CW\*(C`ARGV\*(C'\fR. Since those are pre-opened, you can use them right away without having to go to the trouble of opening them yourself: .PP .Vb 1 \& print STDERR "This is a debugging message.\en"; \& \& print STDOUT "Please enter something: "; \& $response = <STDIN> // die "how come no input?"; \& print STDOUT "Thank you!\en"; \& \& while (<ARGV>) { ... } .Ve .PP As you see from those examples, \f(CW\*(C`STDOUT\*(C'\fR and \f(CW\*(C`STDERR\*(C'\fR are output handles, and \f(CW\*(C`STDIN\*(C'\fR and \f(CW\*(C`ARGV\*(C'\fR are input handles. They are in all capital letters because they are reserved to Perl, much like the \f(CW@ARGV\fR array and the \f(CW%ENV\fR hash are. Their external associations were set up by your shell. .PP You will need to open every other filehandle on your own. 
Although there are many variants, the most common way to call Perl's \fIopen()\fR function is with three arguments and one return value: .PP \&\f(CW\*(C` \f(CIOK\f(CW = open(\f(CIHANDLE\f(CW, \f(CIMODE\f(CW, \f(CIPATHNAME\f(CW)\*(C'\fR .PP Where: .IP "\fI\s-1OK\s0\fR" 4 .IX Item "OK" will be some defined value if the open succeeds, but \&\f(CW\*(C`undef\*(C'\fR if it fails; .IP "\fI\s-1HANDLE\s0\fR" 4 .IX Item "HANDLE" should be an undefined scalar variable to be filled in by the \&\f(CW\*(C`open\*(C'\fR function if it succeeds; .IP "\fI\s-1MODE\s0\fR" 4 .IX Item "MODE" is the access mode and the encoding format to open the file with; .IP "\fI\s-1PATHNAME\s0\fR" 4 .IX Item "PATHNAME" is the external name of the file you want opened. .PP Most of the complexity of the \f(CW\*(C`open\*(C'\fR function lies in the many possible values that the \fI\s-1MODE\s0\fR parameter can take on. .PP One last thing before we show you how to open files: opening files does not (usually) automatically lock them in Perl. See perlfaq5 for how to lock. .SH "Opening Text Files" .IX Header "Opening Text Files" .SS "Opening Text Files for Reading" .IX Subsection "Opening Text Files for Reading" If you want to read from a text file, first open it in read-only mode like this: .PP .Vb 3 \& my $filename = "/some/path/to/a/textfile/goes/here"; \& my $encoding = ":encoding(UTF\-8)"; \& my $handle = undef; # this will be filled in on success \& \& open($handle, "< $encoding", $filename) \& || die "$0: can\*(Aqt open $filename for reading: $!"; .Ve .PP As with the shell, in Perl the \f(CW"<"\fR is used to open the file in read-only mode. If it succeeds, Perl allocates a brand new filehandle for you and fills in your previously undefined \f(CW$handle\fR argument with a reference to that handle. .PP Now you may use functions like \f(CW\*(C`readline\*(C'\fR, \f(CW\*(C`read\*(C'\fR, \f(CW\*(C`getc\*(C'\fR, and \&\f(CW\*(C`sysread\*(C'\fR on that handle. 
Probably the most common input function is the one that looks like an operator: .PP .Vb 2 \& $line = readline($handle); \& $line = <$handle>; # same thing .Ve .PP Because the \f(CW\*(C`readline\*(C'\fR function returns \f(CW\*(C`undef\*(C'\fR at end of file or upon error, you will sometimes see it used this way: .PP .Vb 7 \& $line = <$handle>; \& if (defined $line) { \& # do something with $line \& } \& else { \& # $line is not valid, so skip it \& } .Ve .PP You can also just quickly \f(CW\*(C`die\*(C'\fR on an undefined value this way: .PP .Vb 1 \& $line = <$handle> // die "no input found"; .Ve .PP However, if hitting \s-1EOF\s0 is an expected and normal event, you do not want to exit simply because you have run out of input. Instead, you probably just want to exit an input loop. You can then test to see if an actual error has caused the loop to terminate, and act accordingly: .PP .Vb 6 \& while (<$handle>) { \& # do something with data in $_ \& } \& if ($!) { \& die "unexpected error while reading from $filename: $!"; \& } .Ve .PP \&\fBA Note on Encodings\fR: Having to specify the text encoding every time might seem a bit of a bother. To set up a default encoding for \f(CW\*(C`open\*(C'\fR so that you don't have to supply it each time, you can use the \f(CW\*(C`open\*(C'\fR pragma: .PP .Vb 1 \& use open qw< :encoding(UTF\-8) >; .Ve .PP Once you've done that, you can safely omit the encoding part of the open mode: .PP .Vb 2 \& open($handle, "<", $filename) \& || die "$0: can\*(Aqt open $filename for reading: $!"; .Ve .PP But never use the bare \f(CW"<"\fR without having set up a default encoding first. Otherwise, Perl cannot know which of the many, many, many possible flavors of text file you have, and Perl will have no idea how to correctly map the data in your file into actual characters it can work with. 
Other common encoding formats include \f(CW"ASCII"\fR, \f(CW"ISO\-8859\-1"\fR, \&\f(CW"ISO\-8859\-15"\fR, \f(CW"Windows\-1252"\fR, \f(CW"MacRoman"\fR, and even \f(CW"UTF\-16LE"\fR. See perlunitut for more about encodings. .SS "Opening Text Files for Writing" .IX Subsection "Opening Text Files for Writing" When you want to write to a file, you first have to decide what to do about any existing contents of that file. You have two basic choices here: to preserve or to clobber. .PP If you want to preserve any existing contents, then you want to open the file in append mode. As in the shell, in Perl you use \f(CW">>"\fR to open an existing file in append mode. \f(CW">>"\fR creates the file if it does not already exist. .PP .Vb 3 \& my $handle = undef; \& my $filename = "/some/path/to/a/textfile/goes/here"; \& my $encoding = ":encoding(UTF\-8)"; \& \& open($handle, ">> $encoding", $filename) \& || die "$0: can\*(Aqt open $filename for appending: $!"; .Ve .PP Now you can write to that filehandle using any of \f(CW\*(C`print\*(C'\fR, \f(CW\*(C`printf\*(C'\fR, \&\f(CW\*(C`say\*(C'\fR, \f(CW\*(C`write\*(C'\fR, or \f(CW\*(C`syswrite\*(C'\fR. .PP As noted above, if the file does not already exist, then the append-mode open will create it for you. But if the file does already exist, its contents are safe from harm because you will be adding your new text past the end of the old text. .PP On the other hand, sometimes you want to clobber whatever might already be there. To empty out a file before you start writing to it, you can open it in write-only mode: .PP .Vb 3 \& my $handle = undef; \& my $filename = "/some/path/to/a/textfile/goes/here"; \& my $encoding = ":encoding(UTF\-8)"; \& \& open($handle, "> $encoding", $filename) \& || die "$0: can\*(Aqt open $filename in write\-open mode: $!"; .Ve .PP Here again Perl works just like the shell in that the \f(CW">"\fR clobbers an existing file. 
.PP As with the append mode, when you open a file in write-only mode, you can now write to that filehandle using any of \f(CW\*(C`print\*(C'\fR, \f(CW\*(C`printf\*(C'\fR, \&\f(CW\*(C`say\*(C'\fR, \f(CW\*(C`write\*(C'\fR, or \f(CW\*(C`syswrite\*(C'\fR. .PP What about read-write mode? You should probably pretend it doesn't exist, because opening text files in read-write mode is unlikely to do what you would like. See perlfaq5 for details. .SH "Opening Binary Files" .IX Header "Opening Binary Files" If the file to be opened contains binary data instead of text characters, then the \f(CW\*(C`MODE\*(C'\fR argument to \f(CW\*(C`open\*(C'\fR is a little different. Instead of specifying the encoding, you tell Perl that your data are in raw bytes. .PP .Vb 3 \& my $filename = "/some/path/to/a/binary/file/goes/here"; \& my $encoding = ":raw :bytes"; \& my $handle = undef; # this will be filled in on success .Ve .PP And then open as before, choosing \f(CW"<"\fR, \f(CW">>"\fR, or \&\f(CW">"\fR as needed: .PP .Vb 2 \& open($handle, "< $encoding", $filename) \& || die "$0: can\*(Aqt open $filename for reading: $!"; \& \& open($handle, ">> $encoding", $filename) \& || die "$0: can\*(Aqt open $filename for appending: $!"; \& \& open($handle, "> $encoding", $filename) \& || die "$0: can\*(Aqt open $filename in write\-open mode: $!"; .Ve .PP Alternately, you can change to binary mode on an existing handle this way: .PP .Vb 1 \& binmode($handle) || die "cannot binmode handle"; .Ve .PP This is especially handy for the handles that Perl has already opened for you. .PP .Vb 2 \& binmode(STDIN) || die "cannot binmode STDIN"; \& binmode(STDOUT) || die "cannot binmode STDOUT"; .Ve .PP You can also pass \f(CW\*(C`binmode\*(C'\fR an explicit encoding to change it on the fly. 
This isn't exactly \*(L"binary\*(R" mode, but we still use \f(CW\*(C`binmode\*(C'\fR to do it: .PP .Vb 2 \& binmode(STDIN, ":encoding(MacRoman)") || die "cannot binmode STDIN"; \& binmode(STDOUT, ":encoding(UTF\-8)") || die "cannot binmode STDOUT"; .Ve .PP Once you have your binary file properly opened in the right mode, you can use all the same Perl I/O functions as you used on text files. However, you may wish to use the fixed-size \f(CW\*(C`read\*(C'\fR instead of the variable-sized \&\f(CW\*(C`readline\*(C'\fR for your input. .PP Here's an example of how to copy a binary file: .PP .Vb 3 \& my $BUFSIZ = 64 * (2 ** 10); \& my $name_in = "/some/input/file"; \& my $name_out = "/some/output/file"; \& \& my($in_fh, $out_fh, $buffer); \& \& open($in_fh, "<", $name_in) \& || die "$0: cannot open $name_in for reading: $!"; \& open($out_fh, ">", $name_out) \& || die "$0: cannot open $name_out for writing: $!"; \& \& for my $fh ($in_fh, $out_fh) { \& binmode($fh) || die "binmode failed"; \& } \& \& while (read($in_fh, $buffer, $BUFSIZ)) { \& unless (print $out_fh $buffer) { \& die "couldn\*(Aqt write to $name_out: $!"; \& } \& } \& \& close($in_fh) || die "couldn\*(Aqt close $name_in: $!"; \& close($out_fh) || die "couldn\*(Aqt close $name_out: $!"; .Ve .SH "Opening Pipes" .IX Header "Opening Pipes" To be announced. .SH "Low-level File Opens via sysopen" .IX Header "Low-level File Opens via sysopen" To be announced. Or deleted. .SH "SEE ALSO" .IX Header "SEE ALSO" To be announced. .SH "AUTHOR and COPYRIGHT" .IX Header "AUTHOR and COPYRIGHT" Copyright 2013 Tom Christiansen. .PP This documentation is free; you can redistribute it and/or modify it under the same terms as Perl itself.