.\" Automatically generated by Pod::Man 2.28 (Pod::Simple 3.28) .\" .\" Standard preamble: .\" ======================================================================== .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' . ds C` . ds C' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is turned on, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .\" .\" Avoid warning from groff about undefined register 'F'. .de IX .. .nr rF 0 .if \n(.g .if rF .nr rF 1 .if (\n(rF:(\n(.g==0)) \{ . if \nF \{ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . if !\nF==2 \{ . nr % 0 . nr F 2 . \} . \} .\} .rr rF .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). .\" Fear. Run. Save yourself. No user-serviceable parts. . \" fudge factors for nroff and troff .if n \{\ . ds #H 0 . ds #V .8m . ds #F .3m . ds #[ \f1 . 
ds #] \fP .\} .if t \{\ . ds #H ((1u-(\\\\n(.fu%2u))*.13m) . ds #V .6m . ds #F 0 . ds #[ \& . ds #] \& .\} . \" simple accents for nroff and troff .if n \{\ . ds ' \& . ds ` \& . ds ^ \& . ds , \& . ds ~ ~ . ds / .\} .if t \{\ . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' .\} . \" troff and (daisy-wheel) nroff accents .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' .ds 8 \h'\*(#H'\(*b\h'-\*(#H' .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] .ds ae a\h'-(\w'a'u*4/10)'e .ds Ae A\h'-(\w'A'u*4/10)'E . \" corrections for vroff .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' . \" for low resolution devices (crt and lpr) .if \n(.H>23 .if \n(.V>19 \ \{\ . ds : e . ds 8 ss . ds o a . ds d- d\h'-1'\(ga . ds D- D\h'-1'\(hy . ds th \o'bp' . ds Th \o'LP' . ds ae ae . ds Ae AE .\} .rm #[ #] #H #V #F C .\" ======================================================================== .\" .IX Title "SCF 3pm" .TH SCF 3pm "2014-08-15" "perl v5.20.0" "User Contributed Perl Documentation" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. 
.if n .ad l .nh .SH "NAME" Bio::SCF \- Perl extension for reading and writing SCF sequence files .SH "SYNOPSIS" .IX Header "SYNOPSIS" use Bio::SCF; .PP # tied interface tie \f(CW%hash\fR,'Bio::SCF','my_scf_file.scf'; .PP my \f(CW$sequence_length\fR = \f(CW$hash\fR{bases_length}; my \f(CW$chromatogram_sample_length\fR = \f(CW$hash\fR{samples_length}; my \f(CW$third_base\fR = \f(CW$hash\fR{bases}[2]; my \f(CW$quality_score\fR = \f(CW$hash\fR{$third_base}[2]; my \f(CW$sample_A_at_time_1400\fR = \f(CW$hash\fR{samples}{A}[1400]; .PP # change the third base and write out new file \&\f(CW$hash\fR{bases}[2] = 'C'; tied (%hash)\->write('new.scf'); .PP # object-oriented interface my \f(CW$scf\fR = Bio::SCF\->new('my_scf_file.scf'); my \f(CW$sequence_length\fR = \f(CW$scf\fR\->bases_length; my \f(CW$chromatogram_sample_length\fR = \f(CW$scf\fR\->samples_length; my \f(CW$third_base\fR = \f(CW$scf\fR\->\fIbases\fR\|(2); my \f(CW$quality_score\fR = \f(CW$scf\fR\->\fIscore\fR\|(2); my \f(CW$sample_A_at_time_1400\fR = \f(CW$scf\fR\->sample('A',1400); .PP # change the third base and write out new file \&\f(CW$scf\fR\->bases(2,'C'); \&\f(CW$scf\fR\->write('new.scf'); .SH "DESCRIPTION" .IX Header "DESCRIPTION" This module provides a Perl interface to \s-1SCF DNA\s0 sequencing files. It has both a tied hash and an object-oriented interface. It provides the ability to read fields from \s-1SCF\s0 files and limited ability to modify them and write them back. .SS "Tied Methods" .IX Subsection "Tied Methods" .ie n .IP "$obj = tie %hash,'Bio::SCF',$filename_or_handle" 4 .el .IP "\f(CW$obj\fR = tie \f(CW%hash\fR,'Bio::SCF',$filename_or_handle" 4 .IX Item "$obj = tie %hash,'Bio::SCF',$filename_or_handle" Tie the Bio::SCF module to a filename or filehandle. If successful, \fItie()\fR will return the object. .ie n .IP "$value = $hash{'key'}" 4 .el .IP "\f(CW$value\fR = \f(CW$hash\fR{'key'}" 4 .IX Item "$value = $hash{'key'}" Fetch a field from the \s-1SCF\s0 file. 
Valid keys are as follows: .Sp .Vb 2 \& Key Value \& \-\-\- \-\-\-\-\- \& \& bases_length Number of called bases in the sequence (read\-only) \& \& samples_length Number of samples in the file (read\-only) \& \& version SCF version (read\-only) \& \& code_set Code set used to code bases (read\-only) \& \& comments Structured comments (read\-only) \& \& bases Array reference to a list of the base calls \& \& index Array reference to a list of the sample position \& for each of the base calls (e.g. the position of \& the base calling peak) \& \& A An array reference that can be used to determine the \& probability that the base in position $i is an "A". \& \& G An array reference that can be used to determine the \& probability that the base in position $i is a "G". \& \& C An array reference that can be used to determine the \& probability that the base in position $i is a "C". \& \& T An array reference that can be used to determine the \& probability that the base in position $i is a "T". \& \& samples A hash reference with keys "A", "C", "G" and "T". The \& value of each hash is an array reference to the list \& of intensity values for each sample. 
.Ve .Sp To get the length of the called sequence: \f(CW$scf\fR{bases_length} .Sp To get the value of the called sequence at position 3: \f(CW$scf\fR{bases}[3] .Sp To get the sample position at which base 3 was called: \f(CW$scf\fR{index}[3] .Sp To get the value of the \*(L"C\*(R" curve under base 3: \f(CW$scf\fR{samples}{C}[$scf{index}[3]] .Sp To get the probability that base 3 is a \*(L"C\*(R": \f(CW$scf\fR{C}[3] .Sp To print out the chromatogram as a four-column list: .Sp .Vb 5 \& my $samples = $scf{samples}; \& for (my $i = 0; $i<$scf{samples_length}; $i++) { \& print join "\et",$samples\->{C}[$i],$samples\->{G}[$i], \& $samples\->{A}[$i],$samples\->{T}[$i],"\en"; \& } .Ve .ie n .IP "$scf{bases}[$index] = $new_value" 4 .el .IP "\f(CW$scf\fR{bases}[$index] = \f(CW$new_value\fR" 4 .IX Item "$scf{bases}[$index] = $new_value" The base call probability scores, base call values, base call positions, and sample values are all read/write, so that you can change them: .Sp .Vb 1 \& $samples\->{C}[500] = 0; .Ve .ie n .IP "each %scf" 4 .el .IP "each \f(CW%scf\fR" 4 .IX Item "each %scf" Will return keys and values for the tied object. .ie n .IP "delete $scf{$key}" 4 .el .IP "delete \f(CW$scf\fR{$key}" 4 .IX Item "delete $scf{$key}" .PD 0 .ie n .IP "%scf = ()" 4 .el .IP "\f(CW%scf\fR = ()" 4 .IX Item "%scf = ()" .PD These operations are not supported and will result in a run-time error. .SS "Object Methods" .IX Subsection "Object Methods" .ie n .IP "$scf = Bio::SCF\->new($scf_file_or_filehandle)" 4 .el .IP "\f(CW$scf\fR = Bio::SCF\->new($scf_file_or_filehandle)" 4 .IX Item "$scf = Bio::SCF->new($scf_file_or_filehandle)" Create a new Bio::SCF object. The single argument is the name of a file or a previously-opened filehandle. If successful, \fInew()\fR returns the Bio::SCF object. .ie n .IP "$length = $scf\->bases_length" 4 .el .IP "\f(CW$length\fR = \f(CW$scf\fR\->bases_length" 4 .IX Item "$length = $scf->bases_length" Return the length of the called sequence. 
.ie n .IP "$samples = $scf\->samples_length" 4 .el .IP "\f(CW$samples\fR = \f(CW$scf\fR\->samples_length" 4 .IX Item "$samples = $scf->samples_length" Return the length of the list of chromatogram samples in the file. There are four sample series, one for each base. .ie n .IP "$sample_size = $scf\->sample_size" 4 .el .IP "\f(CW$sample_size\fR = \f(CW$scf\fR\->sample_size" 4 .IX Item "$sample_size = $scf->sample_size" Returns the size of each sample (bytes). .ie n .IP "$code_set = $scf\->code_set" 4 .el .IP "\f(CW$code_set\fR = \f(CW$scf\fR\->code_set" 4 .IX Item "$code_set = $scf->code_set" Return the code set used for base calling. .ie n .IP "$base = $scf\->base($base_no [,$new_base])" 4 .el .IP "\f(CW$base\fR = \f(CW$scf\fR\->base($base_no [,$new_base])" 4 .IX Item "$base = $scf->base($base_no [,$new_base])" Get the base call at the indicated position. If a new value is provided, will change the base call to the indicated base. .ie n .IP "$index = $scf\->index($base_no [,$new_index])" 4 .el .IP "\f(CW$index\fR = \f(CW$scf\fR\->index($base_no [,$new_index])" 4 .IX Item "$index = $scf->index($base_no [,$new_index])" Translates the indicated base position into the sample index for that called base. Here is how to fetch the intensity values at base number 5: .Sp .Vb 2 \& my $sample_index = $scf\->index(5); \& my ($g,$a,$t,$c) = map { $scf\->sample($_,$sample_index) } qw(G A T C); .Ve .Sp If you provide a new value for the sample index, it will be updated. .ie n .IP "$base_score = $scf\->base_score($base,$base_no [,$new_score])" 4 .el .IP "\f(CW$base_score\fR = \f(CW$scf\fR\->base_score($base,$base_no [,$new_score])" 4 .IX Item "$base_score = $scf->base_score($base,$base_no [,$new_score])" Get the probability that the indicated base occurs at position \&\f(CW$base_no\fR. 
Here is how to fetch the probabilities for the four bases at base position 5: .Sp .Vb 1 \& my ($g,$a,$t,$c) = map { $scf\->base_score($_,5) } qw(G A T C); .Ve .Sp If you provide a new value for the base probability score, it will be updated. .ie n .IP "$score = $scf\->score($base_no)" 4 .el .IP "\f(CW$score\fR = \f(CW$scf\fR\->score($base_no)" 4 .IX Item "$score = $scf->score($base_no)" Get the quality score for the called base at the indicated position. .ie n .IP "$intensity = $scf\->sample($base,$sample_index [,$new_value])" 4 .el .IP "\f(CW$intensity\fR = \f(CW$scf\fR\->sample($base,$sample_index [,$new_value])" 4 .IX Item "$intensity = $scf->sample($base,$sample_index [,$new_value])" Get the intensity value for the channel corresponding to the indicated base at the indicated sample index. You may update the intensity by providing a new value. .ie n .IP "$scf\->write('file_path')" 4 .el .IP "\f(CW$scf\fR\->write('file_path')" 4 .IX Item "$scf->write('file_path')" Write the updated \s-1SCF\s0 file to the indicated file path. .ie n .IP "$scf\->fwrite($file_handle)" 4 .el .IP "\f(CW$scf\fR\->fwrite($file_handle)" 4 .IX Item "$scf->fwrite($file_handle)" Write the updated \s-1SCF\s0 file to the indicated filehandle. The file must previously have been opened for writing. The filehandle is actually reopened in append mode, so you can call \fIfwrite()\fR multiple times and intersperse your own record separators. 
.SH "EXAMPLES" .IX Header "EXAMPLES" Reading information from a preexisting file: .PP .Vb 6 \& tie %scf, \*(AqBio::SCF\*(Aq, "data.scf"; \& print "Base calls:\en"; \& for ( my $i=0; $i<$scf{bases_length}; $i++ ){ \& print "$scf{bases}[$i] "; \& } \& print "\en"; \& \& print "Intensity values for the A curve\en"; \& for ( my $i=0; $i<$scf{samples_length}; $i++ ){ \& print "$scf{samples}{A}[$i] "; \& } \& print "\en"; .Ve .PP Another example, where we set all bases to \*(L"A\*(R", indexes to 10 and write the file back: .PP .Vb 6 \& my $obj = tie %scf,\*(AqBio::SCF\*(Aq,\*(Aqdata.scf\*(Aq; \& for (0...@{$scf{bases}}\-1){ \& $scf{bases}[$_] = "A"; \& $obj\->set(\*(Aqindex\*(Aq, $_, 10); \& } \& $obj\->write(\*(Aqdata.scf\*(Aq); .Ve .SH "AUTHOR" .IX Header "AUTHOR" Dmitri Priimak, priimak@cshl.org (1999) .PP with some cleanups by Lincoln Stein, lstein@cshl.edu (2006) .PP This package and its accompanying libraries is free software; you can redistribute it and/or modify it under the terms of the \s-1GPL \s0(either version 1, or at your option, any later version) or the Artistic License 2.0. Refer to \s-1LICENSE\s0 for the full license text. In addition, please see \s-1DISCLAIMER\s0 for disclaimers of warranty. .SH "SEE ALSO" .IX Header "SEE ALSO" \&\fIperl\fR\|(1).