.\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.16) .\" .\" Standard preamble: .\" ======================================================================== .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is turned on, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .ie \nF \{\ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . nr % 0 . rr F .\} .el \{\ . de IX .. .\} .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). .\" Fear. Run. Save yourself. No user-serviceable parts. . \" fudge factors for nroff and troff .if n \{\ . ds #H 0 . ds #V .8m . ds #F .3m . ds #[ \f1 . ds #] \fP .\} .if t \{\ . ds #H ((1u-(\\\\n(.fu%2u))*.13m) . ds #V .6m . ds #F 0 . ds #[ \& . ds #] \& .\} . \" simple accents for nroff and troff .if n \{\ . ds ' \& . 
ds ` \& . ds ^ \& . ds , \& . ds ~ ~ . ds / .\} .if t \{\ . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' .\} . \" troff and (daisy-wheel) nroff accents .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' .ds 8 \h'\*(#H'\(*b\h'-\*(#H' .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] .ds ae a\h'-(\w'a'u*4/10)'e .ds Ae A\h'-(\w'A'u*4/10)'E . \" corrections for vroff .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' . \" for low resolution devices (crt and lpr) .if \n(.H>23 .if \n(.V>19 \ \{\ . ds : e . ds 8 ss . ds o a . ds d- d\h'-1'\(ga . ds D- D\h'-1'\(hy . ds th \o'bp' . ds Th \o'LP' . ds ae ae . ds Ae AE .\} .rm #[ #] #H #V #F C .\" ======================================================================== .\" .IX Title "Bio::ASN1::EntrezGene::Indexer 3pm" .TH Bio::ASN1::EntrezGene::Indexer 3pm "2005-05-04" "perl v5.14.2" "User Contributed Perl Documentation" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l .nh .SH "NAME" Bio::ASN1::EntrezGene::Indexer \- Indexes NCBI Entrez Gene files. 
.SH "SYNOPSIS" .IX Header "SYNOPSIS" .Vb 1 \& use Bio::ASN1::EntrezGene::Indexer; \& \& # creating & using the index is just a few lines \& my $inx = Bio::ASN1::EntrezGene::Indexer\->new( \& \-filename => \*(Aqentrezgene.idx\*(Aq, \& \-write_flag => \*(AqWRITE\*(Aq); # needed for make_index call, but if opening \& # existing index file, don\*(Aqt set write flag! \& $inx\->make_index(\*(AqHomo_sapiens\*(Aq, \*(AqMus_musculus\*(Aq, \*(AqRattus_norvegicus\*(Aq); \& my $seq = $inx\->fetch(10); # Bio::Seq obj for Entrez Gene #10 \& # alternatively, if one prefers just a data structure instead of objects \& $seq = $inx\->fetch_hash(10); # a hash produced by Bio::ASN1::EntrezGene \& # that contains all data in the Entrez Gene record \& \& # note that in case you wonder, you can get the files \*(AqHomo_sapiens\*(Aq \& # from NCBI Entrez Gene ftp download, DATA/ASN/Mammalia directory .Ve .SH "PREREQUISITE" .IX Header "PREREQUISITE" Bio::ASN1::EntrezGene, Bioperl version that contains Stefan Kirov's entrezgene.pm and all dependencies therein. .SH "INSTALLATION" .IX Header "INSTALLATION" Same as Bio::ASN1::EntrezGene .SH "DESCRIPTION" .IX Header "DESCRIPTION" Bio::ASN1::EntrezGene::Indexer is a Perl Indexer for \s-1NCBI\s0 Entrez Gene genome databases. It processes an \s-1ASN\s0.1\-formatted Entrez Gene record and stores the file position for each record in a way compliant with the Bioperl standard (in fact it's a subclass of Bioperl's index objects). .PP Note that this module does not parse records, because it needs to run fast and grab only the gene ids. For parsing records, use Bio::ASN1::EntrezGene, or better yet, use Bio::SeqIO, format 'entrezgene'. .PP It takes this module (version 1.07) 21 seconds to index the human genome Entrez Gene file (Apr. 5/2005 download) on one 2.4 GHz Intel Xeon processor. .SH "SEE ALSO" .IX Header "SEE ALSO" For details on various parsers I generated for Entrez Gene, example scripts that use/benchmark the modules, please see <http://sourceforge.net/projects/egparser/>. 
Those other parsers etc. are included in the V1.05 download. .SH "AUTHOR" .IX Header "AUTHOR" Dr. Mingyi Liu .SH "COPYRIGHT" .IX Header "COPYRIGHT" The Bio::ASN1::EntrezGene module and its related modules and scripts are copyright (c) 2005 Mingyi Liu, \s-1GPC\s0 Biotech \s-1AG\s0 and Altana Research Institute. All rights reserved. I created these modules when working on a collaboration project between these two companies. Therefore, special thanks to the two companies for allowing the release of the code into the public domain. .PP You may use and distribute them under the terms of Perl itself or the \&\s-1GPL\s0 (<http://www.gnu.org/copyleft/gpl.html>). .SH "CITATION" .IX Header "CITATION" Liu, M and Grigoriev, A (2005) \*(L"Fast Parsers for Entrez Gene\*(R" Bioinformatics. In press .SH "OPERATING SYSTEMS SUPPORTED" .IX Header "OPERATING SYSTEMS SUPPORTED" Any \s-1OS\s0 that Perl & Bioperl run on. .SH "METHODS" .IX Header "METHODS" .SS "fetch" .IX Subsection "fetch" .Vb 6 \& Parameters: $geneid \- id for the Entrez Gene record to be retrieved \& Example: my $seq = $indexer\->fetch(10); # get Entrez Gene #10 \& Function: fetch the data for the given Entrez Gene id. \& Returns: A Bio::Seq object produced by Bio::SeqIO::entrezgene \& Notes: One needs to have Bio::SeqIO::entrezgene installed before \& calling this function! .Ve .SS "fetch_hash" .IX Subsection "fetch_hash" .Vb 7 \& Parameters: $geneid \- id for the Entrez Gene record to be retrieved \& Example: my $hash = $indexer\->fetch_hash(10); # get Entrez Gene #10 \& Function: fetch a hash produced by Bio::ASN1::EntrezGene for the given Entrez \& Gene id. \& Returns: A data structure containing all data items from the Entrez \& Gene record. \& Notes: Alternative to fetch() .Ve .SS "_file_handle" .IX Subsection "_file_handle" .Vb 10 \& Title : _file_handle \& Usage : $fh = $index\->_file_handle( INT ) \& Function: Returns an open filehandle for the file \& index INT. On opening a new filehandle it \& caches it in the @{$index\->_filehandle} array. 
\& If the requested filehandle is already open, \& it simply returns it from the array. \& Example : $first_file_indexed = $index\->_file_handle( 0 ); \& Returns : ref to a filehandle \& Args : INT \& Notes : This function is copied from Bio::Index::Abstract. Once that module \& changes file handle code like I do below to fit perl 5.005_03, this \& sub will be removed from this module. .Ve