.\" Automatically generated by Pod::Man 4.09 (Pod::Simple 3.35) .\" .\" Standard preamble: .\" ======================================================================== .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' . ds C` . ds C' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is >0, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .\" .\" Avoid warning from groff about undefined register 'F'. .de IX .. .if !\nF .nr F 0 .if \nF>0 \{\ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . if !\nF==2 \{\ . nr % 0 . nr F 2 . \} .\} .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). .\" Fear. Run. Save yourself. No user-serviceable parts. . \" fudge factors for nroff and troff .if n \{\ . ds #H 0 . ds #V .8m . ds #F .3m . ds #[ \f1 . ds #] \fP .\} .if t \{\ . ds #H ((1u-(\\\\n(.fu%2u))*.13m) . 
ds #V .6m . ds #F 0 . ds #[ \& . ds #] \& .\} . \" simple accents for nroff and troff .if n \{\ . ds ' \& . ds ` \& . ds ^ \& . ds , \& . ds ~ ~ . ds / .\} .if t \{\ . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' .\} . \" troff and (daisy-wheel) nroff accents .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' .ds 8 \h'\*(#H'\(*b\h'-\*(#H' .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] .ds ae a\h'-(\w'a'u*4/10)'e .ds Ae A\h'-(\w'A'u*4/10)'E . \" corrections for vroff .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' . \" for low resolution devices (crt and lpr) .if \n(.H>23 .if \n(.V>19 \ \{\ . ds : e . ds 8 ss . ds o a . ds d- d\h'-1'\(ga . ds D- D\h'-1'\(hy . ds th \o'bp' . ds Th \o'LP' . ds ae ae . ds Ae AE .\} .rm #[ #] #H #V #F C .\" ======================================================================== .\" .IX Title "Bio::DB::SeqFeature::Store 3pm" .TH Bio::DB::SeqFeature::Store 3pm "2018-10-27" "perl v5.26.2" "User Contributed Perl Documentation" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. 
.if n .ad l .nh .SH "NAME" Bio::DB::SeqFeature::Store \-\- Storage and retrieval of sequence annotation data .SH "SYNOPSIS" .IX Header "SYNOPSIS" .Vb 1 \& use Bio::DB::SeqFeature::Store; \& \& # Open the feature database \& my $db = Bio::DB::SeqFeature::Store\->new( \-adaptor => \*(AqDBI::mysql\*(Aq, \& \-dsn => \*(Aqdbi:mysql:test\*(Aq, \& \-create => 1 ); \& \& # Get a feature from somewhere \& my $feature = Bio::SeqFeature::Generic\->new(...); \& \& # Store it \& $db\->store($feature) or die "Couldn\*(Aqt store!"; \& \& # If absent, a primary ID is added to the feature when it is stored in the \& # database. Retrieve the primary ID \& my $id = $feature\->primary_id; \& \& # Get the feature back out \& my $feature = $db\->fetch($id); \& \& # .... which is identical to \& my $feature = $db\->get_feature_by_primary_id($id); \& \& # Change the feature and update it \& $feature\->start(100); \& $db\->store($feature) or die "Couldn\*(Aqt update!"; \& \& # Get all features at once \& my @features = $db\->features( ); \& \& # Retrieve multiple features by primary id \& my @features = $db\->fetch_many(@list_of_ids); \& \& # ...by name \& @features = $db\->get_features_by_name(\*(AqZK909\*(Aq); \& \& # ...by alias \& @features = $db\->get_features_by_alias(\*(Aqsma\-3\*(Aq); \& \& # ...by type \& @features = $db\->get_features_by_type(\*(Aqgene\*(Aq); \& \& # ...by location \& @features = $db\->get_features_by_location(\-seq_id=>\*(AqChr1\*(Aq,\-start=>4000,\-end=>600000); \& \& # ...by attribute \& @features = $db\->get_features_by_attribute({description => \*(Aqprotein kinase\*(Aq}); \& \& # ...by the GFF "Note" field \& @result_list = $db\->search_notes(\*(Aqkinase\*(Aq); \& \& # ...by arbitrary combinations of selectors \& @features = $db\->features(\-name => $name, \& \-type => $types, \& \-seq_id => $seqid, \& \-start => $start, \& \-end => $end, \& \-attributes => $attributes); \& \& # Loop through the features using an iterator \& my $iterator = $db\->get_seq_stream(\-name => 
$name, \& \-type => $types, \& \-seq_id => $seqid, \& \-start => $start, \& \-end => $end, \& \-attributes => $attributes); \& \& while (my $feature = $iterator\->next_seq) { \& # do something with the feature \& } \& \& # ...limiting the search to a particular region \& my $segment = $db\->segment(\*(AqChr1\*(Aq,5000=>6000); \& my @features = $segment\->features(\-type=>[\*(AqmRNA\*(Aq,\*(Aqmatch\*(Aq]); \& \& # Getting coverage statistics across a region \& my $summary = $db\->feature_summary(\*(AqChr1\*(Aq,10_000=>1_110_000); \& my ($bins) = $summary\->get_tag_values(\*(Aqcoverage\*(Aq); \& my $first_bin = $bins\->[0]; \& \& # Getting & storing sequence information \& # Warning: this returns a string, and not a PrimarySeq object \& $db\->insert_sequence(\*(AqChr1\*(Aq,\*(AqGATCCCCCGGGATTCCAAAA...\*(Aq); \& my $sequence = $db\->fetch_sequence(\*(AqChr1\*(Aq,5000=>6000); \& \& # What feature types are defined in the database? \& my @types = $db\->types; \& \& # Create a new feature in the database \& my $feature = $db\->new_feature(\-primary_tag => \*(AqmRNA\*(Aq, \& \-seq_id => \*(Aqchr3\*(Aq, \& \-start => 10000, \& \-end => 11000); \& \& # Load an entire GFF3 file, using the GFF3 loader... \& my $loader = Bio::DB::SeqFeature::Store::GFF3Loader\->new(\-store => $db, \& \-verbose => 1, \& \-fast => 1); \& \& $loader\->load(\*(Aq./my_genome.gff3\*(Aq); .Ve .SH "DESCRIPTION" .IX Header "DESCRIPTION" Bio::DB::SeqFeature::Store implements the Bio::SeqFeature::CollectionI interface to allow you to persistently store Bio::SeqFeatureI objects in a database and later to retrieve them by a variety of searches. This module is similar to the older Bio::DB::GFF module, with the following differences: .IP "1." 4 No limitation on Bio::SeqFeatureI implementations .Sp Unlike Bio::DB::GFF, Bio::DB::SeqFeature::Store works with any Bio::SeqFeatureI object. .IP "2." 
4 No limitation on nesting of features & subfeatures .Sp Bio::DB::GFF is limited to features that have at most one level of subfeature. Bio::DB::SeqFeature::Store can work with features that have unlimited levels of nesting. .IP "3." 4 No aggregators .Sp The aggregator architecture, which was necessary to impose order on the \s-1GFF2\s0 files that Bio::DB::GFF works with, does not apply to Bio::DB::SeqFeature::Store. It is intended to store features that obey well-defined ontologies, such as the Sequence Ontology (http://song.sourceforge.net). .IP "4." 4 No relative locations .Sp All locations defined by this module are relative to an absolute sequence \s-1ID,\s0 unlike Bio::DB::GFF which allows you to define the location of one feature relative to another. .PP We'll discuss major concepts in Bio::DB::SeqFeature::Store and then describe how to use the module. .SS "Adaptors" .IX Subsection "Adaptors" Bio::DB::SeqFeature::Store is designed to work with a variety of storage back ends called \*(L"adaptors.\*(R" Adaptors are subclasses of Bio::DB::SeqFeature::Store and provide the interface between the \&\fIstore()\fR and \fIfetch()\fR methods and the physical database. Currently the number of adaptors is quite limited, but the number will grow soon. .IP "memory" 4 .IX Item "memory" An implementation that stores all data in memory. This is useful for small data sets of no more than 10,000 features (more or less, depending on system memory). .IP "DBI::mysql" 4 .IX Item "DBI::mysql" A full-featured implementation on top of the MySQL relational database system. .IP "berkeleydb" 4 .IX Item "berkeleydb" A full-featured implementation that runs on top of the BerkeleyDB database. See Bio::DB::SeqFeature::Store::berkeleydb. .PP If you do not explicitly specify the adaptor, then DBI::mysql will be used by default. 
.SS "Serializers" .IX Subsection "Serializers" When Bio::DB::SeqFeature::Store stores a Bio::SeqFeatureI object into the database, it serializes it into binary or text form. When it later fetches the feature from the database, it unserializes it. Two serializers are available: .IP "Storable" 4 .IX Item "Storable" This is a fast binary serializer. It is available in Perl versions 5.8.7 and higher and is used when available. .IP "Data::Dumper" 4 .IX Item "Data::Dumper" This is a slow text serializer that is available in Perl 5.8.0 and higher. It is used when Storable is unavailable. .PP If you do not specify the serializer, then Storable will be used if available; otherwise Data::Dumper. .SS "Loaders and Normalized Features" .IX Subsection "Loaders and Normalized Features" The Bio::DB::SeqFeature::Store::GFF3Loader parses a GFF3\-format file and loads the annotations and sequence data into the database of your choice. The script bp_seqfeature_load.pl (found in the scripts/Bio\-SeqFeature\-Store/ subdirectory) is a thin front end to the GFF3Loader. Other loaders may be written later. .PP Although Bio::DB::SeqFeature::Store should work with any Bio::SeqFeatureI object, there are some disadvantages to using Bio::SeqFeature::Generic and other vanilla implementations. The major issue is that if two vanilla features share the same subfeature (e.g. two transcripts sharing an exon), the shared subfeature will be cloned when stored into the database. .PP The special-purpose Bio::DB::SeqFeature class is able to normalize its subfeatures in the database, so that shared subfeatures are stored only once. This minimizes wasted storage space. In addition, when in-memory caching is turned on, each shared subfeature will usually occupy only a single memory location upon restoration. 
.SH "Methods for Connecting and Initializing a Database" .IX Header "Methods for Connecting and Initializing a Database" ## \s-1TODO:\s0 http://iowg.brcdevel.org/gff3.html#a_fasta is a dead link .SS "new" .IX Subsection "new" .Vb 6 \& Title : new \& Usage : $db = Bio::DB::SeqFeature::Store\->new(@options) \& Function: connect to a database \& Returns : A descendant of Bio::DB::SeqFeature::Store \& Args : several \- see below \& Status : public .Ve .PP This class method creates a new database connection. The following \&\-name=>$value arguments are accepted: .PP .Vb 2 \& Name Value \& \-\-\-\- \-\-\-\-\- \& \& \-adaptor The name of the Adaptor class (default DBI::mysql) \& \& \-serializer The name of the serializer class (default Storable) \& \& \-index_subfeatures Whether or not to make subfeatures searchable \& (default false) \& \& \-cache Activate LRU caching feature \-\- size of cache \& \& \-compress Compresses features before storing them in database \& using Compress::Zlib \& \& \-create (Re)initialize the database. .Ve .PP The \fB\-index_subfeatures\fR argument, if true, tells the module to create indexes for a feature and all its subfeatures (and its subfeatures' subfeatures). Indexing subfeatures means that you will be able to search for the gene, its mRNA subfeatures and the exons inside each mRNA. It also means when you search the database for all features contained within a particular location, you will get the gene, the mRNAs and all the exons as individual objects as well as subfeatures of each other. \s-1NOTE:\s0 this option is only honored when working with a normalized feature class such as Bio::DB::SeqFeature. .PP The \fB\-cache\fR argument, if true, tells the module to try to create an \&\s-1LRU\s0 (least-recently-used) object cache using the Tie::Cacher module. Caching will cause two objects that share the same primary_id to (often, but not always) share the same memory location, and may improve performance modestly. 
The argument is taken as the desired size for the cache. If you pass \*(L"1\*(R" as the cache value, a reasonable default cache size will be chosen. Caching requires the Tie::Cacher module to be installed. If the module is not installed, then caching will silently be disabled. .PP The \fB\-compress\fR argument, if true, will cause the feature data to be compressed before storing it. This will make the database somewhat smaller at the cost of decreasing performance. .PP The \fB\-create\fR argument, if true, will either initialize or reinitialize the database. It is needed the first time a database is used. .PP The \fInew()\fR method of individual adaptors recognizes additional arguments. The default DBI::mysql adaptor recognizes the following ones: .PP .Vb 2 \& Name Value \& \-\-\-\- \-\-\-\-\- \& \& \-dsn DBI data source (default dbi:mysql:test) \& \& \-autoindex A flag that controls whether or not to update \& all search indexes whenever a feature is stored \& or updated (default true). \& \& \-namespace A string that will be used to qualify each table, \& thereby allowing you to store several independent \& sequence feature databases in a single MySQL \& database. \& \& \-dumpdir The path to a temporary directory that will be \& used during "fast" loading. See \& Bio::DB::SeqFeature::Store::GFF3Loader for a \& description of this. Default is the current \& directory. \& \& \-write Make the database writable (implied by \-create) \& \& \-fasta Provide an alternative DNA accessor object or path. .Ve .PP By default the database will store \s-1DNA\s0 sequences internally. However, you may override this behavior by passing either a path to a \s-1FASTA\s0 file, or any Perl object that recognizes the seq($seqid,$start,$end) method. In the former case, the \s-1FASTA\s0 path will be passed to Bio::DB::Fasta, possibly causing an index to be constructed. Suitable examples of the latter type of object include the Bio::DB::Sam and Bio::DB::Sam::Fai classes. 
.SS "init_database" .IX Subsection "init_database" .Vb 6 \& Title : init_database \& Usage : $db\->init_database([$erase_flag]) \& Function: initialize a database \& Returns : true \& Args : (optional) flag to erase current data \& Status : public .Ve .PP Call this after Bio::DB::SeqFeature::Store\->\fInew()\fR to initialize a new database. In the case of a \s-1DBI\s0 database, this method installs the schema but does \fBnot\fR create the database. You have to do this offline using the appropriate command-line tool. In the case of the \&\*(L"berkeleydb\*(R" adaptor, this creates an empty \s-1BTREE\s0 database. .PP If there is any data already in the database, \fIinit_database()\fR called with no arguments will have no effect. To permanently erase the data already there and prepare to receive a fresh set of data, pass a true argument. .SS "post_init" .IX Subsection "post_init" This method is invoked after init_database for use by certain adaptors (currently only the memory adaptor) to do automatic data loading after initialization. It is passed a copy of the \fIinit_database()\fR args. .SS "add_features" .IX Subsection "add_features" .Vb 6 \& Title : add_features \& Usage : $success = $db\->add_features(\e@features) \& Function: store one or more features into the database \& Returns : true if successful \& Args : array reference of Bio::SeqFeatureI objects \& Status : public .Ve .SS "store" .IX Subsection "store" .Vb 6 \& Title : store \& Usage : $success = $db\->store(@features) \& Function: store one or more features into the database \& Returns : true if successful \& Args : list of Bio::SeqFeatureI objects \& Status : public .Ve .PP This method stores a list of features into the database. Each feature is updated so that its primary_id becomes the primary \s-1ID\s0 of the serialized feature stored in the database. If all features were successfully stored, the method returns true. 
In the \s-1DBI\s0 implementation, the store is performed as a single transaction and the transaction is rolled back if one or more store operations failed. .PP In most cases, you should let the database assign the primary id. If the object you store already has a primary_id, then the \s-1ID\s0 must adhere to the datatype expected by the adaptor: an integer in the case of the various \s-1DB\s0 adaptors, and a string in the case of the memory and berkeley adaptors. .PP You can find out what the primary \s-1ID\s0 of the feature has become by calling the feature's \fIprimary_id()\fR method: .PP .Vb 2 \& $db\->store($my_feature) or die "Oh darn"; \& my $id = $my_feature\->primary_id; .Ve .PP If the feature contains subfeatures, they will all be stored recursively. In the case of Bio::DB::SeqFeature and Bio::DB::SeqFeature::Store::NormalizedFeature, the subfeatures will be stored in a normalized way so that each subfeature appears just once in the database. .PP Subfeatures will be indexed for separate retrieval based on the current value of \fIindex_subfeatures()\fR. .PP If you call \fIstore()\fR with one or more features that already have valid primary_ids, then any existing objects will be \fBreplaced\fR. Note that when using normalized features such as Bio::DB::SeqFeature, the subfeatures are not recursively updated when you update the parent feature. You must manually update each subfeature that has changed. .SS "store_noindex" .IX Subsection "store_noindex" .Vb 6 \& Title : store_noindex \& Usage : $success = $db\->store_noindex(@features) \& Function: store one or more features into the database without indexing \& Returns : true if successful \& Args : list of Bio::SeqFeatureI objects \& Status : public .Ve .PP This method stores a list of features into the database but does not make them searchable. The only way to access the features is via their primary IDs. This method is ordinarily only used internally to store subfeatures that are not indexed. 
.SS "no_blobs" .IX Subsection "no_blobs" .Vb 8 \& Title : no_blobs \& Usage : $db\->no_blobs(1); \& Function: decide if objects should be stored in the database as blobs. \& Returns : boolean (default false) \& Args : boolean (true to no longer store objects; when the corresponding \& feature is retrieved it will instead be a minimal representation of \& the object that was stored, as some simple Bio::SeqFeatureI object) \& Status : dubious (new) .Ve .PP This method saves lots of space in the database, which may in turn lead to large performance increases in extreme cases (over 7 million features in the db). .SS "new_feature" .IX Subsection "new_feature" .Vb 6 \& Title : new_feature \& Usage : $feature = $db\->new_feature(@args) \& Function: create a new Bio::DB::SeqFeature object in the database \& Returns : the new seqfeature \& Args : see below \& Status : public .Ve .PP This method creates and stores a new Bio::SeqFeatureI object using the specialized Bio::DB::SeqFeature class. This class is able to store its subfeatures in a normalized fashion, allowing subfeatures to be shared among multiple parents (e.g. multiple exons shared among several mRNAs). .PP The arguments are the same as for Bio::DB::SeqFeature\->\fInew()\fR, which in turn are similar to Bio::SeqFeature::Generic\->\fInew()\fR and Bio::Graphics::Feature\->\fInew()\fR. The most important difference is the \&\fB\-index\fR option, which controls whether the feature will be indexed for retrieval (default is true). Ordinarily, you would only want to turn indexing off when creating subfeatures, because features stored without indexes will only be reachable via their primary IDs or their parents. 
.PP Arguments are as follows: .PP .Vb 10 \& \-seq_id the reference sequence \& \-start the start position of the feature \& \-end the stop position of the feature \& \-display_name the feature name (returned by seqname) \& \-primary_tag the feature type (returned by primary_tag) \& \-source the source tag \& \-score the feature score (for GFF compatibility) \& \-desc a description of the feature \& \-segments a list of subfeatures (see Bio::Graphics::Feature) \& \-subtype the type to use when creating subfeatures \& \-strand the strand of the feature (one of \-1, 0 or +1) \& \-phase the phase of the feature (0..2) \& \-url a URL to link to when rendered with Bio::Graphics \& \-attributes a hashref of tag value attributes, in which the key is the tag \& and the value is an array reference of values \& \-index index this feature if true .Ve .PP Aliases: .PP .Vb 6 \& \-id an alias for \-display_name \& \-seqname an alias for \-display_name \& \-display_id an alias for \-display_name \& \-name an alias for \-display_name \& \-stop an alias for end \& \-type an alias for primary_tag .Ve .PP You can change the seqfeature implementation generated by \fInew()\fR by passing the name of the desired seqfeature class to \&\f(CW$db\fR\->\fIseqfeature_class()\fR. .SS "delete" .IX Subsection "delete" .Vb 6 \& Title : delete \& Usage : $success = $db\->delete(@features) \& Function: delete a list of features from the database \& Returns : true if successful \& Args : list of features \& Status : public .Ve .PP This method looks up the primary IDs from a list of features and deletes them from the database, returning true if all deletions are successful. .PP \&\s-1WARNING:\s0 The current DBI::mysql implementation has some issues that need to be resolved, namely (1) normalized subfeatures are \s-1NOT\s0 recursively deleted; and (2) the deletions are not performed in a transaction. 
.SS "fetch / get_feature_by_id / get_feature_by_primary_id" .IX Subsection "fetch / get_feature_by_id / get_feature_by_primary_id" .Vb 8 \& Title : fetch \& get_feature_by_id \& get_feature_by_primary_id \& Usage : $feature = $db\->fetch($primary_id) \& Function: fetch a feature from the database using its primary ID \& Returns : a feature \& Args : primary ID of desired feature \& Status : public .Ve .PP This method returns a previously-stored feature from the database using its primary \s-1ID.\s0 If the primary \s-1ID\s0 is invalid, it returns undef. Use \fIfetch_many()\fR to rapidly retrieve multiple features. .SS "fetch_many" .IX Subsection "fetch_many" .Vb 6 \& Title : fetch_many \& Usage : @features = $db\->fetch_many($primary_id,$primary_id,$primary_id...) \& Function: fetch many features from the database using their primary ID \& Returns : list of features \& Args : a list of primary IDs or an array ref of primary IDs \& Status : public .Ve .PP Same as \fIfetch()\fR except that you can pass a list of primary IDs or a ref to an array of IDs. .SS "get_seq_stream" .IX Subsection "get_seq_stream" .Vb 6 \& Title : get_seq_stream \& Usage : $iterator = $db\->get_seq_stream(@args) \& Function: return an iterator across all features in the database \& Returns : a Bio::DB::SeqFeature::Store::Iterator object \& Args : feature filters (optional) \& Status : public .Ve .PP When called without any arguments this method will return an iterator object that will traverse all indexed features in the database. Call the iterator's \fInext_seq()\fR method to step through them (in no particular order): .PP .Vb 4 \& my $iterator = $db\->get_seq_stream; \& while (my $feature = $iterator\->next_seq) { \& print $feature\->primary_tag,\*(Aq \*(Aq,$feature\->display_name,"\en"; \& } .Ve .PP You can select a subset of features by passing a series of filter arguments. The arguments are identical to those accepted by \&\f(CW$db\fR\->\fIfeatures()\fR. 
.SS "get_features_by_name" .IX Subsection "get_features_by_name" .Vb 6 \& Title : get_features_by_name \& Usage : @features = $db\->get_features_by_name($name) \& Function: looks up features by their display_name \& Returns : a list of matching features \& Args : the desired name \& Status : public .Ve .PP This method searches the display_name of all features for matches against the provided name. \s-1GLOB\s0 style wildcards (\*(L"*\*(R", \*(L"?\*(R") are accepted, but may be slow. .PP The method returns the list of matches, which may be zero, one or more features. Be prepared to receive more than one result, as display names are not guaranteed to be unique. .PP For backward compatibility with gbrowse, this method is also known as \&\fIget_feature_by_name()\fR. .SS "get_feature_by_name" .IX Subsection "get_feature_by_name" .Vb 6 \& Title : get_feature_by_name \& Usage : @features = $db\->get_feature_by_name($name) \& Function: looks up features by their display_name \& Returns : a list of matching features \& Args : the desired name \& Status : Use get_features_by_name instead. .Ve .PP This method is provided for backward compatibility with gbrowse. .SS "get_features_by_alias" .IX Subsection "get_features_by_alias" .Vb 6 \& Title : get_features_by_alias \& Usage : @features = $db\->get_features_by_alias($name) \& Function: looks up features by their display_name or alias \& Returns : a list of matching features \& Args : the desired name \& Status : public .Ve .PP This method is similar to \fIget_features_by_name()\fR except that it will also search through the feature aliases. Aliases can be created by storing features that contain one or more Alias tags. Wildcards are accepted. 
.SS "get_features_by_type" .IX Subsection "get_features_by_type" .Vb 6 \& Title : get_features_by_type \& Usage : @features = $db\->get_features_by_type(@types) \& Function: looks up features by their primary_tag \& Returns : a list of matching features \& Args : list of primary tags \& Status : public .Ve .PP This method will return a list of features that have any of the primary tags given in the argument list. For compatibility with gbrowse and Bio::DB::GFF, types can be qualified using a colon: .PP .Vb 1 \& primary_tag:source_tag .Ve .PP in which case only features that match both the primary_tag \fBand\fR the indicated source_tag will be returned. If the database was loaded from a \s-1GFF3\s0 file, this corresponds to the third and second columns of the row, in that order. .PP For example, given the \s-1GFF3\s0 lines: .PP .Vb 2 \& ctg123 geneFinder exon 1300 1500 . + . ID=exon001 \& ctg123 fgenesH exon 1300 1520 . + . ID=exon002 .Ve .PP exon001 and exon002 will be returned by searching for type \*(L"exon\*(R", but only exon002 will be returned by searching for type \*(L"exon:fgenesH\*(R". .SS "get_features_by_location" .IX Subsection "get_features_by_location" .Vb 6 \& Title : get_features_by_location \& Usage : @features = $db\->get_features_by_location(@args) \& Function: looks up features by their location \& Returns : a list of matching features \& Args : see below \& Status : public .Ve .PP This method fetches features based on a location range lookup. You call it using a positional list of arguments, or a list of (\-argument=>$value) pairs. .PP The positional form is as follows: .PP .Vb 1 \& $db\->get_features_by_location($seqid [[,$start,]$end]) .Ve .PP The \f(CW$seqid\fR is the name of the sequence on which the feature resides, and start and end are optional endpoints for the match. If the endpoints are missing then any feature on the indicated seqid is returned. 
.PP Examples: .PP .Vb 3 \& get_features_by_location(\*(Aqchr1\*(Aq); # all features on chromosome 1 \& get_features_by_location(\*(Aqchr1\*(Aq,5000); # features between 5000 and the end \& get_features_by_location(\*(Aqchr1\*(Aq,5000,8000); # features between 5000 and 8000 .Ve .PP Location lookups are overlapping. A feature will be returned if it partially or completely overlaps the indicated range. .PP The named argument form gives you more control: .PP .Vb 2 \& Argument Value \& \-\-\-\-\-\-\-\- \-\-\-\-\- \& \& \-seq_id The name of the sequence on which the feature resides \& \-start Start of the range \& \-end End of the range \& \-strand Strand of the feature \& \-range_type Type of range to search over .Ve .PP The \fB\-strand\fR argument, if present, can be one of \*(L"0\*(R" to find features that are on both strands, \*(L"+1\*(R" to find only plus strand features, and \*(L"\-1\*(R" to find only minus strand features. Specifying a strand of undef is the same as not specifying this argument at all, and retrieves all features regardless of their strandedness. .PP The \fB\-range_type\fR argument, if present, can be one of \*(L"overlaps\*(R" (the default), to find features whose positions overlap the indicated range, \*(L"contains,\*(R" to find features whose endpoints are completely contained within the indicated range, and \*(L"contained_in\*(R" to find features whose endpoints are both outside the indicated range. .SS "get_features_by_attribute" .IX Subsection "get_features_by_attribute" .Vb 6 \& Title : get_features_by_attribute \& Usage : @features = $db\->get_features_by_attribute(@args) \& Function: looks up features by their attributes/tags \& Returns : a list of matching features \& Args : see below \& Status : public .Ve .PP This implements a simple tag filter. Pass a list of tag names and their values. The module will return a list of features whose tag names and values match. Tag names are case insensitive. 
If multiple tag name/value pairs are present, they will be ANDed together. To match any of a list of values, use an array reference for the value. .PP Examples: .PP .Vb 2 \& # return all features whose "function" tag is "GO:0000123" \& @features = $db\->get_features_by_attribute(function => \*(AqGO:0000123\*(Aq); \& \& # return all features whose "function" tag is "GO:0000123" or "GO:0000555" \& @features = $db\->get_features_by_attribute(function => [\*(AqGO:0000123\*(Aq,\*(AqGO:0000555\*(Aq]); \& \& # return all features whose "function" tag is "GO:0000123" or "GO:0000555" \& # and whose "confirmed" tag is 1 \& @features = $db\->get_features_by_attribute(function => [\*(AqGO:0000123\*(Aq,\*(AqGO:0000555\*(Aq], \& confirmed => 1); .Ve .SS "features" .IX Subsection "features" .Vb 6 \& Title : features \& Usage : @features = $db\->features(@args) \& Function: generalized query & retrieval interface \& Returns : list of features \& Args : see below \& Status : Public .Ve .PP This is the workhorse for feature query and retrieval. It takes a series of \-name=>$value filter arguments. Features that match all the filters are returned. .PP .Vb 2 \& Argument Value \& \-\-\-\-\-\-\-\- \-\-\-\-\- \& \& Location filters: \& \-seq_id Chromosome, contig or other DNA segment \& \-seqid Synonym for \-seq_id \& \-ref Synonym for \-seqid \& \-start Start of range \& \-end End of range \& \-stop Synonym for \-end \& \-strand Strand \& \-range_type Type of range match (\*(Aqoverlaps\*(Aq,\*(Aqcontains\*(Aq,\*(Aqcontained_in\*(Aq) \& \& Name filters: \& \-name Name of feature (may be a glob expression) \& \-aliases If true, match aliases as well as display names \& \-class Archaic argument for backward compatibility. 
\& (\-class=>\*(AqClone\*(Aq,\-name=>\*(AqABC123\*(Aq) is equivalent \& to (\-name=>\*(AqClone:ABC123\*(Aq) \& \& Type filters: \& \-types List of feature types (array reference) or one type (scalar) \& \-type Synonym for the above \& \-primary_tag Synonym for the above \& \& \-attributes Hashref of attribute=>value pairs as per \& get_features_by_attribute(). Multiple alternative values \& can be matched by providing an array reference. \& \-attribute synonym for \-attributes .Ve .PP You may also provide \fIfeatures()\fR with a list of scalar values (the first element of which must \fBnot\fR begin with a dash), in which case it will treat the list as a feature type filter. .PP Examples: .PP All features: \f(CW@features\fR = \f(CW$db\fR\->features( ); .PP All features on chromosome 1: .PP .Vb 1 \& @features = $db\->features(\-seqid=>\*(AqChr1\*(Aq); .Ve .PP All features on chromosome 1 between 5000 and 6000: .PP .Vb 1 \& @features = $db\->features(\-seqid=>\*(AqChr1\*(Aq,\-start=>5000,\-end=>6000); .Ve .PP All mRNAs on chromosome 1 between 5000 and 6000: .PP .Vb 1 \& @features = $db\->features(\-seqid=>\*(AqChr1\*(Aq,\-start=>5000,\-end=>6000,\-types=>\*(AqmRNA\*(Aq); .Ve .PP All confirmed mRNAs and repeats on chromosome 1 that overlap the range 5000..6000: .PP .Vb 4 \& @features = $db\->features(\-seqid => \*(AqChr1\*(Aq,\-start=>5000,\-end=>6000, \& \-types => [\*(AqmRNA\*(Aq,\*(Aqrepeat\*(Aq], \& \-attributes=> {confirmed=>1} \& ); .Ve .PP All confirmed mRNAs and repeats on chromosome 1 strictly contained within the range 5000..6000: .PP .Vb 5 \& @features = $db\->features(\-seqid => \*(AqChr1\*(Aq,\-start=>5000,\-end=>6000, \& \-types => [\*(AqmRNA\*(Aq,\*(Aqrepeat\*(Aq], \& \-attributes=> {confirmed=>1}, \& \-range_type => \*(Aqcontains\*(Aq, \& ); .Ve .PP All genes and repeats: .PP .Vb 1 \& @features = $db\->features(\*(Aqgene\*(Aq,\*(Aqrepeat_region\*(Aq); .Ve .SS "get_all_features" .IX Subsection "get_all_features" .Vb 6 \& Title : get_all_features \& 
Usage : @features = $db\->get_all_features() \& Function: get all features in the database \& Returns : list of features \& Args : none \& Status : Public .Ve .SS "seq_ids" .IX Subsection "seq_ids" .Vb 6 \& Title : seq_ids \& Usage : @ids = $db\->seq_ids() \& Function: Return all sequence IDs contained in database \& Returns : list of sequence Ids \& Args : none \& Status : public .Ve .SS "search_attributes" .IX Subsection "search_attributes" .Vb 6 \& Title : search_attributes \& Usage : @result_list = $db\->search_attributes("text search string",[$tag1,$tag2...],$limit) \& Function: Search attributes for keywords occurring in a text string \& Returns : array of results \& Args : full text search string, array ref of attribute names, and an optional feature limit \& Status : public .Ve .PP Given a search string, this method performs a full-text search of the specified attributes and returns an array of results. You may pass a scalar attribute name to search the values of one attribute (e.g. \*(L"Note\*(R") or you may pass an array reference to search inside multiple attributes (['Note','Alias','Parent']). Each row of the returned array is an arrayref containing the following fields: .PP .Vb 5 \& column 1 The display name of the feature \& column 2 The text of the note \& column 3 A relevance score. \& column 4 The feature type \& column 5 The unique ID of the feature .Ve .PP \&\s-1NOTE:\s0 This search will fail to find features that do not have a display name! .PP You can use \fIfetch()\fR or \fIfetch_many()\fR with the returned IDs to get to the features themselves. 
.SS "search_notes" .IX Subsection "search_notes" .Vb 6 \& Title : search_notes \& Usage : @result_list = $db\->search_notes("full text search string",$limit) \& Function: Search the notes for a text string \& Returns : array of results \& Args : full text search string, and an optional feature limit \& Status : public .Ve .PP Given a search string, this method performs a full-text search of the \&\*(L"Note\*(R" attribute and returns an array of results. Each row of the returned array is an arrayref containing the following fields: .PP .Vb 4 \& column 1 The display_name of the feature, suitable for passing to get_feature_by_name() \& column 2 The text of the note \& column 3 A relevance score. \& column 4 The type .Ve .PP \&\s-1NOTE:\s0 This is equivalent to \f(CW$db\fR\->search_attributes('full text search string','Note',$limit). This search will fail to find features that do not have a display name! .SS "types" .IX Subsection "types" .Vb 6 \& Title : types \& Usage : @type_list = $db\->types \& Function: Get all the types in the database \& Returns : array of Bio::DB::GFF::Typename objects \& Args : none \& Status : public .Ve .SS "insert_sequence" .IX Subsection "insert_sequence" .Vb 6 \& Title : insert_sequence \& Usage : $success = $db\->insert_sequence($seqid,$sequence_string,$offset) \& Function: Inserts sequence data into the database at the indicated offset \& Returns : true if successful \& Args : see below \& Status : public .Ve .PP This method inserts the \s-1DNA\s0 or protein sequence fragment \&\f(CW$sequence_string\fR, identified by the \s-1ID\s0 \f(CW$seq_id\fR, into the database at the indicated offset \f(CW$offset\fR. It is used internally by the GFF3Loader to load sequence data from the files. 
.SS "fetch_sequence" .IX Subsection "fetch_sequence" .Vb 6 \& Title : fetch_sequence \& Usage : $sequence = $db\->fetch_sequence(\-seq_id=>$seqid,\-start=>$start,\-end=>$end) \& Function: Fetch the indicated subsequence from the database \& Returns : The sequence string (not a Bio::PrimarySeq object!) \& Args : see below \& Status : public .Ve .PP This method retrieves a portion of the indicated sequence. The arguments are: .PP .Vb 11 \& Argument Value \& \-\-\-\-\-\-\-\- \-\-\-\-\- \& \-seq_id Chromosome, contig or other DNA segment \& \-seqid Synonym for \-seq_id \& \-name Synonym for \-seq_id \& \-start Start of range \& \-end End of range \& \-class Obsolete argument used for Bio::DB::GFF compatibility. If \& specified will qualify the seq_id as "$class:$seq_id". \& \-bioseq Boolean flag; if true, returns a Bio::PrimarySeq object instead \& of a sequence string. .Ve .PP You can call fetch_sequence using the following shortcuts: .PP .Vb 4 \& $seq = $db\->fetch_sequence(\*(Aqchr3\*(Aq); # entire chromosome \& $seq = $db\->fetch_sequence(\*(Aqchr3\*(Aq,1000); # position 1000 to end of chromosome \& $seq = $db\->fetch_sequence(\*(Aqchr3\*(Aq,undef,5000); # position 1 to 5000 \& $seq = $db\->fetch_sequence(\*(Aqchr3\*(Aq,1000,5000); # positions 1000 to 5000 .Ve .SS "segment" .IX Subsection "segment" .Vb 6 \& Title : segment \& Usage : $segment = $db\->segment($seq_id [,$start] [,$end] [,$absolute]) \& Function: restrict the database to a sequence range \& Returns : a Bio::DB::SeqFeature::Segment object \& Args : sequence id, start and end ranges (optional) \& Status : public .Ve .PP This is a convenience method that can be used when you are interested in the contents of a particular sequence landmark, such as a contig. Specify the \s-1ID\s0 of a sequence or other landmark in the database and optionally a start and endpoint relative to that landmark. The method will look up the region and return a Bio::DB::SeqFeature::Segment object that spans it. 
You can then use this segment object to make location-restricted queries on the database. .PP Example: .PP .Vb 2 \& $segment = $db\->segment(\*(Aqcontig23\*(Aq,1,1000); # first 1000 bp of contig23 \& my @mRNAs = $segment\->features(\*(AqmRNA\*(Aq); # all mRNAs that overlap segment .Ve .PP Although you will usually want to fetch segments that correspond to physical sequences in the database, you can actually use any feature in the database as the sequence \s-1ID.\s0 The \fIsegment()\fR method will perform a \fIget_features_by_name()\fR internally and then transform the feature into the appropriate coordinates. .PP The named feature should exist once and only once in the database. If it exists multiple times in the database and you attempt to call \&\fIsegment()\fR in a scalar context, you will get an exception. A workaround is to call the method in a list context, as in: .PP .Vb 1 \& my ($segment) = $db\->segment(\*(Aqcontig23\*(Aq,1,1000); .Ve .PP or .PP .Vb 1 \& my @segments = $db\->segment(\*(Aqcontig23\*(Aq,1,1000); .Ve .PP However, having multiple same-named features in the database is often an indication of underlying data problems. .PP If the optional \f(CW$absolute\fR argument is a true value, then the specified coordinates are relative to the reference (absolute) coordinates. .SS "seqfeature_class" .IX Subsection "seqfeature_class" .Vb 6 \& Title : seqfeature_class \& Usage : $classname = $db\->seqfeature_class([$new_classname]) \& Function: get or set the name of the Bio::SeqFeatureI class generated by new_feature() \& Returns : name of class \& Args : new classname (optional) \& Status : public .Ve .SS "reindex" .IX Subsection "reindex" .Vb 6 \& Title : reindex \& Usage : $db\->reindex \& Function: reindex the database \& Returns : nothing \& Args : nothing \& Status : public .Ve .PP This method will force the secondary indexes (name, location, attributes, feature types) to be recalculated. It may be useful to rebuild a corrupted database. 
.SS "attributes" .IX Subsection "attributes" .Vb 6 \& Title : attributes \& Usage : @a = $db\->attributes \& Function: Returns list of all known attributes \& Returns : Returns list of all known attributes \& Args : nothing \& Status : public .Ve .SS "start_bulk_update,finish_bulk_update" .IX Subsection "start_bulk_update,finish_bulk_update" .Vb 7 \& Title : start_bulk_update,finish_bulk_update \& Usage : $db\->start_bulk_update \& $db\->finish_bulk_update \& Function: Activate optimizations for large number of insertions/updates \& Returns : nothing \& Args : nothing \& Status : public .Ve .PP With some adaptors (currently only the DBI::mysql adaptor), these methods signal the adaptor that a large number of insertions or updates are to be performed, and activate certain optimizations. These methods are called automatically by the Bio::DB::SeqFeature::Store::GFF3Loader module. .PP Example: .PP .Vb 5 \& $db\->start_bulk_update; \& for my $f (@features) { \& $db\->store($f); \& } \& $db\->finish_bulk_update; .Ve .SS "add_SeqFeature" .IX Subsection "add_SeqFeature" .Vb 7 \& Title : add_SeqFeature \& Usage : $count = $db\->add_SeqFeature($parent,@children) \& Function: store a parent/child relationship between a $parent and @children \& features that are already stored in the database \& Returns : number of children successfully stored \& Args : parent feature or primary ID and children features or primary IDs \& Status : OPTIONAL; MAY BE IMPLEMENTED BY ADAPTORS .Ve .PP If \fIcan_store_parentage()\fR returns true, then some store-aware features (e.g. Bio::DB::SeqFeature) will invoke this method to store feature/subfeature relationships in a normalized table. 
.SS "fetch_SeqFeatures" .IX Subsection "fetch_SeqFeatures" .Vb 6 \& Title : fetch_SeqFeatures \& Usage : @children = $db\->fetch_SeqFeatures($parent_feature) \& Function: return the immediate subfeatures of the indicated feature \& Returns : list of subfeatures \& Args : the parent feature and an optional list of children types \& Status : OPTIONAL; MAY BE IMPLEMENTED BY ADAPTORS .Ve .PP If \fIcan_store_parentage()\fR returns true, then some store-aware features (e.g. Bio::DB::SeqFeature) will invoke this method to retrieve feature/subfeature relationships from the database. .SH "Changing the Behavior of the Database" .IX Header "Changing the Behavior of the Database" These methods allow you to modify the behavior of the database. .SS "debug" .IX Subsection "debug" .Vb 6 \& Title : debug \& Usage : $debug_flag = $db\->debug([$new_flag]) \& Function: set the debug flag \& Returns : current debug flag \& Args : new debug flag \& Status : public .Ve .PP This method gets/sets a flag that turns on verbose progress messages. Currently this will not do very much. .SS "serializer" .IX Subsection "serializer" .Vb 6 \& Title : serializer \& Usage : $serializer = $db\->serializer([$new_serializer]) \& Function: get/set the name of the serializer \& Returns : the name of the current serializer class \& Args : (optional) the name of a new serializer \& Status : public .Ve .PP You can use this method to set the serializer, but do not attempt to change the serializer once the database is initialized and populated. .SS "dna_accessor" .IX Subsection "dna_accessor" .Vb 6 \& Title : dna_accessor \& Usage : $dna_accessor = $db\->dna_accessor([$new_dna_accessor]) \& Function: get/set the name of the dna_accessor \& Returns : the current dna_accessor object, if any \& Args : (optional) the dna_accessor object \& Status : public .Ve .PP You can use this method to request or set the \s-1DNA\s0 accessor. 
.SS "index_subfeatures" .IX Subsection "index_subfeatures" .Vb 6 \& Title : index_subfeatures \& Usage : $flag = $db\->index_subfeatures([$new_value]) \& Function: flag whether to index subfeatures \& Returns : current value of the flag \& Args : (optional) new value of the flag \& Status : public .Ve .PP If true, the \fIstore()\fR method will add a searchable index to both the top-level feature and all its subfeatures, allowing the search functions to return features at any level of the containment hierarchy. If false, only the top level feature will be indexed, meaning that you will only be able to get at subfeatures by fetching the top-level feature and then traversing downward using \&\fIget_SeqFeatures()\fR. .PP You are free to change this setting at any point during the creation and population of a database. One database can contain both indexed and unindexed subfeatures. .SS "clone" .IX Subsection "clone" The \fIclone()\fR method should be used when you want to pass the Bio::DB::SeqFeature::Store object to a child process across a \&\fIfork()\fR. The child must call \fIclone()\fR before making any queries. .PP The default behavior is to do nothing, but adaptors that use the \s-1DBI\s0 interface may need to implement this in order to avoid database handle errors. See the dbi adaptor for an example. .SH "TIE Interface" .IX Header "TIE Interface" This module implements a full \s-1TIEHASH\s0 interface. The keys are the primary IDs of the features in the database. 
Example: .PP .Vb 4 \& tie %h,\*(AqBio::DB::SeqFeature::Store\*(Aq,\-adaptor=>\*(AqDBI::mysql\*(Aq,\-dsn=>\*(Aqdbi:mysql:elegans\*(Aq; \& $h{123} = $feature1; \& $h{124} = $feature2; \& print $h{123}\->display_name; .Ve .SS "_init_database" .IX Subsection "_init_database" .Vb 6 \& Title : _init_database \& Usage : $success = $db\->_init_database([$erase]) \& Function: initialize an empty database \& Returns : true on success \& Args : optional boolean flag to erase contents of an existing database \& Status : ABSTRACT METHOD; MUST BE IMPLEMENTED BY AN ADAPTOR .Ve .PP This method is the back end for \fIinit_database()\fR. It must be implemented by an adaptor that inherits from Bio::DB::SeqFeature::Store. It returns true on success. \f(CW@features\fR = \f(CW$db\fR\->features(\-seqid=>'Chr1'); .SS "_store" .IX Subsection "_store" .Vb 7 \& Title : _store \& Usage : $success = $db\->_store($indexed,@objects) \& Function: store seqfeature objects into database \& Returns : true on success \& Args : a boolean flag indicating whether objects are to be indexed, \& and one or more objects \& Status : ABSTRACT METHOD; MUST BE IMPLEMENTED BY AN ADAPTOR .Ve .PP This method is the back end for \fIstore()\fR and \fIstore_noindex()\fR. It should write the seqfeature objects into the database. If indexing is requested, the features should be indexed for query and retrieval. Otherwise the features should be stored without indexing (it is not required that adaptors respect this). .PP If the object has no primary_id (undef), then the object is written into the database and assigned a new primary_id. If the object already has a primary_id, then the system will perform an update, replacing whatever was there before. .PP In practice, the implementation will serialize each object using the \&\fIfreeze()\fR method and then store it in the database under the corresponding primary_id. The object is then updated with the primary_id. 
.SS "_fetch" .IX Subsection "_fetch" .Vb 6 \& Title : _fetch \& Usage : $feature = $db\->_fetch($primary_id) \& Function: fetch feature from database \& Returns : feature \& Args : primary id \& Status : ABSTRACT METHOD; MUST BE IMPLEMENTED BY AN ADAPTOR .Ve .PP This method is the back end for \fIfetch()\fR. It accepts a primary_id and returns a feature object. It must be implemented by the adaptor. .PP In practice, the implementation will retrieve the serialized Bio::SeqFeatureI object from the database and pass it to the \fIthaw()\fR method to unserialize it and synchronize the primary_id. .SS "_fetch_many" .IX Subsection "_fetch_many" .Vb 6 \& Title : _fetch_many \& Usage : $feature = $db\->_fetch_many(@primary_ids) \& Function: fetch many features from database \& Returns : feature \& Args : primary id \& Status : private \-\- does not need to be implemented .Ve .PP This method fetches many features specified by a list of IDs. The default implementation simply calls \fI_fetch()\fR once for each primary_id. Implementors can override it if needed for efficiency. .SS "_update_indexes" .IX Subsection "_update_indexes" .Vb 6 \& Title : _update_indexes \& Usage : $success = $db\->_update_indexes($feature) \& Function: update the indexes for a feature \& Returns : true on success \& Args : A seqfeature object \& Status : ABSTRACT METHOD; MUST BE IMPLEMENTED BY AN ADAPTOR .Ve .PP This method is called by \fIreindex()\fR to update the searchable indexes for a feature object that has changed. 
.SS "_start_reindexing, _end_reindexing" .IX Subsection "_start_reindexing, _end_reindexing" .Vb 7 \& Title : _start_reindexing, _end_reindexing \& Usage : $db\->_start_reindexing() \& $db\->_end_reindexing \& Function: flag that a series of reindexing operations is beginning/ending \& Returns : true on success \& Args : none \& Status : MAY BE IMPLEMENTED BY AN ADAPTOR (optional) .Ve .PP These methods are called by \fIreindex()\fR before and immediately after a series of reindexing operations. The default behavior is to do nothing, but these methods can be overridden by an adaptor in order to perform optimizations, turn off autocommits, etc. .SS "_features" .IX Subsection "_features" .Vb 6 \& Title : _features \& Usage : @features = $db\->_features(@args) \& Function: back end for all get_features_by_*() queries \& Returns : list of features \& Args : see below \& Status : ABSTRACT METHOD; MUST BE IMPLEMENTED BY ADAPTOR .Ve .PP This is the backend for \fIfeatures()\fR, \fIget_features_by_name()\fR, \&\fIget_features_by_location()\fR, etc. Arguments are as described for the \&\fIfeatures()\fR method, except that only the named-argument form is recognized. .SS "_search_attributes" .IX Subsection "_search_attributes" .Vb 6 \& Title : _search_attributes \& Usage : @result_list = $db\->_search_attributes("text search string",[$tag1,$tag2...],$limit) \& Function: back end for the search_attributes() method \& Returns : results list \& Args : as per search_attributes() \& Status : ABSTRACT METHOD; MUST BE IMPLEMENTED BY ADAPTOR .Ve .PP See \fIsearch_attributes()\fR for the format of the results list. The only difference between this and the public method is that the tag list is guaranteed to be an array reference. 
.SS "can_store_parentage" .IX Subsection "can_store_parentage" .Vb 6 \& Title : can_store_parentage \& Usage : $flag = $db\->can_store_parentage \& Function: return true if this adaptor can store parent/child relationships \& Returns : boolean \& Args : none \& Status : OPTIONAL; MAY BE IMPLEMENTED BY ADAPTORS .Ve .PP Override this method and return true if this adaptor supports the \&\fI_add_SeqFeature()\fR and \fI_fetch_SeqFeatures()\fR methods, which are used for storing feature parent/child relationships in a normalized fashion. Default is false (parent/child relationships are stored in denormalized form in each feature). .SS "_add_SeqFeature" .IX Subsection "_add_SeqFeature" .Vb 6 \& Title : _add_SeqFeature \& Usage : $count = $db\->_add_SeqFeature($parent,@children) \& Function: store a parent/child relationship between $parent and @children \& Returns : number of children successfully stored \& Args : parent feature and one or more children \& Status : OPTIONAL; MAY BE IMPLEMENTED BY ADAPTORS .Ve .PP If \fIcan_store_parentage()\fR returns true, then some store-aware features (e.g. Bio::DB::SeqFeature) will invoke this method to store feature/subfeature relationships in a normalized table. .SS "_fetch_SeqFeatures" .IX Subsection "_fetch_SeqFeatures" .Vb 6 \& Title : _fetch_SeqFeatures \& Usage : @children = $db\->_fetch_SeqFeatures($parent_feature) \& Function: return the immediate subfeatures of the indicated feature \& Returns : list of subfeatures \& Args : the parent feature \& Status : OPTIONAL; MAY BE IMPLEMENTED BY ADAPTORS .Ve .PP If \fIcan_store_parentage()\fR returns true, then some store-aware features (e.g. Bio::DB::SeqFeature) will invoke this method to retrieve feature/subfeature relationships from the database. 
.SS "_insert_sequence" .IX Subsection "_insert_sequence" .Vb 6 \& Title : _insert_sequence \& Usage : $success = $db\->_insert_sequence($seqid,$sequence_string,$offset) \& Function: Inserts sequence data into the database at the indicated offset \& Returns : true if successful \& Args : see below \& Status : ABSTRACT METHOD; MUST BE IMPLEMENTED BY ADAPTOR .Ve .PP This is the back end for \fIinsert_sequence()\fR. Adaptors must implement this method in order to store and retrieve nucleotide or protein sequence. .SS "_fetch_sequence" .IX Subsection "_fetch_sequence" .Vb 6 \& Title : _fetch_sequence \& Usage : $sequence = $db\->_fetch_sequence(\-seq_id=>$seqid,\-start=>$start,\-end=>$end) \& Function: Fetch the indicated subsequence from the database \& Returns : The sequence string (not a Bio::PrimarySeq object!) \& Args : see below \& Status : ABSTRACT METHOD; MUST BE IMPLEMENTED BY ADAPTOR .Ve .PP This is the back end for \fIfetch_sequence()\fR. Adaptors must implement this method in order to store and retrieve nucleotide or protein sequence. .SS "_seq_ids" .IX Subsection "_seq_ids" .Vb 6 \& Title : _seq_ids \& Usage : @ids = $db\->_seq_ids() \& Function: Return all sequence IDs contained in database \& Returns : list of sequence Ids \& Args : none \& Status : TO BE IMPLEMENTED BY ADAPTOR .Ve .PP This method is invoked by \fIseq_ids()\fR to return all sequence IDs (coordinate systems) known to the database. .SS "_start_bulk_update,_finish_bulk_update" .IX Subsection "_start_bulk_update,_finish_bulk_update" .Vb 7 \& Title : _start_bulk_update, _finish_bulk_update \& Usage : $db\->_start_bulk_update \& $db\->_finish_bulk_update \& Function: Activate optimizations for large number of insertions/updates \& Returns : nothing \& Args : nothing \& Status : OPTIONAL; MAY BE IMPLEMENTED BY ADAPTOR .Ve .PP These are the backends for \fIstart_bulk_update()\fR and \&\fIfinish_bulk_update()\fR. The default behavior of both methods is to do nothing. 
.SS "Optional methods needed to implement full \s-1TIEHASH\s0 interface" .IX Subsection "Optional methods needed to implement full TIEHASH interface" The core \s-1TIEHASH\s0 interface will work if just the \fI_store()\fR and \fI_fetch()\fR methods are implemented. To support the full \s-1TIEHASH\s0 interface, including support for \fIkeys()\fR, \fIeach()\fR, and \fIexists()\fR, the following methods should be implemented: .ie n .IP "$id = $db\->\fI_firstid()\fR" 4 .el .IP "\f(CW$id\fR = \f(CW$db\fR\->\fI_firstid()\fR" 4 .IX Item "$id = $db->_firstid()" Return the first primary \s-1ID\s0 in the database. Needed for the \fIeach()\fR function. .ie n .IP "$next_id = $db\->_nextid($id)" 4 .el .IP "\f(CW$next_id\fR = \f(CW$db\fR\->_nextid($id)" 4 .IX Item "$next_id = $db->_nextid($id)" Given a primary \s-1ID,\s0 return the next primary \s-1ID\s0 in the series. Needed for the \fIeach()\fR function. .ie n .IP "$boolean = $db\->_existsid($id)" 4 .el .IP "\f(CW$boolean\fR = \f(CW$db\fR\->_existsid($id)" 4 .IX Item "$boolean = $db->_existsid($id)" Returns true if the indicated primary \s-1ID\s0 is in the database. Needed for the \fIexists()\fR function. .ie n .IP "$db\->_deleteid($id)" 4 .el .IP "\f(CW$db\fR\->_deleteid($id)" 4 .IX Item "$db->_deleteid($id)" Delete the feature corresponding to the given primary \s-1ID.\s0 Needed for \&\fIdelete()\fR. .ie n .IP "$db\->\fI_clearall()\fR" 4 .el .IP "\f(CW$db\fR\->\fI_clearall()\fR" 4 .IX Item "$db->_clearall()" Empty the database. Needed for \f(CW%tied_hash\fR = (). .ie n .IP "$count = $db\->\fI_featurecount()\fR" 4 .el .IP "\f(CW$count\fR = \f(CW$db\fR\->\fI_featurecount()\fR" 4 .IX Item "$count = $db->_featurecount()" Return the number of features in the database. Needed for scalar \&\f(CW%tied_hash\fR. .SH "Internal Methods" .IX Header "Internal Methods" These methods are internal to Bio::DB::SeqFeature::Store and adaptors. 
.SS "new_instance" .IX Subsection "new_instance" .Vb 6 \& Title : new_instance \& Usage : $db = $db\->new_instance() \& Function: class constructor \& Returns : A descendent of Bio::DB::SeqFeature::Store \& Args : none \& Status : internal .Ve .PP This method is called internally by \fInew()\fR to create a new uninitialized instance of Bio::DB::SeqFeature::Store. It is used internally and should not be called by application software. .SS "init" .IX Subsection "init" .Vb 6 \& Title : init \& Usage : $db\->init(@args) \& Function: initialize object \& Returns : none \& Args : Arguments passed to new() \& Status : private .Ve .PP This method is called internally by \fInew()\fR to initialize a newly-created object using the arguments passed to \fInew()\fR. It is to be overridden by Bio::DB::SeqFeature::Store adaptors. .SS "default_settings" .IX Subsection "default_settings" .Vb 6 \& Title : default_settings \& Usage : $db\->default_settings() \& Function: set up default settings for the adaptor \& Returns : none \& Args : none \& Status : private .Ve .PP This method may be overridden by adaptors. It is responsible for setting up object default settings. .SS "default_serializer" .IX Subsection "default_serializer" .Vb 6 \& Title : default_serializer \& Usage : $serializer = $db\->default_serializer \& Function: finds an available serializer \& Returns : the name of an available serializer \& Args : none \& Status : private .Ve .PP This method returns the name of an available serializer module. .SS "setting" .IX Subsection "setting" .Vb 6 \& Title : setting \& Usage : $value = $db\->setting(\*(Aqsetting_name\*(Aq [=> $new_value]) \& Function: get/set the value of a setting \& Returns : the value of the current setting \& Args : the name of the setting and optionally a new value for the setting \& Status : private .Ve .PP This is a low-level procedure for persistently storing database settings. It can be overridden by adaptors. 
.SS "subfeatures_are_indexed" .IX Subsection "subfeatures_are_indexed" .Vb 6 \& Title : subfeatures_are_indexed \& Usage : $flag = $db\->subfeatures_are_indexed([$new_value]) \& Function: flag whether subfeatures are indexed \& Returns : a flag indicating that all subfeatures are indexed \& Args : (optional) new value of the flag \& Status : private .Ve .PP This method is used internally by the Bio::DB::SeqFeature class to optimize some of its operations. It returns true if all of the subfeatures in the database are indexed; it returns false if at least one of the subfeatures is not indexed. Do not attempt to change the value of this setting unless you are writing an adaptor. .SS "subfeature_types_are_indexed" .IX Subsection "subfeature_types_are_indexed" .Vb 6 \& Title : subfeature_types_are_indexed \& Usage : $flag = $db\->subfeature_types_are_indexed \& Function: whether subfeatures are indexed by type \& Returns : a flag indicating that all subfeatures are indexed \& Args : none \& Status : private .Ve .PP This method returns true if subfeature types are indexed. Default is to return the value of \fIsubfeatures_are_indexed()\fR. .SS "subfeature_locations_are_indexed" .IX Subsection "subfeature_locations_are_indexed" .Vb 6 \& Title : subfeature_locations_are_indexed \& Usage : $flag = $db\->subfeature_locations_are_indexed \& Function: whether subfeatures are indexed by location \& Returns : a flag indicating that all subfeatures are indexed \& Args : none \& Status : private .Ve .PP This method returns true if subfeature locations are indexed. Default is to return the value of \fIsubfeatures_are_indexed()\fR. 
.SS "setup_segment_args" .IX Subsection "setup_segment_args" .Vb 6 \& Title : setup_segment_args \& Usage : @args = $db\->setup_segment_args(@args) \& Function: munge the arguments to the segment() call \& Returns : munged arguments \& Args : see below \& Status : private .Ve .PP This method is used internally by \fIsegment()\fR to translate positional arguments into named argument=>value pairs. .SS "store_and_cache" .IX Subsection "store_and_cache" .Vb 6 \& Title : store_and_cache \& Usage : $success = $db\->store_and_cache(@features) \& Function: store features into database and update cache \& Returns : number of features stored \& Args : index the features? (0 or 1) and list of features \& Status : private .Ve .PP This private method stores the list of Bio::SeqFeatureI objects into the database and caches them in memory for retrieval. .SS "init_cache" .IX Subsection "init_cache" .Vb 6 \& Title : init_cache \& Usage : $db\->init_cache($size) \& Function: initialize the in\-memory feature cache \& Returns : the Tie::Cacher object \& Args : desired size of the cache \& Status : private .Ve .PP This method is used internally by \fInew()\fR to create the Tie::Cacher instance used for the in-memory feature cache. .SS "cache" .IX Subsection "cache" .Vb 6 \& Title : cache \& Usage : $cache = $db\->cache \& Function: return the cache object \& Returns : the Tie::Cacher object \& Args : none \& Status : private .Ve .PP This method returns the Tie::Cacher object used for the in-memory feature cache. .SS "load_class" .IX Subsection "load_class" .Vb 6 \& Title : load_class \& Usage : $db\->load_class($blessed_object) \& Function: loads the module corresponding to a blessed object \& Returns : empty \& Args : a blessed object \& Status : private .Ve .PP This method is used by \fIthaw()\fR to load the code for a blessed object. This ensures that all the object's methods are available. 
.SS "freeze" .IX Subsection "freeze" .Vb 6 \& Title : freeze \& Usage : $serialized_object = $db\->freeze($feature) \& Function: serialize a feature object into a string \& Returns : serialized feature object \& Args : a seqfeature object \& Status : private .Ve .PP This method converts a Bio::SeqFeatureI object into a serialized form suitable for storage into a database. The feature's primary \s-1ID\s0 is set to undef before it is serialized. This avoids any potential mismatch between the primary \s-1ID\s0 used as the database key and the primary \s-1ID\s0 stored in the serialized object. .SS "thaw" .IX Subsection "thaw" .Vb 6 \& Title : thaw \& Usage : $feature = $db\->thaw($serialized_object,$primary_id) \& Function: unserialize a string into a feature object \& Returns : Bio::SeqFeatureI object \& Args : serialized form of object from freeze() and primary_id of object \& Status : private .Ve .PP This method is the reverse of the \fIfreeze()\fR. The supplied primary_id becomes the \fIprimary_id()\fR of the returned Bio::SeqFeatureI object. This implementation checks for a deserialized object in the cache before it calls \fIthaw_object()\fR to do the actual deserialization. .SS "thaw_object" .IX Subsection "thaw_object" .Vb 6 \& Title : thaw_object \& Usage : $feature = $db\->thaw_object($serialized_object,$primary_id) \& Function: unserialize a string into a feature object \& Returns : Bio::SeqFeatureI object \& Args : serialized form of object from freeze() and primary_id of object \& Status : private .Ve .PP After \fIthaw()\fR checks the cache and comes up empty, this method is invoked to thaw the object. 
.SS "feature_names" .IX Subsection "feature_names" .Vb 6 \& Title : feature_names \& Usage : ($names,$aliases) = $db\->feature_names($feature) \& Function: get names and aliases for a feature \& Returns : an array of names and an array of aliases \& Args : a Bio::SeqFeatureI object \& Status : private .Ve .PP This is an internal utility function which, given a Bio::SeqFeatureI object, returns two array refs. The first is a list of official names for the feature, and the second is a list of aliases. This is slightly skewed towards \s-1GFF3\s0 usage, so the official names are the \&\fIdisplay_name()\fR, plus all tag values named 'Name', plus all tag values named '\s-1ID\s0'. The aliases are all tag values named 'Alias'. .SS "feature_summary" .IX Subsection "feature_summary" .Vb 6 \& Title : feature_summary \& Usage : $summary = $db\->feature_summary(@args) \& Function: returns a coverage summary across indicated region/type \& Returns : a Bio::SeqFeatureI object containing the "coverage" tag \& Args : see below \& Status : public .Ve .PP This method is used to get coverage density information across a region of interest. You provide it with a region of interest, optionally a list of feature types, and a count of the number of bins over which you want to calculate the coverage density. An object is returned corresponding to the requested region. It contains a tag called \&\*(L"coverage\*(R" that will return an array ref of \*(L"bins\*(R" length. Each element of the array describes the number of features that overlap the bin at this position. .PP Arguments: .PP .Vb 2 \& Argument Description \& \-\-\-\-\-\-\-\- \-\-\-\-\-\-\-\-\-\-\- \& \& \-seq_id Sequence ID for the region \& \-start Start of region \& \-end End of region \& \-type/\-types Feature type of interest or array ref of types \& \-bins Number of bins across region. Defaults to 1000. 
\& \-iterator Return an iterator across the region .Ve .PP Note that this method uses an approximate algorithm that is only accurate to 500 bp, so when dealing with bins that are smaller than 1000 bp, you may see some shifting of counts between adjacent bins. .PP Although an \-iterator option is provided, the method only ever returns a single feature, so this is fairly useless. .SS "coverage_array" .IX Subsection "coverage_array" .Vb 6 \& Title : coverage_array \& Usage : $arrayref = $db\->coverage_array(@args) \& Function: returns a coverage summary across indicated region/type \& Returns : an array reference \& Args : see below \& Status : public .Ve .PP This method is used to get coverage density information across a region of interest. The arguments are identical to feature_summary, except that instead of returning a Bio::SeqFeatureI object, it returns an array reference of the desired number of bins. The value of each element corresponds to the number of features in the bin. .PP Arguments: .PP .Vb 2 \& Argument Description \& \-\-\-\-\-\-\-\- \-\-\-\-\-\-\-\-\-\-\- \& \& \-seq_id Sequence ID for the region \& \-start Start of region \& \-end End of region \& \-type/\-types Feature type of interest or array ref of types \& \-bins Number of bins across region. Defaults to 1000. .Ve .PP Note that this method uses an approximate algorithm that is only accurate to 500 bp, so when dealing with bins that are smaller than 1000 bp, you may see some shifting of counts between adjacent bins. .SH "BUGS" .IX Header "BUGS" This is an early version, so there are certainly some bugs. Please use the BioPerl bug tracking system to report bugs. .SH "SEE ALSO" .IX Header "SEE ALSO" Bio::DB::SeqFeature, Bio::DB::SeqFeature::Store::GFF3Loader, Bio::DB::SeqFeature::Segment, Bio::DB::SeqFeature::Store::DBI::mysql, Bio::DB::SeqFeature::Store::berkeleydb, Bio::DB::SeqFeature::Store::memory .SH "AUTHOR" .IX Header "AUTHOR" Lincoln Stein. 
.PP Copyright (c) 2006 Cold Spring Harbor Laboratory. .PP This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.