.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.40) .\" .\" Standard preamble: .\" ======================================================================== .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' . ds C` . ds C' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is >0, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .\" .\" Avoid warning from groff about undefined register 'F'. .de IX .. .nr rF 0 .if \n(.g .if rF .nr rF 1 .if (\n(rF:(\n(.g==0)) \{\ . if \nF \{\ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . if !\nF==2 \{\ . nr % 0 . nr F 2 . \} . \} .\} .rr rF .\" ======================================================================== .\" .IX Title "GO::Parser 3pm" .TH GO::Parser 3pm "2021-01-09" "perl v5.32.0" "User Contributed Perl Documentation" .\" For nroff, turn off justification. 
Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l .nh .SH "NAME" .Vb 1 \& GO::Parser \- parses all GO file formats and types .Ve .SH "SYNOPSIS" .IX Header "SYNOPSIS" fetch GO::Model::Graph objects using a parser: .PP .Vb 9 \& # Scenario 1: Getting objects from a file \& use GO::Parser; \& my $parser = new GO::Parser({handler=>\*(Aqobj\*(Aq,use_cache=>1}); \& $parser\->parse("function.ontology"); # ontology \& $parser\->parse("GO.defs"); # definitions \& $parser\->parse("ec2go"); # external refs \& $parser\->parse("gene\-associations.sgd"); # gene assocs \& # get GO::Model::Graph object \& my $graph = $parser\->handler\->graph; \& \& # Scenario 2: Getting OBO XML from a file \& use GO::Parser; \& my $parser = new GO::Parser({handler=>\*(Aqxml\*(Aq}); \& $parser\->handler\->file("output.xml"); \& $parser\->parse("gene_ontology.obo"); \& \& # Scenario 3: Using an XSL stylesheet to convert the OBO XML \& use GO::Parser; \& my $parser = new GO::Parser({handler=>\*(Aqxml\*(Aq}); \& # xslt files are kept in $ENV{GO_ROOT}/xml/xsl \& # (if $GO_ROOT is not set, defaults to install directory) \& $parser\->xslt("oboxml_to_owl"); \& $parser\->handler\->file("output.owl\-xml"); \& $parser\->parse("gene_ontology.obo"); \& \& # Scenario 4: via scripts \& my $cmd = "go2xml gene_ontology.obo | xsltproc my\-transform.xsl \-"; \& my $fh = FileHandle\->new("$cmd |") || die("problem initiating $cmd"); \& while(<$fh>) { print $_ } \& $fh\->close || die("problem running $cmd"); .Ve .SH "DESCRIPTION" .IX Header "DESCRIPTION" Module for parsing \s-1GO\s0 flat files; for examples of \s-1GO/OBO\s0 flatfile formats see: .PP .PP .PP For a description of the various file formats, see: .PP .PP .PP This module will generate \s-1XML\s0 events from a correctly formatted \s-1GO/OBO\s0 file .SH "SEE ALSO" .IX Header "SEE ALSO" This module is a part of go-dev, see: .PP .PP for more details .SH "PUBLIC METHODS" .IX Header "PUBLIC METHODS" .SS "new" 
.IX Subsection "new" .Vb 12 \& Title : new \& Usage : my $p = GO::Parser\->new({format=>\*(Aqobo_xml\*(Aq,handler=>\*(Aqobj\*(Aq}); \& $p\->parse("go.obo\-xml"); \& my $g = $p\->handler\->graph; \& Synonyms: \& Function: creates a parser object \& Example : \& Returns : GO::Parser \& Args : a hashref of arguments: \& format: a format for which a parser exists \& handler: a format for which a perl handler exists \& use_cache: (boolean) see caching below .Ve .SS "parse" .IX Subsection "parse" .Vb 7 \& Title : parse \& Usage : $p\->parse($file); \& Synonyms: \& Function: parses a file \& Example : \& Returns : \& Args : str filename .Ve .SS "handler" .IX Subsection "handler" .Vb 7 \& Title : handler \& Usage : my $handler = $p\->handler; \& Synonyms: \& Function: gets/sets a GO::Handler object \& Example : \& Returns : GO::Handler \& Args : GO::Handler .Ve .SH "FORMATS" .IX Header "FORMATS" This module is a front end wrapper for a number of different \s-1GO/OBO\s0 formats \- see the relevant module documentation below for details. .PP The full list of parsers can be found in the go\-perl/GO/Parsers/ directory .IP "obo_text" 4 .IX Item "obo_text" Files with suffix \*(L".obo\*(R" .Sp This is a new file format replacement for the existing \s-1GO\s0 flat file formats. 
It handles ontologies, definitions and xrefs (but not associations) .IP "go_ont" 4 .IX Item "go_ont" Files with suffix \*(L".ontology\*(R" .Sp These store the ontology DAGs .IP "go_def" 4 .IX Item "go_def" Files with suffix \*(L".defs\*(R" .IP "go_xref" 4 .IX Item "go_xref" External database references for \s-1GO\s0 terms .Sp Files with suffix \*(L"2go\*(R" (eg ec2go, metacyc2go) .IP "go_assoc" 4 .IX Item "go_assoc" Annotations of genes or gene products using \s-1GO\s0 .Sp Files with prefix \*(L"gene-association.\*(R" .IP "obo_xml" 4 .IX Item "obo_xml" Files with suffix \*(L".obo.xml\*(R" or \*(L".obo\-xml\*(R" .Sp This is the \s-1XML\s0 version of the \s-1OBO\s0 flat file format above .Sp See .IP "obj_yaml" 4 .IX Item "obj_yaml" A \s-1YAML\s0 dump of the perl GO::Model::Graph object. You need \s-1YAML\s0 from \s-1CPAN\s0 for this to work .IP "obj_storable" 4 .IX Item "obj_storable" A dump of the perl GO::Model::Graph object. You need Storable from \s-1CPAN\s0 for this to work. This is intended to cache objects on the filesystem, for fast access. The obj_storable representation may not be portable .SS "\s-1PARSING ARCHITECTURE\s0" .IX Subsection "PARSING ARCHITECTURE" Each parser fires \s-1XML\s0 \fBevents\fR. The \s-1XML\s0 events are known as \&\fBObo-XML\fR. .PP These \s-1XML\s0 events can be \fBcaught\fR by a handler written in perl, or they can be caught by an \s-1XML\s0 parser written in some other language, or by using \s-1XSL\s0 stylesheets. .PP go-dev comes with a number of stylesheets in the go\-dev/xml/xsl directory .PP Anything that catches these \s-1XML\s0 events is known as a \fBhandler\fR .PP go-perl comes with some standard perl \s-1XML\s0 handlers, in addition to some standard \s-1XSL\s0 stylesheets. 
These can be found in the \&\fBgo\-dev/go\-perl/GO/Handlers\fR directory .PP If you are interested in getting perl \fBobjects\fR from files then you will want the \fBobj\fR handler, which gives back GO::Model::Graph objects .PP The parsing architecture gives you the option of using the go-perl object model, or just parsing the \s-1XML\s0 events directly .PP If you are using the go-db-perl library, the load\-go\-into\-db.pl script will perform the following processes when loading files into the database .IP "Obo-XML events fired using GO::Parser::* classes" 4 .IX Item "Obo-XML events fired using GO::Parser::* classes" .PD 0 .IP "Obo-XML transformed into godb xml using oboxml_to_godb_prestore.xsl" 4 .IX Item "Obo-XML transformed into godb xml using oboxml_to_godb_prestore.xsl" .IP "godb_prestore.xml stored in database using generic loader" 4 .IX Item "godb_prestore.xml stored in database using generic loader" .PD .SS "Obo-XML" .IX Subsection "Obo-XML" The Obo-XML format \s-1DTD\s0 is stored in the go\-dev/xml/dtd directory .SS "\s-1HOW IT WORKS\s0" .IX Subsection "HOW IT WORKS" Currently the various parsers and perl event handlers use the \fBstag\fR module for this \- see Data::Stag for more details, or http://stag.sourceforge.net .SS "\s-1NESTED EVENTS\s0" .IX Subsection "NESTED EVENTS" nested events can be thought of as xml, without attributes; nested events can easily be turned into xml .PP events have a start, a body and an end .PP event handlers can *catch* these events and do something with them. 
.PP an object handler can turn the events into objects, centred around the GO::Model::Graph object; see GO::Handlers::obj .PP other handlers can catch the events and convert them into other formats, eg \s-1OWL\s0 or \s-1OBO\s0 .PP Or you can bypass the handler and get output as an \s-1XML\s0 stream \- to do this, just run the go2xml script .PP a database loading event handler can catch the events and turn them into \s-1SQL\s0 statements, loading a MySQL or postgres database (see the go-db-perl library) .PP the advantage of an event based parsing architecture is that it is easy to build lightweight parsers, and heavyweight object models can be bypassed if preferred. .SS "\s-1EXAMPLES\s0" .IX Subsection "EXAMPLES" To see examples of the events generated by the GO::Parser class, run the script go2xml; for example .PP .Vb 1 \& go2xml function.ontology .Ve .PP on any GO-formatted flatfile .PP This also works on OBO-formatted files: .PP .Vb 1 \& go2xml gene_ontology.obo .Ve .PP You can also use the script \*(L"stag\-parse.pl\*(R" which comes with the Data::Stag distribution. For example: .PP .Vb 1 \& stag\-parse.pl \-p GO::Parsers::go_assoc_parser gene\-association.fb .Ve .SS "\s-1XSLT HANDLERS\s0" .IX Subsection "XSLT HANDLERS" The full list can be found in the go\-dev/xml/xsl directory .SS "\s-1PERL HANDLERS\s0" .IX Subsection "PERL HANDLERS" see GO::Handlers::* for all the different handlers possible; more can be added dynamically. .PP you can either create the handler object yourself, and pass it as an argument, e.g. 
.PP .Vb 4 \& my $apph = new GO::AppHandle(\-db=>"go"); \& my $handler = new GO::Handlers::godb({apph=>$apph}); \& my $parser = new GO::Parser({handler=>$handler}); \& $parser\->parse(@files); .Ve .PP or you can use one of the registered handlers: .PP .Vb 2 \& my $parser = new GO::Parser({handler=>\*(Aqdb\*(Aq, \& handler_args=>{apph=>$apph}}); .Ve .PP or you can just do things from the command line .PP .Vb 1 \& go2fmt.pl \-w oboxml function.ontology .Ve .PP the registered perl handlers are as follows: .IP "obo_xml" 4 .IX Item "obo_xml" writes out OBO-XML (which is basically a straightforward conversion of the event stream into \s-1XML\s0) .IP "obo_text" 4 .IX Item "obo_text" .PD 0 .IP "go_ont" 4 .IX Item "go_ont" .PD legacy GO-ontology file format .IP "go_xref" 4 .IX Item "go_xref" \&\s-1GO\s0 xref file, for linking \s-1GO\s0 terms to terms and dbxrefs in other ontologies .IP "go_defs" 4 .IX Item "go_defs" legacy GO-definitions file format .IP "go_assoc" 4 .IX Item "go_assoc" \&\s-1GO\s0 association file format .IP "rdf" 4 .IX Item "rdf" \&\s-1GO\s0 XML-RDF file format .IP "owl" 4 .IX Item "owl" \&\s-1OWL\s0 format (default: OWL-DL) .Sp \&\s-1OWL\s0 is a W3C standard format for ontologies .Sp You will need the \s-1XSL\s0 files from the full go-dev distribution to run this; see the \s-1XML\s0 section in .IP "prolog" 4 .IX Item "prolog" prolog facts \- you will need a prolog compiler/interpreter to use these. You can reason over these facts using Obol or the forthcoming Bio-LP project .IP "sxpr" 4 .IX Item "sxpr" lisp style S\-Expressions, conforming to the OBO-XML schema; you will need lisp to make full use of these. 
you can also do some nice stuff just within emacs (use lisp-mode and load an sxpr file into your buffer) .IP "godb" 4 .IX Item "godb" this is actually part of the go-db-perl library, not the go-perl library .Sp catches events and loads them into a database conforming to the \s-1GO\s0 database schema; see the directory go\-dev/sql, as part of the whole go-dev distribution; or www.godatabase.org/dev/database .IP "obj_yaml" 4 .IX Item "obj_yaml" A \s-1YAML\s0 dump of the perl GO::Model::Graph object. You need \s-1YAML\s0 from \s-1CPAN\s0 for this to work .IP "obj_storable" 4 .IX Item "obj_storable" A dump of the perl GO::Model::Graph object. You need Storable from \s-1CPAN\s0 for this to work. This is intended to cache objects on the filesystem, for fast access. The obj_storable representation may not be portable .SH "EXAMPLES OF DATATYPE TEXT FORMATS" .IX Header "EXAMPLES OF DATATYPE TEXT FORMATS" .SS "go_ont format" .IX Subsection "go_ont format" eg format: go_ont for storing graphs and metadata; for example: .PP .Vb 9 \& !version: $Revision: 1.15 $ \& !date: $Date: 2006/04/20 22:48:23 $ \& !editors: Michael Ashburner (FlyBase), Midori Harris (SGD), Judy Blake (MGD) \& $Gene_Ontology ; GO:0003673 \& $cellular_component ; GO:0005575 \& %extracellular ; GO:0005576 \& \& \& \& file \& z.ontology \& 1075164285 \& \& \& GO:0003673 \& Gene_Ontology \& root \& \& \& GO:0005575 \& cellular_component \& root \& GO:0003673 \& \& \& GO:0005576 \& extracellular \& root \& GO:0005575 \& \& \& GO:0005577 \& fibrinogen \& root \& \& part_of \& GO:0005576 \& \& \& \& GO:0005972 \& fibrinogen alpha chain \& root \& \& part_of \& GO:0005577 \& \& \& \& GO:0005973 \& fibrinogen beta chain \& root \& \& part_of \& GO:0005577 \& \& \& .Ve .SS "go_def format" .IX Subsection "go_def format" eg format: go_defs for storing definitions: .PP .Vb 6 \& !Gene Ontology definitions \& ! 
\& term: \*(Aqde novo\*(Aq protein folding \& goid: GO:0006458 \& definition: Processes that assist the folding of a nascent peptide chain into its correct tertiary structure. \& definition_reference: Sanger:mb .Ve .PP See GO::Parsers::go_def_parser for more details .SS "go_xref format" .IX Subsection "go_xref format" eg format: go_xrefs for storing links between \s-1GO\s0 IDs and IDs for terms in other DBs: .PP .Vb 2 \& EC:1.\-.\-.\- > GO:oxidoreductase ; GO:0016491 \& EC:1.1.\-.\- > GO:1\-phenylethanol dehydrogenase ; GO:0018449 .Ve .PP See GO::Parsers::go_xref_parser for more details .SS "go_assoc format" .IX Subsection "go_assoc format" eg format: go-assocs for storing gene-associations: .PP .Vb 2 \& SGD S0004660 AAC1 GO:0005743 SGD:12031|PMID:2167309 TAS C ADP/ATP translocator YMR056C gene taxon:4932 20010118 \& SGD S0004660 AAC1 GO:0006854 SGD:12031|PMID:2167309 IDA P ADP/ATP translocator YMR056C gene taxon:4932 20010118 .Ve .PP See GO::Parsers::go_assoc_parser for more details .SS "obo_text format" .IX Subsection "obo_text format" .SS "new" .IX Subsection "new" .Vb 2 \& Usage \- my $parser = GO::Parser\->new() \& Returns \- GO::Parser .Ve .PP creates a new parser .SS "create_handler" .IX Subsection "create_handler" .Vb 3 \& Usage \- my $handler = GO::Parser\->create_handler(\*(Aqobj\*(Aq); \& Returns \- L \& Args \- handler type [str] .Ve