.\" Automatically generated by Pod::Man 4.09 (Pod::Simple 3.35) .\" .\" Standard preamble: .\" ======================================================================== .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' . ds C` . ds C' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is >0, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .\" .\" Avoid warning from groff about undefined register 'F'. .de IX .. .if !\nF .nr F 0 .if \nF>0 \{\ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . if !\nF==2 \{\ . nr % 0 . nr F 2 . \} .\} .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). .\" Fear. Run. Save yourself. No user-serviceable parts. . \" fudge factors for nroff and troff .if n \{\ . ds #H 0 . ds #V .8m . ds #F .3m . ds #[ \f1 . ds #] \fP .\} .if t \{\ . ds #H ((1u-(\\\\n(.fu%2u))*.13m) . 
ds #V .6m . ds #F 0 . ds #[ \& . ds #] \& .\} . \" simple accents for nroff and troff .if n \{\ . ds ' \& . ds ` \& . ds ^ \& . ds , \& . ds ~ ~ . ds / .\} .if t \{\ . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' .\} . \" troff and (daisy-wheel) nroff accents .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' .ds 8 \h'\*(#H'\(*b\h'-\*(#H' .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] .ds ae a\h'-(\w'a'u*4/10)'e .ds Ae A\h'-(\w'A'u*4/10)'E . \" corrections for vroff .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' . \" for low resolution devices (crt and lpr) .if \n(.H>23 .if \n(.V>19 \ \{\ . ds : e . ds 8 ss . ds o a . ds d- d\h'-1'\(ga . ds D- D\h'-1'\(hy . ds th \o'bp' . ds Th \o'LP' . ds ae ae . ds Ae AE .\} .rm #[ #] #H #V #F C .\" ======================================================================== .\" .IX Title "GTF2GFF3 1p" .TH GTF2GFF3 1p "2018-10-03" "perl v5.26.2" "User Contributed Perl Documentation" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. 
.if n .ad l .nh .ie n .SH " if ($i == 0 && $START_IN_CDS == 0) { if ($strand == 1) { $exon\->start = $start\->[0]{start}; } else { $exon\->end = $start\->[0]{end}; } } if ($i == (scalar @{$CDSs} \- 1) && $STOP_IN_CDS == 0) { if ($strand == 1) { $exon\->{end} = $stop\->[0]{end}; } else { $exon\->{start} = $stop\->[0]{start} } }" .el .SH " if ($i == 0 && \f(CW$START_IN_CDS\fP == 0) { if ($strand == 1) { \f(CW$exon\fP\->start = \f(CW$start\fP\->[0]{start}; } else { \f(CW$exon\fP\->end = \f(CW$start\fP\->[0]{end}; } } if ($i == (scalar @{$CDSs} \- 1) && \f(CW$STOP_IN_CDS\fP == 0) { if ($strand == 1) { \f(CW$exon\fP\->{end} = \f(CW$stop\fP\->[0]{end}; } else { \f(CW$exon\fP\->{start} = \f(CW$stop\fP\->[0]{start} } }" .IX Header " if ($i == 0 && $START_IN_CDS == 0) { if ($strand == 1) { $exon->start = $start->[0]{start}; } else { $exon->end = $start->[0]{end}; } } if ($i == (scalar @{$CDSs} - 1) && $STOP_IN_CDS == 0) { if ($strand == 1) { $exon->{end} = $stop->[0]{end}; } else { $exon->{start} = $stop->[0]{start} } }" .SH "NAME" gtf2gff3 \- Converts GTF formatted files to valid GFF3 files .SH "VERSION" .IX Header "VERSION" This document describes version 0.1 .SH "SYNOPSIS" .IX Header "SYNOPSIS" gtf2gff3 \-\-cfg gtf2gff3_MY_CONFIG.cfg gtf_file > gff3_file .SH "DESCRIPTION" .IX Header "DESCRIPTION" This script will convert \s-1GTF\s0 formatted files to valid \s-1GFF3\s0 formatted files. It will map the value in column 3 (\*(L"type\*(R" column) to valid \&\s-1SO,\s0 but because many non-standard terms may appear in that column in \s-1GTF\s0 files, you may edit the config file to provide your own \s-1GTF\s0 feature to \&\s-1SO\s0 mapping. The script will also build gene models from exons, CDSs and other features given in the \s-1GTF\s0 file. It is currently tested on Ensembl and Twinscan \s-1GTF,\s0 and it should work on any other files that follow the same specification. 
It does not work on \s-1GTF\s0 from the \s-1UCSC\s0 table browser because those files use the same \s-1ID\s0 for gene and transcript, so it is impossible to group multiple transcripts to a gene. See the \s-1README\s0 that came with the script for more info. .SH "OPTIONS:" .IX Header "OPTIONS:" .IP "\-\-cfg" 4 .IX Item "--cfg" Provide the filename for a config file. See the configuration file provided with this script for format details. Use this configuration file to modify the behavior of the script. If no config file is given it looks for ./gtf2gff3.cfg, ~/gtf2gff3.cfg or /etc/gtf2gff3.cfg in that order. .IP "\-\-help" 4 .IX Item "--help" Provide a detailed man page style help message and then exit. .SH "DIAGNOSTICS" .IX Header "DIAGNOSTICS" .ie n .IP """ERROR: Missing or non\-standard attributes: parse_attributes""" 4 .el .IP "\f(CWERROR: Missing or non\-standard attributes: parse_attributes\fR" 4 .IX Item "ERROR: Missing or non-standard attributes: parse_attributes" A line in the \s-1GTF\s0 file did not have any attributes, or its attributes column was unparsable. .ie n .IP """ERROR: Non\-transcript gene feature not supported. Please contact the author for support: build_gene""" 4 .el .IP "\f(CWERROR: Non\-transcript gene feature not supported. Please contact the author for support: build_gene\fR" 4 .IX Item "ERROR: Non-transcript gene feature not supported. Please contact the author for support: build_gene" This warning indicates that a line was skipped because it contained a non-transcript gene feature, and the code is not currently equipped to handle this type of feature. This probably isn't too hard to add, so contact me if you get this error and would like to have these features supported. 
.ie n .IP """ERROR: Must have at least exons or CDSs to build a transcript: build_trnsc""" 4 .el .IP "\f(CWERROR: Must have at least exons or CDSs to build a transcript: build_trnsc\fR" 4 .IX Item "ERROR: Must have at least exons or CDSs to build a transcript: build_trnsc" Some feature had a transcript_id and yet there were no exons or CDSs associated with that transcript_id so the script failed to build a transcript. .ie n .IP """ERROR: seq_id conflict: validate_and_finish_trnsc""" 4 .el .IP "\f(CWERROR: seq_id conflict: validate_and_finish_trnsc\fR" 4 .IX Item "ERROR: seq_id conflict: validate_and_finish_trnsc" Found two features within the same transcript that didn't share the same seq_id. .ie n .IP """ERROR: source conflict: validate_and_finish_trnsc""" 4 .el .IP "\f(CWERROR: source conflict: validate_and_finish_trnsc\fR" 4 .IX Item "ERROR: source conflict: validate_and_finish_trnsc" Found two features within the same transcript that didn't share the same source. .ie n .IP """ERROR: type conflict: validate_and_finish_trnsc""" 4 .el .IP "\f(CWERROR: type conflict: validate_and_finish_trnsc\fR" 4 .IX Item "ERROR: type conflict: validate_and_finish_trnsc" Found two features within the same transcript that were expected to share the same type and yet they didn't. .ie n .IP """ERROR: strand conflict: validate_and_finish_trnsc""" 4 .el .IP "\f(CWERROR: strand conflict: validate_and_finish_trnsc\fR" 4 .IX Item "ERROR: strand conflict: validate_and_finish_trnsc" Found two features within the same transcript that didn't share the same strand. .ie n .IP """ERROR: seq_id conflict: validate_and_build_gene""" 4 .el .IP "\f(CWERROR: seq_id conflict: validate_and_build_gene\fR" 4 .IX Item "ERROR: seq_id conflict: validate_and_build_gene" Found two features within the same gene that didn't share the same seq_id. 
.ie n .IP """ERROR: source conflict: validate_and_build_gene""" 4 .el .IP "\f(CWERROR: source conflict: validate_and_build_gene\fR" 4 .IX Item "ERROR: source conflict: validate_and_build_gene" Found two features within the same gene that didn't share the same source. .ie n .IP """ERROR: strand conflict: validate_and_build_gene""" 4 .el .IP "\f(CWERROR: strand conflict: validate_and_build_gene\fR" 4 .IX Item "ERROR: strand conflict: validate_and_build_gene" Found two features within the same gene that didn't share the same strand. .ie n .IP """ERROR: gene_id conflict: validate_and_build_gene""" 4 .el .IP "\f(CWERROR: gene_id conflict: validate_and_build_gene\fR" 4 .IX Item "ERROR: gene_id conflict: validate_and_build_gene" Found two features within the same gene that didn't share the same gene_id. .ie n .IP """FATAL: Can\*(Aqt open GTF file: file_name for reading.""" 4 .el .IP "\f(CWFATAL: Can\*(Aqt open GTF file: file_name for reading.\fR" 4 .IX Item "FATAL: Cant open GTF file: file_name for reading." Unable to open the \s-1GTF\s0 file for reading. .ie n .IP """FATAL: Need exons or CDSs to build transcripts: process_start""" 4 .el .IP "\f(CWFATAL: Need exons or CDSs to build transcripts: process_start\fR" 4 .IX Item "FATAL: Need exons or CDSs to build transcripts: process_start" A start_codon feature was annotated and yet there were no exons or CDSs associated with that transcript_id so the script failed. .ie n .IP """FATAL: Untested code in process_start. Contact the aurthor for support.""" 4 .el .IP "\f(CWFATAL: Untested code in process_start. Contact the aurthor for support.\fR" 4 .IX Item "FATAL: Untested code in process_start. Contact the aurthor for support." The script is written to infer a start codon based on the presence of a 5' \s-1UTR,\s0 but we had no example \s-1GTF\s0 of this type when we wrote the code, so we killed the process rather than run untested code. Contact the author for support. 
.ie n .IP """FATAL: Invalid feature set: process_start""" 4 .el .IP "\f(CWFATAL: Invalid feature set: process_start\fR" 4 .IX Item "FATAL: Invalid feature set: process_start" We tried to consider all possible ways of inferring a start codon or inferring a non-coding gene, and yet we've failed. Your combination of gene features doesn't make sense to us. You should never get this error, and if you do, we'd really like to see the \s-1GTF\s0 file that generated it. Please contact the author for support. .ie n .IP """FATAL: Need exons or CDSs to build transcripts: process_stop""" 4 .el .IP "\f(CWFATAL: Need exons or CDSs to build transcripts: process_stop\fR" 4 .IX Item "FATAL: Need exons or CDSs to build transcripts: process_stop" A stop_codon feature was annotated and yet there were no exons or CDSs associated with that transcript_id so the script failed. .ie n .IP """FATAL: Untested code in process_stop. Contact the aurthor for support.""" 4 .el .IP "\f(CWFATAL: Untested code in process_stop. Contact the aurthor for support.\fR" 4 .IX Item "FATAL: Untested code in process_stop. Contact the aurthor for support." The script is written to infer a stop codon based on the presence of a 3' \s-1UTR,\s0 but we had no example \s-1GTF\s0 of this type when we wrote the code, so we killed the process rather than run untested code. Contact the author for support. .ie n .IP """FATAL: Invalid feature set: process_stop""" 4 .el .IP "\f(CWFATAL: Invalid feature set: process_stop\fR" 4 .IX Item "FATAL: Invalid feature set: process_stop" We tried to consider all possible ways of inferring a stop codon or inferring a non-coding gene, and yet we've failed. Your combination of gene features doesn't make sense to us. You should never get this error, and if you do, we'd really like to see the \s-1GTF\s0 file that generated it. Please contact the author for support. 
.ie n .IP """FATAL: Invalid feature set: process_exon_CDS_UTR""" 4 .el .IP "\f(CWFATAL: Invalid feature set: process_exon_CDS_UTR\fR" 4 .IX Item "FATAL: Invalid feature set: process_exon_CDS_UTR" We tried to consider all possible ways of inferring exons, CDSs and UTRs and yet we've failed. Your combination of gene features doesn't make sense to us. You should never get this error, and if you do, we'd really like to see the \s-1GTF\s0 file that generated it. Please contact the author for support. .ie n .IP """FATAL: Array reference required: sort_features.""" 4 .el .IP "\f(CWFATAL: Array reference required: sort_features.\fR" 4 .IX Item "FATAL: Array reference required: sort_features." A user shouldn't be able to trigger this error. It almost certainly indicates a software bug. Please contact the author. .ie n .IP """FATAL: Can\*(Aqt determine strand in: sort_feature_types.""" 4 .el .IP "\f(CWFATAL: Can\*(Aqt determine strand in: sort_feature_types.\fR" 4 .IX Item "FATAL: Cant determine strand in: sort_feature_types." This may indicate that your \s-1GTF\s0 file does not indicate the strand for features that require it. It may also indicate a software bug. Please contact the author. .ie n .IP """FATAL: Hash reference required: sort_feature_types.""" 4 .el .IP "\f(CWFATAL: Hash reference required: sort_feature_types.\fR" 4 .IX Item "FATAL: Hash reference required: sort_feature_types." A user shouldn't be able to trigger this error. It almost certainly indicates a software bug. Please contact the author. .ie n .IP """FATAL: Invalid value passed to strand: strand.""" 4 .el .IP "\f(CWFATAL: Invalid value passed to strand: strand.\fR" 4 .IX Item "FATAL: Invalid value passed to strand: strand." This may indicate that your \s-1GTF\s0 file does not indicate the strand for features that require it. Consider using the \s-1DEFAULT_STRAND\s0 parameter in the config file. It may also indicate a software bug. Please contact the author. 
.SH "CONFIGURATION AND ENVIRONMENT" .IX Header "CONFIGURATION AND ENVIRONMENT" A configuration file is provided with this script. The script will look for that configuration file in ./gtf2gff3.cfg, ~/gtf2gff3.cfg or /etc/gtf2gff3.cfg in that order. If the configuration file is not found in one of those locations and one is not provided via the \-\-cfg flag it will try to choose some sane defaults, but you really should provide the configuration file. See the supplied configuration file itself as well as the \s-1README\s0 that came with this package for format and details about the configuration file. .SH "DEPENDENCIES" .IX Header "DEPENDENCIES" This script requires the following perl packages that are available from \s-1CPAN\s0 (www.cpan.org). .PP Getopt::Long; Config::Std; .SH "INCOMPATIBILITIES" .IX Header "INCOMPATIBILITIES" None reported. .SH "BUGS AND LIMITATIONS" .IX Header "BUGS AND LIMITATIONS" No bugs have been reported. .PP Please report any bugs or feature requests to: .SH "AUTHOR" .IX Header "AUTHOR" Barry Moore .SH "LICENCE AND COPYRIGHT" .IX Header "LICENCE AND COPYRIGHT" Copyright (c) 2007, University of Utah .PP .Vb 2 \& This module is free software; you can redistribute it and/or \& modify it under the same terms as Perl itself. .Ve .SH "DISCLAIMER OF WARRANTY" .IX Header "DISCLAIMER OF WARRANTY" \&\s-1BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE SOFTWARE \*(L"AS IS\*(R" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH YOU. 
SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR, OR CORRECTION.\s0 .PP \&\s-1IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE SOFTWARE\s0 (\s-1INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE\s0), \s-1EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.\s0