Scroll to navigation

libpll(3) Library Functions Manual libpll(3)

NAME

libpll — Phylogenetic Likelihood Library

SYNOPSIS

Partition management

pll_partition_t * pll_partition_create(unsigned int tips, unsigned int clv_buffers, unsigned int states, unsigned int sites, unsigned int rate_matrices, unsigned int prob_matrices, unsigned int rate_cats, unsigned int scale_buffers, unsigned int attributes);

void pll_partition_destroy(pll_partition_t * partition);

Partition parameters setup

int pll_set_tip_states(pll_partition_t * partition, unsigned int tip_index, const unsigned int * map, const char * sequence);

int pll_set_tip_clv(pll_partition_t * partition, unsigned int tip_index, const double * clv);

void pll_set_pattern_weights(pll_partition_t * partition, const unsigned int * pattern_weights);

int pll_set_asc_bias_type(pll_partition_t * partition, int asc_bias_type);

void pll_set_asc_state_weights(pll_partition_t * partition, const unsigned int * state_weights);

void pll_set_subst_params(pll_partition_t * partition, unsigned int params_index, const double * params);

void pll_set_frequencies(pll_partition_t * partition, unsigned int params_index, const double * frequencies);

void pll_set_category_rates(pll_partition_t * partition, const double * rates);

void pll_set_category_weights(pll_partition_t * partition, const double * rate_weights);

Transition probability matrices

int pll_update_prob_matrices(pll_partition_t * partition, const unsigned int * params_index, const unsigned int * matrix_indices, const double * branch_lengths, unsigned int count);

int pll_update_eigen(pll_partition_t * partition, unsigned int params_index);

void pll_show_pmatrix(pll_partition_t * partition, unsigned int index, unsigned int float_precision);

Invariant sites

unsigned int pll_count_invariant_sites(pll_partition_t * partition, unsigned int * state_inv_count);

int pll_update_invariant_sites(pll_partition_t * partition);

int pll_update_invariant_sites_proportion(pll_partition_t * partition, unsigned int params_index, double prop_invar);

Conditional probability vectors

void pll_update_partials(pll_partition_t * partition, const pll_operation_t * operations, unsigned int count);

void pll_show_clv(pll_partition_t * partition, unsigned int clv_index, int scaler_index, unsigned int float_precision);

Evaluation of log-Likelihood

double pll_compute_root_loglikelihood(pll_partition_t * partition, unsigned int clv_index, int scaler_index, const unsigned int * freqs_index, double * persite_lnl);

double pll_compute_edge_loglikelihood(pll_partition_t * partition, unsigned int parent_clv_index, int parent_scaler_index, unsigned int child_clv_index, int child_scaler_index, unsigned int matrix_index, const unsigned int * freqs_index, double * persite_lnl);

Likelihood function derivatives

int pll_update_sumtable(pll_partition_t * partition, unsigned int parent_clv_index, unsigned int child_clv_index, const unsigned int * params_indices, double * sumtable);

int pll_compute_likelihood_derivatives(pll_partition_t * partition, int parent_scaler_index, int child_scaler_index, double branch_length, const unsigned int * params_indices, const double * sumtable, double * d_f, double * dd_f);

FASTA file handling

pll_fasta_t * pll_fasta_open(const char * filename, const unsigned int * map);

int pll_fasta_getnext(pll_fasta_t * fd, char ** head, long * head_len, char ** seq, long * seq_len, long * seqno);

void pll_fasta_close(pll_fasta_t * fd);

long pll_fasta_getfilesize(pll_fasta_t * fd);

long pll_fasta_getfilepos(pll_fasta_t * fd);

int pll_fasta_rewind(pll_fasta_t * fd);

PHYLIP file handling

pll_msa_t * pll_phylip_parse_msa(const char * filename, unsigned int * msa_count);

void pll_msa_destroy(pll_msa_t * msa);

Newick handling

pll_rtree_t * pll_rtree_parse_newick(const char * filename, unsigned int * tip_count);

pll_utree_t * pll_utree_parse_newick(const char * filename, unsigned int * tip_count);

pll_utree_t * pll_utree_parse_newick_string(char * s, unsigned int * tip_count);

Unrooted tree structure manipulation

void pll_utree_destroy(pll_utree_t * root);

void pll_utree_show_ascii(pll_utree_t * tree, int options);

char * pll_utree_export_newick(pll_utree_t * root);

int pll_utree_traverse(pll_utree_t * root, int (*cbtrav)(pll_utree_t *), pll_utree_t ** outbuffer, unsigned int * trav_size);

unsigned int pll_utree_query_tipnodes(pll_utree_t * root, pll_utree_t ** node_list);

unsigned int pll_utree_query_innernodes(pll_utree_t * root, pll_utree_t ** node_list);

void pll_utree_create_operations(pll_utree_t ** trav_buffer, unsigned int trav_buffer_size, double * branches, unsigned int * pmatrix_indices, pll_operation_t * ops, unsigned int * matrix_count, unsigned int * ops_count);

int pll_utree_check_integrity(pll_utree_t * root);

pll_utree_t * pll_utree_clone(pll_utree_t * root);

pll_utree_t * pll_rtree_unroot(pll_rtree_t * root);

int pll_utree_every(pll_utree_t * node, int (*cb)(pll_utree_t *));

Rooted tree structure manipulation

void pll_rtree_destroy(pll_rtree_t * root);

void pll_rtree_show_ascii(pll_rtree_t * tree, int options);

char * pll_rtree_export_newick(pll_rtree_t * root);

int pll_rtree_traverse(pll_rtree_t * root, int (*cbtrav)(pll_rtree_t *), pll_rtree_t ** outbuffer, unsigned int * trav_size);

unsigned int pll_rtree_query_tipnodes(pll_rtree_t * root, pll_rtree_t ** node_list);

unsigned int pll_rtree_query_innernodes(pll_rtree_t * root, pll_rtree_t ** node_list);

void pll_rtree_create_operations(pll_rtree_t ** trav_buffer, unsigned int trav_buffer_size, double * branches, unsigned int * pmatrix_indices, pll_operation_t * ops, unsigned int * matrix_count, unsigned int * ops_count);

void pll_rtree_create_pars_buildops(pll_rtree_t ** trav_buffer, unsigned int trav_buffer_size, pll_pars_buildop_t * ops, unsigned int * ops_count);

void pll_rtree_create_pars_recops(pll_rtree_t ** trav_buffer, unsigned int trav_buffer_size, pll_pars_recop_t * ops, unsigned int * ops_count);

Topological rearrangement moves

int pll_utree_spr(pll_utree_t * p, pll_utree_t * r, pll_utree_rb_t * rb, double * branch_lengths, unsigned int * matrix_indices);

int pll_utree_spr_safe(pll_utree_t * p, pll_utree_t * r, pll_utree_rb_t * rb, double * branch_lengths, unsigned int * matrix_indices);

int pll_utree_nni(pll_utree_t * p, int type, pll_utree_rb_t * rb);

int pll_utree_rollback(pll_utree_rb_t * rollback, double * branch_lengths, unsigned int * matrix_indices);

Parsimony functions

int pll_set_parsimony_sequence(pll_parsimony_t * pars, unsigned int tip_index, const unsigned int * map, const char * sequence);

pll_parsimony_t * pll_parsimony_create(unsigned int * tips, unsigned int states, unsigned int sites, double * score_matrix, unsigned int score_buffers, unsigned int ancestral_buffers);

double pll_parsimony_build(pll_parsimony_t * pars, pll_pars_buildop_t * operations, unsigned int count);

void pll_parsimony_reconstruct(pll_parsimony_t * pars, const unsigned int * map, pll_pars_recop_t * operations, unsigned int count);

double pll_parsimony_score(pll_parsimony_t * pars, unsigned int score_buffer_index);

void pll_parsimony_destroy(pll_parsimony_t * pars);

Auxiliary functions

int pll_compute_gamma_cats(double alpha, unsigned int categories, double * output_rates);

void * pll_aligned_alloc(size_t size, size_t alignment);

void pll_aligned_free(void * ptr);

unsigned int * pll_compress_site_patterns(char ** sequence, const unsigned int * map, int count, int * length);

Core functions

void pll_core_create_lookup(unsigned int states, unsigned int rate_cats, double * lookup, const double * left_matrix, const double * right_matrix, unsigned int * tipmap, unsigned int tipmap_size, unsigned int attrib);

void pll_core_update_partial_tt(unsigned int states, unsigned int sites, unsigned int rate_cats, double * parent_clv, unsigned int * parent_scaler, const unsigned char * left_tipchars, const unsigned char * right_tipchars, const unsigned int * tipmap, unsigned int tipmap_size, const double * lookup, unsigned int attrib);

void pll_core_update_partial_ti(unsigned int states, unsigned int sites, unsigned int rate_cats, double * parent_clv, unsigned int * parent_scaler, const unsigned char * left_tipchars, const double * right_clv, const double * left_matrix, const double * right_matrix, const unsigned int * right_scaler, const unsigned int * tipmap, unsigned int attrib);

void pll_core_update_partial_ii(unsigned int states, unsigned int sites, unsigned int rate_cats, double * parent_clv, unsigned int * parent_scaler, const double * left_clv, const double * right_clv, const double * left_matrix, const double * right_matrix, const unsigned int * left_scaler, const unsigned int * right_scaler, unsigned int attrib);

int pll_core_update_sumtable_ti(unsigned int states, unsigned int sites, unsigned int rate_cats, const double * parent_clv, const unsigned char * left_tipchars, double ** eigenvecs, double ** inv_eigenvecs, double ** freqs, unsigned int * tipmap, double * sumtable, unsigned int attrib);

int pll_core_likelihood_derivatives(unsigned int states, unsigned intsites, unsigned int rate_cats, const double * rate_weights, const unsigned int * parent_scaler, const unsigned int * child_scaler, const int * invariant, const unsigned int * pattern_weights, double branch_length, const double * prop_invar, double ** freqs, const double * rates, double ** eigenvals, const double * sumtable, double * d_f, double * dd_f, unsigned int attrib);

double pll_core_edge_loglikelihood_ii(unsigned int states, unsigned int sites, unsigned int rate_cats, const double * parent_clv, const unsigned int * parent_scaler, const double * child_clv, const unsigned int * child_scaler, const double * pmatrix, double ** frequencies, const double * rate_weights, const unsigned int * pattern_weights, const double * invar_proportion, const int * invar_indices, const unsigned int * freqs_indices, double * persite_lnl, unsigned int attrib);

double pll_core_edge_loglikelihood_ti(unsigned int states, unsigned int sites, unsigned int rate_cats, const double * parent_clv, const unsigned int * parent_scaler, const unsigned char * tipchars, const unsigned int * tipmap, const double * pmatrix, double ** frequencies, const double * rate_weights, const unsigned int * pattern_weights, const double * invar_proportion, const int * invar_indices, const unsigned int * freqs_indices, double * persite_lnl, unsigned int attrib);

int pll_core_update_pmatrix(double * pmatrix, unsigned int states, double rate, double prop_invar, double branch_length, double * eigenvals, double * eigenvecs, double * inv_eigenvecs, unsigned int attrib);

DESCRIPTION

libpll is a library for phylogenetics.

Creates a partition with either tips character arrays or tips CLV arrays (depending on attributes, see Partition Attributes), and, additionally, clv_buffers CLV vectors, for storing conditional probabilities at inner nodes. The partition structure is constructed for states number of states (e.g. 4 for nucleotide and 20 for amino-acid data) and sufficient space is allocated to host an alignment of size sites*tips. The number of rate matrices that can be used is given by rate_matrices. Additionally, the function allocates space for hosting rate_matrices arrays of substitution parameters, frequencies, and auxiliary eigen-decomposition arrays (transparent to the user). The parameter prob_matrices dictates the number of probability matrices for which space will be allocated. This parameter is typically set to the number of branches the tree has (e.g., 2n-3 for unrooted and 2n-2 for rooted, where n is the number of tips/leaves). libpll will automatically create space for prob_matrices*rate_cats, where rate_cats is the number of different rate categories. The array of probability matrices is indexed from 0 to prob_matrices-1. Each matrix entry consists of sufficient space to accommodate rate_cats matrices, which are stored consecutively in memory. Note that libpll will not allocate space for the different substitution matrices specified by rate_matrices. The user must indicate that to libpll by multiplying prob_matrices with the corresponding factor. Finally, scale_buffers sets the number of scaling buffers to be allocated, and attributes states the hardware acceleration options to be used (see Partition Attributes). The function returns a pointer to the allocated pll_partition_t structure. Note that, rate_matrices are used to address heterotachy, i.e. transition probability matrices computed from different rate matrices. For more information, see Updating transition probability matrices.
Deallocates all data associated with the partition pointed by partition.
Set the tip CLV (or tip character array) with index tip_index of instance partition, according to the character sequence sequence and the conversion table map, which translates (or maps) characters to states. For an example see Setting CLV vectors at tips from sequences and maps.
Set the tip CLV with index tip_index of instance partition, to the contents of the array clv. For an example see Setting CLV vectors manually. Note, this function cannot be used in conjunction with the PLL_ATTRIB_PATTERN_TIP (see Partition Attributes).
Sets the parameters for substitution model with index params_index, where params_index ranges from 0 to rate_matrices-1, as specified in the pll_partition_create() call. Array params should contain exactly (states*states-states)/2 parameters of type double. These values correspond to the upper triangle elements (above the main diagonal) of the rate matrix.
Sets the base frequencies for the substitution model with index params_index, where params_index ranges from 0 to rate_matrices-1, as specified in the pll_partition_create() call. The array of base frequencies (frequencies) is copied into the instance. The order of bases in the array depends on the encoding used when converting tip sequences to CLV. For example, if the pll_map_nt map was used with the pll_set_tip_states() function to describe nucleotide data, then the order is A, C, G, T. However, this can be arbitrarily set by adjusting the provided map.
Sets the vector of pattern weights (pattern_weights) for partition. The function reads and copies the first partition->sites elements of pattern_weights into partition->pattern_weights.
Sets the rate categories for partition. The function reads and copies the first partition->rate_cats elements of array rates into partition->rates.
Updates the invariant sites array partition->invariant, according to the sequences in the partition. This function is implicitly called by pll_update_invariant_sites_proportion() when the specified proportion of invariant sites is greater than zero, but it must be explicitly called by the client code if the sequences change.
Updates the proportion of invariant sites for the partition rate matrix with with index params_index. Note that, this call will not implicitly update the transition probability matrices computed from the particular rate matrix, but must be done explicitly for example with a call to pll_update_prob_matrices().
Computes the transition probability matrices specified by the count indices in matrix_indices, for all rate categories. A matrix with index matrix_indices[i] will be computed using the branch length branch_lengths[i]. To compute the matrix for rate category j, the function uses the rate matrix with index params_indices[j]. Matrices are stored in partition->pmatrix[matrix_indices[i]]. Note that, each such entry holds the matrices for all rate categories, stored consecutively in memory.
Updates the eigenvectors (partition->eigenvecs[params_index]), inverse eigenvectors (partition->eigenvecs[params_index]), and eigenvalues (partition->eigenvals[params_index]) using the substitution parameters (partition->subst_params[params_index]) and base frequencies (partition->frequencies[params_index]) specified by params_index.
Prints the transition probability matrices for each rate category of partition associated with index to standard output. The floating point precision is dictated by float_precision.
Returns the number of invariant sites in the sequence alignment from partition. The array state_inv_count must be of size partition->states and is filled such that entry i contains the count of invariant sites for state i.
Updates the invariant sites array partition->invariant, according to the sequences in the partition. This function is implicitly called by pll_update_invariant_sites_proportion() when the specified proportion of invariant sites is greater than zero, but it must be explicitly called by the client code if the sequences change.
Updates the proportion of invariant sites for the rate matrix of partition with index params_index. Note that, this call will not implicitly update the transition probability matrices computed from the particular rate matrix, but must be done explicitly for example with a call to pll_update_prob_matrices().
Updates the count conditional probability vectors (CPV) defined by the entries of operations, in the order they appear in the array. Each operations entry describes one CPV from partition. See also pll_operation_t.
Prints to standard output the conditional probability vector for index clv_index from partition, using the scale buffer with index scaler_index. If no scale buffer was used, then scaler_index must be passed the value PLL_SCALE_BUFFER_NONE. The floating precision (number of digits after decimal point) is dictated by float_precision. The output contains brackets, curly braces and round brackets to separate the values as sites, rate categories and states related, respectively.
Evaluates the log-likelihood of a rooted tree, for the vector of conditional probabilities (partials) with index clv_index, scale buffer with index scaler_index (or PLL_SCALE_BUFFER_NONE), and base frequencies arrays with indices freqs_index (one per rate category). If persite_lnl is not NULL, then it must be large enough to hold partition->sites double-precision values, and will be filled with the per-site log-likelihoods.
Evaluates the log-likelihood of an unrooted tree, by providing the conditional probability vectors (partials) for two nodes that share an edge with indices parent_clv_index resp. child_clv_index, scale buffers with indices parent_scaler_index resp. child_clv_index (or PLL_SCALE_BUFFER_NONE), the transition probability matrix with index matrix_index and base frequencies arrays with indices freqs_index (one per rate category). If persite_lnl is not NULL, then it must be large enough to hold partition>sites` double-precision values, and will be filled with the per-site log-likelihoods.

AVAILABILITY

Source code and binaries are available at <https://github.com/xflouris/libpll>.

COPYRIGHT

Copyright (C) 2015-2017, Tomas Flouri, Diego Darriba

All rights reserved.

Contact: Tomas Flouri <Tomas.Flouri@h-its.org>, Scientific Computing, Heidelberg Insititute for Theoretical Studies, 69118 Heidelberg, Germany

This software is licensed under the terms of the GNU Affero General Public License version 3.

GNU Affero General Public License version 3

This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>.

VERSION HISTORY

New features and important modifications of libpll (short lived or minor bug releases may not be mentioned):

First public release.
Added faster vectorizations for 20-state and arbitrary-state models, unweighted parsimony functions, randomized stepwise addition, portable functions for parsing trees from C-strings, per-rate category scalers for preventing numerical underflows. Modified newick exporting function to accept callbacks for custom printing. Fixed derivatives computation, parsing of branch lengths, invariant sites computation, log-likelihood computation for cases where we have scaling and patterns, ascertainment bias computation, per-site log-likelihood computation, memory leaks. Added run-time detection of hardware.
Correct updating of paddded eigen-decomposition arrays for models with a number of states not being a power of two. Added portable hardware detection for clang and GCC.
Added optional per-rate category scalers for protein and generic kernels. Improved fix for negative transition probability matrices caused by numerics. Fixed initialization of tip CLVs when using ascertainment bias correction with non-DNA sequences. Fixed excessive memory allocation when compressing site patterns and issue with PHYLIP parsing when header ends with CRLF.
July 12, 2017 libpll 0.3.2