jjustison / siphynetwork Goto Github PK
View Code? Open in Web Editor NEWA birth-death-hybridization based phylogenetic network simulator.
Home Page: https://jjustison.github.io/SiPhyNetwork/
License: GNU General Public License v3.0
A birth-death-hybridization based phylogenetic network simulator.
Home Page: https://jjustison.github.io/SiPhyNetwork/
License: GNU General Public License v3.0
The goal of this script is to show that when using mrca=TRUE
,bdh.age()
and bdh.taxa.ssa()
produce binary and non binary trees whereas with mrca=FALSE
, the functions produce all non binary trees as expected.
sim.bdh.age
, mrca=TRUE
##Libraries
library(NetSim) # simulate bd time-tree networks: https://github.com/jjustison/NetSim
library(geiger)
library(ape) # tools for handling phylo objects
## sim.bdh.age (mrca=TRUE)
set.seed(20)
numbsim=300
bdNS <- NetSim::sim.bdh.age(age = 6,
numbsim = numbsim,
lambda = 0.9,
mu = 0.5,
nu = 0,
hybprops = c(0.5, 0.5, 0.5),
hyb.inher.fxn = NetSim::make.beta.draw(1, 1),
frac = 1,
mrca = TRUE,
complete = FALSE,
stochsampling = FALSE,
hyb.rate.fxn = NULL,
trait.model = NULL)
## remove any trees with no taxa
bdNS <- bdNS[!sapply(X = bdNS, FUN = is.null)]
# Extract information of extinct Trees and Trees with one tip
ExtTip1<-unlist(bdNS[which(!sapply(X = bdNS, FUN = is.phylo))])
ExtTip1Char<-as.character(ExtTip1)
ExtTip1Char[ExtTip1Char=="0"]<-"Extinct"
ExtTip1Char[ExtTip1Char=="1"]<-"OneTip"
#remove extinct Trees and Trees with one tip
bdNS2 <- bdNS[sapply(X = bdNS, FUN = is.phylo)]#birth dead tree with NetSim with value 0 or 1
# Extract information of Binary and not binary Trees
Bin<-sapply(X = bdNS2, FUN = is.binary)
BinCh<-as.character(Bin)
BinCh[BinCh=="TRUE"]<-"Binary"
BinCh[BinCh=="FALSE"]<-"NotBinary"
# Percentage of tree types
AllTreeType<-c(ExtTip1Char,BinCh)
AllTreeType<-factor(AllTreeType,levels=c("Extinct","OneTip","Binary","NotBinary"))
PerTree<-(table(AllTreeType)/length(AllTreeType))*100
#Making plot
#As you can see, even with mrca=TRUE, the simulation produce binary and not binary trees, moreover not Binary Tree is more than 50% of the simulated trees.
par(mfrow=c(1,3),oma=c(0,0,5,0))
barplot(PerTree,main="Percentage of tree types",ylim=c(0,50))
plot(bdNS2[[which(Bin)[1]]],main="First Binary Tree")
plot(bdNS2[[which(!Bin)[1]]],main="First Not Binary Tree")
title("300 Birth-Dead Tree \n (age=6,lambda=0.9,mu=0.5,nu=0,hybprops = c(0.5, 0.5, 0.5))\n (mrca=TRUE)", outer=TRUE,cex.main=1.5)
sim.bdh.age
, mrca=FALSE
## sim.bdh.age (mrca=FALSE)
set.seed(40)
numbsim=300
bdNS <- NetSim::sim.bdh.age(age = 6,
numbsim = numbsim,
lambda = 0.9,
mu = 0.5,
nu = 0,
hybprops = c(0.5, 0.5, 0.5),
hyb.inher.fxn = NetSim::make.beta.draw(1, 1),
frac = 1,
mrca = TRUE,
complete = FALSE,
stochsampling = FALSE,
hyb.rate.fxn = NULL,
trait.model = NULL)
## remove any trees with no taxa
bdNS <- bdNS[!sapply(X = bdNS, FUN = is.null)]
# Extract information of extinct Trees and Trees with one tip
ExtTip1<-unlist(bdNS[which(!sapply(X = bdNS, FUN = is.phylo))])
ExtTip1Char<-as.character(ExtTip1)
ExtTip1Char[ExtTip1Char=="0"]<-"Extinct"
ExtTip1Char[ExtTip1Char=="1"]<-"OneTip"
#remove extinct Trees and Trees with one tip
bdNS2 <- bdNS[sapply(X = bdNS, FUN = is.phylo)]#birth dead tree with NetSim with value 0 or 1
# Extract information of Binary and not binary Trees
Bin<-sapply(X = bdNS2, FUN = is.binary)
BinCh<-as.character(Bin)
BinCh[BinCh=="TRUE"]<-"Binary"
BinCh[BinCh=="FALSE"]<-"NotBinary"
# Percentage of tree types
AllTreeType<-c(ExtTip1Char,BinCh)
AllTreeType<-factor(AllTreeType,levels=c("Extinct","OneTip","Binary","NotBinary"))
PerTree<-(table(AllTreeType)/length(AllTreeType))*100
#Making plot
par(mfrow=c(1,2),oma=c(0,0,5,0))
barplot(PerTree,main="Percentage of tree types",ylim=c(0,50))
plot(bdNS2[[which(!Bin)[1]]],main="First Not Binary Tree")
title("300 Birth-Dead Tree \n (age=6,lambda=0.9,mu=0.5,nu=0,hybprops = c(0.5, 0.5, 0.5))\n (mrca=TRUE)", outer=TRUE,cex.main=1.5)
sim.bdh.taxa.ssa
, mrca=TRUE
## sim.bdh.taxa.ssa (mrca=TRUE)
set.seed(20)
numbsim=300
bdNS <- NetSim::sim.bdh.taxa.ssa(n = 10,
numbsim = numbsim,
lambda = 0.9,
mu = 0.5,
nu = 0,
hybprops = c(0.5, 0.5, 0.5),
hyb.inher.fxn = NetSim::make.beta.draw(1, 1),
frac = 1,
mrca = TRUE,
complete = FALSE,
stochsampling = FALSE,
hyb.rate.fxn = NULL,
trait.model = NULL)
## remove any trees with no taxa
bdNS <- bdNS[!sapply(X = bdNS, FUN = is.null)]
# Extract information of extinct Trees and Trees with one tip
ExtTip1<-unlist(bdNS[which(!sapply(X = bdNS, FUN = is.phylo))])
ExtTip1Char<-as.character(ExtTip1)
ExtTip1Char[ExtTip1Char=="0"]<-"Extinct"
ExtTip1Char[ExtTip1Char=="1"]<-"OneTip"
#remove extinct Trees and Trees with one tip
bdNS2 <- bdNS[sapply(X = bdNS, FUN = is.phylo)]#birth dead tree with NetSim with value 0 or 1
# Extract information of Binary and not binary Trees
Bin<-sapply(X = bdNS2, FUN = is.binary)
BinCh<-as.character(Bin)
BinCh[BinCh=="TRUE"]<-"Binary"
BinCh[BinCh=="FALSE"]<-"NotBinary"
# Percentage of tree types
AllTreeType<-c(ExtTip1Char,BinCh)
AllTreeType<-factor(AllTreeType,levels=c("Extinct","OneTip","Binary","NotBinary"))
PerTree<-(table(AllTreeType)/length(AllTreeType))*100
#Making plot
par(mfrow=c(1,3),oma=c(0,0,5,0))
barplot(PerTree,main="Percentage of tree types",ylim=c(0,50))
plot(bdNS2[[which(Bin)[1]]],main="First Binary Tree")
plot(bdNS2[[which(!Bin)[1]]],main="First Not Binary Tree")
title("300 Birth-Dead Tree \n (n=10,lambda=0.9,mu=0.5,nu=0,hybprops = c(0.5, 0.5, 0.5))\n (mrca=TRUE)", outer=TRUE,cex.main=1.5)
sim.bdh.taxa.ssa
, mrca=FALSE
## sim.bdh.taxa.ssa (mrca=FALSE)
set.seed(40)
numbsim=300
bdNS <- NetSim::sim.bdh.taxa.ssa(n = 10,
numbsim = numbsim,
lambda = 0.9,
mu = 0.5,
nu = 0,
hybprops = c(0.5, 0.5, 0.5),
hyb.inher.fxn = NetSim::make.beta.draw(1, 1),
frac = 1,
mrca = TRUE,
complete = FALSE,
stochsampling = FALSE,
hyb.rate.fxn = NULL,
trait.model = NULL)
## remove any trees with no taxa
bdNS <- bdNS[!sapply(X = bdNS, FUN = is.null)]
# Extract information of extinct Trees and Trees with one tip
ExtTip1<-unlist(bdNS[which(!sapply(X = bdNS, FUN = is.phylo))])
ExtTip1Char<-as.character(ExtTip1)
ExtTip1Char[ExtTip1Char=="0"]<-"Extinct"
ExtTip1Char[ExtTip1Char=="1"]<-"OneTip"
#remove extinct Trees and Trees with one tip
bdNS2 <- bdNS[sapply(X = bdNS, FUN = is.phylo)]#birth dead tree with NetSim with value 0 or 1
# Extract information of Binary and not binary Trees
Bin<-sapply(X = bdNS2, FUN = is.binary)
BinCh<-as.character(Bin)
BinCh[BinCh=="TRUE"]<-"Binary"
BinCh[BinCh=="FALSE"]<-"NotBinary"
# Percentage of tree types
AllTreeType<-c(ExtTip1Char,BinCh)
AllTreeType<-factor(AllTreeType,levels=c("Extinct","OneTip","Binary","NotBinary"))
PerTree<-(table(AllTreeType)/length(AllTreeType))*100
#Making plot
par(mfrow=c(1,2),oma=c(0,0,5,0))
barplot(PerTree,main="Percentage of tree types",ylim=c(0,50))
plot(bdNS2[[which(!Bin)[1]]],main="First Not Binary Tree")
title("300 Birth-Dead Tree \n (n=10,lambda=0.9,mu=0.5,nu=0,hybprops = c(0.5, 0.5, 0.5))\n (mrca=TRUE)", outer=TRUE,cex.main=1.5)
We ran into another kind of error, here is a reproducible example:
> set.seed(32)
> net <- sim.bdh.taxa.ssa(n=4, numbsim=1,
+ lambda=1, mu=0.2, nu=0.3, hybprops=c(0.5,0.25,0.25),
+ hyb.rate.fxn = gen_dist_fxn, hyb.inher.fxn = make.beta.draw(2,2), complete=FALSE)[[1]]
> write.net(net, tol=1e-12)
Error in x$reticulation[i, 2] : subscript out of bounds
This seems to be caused by the network being a tree:
> net$reticulation
from to
> net$inheritance
numeric(0)
sorry, one more report ๐ฎ !
I came across networks that have non-leaf nodes without any children. That's rare, but here is an example:
require(SiPhyNetwork)
inheritance_fxn = make.beta.draw(10,10)
hybridproportions = c(0.5,0.25,0.25)
set.seed(5282)
net <- sim.bdh.taxa.ssa(n=17, numbsim=1,
lambda=1,mu=0.2, nu=0.8, hybprops=hybridproportions,
hyb.inher.fxn = inheritance_fxn)[[1]]
write.net(net, digits=1)
the output is a mouthful, sorry:
"(((((((t12:0.1,#H87:0.04::0.566817808665801):0.07,#H69:0::0.363177806287171):0.4,((((((t5:0.04,():0):0.2)#H75:0::0.560043980278853,#H77:0.01::0.356749669774551):0.1,((t18:0.1,#H85:0::0.679805427150224):0.02,#H84:0::0.41641402604163):0.2):0.1)#H46:0.2::0.439835130806992)#H43:0::0.446440230563991):0.02,#H40:0::0.452335033482829):0.2,#H27:0::0.626470441974728):0.6,((((((((((t1:0.2,#H75:0::0.439956019721147):0.004)#H56:0.1::0.366278800991607,#H55:0::0.500612545096176):0.09,#H46:0::0.560164869193008):0.03,(((((((#H82:0.03::0.419661471950185)#H81:0.06::0.411872637536133,#H79:0::0.373315241205891):0.009)#H77:0.003::0.643250330225449)#H58:0.1::0.644974840787198)#H57:0.02::0.51611249556549)#H55:0::0.499387454903824,#H57:0.02::0.48388750443451):0.1):0.2,(((#H42:0.07::0.265304329797207)#H40:0.02::0.547664966517171,#H38:0::0.452173412265279):0.004,(((#H62:0::0.408854680385383,#H58:0.09::0.355025159212802):0.3)#H42:0.06::0.734695670202793)#H37:0.03::0.421707616423641):0.05):0.04,#H31:0::0.475380353357758):0.08,(((((((((((t16:0.04,#H98:0.03::0.532697954851926):0.06)#H87:0.006::0.433182191334199)#H85:0.02::0.320194572849776,(t2:0.1)#H84:0::0.58358597395837):0.08,#H73:0::0.512554982986711):0.004,#H56:0::0.633721199008393):0.04,((t20:0.05,#H88:0::0.367932348319471):0.05,(((t11:0.02,t24:0.02):0.06,(t9:0.07)#H95:0::0.54866966512444):0.02)#H90:0::0.356200187546517):0.2):0.04,((((t17:0.01,t26:0.01,#H94:0::0.655539860002413)#H98:0.06::0.467302045148074,#H95:0::0.45133033487556):0.1,(((t6:0.05)#H88:0.06::0.632067651680529)#H82:0.08::0.580338528049815)#H79:0::0.626684758794109):0.1)#H62:0::0.591145319614617):0.08,#H48:0::0.444746978914219):0.3,(#H37:0.03::0.578292383576359)#H38:0::0.547826587734721):0.2)#H27:0::0.373529558025272):0.3,(t14:0.1,(((((t4:0.2)#H73:0::0.487445017013289,#H72:0.03::0.576699910918535):0.1,t22:0.3):0.3,(t27:0.4)#H48:0.2::0.555253021085781):0.2)#H31:0::0.524619646642242):0.4):0.3,(((((t3:0.03)#H94:0.06::0.344460139997587)#H92:0.01::0.338453967866981,#H90:0::0.643799812453483):0.1,(((#H81:0::0.588127362463867,#H92:0.05::0.661546032133019):0.05)#H72:0.03::0.423300089081465)#H69:0::0.636822193712829):0.4,#H43:0::0.553559769436009):0.8):0.01):0.8);"
This newick description contains this: ...():0):0.2)#H75:...
with a pair of parentheses that has nothing inside.
I first stumbled across a network that had this: ...()#H115:0::0.4...
(I can reproduce it but as part of a larger julia script. I'm not sending it, but I can serve as tester!)
By the way, in write.net
it looks like the digits
option only applies to edge lengths, not ฮณs (feature?)
Should I use some option to prune these dangling nodes? It makes sense that they would appear in the original network, but not in the reconstructed network. So perhaps I'm missing a step to get the reconstructed network.
Here is a small example where I get nothing simulated:
require(SiPhyNetwork)
inheritance_fxn = make.beta.draw(10,10)
hybridproportions = c(0.5,0.25,0.25)
set.seed(1234)
net <- sim.bdh.taxa.ssa(n=7, numbsim=1,
lambda=1,mu=0.2,
nu=0.20, hybprops=hybridproportions,
hyb.inher.fxn = inheritance_fxn)
net
and net
is empty:
[[1]]
[1] 0
But: if I give n=7L
instead of n=7
, then it works:
net <- sim.bdh.taxa.ssa(n=7L, numbsim=1,
lambda=1,mu=0.2,
nu=0.20, hybprops=hybridproportions,
hyb.inher.fxn = inheritance_fxn)
net
[[1]]
$edge
[,1] [,2]
[1,] 9 10
[2,] 10 11
...
but then... I can't get the parenthetical description of the network:
write.net(net[[1]])
Error: $ operator is invalid for atomic vectors
Sorry, I should have probably opened 2 separate issues. I am eager to use your package!! ๐
Sometimes when using SiPhyNetwork::write.net to write the network to Extended Newick format, the hybrid edge lengths seem to come out wrong. Specifically, here's an example in which 3 out of 4 hybrid edges have about 0.8752 added to what I believe their lengths ought to be.
library(SiPhyNetwork)
###
set.seed(1347) # current time
numbsim = 100
n = 7 # number of taxa to simulate to
###
inheritance.fxn <- make.beta.draw(1,1) # beta(1,1) distribution (i.e. unif(0,1))
hybrid_proportions <-c(0.5, ##Lineage Generative
0.25, ##Lineage Degenerative
0.25) ##Lineage Neutral
ssa_nets <- sim.bdh.taxa.ssa(
#age = age,
n = 7,
numbsim = numbsim,
lambda=1, # speciation rate
mu=0.2, # extinction rate
nu=0.25, # hybridization rate,
hybprops = hybrid_proportions,
hyb.inher.fxn = inheritance.fxn,
complete=FALSE
)
# i think this is correct (but doesn't include hybrid proportions)
ape::write.evonet(ssa_nets[[100]], 'sim100_evonet.tree')
# i think this is incorrect in some of the hybrid edge lengths
SiPhyNetwork::write.net(ssa_nets[[100]], 'sim100_net.tree')
This is a minor thing. I got the parenthetical format below from SiPhyNetwork
:
((((t8:0.4681991291,((t4:0.1122732353,#H34:0::0.9899097874):0.1481915801,#H32:0::0.728598428):0.2077343137):1.01928697,((t10:0.6260437678,(t12:0.625354073,t7:0.625354073):0.0006896948131):0.3308102357,((t13:0.1122732353,(t15:0.1122732353)#H34:0::0.01009021257):0.334506684,t1:0.4467799193):0.5100740842):0.5306320959):0.5266602863,(((t3:0.6118126485,(t2:0.2604648154,(t5:0.2604648154)#H32:0::0.271401572):0.3513478331):0.04918772583,(t11:0.4833606604,t9:0.4833606604):0.1776397139):1.266528995,(t14:1.855881859,t6:1.855881859):0.07164751056):0.086617016):0.5254422314);
and it seems to violate a (rather minor) formatting convention on #H32
. Namely, the leaf associated to #H32
should correspond to the minor hybrid edge by convention. But in this case, it seems that the leaf is connected to the major hybrid edge: #H32:0::0.728598428
because gamma>0.5 whereas the internal node associated to #H32
corresponds to the minor hybrid edge (t5:0.2604648154)#H32:0::0.271401572
because gamma<0.5 but by convention it should be associated to the major hybrid edge. Maybe this is confusing to explain here and this is not important in any way, so feel free to ignore!
I'm not sure if this should be reported as a problem with ape::write.net
. In any case, it looks like some edges are simulated with negative branch lengths. tiny, but negative. It would be great if this could be avoided!
Here is an example:
require(ape)
require(SiPhyNetwork)
inheritance_fxn = make.beta.draw(10,10)
hybridproportions = c(0.5,0.25,0.25)
set.seed(44)
net <- sim.bdh.taxa.ssa(n=17, numbsim=1,
lambda=1,mu=0.2, nu=0.20, hybprops=hybridproportions,
hyb.inher.fxn = inheritance_fxn)[[1]]
write.net(net)
with this output (look for #H32
or for #H26
for example):
[1] "((t10:0.73813995,(((((t3:0.2976621587,#H37:0::0.50070944374506):0.07825146437,#H38:0::0.473234112088868):0.1974751083,(((t2:0.3118052744,(t16:0.2497963593,#H32:-4.440892099e-16::0.439653139225563):0.06200891511):0.2214038277,(((t18:0.3118225002,((t11:0.1924233387,t9:0.1924233387):0.0003856376645,((t4:0.08199490263,t8:0.08199490263):0.03972920041,(t6:0.04379744837,t17:0.04379744837):0.07792665467):0.07108487332):0.1190135238):0.001387781611)#H36:0.1336600823::0.688007203371415,(t14:0.2976621587)#H37:0.1492082053::0.49929055625494):0.08633873808):0.04017962923)#H30:0::0.506636630686971):0.6966604422,(((t5:0.3759136231)#H38:0::0.526765887911132,#H36:0.06270334128::0.311992796628585):0.5533852128,#H26:-4.440892099e-16::0.516924587307278):0.3407503376):1.283036482,((((t7:0.001818444613,t12:0.001818444613):0.1808183389,t13:0.1826367835):0.7466620524)#H26:0.1424111866::0.483075412692722,(((t15:0.2135036576,t19:0.2135036576):0.0362927017)#H32:0.3235923721::0.560346860774437,#H30:4.440892099e-16::0.493363369313029):0.4983212912):1.481375633):0.05897134919):0.4056962731);"
even when asking for less precision in branch lengths, the negative signs come up:
write.net(net, digits=2)
[1] "((t10:0.74,(((((t3:0.3,#H37:0::0.50070944374506):0.078,#H38:0::0.473234112088868):0.2,(((t2:0.31,(t16:0.25,#H32:-4.4e-16::0.439653139225563):0.062):0.22,(((t18:0.31,((t11:0.19,t9:0.19):0.00039,((t4:0.082,t8:0.082):0.04,(t6:0.044,t17:0.044):0.078):0.071):0.12):0.0014)#H36:0.13::0.688007203371415,(t14:0.3)#H37:0.15::0.49929055625494):0.086):0.04)#H30:0::0.506636630686971):0.7,(((t5:0.38)#H38:0::0.526765887911132,#H36:0.063::0.311992796628585):0.55,#H26:-4.4e-16::0.516924587307278):0.34):1.3,((((t7:0.0018,t12:0.0018):0.18,t13:0.18):0.75)#H26:0.14::0.483075412692722,(((t15:0.21,t19:0.21):0.036)#H32:0.32::0.560346860774437,#H30:4.4e-16::0.493363369313029):0.5):1.5):0.059):0.41);"
Some functions on certain Networks crash R. Anytime a function call is made to reorder.phylo
(this occurs quite often. e.g. plot.phylo
, and write.evonet
make the function call) R crashes. This has to do with some C++ code used to reorder the phylogeny.
Essentially, it boils down to an issue with the edge
matrix when the largest numbered node is only in the second column (i.e phy$edge[,2]
but not phy$edge[,1]
). The C++ code is trying to compute the number of nodes and it only looks in the first column of edges for the number of nodes. Normally this isn't an issue because an internal node should have the largest node number and internal nodes should always have children, making them always appear in phy$edge[,1]
. However, networks are funky and are an exception to this. Incorrectly computing the number of nodes causes the function to later look at a vector with an index that doesn't exist, crashing R.
There are three reasonable solutions to this:
phy$nNode
as a parameter instead of incorrectly trying to compute it within the function.A declarative, efficient, and flexible JavaScript library for building user interfaces.
๐ Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. ๐๐๐
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Some thing interesting about web. New door for the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Some thing interesting about visualization, use data art
Some thing interesting about game, make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google โค๏ธ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.