-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvisualize-phylogenetic-tree.Rmd
executable file
·187 lines (139 loc) · 5.36 KB
/
visualize-phylogenetic-tree.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
---
title: "10. Visualize Phylogenetic Tree"
author:
- "Will Hannon"
date: "`r format(Sys.time(), '%d %B, %Y')`"
output:
html_document:
highlight: tango
number_sections: no
theme: default
toc: yes
toc_depth: 3
toc_float:
collapsed: no
smooth_scroll: yes
---
The goal of this notebook is to visualize the phylogenetic relationship of the haplotypes identified in the previous notebooks.
```{r Setup, include=FALSE}
require("knitr")
knitr::opts_chunk$set(echo = FALSE)
```
```{r Required Packages, message=FALSE, warning=FALSE, echo=FALSE}
## ==== Install Required Packages ==== ##
## List of all packages needed
packages = c("tidyverse", "RColorBrewer", "ggnetwork", "network", "tidytree", "phytools", "ggtree", "treeio")
## Packages loading
invisible(lapply(c(packages), library, character.only = TRUE))
```
```{r Inputs, echo=T}
## ==== File paths input data ==== ##
if (exists("snakemake")) {
# Data from lofreq and varscan labeled with subclonal haplotypes
updated.data = snakemake@params[["incsv"]]
# Phylogeny
phylo.data = snakemake@input[["tree"]]
} else {
# Data from lofreq and varscan labeled with subclonal haplotypes
updated.data = "../../results/variants/validated_variants.csv"
# Phylogeny
phylo.data = "../../results/phylogeny/haplotype-sequences.fa.treefile"
}
```
```{r Outputs, echo=T}
## ==== File paths output data ==== ##
if (exists("snakemake")) {
# Path to save figures
figure.dir = paste0(snakemake@params[['figures']], "/")
} else {
# Path to save figures
figure.dir = "../../results/figures/"
}
# Make the figure directory if it doesn't exist
if (!file.exists(figure.dir)) {
dir.create(figure.dir, recursive = TRUE)
print("Directory created.")
} else {
print("Directory already exists.")
}
```
## Visualize IQ-TREE Phylogeny
I took the sequences for each of these haplotypes and generated a phylogenetic tree using `iqtree`. The phylogenetic relationship of the clusters was determined by `SPRUCE` with the total set of trees filtered down by bridging reads. The `iqtree` analysis is done in notebook `09-make-phylogenetic-tree.ipynb`.
```{r Import Data, echo=F}
# Assigned SNPs with haplotype labels
updated.df = readr::read_csv(updated.data, show_col_types = FALSE) %>%
dplyr::mutate(Haplotype = case_when(
Haplotype == "cluster 1" ~ "cluster 1",
Haplotype == "cluster 2" ~ "cluster 2",
Haplotype == "cluster 3" ~ "cluster 3",
Haplotype == "cluster 4" ~ "cluster 4",
Haplotype == "cluster 5" ~ "cluster 1a",
Haplotype == "cluster 6" ~ "G-FC2",
Haplotype == "cluster 7" ~ "cluster 5",
Haplotype == "cluster 8" ~ "cluster 6",
Haplotype == "cluster 9" ~ "cluster 7",
Haplotype == "cluster 10" ~ "cluster 8",
Haplotype == "cluster 11" ~ "cluster 9",
Haplotype == "cluster 12" ~ "cluster 10",
Haplotype == "cluster 13" ~ "cluster 11",
Haplotype == "genome-1" ~ "G-01",
Haplotype == "genome-1-1" ~ "G-1",
Haplotype == "genome-2" ~ "G-2",
TRUE ~ Haplotype))
# Get the background of the haplotypes
background.label = updated.df %>%
select(Haplotype, Background) %>%
dplyr::filter(!Haplotype %in% c("subclonal", "G-2", "G-01", "G-1", "both")) %>%
mutate(Haplotype = str_replace(Haplotype, " ", "_")) %>%
distinct()
# Mutation counts
mutation.counts = updated.df %>%
dplyr::filter(!Haplotype %in% c('subclonal', 'fixed', 'both')) %>%
select(POS, REF, ALT, Haplotype) %>%
distinct() %>%
group_by(Haplotype) %>%
count() %>%
ungroup() %>%
rename(label = Haplotype, mutation_count = n)
# Phylogenetic tree
tree = read.iqtree(phylo.data)
```
```{r Root the Tree, echo=T}
# Name of the out group for rooting the tree
root = "SSPE_ancestor"
# Root the tree
tree@phylo = root(tree@phylo, outgroup = root)
tree = tree %>%
as_tibble(.) %>%
mutate(label = str_replace(label, "__unknown", "")) %>%
as.treedata()
# Add the background labels
tree.df = tree %>%
as_tibble(.) %>%
left_join(., background.label, by=c("label" = "Haplotype"))
# Get the nodes in the tree
genome.1.node = MRCA(tree, filter(background.label, Background == "genome-1")$Haplotype)
genome.2.node = MRCA(tree, filter(background.label, Background == "genome-2")$Haplotype)
genome.1.offspring = offspring(tree, genome.1.node)
genome.2.offspring = offspring(tree, genome.2.node)
# Add the background labels
tree.df = as_tibble(tree) %>%
mutate(branch.length = if_else(UFboot < 70 & !is.na(UFboot), 0.0, branch.length)) %>%
mutate(Background = case_when(node %in% genome.1.offspring ~ "genome-1",
node %in% genome.2.offspring ~ "genome-2",
TRUE ~ "ancestral")) %>%
mutate(label = str_replace(label, "_", " ")) %>%
left_join(., mutation.counts, by = "label")
```
```{r Plot the Tree, fig.align="center", fig.width=32, fig.height=15}
tree.df %>%
as.treedata() %>%
ggtree(size = 3, aes(color = Background)) +
geom_point2(aes(subset=(node==1)), size=12, color="#4d4d4d") +
geom_point2(aes(subset=(node==genome.1.node)), size=12, color="#307EC2") +
geom_point2(aes(subset=(node==genome.2.node)), size=12, color="#B63F3E") +
geom_tiplab(hjust = -.05, size = 8) +
scale_color_manual(values = c("#4d4d4d", "#307EC2", "#B63F3E")) +
theme(legend.position = "none")
ggsave(paste(figure.dir, "sspe-phylogeny.png", sep=""), width = 32, height = 15, dpi = 300)
```