Nakamura, T., Okamoto, I., Sasaki, K., Yabuta, Y., Iwatani, C., Tsuchiya, H., Seita, Y., Nakamura, S., Yamamoto, T., and Saitou, M. (2016). A developmental coordinate of pluripotency among mice, monkeys and humans. Nature 537, 57–62.
Load required packages.
library(tidyverse)
library(magrittr)
library(Matrix)
library(extrafont)
library(ggrepel)
library(patchwork)
# library(tidylog)
Sys.time()
## [1] "2020-08-18 15:08:05 CDT"
SEED_USE <- 20200616
MINIMAL_NUM_COUNTS_REQUIRED_FOR_GENE <- 60
source(
file = file.path(
SCRIPT_DIR,
"utilities.R"
)
)
Prepare metadata for single cells.
# https://www.ncbi.nlm.nih.gov/Traces/study/?query_key=1&WebEnv=MCID_5f3b627244419748985be4f3
cell_metadata <- read_delim(
file = "../SraRunTable.txt",
delim = ","
) %>%
rename_all(. %>% tolower()) %>%
`colnames<-`(str_replace(
string = colnames(.),
pattern = " ", replacement = "_"
)) %>%
dplyr::select(
sample_name,
# run,
biosample,
source_name,
cell_line,
organism
) %>%
unique()
## Parsed with column specification:
## cols(
## .default = col_character(),
## AvgSpotLen = col_double(),
## Bases = col_double(),
## Bytes = col_double(),
## ReleaseDate = col_datetime(format = ""),
## Strain = col_logical(),
## Cell_type = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 558 parsing failures.
## row col expected actual file
## 2527 Strain 1/0/T/F/TRUE/FALSE C57BL/6 '../SraRunTable.txt'
## 2528 Strain 1/0/T/F/TRUE/FALSE C57BL/6 '../SraRunTable.txt'
## 2529 Strain 1/0/T/F/TRUE/FALSE C57BL/6 '../SraRunTable.txt'
## 2530 Strain 1/0/T/F/TRUE/FALSE C57BL/6 '../SraRunTable.txt'
## 2531 Strain 1/0/T/F/TRUE/FALSE C57BL/6 '../SraRunTable.txt'
## .... ...... .................. ....... ....................
## See problems(...) for more details.
cell_metadata
cell_metadata %>%
dplyr::count(organism) %>%
gt::gt() %>%
gt::cols_label(n = "num_cells") %>%
gt::tab_options(table.font.size = "median")
organism | num_cells |
---|---|
Macaca fascicularis | 421 |
Mus musculus | 71 |
Load SC3-seq sample information.
# Supplementary Table 1, Sheet 3
table_s1_sheet3 <- readxl::read_excel(
path = "../../../../docs/A_developmental_coordinate_of_pluripotency_among_mice,_monkeys_and_humans/41586_2016_BFnature19096_MOESM296_ESM.xlsx",
sheet = "SC3-seq_SampleTable",
skip = 1
)
table_s1_sheet3
cell_metadata <- cell_metadata %>%
dplyr::left_join(
table_s1_sheet3,
by = c("sample_name" = "GSM_ID")
) %>%
rename_all(. %>% tolower()) %>%
dplyr::rename(
developmental_stage = `embryonic day/ culture condition`
) %>%
dplyr::select(
sample_name:developmental_stage
) %>%
mutate(
developmental_stage = factor(
developmental_stage,
levels = c(
"E5.5",
"E6.5",
"E06",
"E07",
"E08",
"E09",
"E13",
"E14",
"E16",
"E17",
"2iLESC",
"EpiLC",
"CMK6_FeederFree",
"CMK6_onFeeder",
"CMK9_onFeeder"
)
),
group = factor(
group,
levels = c(
"ICM",
"Pre-EPI",
"Hypoblast",
"PreE-TE",
"PreL-TE",
"PostE-EPI",
"PostL-EPI",
"Gast1",
"Gast2a",
"Gast2b",
"Post-paTE",
"VE/YE",
"EXMC",
"cyESCFF",
"cyESCoF",
"E5.5EPI",
"E5.5VE",
"E6.5EPI-T hi",
"E6.5EPI-T lo",
"E6.5VE",
"EpiLC",
"2i+L ESC"
)
),
source_name = factor(
source_name,
levels = c(
"Cynomolgus monkey embryo E06",
"Cynomolgus monkey embryo E07",
"Cynomolgus monkey embryo E08",
"Cynomolgus monkey embryo E09",
"Cynomolgus monkey embryo E13",
"Cynomolgus monkey embryo E14",
"Cynomolgus monkey embryo E16",
"Cynomolgus monkey embryo E17",
"cyESC\\, feeder free",
"cyESC\\, on feeder",
"mouse embryo E5.5",
"mouse embryo E6.5",
"2i/LIF ESC",
"EpiLC"
)
)
)
cell_metadata
embedding_cy <- read_csv(
file = "../preprocessed/cy/exploring/embedding_ncomponents44_seed20200317.csv"
) %>%
dplyr::left_join(
cell_metadata,
by = c("cell" = "sampleid")
) %>%
droplevels()
## Parsed with column specification:
## cols(
## cell = col_character(),
## batch = col_character(),
## louvain = col_double(),
## x_tsne = col_double(),
## y_tsne = col_double(),
## x_umap = col_double(),
## y_umap = col_double(),
## x_fitsne = col_double(),
## y_fitsne = col_double(),
## x_phate = col_double(),
## y_phate = col_double(),
## `x_min_dist=0.1` = col_double(),
## `y_min_dist=0.1` = col_double(),
## x_multicoretsne = col_double(),
## y_multicoretsne = col_double()
## )
reticulate::py_discover_config()
## python: /Users/jialei/.pyenv/shims/python
## libpython: /Users/jialei/.pyenv/versions/3.8.2/lib/libpython3.8.dylib
## pythonhome: /Users/jialei/.pyenv/versions/3.8.2:/Users/jialei/.pyenv/versions/3.8.2
## version: 3.8.2 (default, May 23 2020, 03:35:41) [Clang 11.0.3 (clang-1103.0.32.62)]
## numpy: /Users/jialei/.pyenv/versions/3.8.2/lib/python3.8/site-packages/numpy
## numpy_version: 1.19.0
##
## NOTE: Python version was forced by RETICULATE_PYTHON
np <- reticulate::import("numpy", convert = TRUE)
scipy.sparse <- reticulate::import(module = "scipy.sparse", convert = TRUE)
matrix_readcount_use <- scipy.sparse$load_npz("../preprocessed/cy/matrix_readcount.npz")
matrix_readcount_use_features <- np$load("../preprocessed/cy/matrix_readcount_features.npy")
matrix_readcount_use_barcodes <- np$load("../preprocessed/cy/matrix_readcount_barcodes.npy")
colnames(matrix_readcount_use) <- matrix_readcount_use_barcodes
rownames(matrix_readcount_use) <- matrix_readcount_use_features
matrix_readcount_use <- matrix_readcount_use[, embedding_cy$cell]
# calculate CPM
# matrix_cpm_use <- calc_cpm(matrix_readcount_use)
# stopifnot(dim(matrix_readcount_use) == dim(matrix_cpm_use))
print(dim(matrix_readcount_use))
## [1] 29095 421
walk(list(embedding_cy, matrix_readcount_use), function(x) {
print(object.size(x), units = "auto", standard = "SI")
})
## 196.1 kB
## 57.4 MB
embedding_cy %>%
dplyr::count(organism) %>%
gt::gt() %>%
gt::cols_label(n = "num_cells") %>%
gt::tab_options(table.font.size = "median")
organism | num_cells |
---|---|
Macaca fascicularis | 421 |
embedding_cy %>%
dplyr::mutate(
num_genes = Matrix::colSums(matrix_readcount_use > 0)
) %>%
dplyr::group_by(
louvain
) %>%
summarise(
median_genes = median(num_genes),
num_cells = n()
) %>%
gt::gt() %>%
gt::tab_options(table.font.size = "median")
## `summarise()` ungrouping output (override with `.groups` argument)
louvain | median_genes | num_cells |
---|---|---|
0 | 10679.0 | 82 |
1 | 10880.5 | 64 |
2 | 10997.0 | 62 |
3 | 10898.0 | 55 |
4 | 11073.0 | 53 |
5 | 10649.0 | 39 |
6 | 11590.5 | 34 |
7 | 11974.5 | 32 |
embedding_cy %>%
dplyr::count(source_name) %>%
gt::gt() %>%
gt::cols_label(n = "num_cells") %>%
gt::tab_options(table.font.size = "median")
source_name | num_cells |
---|---|
Cynomolgus monkey embryo E06 | 43 |
Cynomolgus monkey embryo E07 | 35 |
Cynomolgus monkey embryo E08 | 95 |
Cynomolgus monkey embryo E09 | 20 |
Cynomolgus monkey embryo E13 | 46 |
Cynomolgus monkey embryo E14 | 62 |
Cynomolgus monkey embryo E16 | 36 |
Cynomolgus monkey embryo E17 | 53 |
cyESC\, feeder free | 9 |
cyESC\, on feeder | 22 |
embedding_cy %>%
dplyr::count(group) %>%
gt::gt() %>%
gt::cols_label(n = "num_cells") %>%
gt::tab_options(table.font.size = "median")
group | num_cells |
---|---|
ICM | 30 |
Pre-EPI | 34 |
Hypoblast | 54 |
PreE-TE | 23 |
PreL-TE | 52 |
PostE-EPI | 55 |
PostL-EPI | 50 |
Gast1 | 18 |
Gast2a | 13 |
Gast2b | 13 |
Post-paTE | 11 |
VE/YE | 5 |
EXMC | 32 |
cyESCFF | 9 |
cyESCoF | 22 |
embedding_cy %>%
dplyr::count(developmental_stage) %>%
gt::gt() %>%
gt::cols_label(n = "num_cells") %>%
gt::tab_options(table.font.size = "median")
developmental_stage | num_cells |
---|---|
E06 | 43 |
E07 | 35 |
E08 | 95 |
E09 | 20 |
E13 | 46 |
E14 | 62 |
E16 | 36 |
E17 | 53 |
CMK6_FeederFree | 9 |
CMK6_onFeeder | 14 |
CMK9_onFeeder | 8 |
get_middle_points <- function(embedding, x, y, group) {
embedding_middle <- embedding %>%
dplyr::group_by({{ group }}) %>%
dplyr::summarise(
x_median = median({{ x }}),
y_median = median({{ y }})
)
labels_group <- purrr::map(levels(embedding %>% dplyr::pull({{ group }})), function(i) {
embedding %>%
dplyr::filter(
{{ group }} == i
) %>%
dplyr::left_join(embedding_middle) %>%
dplyr::mutate(
distance = sqrt(
({{ x }} - x_median)^2 + ({{ y }} - y_median)^2
)
) %>%
dplyr::filter(distance == min(distance))
}) %>%
dplyr::bind_rows() %>%
dplyr::select({{ group }}, {{ x }}, {{ y }})
return(labels_group)
}
p_embedding_cluster <- plot_embedding(
embedding = embedding_cy[, c("x_tsne", "y_tsne")],
color_values = embedding_cy$louvain %>% as.factor(),
label = "t-SNE; Cluster",
# label_position = c(label_x, label_y),
show_color_value_labels = TRUE,
show_color_legend = FALSE,
geom_point_size = 1,
sort_values = FALSE
) +
scale_color_manual(
values = ggthemes::tableau_color_pal("Tableau 10")(
length(unique(embedding_cy$louvain))
)
)
p_embedding_lineage <- plot_embedding(
embedding = embedding_cy[, c("x_tsne", "y_tsne")],
color_values = embedding_cy$group,
label = "t-SNE; Lineage",
# label_position = c(label_x, label_y),
show_color_value_labels = FALSE,
show_color_legend = FALSE,
geom_point_size = 1,
sort_values = FALSE
) +
scale_color_manual(
values = ggthemes::tableau_color_pal("Tableau 20")(
length(unique(embedding_cy$group))
)
) +
labs(color = NULL) +
ggrepel::geom_text_repel(
data = get_middle_points(embedding_cy, x_tsne, y_tsne, group),
ggplot2::aes(
x = x_tsne,
y = y_tsne,
label = group
),
color = "black",
size = 2,
family = "Arial",
#
segment.color = "grey35",
segment.size = 0.25,
segment.inflect = TRUE,
#
box.padding = 0.5,
min.segment.length = 0,
arrow = ggplot2::arrow(length = unit(0.015, "npc")),
max.overlaps = Inf,
seed = SEED_USE
)
p_embedding_developmental_stage <- plot_embedding(
embedding = embedding_cy[, c("x_tsne", "y_tsne")],
color_values = embedding_cy$developmental_stage,
label = "t-SNE; Developmental stage",
# label_position = c(label_x, label_y),
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = 1,
sort_values = FALSE
) +
scale_color_manual(
values = gg_color_hue(n = embedding_cy$developmental_stage %>%
droplevels() %>%
nlevels())
) +
labs(color = NULL) +
guides(colour = guide_legend(
override.aes = list(size = 3),
ncol = 2
)) +
customized_theme(x = 0.035, y = 0.42)
p_embedding_cell_line <- plot_embedding(
embedding = embedding_cy[, c("x_tsne", "y_tsne")],
color_values = embedding_cy$cell_line %>% replace_na("N/A"),
label = "t-SNE; Cell line",
# label_position = c(label_x, label_y),
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = 1,
sort_values = FALSE
) +
scale_color_manual(
values = c("green", "red", "grey70")
) +
labs(color = NULL) +
guides(colour = guide_legend(override.aes = list(size = 3))) +
customized_theme(x = 0.035, y = 0.25)
Fig. 5a
purrr::reduce(list(
p_embedding_cluster,
p_embedding_lineage,
p_embedding_developmental_stage,
p_embedding_cell_line
), `+`) +
plot_layout(ncol = 2) +
plot_annotation(
theme = theme(plot.margin = margin())
)
FEATURES_SELECTED <- c(
"GAPDH",
# "PPIA",
"LOC101925040",
"POU5F1",
"NANOG",
"SOX2",
"PRDM14",
"DPPA3",
"GATA6",
"PDGFRA",
"GATA4",
"TFAP2C",
# "GATA2",
"LOC101865311",
"CDX2",
"T",
"MIXL1"
)
heatmap_labels <- c(
"GAPDH",
# "PPIA",
"LOC101925040/PPIA",
"POU5F1",
"NANOG",
"SOX2",
"PRDM14",
"DPPA3",
"GATA6",
"PDGFRA",
"GATA4",
"TFAP2C",
# "GATA2",
"LOC101865311/GATA2",
"CDX2",
"T",
"MIXL1"
)
cells_heatmap <- embedding_cy %>%
filter(str_detect(string = developmental_stage, pattern = "^E")) %>%
pull(cell)
Scaled version
matrix_heatmap <- matrix_readcount_use[FEATURES_SELECTED, cells_heatmap]
matrix_heatmap <- matrix_heatmap[rowSums(matrix_heatmap) != 0, ]
matrix_heatmap <- log10(matrix_heatmap + 1)
matrix_heatmap <- t(scale(t(matrix_heatmap)))
heatmap_limits <- quantile(matrix_heatmap, c(0.05, 0.95))
matrix_heatmap[matrix_heatmap < heatmap_limits[1]] <- heatmap_limits[1]
matrix_heatmap[matrix_heatmap > heatmap_limits[2]] <- heatmap_limits[2]
groups_selected <- c(
"Post-paTE",
"PreL-TE",
"Hypoblast",
"PreE-TE",
"ICM",
"Pre-EPI",
"PostE-EPI",
"PostL-EPI",
"Gast1",
"Gast2a",
"Gast2b",
"VE/YE",
"EXMC"
)
cells_heatmap <- map(groups_selected, function(x) {
cells_in_group <- embedding_cy %>%
filter(group == x, cell %in% cells_heatmap) %>%
pull(cell)
cells_in_group[
pvclust::pvclust(
matrix_heatmap[, cells_in_group],
nboot = 100,
parallel = parallel::detectCores() - 2L
)$hclust$order
]
}) %>%
unlist()
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
matrix_heatmap <- matrix_heatmap[FEATURES_SELECTED, cells_heatmap]
ha_columns <- ComplexHeatmap::HeatmapAnnotation(
lineage = ComplexHeatmap::anno_simple(
embedding_cy[match(
x = colnames(matrix_heatmap),
table = embedding_cy$cell
), "group"],
# pch = anno_labels_cluster,
col = setNames(
object = ggthemes::tableau_color_pal("Tableau 20")(
embedding_cy$group %>%
# droplevels() %>%
nlevels()
),
nm = levels(embedding_cy$group)
),
which = "column",
pt_size = unit(2, "mm"),
pt_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
simple_anno_size = unit(3, "mm")
),
#
developmental_stage = ComplexHeatmap::anno_simple(
embedding_cy[match(
x = colnames(matrix_heatmap),
table = embedding_cy$cell
), "developmental_stage"],
# pch = anno_labels_lineage,
col = setNames(
object = gg_color_hue(n = embedding_cy$developmental_stage %>%
# droplevels() %>%
nlevels()),
nm = embedding_cy$developmental_stage %>%
# droplevels() %>%
levels()
),
which = "column",
# pt_size = unit(2, "mm"),
pt_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
simple_anno_size = unit(3, "mm")
),
show_annotation_name = TRUE,
annotation_label = c(
"Lineage",
"Developmental stage"
),
annotation_name_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
annotation_name_side = "left"
)
lgd_lineage <- ComplexHeatmap::Legend(
title = "Lineage",
labels = levels(
embedding_cy$group
)[
seq_len(nlevels(embedding_cy$group) - 2)
],
legend_gp = grid::gpar(
fill = ggthemes::tableau_color_pal("Tableau 20")(
nlevels(embedding_cy$group) - 2
)
),
labels_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6)
)
lgd_developmental_stage <- ComplexHeatmap::Legend(
title = "Develpmental stage",
labels = levels(
embedding_cy$developmental_stage
)[
seq_len(nlevels(embedding_cy$developmental_stage) - 3)
],
legend_gp = grid::gpar(
fill = gg_color_hue(
n = embedding_cy$developmental_stage %>%
# droplevels() %>%
nlevels()
)[seq_len(nlevels(embedding_cy$developmental_stage) - 3)]
),
labels_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6)
)
ht <- ComplexHeatmap::Heatmap(
matrix = matrix_heatmap,
# col = viridis::plasma(n = 10),
col = wesanderson::wes_palette("Zissou1", 50, type = "continuous"),
#
cluster_rows = FALSE,
cluster_columns = FALSE,
#
row_labels = heatmap_labels,
row_names_side = c("left"),
row_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
show_row_names = TRUE,
show_column_names = FALSE,
#
# column_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
show_row_dend = FALSE,
show_column_dend = FALSE,
#
show_heatmap_legend = TRUE,
top_annotation = ha_columns,
## left_annotation = ha_left,
#
heatmap_legend_param = list(
title = "Z score",
title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
legend_direction = "vertical",
labels_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
legend_height = unit(25, "mm"),
legend_width = unit(10, "mm")
),
#
column_split = embedding_cy[match(
x = colnames(matrix_heatmap),
table = embedding_cy$cell
), "group", drop = TRUE] %>%
factor(levels = groups_selected),
column_gap = unit(0.3, "mm"),
#
column_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
column_title_rot = -90,
#
use_raster = FALSE
)
pd <- ComplexHeatmap::packLegend(
lgd_lineage,
lgd_developmental_stage,
# gap = unit(8, "mm"),
direction = "vertical"
)
Fig. 3
ComplexHeatmap::draw(
ht,
heatmap_legend_list = list(pd)
)
Non scaled version
matrix_heatmap <- matrix_readcount_use[FEATURES_SELECTED, cells_heatmap]
matrix_heatmap <- matrix_heatmap[rowSums(matrix_heatmap) != 0, ]
matrix_heatmap <- log10(matrix_heatmap + 1)
ht <- ComplexHeatmap::Heatmap(
matrix = as.matrix(matrix_heatmap),
# col = viridis::plasma(n = 10),
col = wesanderson::wes_palette("Zissou1", 50, type = "continuous"),
#
cluster_rows = FALSE,
cluster_columns = FALSE,
#
row_labels = heatmap_labels,
row_names_side = c("left"),
row_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
show_row_names = TRUE,
show_column_names = FALSE,
#
column_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
show_row_dend = FALSE,
show_column_dend = FALSE,
#
show_heatmap_legend = TRUE,
top_annotation = ha_columns,
## left_annotation = ha_left,
#
heatmap_legend_param = list(
title = expression(paste("Log"[10], " (RPM + 1)")), # "Log10(RPM + 1)",
title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
legend_direction = "vertical",
labels_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
legend_height = unit(25, "mm"),
legend_width = unit(10, "mm")
),
#
column_split = embedding_cy[match(
x = colnames(matrix_heatmap),
table = embedding_cy$cell
), "group", drop = TRUE] %>%
factor(levels = groups_selected),
column_gap = unit(0.3, "mm"),
#
column_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
column_title_rot = -90,
#
use_raster = FALSE
)
Fig. 3
ComplexHeatmap::draw(
ht,
heatmap_legend_list = list(pd)
)
cells_cyEPI <- embedding_cy %>%
filter(group %in% c(
"ICM",
"Pre-EPI",
"PostE-EPI",
"PostL-EPI",
"Gast1",
"Gast2a",
"Gast2b"
)) %>%
pull(cell) %>%
sort()
matrix_readcount_cyEPI <- matrix_readcount_use[, cells_cyEPI]
matrix_readcount_cyEPI <- matrix_readcount_cyEPI[
Matrix::rowSums(
matrix_readcount_cyEPI
) >= MINIMAL_NUM_COUNTS_REQUIRED_FOR_GENE,
]
dim(matrix_readcount_cyEPI)
## [1] 16831 213
matrix_readcount_cyEPI_log <- matrix_readcount_cyEPI
matrix_readcount_cyEPI_log@x <- log1p(matrix_readcount_cyEPI@x)
# z-score
matrix_readcount_cyEPI_log_scaled <- t(
scale(
t(
matrix_readcount_cyEPI_log
),
center = TRUE, scale = TRUE
)
)
# features_var <- apply(matrix_readcount_norm_log_scaled, 1, var)
# features_use <- rownames(matrix_readcount_norm_log_scaled)[features_var > 0]
pca_out <- prcomp(
t(matrix_readcount_cyEPI_log_scaled),
center = FALSE,
scale = FALSE
)
summary(pca_out)$imp %>%
t() %>%
as.data.frame() %>%
rownames_to_column(var = "component") %>%
mutate(
rank = 1:n()
) %>%
dplyr::slice(1:30) %>%
ggplot(
aes(
x = rank,
y = `Proportion of Variance`
)
) +
geom_point(size = 0.3) +
theme_bw() +
scale_x_continuous(
name = "Component",
breaks = c(1, seq(5, 30, 5))
) +
scale_y_continuous(
name = "Proportion of variance",
labels = scales::percent
) +
theme(
axis.title = ggplot2::element_text(family = "Arial", size = 6),
axis.text = ggplot2::element_text(family = "Arial", size = 6),
panel.grid.minor = ggplot2::element_blank()
)
embedding_cyEPI <- pca_out$x %>%
as.data.frame() %>%
rownames_to_column(var = "sample_name") %>%
dplyr::select(sample_name:PC3) %>%
left_join(
embedding_cy %>%
select(
cell, batch, sample_name:developmental_stage
),
by = c("sample_name" = "cell")
) %>%
dplyr::rename(
x_pca = PC1,
y_pca = PC2,
z_pca = PC3
) %>%
relocate(x_pca, y_pca, z_pca, .after = last_col()) # %>% droplevels()
p_embedding_pca1 <- embedding_cyEPI %>%
mutate(
category = "PCA"
) %>%
sample_frac() %>%
plot_pca(
x = y_pca,
y = x_pca,
fill = group
) +
scale_fill_manual(
values = setNames(
object = ggthemes::tableau_color_pal("Tableau 20")(
length(unique(embedding_cy$group))
),
nm = embedding_cy$group %>% levels()
)[embedding_cyEPI$group %>%
droplevels() %>%
levels()]
) +
add_xy_label_pca(x = "PC2", y = "PC1") +
ggrepel::geom_text_repel(
data = get_middle_points(embedding_cyEPI, x_pca, y_pca, group),
ggplot2::aes(
x = y_pca,
y = x_pca,
label = group
),
color = "red",
size = 2,
family = "Arial",
#
segment.color = "grey35",
segment.size = 0.25,
segment.inflect = TRUE,
#
box.padding = 0.5,
min.segment.length = 0,
arrow = ggplot2::arrow(length = unit(0.015, "npc")),
max.overlaps = Inf,
seed = SEED_USE
)
p_embedding_pca2 <- embedding_cyEPI %>%
mutate(
category = "PCA"
) %>%
sample_frac() %>%
plot_pca(
x = z_pca,
y = x_pca,
fill = group
) +
scale_fill_manual(
values = setNames(
object = ggthemes::tableau_color_pal("Tableau 20")(
length(unique(embedding_cy$group))
),
nm = embedding_cy$group %>% levels()
)[embedding_cyEPI$group %>%
droplevels() %>%
levels()]
) +
add_xy_label_pca(x = "PC3", y = "PC1")
Fig. 4a
p_embedding_pca1 +
p_embedding_pca2 +
plot_annotation(theme = theme(plot.margin = margin())) +
plot_layout(guides = "collect")
# Supplementary Table 2, Sheet 4
table_s2_sheet4 <- readxl::read_excel(
path = "../../../../docs/A_developmental_coordinate_of_pluripotency_among_mice,_monkeys_and_humans/41586_2016_BFnature19096_MOESM297_ESM.xlsx",
sheet = "cyEPI ontogenic genes",
skip = 0
)
## New names:
## * `` -> ...11
## * `` -> ...12
table_s2_sheet4
Fig. 4b
FEATURES_SELECTED <- c(
"SFRP1",
"FST",
"HMGA2",
"VIM",
"T",
"BMP4",
"EOMES",
"MIXL1",
"HAND1",
"PDGFRA",
"GSC",
"TBX3",
"GATA6",
"ESRRB",
"KLF5",
"TFCP2L1",
"KHDC1L",
"KLF4",
"NLRP7",
"OOEP",
"DPPA3",
"DNMT3L",
"DPPA5",
"DPPA2",
"NANOG",
"PRDM14",
"SOX2",
"SALL2",
"SFRP2",
"FGF2",
"SOX11"
)
scaled_pc_loadings <- pca_out$rotation %>%
scale() %>%
as.data.frame() %>%
rownames_to_column(var = "feature") %>%
dplyr::select(feature, PC1, PC2) %>%
mutate(
group = feature %in% table_s2_sheet4$macFas5_gene_symbol,
group2 = feature %in% FEATURES_SELECTED
) %>%
arrange(group)
plot_pca_loading(
data = scaled_pc_loadings,
x = PC2,
y = PC1,
z1 = group,
z2 = group2,
x_label = "Z score of PC2 loading",
y_label = "Z score of PC1 loading"
)
Read genes used in Fig. 4c from supplementary table 2.
features_heatmap <- table_s2_sheet4$macFas5_gene_symbol
features_heatmap %>% length()
## [1] 776
# Genes listed in the supplementary table, but not in the expression matrix
features_heatmap[!features_heatmap %in% rownames(matrix_readcount_use)]
## [1] "CYP11A" "LOC102139757" "BRDT" "COX7B2" "42253"
## [6] "42065" "DYRK3" "GSTA4" "FXYD6" "BCAR3"
# fix
features_heatmap <- features_heatmap %>%
enframe(name = "order", value = "symbol_raw") %>%
mutate(
symbol = case_when(
symbol_raw == "CYP11A" ~ "LOC102120640",
symbol_raw == "LOC102139757" ~ "XIST",
symbol_raw == "BRDT" ~ "LOC102123270",
symbol_raw == "COX7B2" ~ "LOC101864947",
symbol_raw == "42253" ~ "MARC2",
symbol_raw == "42065" ~ "SEPT6",
symbol_raw == "DYRK3" ~ "LOC101866219",
symbol_raw == "GSTA4" ~ "LOC101865382",
symbol_raw == "FXYD6" ~ "LOC101867245",
symbol_raw == "BCAR3" ~ "LOC101867222",
TRUE ~ .$symbol_raw
)
) %>%
pull(symbol)
features_heatmap[!features_heatmap %in% rownames(matrix_readcount_use)]
## character(0)
matrix_heatmap <- matrix_readcount_use[features_heatmap, cells_cyEPI]
matrix_heatmap <- matrix_heatmap[rowSums(matrix_heatmap) != 0, ]
matrix_heatmap <- log10(matrix_heatmap + 1)
heatmap_limits <- quantile(matrix_heatmap, c(0.005, 0.995))
matrix_heatmap[matrix_heatmap < heatmap_limits[1]] <- heatmap_limits[1]
matrix_heatmap[matrix_heatmap > heatmap_limits[2]] <- heatmap_limits[2]
cells_heatmap <- map(embedding_cyEPI %>%
pull(group) %>%
droplevels() %>%
levels(), function(x) {
cells_in_group <- embedding_cyEPI %>%
filter(group == x) %>%
pull(sample_name)
cells_in_group[
pvclust::pvclust(
matrix_heatmap[features_heatmap, cells_in_group] %>% as.matrix(),
nboot = 100,
parallel = parallel::detectCores() - 2L
)$hclust$order
]
}) %>%
unlist()
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
matrix_heatmap <- matrix_heatmap[features_heatmap, cells_heatmap]
ha_columns <- ComplexHeatmap::HeatmapAnnotation(
lineage = ComplexHeatmap::anno_simple(
embedding_cyEPI[match(
x = colnames(matrix_heatmap),
table = embedding_cyEPI$sample_name
), "group"],
# pch = anno_labels_cluster,
col = setNames(
object = ggthemes::tableau_color_pal("Tableau 20")(
embedding_cyEPI$group %>%
# droplevels() %>%
nlevels()
),
nm = levels(embedding_cyEPI$group)
),
which = "column",
pt_size = unit(2, "mm"),
pt_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
simple_anno_size = unit(3, "mm")
),
#
developmental_stage = ComplexHeatmap::anno_simple(
embedding_cyEPI[match(
x = colnames(matrix_heatmap),
table = embedding_cyEPI$sample_name
), "developmental_stage"],
# pch = anno_labels_lineage,
col = setNames(
object = gg_color_hue(n = embedding_cy$developmental_stage %>%
nlevels()),
nm = embedding_cy$developmental_stage %>%
levels()
),
which = "column",
# pt_size = unit(2, "mm"),
pt_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
simple_anno_size = unit(3, "mm")
),
show_annotation_name = TRUE,
annotation_label = c(
"Lineage",
"Developmental stage"
),
annotation_name_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
annotation_name_side = "left"
)
lgd_lineage <- ComplexHeatmap::Legend(
title = "Lineage",
labels = embedding_cyEPI %>% pull(group) %>% droplevels() %>% levels(),
legend_gp = grid::gpar(
fill = setNames(
object = ggthemes::tableau_color_pal("Tableau 20")(
length(unique(embedding_cy$group))
),
nm = embedding_cy$group %>% levels()
)[embedding_cyEPI$group %>%
droplevels() %>%
levels()]
),
labels_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
title_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
)
)
lgd_developmental_stage <- ComplexHeatmap::Legend(
title = "Develpmental stage",
labels = levels(
embedding_cy$developmental_stage
)[
seq_len(nlevels(embedding_cy$developmental_stage) - 3)
],
legend_gp = grid::gpar(
fill = gg_color_hue(
n = embedding_cy$developmental_stage %>%
droplevels() %>%
nlevels()
)[seq_len(nlevels(embedding_cy$developmental_stage) - 3)]
),
labels_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
title_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
)
)
# genes highlighted in Fig. 4c, fix gene symbols
features_to_mark_left <- c(
"KLF5",
"TBX3",
"GATA6",
"PDGFRA",
"LAMA1",
"LAMB1",
"LAMC1",
"TET2",
#
"ESRRB",
"TFCP2L1",
"LOC102137091", # "KHDC1L",
"SOX15",
"TFAP2C",
"GDF15",
#
"DPPA3",
"NLRP2",
"DPPA2",
"KHDC3L",
#
"PRDM14",
"NANOG",
#
"LOC102143209", # "TDGF1",
"SOX2",
"CXCL12",
"DNMT3B",
#
"ZSCAN10",
"SALL2",
"SEMA6A",
"TCF7L1",
"THY1",
"COL1A1",
"FGF19",
"NOTCH3",
#
"SOX11",
"TCF4",
"FST",
"ZIC3",
"ZIC2",
"FGF2",
"VIM",
"COL4A2",
#
"T",
"MIXL1",
"EOMES",
"GSC",
"BMP2",
"HHEX",
"LHX1",
"CXCR4",
"LOC101925698", # "SOX17",
"BMP4"
)
features_to_mark_left[!features_to_mark_left %in% rownames(matrix_heatmap)]
## character(0)
ha_left <- ComplexHeatmap::rowAnnotation(
foo = ComplexHeatmap::anno_mark(
at = which(rownames(matrix_heatmap) %in% features_to_mark_left),
labels = features_to_mark_left,
which = "row",
side = "right",
lines_gp = grid::gpar(col = "grey50"),
labels_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 5
)
)
)
ht <- ComplexHeatmap::Heatmap(
matrix = matrix_heatmap %>% as.matrix(),
# col = viridis::plasma(n = 10),
col = wesanderson::wes_palette("Zissou1", 50, type = "continuous"),
#
cluster_rows = FALSE,
cluster_columns = FALSE,
#
## row_labels = str_remove(string = rownames(matrix_heatmap), pattern = "^E.+_"),
# row_names_side = c("left"),
# row_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
show_row_names = FALSE,
show_column_names = FALSE,
#
show_row_dend = FALSE,
show_column_dend = FALSE,
#
column_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
show_heatmap_legend = TRUE,
top_annotation = ha_columns,
## left_annotation = ha_left,
right_annotation = ha_left,
#
heatmap_legend_param = list(
title = expression(paste("Log"[10], " (RPM + 1)")),
title_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
legend_direction = "vertical",
labels_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
legend_height = unit(25, "mm"),
legend_width = unit(10, "mm")
),
#
row_split = table_s2_sheet4$ClusterID,
row_gap = unit(0.3, "mm"),
#
row_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
row_title_rot = 0,
#
# column_split = embedding_cy[match(x = colnames(matrix_heatmap),table = embedding_cy$cell), "group", drop = TRUE],
column_split = factor(
embedding_cyEPI[match(
x = colnames(matrix_heatmap),
table = embedding_cyEPI$sample_name
), "group", drop = TRUE] %>%
droplevels() %>%
str_pad(
width = map_int(levels(embedding_cyEPI$group), nchar) %>%
max(),
side = "left"
),
levels = levels(embedding_cyEPI$group %>% droplevels()) %>%
str_pad(
width = map_int(levels(embedding_cyEPI$group), nchar) %>%
max(),
side = "left"
)
),
column_gap = unit(0.3, "mm"),
#
column_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
column_title_rot = -90,
#
use_raster = FALSE
)
pd <- ComplexHeatmap::packLegend(
lgd_lineage,
lgd_developmental_stage,
# gap = unit(8, "mm"),
direction = "vertical"
)
Fig. 4c
ComplexHeatmap::draw(
ht,
heatmap_legend_list = list(pd)
)
# Supplementary Table 1, Sheet 3
table_s1_sheet3 <- readxl::read_excel(
path = "../../../../docs/A_developmental_coordinate_of_pluripotency_among_mice,_monkeys_and_humans/41586_2016_BFnature19096_MOESM296_ESM.xlsx",
sheet = "SC3-seq_SampleTable",
skip = 1
)
table_s1_sheet3 %>% head()
embedding_mm <- read_csv(
file = "../preprocessed/mm/exploring/embedding_ncomponents18_seed20200317.csv"
) %>%
dplyr::left_join(
table_s1_sheet3 %>%
filter(Species == "Mouse") %>%
mutate(
SampleID = case_when(
str_detect(string = SampleID, pattern = "^\\d") ~ str_c("X", .$SampleID),
TRUE ~ .$SampleID
)
),
by = c("cell" = "SampleID")
) %>%
dplyr::select(
cell:y_umap, GSM_ID:`Embryonic Day/ Culture Condition`
) %>%
dplyr::rename_all(. %>% tolower()) %>%
dplyr::rename("developmental_stage" = `embryonic day/ culture condition`) %>%
mutate(
developmental_stage = factor(
developmental_stage,
levels = c("E4.5", "E5.5", "E6.5", "EpiLC", "2iLESC")
),
group = factor(
group,
levels = c(
"E4.5EPI",
"E4.5PE",
"E4.5mTE",
"E4.5pTE",
"E5.5EPI",
"E5.5VE",
"E6.5EPI-T hi",
"E6.5EPI-T lo",
"E6.5VE",
"EpiLC",
"2i+L ESC"
)
)
)
## Parsed with column specification:
## cols(
## cell = col_character(),
## batch = col_character(),
## louvain = col_double(),
## x_tsne = col_double(),
## y_tsne = col_double(),
## x_umap = col_double(),
## y_umap = col_double(),
## x_fitsne = col_double(),
## y_fitsne = col_double(),
## x_phate = col_double(),
## y_phate = col_double(),
## `x_min_dist=0.1` = col_double(),
## `y_min_dist=0.1` = col_double(),
## x_multicoretsne = col_double(),
## y_multicoretsne = col_double()
## )
embedding_mm %>%
dplyr::count(louvain) %>%
gt::gt() %>%
gt::cols_label(n = "num_cells") %>%
gt::tab_options(table.font.size = "median")
louvain | num_cells |
---|---|
0 | 49 |
1 | 37 |
2 | 22 |
embedding_mm %>%
dplyr::count(group) %>%
gt::gt() %>%
gt::cols_label(n = "num_cells") %>%
gt::tab_options(table.font.size = "median")
group | num_cells |
---|---|
E4.5EPI | 9 |
E4.5PE | 9 |
E4.5mTE | 9 |
E4.5pTE | 10 |
E5.5EPI | 16 |
E5.5VE | 9 |
E6.5EPI-T hi | 10 |
E6.5EPI-T lo | 9 |
E6.5VE | 5 |
EpiLC | 12 |
2i+L ESC | 10 |
embedding_mm %>%
dplyr::count(developmental_stage) %>%
gt::gt() %>%
gt::cols_label(n = "num_cells") %>%
gt::tab_options(table.font.size = "median")
developmental_stage | num_cells |
---|---|
E4.5 | 37 |
E5.5 | 25 |
E6.5 | 24 |
EpiLC | 12 |
2iLESC | 10 |
embedding_middle <- embedding_mm %>%
group_by(group) %>%
summarise(
x_median = median(x_umap),
y_median = median(y_umap)
)
## `summarise()` ungrouping output (override with `.groups` argument)
labels_group <- map(levels(embedding_mm$group), function(x) {
embedding_mm %>%
dplyr::filter(
group == x
) %>%
dplyr::left_join(embedding_middle) %>%
dplyr::mutate(
distance = sqrt(
(x_umap - x_median)^2 + (y_umap - y_median)^2
)
) %>%
dplyr::filter(distance == min(distance))
}) %>%
dplyr::bind_rows() %>%
dplyr::select(cell, x_umap, y_umap, group)
## Joining, by = "group"
## Joining, by = "group"
## Joining, by = "group"
## Joining, by = "group"
## Joining, by = "group"
## Joining, by = "group"
## Joining, by = "group"
## Joining, by = "group"
## Joining, by = "group"
## Joining, by = "group"
## Joining, by = "group"
labels_group
p_embedding_lineage <- plot_embedding(
embedding = embedding_mm[, c("x_umap", "y_umap")],
color_values = embedding_mm$group,
label = "UMAP; Lineage",
# label_position = c(label_x, label_y),
show_color_value_labels = FALSE,
show_color_legend = FALSE,
geom_point_size = 1,
sort_values = FALSE
) +
scale_color_manual(
values = ggthemes::tableau_color_pal("Tableau 20")(
length(unique(embedding_mm$group))
)
) +
labs(color = NULL) +
# guides(colour = guide_legend(override.aes = list(size = 3), ncol = 1)) +
# customized_theme(x = 0.035, y = 0.72) +
ggrepel::geom_text_repel(
data = get_middle_points(embedding_mm, x_umap, y_umap, group),
ggplot2::aes(
x = x_umap,
y = y_umap,
label = group
),
color = "black",
size = 2,
family = "Arial",
#
segment.color = "grey35",
segment.size = 0.25,
segment.inflect = TRUE,
#
box.padding = 0.5,
min.segment.length = 0,
arrow = ggplot2::arrow(length = unit(0.015, "npc")),
max.overlaps = Inf,
seed = SEED_USE,
)
p_embedding_developmental_stage <- plot_embedding(
embedding = embedding_mm[, c("x_umap", "y_umap")],
color_values = embedding_mm$developmental_stage,
label = "UMAP; Developmental stage",
# label_position = c(label_x, label_y),
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = 1,
sort_values = FALSE
) +
scale_color_manual(
values = gg_color_hue(n = embedding_mm$developmental_stage %>%
droplevels() %>%
nlevels())
) +
labs(color = NULL) +
guides(colour = guide_legend(override.aes = list(size = 3), ncol = 1)) +
customized_theme(x = 0.035, y = 0.36)
Extended Data Figure 7d
purrr::reduce(list(
p_embedding_lineage,
p_embedding_developmental_stage
), `+`) +
plot_layout(ncol = 2) +
plot_annotation(
theme = theme(plot.margin = margin())
)
features_heatmap_mm <- c(
"Gapdh",
"Ppia",
"Pou5f1",
"Sox2",
"Nanog",
"Prdm14",
"Tfcp2l1",
"Dppa3",
"Esrrb",
"Zfp42",
"Klf2",
"Klf4",
"Klf5",
"Tbx3",
"Nr5a2",
"Fgf4",
"Il6st",
"Lifr",
"Fgf5",
"Otx2",
"Pou3f1",
"Sox3",
"Dnmt3b",
"T",
"Mixl1",
"Sp5",
"Cdh2",
"Snai1",
"Gata6",
"Pdgfra",
"Sox17",
"Gata4",
"Afp",
"Cdx2",
"Gata2",
"Gata3",
"Tfap2c",
"Krt18"
)
matrix_readcount_mm <- scipy.sparse$load_npz("../preprocessed/mm/matrix_readcount.npz")
matrix_readcount_mm_features <- np$load("../preprocessed/mm/matrix_readcount_features.npy")
matrix_readcount_mm_barcodes <- np$load("../preprocessed/mm/matrix_readcount_barcodes.npy")
colnames(matrix_readcount_mm) <- matrix_readcount_mm_barcodes
rownames(matrix_readcount_mm) <- matrix_readcount_mm_features
matrix_readcount_mm <- matrix_readcount_mm[, embedding_mm$cell]
features_heatmap_mm %in% rownames(matrix_readcount_mm)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
matrix_heatmap <- log10(matrix_readcount_mm[features_heatmap_mm, ] + 1)
cells_heatmap <- map(levels(embedding_mm$group), function(x) {
cells_in_group <- embedding_mm %>%
filter(group == x) %>%
pull(cell)
cells_in_group[
pvclust::pvclust(
matrix_heatmap[, cells_in_group] %>% as.matrix(),
nboot = 100,
parallel = parallel::detectCores() - 2L
)$hclust$order
]
}) %>%
unlist()
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
Extended Data Figure 7c
matrix_heatmap[, cells_heatmap] %>%
as.matrix() %>%
as.data.frame() %>%
rownames_to_column(var = "feature") %>%
pivot_longer(
-c("feature"),
names_to = "sample"
) %>%
left_join(
embedding_mm %>%
dplyr::select(cell, group),
by = c("sample" = "cell")
) %>%
mutate(
feature = factor(
feature,
levels = features_heatmap_mm %>% rev(.)
),
sample = factor(
sample,
levels = cells_heatmap
)
) %>%
plot_heatmap(
x = sample,
y = feature,
z = value,
y_order = rev(features_heatmap_mm)
) +
facet_grid(~group, scales = "free", space = "free")
Number of genes of cy matrix: 29095
Number of genes of mm matrix: 26219
library(org.Hs.eg.db)
library(biomaRt)
org.Hs.eg.db version: 3.11.4
biomaRt version: 2.44.1
ensembl <- biomaRt::useMart("ensembl")
biomaRt::listDatasets(ensembl) %>%
filter(dataset %in% "Cynomolgus")
biomaRt::listDatasets(ensembl)$dataset
## [1] "acalliptera_gene_ensembl" "acarolinensis_gene_ensembl"
## [3] "acchrysaetos_gene_ensembl" "acitrinellus_gene_ensembl"
## [5] "amelanoleuca_gene_ensembl" "amexicanus_gene_ensembl"
## [7] "ampachon_gene_ensembl" "anancymaae_gene_ensembl"
## [9] "aplatyrhynchos_gene_ensembl" "applatyrhynchos_gene_ensembl"
## [11] "atestudineus_gene_ensembl" "bbbison_gene_ensembl"
## [13] "bgrunniens_gene_ensembl" "bihybrid_gene_ensembl"
## [15] "bmutus_gene_ensembl" "bsplendens_gene_ensembl"
## [17] "btaurus_gene_ensembl" "bthybrid_gene_ensembl"
## [19] "cabingdonii_gene_ensembl" "capalliatus_gene_ensembl"
## [21] "caperea_gene_ensembl" "catys_gene_ensembl"
## [23] "ccanadensis_gene_ensembl" "ccapucinus_gene_ensembl"
## [25] "cdromedarius_gene_ensembl" "celegans_gene_ensembl"
## [27] "cgchok1gshd_gene_ensembl" "cgcrigri_gene_ensembl"
## [29] "cgobio_gene_ensembl" "cgpicr_gene_ensembl"
## [31] "charengus_gene_ensembl" "chircus_gene_ensembl"
## [33] "choffmanni_gene_ensembl" "cintestinalis_gene_ensembl"
## [35] "cjacchus_gene_ensembl" "cjaponica_gene_ensembl"
## [37] "clanigera_gene_ensembl" "cldingo_gene_ensembl"
## [39] "clfamiliaris_gene_ensembl" "cmilii_gene_ensembl"
## [41] "cpbellii_gene_ensembl" "cporcellus_gene_ensembl"
## [43] "cporosus_gene_ensembl" "csabaeus_gene_ensembl"
## [45] "csavignyi_gene_ensembl" "csemilaevis_gene_ensembl"
## [47] "csyrichta_gene_ensembl" "cvariegatus_gene_ensembl"
## [49] "dclupeoides_gene_ensembl" "dmelanogaster_gene_ensembl"
## [51] "dnovaehollandiae_gene_ensembl" "dnovemcinctus_gene_ensembl"
## [53] "dordii_gene_ensembl" "drerio_gene_ensembl"
## [55] "eaasinus_gene_ensembl" "eburgeri_gene_ensembl"
## [57] "ecaballus_gene_ensembl" "ecalabaricus_gene_ensembl"
## [59] "eelectricus_gene_ensembl" "eeuropaeus_gene_ensembl"
## [61] "elucius_gene_ensembl" "enaucrates_gene_ensembl"
## [63] "etelfairi_gene_ensembl" "falbicollis_gene_ensembl"
## [65] "fcatus_gene_ensembl" "fdamarensis_gene_ensembl"
## [67] "fheteroclitus_gene_ensembl" "gaculeatus_gene_ensembl"
## [69] "gagassizii_gene_ensembl" "ggallus_gene_ensembl"
## [71] "ggorilla_gene_ensembl" "gmorhua_gene_ensembl"
## [73] "gwilldenowi_gene_ensembl" "hburtoni_gene_ensembl"
## [75] "hcomes_gene_ensembl" "hgfemale_gene_ensembl"
## [77] "hgmale_gene_ensembl" "hhucho_gene_ensembl"
## [79] "hsapiens_gene_ensembl" "ipunctatus_gene_ensembl"
## [81] "itridecemlineatus_gene_ensembl" "jhyemalis_gene_ensembl"
## [83] "jjaculus_gene_ensembl" "lafricana_gene_ensembl"
## [85] "lbergylta_gene_ensembl" "lcalcarifer_gene_ensembl"
## [87] "lcanadensis_gene_ensembl" "lchalumnae_gene_ensembl"
## [89] "lcoronata_gene_ensembl" "lcrocea_gene_ensembl"
## [91] "loculatus_gene_ensembl" "lsdomestica_gene_ensembl"
## [93] "malbus_gene_ensembl" "marmatus_gene_ensembl"
## [95] "mauratus_gene_ensembl" "mcaroli_gene_ensembl"
## [97] "mdomestica_gene_ensembl" "mfascicularis_gene_ensembl"
## [99] "mgallopavo_gene_ensembl" "mleucophaeus_gene_ensembl"
## [101] "mlucifugus_gene_ensembl" "mmmarmota_gene_ensembl"
## [103] "mmulatta_gene_ensembl" "mmurdjan_gene_ensembl"
## [105] "mmurinus_gene_ensembl" "mmusculus_gene_ensembl"
## [107] "mnemestrina_gene_ensembl" "mochrogaster_gene_ensembl"
## [109] "mpahari_gene_ensembl" "mpfuro_gene_ensembl"
## [111] "mspicilegus_gene_ensembl" "mspretus_gene_ensembl"
## [113] "mundulatus_gene_ensembl" "munguiculatus_gene_ensembl"
## [115] "mvitellinus_gene_ensembl" "mzebra_gene_ensembl"
## [117] "neugenii_gene_ensembl" "ngalili_gene_ensembl"
## [119] "nleucogenys_gene_ensembl" "nvison_gene_ensembl"
## [121] "oanatinus_gene_ensembl" "oaries_gene_ensembl"
## [123] "oaureus_gene_ensembl" "ocuniculus_gene_ensembl"
## [125] "odegus_gene_ensembl" "ogarnettii_gene_ensembl"
## [127] "olatipes_gene_ensembl" "olhni_gene_ensembl"
## [129] "olhsok_gene_ensembl" "omelastigma_gene_ensembl"
## [131] "oniloticus_gene_ensembl" "oprinceps_gene_ensembl"
## [133] "pabelii_gene_ensembl" "panubis_gene_ensembl"
## [135] "pcapensis_gene_ensembl" "pcinereus_gene_ensembl"
## [137] "pcoquereli_gene_ensembl" "pformosa_gene_ensembl"
## [139] "pkingsleyae_gene_ensembl" "platipinna_gene_ensembl"
## [141] "pmarinus_gene_ensembl" "pmbairdii_gene_ensembl"
## [143] "pmexicana_gene_ensembl" "pnattereri_gene_ensembl"
## [145] "pnyererei_gene_ensembl" "ppaniscus_gene_ensembl"
## [147] "ppardus_gene_ensembl" "pranga_gene_ensembl"
## [149] "preticulata_gene_ensembl" "psimus_gene_ensembl"
## [151] "psinensis_gene_ensembl" "ptaltaica_gene_ensembl"
## [153] "ptephrosceles_gene_ensembl" "ptroglodytes_gene_ensembl"
## [155] "pvampyrus_gene_ensembl" "pvitticeps_gene_ensembl"
## [157] "rbieti_gene_ensembl" "rferrumequinum_gene_ensembl"
## [159] "rnorvegicus_gene_ensembl" "rroxellana_gene_ensembl"
## [161] "saraneus_gene_ensembl" "saurata_gene_ensembl"
## [163] "sbboliviensis_gene_ensembl" "scanaria_gene_ensembl"
## [165] "scerevisiae_gene_ensembl" "sdumerili_gene_ensembl"
## [167] "sfasciatus_gene_ensembl" "sformosus_gene_ensembl"
## [169] "shabroptila_gene_ensembl" "sharrisii_gene_ensembl"
## [171] "sldorsalis_gene_ensembl" "smaximus_gene_ensembl"
## [173] "smerianae_gene_ensembl" "sorbicularis_gene_ensembl"
## [175] "spunctatus_gene_ensembl" "ssalar_gene_ensembl"
## [177] "ssbamei_gene_ensembl" "ssberkshire_gene_ensembl"
## [179] "sscrofa_gene_ensembl" "sshampshire_gene_ensembl"
## [181] "ssjinhua_gene_ensembl" "sslandrace_gene_ensembl"
## [183] "sslargewhite_gene_ensembl" "ssmeishan_gene_ensembl"
## [185] "sspietrain_gene_ensembl" "ssrongchang_gene_ensembl"
## [187] "sstibetan_gene_ensembl" "ssusmarc_gene_ensembl"
## [189] "sswuzhishan_gene_ensembl" "strutta_gene_ensembl"
## [191] "tbelangeri_gene_ensembl" "tgelada_gene_ensembl"
## [193] "tguttata_gene_ensembl" "tnigroviridis_gene_ensembl"
## [195] "trubripes_gene_ensembl" "ttruncatus_gene_ensembl"
## [197] "uamericanus_gene_ensembl" "umaritimus_gene_ensembl"
## [199] "vpacos_gene_ensembl" "vursinus_gene_ensembl"
## [201] "vvulpes_gene_ensembl" "xmaculatus_gene_ensembl"
## [203] "xtropicalis_gene_ensembl"
grep(
pattern = "fascicularis",
x = biomaRt::listDatasets(ensembl)$dataset,
ignore.case = TRUE,
value = TRUE
)
## [1] "mfascicularis_gene_ensembl"
# mfascicularis_gene_ensembl"
grep(
"Human",
listDatasets(ensembl)$description,
ignore.case = TRUE,
value = TRUE
)
## [1] "Human genes (GRCh38.p13)"
biomaRt::listDatasets(ensembl)[
grep(
"Human",
biomaRt::listDatasets(ensembl)$description,
ignore.case = TRUE
),
]
ensembl <- biomaRt::useDataset("hsapiens_gene_ensembl", mart = ensembl)
# biomaRt::listAttributes(ensembl) %>% head()
# biomaRt::listFilters(ensembl) %>% head()
Extract gene names from our human expression matrix.
ensembl_ids_human <- read_delim(
file = "../../../LW36/filtered_feature_bc_matrix/features.tsv.gz",
delim = "\t",
col_names = FALSE
) %>%
pull(X1)
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_character()
## )
gene_homologs <- biomaRt::getBM(
attributes = c(
"ensembl_gene_id",
"ensembl_gene_id_version",
"external_gene_name",
"external_gene_source",
#
"mmusculus_homolog_ensembl_gene",
"mmusculus_homolog_orthology_type",
"mmusculus_homolog_associated_gene_name",
"mmusculus_homolog_orthology_confidence",
#
"mfascicularis_homolog_ensembl_gene",
"mfascicularis_homolog_orthology_type",
"mfascicularis_homolog_associated_gene_name",
"mfascicularis_homolog_orthology_confidence"
),
filters = "ensembl_gene_id",
values = ensembl_ids_human,
mart = ensembl
)
## [1] "gene_homologs_20200818_151012_CDT.rds"
Keep one2one orthologs.
gene_homologs_filtered <- ensembl_ids_human %>%
tibble::enframe(value = "ensembl_gene_id") %>%
dplyr::left_join(
gene_homologs %>%
filter(
mmusculus_homolog_orthology_type == "ortholog_one2one",
mmusculus_homolog_orthology_confidence == 1,
mfascicularis_homolog_orthology_type == "ortholog_one2one",
mfascicularis_homolog_orthology_confidence == 1
),
by = "ensembl_gene_id"
) %>%
dplyr::select(
-c(
name,
mmusculus_homolog_orthology_confidence,
mfascicularis_homolog_orthology_confidence
)
) %>%
tidyr::drop_na()
gene_homologs_filtered %>% head()
Add ENTREZID.
library(org.Mm.eg.db)
##
# keytypes(org.Mm.eg)
gene_homologs_filtered <- gene_homologs_filtered %>%
mutate(
ENTREZID_mm = AnnotationDbi::select(
org.Mm.eg.db,
keys = mmusculus_homolog_associated_gene_name,
columns = c(
"ENTREZID"
),
keytype = "SYMBOL"
) %>%
pull(ENTREZID)
)
## 'select()' returned many:1 mapping between keys and columns
entrezgene_id_cy <- biomaRt::getBM(
attributes = c("ensembl_gene_id", "entrezgene_id"),
filters = "ensembl_gene_id",
values = gene_homologs_filtered$mfascicularis_homolog_ensembl_gene,
mart = biomaRt::useMart("ensembl", dataset = "mfascicularis_gene_ensembl")
)
# cy
entrezgene_id_cy_matrix <- read_delim(
file = "../preprocessed/GSE74767_SC3seq_Cy_ProcessedData.txt.gz",
delim = "\t"
) %>%
pull(macFas5_entrez_id)
## Parsed with column specification:
## cols(
## .default = col_double(),
## macFas5_gene_symbol = col_character()
## )
## See spec(...) for full column specifications.
gene_homologs_filtered <- gene_homologs_filtered %>%
left_join(
entrezgene_id_cy[entrezgene_id_cy$entrezgene_id %in% entrezgene_id_cy_matrix, ],
by = c("mfascicularis_homolog_ensembl_gene" = "ensembl_gene_id")
) %>%
dplyr::rename("ENTREZID_cy" = entrezgene_id)
# mm
entrezgene_id_mm_matrix <- read_delim(
file = "../preprocessed/GSE74767_SC3seq_Ms_ProcessedData.txt.gz",
delim = "\t"
) %>%
pull(mm10_entrez_id)
## Parsed with column specification:
## cols(
## .default = col_double(),
## mm10_gene_symbol = col_character()
## )
## See spec(...) for full column specifications.
# sum(gene_homologs_filtered$ENTREZID_mm %in% entrezgene_id_mm_matrix)
# sum(gene_homologs_filtered$ENTREZID_cy %in% entrezgene_id_cy_matrix)
gene_homologs_filtered <- gene_homologs_filtered %>%
filter(
ENTREZID_cy %in% entrezgene_id_cy_matrix,
ENTREZID_mm %in% entrezgene_id_mm_matrix
)
gene_homologs_filtered %>% dim()
## [1] 13411 12
gene_homologs_filtered %>% head()
Combine cy and mm matrices.
matrix_readcount_cy_mm <- cbind(
matrix_readcount_use[
match(
gene_homologs_filtered$ENTREZID_cy,
entrezgene_id_cy_matrix
),
],
matrix_readcount_mm[
match(
gene_homologs_filtered$ENTREZID_mm,
entrezgene_id_mm_matrix
),
]
)
if (!file.exists("matrix_readcount_cy_mm.rds")) {
saveRDS(
object = matrix_readcount_cy_mm,
file = "matrix_readcount_cy_mm.rds"
)
}
matrix_readcount_cyEPI_mmEPI <- matrix_readcount_cy_mm[, c(
embedding_cyEPI$sample_name,
embedding_mm %>%
filter(
!developmental_stage %in% c("EpiLC", "2iLESC")
) %>%
pull(cell)
)]
matrix_readcount_cyEPI_mmEPI <- matrix_readcount_cyEPI_mmEPI[
Matrix::rowSums(
matrix_readcount_cyEPI_mmEPI
) >= MINIMAL_NUM_COUNTS_REQUIRED_FOR_GENE,
]
dim(matrix_readcount_cyEPI_mmEPI)
## [1] 12038 299
matrix_readcount_cyEPI_mmEPI_log <- matrix_readcount_cyEPI_mmEPI
matrix_readcount_cyEPI_mmEPI_log@x <- log1p(matrix_readcount_cyEPI_mmEPI_log@x)
# z-score
matrix_readcount_cyEPI_mmEPI_log_scaled <- t(
scale(
t(
matrix_readcount_cyEPI_mmEPI_log
),
center = TRUE, scale = TRUE
)
)
pca_out <- prcomp(
t(matrix_readcount_cyEPI_mmEPI_log_scaled),
center = FALSE,
scale = FALSE
)
summary(pca_out)$imp %>%
t() %>%
as.data.frame() %>%
rownames_to_column(var = "component") %>%
mutate(
rank = 1:n()
) %>%
dplyr::slice(1:30) %>%
ggplot(
aes(
x = rank,
y = `Proportion of Variance`
)
) +
geom_point(size = 0.3) +
theme_bw() +
scale_x_continuous(
name = "Component",
breaks = c(1, seq(5, 30, 5))
) +
scale_y_continuous(
name = "Proportion of variance",
labels = scales::percent
) +
ggplot2::theme(
axis.title = ggplot2::element_text(family = "Arial", size = 6),
axis.text = ggplot2::element_text(family = "Arial", size = 6),
panel.grid.minor = ggplot2::element_blank()
)
embedding_cyEPI_mmEPI <- pca_out$x %>%
as.data.frame() %>%
rownames_to_column(var = "sample_name") %>%
dplyr::select(sample_name:PC3) %>%
dplyr::left_join(
table_s1_sheet3 %>%
# filter(Species == "Mouse") %>%
mutate(
SampleID = case_when(
str_detect(string = SampleID, pattern = "^\\d") ~ str_c("X", .$SampleID),
TRUE ~ .$SampleID
)
),
by = c("sample_name" = "SampleID")
) %>%
dplyr::select(sample_name:`Embryonic Day/ Culture Condition`) %>%
dplyr::rename_all(. %>% tolower()) %>%
dplyr::rename(
x_pca = pc1,
y_pca = pc2,
z_pca = pc3,
developmental_stage = `embryonic day/ culture condition`
) %>%
relocate(x_pca, y_pca, z_pca, .after = last_col()) %>%
mutate(
group = factor(
group,
levels = c(
"ICM",
"Pre-EPI",
"PostE-EPI",
"PostL-EPI",
"Gast1",
"Gast2a",
"Gast2b",
"E4.5EPI",
"E4.5PE",
"E4.5mTE",
"E4.5pTE",
"E5.5EPI",
"E5.5VE",
"E6.5EPI-T hi",
"E6.5EPI-T lo",
"E6.5VE"
)
),
species = factor(
species,
levels = c(
"Cynomolgus Monkey",
"Mouse"
)
)
)
p_embedding_pca1 <- embedding_cyEPI_mmEPI %>%
droplevels() %>%
mutate(
category = "PCA"
) %>%
sample_frac() %>%
plot_pca(
x = x_pca,
y = y_pca,
color = group,
shape = species
) +
add_xy_label_pca(x = "PC1", y = "PC2") +
scale_color_manual(
values = ggthemes::tableau_color_pal("Tableau 20")(
length(unique(embedding_cyEPI_mmEPI$group))
)
) +
ggrepel::geom_text_repel(
data = get_middle_points(embedding_cyEPI_mmEPI, x_pca, y_pca, group),
aes(
x = x_pca,
y = y_pca,
label = group,
shape = NULL
),
color = "black",
size = 2,
family = "Arial",
#
segment.color = "grey35",
segment.size = 0.25,
segment.inflect = TRUE,
#
box.padding = 0.5,
min.segment.length = 0,
arrow = ggplot2::arrow(length = unit(0.015, "npc")),
max.overlaps = Inf,
seed = SEED_USE,
) +
guides(color = "none", shape = "none")
## `summarise()` ungrouping output (override with `.groups` argument)
## Joining, by = "group"
## Joining, by = "group"
## Joining, by = "group"
## Joining, by = "group"
## Joining, by = "group"
## Joining, by = "group"
## Joining, by = "group"
## Joining, by = "group"
## Joining, by = "group"
## Joining, by = "group"
## Joining, by = "group"
## Joining, by = "group"
## Joining, by = "group"
## Joining, by = "group"
## Joining, by = "group"
## Joining, by = "group"
p_embedding_pca2 <- embedding_cyEPI_mmEPI %>%
droplevels() %>%
mutate(
category = "PCA"
) %>%
sample_frac() %>%
plot_pca(
x = y_pca,
y = z_pca,
color = group,
shape = species
) +
add_xy_label_pca(x = "PC2", y = "PC3") +
scale_color_manual(
values = ggthemes::tableau_color_pal("Tableau 20")(
length(unique(embedding_cyEPI_mmEPI$group))
)
) +
guides(color = "none") +
theme(
legend.text = element_text(family = "Arial", size = 6),
legend.key.size = unit(3, "mm"),
legend.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "mm"),
legend.background = element_blank(),
#
legend.justification = c(0, 1),
legend.position = c(0.035, 0.22)
)
Extended Data Figure 8a
p_embedding_pca1 +
p_embedding_pca2 +
plot_annotation(theme = theme(plot.margin = margin())) +
plot_layout(ncol = 2, guides = "keep")
# Supplementary Table 2, Sheet 12
table_s2_sheet12 <- readxl::read_excel(
path = "../../../../docs/A_developmental_coordinate_of_pluripotency_among_mice,_monkeys_and_humans/41586_2016_BFnature19096_MOESM297_ESM.xlsx",
sheet = "EDFig8b",
skip = 1
)
## New names:
## * MF5ID -> MF5ID...2
## * MF5Symbol -> MF5Symbol...3
## * HsID -> HsID...4
## * HsSymbol -> HsSymbol...5
## * MsID -> MsID...6
## * ...
table_s2_sheet12 %>% head()
Use genes listed in Supplementary Table 2, Sheet 12.
features_heatmap <- c(
table_s2_sheet12$MF5ID...22,
table_s2_sheet12$MF5ID...2
)
features_heatmap <- features_heatmap[!is.na(features_heatmap)]
groups_selected <- c(
"ICM", "Pre-EPI", "PostE-EPI", "PostL-EPI", "Gast1", "Gast2a", "Gast2b",
"E4.5EPI", "E5.5EPI", "E6.5EPI-T hi", "E6.5EPI-T lo"
)
cells_heatmap <- embedding_cyEPI_mmEPI %>%
filter(group %in% groups_selected) %>%
pull(sample_name)
features_heatmap <- features_heatmap[
features_heatmap %in% gene_homologs_filtered$ENTREZID_cy
]
matrix_heatmap <- matrix_readcount_cy_mm[
match(features_heatmap, gene_homologs_filtered$ENTREZID_cy),
cells_heatmap
]
matrix_heatmap <- matrix_heatmap[rowSums(matrix_heatmap) != 0, ]
matrix_heatmap <- log10(matrix_heatmap + 1)
heatmap_limits <- quantile(matrix_heatmap, c(0.005, 0.995))
matrix_heatmap[matrix_heatmap < heatmap_limits[1]] <- heatmap_limits[1]
matrix_heatmap[matrix_heatmap > heatmap_limits[2]] <- heatmap_limits[2]
Number of genes used: 658.
cells_heatmap <- map(groups_selected, function(x) {
cells_in_group <- embedding_cyEPI_mmEPI %>%
filter(group == x) %>%
pull(sample_name)
cells_in_group[
pvclust::pvclust(
matrix_heatmap[, cells_in_group] %>% as.matrix(),
nboot = 100,
parallel = parallel::detectCores() - 2L
)$hclust$order
]
}) %>%
unlist()
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
matrix_heatmap <- matrix_heatmap[, cells_heatmap]
ha_columns <- ComplexHeatmap::HeatmapAnnotation(
lineage = ComplexHeatmap::anno_simple(
embedding_cyEPI_mmEPI[match(
x = colnames(matrix_heatmap),
table = embedding_cyEPI_mmEPI$sample_name
), "group"],
# pch = anno_labels_cluster,
col = setNames(
object = ggthemes::tableau_color_pal("Tableau 20")(
embedding_cyEPI_mmEPI$group %>%
# droplevels() %>%
nlevels()
),
nm = levels(embedding_cyEPI_mmEPI$group)
),
which = "column",
pt_size = unit(2, "mm"),
pt_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
simple_anno_size = unit(3, "mm")
),
#
Species = ComplexHeatmap::anno_simple(
embedding_cyEPI_mmEPI[match(
x = colnames(matrix_heatmap),
table = embedding_cyEPI_mmEPI$sample_name
), "species"],
# pch = anno_labels_cluster,
col = setNames(
object = as.character(
yarrr::piratepal(palette = "google")
)[seq_along(levels(embedding_cyEPI_mmEPI$species))],
nm = levels(embedding_cyEPI_mmEPI$species)
),
which = "column",
pt_size = unit(2, "mm"),
pt_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
simple_anno_size = unit(3, "mm")
),
#
show_annotation_name = TRUE,
annotation_label = c(
"Lineage",
"Species"
),
annotation_name_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
annotation_name_side = "left"
)
lgd_lineage <- ComplexHeatmap::Legend(
title = "Lineage",
labels = groups_selected, # embedding_cyEPI_mmEPI %>% pull(group) %>% droplevels() %>% levels(),
legend_gp = grid::gpar(
fill = setNames(
object = ggthemes::tableau_color_pal("Tableau 20")(
length(unique(embedding_cyEPI_mmEPI$group))
),
nm = embedding_cyEPI_mmEPI$group %>% levels()
)[embedding_cyEPI_mmEPI$group %>%
droplevels() %>%
levels()][groups_selected]
),
labels_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
title_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
)
)
lgd_species <- ComplexHeatmap::Legend(
title = "Species",
labels = levels(embedding_cyEPI_mmEPI$species),
legend_gp = grid::gpar(
fill = setNames(
object = as.character(
yarrr::piratepal(palette = "google")
)[seq_along(levels(embedding_cyEPI_mmEPI$species))],
nm = levels(embedding_cyEPI_mmEPI$species)
)
),
labels_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
title_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
)
)
ht <- ComplexHeatmap::Heatmap(
matrix = matrix_heatmap %>% as.matrix(),
# col = viridis::plasma(n = 10),
col = wesanderson::wes_palette("Zissou1", 50, type = "continuous"),
#
cluster_rows = FALSE,
cluster_columns = FALSE,
#
## row_labels = str_remove(string = rownames(matrix_heatmap), pattern = "^E.+_"),
# row_names_side = c("left"),
# row_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
show_row_names = FALSE,
show_column_names = FALSE,
#
show_row_dend = FALSE,
show_column_dend = FALSE,
#
column_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
show_heatmap_legend = TRUE,
top_annotation = ha_columns,
## left_annotation = ha_left,
## right_annotation = ha_left,
#
heatmap_legend_param = list(
title = expression(paste("Log"[10], " (RPM + 1)")),
title_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
legend_direction = "vertical",
labels_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
legend_height = unit(25, "mm"),
legend_width = unit(10, "mm")
),
#
# row_split = table_s2_sheet4$ClusterID,
# row_gap = unit(0.3, "mm"),
#
# row_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
# row_title_rot = 0,
#
column_split = factor(
embedding_cyEPI_mmEPI[match(
x = colnames(matrix_heatmap),
table = embedding_cyEPI_mmEPI$sample_name
), "group", drop = TRUE] %>%
# levels() %>%
str_pad(
width = max(nchar(levels(embedding_cyEPI_mmEPI$group))),
side = "left"
),
levels = embedding_cyEPI_mmEPI[match(
x = colnames(matrix_heatmap),
table = embedding_cyEPI_mmEPI$sample_name
), "group", drop = TRUE] %>%
levels() %>%
str_pad(
width = max(nchar(levels(embedding_cyEPI_mmEPI$group))),
side = "left"
)
),
column_gap = unit(0.3, "mm"),
column_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
column_title_rot = -90,
#
use_raster = FALSE
)
pd <- ComplexHeatmap::packLegend(
lgd_lineage,
lgd_species,
# gap = unit(8, "mm"),
direction = "vertical"
)
Extended Data Figure 8b
ComplexHeatmap::draw(
ht,
heatmap_legend_list = list(pd)
)
scaled_pc_loadings <- pca_out$rotation %>%
scale() %>%
as.data.frame() %>%
rownames_to_column(var = "feature") %>%
dplyr::select(feature, PC1, PC2, PC3) %>%
mutate(
# group = (abs(PC2) >= 2.5 | abs(PC3) >= 2.5) & abs(PC1 < 2),
r23 = sqrt(PC2^2 + PC3^2) >= 2.75,
r12 = sqrt(PC1^2 + PC2^2) < 3,
r13 = sqrt(PC1^2 + PC3^2) < 3,
# pc1 = abs(PC1 < 2),
group = r23 & r12 & r13
) %>%
arrange(group)
sum(scaled_pc_loadings$group)
## [1] 226
plot_pca_loading(
data = scaled_pc_loadings,
x = PC2,
y = PC3,
z1 = group,
x_label = "Z score of PC2 loading",
y_label = "Z score of PC3 loading"
) +
coord_fixed()
features_heatmap <- rownames(
matrix_readcount_cyEPI_mmEPI
)[rownames(matrix_readcount_cyEPI_mmEPI) %in%
scaled_pc_loadings$feature[scaled_pc_loadings$group]]
matrix_heatmap <- matrix_readcount_cy_mm[
rownames(matrix_readcount_cy_mm) %in% features_heatmap,
cells_heatmap
]
matrix_heatmap <- matrix_heatmap[rowSums(matrix_heatmap) != 0, ]
matrix_heatmap <- log10(matrix_heatmap + 1)
heatmap_limits <- quantile(matrix_heatmap, c(0.005, 0.995))
matrix_heatmap[matrix_heatmap < heatmap_limits[1]] <- heatmap_limits[1]
matrix_heatmap[matrix_heatmap > heatmap_limits[2]] <- heatmap_limits[2]
features_heatmap <- rownames(matrix_heatmap)[
pvclust::pvclust(
t(matrix_heatmap %>% as.matrix()),
nboot = 100,
parallel = parallel::detectCores() - 2L
)$hclust$order
]
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
matrix_heatmap <- as.matrix(matrix_heatmap)[features_heatmap, ]
ht <- ComplexHeatmap::Heatmap(
matrix = matrix_heatmap,
# col = viridis::plasma(n = 10),
col = wesanderson::wes_palette("Zissou1", 50, type = "continuous"),
#
cluster_rows = FALSE,
cluster_columns = FALSE,
#
## row_labels = str_remove(string = rownames(matrix_heatmap), pattern = "^E.+_"),
# row_names_side = c("left"),
# row_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
show_row_names = FALSE,
show_column_names = FALSE,
#
show_row_dend = FALSE,
show_column_dend = FALSE,
#
column_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
show_heatmap_legend = TRUE,
top_annotation = ha_columns,
## left_annotation = ha_left,
## right_annotation = ha_left,
#
heatmap_legend_param = list(
title = expression(paste("Log"[10], " (RPM + 1)")),
title_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
legend_direction = "vertical",
labels_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
legend_height = unit(25, "mm"),
legend_width = unit(10, "mm")
),
#
# row_split = table_s2_sheet4$ClusterID,
# row_gap = unit(0.3, "mm"),
#
# row_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
# row_title_rot = 0,
#
column_split = factor(
embedding_cyEPI_mmEPI[match(
x = colnames(matrix_heatmap),
table = embedding_cyEPI_mmEPI$sample_name
), "group", drop = TRUE] %>%
# levels() %>%
str_pad(
width = max(nchar(levels(embedding_cyEPI_mmEPI$group))),
side = "left"
),
levels = embedding_cyEPI_mmEPI[match(
x = colnames(matrix_heatmap),
table = embedding_cyEPI_mmEPI$sample_name
), "group", drop = TRUE] %>%
levels() %>%
str_pad(
width = max(nchar(levels(embedding_cyEPI_mmEPI$group))),
side = "left"
)
),
column_gap = unit(0.3, "mm"),
column_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
column_title_rot = -90,
#
use_raster = FALSE
)
Extended Data Figure 8c
ComplexHeatmap::draw(
ht,
heatmap_legend_list = list(pd)
)
# Supplementary Table 2, Sheet 5
table_s2_sheet5 <- readxl::read_excel(
path = "../../../../docs/A_developmental_coordinate_of_pluripotency_among_mice,_monkeys_and_humans/41586_2016_BFnature19096_MOESM297_ESM.xlsx",
sheet = "cy_m common EPI genes",
skip = 0
)
table_s2_sheet5 %>% head()
matrix_heatmap_corr <- map(groups_selected, function(x) {
cells_in_group <- embedding_cyEPI_mmEPI %>%
filter(group == x) %>%
pull(sample_name)
rowMeans(matrix_readcount_cy_mm[, cells_in_group])
}) %>%
bind_cols() %>%
as.matrix()
## New names:
## * NA -> ...1
## * NA -> ...2
## * NA -> ...3
## * NA -> ...4
## * NA -> ...5
## * ...
colnames(matrix_heatmap_corr) <- groups_selected
rownames(matrix_heatmap_corr) <- rownames(matrix_heatmap_corr)
matrix_heatmap_corr <- matrix_heatmap_corr[
gene_homologs_filtered$ENTREZID_cy %in% table_s2_sheet5$macFas5_entrez_id,
]
head(matrix_heatmap_corr)
## ICM Pre-EPI PostE-EPI PostL-EPI Gast1 Gast2a
## [1,] 0.3884385 3.066446 21.705624 56.140386 38.8517128 27.0977585
## [2,] 33.3950797 6.219453 3.198068 7.396065 11.6537856 66.8365023
## [3,] 47.4773044 45.381955 173.390106 319.768217 837.4882833 398.9513877
## [4,] 13.1562846 52.278984 3.787983 2.408792 0.8139348 0.9224277
## [5,] 217.7737733 53.153054 9.975591 8.359721 7.8830661 3.3246738
## [6,] 0.0000000 4.702147 18.555872 16.887636 13.6229839 11.6953331
## Gast2b E4.5EPI E5.5EPI E6.5EPI-T hi E6.5EPI-T lo
## [1,] 17.2856300 1.456039 13.07441250 23.7086779 4.8825092
## [2,] 19.8709846 0.000000 0.13180738 8.2164376 8.2147970
## [3,] 319.1945685 165.748156 115.74261687 160.4587570 246.5546333
## [4,] 0.1858084 0.000000 0.02128681 0.0995788 1.0954902
## [5,] 6.3924846 5.338176 1.99523812 0.9123928 0.8186693
## [6,] 11.9185731 24.416767 15.68149437 3.3923033 18.0418240
Extended Data Figure 8d
corr_heatmap <- cor(log10(matrix_heatmap_corr + 1))
corr_heatmap[lower.tri(corr_heatmap)] <- NA
corr_heatmap %>%
as.data.frame() %>%
rownames_to_column(var = "sample_x") %>%
pivot_longer(
-c("sample_x"),
names_to = "sample_y"
) %>%
as.data.frame() %>%
filter(!is.na(value)) %>%
# round(value, digits = 2)
mutate(
sample_x = factor(
sample_x,
levels = groups_selected,
),
sample_y = factor(
sample_y,
levels = groups_selected %>% rev(.)
)
) %>%
plot_corr_heatmap(
x = sample_x,
y = sample_y,
z = value
) +
scale_fill_gradientn(
limits = c(-1, 1),
colors = wesanderson::wes_palette(
"Zissou1",
21,
type = "continuous"
)
)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
Load data in Ref. 36
matrix_readcount_PRJEB7132 <- read_delim(
file = "../../PRJEB7132/matrix/xaa_Aligned.sortedByCoord.out_deduped_q10_gene_id_featureCounts.txt.gz",
delim = "\t",
col_names = TRUE,
skip = 1
) %>%
dplyr::select(-(2:6))
## Parsed with column specification:
## cols(
## Geneid = col_character(),
## Chr = col_character(),
## Start = col_character(),
## End = col_character(),
## Strand = col_character(),
## Length = col_double(),
## `../aln/ERR590398_rnaseq/aln/Aligned.sortedByCoord.out_deduped.bam` = col_double(),
## `../aln/ERR590399_rnaseq/aln/Aligned.sortedByCoord.out_deduped.bam` = col_double(),
## `../aln/ERR590400_rnaseq/aln/Aligned.sortedByCoord.out_deduped.bam` = col_double(),
## `../aln/ERR590401_rnaseq/aln/Aligned.sortedByCoord.out_deduped.bam` = col_double(),
## `../aln/ERR590408_rnaseq/aln/Aligned.sortedByCoord.out_deduped.bam` = col_double(),
## `../aln/ERR590410_rnaseq/aln/Aligned.sortedByCoord.out_deduped.bam` = col_double()
## )
colnames(matrix_readcount_PRJEB7132) <- colnames(matrix_readcount_PRJEB7132) %>%
str_remove(
pattern = "_rnaseq/aln/Aligned.sortedByCoord.out_deduped.bam"
) %>%
str_remove(pattern = "../aln/")
cell_metadata_PRJEB7132 <- read_delim(
file = "../../PRJEB7132/SraRunTable.txt",
delim = ","
) %>%
dplyr::select(-c("organism", "Sample Name")) %>%
rename_all(. %>% tolower()) %>%
`colnames<-`(str_replace(
string = colnames(.),
pattern = " ", replacement = "_"
)) %>%
filter(organism == "Homo sapiens") %>%
dplyr::select(
sample_name,
run,
biosample,
alias,
cell_line,
organism,
title
) %>%
mutate(
sample_description = str_remove(
string = title,
pattern = "_R\\d$"
)
)
## Parsed with column specification:
## cols(
## .default = col_character(),
## AvgSpotLen = col_double(),
## Bases = col_double(),
## Bytes = col_double(),
## `ENA-FIRST-PUBLIC (run)` = col_date(format = ""),
## `ENA-LAST-UPDATE (run)` = col_date(format = ""),
## INSDC_first_public = col_datetime(format = ""),
## INSDC_last_update = col_datetime(format = ""),
## ReleaseDate = col_datetime(format = "")
## )
## See spec(...) for full column specifications.
matrix_readcount_PRJEB7132 <- Matrix(
data = as.matrix(matrix_readcount_PRJEB7132[, -1]),
dimnames = list(
matrix_readcount_PRJEB7132$Geneid,
colnames(matrix_readcount_PRJEB7132)[-1]
),
sparse = TRUE
)
colnames(matrix_readcount_PRJEB7132) <- colnames(matrix_readcount_PRJEB7132) %>%
enframe(value = "run") %>%
left_join(cell_metadata_PRJEB7132, by = c("run" = "run")) %>%
pull("sample_name")
matrix_cpm_PRJEB7132 <- calc_cpm(
matrix_readcount_PRJEB7132[
,
c("ERS537888", "ERS537890", "ERS537878", "ERS537884", "ERS537881", "ERS537876")
]
)
# gene_homologs_filtered
# matrix_readcount_use[entrezgene_id_cy_matrix %in% table_s2_sheet4$macFas5_entrez_id]
matrix_readcount_use2 <- matrix_readcount_use[
match(gene_homologs_filtered$ENTREZID_cy, entrezgene_id_cy_matrix),
]
matrix_cpm_PRJEB7132_ <- matrix_cpm_PRJEB7132[
match(gene_homologs_filtered$ensembl_gene_id, rownames(matrix_cpm_PRJEB7132)),
]
groups_selected <- c(
"ICM", "Pre-EPI", "PostE-EPI", "PostL-EPI",
"Gast1", "Gast2a", "Gast2b",
"cyESCFF", "cyESCoF"
)
matrix_heatmap_corr <- map(groups_selected, function(x) {
cells_in_group <- embedding_cy %>%
filter(group == x) %>%
pull(cell)
Matrix::rowMeans(matrix_readcount_use2[, cells_in_group])
}) %>%
bind_cols() %>%
as.matrix()
## New names:
## * NA -> ...1
## * NA -> ...2
## * NA -> ...3
## * NA -> ...4
## * NA -> ...5
## * ...
colnames(matrix_heatmap_corr) <- groups_selected
rownames(matrix_heatmap_corr) <- rownames(matrix_readcount_use2)
matrix_heatmap_corr <- cbind(
matrix_heatmap_corr,
H9 = Matrix::rowMeans(
matrix_cpm_PRJEB7132_[, c("ERS537888", "ERS537890", "ERS537878")]
),
H9_reset = Matrix::rowMeans(
matrix_cpm_PRJEB7132_[, c("ERS537884", "ERS537881", "ERS537876")]
)
)
# cleanup
rm(matrix_readcount_use2)
rm(matrix_cpm_PRJEB7132_)
matrix_heatmap_corr <- matrix_heatmap_corr[
gene_homologs_filtered$ENTREZID_cy %in% table_s2_sheet4$macFas5_entrez_id,
]
matrix_heatmap_corr %>% head()
## ICM Pre-EPI PostE-EPI PostL-EPI Gast1 Gast2a
## SMIM1 151.1363033 93.115194 3.820487 7.118101 8.682972 10.24251
## FBXO2 1.4469895 13.184514 102.988591 54.911807 54.469006 38.52027
## MAD2L2 24.9603647 61.772933 279.058067 204.688162 114.454772 123.24133
## DRAXIN 0.6368063 1.544775 41.484816 57.921171 19.596015 16.83393
## FBLIM1 5.6753280 41.920067 109.251597 60.510816 107.413512 215.56966
## ARHGEF19 0.3884385 3.066446 21.705624 56.140386 38.851713 27.09776
## Gast2b cyESCFF cyESCoF H9 H9_reset
## SMIM1 5.740824 14.18091 7.773821 2.190192 2.617383
## FBXO2 16.046803 46.32920 70.165794 12.201141 0.702835
## MAD2L2 87.188329 169.93101 209.349509 131.947331 69.062950
## DRAXIN 52.722563 40.49054 31.154642 81.348664 7.799315
## FBLIM1 46.282181 154.37242 260.867845 67.786364 25.493542
## ARHGEF19 17.285630 56.76092 28.440670 12.810655 3.328117
Fig. 5c (only includes Ref. 36)
corr_heatmap <- cor(log10(matrix_heatmap_corr + 1))
corr_heatmap[lower.tri(corr_heatmap)] <- NA
corr_heatmap %>%
as.data.frame() %>%
rownames_to_column(var = "sample_x") %>%
pivot_longer(
-c("sample_x"),
names_to = "sample_y"
) %>%
as.data.frame() %>%
filter(!is.na(value)) %>%
# round(value, digits = 2)
mutate(
sample_x = factor(
sample_x,
levels = c(groups_selected, "H9", "H9_reset"),
),
sample_y = factor(
sample_y,
levels = c(groups_selected, "H9", "H9_reset") %>% rev(.)
)
) %>%
plot_corr_heatmap(
x = sample_x,
y = sample_y,
z = value
) +
scale_fill_gradientn(
# limits = c(-1, 1),
colors = wesanderson::wes_palette(
"Zissou1",
21,
type = "continuous"
)
)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
corr_heatmap <- cor(log10(matrix_heatmap_corr + 1))
# colnames(corr_heatmap) == rownames(corr_heatmap)
corr_heatmap <- corr_heatmap[!rownames(corr_heatmap) %in% c("H9", "H9_reset"), ]
ha_columns <- ComplexHeatmap::HeatmapAnnotation(
lineage = ComplexHeatmap::anno_simple(
c(groups_selected, "H9", "H9_reset"),
# pch = anno_labels_cluster,
col = c(
setNames(
object = ggthemes::tableau_color_pal("Tableau 20")(
embedding_cy$group %>%
nlevels()
),
nm = levels(embedding_cy$group)
)[groups_selected],
"H9" = "pink",
"H9_reset" = "purple"
),
which = "column",
pt_size = unit(2, "mm"),
pt_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
simple_anno_size = unit(3, "mm")
),
#
Species = ComplexHeatmap::anno_simple(
c(
rep("Cynomolgus Monkey", length(groups_selected)),
rep("Human", 2)
),
# pch = anno_labels_cluster,
col = c("Cynomolgus Monkey" = "#3D79F3FF", "Human" = "#34A74BFF"),
which = "column",
pt_size = unit(2, "mm"),
pt_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
simple_anno_size = unit(3, "mm")
),
show_annotation_name = TRUE,
annotation_label = c(
"Lineage",
"Species"
),
annotation_name_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
annotation_name_side = "left"
)
ha_left <- ComplexHeatmap::HeatmapAnnotation(
lineage = ComplexHeatmap::anno_simple(
groups_selected,
col = setNames(
object = ggthemes::tableau_color_pal("Tableau 20")(
embedding_cy$group %>%
nlevels()
),
nm = levels(embedding_cy$group)
)[groups_selected],
which = "row",
# pt_size = unit(2, "mm"),
pt_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 5
),
simple_anno_size = unit(3, "mm")
),
which = "row",
show_annotation_name = FALSE,
annotation_label = c(
"Lineage"
),
annotation_name_gp = grid::gpar(fontfamily = "Arial", fontsize = 6)
)
ht <- ComplexHeatmap::Heatmap(
corr_heatmap,
# name = "heatmap",
col = wesanderson::wes_palette("Zissou1", 50, type = "continuous"),
border = NA,
rect_gp = grid::gpar(col = "white", lwd = 1),
cell_fun = function(j, i, x, y, width, height, fill) {
# grid::grid.text(sprintf("%.2f", corr_heatmap[i, j]), x, y, gp = grid::gpar(fontsize = 6))
grid::grid.text(round(corr_heatmap[i, j], 2), x, y, gp = grid::gpar(fontsize = 6))
},
#
cluster_rows = FALSE,
cluster_columns = FALSE,
#
## row_labels = str_remove(string = rownames(matrix_heatmap), pattern = "^E.+_"),
row_names_side = c("left"),
row_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
show_row_names = TRUE,
#
column_names_side = c("top"),
column_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
column_names_rot = -90,
show_column_names = TRUE,
#
show_row_dend = FALSE,
show_column_dend = FALSE,
#
top_annotation = ha_columns,
left_annotation = ha_left,
#
heatmap_legend_param = list(
title = "Corr",
title_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
legend_direction = "vertical",
labels_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
legend_height = unit(25, "mm"),
legend_width = unit(10, "mm")
),
#
column_split = c(
rep("Cynomolgus Monkey", length(groups_selected)),
rep("Human", 2)
),
column_gap = unit(2, "mm"),
column_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
column_title_rot = -90,
column_title = NULL
)
lgd_lineage <- ComplexHeatmap::Legend(
title = "Lineage",
labels = groups_selected, # embedding_cyEPI_mmEPI %>% pull(group) %>% droplevels() %>% levels(),
legend_gp = grid::gpar(
fill = c(
setNames(
object = ggthemes::tableau_color_pal("Tableau 20")(
embedding_cy$group %>%
nlevels()
),
nm = levels(embedding_cy$group)
)[groups_selected],
"H9" = "pink",
"H9_reset" = "purple"
)
),
labels_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
title_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
)
)
lgd_species <- ComplexHeatmap::Legend(
title = "Species",
labels = c("Cynomolgus Monkey", "Human"),
legend_gp = grid::gpar(
fill = c("Cynomolgus Monkey" = "#3D79F3FF", "Human" = "#34A74BFF")
),
labels_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
title_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
)
)
pd <- ComplexHeatmap::packLegend(
# lgd_lineage,
lgd_species,
# gap = unit(8, "mm"),
direction = "vertical"
)
Fig. 5c (only includes Ref. 36)
ComplexHeatmap::draw(
ht,
heatmap_legend_list = list(pd)
)
devtools::session_info()$platform
## setting value
## version R version 4.0.2 (2020-06-22)
## os macOS Catalina 10.15.6
## system x86_64, darwin19.5.0
## ui unknown
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/Chicago
## date 2020-08-18
devtools::session_info()$pack %>%
as_tibble() %>%
dplyr::select(
package,
loadedversion,
date,
`source`
) %>%
# print(n = nrow(.))
gt::gt() %>%
gt::tab_options(table.font.size = "median")
package | loadedversion | date | source |
---|---|---|---|
AnnotationDbi | 1.50.3 | 2020-07-25 | Bioconductor |
askpass | 1.1 | 2019-01-13 | CRAN (R 4.0.0) |
assertthat | 0.2.1 | 2019-03-21 | CRAN (R 4.0.0) |
backports | 1.1.8 | 2020-06-17 | CRAN (R 4.0.1) |
BayesFactor | 0.9.12-4.2 | 2018-05-19 | CRAN (R 4.0.2) |
Biobase | 2.48.0 | 2020-04-27 | Bioconductor |
BiocFileCache | 1.12.1 | 2020-08-04 | Bioconductor |
BiocGenerics | 0.34.0 | 2020-04-27 | Bioconductor |
biomaRt | 2.44.1 | 2020-06-17 | Bioconductor |
bit | 4.0.4 | 2020-08-04 | CRAN (R 4.0.2) |
bit64 | 4.0.2 | 2020-07-30 | CRAN (R 4.0.2) |
blob | 1.2.1 | 2020-01-20 | CRAN (R 4.0.0) |
broom | 0.7.0.9001 | 2020-08-17 | Github (tidymodels/broom@591f87e) |
callr | 3.4.3.9000 | 2020-07-31 | Github (r-lib/callr@b96da8f) |
cellranger | 1.1.0 | 2016-07-27 | CRAN (R 4.0.0) |
checkmate | 2.0.0 | 2020-02-06 | CRAN (R 4.0.0) |
circlize | 0.4.10 | 2020-06-15 | CRAN (R 4.0.1) |
cli | 2.0.2 | 2020-02-28 | CRAN (R 4.0.0) |
clue | 0.3-57 | 2019-02-25 | CRAN (R 4.0.0) |
cluster | 2.1.0 | 2019-06-19 | CRAN (R 4.0.2) |
coda | 0.19-3 | 2019-07-05 | CRAN (R 4.0.0) |
codetools | 0.2-16 | 2018-12-24 | CRAN (R 4.0.2) |
colorspace | 1.4-1 | 2019-03-18 | CRAN (R 4.0.0) |
ComplexHeatmap | 2.4.3 | 2020-07-25 | Bioconductor |
crayon | 1.3.4 | 2017-09-16 | CRAN (R 4.0.0) |
curl | 4.3 | 2019-12-02 | CRAN (R 4.0.0) |
DBI | 1.1.0 | 2019-12-15 | CRAN (R 4.0.0) |
dbplyr | 1.4.4.9000 | 2020-07-28 | Github (tidyverse/dbplyr@a6ed629) |
desc | 1.2.0 | 2018-05-01 | CRAN (R 4.0.0) |
devtools | 2.3.1.9000 | 2020-07-31 | Github (r-lib/devtools@df619ce) |
digest | 0.6.25 | 2020-02-23 | CRAN (R 4.0.0) |
dplyr | 1.0.2 | 2020-08-12 | Github (tidyverse/dplyr@0bea3e8) |
ellipsis | 0.3.1.9000 | 2020-07-18 | Github (r-lib/ellipsis@57a5071) |
evaluate | 0.14 | 2019-05-28 | CRAN (R 4.0.0) |
extrafont | 0.17 | 2014-12-08 | CRAN (R 4.0.2) |
extrafontdb | 1.0 | 2012-06-11 | CRAN (R 4.0.0) |
fansi | 0.4.1 | 2020-01-08 | CRAN (R 4.0.0) |
farver | 2.0.3 | 2020-01-16 | CRAN (R 4.0.0) |
forcats | 0.5.0.9000 | 2020-05-28 | Github (tidyverse/forcats@ab81d1b) |
fs | 1.5.0.9000 | 2020-08-03 | Github (r-lib/fs@93e70a9) |
generics | 0.0.2 | 2018-11-29 | CRAN (R 4.0.0) |
GetoptLong | 1.0.2 | 2020-07-06 | CRAN (R 4.0.2) |
ggplot2 | 3.3.2.9000 | 2020-08-14 | Github (tidyverse/ggplot2@3be0acc) |
ggrepel | 0.9.0 | 2020-07-24 | Github (slowkow/ggrepel@4d0ef50) |
ggthemes | 4.2.0 | 2019-05-13 | CRAN (R 4.0.0) |
GlobalOptions | 0.1.2 | 2020-06-10 | CRAN (R 4.0.0) |
glue | 1.4.1.9000 | 2020-07-07 | Github (tidyverse/glue@205f18b) |
gt | 0.2.2 | 2020-08-06 | Github (rstudio/gt@c97cb4c) |
gtable | 0.3.0 | 2019-03-25 | CRAN (R 4.0.0) |
gtools | 3.8.2 | 2020-03-31 | CRAN (R 4.0.0) |
haven | 2.3.1 | 2020-06-01 | CRAN (R 4.0.0) |
hms | 0.5.3 | 2020-01-08 | CRAN (R 4.0.0) |
htmltools | 0.5.0 | 2020-06-16 | CRAN (R 4.0.1) |
httr | 1.4.2 | 2020-07-20 | CRAN (R 4.0.2) |
IRanges | 2.22.2 | 2020-05-21 | Bioconductor |
jpeg | 0.1-8.1 | 2019-10-24 | CRAN (R 4.0.0) |
jsonlite | 1.7.0 | 2020-06-25 | CRAN (R 4.0.2) |
knitr | 1.29 | 2020-06-23 | CRAN (R 4.0.2) |
labeling | 0.3 | 2014-08-23 | CRAN (R 4.0.0) |
lattice | 0.20-41 | 2020-04-02 | CRAN (R 4.0.2) |
lifecycle | 0.2.0 | 2020-03-06 | CRAN (R 4.0.0) |
lubridate | 1.7.9 | 2020-07-11 | Github (tidyverse/lubridate@de2ee09) |
magrittr | 1.5.0.9000 | 2020-08-04 | Github (tidyverse/magrittr@ba52096) |
Matrix | 1.2-18 | 2019-11-27 | CRAN (R 4.0.2) |
MatrixModels | 0.4-1 | 2015-08-22 | CRAN (R 4.0.0) |
memoise | 1.1.0 | 2017-04-21 | CRAN (R 4.0.0) |
modelr | 0.1.8.9000 | 2020-05-19 | Github (tidyverse/modelr@16168e0) |
munsell | 0.5.0 | 2018-06-12 | CRAN (R 4.0.0) |
mvtnorm | 1.1-1 | 2020-06-09 | CRAN (R 4.0.2) |
openssl | 1.4.2 | 2020-06-27 | CRAN (R 4.0.2) |
org.Hs.eg.db | 3.11.4 | 2020-05-27 | Bioconductor |
org.Mm.eg.db | 3.11.4 | 2020-06-07 | Bioconductor |
patchwork | 1.0.1.9000 | 2020-06-22 | Github (thomasp85/patchwork@82a5e03) |
pbapply | 1.4-2 | 2019-08-31 | CRAN (R 4.0.0) |
pillar | 1.4.6.9000 | 2020-07-21 | Github (r-lib/pillar@8aef8f2) |
pkgbuild | 1.1.0.9000 | 2020-07-14 | Github (r-lib/pkgbuild@3a87bd9) |
pkgconfig | 2.0.3 | 2019-09-22 | CRAN (R 4.0.0) |
pkgload | 1.1.0 | 2020-05-29 | CRAN (R 4.0.0) |
png | 0.1-7 | 2013-12-03 | CRAN (R 4.0.0) |
prettyunits | 1.1.1.9000 | 2020-08-10 | Github (r-lib/prettyunits@b1cdad8) |
processx | 3.4.3 | 2020-07-05 | CRAN (R 4.0.2) |
progress | 1.2.2 | 2019-05-16 | CRAN (R 4.0.0) |
ps | 1.3.4 | 2020-08-11 | CRAN (R 4.0.2) |
purrr | 0.3.4.9000 | 2020-08-10 | Github (tidyverse/purrr@5de5ad2) |
pvclust | 2.2-0 | 2020-04-25 | Github (shimo-lab/pvclust@e734d05) |
R6 | 2.4.1.9000 | 2020-07-18 | Github (r-lib/R6@1415d11) |
rappdirs | 0.3.1 | 2016-03-28 | CRAN (R 4.0.0) |
RColorBrewer | 1.1-2 | 2014-12-07 | CRAN (R 4.0.0) |
Rcpp | 1.0.5 | 2020-07-06 | CRAN (R 4.0.2) |
readr | 1.3.1.9000 | 2020-07-16 | Github (tidyverse/readr@2ab51b2) |
readxl | 1.3.1.9000 | 2020-05-28 | Github (tidyverse/readxl@3815961) |
remotes | 2.2.0.9000 | 2020-08-06 | Github (r-lib/remotes@5a546ad) |
reprex | 0.3.0 | 2019-05-16 | CRAN (R 4.0.0) |
reticulate | 1.16 | 2020-05-27 | CRAN (R 4.0.2) |
rjson | 0.2.20 | 2018-06-08 | CRAN (R 4.0.0) |
rlang | 0.4.7.9000 | 2020-08-12 | Github (r-lib/rlang@de0c176) |
rmarkdown | 2.3.3 | 2020-07-25 | Github (rstudio/rmarkdown@204aa41) |
rprojroot | 1.3-2 | 2018-01-03 | CRAN (R 4.0.0) |
RSQLite | 2.2.0 | 2020-01-07 | CRAN (R 4.0.0) |
rstudioapi | 0.11 | 2020-02-07 | CRAN (R 4.0.2) |
Rttf2pt1 | 1.3.8 | 2020-01-10 | CRAN (R 4.0.0) |
rvest | 0.3.6 | 2020-07-25 | CRAN (R 4.0.2) |
S4Vectors | 0.26.1 | 2020-05-16 | Bioconductor |
sass | 0.2.0 | 2020-03-18 | CRAN (R 4.0.2) |
scales | 1.1.1.9000 | 2020-07-24 | Github (r-lib/scales@9ff4757) |
sessioninfo | 1.1.1.9000 | 2020-07-18 | Github (r-lib/sessioninfo@791705b) |
shape | 1.4.4 | 2018-02-07 | CRAN (R 4.0.0) |
stringi | 1.4.6 | 2020-02-17 | CRAN (R 4.0.0) |
stringr | 1.4.0.9000 | 2020-06-01 | Github (tidyverse/stringr@f70c4ba) |
styler | 1.3.2.9000 | 2020-08-13 | Github (r-lib/styler@7376cf4) |
testthat | 2.99.0.9000 | 2020-08-12 | Github (r-lib/testthat@9e643d8) |
tibble | 3.0.3.9000 | 2020-07-21 | Github (tidyverse/tibble@b4eec19) |
tidyr | 1.1.1.9000 | 2020-08-18 | Github (tidyverse/tidyr@4d0dd69) |
tidyselect | 1.1.0.9000 | 2020-07-11 | Github (tidyverse/tidyselect@69fdc96) |
tidyverse | 1.3.0.9000 | 2020-06-01 | Github (hadley/tidyverse@8a0bb99) |
usethis | 1.6.1.9001 | 2020-08-17 | Github (r-lib/usethis@860c1ea) |
vctrs | 0.3.2.9000 | 2020-08-18 | Github (r-lib/vctrs@c0dcd3f) |
viridisLite | 0.3.0 | 2018-02-01 | CRAN (R 4.0.0) |
wesanderson | 0.3.6.9000 | 2020-06-05 | Github (karthik/wesanderson@d90700a) |
withr | 2.2.0 | 2020-04-20 | CRAN (R 4.0.0) |
xfun | 0.16 | 2020-07-24 | CRAN (R 4.0.2) |
XML | 3.99-0.5 | 2020-07-23 | CRAN (R 4.0.2) |
xml2 | 1.3.2 | 2020-04-23 | CRAN (R 4.0.0) |
yaml | 2.2.1 | 2020-02-01 | CRAN (R 4.0.0) |
yarrr | 0.1.6 | 2020-07-23 | Github (ndphillips/yarrr@e2e4488) |