Sozen, B., Jorgensen, V., Weatherbee, B.A.T., Chen, S., Zhu, M., and Zernicka-Goetz, M. (2021). Reconstructing aspects of human embryogenesis with pluripotent stem cells. Nat. Commun. 12, 1–13.
Load required packages.
library(tidyverse)
library(Matrix)
library(patchwork)
library(extrafont)
# library(magrittr)
Sys.time()
## [1] "2021-09-24 01:35:43 CDT"
source(
file = file.path(
SCRIPT_DIR,"utilities.R"
) )
<- "/Users/jialei/Dropbox/Data/Projects/UTSW/Peri-implantation" PROJECT_DIR
<- reticulate::import(module = "anndata", convert = TRUE)
ad print(ad$`__version__`)
## [1] "0.7.5"
<- NULL
BACKED <- ad$read_h5ad(
adata filename = file.path(
PROJECT_DIR,"raw/public/PRJNA738498",
"matrix",
"adata.h5ad"
),backed = BACKED
)
<- adata |> convert_adata()
matrix_readcount_use |> dim() matrix_readcount_use
## [1] 33538 6231
<- "embedding_ncomponents18_ccc1_seed20210719.csv.gz"
EMBEDDING_FILE
<- vroom::vroom(
embedding file = file.path(
PROJECT_DIR,"raw/public/PRJNA738498/",
"clustering/PRJNA738498/exploring/Scanpy_Harmony",
EMBEDDING_FILE
)
)
|> head() embedding
<- vroom::vroom(
cell_metadata file = file.path(
PROJECT_DIR,"raw/public/PRJNA738498",
"matrix",
"cell_metadata.csv"
)|>
) ::mutate(
dplyrgroup = stringr::str_remove(
string = sample_id,
pattern = "-.+$"
),group = case_when(
== "2D" ~ "hEPSCs in 2D",
group == "D5" ~ "Day 5 hEP-structures",
group == "D6" ~ "Day 6 hEP-structures",
group == "unknown" ~ "hEPSCs, unknown",
group is.na(group) ~ "Natural human embryos"
),group = factor(
group,levels = c(
"Natural human embryos",
"Day 5 hEP-structures",
"Day 6 hEP-structures",
"hEPSCs in 2D",
"hEPSCs, unknown"
)
)|>
) ::left_join(
dplyr$obs |>
adata::rownames_to_column(var = "cell") |>
tibble::select(-batch),
dplyrby = c("cell" = "cell")
)
# cell_metadata$sample_id |> table(exclude = "")
# cell_metadata$developmental_stage |> table(exclude = "")
# cell_metadata |> dplyr::count(group)
|>
cell_metadata ::count(group, name = "num_cells") |>
dplyr::gt() |>
gt::tab_options(table.font.size = "median") gt
group | num_cells |
---|---|
Natural human embryos | 542 |
Day 5 hEP-structures | 2013 |
Day 6 hEP-structures | 2057 |
hEPSCs in 2D | 228 |
hEPSCs, unknown | 1391 |
::walk(list(matrix_readcount_use, cell_metadata), \(x) {
purrrprint(object.size(x), units = "auto", standard = "SI")
})
## 134.8 MB
## 905 kB
<- embedding |>
embedding ::left_join(
dplyr|>
cell_metadata ::select(
dplyr:mt_percentage
cell, group
),by = c("cell" = "cell")
)
|>
embedding ::group_by(
dplyr
group|>
) ::summarise(
dplyrnum_cells = n(),
median_umis = median(num_umis),
num_features = median(num_features),
median_mt_percentage = median(mt_percentage)
|>
) ::gt() |>
gt::tab_options(table.font.size = "median") gt
group | num_cells | median_umis | num_features | median_mt_percentage |
---|---|---|---|---|
Natural human embryos | 542 | 1525.0 | 788 | 0.07816558 |
Day 5 hEP-structures | 2013 | 3916.0 | 1542 | 0.05452046 |
Day 6 hEP-structures | 2057 | 5634.0 | 2146 | 0.04631579 |
hEPSCs in 2D | 228 | 4658.5 | 1740 | 0.07582252 |
|>
embedding ::group_by(
dplyr
leiden|>
) ::summarise(
dplyrnum_cells = n(),
median_umis = median(num_umis),
num_features = median(num_features),
median_mt_percentage = median(mt_percentage)
|>
) ::gt() |>
gt::tab_options(table.font.size = "median") gt
leiden | num_cells | median_umis | num_features | median_mt_percentage |
---|---|---|---|---|
0 | 650 | 7229.0 | 2548.0 | 0.044980507 |
1 | 575 | 5098.0 | 2001.0 | 0.047900969 |
2 | 519 | 4596.0 | 1750.0 | 0.055059253 |
3 | 486 | 3592.5 | 1367.5 | 0.053230059 |
4 | 467 | 1615.0 | 828.0 | 0.076511094 |
5 | 381 | 5207.0 | 1995.0 | 0.045398773 |
6 | 331 | 5504.0 | 2094.0 | 0.044977861 |
7 | 329 | 5760.0 | 2061.0 | 0.053837597 |
8 | 259 | 563.0 | 293.0 | 0.286012526 |
9 | 252 | 2873.5 | 1286.0 | 0.051105461 |
10 | 212 | 4263.0 | 1723.5 | 0.087807433 |
11 | 172 | 3567.5 | 1502.5 | 0.055306764 |
12 | 96 | 4089.0 | 1676.5 | 0.043022000 |
13 | 83 | 1504.0 | 798.0 | 0.163967611 |
14 | 17 | 5581.0 | 1828.0 | 0.062293144 |
15 | 11 | 825.0 | 347.0 | 0.001129944 |
<- "x_umap_min_dist=0.1"
x_column <- "y_umap_min_dist=0.1"
y_column
<- 0.4
GEOM_POINT_SIZE <- "UMAP"
EMBEDDING_TITLE_PREFIX <- TRUE RASTERISED
<- plot_embedding(
p_embedding_leiden embedding = embedding[, c(x_column, y_column)],
color_values = embedding$leiden |> as.factor(),
label = paste(EMBEDDING_TITLE_PREFIX, "Leiden", sep = "; "),
label_position = NULL,
show_color_value_labels = TRUE,
show_color_legend = FALSE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = FALSE,
rasterise = RASTERISED
+
) theme_customized()
<- c(0.8, 0.325)
CB_POSITION <- plot_embedding(
p_embedding_UMI embedding = embedding[, c(x_column, y_column)],
color_values = log10(embedding$num_umis),
label = paste(EMBEDDING_TITLE_PREFIX, "UMI", sep = "; "),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE * 1.5,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
+
) theme_customized(
y = CB_POSITION[2],
legend_key_size = 2,
legend_text_size = 5
)
<- plot_embedding(
p_embedding_MT embedding = embedding[, c(x_column, y_column)],
color_values = embedding$mt_percentage,
label = paste(EMBEDDING_TITLE_PREFIX, "MT%", sep = "; "),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE * 1.5,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
+
) theme_customized(
y = CB_POSITION[2],
legend_key_size = 2,
legend_text_size = 5
)
::reduce(
purrrlist(
p_embedding_leiden,
p_embedding_UMI,
p_embedding_MT
),`+`
+
) ::plot_layout(ncol = 3) +
patchwork::plot_annotation(
patchworktheme = theme(plot.margin = margin())
)
::map(c(0.4, 0.6, 0.8), \(x) {
purrr<- embedding |>
values ::mutate(
dplyrvalue = case_when(
>= x ~ "1",
mt_percentage TRUE ~ "0"
)|>
) ::pull(value)
dplyr
plot_embedding(
embedding = embedding[, c(x_column, y_column)],
color_values = values |> as.factor(),
label = glue::glue(
"{EMBEDDING_TITLE_PREFIX}; MT% >= {scales::percent(x)}; Cells: {sum(as.numeric(values))}"
),label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = FALSE,
geom_point_size = GEOM_POINT_SIZE * 1.5,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
+
) theme_customized(
legend_key_size = 2,
legend_text_size = 5
+
) scale_color_manual(
values = c("grey70", "salmon")
)|>
}) ::reduce(
purrr`+`
+
) ::plot_layout(ncol = 3) +
patchwork::plot_annotation(
patchworktheme = theme(plot.margin = margin())
)
<- plot_embedding(
p_embedding_batch embedding = embedding[, c(x_column, y_column)],
color_values = embedding$batch |> as.factor(),
label = paste(EMBEDDING_TITLE_PREFIX, "Batch", sep = "; "),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED,
legend_size = 2
+
) theme_customized(
legend_key_size = 2,
legend_text_size = 5
)
<- plot_embedding(
p_embedding_group embedding = embedding[, c(x_column, y_column)],
color_values = embedding$group |> as.factor(),
label = paste(EMBEDDING_TITLE_PREFIX, "Group", sep = "; "),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
+
) theme_customized(
legend_key_size = 2,
legend_text_size = 5
)
<- embedding |>
values ::mutate(
dplyrvalue = case_when(
< 200 ~ "1",
num_features TRUE ~ "0"
)|>
) ::pull(value)
dplyr
<- plot_embedding(
p_embedding_low_complexity embedding = embedding[, c(x_column, y_column)],
color_values = values |> as.factor(),
label = paste(EMBEDDING_TITLE_PREFIX, "Genes < 200", sep = "; "),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = FALSE,
geom_point_size = GEOM_POINT_SIZE * 1.5,
sort_values = TRUE,
rasterise = RASTERISED,
legend_size = 2
+
) theme_customized(
legend_key_size = 2,
legend_text_size = 5
+
) scale_color_manual(
values = c("grey70", "salmon")
)
::reduce(
purrrlist(
p_embedding_batch,
p_embedding_group,
p_embedding_low_complexity
),`+`
+
) ::plot_layout(ncol = 3) +
patchwork::plot_annotation(
patchworktheme = theme(plot.margin = margin())
)
<- calc_group_composition(
p_barplot_composition_batch data = embedding,
x = "leiden",
group = "batch"
|>
) ::mutate(
dplyrleiden = factor(leiden)
|>
) plot_barplot(
x = "leiden",
y = "percentage",
z = "batch",
legend_ncol = 1
)
<- calc_group_composition(
p_barplot_group data = embedding,
x = "leiden",
group = "group"
|>
) ::mutate(
dplyrleiden = factor(leiden)
|>
) plot_barplot(
x = "leiden",
y = "percentage",
z = "group",
legend_ncol = 1
)
<- list(
p_barplot_combined
p_barplot_composition_batch,
p_barplot_group|>
) ::reduce(`+`) +
purrr::plot_layout(nrow = 2, guides = "collect") +
patchwork::plot_annotation(
patchworktheme = theme(plot.margin = margin())
) p_barplot_combined
<- c(
FEATURES_SELECTED "ENSG00000204531_POU5F1",
"ENSG00000111704_NANOG",
"ENSG00000171872_KLF17",
"ENSG00000186103_ARGFX",
#
"ENSG00000164736_SOX17",
"ENSG00000125798_FOXA2",
"ENSG00000136574_GATA4",
"ENSG00000134853_PDGFRA",
#
"ENSG00000179348_GATA2",
"ENSG00000070915_SLC12A3",
"ENSG00000165556_CDX2",
"ENSG00000007866_TEAD3"
)
::map(FEATURES_SELECTED, \(x) {
purrr<- x
selected_feature
cat(selected_feature, "\n")
<- log10(calc_cpm(matrix_readcount_use[, embedding$cell])[selected_feature, ] + 1)
values
plot_embedding(
embedding = embedding[, c(x_column, y_column)],
color_values = values,
label = paste(
EMBEDDING_TITLE_PREFIX,|> stringr::str_remove(pattern = "^E.+_"),
selected_feature sep = "; "
),label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE * 1.5,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
+
) scale_color_viridis_c(
na.value = "grey80"
+
) theme_customized(
y = CB_POSITION[2],
legend_key_size = 2,
legend_text_size = 5
)|>
}) ::reduce(`+`) +
purrr::plot_layout(ncol = 3, byrow = FALSE) +
patchwork::plot_annotation(
patchworktheme = theme(plot.margin = margin())
)
## ENSG00000204531_POU5F1
## ENSG00000111704_NANOG
## ENSG00000171872_KLF17
## ENSG00000186103_ARGFX
## ENSG00000164736_SOX17
## ENSG00000125798_FOXA2
## ENSG00000136574_GATA4
## ENSG00000134853_PDGFRA
## ENSG00000179348_GATA2
## ENSG00000070915_SLC12A3
## ENSG00000165556_CDX2
## ENSG00000007866_TEAD3
::session_info()$platform devtools
## setting value
## version R version 4.1.1 (2021-08-10)
## os macOS Big Sur 11.6
## system x86_64, darwin20.4.0
## ui unknown
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/Chicago
## date 2021-09-24
::session_info()$pack |>
devtoolsas_tibble() |>
::select(
dplyr
package,
loadedversion,
date,`source`
|>
) # print(n = nrow(.))
::gt() |>
gt::tab_options(table.font.size = "median") gt
package | loadedversion | date | source |
---|---|---|---|
assertthat | 0.2.1 | 2019-03-21 | CRAN (R 4.1.1) |
backports | 1.2.1 | 2020-12-09 | CRAN (R 4.1.1) |
beeswarm | 0.4.0 | 2021-06-01 | CRAN (R 4.1.1) |
bit | 4.0.4 | 2020-08-04 | CRAN (R 4.1.1) |
bit64 | 4.0.5 | 2020-08-30 | CRAN (R 4.1.1) |
broom | 0.7.9 | 2021-07-27 | CRAN (R 4.1.1) |
bslib | 0.3.0 | 2021-09-02 | CRAN (R 4.1.1) |
cachem | 1.0.6 | 2021-08-19 | CRAN (R 4.1.1) |
callr | 3.7.0 | 2021-04-20 | CRAN (R 4.1.1) |
cellranger | 1.1.0 | 2016-07-27 | CRAN (R 4.1.1) |
checkmate | 2.0.0 | 2020-02-06 | CRAN (R 4.1.1) |
cli | 3.0.1 | 2021-07-17 | CRAN (R 4.1.1) |
colorspace | 2.0-2 | 2021-06-24 | CRAN (R 4.1.1) |
crayon | 1.4.1 | 2021-02-08 | CRAN (R 4.1.1) |
DBI | 1.1.1 | 2021-01-15 | CRAN (R 4.1.1) |
dbplyr | 2.1.1 | 2021-04-06 | CRAN (R 4.1.1) |
desc | 1.3.0 | 2021-03-05 | CRAN (R 4.1.1) |
devtools | 2.4.2 | 2021-08-19 | Github (r-lib/devtools@e10658f) |
digest | 0.6.27 | 2020-10-24 | CRAN (R 4.1.1) |
dplyr | 1.0.7.9000 | 2021-09-23 | Github (tidyverse/dplyr@cab38bf) |
ellipsis | 0.3.2 | 2021-04-29 | CRAN (R 4.1.1) |
evaluate | 0.14 | 2019-05-28 | CRAN (R 4.1.1) |
extrafont | 0.17 | 2014-12-08 | CRAN (R 4.1.1) |
extrafontdb | 1.0 | 2012-06-11 | CRAN (R 4.1.1) |
fansi | 0.5.0 | 2021-05-25 | CRAN (R 4.1.1) |
farver | 2.1.0 | 2021-02-28 | CRAN (R 4.1.1) |
fastmap | 1.1.0 | 2021-01-25 | CRAN (R 4.1.1) |
forcats | 0.5.1.9000 | 2021-08-15 | Github (tidyverse/forcats@b5fce89) |
fs | 1.5.0.9000 | 2021-08-15 | Github (r-lib/fs@10e38dd) |
generics | 0.1.0 | 2020-10-31 | CRAN (R 4.1.1) |
ggbeeswarm | 0.6.0 | 2017-08-07 | CRAN (R 4.1.1) |
ggplot2 | 3.3.5.9000 | 2021-09-21 | Github (tidyverse/ggplot2@759c63c) |
ggrastr | 0.2.3 | 2021-08-15 | Github (VPetukhov/ggrastr@1ef0ff5) |
glue | 1.4.2 | 2020-08-27 | CRAN (R 4.1.1) |
gt | 0.3.1 | 2021-08-07 | CRAN (R 4.1.1) |
gtable | 0.3.0.9000 | 2021-08-15 | Github (r-lib/gtable@0fc53e0) |
haven | 2.4.3 | 2021-08-04 | CRAN (R 4.1.1) |
highr | 0.9 | 2021-04-16 | CRAN (R 4.1.1) |
hms | 1.1.0 | 2021-05-17 | CRAN (R 4.1.1) |
htmltools | 0.5.2 | 2021-08-25 | CRAN (R 4.1.1) |
httr | 1.4.2 | 2020-07-20 | CRAN (R 4.1.1) |
jquerylib | 0.1.4 | 2021-04-26 | CRAN (R 4.1.1) |
jsonlite | 1.7.2 | 2020-12-09 | CRAN (R 4.1.1) |
knitr | 1.34 | 2021-09-09 | CRAN (R 4.1.1) |
labeling | 0.4.2 | 2020-10-20 | CRAN (R 4.1.1) |
lattice | 0.20-45 | 2021-09-22 | CRAN (R 4.1.1) |
lifecycle | 1.0.0 | 2021-02-15 | CRAN (R 4.1.1) |
lubridate | 1.7.10 | 2021-02-26 | CRAN (R 4.1.1) |
magrittr | 2.0.1 | 2020-11-17 | CRAN (R 4.1.1) |
Matrix | 1.3-4 | 2021-06-01 | CRAN (R 4.1.1) |
memoise | 2.0.0 | 2021-01-26 | CRAN (R 4.1.1) |
modelr | 0.1.8 | 2020-05-19 | CRAN (R 4.1.1) |
munsell | 0.5.0 | 2018-06-12 | CRAN (R 4.1.1) |
patchwork | 1.1.0.9000 | 2021-08-15 | Github (thomasp85/patchwork@79223d3) |
pillar | 1.6.2 | 2021-08-26 | Github (r-lib/pillar@1058bda) |
pkgbuild | 1.2.0 | 2020-12-15 | CRAN (R 4.1.1) |
pkgconfig | 2.0.3 | 2019-09-22 | CRAN (R 4.1.1) |
pkgload | 1.2.2 | 2021-09-11 | CRAN (R 4.1.1) |
png | 0.1-7 | 2013-12-03 | CRAN (R 4.1.1) |
prettyunits | 1.1.1.9000 | 2021-08-15 | Github (r-lib/prettyunits@8706d89) |
processx | 3.5.2 | 2021-04-30 | CRAN (R 4.1.1) |
ps | 1.6.0 | 2021-02-28 | CRAN (R 4.1.1) |
purrr | 0.3.4.9000 | 2021-08-15 | Github (tidyverse/purrr@5aca9df) |
R6 | 2.5.0.9000 | 2021-08-15 | Github (r-lib/R6@1d70936) |
ragg | 1.1.3.9000 | 2021-08-15 | Github (r-lib/ragg@adf5f06) |
Rcpp | 1.0.7 | 2021-07-07 | CRAN (R 4.1.1) |
readr | 2.0.1.9000 | 2021-09-22 | Github (tidyverse/readr@0f37e2b) |
readxl | 1.3.1.9000 | 2021-08-19 | Github (tidyverse/readxl@649982a) |
remotes | 2.4.0.9001 | 2021-09-02 | Github (r-lib/remotes@6a0e560) |
reprex | 2.0.1 | 2021-08-05 | CRAN (R 4.1.1) |
reticulate | 1.22 | 2021-09-17 | CRAN (R 4.1.1) |
rlang | 0.4.11.9001 | 2021-09-23 | Github (r-lib/rlang@217ef27) |
rmarkdown | 2.11.2 | 2021-09-23 | Github (rstudio/rmarkdown@71ae829) |
rprojroot | 2.0.2 | 2020-11-15 | CRAN (R 4.1.1) |
rstudioapi | 0.13.0-9000 | 2021-08-15 | Github (rstudio/rstudioapi@96fad1d) |
Rttf2pt1 | 1.3.9 | 2021-07-22 | CRAN (R 4.1.1) |
rvest | 1.0.1 | 2021-07-26 | CRAN (R 4.1.1) |
sass | 0.4.0 | 2021-05-12 | CRAN (R 4.1.1) |
scales | 1.1.1 | 2020-05-11 | CRAN (R 4.1.1) |
sessioninfo | 1.1.1 | 2018-11-05 | CRAN (R 4.1.1) |
stringi | 1.7.4 | 2021-08-25 | CRAN (R 4.1.1) |
stringr | 1.4.0 | 2019-02-10 | CRAN (R 4.1.1) |
styler | 1.6.1.9000 | 2021-09-17 | Github (r-lib/styler@0e41d88) |
systemfonts | 1.0.2 | 2021-05-11 | CRAN (R 4.1.1) |
testthat | 3.0.4.9000 | 2021-09-23 | Github (r-lib/testthat@74a0fee) |
textshaping | 0.3.5 | 2021-06-09 | CRAN (R 4.1.1) |
tibble | 3.1.4.9000 | 2021-08-26 | Github (tidyverse/tibble@e3e40af) |
tidyr | 1.1.3.9000 | 2021-09-23 | Github (tidyverse/tidyr@a39f8de) |
tidyselect | 1.1.1 | 2021-04-30 | CRAN (R 4.1.1) |
tidyverse | 1.3.1.9000 | 2021-08-16 | Github (tidyverse/tidyverse@195d8a4) |
tzdb | 0.1.2 | 2021-07-20 | CRAN (R 4.1.1) |
usethis | 2.0.1.9000 | 2021-08-30 | Github (r-lib/usethis@3385e14) |
utf8 | 1.2.2 | 2021-07-24 | CRAN (R 4.1.1) |
vctrs | 0.3.8 | 2021-04-29 | CRAN (R 4.1.1) |
vipor | 0.4.5 | 2017-03-22 | CRAN (R 4.1.1) |
viridisLite | 0.4.0 | 2021-04-13 | CRAN (R 4.1.1) |
vroom | 1.5.5.9000 | 2021-09-19 | Github (r-lib/vroom@32c2ba6) |
withr | 2.4.2 | 2021-04-18 | CRAN (R 4.1.1) |
xfun | 0.26 | 2021-09-14 | CRAN (R 4.1.1) |
xml2 | 1.3.2 | 2020-04-23 | CRAN (R 4.1.1) |
yaml | 2.2.1 | 2020-02-01 | CRAN (R 4.1.1) |