Sozen, B., Jorgensen, V., Weatherbee, B.A.T., Chen, S., Zhu, M., and Zernicka-Goetz, M. (2021). Reconstructing aspects of human embryogenesis with pluripotent stem cells. Nat. Commun. 12, 1–13.
Load required packages.
library(tidyverse)
library(Matrix)
library(patchwork)
library(extrafont)
# library(magrittr)Sys.time()## [1] "2021-09-24 01:35:43 CDT"
source(
file = file.path(
SCRIPT_DIR,
"utilities.R"
)
)PROJECT_DIR <- "/Users/jialei/Dropbox/Data/Projects/UTSW/Peri-implantation"ad <- reticulate::import(module = "anndata", convert = TRUE)
print(ad$`__version__`)## [1] "0.7.5"
BACKED <- NULL
adata <- ad$read_h5ad(
filename = file.path(
PROJECT_DIR,
"raw/public/PRJNA738498",
"matrix",
"adata.h5ad"
),
backed = BACKED
)
matrix_readcount_use <- adata |> convert_adata()
matrix_readcount_use |> dim()## [1] 33538 6231
EMBEDDING_FILE <- "embedding_ncomponents18_ccc1_seed20210719.csv.gz"
embedding <- vroom::vroom(
file = file.path(
PROJECT_DIR,
"raw/public/PRJNA738498/",
"clustering/PRJNA738498/exploring/Scanpy_Harmony",
EMBEDDING_FILE
)
)
embedding |> head()cell_metadata <- vroom::vroom(
file = file.path(
PROJECT_DIR,
"raw/public/PRJNA738498",
"matrix",
"cell_metadata.csv"
)
) |>
dplyr::mutate(
group = stringr::str_remove(
string = sample_id,
pattern = "-.+$"
),
group = case_when(
group == "2D" ~ "hEPSCs in 2D",
group == "D5" ~ "Day 5 hEP-structures",
group == "D6" ~ "Day 6 hEP-structures",
group == "unknown" ~ "hEPSCs, unknown",
is.na(group) ~ "Natural human embryos"
),
group = factor(
group,
levels = c(
"Natural human embryos",
"Day 5 hEP-structures",
"Day 6 hEP-structures",
"hEPSCs in 2D",
"hEPSCs, unknown"
)
)
) |>
dplyr::left_join(
adata$obs |>
tibble::rownames_to_column(var = "cell") |>
dplyr::select(-batch),
by = c("cell" = "cell")
)
# cell_metadata$sample_id |> table(exclude = "")
# cell_metadata$developmental_stage |> table(exclude = "")
# cell_metadata |> dplyr::count(group)
cell_metadata |>
dplyr::count(group, name = "num_cells") |>
gt::gt() |>
gt::tab_options(table.font.size = "median")| group | num_cells |
|---|---|
| Natural human embryos | 542 |
| Day 5 hEP-structures | 2013 |
| Day 6 hEP-structures | 2057 |
| hEPSCs in 2D | 228 |
| hEPSCs, unknown | 1391 |
purrr::walk(list(matrix_readcount_use, cell_metadata), \(x) {
print(object.size(x), units = "auto", standard = "SI")
})## 134.8 MB
## 905 kB
embedding <- embedding |>
dplyr::left_join(
cell_metadata |>
dplyr::select(
cell, group:mt_percentage
),
by = c("cell" = "cell")
)embedding |>
dplyr::group_by(
group
) |>
dplyr::summarise(
num_cells = n(),
median_umis = median(num_umis),
num_features = median(num_features),
median_mt_percentage = median(mt_percentage)
) |>
gt::gt() |>
gt::tab_options(table.font.size = "median")| group | num_cells | median_umis | num_features | median_mt_percentage |
|---|---|---|---|---|
| Natural human embryos | 542 | 1525.0 | 788 | 0.07816558 |
| Day 5 hEP-structures | 2013 | 3916.0 | 1542 | 0.05452046 |
| Day 6 hEP-structures | 2057 | 5634.0 | 2146 | 0.04631579 |
| hEPSCs in 2D | 228 | 4658.5 | 1740 | 0.07582252 |
embedding |>
dplyr::group_by(
leiden
) |>
dplyr::summarise(
num_cells = n(),
median_umis = median(num_umis),
num_features = median(num_features),
median_mt_percentage = median(mt_percentage)
) |>
gt::gt() |>
gt::tab_options(table.font.size = "median")| leiden | num_cells | median_umis | num_features | median_mt_percentage |
|---|---|---|---|---|
| 0 | 650 | 7229.0 | 2548.0 | 0.044980507 |
| 1 | 575 | 5098.0 | 2001.0 | 0.047900969 |
| 2 | 519 | 4596.0 | 1750.0 | 0.055059253 |
| 3 | 486 | 3592.5 | 1367.5 | 0.053230059 |
| 4 | 467 | 1615.0 | 828.0 | 0.076511094 |
| 5 | 381 | 5207.0 | 1995.0 | 0.045398773 |
| 6 | 331 | 5504.0 | 2094.0 | 0.044977861 |
| 7 | 329 | 5760.0 | 2061.0 | 0.053837597 |
| 8 | 259 | 563.0 | 293.0 | 0.286012526 |
| 9 | 252 | 2873.5 | 1286.0 | 0.051105461 |
| 10 | 212 | 4263.0 | 1723.5 | 0.087807433 |
| 11 | 172 | 3567.5 | 1502.5 | 0.055306764 |
| 12 | 96 | 4089.0 | 1676.5 | 0.043022000 |
| 13 | 83 | 1504.0 | 798.0 | 0.163967611 |
| 14 | 17 | 5581.0 | 1828.0 | 0.062293144 |
| 15 | 11 | 825.0 | 347.0 | 0.001129944 |
x_column <- "x_umap_min_dist=0.1"
y_column <- "y_umap_min_dist=0.1"
GEOM_POINT_SIZE <- 0.4
EMBEDDING_TITLE_PREFIX <- "UMAP"
RASTERISED <- TRUEp_embedding_leiden <- plot_embedding(
embedding = embedding[, c(x_column, y_column)],
color_values = embedding$leiden |> as.factor(),
label = paste(EMBEDDING_TITLE_PREFIX, "Leiden", sep = "; "),
label_position = NULL,
show_color_value_labels = TRUE,
show_color_legend = FALSE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = FALSE,
rasterise = RASTERISED
) +
theme_customized()
CB_POSITION <- c(0.8, 0.325)
p_embedding_UMI <- plot_embedding(
embedding = embedding[, c(x_column, y_column)],
color_values = log10(embedding$num_umis),
label = paste(EMBEDDING_TITLE_PREFIX, "UMI", sep = "; "),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE * 1.5,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
) +
theme_customized(
y = CB_POSITION[2],
legend_key_size = 2,
legend_text_size = 5
)
p_embedding_MT <- plot_embedding(
embedding = embedding[, c(x_column, y_column)],
color_values = embedding$mt_percentage,
label = paste(EMBEDDING_TITLE_PREFIX, "MT%", sep = "; "),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE * 1.5,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
) +
theme_customized(
y = CB_POSITION[2],
legend_key_size = 2,
legend_text_size = 5
)purrr::reduce(
list(
p_embedding_leiden,
p_embedding_UMI,
p_embedding_MT
),
`+`
) +
patchwork::plot_layout(ncol = 3) +
patchwork::plot_annotation(
theme = theme(plot.margin = margin())
)purrr::map(c(0.4, 0.6, 0.8), \(x) {
values <- embedding |>
dplyr::mutate(
value = case_when(
mt_percentage >= x ~ "1",
TRUE ~ "0"
)
) |>
dplyr::pull(value)
plot_embedding(
embedding = embedding[, c(x_column, y_column)],
color_values = values |> as.factor(),
label = glue::glue(
"{EMBEDDING_TITLE_PREFIX}; MT% >= {scales::percent(x)}; Cells: {sum(as.numeric(values))}"
),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = FALSE,
geom_point_size = GEOM_POINT_SIZE * 1.5,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
) +
theme_customized(
legend_key_size = 2,
legend_text_size = 5
) +
scale_color_manual(
values = c("grey70", "salmon")
)
}) |>
purrr::reduce(
`+`
) +
patchwork::plot_layout(ncol = 3) +
patchwork::plot_annotation(
theme = theme(plot.margin = margin())
)p_embedding_batch <- plot_embedding(
embedding = embedding[, c(x_column, y_column)],
color_values = embedding$batch |> as.factor(),
label = paste(EMBEDDING_TITLE_PREFIX, "Batch", sep = "; "),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED,
legend_size = 2
) +
theme_customized(
legend_key_size = 2,
legend_text_size = 5
)
p_embedding_group <- plot_embedding(
embedding = embedding[, c(x_column, y_column)],
color_values = embedding$group |> as.factor(),
label = paste(EMBEDDING_TITLE_PREFIX, "Group", sep = "; "),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
) +
theme_customized(
legend_key_size = 2,
legend_text_size = 5
)
values <- embedding |>
dplyr::mutate(
value = case_when(
num_features < 200 ~ "1",
TRUE ~ "0"
)
) |>
dplyr::pull(value)
p_embedding_low_complexity <- plot_embedding(
embedding = embedding[, c(x_column, y_column)],
color_values = values |> as.factor(),
label = paste(EMBEDDING_TITLE_PREFIX, "Genes < 200", sep = "; "),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = FALSE,
geom_point_size = GEOM_POINT_SIZE * 1.5,
sort_values = TRUE,
rasterise = RASTERISED,
legend_size = 2
) +
theme_customized(
legend_key_size = 2,
legend_text_size = 5
) +
scale_color_manual(
values = c("grey70", "salmon")
)purrr::reduce(
list(
p_embedding_batch,
p_embedding_group,
p_embedding_low_complexity
),
`+`
) +
patchwork::plot_layout(ncol = 3) +
patchwork::plot_annotation(
theme = theme(plot.margin = margin())
)p_barplot_composition_batch <- calc_group_composition(
data = embedding,
x = "leiden",
group = "batch"
) |>
dplyr::mutate(
leiden = factor(leiden)
) |>
plot_barplot(
x = "leiden",
y = "percentage",
z = "batch",
legend_ncol = 1
)
p_barplot_group <- calc_group_composition(
data = embedding,
x = "leiden",
group = "group"
) |>
dplyr::mutate(
leiden = factor(leiden)
) |>
plot_barplot(
x = "leiden",
y = "percentage",
z = "group",
legend_ncol = 1
)
p_barplot_combined <- list(
p_barplot_composition_batch,
p_barplot_group
) |>
purrr::reduce(`+`) +
patchwork::plot_layout(nrow = 2, guides = "collect") +
patchwork::plot_annotation(
theme = theme(plot.margin = margin())
)
p_barplot_combinedFEATURES_SELECTED <- c(
"ENSG00000204531_POU5F1",
"ENSG00000111704_NANOG",
"ENSG00000171872_KLF17",
"ENSG00000186103_ARGFX",
#
"ENSG00000164736_SOX17",
"ENSG00000125798_FOXA2",
"ENSG00000136574_GATA4",
"ENSG00000134853_PDGFRA",
#
"ENSG00000179348_GATA2",
"ENSG00000070915_SLC12A3",
"ENSG00000165556_CDX2",
"ENSG00000007866_TEAD3"
)purrr::map(FEATURES_SELECTED, \(x) {
selected_feature <- x
cat(selected_feature, "\n")
values <- log10(calc_cpm(matrix_readcount_use[, embedding$cell])[selected_feature, ] + 1)
plot_embedding(
embedding = embedding[, c(x_column, y_column)],
color_values = values,
label = paste(
EMBEDDING_TITLE_PREFIX,
selected_feature |> stringr::str_remove(pattern = "^E.+_"),
sep = "; "
),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE * 1.5,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
) +
scale_color_viridis_c(
na.value = "grey80"
) +
theme_customized(
y = CB_POSITION[2],
legend_key_size = 2,
legend_text_size = 5
)
}) |>
purrr::reduce(`+`) +
patchwork::plot_layout(ncol = 3, byrow = FALSE) +
patchwork::plot_annotation(
theme = theme(plot.margin = margin())
)## ENSG00000204531_POU5F1
## ENSG00000111704_NANOG
## ENSG00000171872_KLF17
## ENSG00000186103_ARGFX
## ENSG00000164736_SOX17
## ENSG00000125798_FOXA2
## ENSG00000136574_GATA4
## ENSG00000134853_PDGFRA
## ENSG00000179348_GATA2
## ENSG00000070915_SLC12A3
## ENSG00000165556_CDX2
## ENSG00000007866_TEAD3
devtools::session_info()$platform## setting value
## version R version 4.1.1 (2021-08-10)
## os macOS Big Sur 11.6
## system x86_64, darwin20.4.0
## ui unknown
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/Chicago
## date 2021-09-24
devtools::session_info()$pack |>
as_tibble() |>
dplyr::select(
package,
loadedversion,
date,
`source`
) |>
# print(n = nrow(.))
gt::gt() |>
gt::tab_options(table.font.size = "median")| package | loadedversion | date | source |
|---|---|---|---|
| assertthat | 0.2.1 | 2019-03-21 | CRAN (R 4.1.1) |
| backports | 1.2.1 | 2020-12-09 | CRAN (R 4.1.1) |
| beeswarm | 0.4.0 | 2021-06-01 | CRAN (R 4.1.1) |
| bit | 4.0.4 | 2020-08-04 | CRAN (R 4.1.1) |
| bit64 | 4.0.5 | 2020-08-30 | CRAN (R 4.1.1) |
| broom | 0.7.9 | 2021-07-27 | CRAN (R 4.1.1) |
| bslib | 0.3.0 | 2021-09-02 | CRAN (R 4.1.1) |
| cachem | 1.0.6 | 2021-08-19 | CRAN (R 4.1.1) |
| callr | 3.7.0 | 2021-04-20 | CRAN (R 4.1.1) |
| cellranger | 1.1.0 | 2016-07-27 | CRAN (R 4.1.1) |
| checkmate | 2.0.0 | 2020-02-06 | CRAN (R 4.1.1) |
| cli | 3.0.1 | 2021-07-17 | CRAN (R 4.1.1) |
| colorspace | 2.0-2 | 2021-06-24 | CRAN (R 4.1.1) |
| crayon | 1.4.1 | 2021-02-08 | CRAN (R 4.1.1) |
| DBI | 1.1.1 | 2021-01-15 | CRAN (R 4.1.1) |
| dbplyr | 2.1.1 | 2021-04-06 | CRAN (R 4.1.1) |
| desc | 1.3.0 | 2021-03-05 | CRAN (R 4.1.1) |
| devtools | 2.4.2 | 2021-08-19 | Github (r-lib/devtools@e10658f) |
| digest | 0.6.27 | 2020-10-24 | CRAN (R 4.1.1) |
| dplyr | 1.0.7.9000 | 2021-09-23 | Github (tidyverse/dplyr@cab38bf) |
| ellipsis | 0.3.2 | 2021-04-29 | CRAN (R 4.1.1) |
| evaluate | 0.14 | 2019-05-28 | CRAN (R 4.1.1) |
| extrafont | 0.17 | 2014-12-08 | CRAN (R 4.1.1) |
| extrafontdb | 1.0 | 2012-06-11 | CRAN (R 4.1.1) |
| fansi | 0.5.0 | 2021-05-25 | CRAN (R 4.1.1) |
| farver | 2.1.0 | 2021-02-28 | CRAN (R 4.1.1) |
| fastmap | 1.1.0 | 2021-01-25 | CRAN (R 4.1.1) |
| forcats | 0.5.1.9000 | 2021-08-15 | Github (tidyverse/forcats@b5fce89) |
| fs | 1.5.0.9000 | 2021-08-15 | Github (r-lib/fs@10e38dd) |
| generics | 0.1.0 | 2020-10-31 | CRAN (R 4.1.1) |
| ggbeeswarm | 0.6.0 | 2017-08-07 | CRAN (R 4.1.1) |
| ggplot2 | 3.3.5.9000 | 2021-09-21 | Github (tidyverse/ggplot2@759c63c) |
| ggrastr | 0.2.3 | 2021-08-15 | Github (VPetukhov/ggrastr@1ef0ff5) |
| glue | 1.4.2 | 2020-08-27 | CRAN (R 4.1.1) |
| gt | 0.3.1 | 2021-08-07 | CRAN (R 4.1.1) |
| gtable | 0.3.0.9000 | 2021-08-15 | Github (r-lib/gtable@0fc53e0) |
| haven | 2.4.3 | 2021-08-04 | CRAN (R 4.1.1) |
| highr | 0.9 | 2021-04-16 | CRAN (R 4.1.1) |
| hms | 1.1.0 | 2021-05-17 | CRAN (R 4.1.1) |
| htmltools | 0.5.2 | 2021-08-25 | CRAN (R 4.1.1) |
| httr | 1.4.2 | 2020-07-20 | CRAN (R 4.1.1) |
| jquerylib | 0.1.4 | 2021-04-26 | CRAN (R 4.1.1) |
| jsonlite | 1.7.2 | 2020-12-09 | CRAN (R 4.1.1) |
| knitr | 1.34 | 2021-09-09 | CRAN (R 4.1.1) |
| labeling | 0.4.2 | 2020-10-20 | CRAN (R 4.1.1) |
| lattice | 0.20-45 | 2021-09-22 | CRAN (R 4.1.1) |
| lifecycle | 1.0.0 | 2021-02-15 | CRAN (R 4.1.1) |
| lubridate | 1.7.10 | 2021-02-26 | CRAN (R 4.1.1) |
| magrittr | 2.0.1 | 2020-11-17 | CRAN (R 4.1.1) |
| Matrix | 1.3-4 | 2021-06-01 | CRAN (R 4.1.1) |
| memoise | 2.0.0 | 2021-01-26 | CRAN (R 4.1.1) |
| modelr | 0.1.8 | 2020-05-19 | CRAN (R 4.1.1) |
| munsell | 0.5.0 | 2018-06-12 | CRAN (R 4.1.1) |
| patchwork | 1.1.0.9000 | 2021-08-15 | Github (thomasp85/patchwork@79223d3) |
| pillar | 1.6.2 | 2021-08-26 | Github (r-lib/pillar@1058bda) |
| pkgbuild | 1.2.0 | 2020-12-15 | CRAN (R 4.1.1) |
| pkgconfig | 2.0.3 | 2019-09-22 | CRAN (R 4.1.1) |
| pkgload | 1.2.2 | 2021-09-11 | CRAN (R 4.1.1) |
| png | 0.1-7 | 2013-12-03 | CRAN (R 4.1.1) |
| prettyunits | 1.1.1.9000 | 2021-08-15 | Github (r-lib/prettyunits@8706d89) |
| processx | 3.5.2 | 2021-04-30 | CRAN (R 4.1.1) |
| ps | 1.6.0 | 2021-02-28 | CRAN (R 4.1.1) |
| purrr | 0.3.4.9000 | 2021-08-15 | Github (tidyverse/purrr@5aca9df) |
| R6 | 2.5.0.9000 | 2021-08-15 | Github (r-lib/R6@1d70936) |
| ragg | 1.1.3.9000 | 2021-08-15 | Github (r-lib/ragg@adf5f06) |
| Rcpp | 1.0.7 | 2021-07-07 | CRAN (R 4.1.1) |
| readr | 2.0.1.9000 | 2021-09-22 | Github (tidyverse/readr@0f37e2b) |
| readxl | 1.3.1.9000 | 2021-08-19 | Github (tidyverse/readxl@649982a) |
| remotes | 2.4.0.9001 | 2021-09-02 | Github (r-lib/remotes@6a0e560) |
| reprex | 2.0.1 | 2021-08-05 | CRAN (R 4.1.1) |
| reticulate | 1.22 | 2021-09-17 | CRAN (R 4.1.1) |
| rlang | 0.4.11.9001 | 2021-09-23 | Github (r-lib/rlang@217ef27) |
| rmarkdown | 2.11.2 | 2021-09-23 | Github (rstudio/rmarkdown@71ae829) |
| rprojroot | 2.0.2 | 2020-11-15 | CRAN (R 4.1.1) |
| rstudioapi | 0.13.0-9000 | 2021-08-15 | Github (rstudio/rstudioapi@96fad1d) |
| Rttf2pt1 | 1.3.9 | 2021-07-22 | CRAN (R 4.1.1) |
| rvest | 1.0.1 | 2021-07-26 | CRAN (R 4.1.1) |
| sass | 0.4.0 | 2021-05-12 | CRAN (R 4.1.1) |
| scales | 1.1.1 | 2020-05-11 | CRAN (R 4.1.1) |
| sessioninfo | 1.1.1 | 2018-11-05 | CRAN (R 4.1.1) |
| stringi | 1.7.4 | 2021-08-25 | CRAN (R 4.1.1) |
| stringr | 1.4.0 | 2019-02-10 | CRAN (R 4.1.1) |
| styler | 1.6.1.9000 | 2021-09-17 | Github (r-lib/styler@0e41d88) |
| systemfonts | 1.0.2 | 2021-05-11 | CRAN (R 4.1.1) |
| testthat | 3.0.4.9000 | 2021-09-23 | Github (r-lib/testthat@74a0fee) |
| textshaping | 0.3.5 | 2021-06-09 | CRAN (R 4.1.1) |
| tibble | 3.1.4.9000 | 2021-08-26 | Github (tidyverse/tibble@e3e40af) |
| tidyr | 1.1.3.9000 | 2021-09-23 | Github (tidyverse/tidyr@a39f8de) |
| tidyselect | 1.1.1 | 2021-04-30 | CRAN (R 4.1.1) |
| tidyverse | 1.3.1.9000 | 2021-08-16 | Github (tidyverse/tidyverse@195d8a4) |
| tzdb | 0.1.2 | 2021-07-20 | CRAN (R 4.1.1) |
| usethis | 2.0.1.9000 | 2021-08-30 | Github (r-lib/usethis@3385e14) |
| utf8 | 1.2.2 | 2021-07-24 | CRAN (R 4.1.1) |
| vctrs | 0.3.8 | 2021-04-29 | CRAN (R 4.1.1) |
| vipor | 0.4.5 | 2017-03-22 | CRAN (R 4.1.1) |
| viridisLite | 0.4.0 | 2021-04-13 | CRAN (R 4.1.1) |
| vroom | 1.5.5.9000 | 2021-09-19 | Github (r-lib/vroom@32c2ba6) |
| withr | 2.4.2 | 2021-04-18 | CRAN (R 4.1.1) |
| xfun | 0.26 | 2021-09-14 | CRAN (R 4.1.1) |
| xml2 | 1.3.2 | 2020-04-23 | CRAN (R 4.1.1) |
| yaml | 2.2.1 | 2020-02-01 | CRAN (R 4.1.1) |