Tyser, R.C.V., Mahammadov, E., Nakanoh, S., Vallier, L., Scialdone, A., and Srinivas, S. (2021). Single-cell transcriptomic characterization of a gastrulating human embryo. Nature 600, 285–289.
Load required packages.
library(tidyverse)
library(Matrix)
library(patchwork)
library(extrafont)Sys.time()## [1] "2022-01-19 00:46:30 CST"
source(
  file = file.path(
    SCRIPT_DIR,
    "utilities.R"
  )
)

#' Highlight every level of a metadata column on an embedding, one panel each.
#'
#' For each factor level of `x[[y]]` a panel is drawn in which cells of that
#' level are coded 1 (salmon), cells with a different non-missing level are
#' coded 0 (light grey), and cells with no value after the join are `NA`
#' (drawn with `na.value`). The number of highlighted cells is shown in the
#' panel title and in the top-right corner.
#'
#' Reads the free variables `x_column`, `y_column`, `GEOM_POINT_SIZE` and
#' `RASTERISED` from the calling environment, and calls `plot_embedding()` and
#' `theme_customized()` (presumably defined in utilities.R; confirm).
#'
#' @param embedding Data frame holding the embedding; it is left-joined to `x`
#'   on whatever columns the two share.
#' @param x Cell metadata data frame. `x[[y]]` must be a factor, because its
#'   `levels()` define the panels.
#' @param y Name (string) of the column of `x` to highlight.
#' @param label Text placed in front of the level name in each panel title.
#' @param n_cols Number of columns of the combined layout.
#' @return The panels combined with patchwork.
plot_embedding_highlight <- function(embedding, x, y, label, n_cols = 3) {
  cell_metadata_selected <- x
  selected_column <- y

  # The join does not depend on the level being highlighted, so run it once
  # instead of once per panel.
  joined <- dplyr::left_join(embedding, cell_metadata_selected)
  group_values <- joined[[selected_column]]

  purrr::map(levels(cell_metadata_selected[[selected_column]]), \(level) {
    # 1 = highlighted level, 0 = other level, NA = no metadata for the cell.
    is_highlighted <- as.integer(group_values == level)
    n_highlighted <- sum(is_highlighted, na.rm = TRUE)
    values <- as.factor(is_highlighted)

    plot_embedding(
      embedding = embedding[, c(x_column, y_column)],
      color_values = values,
      label = glue::glue(
        "{label}; ",
        "{level}: {n_highlighted}"
      ),
      label_position = NULL,
      show_color_value_labels = FALSE,
      show_color_legend = FALSE,
      geom_point_size = GEOM_POINT_SIZE,
      sort_values = TRUE,
      shuffle_values = FALSE,
      rasterise = RASTERISED,
      legend_size = 2
    ) +
      theme_customized(
        legend_key_size = 2,
        legend_text_size = 5
      ) +
      # Named palette: colours are matched by factor level, so the highlighted
      # group stays salmon even when a panel contains no "0" cells.
      ggplot2::scale_color_manual(
        na.translate = TRUE,
        values = c("0" = "grey70", "1" = "salmon"),
        na.value = "#7F7F7F"
      ) +
      ggplot2::annotate(
        geom = "text",
        x = Inf,
        y = Inf,
        label = n_highlighted,
        size = 5 / ggplot2::.pt,
        hjust = 1,
        vjust = 1,
        na.rm = FALSE
      )
  }) |>
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = n_cols) +
    patchwork::plot_annotation(
      theme = ggplot2::theme(plot.margin = ggplot2::margin())
    )
}

PROJECT_DIR <- "/Users/jialei/Dropbox/Data/Projects/UTSW/Peri-implantation"
ad <- reticulate::import(module = "anndata", convert = TRUE)
print(ad$`__version__`)## [1] "0.7.6"
adata_files <- purrr::map(c("PRJEB40781", "PRJEB11202", "PRJNA555602"), \(x) {
file.path(
PROJECT_DIR,
"raw",
"public",
x,
"matrix",
"adata.h5ad"
)
})
purrr::map_lgl(adata_files, file.exists)## [1] TRUE TRUE TRUE
BACKED <- NULL
matrix_readcount_use <- purrr::map(adata_files, function(x) {
ad$read_h5ad(
filename = x, backed = BACKED
) |>
convert_adata()
}) |>
purrr::reduce(cbind)
matrix_readcount_use |> dim()## [1] 33538 12690
BACKED <- "r"
cell_metadata <- purrr::map(adata_files, function(x) {
ad$read_h5ad(
filename = x, backed = BACKED
)$obs |>
tibble::rownames_to_column(var = "cell") |>
dplyr::select(cell, everything())
}) |>
dplyr::bind_rows() |>
dplyr::select(-batch)
cell_metadata |> head()
Check memory usage.
purrr::walk(list(matrix_readcount_use, cell_metadata), function(x) {
print(object.size(x), units = "auto", standard = "SI")
})## 828.5 MB
## 1.3 MB
cell_metadata_PRJEB40781 <- vroom::vroom(
file = file.path(
PROJECT_DIR,
"raw",
"public",
"PRJEB40781",
"matrix",
"cell_metadata.csv"
)
) |>
dplyr::mutate(
lineage = factor(lineage)
)## Rows: 1195 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): cell, run, source_name, developmental_stage, individual, sex, sampl...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
EMBEDDING_FILE <- "embedding_ncomponents15_ccc1_seed20210719.csv.gz"
embedding_1195 <- vroom::vroom(
file = file.path(
PROJECT_DIR,
"raw/public/PRJEB40781",
"clustering/PRJEB40781/star/exploring",
"Scanpy_Harmony",
EMBEDDING_FILE
)
)
embedding_1195 |> head()x_column <- "x_umap_min_dist=0.1"
y_column <- "y_umap_min_dist=0.1"
GEOM_POINT_SIZE <- 1.25
EMBEDDING_TITLE_PREFIX <- "UMAP"
RASTERISED <- TRUEp_embedding_leiden <- plot_embedding(
embedding = embedding_1195[, c(x_column, y_column)],
color_values = embedding_1195$leiden |> as.factor(),
label = paste(EMBEDDING_TITLE_PREFIX, "Leiden", sep = "; "),
label_position = NULL,
show_color_value_labels = TRUE,
show_color_legend = FALSE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED
) +
theme_customized()
# CB_POSITION <- c(0.8, 0.995)
p_embedding_UMI <- plot_embedding(
embedding = embedding_1195[, c(x_column, y_column)],
color_values = embedding_1195 |>
dplyr::left_join(
cell_metadata
) |>
dplyr::pull(num_umis) |>
{
\(x) log10(x)
}(),
label = paste(EMBEDDING_TITLE_PREFIX, "UMI", sep = "; "),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE / 1.5,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
) +
theme_customized(
legend_key_size = 2,
legend_text_size = 5
)
p_embedding_MT <- plot_embedding(
embedding = embedding_1195[, c(x_column, y_column)],
color_values = embedding_1195 |>
dplyr::left_join(cell_metadata) |>
dplyr::pull(mt_percentage),
label = paste(EMBEDDING_TITLE_PREFIX, "MT %", sep = "; "),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
) +
theme_customized(
legend_key_size = 2,
legend_text_size = 5
)
# UMAP of the 1,195 cells coloured by sampling site.
p_embedding_sampling_site <- plot_embedding(
  embedding = embedding_1195[, c(x_column, y_column)],
  color_values = embedding_1195 |>
    dplyr::left_join(
      cell_metadata_PRJEB40781
    ) |>
    dplyr::pull(sampling_site) |>
    as.factor(),
  label = paste(EMBEDDING_TITLE_PREFIX, "Sampling site", sep = "; "),
  label_position = NULL,
  show_color_value_labels = FALSE,
  show_color_legend = TRUE,
  geom_point_size = GEOM_POINT_SIZE / 1.5,
  sort_values = FALSE,
  shuffle_values = TRUE,
  rasterise = RASTERISED
) +
  theme_customized()

# Per-Leiden-cluster QC summary: cell count and median UMIs / features / MT %.
embedding_1195 |>
  dplyr::left_join(
    cell_metadata |>
      dplyr::select(cell, num_umis:mt_percentage)
  ) |>
  # NOTE(review): no column from this second join is used by the summary
  # below; kept so the row set is unchanged.
  dplyr::left_join(
    cell_metadata_PRJEB40781
  ) |>
  dplyr::group_by(
    leiden
  ) |>
  dplyr::summarise(
    num_cells = dplyr::n(),
    median_umis = median(num_umis),
    median_features = median(num_features),
    median_mt_percentage = median(mt_percentage)
  ) |>
  gt::gt() |>
  # "medium" is the CSS font-size keyword; "median" is not a valid value.
  gt::tab_options(table.font.size = "medium") |>
  gt::summary_rows(
    columns = c(num_cells),
    fns = list(
      Sum = ~ sum(.)
    ),
    decimals = 0
  ) |>
  gt::summary_rows(
    columns = c("median_umis", "median_features", "median_mt_percentage"),
    fns = list(
      Median = ~ median(.)
    ),
    decimals = 2
  )
| leiden | num_cells | median_umis | median_features | median_mt_percentage | |
|---|---|---|---|---|---|
| 0 | 189 | 883508.0 | 2581.0 | 0.008930657 | |
| 1 | 138 | 996283.5 | 4297.5 | 0.006288449 | |
| 2 | 134 | 836131.0 | 4346.0 | 0.010772693 | |
| 3 | 115 | 887650.0 | 4906.0 | 0.007057741 | |
| 4 | 96 | 916361.0 | 4618.5 | 0.014087410 | |
| 5 | 85 | 1110656.0 | 3667.0 | 0.005815362 | |
| 6 | 81 | 763755.0 | 5965.0 | 0.008891377 | |
| 7 | 79 | 938071.0 | 4924.0 | 0.006874891 | |
| 8 | 79 | 746953.0 | 5211.0 | 0.008341437 | |
| 9 | 72 | 1010636.5 | 4530.0 | 0.004510133 | |
| 10 | 60 | 940026.0 | 3409.5 | 0.009846117 | |
| 11 | 34 | 1028500.0 | 4321.5 | 0.005531307 | |
| 12 | 33 | 728519.0 | 4344.0 | 0.014426759 | |
| Sum | — | 1,195 | — | — | — |
| Median | — | — | 916,361.00 | 4,346.00 | 0.01 |
# 2 x 2 overview: Leiden clusters, sampling site, UMI depth, MT fraction.
purrr::reduce(
  list(
    p_embedding_leiden,
    p_embedding_sampling_site,
    p_embedding_UMI,
    p_embedding_MT
  ),
  `+`
) +
  patchwork::plot_layout(ncol = 2) +
  patchwork::plot_annotation(
    theme = theme(plot.margin = margin())
  )

# Per-lineage QC summary: cell count and median UMIs / features / MT %.
embedding_1195 |>
  dplyr::left_join(
    cell_metadata
  ) |>
  dplyr::left_join(cell_metadata_PRJEB40781) |>
  dplyr::group_by(lineage) |>
  dplyr::summarise(
    num_cells = dplyr::n(),
    median_umis = median(num_umis),
    median_features = median(num_features),
    median_mt_percentage = median(mt_percentage)
  ) |>
  gt::gt() |>
  # "medium" is the CSS font-size keyword; "median" is not a valid value.
  gt::tab_options(table.font.size = "medium") |>
  gt::summary_rows(
    columns = c(num_cells),
    fns = list(
      Sum = ~ sum(.)
    ),
    decimals = 0
  ) |>
  gt::summary_rows(
    columns = c("median_umis", "median_features", "median_mt_percentage"),
    fns = list(
      Median = ~ median(.)
    ),
    decimals = 2
  )
| lineage | num_cells | median_umis | median_features | median_mt_percentage | |
|---|---|---|---|---|---|
| advanced mesoderm | 164 | 993741.0 | 4164.5 | 0.006218430 | |
| axial mesoderm | 23 | 853681.0 | 4181.0 | 0.007745421 | |
| ectodermal cell | 29 | 996476.0 | 4314.0 | 0.005143689 | |
| emergent mesoderm | 185 | 911736.0 | 4184.0 | 0.007838085 | |
| endodermal cell | 135 | 989383.0 | 4837.0 | 0.005348267 | |
| epiblast cell | 133 | 790584.0 | 5607.0 | 0.009357085 | |
| erythrocyte | 32 | 716602.0 | 4345.5 | 0.014388710 | |
| hemogenic endothelial progenitor | 111 | 925287.0 | 4498.0 | 0.013978306 | |
| nascent mesoderm | 98 | 787231.5 | 4479.0 | 0.008292312 | |
| primitive streak | 202 | 826282.5 | 3781.0 | 0.010169492 | |
| yolk sac mesoderm | 83 | 1124901.0 | 3778.0 | 0.006017290 | |
| Sum | — | 1,195 | — | — | — |
| Median | — | — | 911,736.00 | 4,314.00 | 0.01 |
p_embedding_lineage <- plot_embedding(
embedding = embedding_1195[, c(x_column, y_column)],
color_values = embedding_1195 |>
dplyr::left_join(
cell_metadata_PRJEB40781
) |>
dplyr::pull(lineage) |>
as.factor(),
label = paste(EMBEDDING_TITLE_PREFIX, "Lineage", sep = "; "),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE / 1.5,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED
) +
theme_customized()
list(
p_embedding_lineage,
purrr::map(levels(cell_metadata_PRJEB40781$lineage), \(x) {
values <- embedding_1195 |>
dplyr::left_join(cell_metadata_PRJEB40781) |>
dplyr::mutate(
value = dplyr::case_when(
lineage == x ~ "1",
TRUE ~ "0"
)
) |>
dplyr::pull(value)
plot_embedding(
embedding = embedding_1195[, c(x_column, y_column)],
color_values = factor(values),
label = glue::glue("{EMBEDDING_TITLE_PREFIX}; {x}: {sum(as.integer(values))}"),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = FALSE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = TRUE,
rasterise = RASTERISED,
legend_size = 2
) +
theme_customized(
legend_key_size = 2,
legend_text_size = 5
) +
scale_color_manual(
values = c("grey70", "salmon")
)
})
) |>
purrr::reduce(`+`) +
patchwork::plot_layout(ncol = 2) +
patchwork::plot_annotation(
theme = theme(plot.margin = margin())
)p_barplot_composition_sampling_site <- calc_group_composition(
data = embedding_1195 |>
dplyr::left_join(
cell_metadata_PRJEB40781
),
x = "leiden",
group = "sampling_site"
) |>
dplyr::mutate(
leiden = factor(leiden)
) |>
plot_barplot(
x = "leiden",
y = "percentage",
z = "sampling_site",
legend_ncol = 1
)
p_barplot_composition_lineage <- calc_group_composition(
data = embedding_1195 |>
dplyr::left_join(
cell_metadata_PRJEB40781
),
x = "leiden",
group = "lineage"
) |>
dplyr::mutate(
leiden = factor(leiden)
) |>
plot_barplot(
x = "leiden",
y = "percentage",
z = "lineage",
legend_ncol = 1
)
# Stack the two cluster-composition barplots and share their legends.
list(
  p_barplot_composition_sampling_site,
  p_barplot_composition_lineage
) |>
  purrr::reduce(`+`) +
  patchwork::plot_layout(ncol = 1, guides = "collect") +
  patchwork::plot_annotation(
    theme = theme(plot.margin = margin())
  )

# Marker genes to display, in "<Ensembl ID>_<symbol>" form; the bare `#`
# lines separate three groups of four.
FEATURES_SELECTED <- c(
  "ENSG00000204531_POU5F1",
  "ENSG00000111704_NANOG",
  "ENSG00000171872_KLF17",
  "ENSG00000186103_ARGFX",
  #
  "ENSG00000164736_SOX17",
  "ENSG00000125798_FOXA2",
  "ENSG00000136574_GATA4",
  "ENSG00000134853_PDGFRA",
  #
  "ENSG00000179348_GATA2",
  "ENSG00000070915_SLC12A3",
  "ENSG00000165556_CDX2",
  "ENSG00000007866_TEAD3"
)

# calc_cpm() depends only on the cell subset, not on the feature being
# plotted, so compute it once and keep just the rows that are displayed.
matrix_cpm_selected <- calc_cpm(
  matrix_readcount_use[, embedding_1195$cell]
)[FEATURES_SELECTED, , drop = FALSE]

# One expression panel per selected feature, coloured by log10(CPM + 1).
purrr::map(FEATURES_SELECTED, \(x) {
  selected_feature <- x
  cat(selected_feature, "\n")
  values <- log10(matrix_cpm_selected[selected_feature, ] + 1)
  plot_embedding(
    embedding = embedding_1195[, c(x_column, y_column)],
    color_values = values,
    label = paste(
      EMBEDDING_TITLE_PREFIX,
      # Drop the leading "E..._" prefix so only the gene symbol is shown.
      selected_feature |> stringr::str_remove(pattern = "^E.+_"),
      sep = "; "
    ),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = GEOM_POINT_SIZE,
    sort_values = TRUE,
    shuffle_values = FALSE,
    rasterise = RASTERISED,
    legend_size = 2
  ) +
    scale_color_viridis_c(
      na.value = "grey80"
    ) +
    theme_customized(
      legend_key_size = 2,
      legend_text_size = 5
    )
}) |>
  purrr::reduce(`+`) +
  patchwork::plot_layout(ncol = 3, byrow = FALSE) +
  patchwork::plot_annotation(
    theme = theme(plot.margin = margin())
  )
## ENSG00000204531_POU5F1
## ENSG00000111704_NANOG
## ENSG00000171872_KLF17
## ENSG00000186103_ARGFX
## ENSG00000164736_SOX17
## ENSG00000125798_FOXA2
## ENSG00000136574_GATA4
## ENSG00000134853_PDGFRA
## ENSG00000179348_GATA2
## ENSG00000070915_SLC12A3
## ENSG00000165556_CDX2
## ENSG00000007866_TEAD3
EMBEDDING_FILE <- glue::glue(
"embedding_ncomponents18_ccc1_seed20210719.csv.gz"
)
embedding <- vroom::vroom(
file = file.path(
PROJECT_DIR,
"raw",
"public",
"PRJEB40781",
"clustering",
"PRJEB40781_PRJEB11202_PRJNA555602",
"exploring",
"Scanpy_Harmony",
EMBEDDING_FILE
)
) |>
dplyr::mutate(
study = dplyr::case_when(
batch %in% c("GSM3956280", "GSM3956281") ~ "PRJNA555602",
TRUE ~ batch
),
study = factor(study)
)
embedding |> head()

# Lookup table: BioProject accession -> short citation. Each accession is
# listed once so the resulting named vector has unique names.
studies <- tibble::tribble(
  ~bioproject, ~citation,
  "PRJEB11202", "Petropoulos et al. 2016",
  "PRJEB40781", "Tyser et al. 2021",
  "PRJNA555602", "Zheng et al. 2019",
  "PRJNA431392", "Zhou et al. 2019",
  "PRJNA737139", "Kagawa et al. 2021",
  "PRJNA632839", "Yu et al. 2021",
  "PRJNA658478", "Liu et al. 2021",
  "PRJNA720968", "Yanagida et al. 2021",
  "PRJNA667174", "Fan et al. 2021",
  "PRJNA738498", "Sozen et al. 2021"
)
studies <- setNames(
  object = studies$citation,
  nm = studies$bioproject
)

# Number of cells contributed by each study.
embedding |>
  dplyr::count(study, name = "num_cells") |>
  dplyr::mutate(
    # `study` is a factor; subsetting with a factor uses its integer codes,
    # not its labels, so convert to character to look citations up by name.
    study = unname(studies[as.character(study)])
  ) |>
  gt::gt() |>
  # "medium" is the CSS font-size keyword; "median" is not a valid value.
  gt::tab_options(table.font.size = "medium") |>
  gt::summary_rows(
    columns = c(num_cells),
    fns = list(
      Sum = ~ sum(.)
    ),
    decimals = 0
  )
| study | num_cells | |
|---|---|---|
| Petropoulos et al. 2016 | 1529 | |
| Tyser et al. 2021 | 1195 | |
| Zheng et al. 2019 | 9966 | |
| Sum | — | 12,690 |
# PRJEB11202; Petropoulos et al. 2016
cell_metadata_PRJEB11202 <- vroom::vroom(
file = file.path(
PROJECT_DIR,
"raw",
"public",
"PRJEB11202",
"matrix/cell_metadata.csv"
)
) |>
dplyr::mutate(
developmental_stage = stringr::str_remove(
string = individual,
pattern = "\\..+$"
),
developmental_stage = factor(
developmental_stage,
levels = stringr::str_sort(
unique(developmental_stage),
numeric = TRUE
)
),
#
lineage = factor(
inferred_lineage,
levels = c(
"epiblast",
"primitive_endoderm",
"trophectoderm",
"not_applicable"
)
)
) |>
dplyr::select(
cell, lineage, developmental_stage
)
# PRJNA555602; Zheng et al. 2019
cell_metadata_PRJNA555602 <- vroom::vroom(
file = file.path(
PROJECT_DIR,
"raw",
"public",
"PRJNA555602",
"matrix/cell_metadata.csv"
)
) |>
dplyr::mutate(
lineage = dplyr::case_when(
# amniotic ectoderm-like cells
rna_snn_res_0.3 == 0 ~ "Transwell-AMLC",
# mesoderm-like cell 2
rna_snn_res_0.3 == 1 ~ "MeLC2",
rna_snn_res_0.3 == 2 ~ "Human ES cell",
# mesoderm-like cell 1
rna_snn_res_0.3 == 3 ~ "MeLC1",
# human PGC-like cells
rna_snn_res_0.3 == 4 ~ "hPGCLC",
rna_snn_res_0.3 == 5 ~ "AMLC"
),
lineage = factor(
lineage,
levels = c(
"Human ES cell",
"hPGCLC",
"MeLC1",
"MeLC2",
"AMLC",
"Transwell-AMLC"
)
),
source = dplyr::case_when(
orig_ident == "10X_Embryoid" ~ "Embryoid",
orig_ident == "10X_H9_Amnion" ~ "H9_Amnion"
),
source = factor(source)
)x_column <- "x_pacmap"
y_column <- "y_pacmap"
EMBEDDING_TITLE_PREFIX <- "PaCMAP"
GEOM_POINT_SIZE <- 0.4
p_embedding_leiden <- plot_embedding(
embedding = embedding[, c(x_column, y_column)],
color_values = embedding$leiden |> as.factor(),
label = glue::glue("{EMBEDDING_TITLE_PREFIX}; Leiden"),
label_position = NULL,
show_color_value_labels = TRUE,
show_color_legend = FALSE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = FALSE,
shuffle_values = FALSE,
rasterise = RASTERISED
) +
theme_customized()
p_embedding_UMI <- plot_embedding(
embedding = embedding[, c(x_column, y_column)],
color_values = log10(Matrix::colSums(matrix_readcount_use[, embedding$cell])),
label = glue::glue("{EMBEDDING_TITLE_PREFIX}; UMI"),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE * 1,
geom_point_alpha = 1,
sort_values = TRUE,
shuffle_values = FALSE,
label_size = 2.5,
label_hjust = 0,
label_vjust = 0,
rasterise = RASTERISED,
legend_size = 2,
legend_ncol = 1
) +
theme_customized(
legend_key_size = 2,
legend_text_size = 5
)
p_embedding_MT <- plot_embedding(
embedding = embedding[, c(x_column, y_column)],
color_values = embedding |>
dplyr::left_join(
cell_metadata # |> dplyr::select(-batch)
) |>
dplyr::pull(mt_percentage),
label = glue::glue("{EMBEDDING_TITLE_PREFIX}; MT %"),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE * 1,
geom_point_alpha = 1,
sort_values = TRUE,
shuffle_values = FALSE,
label_size = 2.5,
label_hjust = 0,
label_vjust = 0,
rasterise = RASTERISED,
legend_size = 2,
legend_ncol = 1
) +
theme_customized(
legend_key_size = 2,
legend_text_size = 5
)
# Integrated embedding coloured by study of origin; legend entries are
# relabelled from BioProject accession to citation via `studies`.
p_embedding_study <- plot_embedding(
  embedding = embedding[, c(x_column, y_column)],
  color_values = embedding$study,
  label = glue::glue("{EMBEDDING_TITLE_PREFIX}; Study"),
  label_position = NULL,
  show_color_value_labels = FALSE,
  show_color_legend = TRUE,
  geom_point_size = GEOM_POINT_SIZE / 2,
  sort_values = FALSE,
  shuffle_values = TRUE,
  rasterise = RASTERISED,
  legend_size = 2
) +
  theme_customized(
    legend_key_size = 2,
    legend_text_size = 5
  ) +
  scale_color_manual(
    # Size the palette by the variable that is actually plotted (`study`),
    # not by `batch`, which splits PRJNA555602 into two GSM samples.
    values = scales::hue_pal()(n = nlevels(embedding$study)),
    labels = studies
  )

# Per-Leiden-cluster QC summary of the integrated data set.
embedding |>
  dplyr::left_join(
    cell_metadata |>
      dplyr::select(cell, num_umis:mt_percentage)
  ) |>
  dplyr::group_by(
    leiden
  ) |>
  dplyr::summarise(
    num_cells = dplyr::n(),
    median_umis = median(num_umis),
    median_features = median(num_features),
    median_mt_percentage = median(mt_percentage)
  ) |>
  gt::gt() |>
  # "medium" is the CSS font-size keyword; "median" is not a valid value.
  gt::tab_options(table.font.size = "medium") |>
  gt::summary_rows(
    columns = c(num_cells),
    fns = list(
      Sum = ~ sum(.)
    ),
    decimals = 0
  ) |>
  gt::summary_rows(
    columns = c("median_umis", "median_features", "median_mt_percentage"),
    fns = list(
      Median = ~ median(.)
    ),
    decimals = 2
  )
| leiden | num_cells | median_umis | median_features | median_mt_percentage | |
|---|---|---|---|---|---|
| 0 | 1179 | 27676.0 | 5089.0 | 0.025830026 | |
| 1 | 1156 | 21609.5 | 4597.0 | 0.033986369 | |
| 2 | 1141 | 22175.0 | 4243.0 | 0.043475207 | |
| 3 | 1124 | 28028.0 | 5113.0 | 0.043416238 | |
| 4 | 969 | 23013.0 | 4643.0 | 0.026599176 | |
| 5 | 892 | 28762.0 | 4966.5 | 0.034163936 | |
| 6 | 868 | 29193.5 | 5040.0 | 0.025966430 | |
| 7 | 826 | 23003.0 | 4499.5 | 0.030224185 | |
| 8 | 797 | 22893.0 | 4362.0 | 0.041594258 | |
| 9 | 647 | 1348781.0 | 9991.0 | 0.008139240 | |
| 10 | 602 | 861400.5 | 4880.5 | 0.008097055 | |
| 11 | 472 | 1787005.5 | 10772.5 | 0.006137011 | |
| 12 | 434 | 24493.5 | 4661.0 | 0.035983814 | |
| 13 | 345 | 31267.0 | 5475.0 | 0.039519924 | |
| 14 | 324 | 1607461.5 | 10278.0 | 0.007017669 | |
| 15 | 235 | 26117.0 | 4854.0 | 0.047443806 | |
| 16 | 228 | 881789.5 | 2682.5 | 0.009192114 | |
| 17 | 178 | 1092848.0 | 3854.0 | 0.005902158 | |
| 18 | 131 | 883583.0 | 4508.0 | 0.014213556 | |
| 19 | 84 | 1570985.0 | 10117.5 | 0.007988063 | |
| 20 | 58 | 958118.0 | 4789.5 | 0.004154967 | |
| Sum | — | 12,690 | — | — | — |
| Median | — | — | 29,193.50 | 4,854.00 | 0.03 |
# 2 x 2 overview: Leiden clusters, study, UMI depth, MT fraction.
purrr::reduce(
  list(
    p_embedding_leiden,
    p_embedding_study,
    p_embedding_UMI,
    p_embedding_MT
  ),
  `+`
) +
  patchwork::plot_layout(ncol = 2) +
  patchwork::plot_annotation(
    theme = theme(plot.margin = margin())
  )

# One panel per study, with that study's cells highlighted in salmon.
# Iterate over character labels: `embedding$study` is a factor, and using a
# factor element to subset `studies` would index by integer code, not name.
purrr::map(as.character(unique(embedding$study)), \(x) {
  is_selected <- as.integer(embedding$study == x)
  n_cells <- sum(is_selected, na.rm = TRUE)
  plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = as.factor(is_selected),
    label = glue::glue(
      "{EMBEDDING_TITLE_PREFIX}; {studies[x]}, ",
      "{n_cells}"
    ),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = FALSE,
    geom_point_size = GEOM_POINT_SIZE,
    sort_values = TRUE,
    shuffle_values = FALSE,
    rasterise = RASTERISED,
    legend_size = 2
  ) +
    theme_customized(
      legend_key_size = 2,
      legend_text_size = 5
    ) +
    scale_color_manual(
      values = c("grey70", "salmon")
    )
}) |>
  purrr::reduce(`+`) +
  patchwork::plot_layout(ncol = 2) +
  patchwork::plot_annotation(
    theme = ggplot2::theme(plot.margin = ggplot2::margin())
  )

# Sequencing platform per BioProject, keyed by accession so the assignment
# does not depend on the row order produced by summarise().
sequencing_platforms <- c(
  PRJEB11202 = "Smart-Seq2",
  PRJEB40781 = "Smart-Seq2",
  PRJNA555602 = "10x Genomics"
)

# Per-study QC summary: cell count and median UMIs / features / MT %.
embedding |>
  dplyr::left_join(
    cell_metadata |>
      dplyr::select(cell, num_umis:mt_percentage)
  ) |>
  dplyr::group_by(
    study
  ) |>
  dplyr::summarise(
    num_cells = dplyr::n(),
    median_umis = median(num_umis),
    median_features = median(num_features),
    median_mt_percentage = median(mt_percentage)
  ) |>
  dplyr::mutate(
    # `platform` must be derived before `study` is overwritten with the
    # citation; both lookups go through as.character() because `study` is a
    # factor and would otherwise index by integer code.
    platform = unname(sequencing_platforms[as.character(study)]),
    study = unname(studies[as.character(study)])
  ) |>
  dplyr::select(
    study, platform, everything()
  ) |>
  gt::gt() |>
  # "medium" is the CSS font-size keyword; "median" is not a valid value.
  gt::tab_options(table.font.size = "medium") |>
  gt::summary_rows(
    columns = c(num_cells),
    fns = list(
      Sum = ~ sum(.)
    ),
    decimals = 0
  ) |>
  gt::summary_rows(
    columns = c("median_umis", "median_features", "median_mt_percentage"),
    fns = list(
      Median = ~ median(.)
    ),
    decimals = 2
  )
| study | platform | num_cells | median_umis | median_features | median_mt_percentage | |
|---|---|---|---|---|---|---|
| Petropoulos et al. 2016 | Smart-Seq2 | 1529 | 1551093 | 10305 | 0.007144702 | |
| Tyser et al. 2021 | Smart-Seq2 | 1195 | 899241 | 4379 | 0.008164051 | |
| Zheng et al. 2019 | 10x Genomics | 9966 | 25264 | 4772 | 0.034485263 | |
| Sum | — | — | 12,690 | — | — | — |
| Median | — | — | — | 899,241.00 | 4,772.00 | 0.01 |
bioproject <- "PRJEB11202"
cell_metadata_selected <- cell_metadata_PRJEB11202
selected_column <- "developmental_stage"
p_embedding_developmental_stage <- plot_embedding(
embedding = embedding[, c(x_column, y_column)],
color_values = embedding |>
dplyr::left_join(cell_metadata_selected) |>
dplyr::pull(.data[[selected_column]]),
label = glue::glue(
"{EMBEDDING_TITLE_PREFIX}; {studies[bioproject]}; ",
"{selected_column |>
stringr::str_to_title() |>
stringr::str_replace(pattern = \"_\", replacement = \" \")}"
),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
) +
theme_customized(
legend_key_size = 2,
legend_text_size = 5
) +
scale_color_manual(
na.translate = TRUE,
values = scales::hue_pal()(n = length(unique(cell_metadata_selected[[selected_column]]))),
na.value = "#7F7F7F"
)
selected_column <- "lineage"
p_embedding_lineage <- plot_embedding(
embedding = embedding[, c(x_column, y_column)],
color_values = embedding |>
dplyr::left_join(cell_metadata_selected) |>
dplyr::pull(.data[[selected_column]]),
label = glue::glue(
"{EMBEDDING_TITLE_PREFIX}; {studies[bioproject]}; ",
"{selected_column |>
stringr::str_to_title() |>
stringr::str_replace(pattern = \"_\", replacement = \" \")}"
),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
) +
theme_customized(
legend_key_size = 2,
legend_text_size = 5
) +
scale_color_manual(
na.translate = TRUE,
values = scales::hue_pal()(n = length(unique(cell_metadata_selected[[selected_column]]))),
na.value = "#7F7F7F"
)
list(
p_embedding_lineage,
p_embedding_developmental_stage
) |>
purrr::reduce(`+`) +
patchwork::plot_layout(ncol = 2) +
patchwork::plot_annotation(
theme = ggplot2::theme(plot.margin = ggplot2::margin())
)
Salmon: highlighted group of cells; Light grey: cells belonging to this dataset but not the highlighted group; Dark grey: cells belonging to other datasets.
plot_embedding_highlight(
embedding = embedding,
x = cell_metadata_selected,
y = "lineage",
label = studies[bioproject],
n_cols = 2
)plot_embedding_highlight(
embedding = embedding,
x = cell_metadata_selected,
y = "developmental_stage",
label = studies[bioproject],
n_cols = 2
)bioproject <- "PRJNA555602"
cell_metadata_selected <- cell_metadata_PRJNA555602
selected_column <- "source"
p_embedding_developmental_stage <- plot_embedding(
embedding = embedding[, c(x_column, y_column)],
color_values = embedding |>
dplyr::left_join(cell_metadata_selected) |>
dplyr::pull(.data[[selected_column]]),
label = glue::glue(
"{EMBEDDING_TITLE_PREFIX}; {studies[bioproject]}; ",
"{selected_column |>
stringr::str_to_title() |>
stringr::str_replace(pattern = \"_\", replacement = \" \")}"
),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED,
legend_size = 2
) +
theme_customized(
legend_key_size = 2,
legend_text_size = 5
) +
scale_color_manual(
na.translate = TRUE,
values = scales::hue_pal()(n = length(unique(cell_metadata_selected[[selected_column]]))),
na.value = "#7F7F7F"
)
selected_column <- "lineage"
p_embedding_lineage <- plot_embedding(
embedding = embedding[, c(x_column, y_column)],
color_values = embedding |>
dplyr::left_join(cell_metadata_selected) |>
dplyr::pull(.data[[selected_column]]),
label = glue::glue(
"{EMBEDDING_TITLE_PREFIX}; {studies[bioproject]}; ",
"{selected_column |>
stringr::str_to_title() |>
stringr::str_replace(pattern = \"_\", replacement = \" \")}"
),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED,
legend_size = 2
) +
theme_customized(
legend_key_size = 2,
legend_text_size = 5
) +
scale_color_manual(
na.translate = TRUE,
values = scales::hue_pal()(n = length(unique(cell_metadata_selected[[selected_column]]))),
na.value = "#7F7F7F"
)
list(
p_embedding_lineage,
p_embedding_developmental_stage
) |>
purrr::reduce(`+`) +
patchwork::plot_layout(ncol = 2) +
patchwork::plot_annotation(
theme = ggplot2::theme(plot.margin = ggplot2::margin())
)bioproject <- "PRJEB40781"
cell_metadata_selected <- cell_metadata_PRJEB40781
selected_column <- "sampling_site"
p_embedding_sampling_site <- plot_embedding(
embedding = embedding[, c(x_column, y_column)],
color_values = embedding |>
dplyr::left_join(cell_metadata_selected) |>
dplyr::pull(.data[[selected_column]]),
label = glue::glue(
"{EMBEDDING_TITLE_PREFIX}; {studies[bioproject]}; ",
"{selected_column |>
stringr::str_to_title() |>
stringr::str_replace(pattern = \"_\", replacement = \" \")}"
),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
) +
theme_customized(
legend_key_size = 2,
legend_text_size = 5
) +
scale_color_manual(
na.translate = TRUE,
values = scales::hue_pal()(n = length(unique(cell_metadata_selected[[selected_column]]))),
na.value = "grey70"
)
selected_column <- "lineage"
p_embedding_lineage <- plot_embedding(
embedding = embedding[, c(x_column, y_column)],
color_values = embedding |>
dplyr::left_join(cell_metadata_selected) |>
dplyr::pull(.data[[selected_column]]),
label = glue::glue(
"{EMBEDDING_TITLE_PREFIX}; {studies[bioproject]}; ",
"{selected_column |>
stringr::str_to_title() |>
stringr::str_replace(pattern = \"_\", replacement = \" \")}"
),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
) +
theme_customized(
legend_key_size = 2,
legend_text_size = 5
) +
scale_color_manual(
na.translate = TRUE,
values = scales::hue_pal()(n = length(unique(cell_metadata_selected[[selected_column]]))),
na.value = "#7F7F7F"
)
list(
p_embedding_lineage,
p_embedding_sampling_site
) |>
purrr::reduce(`+`) +
patchwork::plot_layout(ncol = 2) +
patchwork::plot_annotation(
theme = ggplot2::theme(plot.margin = ggplot2::margin())
)plot_embedding_highlight(
embedding = embedding,
x = cell_metadata_selected,
y = "lineage",
label = studies[bioproject],
n_cols = 2
)devtools::session_info()$platform## setting value
## version R version 4.1.2 (2021-11-01)
## os macOS Monterey 12.1
## system aarch64, darwin20.6.0
## ui unknown
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/Chicago
## date 2022-01-19
## pandoc 2.14.0.3 @ /Applications/RStudio.app/Contents/MacOS/pandoc/ (via rmarkdown)
# Table of loaded packages with version, date and installation source.
# Use the full element name `packages`: `$pack` only works through partial
# matching of list names.
devtools::session_info()$packages |>
  tibble::as_tibble() |>
  dplyr::select(
    package,
    loadedversion,
    date,
    `source`
  ) |>
  # print(n = nrow(.))
  gt::gt() |>
  # "medium" is the CSS font-size keyword; "median" is not a valid value.
  gt::tab_options(table.font.size = "medium")
| package | loadedversion | date | source |
|---|---|---|---|
| assertthat | 0.2.1 | 2019-03-21 | CRAN (R 4.1.1) |
| backports | 1.4.1 | 2021-12-13 | CRAN (R 4.1.2) |
| beeswarm | 0.4.0 | 2021-06-01 | CRAN (R 4.1.2) |
| bit | 4.0.4 | 2020-08-04 | CRAN (R 4.1.1) |
| bit64 | 4.0.5 | 2020-08-30 | CRAN (R 4.1.1) |
| brio | 1.1.3 | 2021-11-30 | CRAN (R 4.1.2) |
| broom | 0.7.11 | 2022-01-03 | CRAN (R 4.1.2) |
| bslib | 0.3.1 | 2021-10-06 | CRAN (R 4.1.1) |
| cachem | 1.0.6 | 2021-08-19 | CRAN (R 4.1.1) |
| callr | 3.7.0 | 2021-04-20 | CRAN (R 4.1.1) |
| cellranger | 1.1.0 | 2016-07-27 | CRAN (R 4.1.1) |
| checkmate | 2.0.0 | 2020-02-06 | CRAN (R 4.1.1) |
| cli | 3.1.0 | 2021-10-27 | CRAN (R 4.1.1) |
| codetools | 0.2-18 | 2020-11-04 | CRAN (R 4.1.2) |
| colorspace | 2.0-2 | 2021-06-24 | CRAN (R 4.1.1) |
| crayon | 1.4.2 | 2021-10-29 | CRAN (R 4.1.1) |
| data.table | 1.14.2 | 2021-09-27 | CRAN (R 4.1.1) |
| DBI | 1.1.2 | 2021-12-20 | CRAN (R 4.1.2) |
| dbplyr | 2.1.1 | 2021-04-06 | CRAN (R 4.1.1) |
| desc | 1.4.0 | 2021-09-28 | CRAN (R 4.1.1) |
| devtools | 2.4.3.9000 | 2022-01-15 | Github (r-lib/devtools@e2f25cd69031c8d2099106baed894df4109cb7a4) |
| digest | 0.6.29 | 2021-12-01 | CRAN (R 4.1.2) |
| dplyr | 1.0.7.9000 | 2022-01-12 | Github (tidyverse/dplyr@05013358ace44fe17a51395d49d384232d18d6c1) |
| dtplyr | 1.2.0 | 2021-12-05 | CRAN (R 4.1.2) |
| ellipsis | 0.3.2 | 2021-04-29 | CRAN (R 4.1.1) |
| evaluate | 0.14 | 2019-05-28 | CRAN (R 4.1.1) |
| extrafont | 0.17 | 2014-12-08 | CRAN (R 4.1.1) |
| extrafontdb | 1.0 | 2012-06-11 | CRAN (R 4.1.1) |
| fansi | 1.0.2 | 2022-01-14 | CRAN (R 4.1.2) |
| farver | 2.1.0 | 2021-02-28 | CRAN (R 4.1.1) |
| fastmap | 1.1.0 | 2021-01-25 | CRAN (R 4.1.1) |
| forcats | 0.5.1.9000 | 2021-11-29 | Github (tidyverse/forcats@b4dade0636a46543c30b0b647d97c3ce697c0ada) |
| fs | 1.5.2.9000 | 2021-12-09 | Github (r-lib/fs@6d1182fea7e1c1ddbef3b0bba37c0b0a2e09749c) |
| gargle | 1.2.0 | 2021-07-02 | CRAN (R 4.1.1) |
| generics | 0.1.1 | 2021-10-25 | CRAN (R 4.1.1) |
| ggbeeswarm | 0.6.0 | 2017-08-07 | CRAN (R 4.1.2) |
| ggplot2 | 3.3.5 | 2021-06-25 | CRAN (R 4.1.1) |
| ggrastr | 1.0.1 | 2021-12-08 | Github (VPetukhov/ggrastr@7aed9af2b9cffabda86e6d2af2fa10d4e60cc63d) |
| glue | 1.6.0.9000 | 2021-12-21 | Github (tidyverse/glue@76793ef2c376140350c0e1909e66fd404a52b1ef) |
| googledrive | 2.0.0 | 2021-07-08 | CRAN (R 4.1.1) |
| googlesheets4 | 1.0.0 | 2021-07-21 | CRAN (R 4.1.1) |
| gt | 0.3.1.9000 | 2022-01-17 | Github (rstudio/gt@fcabb414c55b70c9e445fbedfb24d52fe394ba61) |
| gtable | 0.3.0.9000 | 2021-10-28 | Github (r-lib/gtable@a0bd2721a0a31c8b4391b84aabe98f8c85881140) |
| haven | 2.4.3 | 2021-08-04 | CRAN (R 4.1.1) |
| highr | 0.9 | 2021-04-16 | CRAN (R 4.1.1) |
| hms | 1.1.1 | 2021-09-26 | CRAN (R 4.1.1) |
| htmltools | 0.5.2 | 2021-08-25 | CRAN (R 4.1.1) |
| httr | 1.4.2 | 2020-07-20 | CRAN (R 4.1.1) |
| jquerylib | 0.1.4 | 2021-04-26 | CRAN (R 4.1.1) |
| jsonlite | 1.7.3 | 2022-01-17 | CRAN (R 4.1.2) |
| knitr | 1.37.1 | 2021-12-21 | https://yihui.r-universe.dev (R 4.1.2) |
| labeling | 0.4.2 | 2020-10-20 | CRAN (R 4.1.1) |
| lattice | 0.20-45 | 2021-09-22 | CRAN (R 4.1.2) |
| lifecycle | 1.0.1 | 2021-09-24 | CRAN (R 4.1.1) |
| lubridate | 1.8.0 | 2022-01-15 | Github (tidyverse/lubridate@53e5892a548b3425d6c3bf887542aa105341ab73) |
| magrittr | 2.0.1 | 2020-11-17 | CRAN (R 4.1.1) |
| Matrix | 1.4-0 | 2021-12-08 | CRAN (R 4.1.2) |
| memoise | 2.0.1 | 2021-11-26 | CRAN (R 4.1.2) |
| modelr | 0.1.8.9000 | 2021-10-27 | Github (tidyverse/modelr@16168e0624215d9d1a008f3a85de30aeb75302f6) |
| munsell | 0.5.0 | 2018-06-12 | CRAN (R 4.1.1) |
| patchwork | 1.1.0.9000 | 2021-10-27 | Github (thomasp85/patchwork@79223d3002e7bd7e715a270685c6507d684b2622) |
| pillar | 1.6.4 | 2021-10-18 | CRAN (R 4.1.1) |
| pkgbuild | 1.3.1 | 2021-12-20 | CRAN (R 4.1.2) |
| pkgconfig | 2.0.3 | 2019-09-22 | CRAN (R 4.1.1) |
| pkgload | 1.2.4 | 2021-11-30 | CRAN (R 4.1.2) |
| png | 0.1-7 | 2013-12-03 | CRAN (R 4.1.1) |
| prettyunits | 1.1.1 | 2020-01-24 | CRAN (R 4.1.1) |
| processx | 3.5.2 | 2021-04-30 | CRAN (R 4.1.1) |
| ps | 1.6.0 | 2021-02-28 | CRAN (R 4.1.1) |
| purrr | 0.3.4 | 2020-04-17 | CRAN (R 4.1.1) |
| R.cache | 0.15.0 | 2021-04-30 | CRAN (R 4.1.1) |
| R.methodsS3 | 1.8.1 | 2020-08-26 | CRAN (R 4.1.1) |
| R.oo | 1.24.0 | 2020-08-26 | CRAN (R 4.1.1) |
| R.utils | 2.11.0 | 2021-09-26 | CRAN (R 4.1.1) |
| R6 | 2.5.1.9000 | 2021-12-09 | Github (r-lib/R6@1b05b89f30fe6713cb9ff51d91fc56bd3016e4b2) |
| ragg | 1.2.1.9000 | 2021-12-08 | Github (r-lib/ragg@c68c6665ef894f16c006333658b32bf25d2e9d19) |
| Rcpp | 1.0.8 | 2022-01-13 | CRAN (R 4.1.2) |
| readr | 2.1.1 | 2021-11-30 | CRAN (R 4.1.2) |
| readxl | 1.3.1.9000 | 2022-01-18 | Github (tidyverse/readxl@03258a3b2341ce600ee0af56851c80c35d6245ef) |
| remotes | 2.4.2 | 2021-12-02 | Github (r-lib/remotes@fcad17b68b7a19d5363d64adfb0a0426a3a5b3db) |
| reprex | 2.0.1 | 2021-08-05 | CRAN (R 4.1.1) |
| reticulate | 1.23 | 2022-01-14 | CRAN (R 4.1.2) |
| rlang | 0.99.0.9003 | 2022-01-18 | Github (r-lib/rlang@d79ab3a1ab1ce8ca5bb0ebc6ab0454cb10fa4dd1) |
| rmarkdown | 2.11.9 | 2022-01-18 | Github (rstudio/rmarkdown@d0d3b08bf78b6cd900d0505fb7141037e117c6b2) |
| rprojroot | 2.0.2 | 2020-11-15 | CRAN (R 4.1.1) |
| rstudioapi | 0.13.0-9000 | 2022-01-15 | Github (rstudio/rstudioapi@5d0f0873dc160779c71bf4b00d8b016b898f6fb5) |
| Rttf2pt1 | 1.3.9 | 2021-07-22 | CRAN (R 4.1.1) |
| rvest | 1.0.2 | 2021-10-16 | CRAN (R 4.1.1) |
| sass | 0.4.0 | 2021-05-12 | CRAN (R 4.1.1) |
| scales | 1.1.1 | 2020-05-11 | CRAN (R 4.1.1) |
| sessioninfo | 1.2.2 | 2021-12-06 | CRAN (R 4.1.2) |
| stringi | 1.7.6 | 2021-11-29 | CRAN (R 4.1.2) |
| stringr | 1.4.0.9000 | 2022-01-17 | Github (tidyverse/stringr@3848cd70b1e331e6c20401e4da518ff4c3725324) |
| styler | 1.6.2.9000 | 2022-01-17 | Github (r-lib/styler@9274aed613282eca01909ae8c341224055d9c928) |
| systemfonts | 1.0.3.9000 | 2021-12-07 | Github (r-lib/systemfonts@414114e645efb316def3d8de1056d855f92d588e) |
| testthat | 3.1.1.9000 | 2022-01-13 | Github (r-lib/testthat@f09df60dd881530332b252474e9f35c97f8640be) |
| textshaping | 0.3.6 | 2021-10-13 | CRAN (R 4.1.1) |
| tibble | 3.1.6.9000 | 2022-01-18 | Github (tidyverse/tibble@7aa54e67d6ceb31c81172c7d18d28ea9ce088888) |
| tidyr | 1.1.4 | 2021-09-27 | CRAN (R 4.1.1) |
| tidyselect | 1.1.1 | 2021-04-30 | CRAN (R 4.1.1) |
| tidyverse | 1.3.1.9000 | 2021-12-08 | Github (tidyverse/tidyverse@6186fbf09bf359110f8800ff989cbbdd40485eb0) |
| tzdb | 0.2.0 | 2021-10-27 | CRAN (R 4.1.1) |
| usethis | 2.1.5.9000 | 2022-01-18 | Github (r-lib/usethis@3c4ab669481ab4a11b6426dbc583f05077a4c6db) |
| utf8 | 1.2.2 | 2021-07-24 | CRAN (R 4.1.1) |
| vctrs | 0.3.8 | 2021-04-29 | CRAN (R 4.1.1) |
| vipor | 0.4.5 | 2017-03-22 | CRAN (R 4.1.2) |
| viridisLite | 0.4.0 | 2021-04-13 | CRAN (R 4.1.1) |
| vroom | 1.5.7 | 2021-11-30 | CRAN (R 4.1.2) |
| withr | 2.4.3 | 2021-11-30 | CRAN (R 4.1.2) |
| xfun | 0.29 | 2021-12-14 | CRAN (R 4.1.2) |
| xml2 | 1.3.3 | 2021-11-30 | CRAN (R 4.1.2) |
| yaml | 2.2.1 | 2020-02-01 | CRAN (R 4.1.1) |