Tyser, R.C.V., Mahammadov, E., Nakanoh, S., Vallier, L., Scialdone, A., and Srinivas, S. (2021). Single-cell transcriptomic characterization of a gastrulating human embryo. Nature 600, 285–289.
Load required packages.
library(tidyverse)
library(Matrix)
library(patchwork)
library(extrafont)
Sys.time()
## [1] "2022-01-19 00:46:30 CST"
source(
file = file.path(
SCRIPT_DIR,"utilities.R"
) )
<- function(embedding, x, y, label, n_cols = 3) {
plot_embedding_highlight <- x
cell_metadata_selected <- y
selected_column
::map(levels(cell_metadata_selected[[selected_column]]), \(x) {
purrr<- embedding |>
values ::left_join(cell_metadata_selected) |>
dplyr::mutate(
dplyrvalue = case_when(
== x ~ "1",
.data[[selected_column]] != x ~ "0"
.data[[selected_column]]
)|>
) ::pull(value) |>
dplyras.integer() |>
as.factor()
plot_embedding(
embedding = embedding[, c(x_column, y_column)],
color_values = values,
label = glue::glue(
"{label}; ",
"{x}: {sum(as.integer(as.character(values)), na.rm = TRUE)}"
),label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = FALSE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
+
) theme_customized(
legend_key_size = 2,
legend_text_size = 5
+
) scale_color_manual(
na.translate = TRUE,
values = c("grey70", "salmon"),
na.value = "#7F7F7F"
+
) ::annotate(
ggplot2geom = "text",
x = Inf,
y = Inf,
label = sum(as.integer(as.character(values)), na.rm = TRUE),
size = 5 / ggplot2::.pt,
hjust = 1,
vjust = 1,
na.rm = FALSE
)|>
}) ::reduce(`+`) +
purrr::plot_layout(ncol = n_cols) +
patchwork::plot_annotation(
patchworktheme = ggplot2::theme(plot.margin = ggplot2::margin())
) }
<- "/Users/jialei/Dropbox/Data/Projects/UTSW/Peri-implantation" PROJECT_DIR
<- reticulate::import(module = "anndata", convert = TRUE)
ad print(ad$`__version__`)
## [1] "0.7.6"
<- purrr::map(c("PRJEB40781", "PRJEB11202", "PRJNA555602"), \(x) {
adata_files file.path(
PROJECT_DIR,"raw",
"public",
x,"matrix",
"adata.h5ad"
)
})::map_lgl(adata_files, file.exists) purrr
## [1] TRUE TRUE TRUE
<- NULL
BACKED <- purrr::map(adata_files, function(x) {
matrix_readcount_use $read_h5ad(
adfilename = x, backed = BACKED
|>
) convert_adata()
|>
}) ::reduce(cbind)
purrr
|> dim() matrix_readcount_use
## [1] 33538 12690
<- "r"
BACKED <- purrr::map(adata_files, function(x) {
cell_metadata $read_h5ad(
adfilename = x, backed = BACKED
$obs |>
)::rownames_to_column(var = "cell") |>
tibble::select(cell, everything())
dplyr|>
}) ::bind_rows() |>
dplyr::select(-batch)
dplyr
|> head() cell_metadata
Check memory usage.
::walk(list(matrix_readcount_use, cell_metadata), function(x) {
purrrprint(object.size(x), units = "auto", standard = "SI")
})
## 828.5 MB
## 1.3 MB
<- vroom::vroom(
cell_metadata_PRJEB40781 file = file.path(
PROJECT_DIR,"raw",
"public",
"PRJEB40781",
"matrix",
"cell_metadata.csv"
)|>
) ::mutate(
dplyrlineage = factor(lineage)
)
## Rows: 1195 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): cell, run, source_name, developmental_stage, individual, sex, sampl...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
<- "embedding_ncomponents15_ccc1_seed20210719.csv.gz"
EMBEDDING_FILE
<- vroom::vroom(
embedding_1195 file = file.path(
PROJECT_DIR,"raw/public/PRJEB40781",
"clustering/PRJEB40781/star/exploring",
"Scanpy_Harmony",
EMBEDDING_FILE
)
)
|> head() embedding_1195
<- "x_umap_min_dist=0.1"
x_column <- "y_umap_min_dist=0.1"
y_column
<- 1.25
GEOM_POINT_SIZE <- "UMAP"
EMBEDDING_TITLE_PREFIX <- TRUE RASTERISED
<- plot_embedding(
p_embedding_leiden embedding = embedding_1195[, c(x_column, y_column)],
color_values = embedding_1195$leiden |> as.factor(),
label = paste(EMBEDDING_TITLE_PREFIX, "Leiden", sep = "; "),
label_position = NULL,
show_color_value_labels = TRUE,
show_color_legend = FALSE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED
+
) theme_customized()
# CB_POSITION <- c(0.8, 0.995)
<- plot_embedding(
p_embedding_UMI embedding = embedding_1195[, c(x_column, y_column)],
color_values = embedding_1195 |>
::left_join(
dplyr
cell_metadata|>
) ::pull(num_umis) |>
dplyr
{log10(x)
\(x)
}(),label = paste(EMBEDDING_TITLE_PREFIX, "UMI", sep = "; "),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE / 1.5,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
+
) theme_customized(
legend_key_size = 2,
legend_text_size = 5
)
<- plot_embedding(
p_embedding_MT embedding = embedding_1195[, c(x_column, y_column)],
color_values = embedding_1195 |>
::left_join(cell_metadata) |>
dplyr::pull(mt_percentage),
dplyrlabel = paste(EMBEDDING_TITLE_PREFIX, "MT %", sep = "; "),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
+
) theme_customized(
legend_key_size = 2,
legend_text_size = 5
)
<- plot_embedding(
p_embedding_sampling_site embedding = embedding_1195[, c(x_column, y_column)],
color_values = embedding_1195 |>
::left_join(
dplyr
cell_metadata_PRJEB40781|>
) ::pull(sampling_site) |>
dplyras.factor(),
label = paste(EMBEDDING_TITLE_PREFIX, "Sampling site", sep = "; "),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE / 1.5,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED
+
) theme_customized()
|>
embedding_1195 ::left_join(
dplyr|>
cell_metadata ::select(cell, num_umis:mt_percentage)
dplyr|>
) ::left_join(
dplyr
cell_metadata_PRJEB40781|>
) ::group_by(
dplyr
leiden|>
) ::summarise(
dplyrnum_cells = dplyr::n(),
median_umis = median(num_umis),
median_features = median(num_features),
median_mt_percentage = median(mt_percentage)
|>
) ::gt() |>
gt::tab_options(table.font.size = "median") |>
gt::summary_rows(
gtcolumns = c(num_cells),
fns = list(
Sum = ~ sum(.)
),decimals = 0
|>
) ::summary_rows(
gtcolumns = c("median_umis", "median_features", "median_mt_percentage"),
fns = list(
Median = ~ median(.)
),decimals = 2
)
leiden | num_cells | median_umis | median_features | median_mt_percentage | |
---|---|---|---|---|---|
0 | 189 | 883508.0 | 2581.0 | 0.008930657 | |
1 | 138 | 996283.5 | 4297.5 | 0.006288449 | |
2 | 134 | 836131.0 | 4346.0 | 0.010772693 | |
3 | 115 | 887650.0 | 4906.0 | 0.007057741 | |
4 | 96 | 916361.0 | 4618.5 | 0.014087410 | |
5 | 85 | 1110656.0 | 3667.0 | 0.005815362 | |
6 | 81 | 763755.0 | 5965.0 | 0.008891377 | |
7 | 79 | 938071.0 | 4924.0 | 0.006874891 | |
8 | 79 | 746953.0 | 5211.0 | 0.008341437 | |
9 | 72 | 1010636.5 | 4530.0 | 0.004510133 | |
10 | 60 | 940026.0 | 3409.5 | 0.009846117 | |
11 | 34 | 1028500.0 | 4321.5 | 0.005531307 | |
12 | 33 | 728519.0 | 4344.0 | 0.014426759 | |
Sum | — | 1,195 | — | — | — |
Median | — | — | 916,361.00 | 4,346.00 | 0.01 |
::reduce(
purrrlist(
p_embedding_leiden,
p_embedding_sampling_site,
p_embedding_UMI,
p_embedding_MT
),`+`
+
) ::plot_layout(ncol = 2) +
patchwork::plot_annotation(
patchworktheme = theme(plot.margin = margin())
)
|>
embedding_1195 ::left_join(
dplyr
cell_metadata|>
) ::left_join(cell_metadata_PRJEB40781) |>
dplyr::group_by(lineage) |>
dplyr::summarise(
dplyrnum_cells = dplyr::n(),
median_umis = median(num_umis),
median_features = median(num_features),
median_mt_percentage = median(mt_percentage)
|>
) ::gt() |>
gt::tab_options(table.font.size = "median") |>
gt::summary_rows(
gtcolumns = c(num_cells),
fns = list(
Sum = ~ sum(.)
),decimals = 0
|>
) ::summary_rows(
gtcolumns = c("median_umis", "median_features", "median_mt_percentage"),
fns = list(
Median = ~ median(.)
),decimals = 2
)
lineage | num_cells | median_umis | median_features | median_mt_percentage | |
---|---|---|---|---|---|
advanced mesoderm | 164 | 993741.0 | 4164.5 | 0.006218430 | |
axial mesoderm | 23 | 853681.0 | 4181.0 | 0.007745421 | |
ectodermal cell | 29 | 996476.0 | 4314.0 | 0.005143689 | |
emergent mesoderm | 185 | 911736.0 | 4184.0 | 0.007838085 | |
endodermal cell | 135 | 989383.0 | 4837.0 | 0.005348267 | |
epiblast cell | 133 | 790584.0 | 5607.0 | 0.009357085 | |
erythrocyte | 32 | 716602.0 | 4345.5 | 0.014388710 | |
hemogenic endothelial progenitor | 111 | 925287.0 | 4498.0 | 0.013978306 | |
nascent mesoderm | 98 | 787231.5 | 4479.0 | 0.008292312 | |
primitive streak | 202 | 826282.5 | 3781.0 | 0.010169492 | |
yolk sac mesoderm | 83 | 1124901.0 | 3778.0 | 0.006017290 | |
Sum | — | 1,195 | — | — | — |
Median | — | — | 911,736.00 | 4,314.00 | 0.01 |
<- plot_embedding(
p_embedding_lineage embedding = embedding_1195[, c(x_column, y_column)],
color_values = embedding_1195 |>
::left_join(
dplyr
cell_metadata_PRJEB40781|>
) ::pull(lineage) |>
dplyras.factor(),
label = paste(EMBEDDING_TITLE_PREFIX, "Lineage", sep = "; "),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE / 1.5,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED
+
) theme_customized()
list(
p_embedding_lineage,::map(levels(cell_metadata_PRJEB40781$lineage), \(x) {
purrr<- embedding_1195 |>
values ::left_join(cell_metadata_PRJEB40781) |>
dplyr::mutate(
dplyrvalue = dplyr::case_when(
== x ~ "1",
lineage TRUE ~ "0"
)|>
) ::pull(value)
dplyrplot_embedding(
embedding = embedding_1195[, c(x_column, y_column)],
color_values = factor(values),
label = glue::glue("{EMBEDDING_TITLE_PREFIX}; {x}: {sum(as.integer(values))}"),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = FALSE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = TRUE,
rasterise = RASTERISED,
legend_size = 2
+
) theme_customized(
legend_key_size = 2,
legend_text_size = 5
+
) scale_color_manual(
values = c("grey70", "salmon")
)
})|>
) ::reduce(`+`) +
purrr::plot_layout(ncol = 2) +
patchwork::plot_annotation(
patchworktheme = theme(plot.margin = margin())
)
<- calc_group_composition(
p_barplot_composition_sampling_site data = embedding_1195 |>
::left_join(
dplyr
cell_metadata_PRJEB40781
),x = "leiden",
group = "sampling_site"
|>
) ::mutate(
dplyrleiden = factor(leiden)
|>
) plot_barplot(
x = "leiden",
y = "percentage",
z = "sampling_site",
legend_ncol = 1
)
<- calc_group_composition(
p_barplot_composition_lineage data = embedding_1195 |>
::left_join(
dplyr
cell_metadata_PRJEB40781
),x = "leiden",
group = "lineage"
|>
) ::mutate(
dplyrleiden = factor(leiden)
|>
) plot_barplot(
x = "leiden",
y = "percentage",
z = "lineage",
legend_ncol = 1
)
list(
p_barplot_composition_sampling_site,
p_barplot_composition_lineage|>
) ::reduce(`+`) +
purrr::plot_layout(ncol = 1, guides = "collect") +
patchwork::plot_annotation(
patchworktheme = theme(plot.margin = margin())
)
<- c(
FEATURES_SELECTED "ENSG00000204531_POU5F1",
"ENSG00000111704_NANOG",
"ENSG00000171872_KLF17",
"ENSG00000186103_ARGFX",
#
"ENSG00000164736_SOX17",
"ENSG00000125798_FOXA2",
"ENSG00000136574_GATA4",
"ENSG00000134853_PDGFRA",
#
"ENSG00000179348_GATA2",
"ENSG00000070915_SLC12A3",
"ENSG00000165556_CDX2",
"ENSG00000007866_TEAD3"
)
::map(FEATURES_SELECTED, \(x) {
purrr<- x
selected_feature
cat(selected_feature, "\n")
<- log10(calc_cpm(matrix_readcount_use[, embedding_1195$cell])[selected_feature, ] + 1)
values
plot_embedding(
embedding = embedding_1195[, c(x_column, y_column)],
color_values = values,
label = paste(
EMBEDDING_TITLE_PREFIX,|> stringr::str_remove(pattern = "^E.+_"),
selected_feature sep = "; "
),label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
+
) scale_color_viridis_c(
na.value = "grey80"
+
) theme_customized(
legend_key_size = 2,
legend_text_size = 5
)|>
}) ::reduce(`+`) +
purrr::plot_layout(ncol = 3, byrow = FALSE) +
patchwork::plot_annotation(
patchworktheme = theme(plot.margin = margin())
)
## ENSG00000204531_POU5F1
## ENSG00000111704_NANOG
## ENSG00000171872_KLF17
## ENSG00000186103_ARGFX
## ENSG00000164736_SOX17
## ENSG00000125798_FOXA2
## ENSG00000136574_GATA4
## ENSG00000134853_PDGFRA
## ENSG00000179348_GATA2
## ENSG00000070915_SLC12A3
## ENSG00000165556_CDX2
## ENSG00000007866_TEAD3
<- glue::glue(
EMBEDDING_FILE "embedding_ncomponents18_ccc1_seed20210719.csv.gz"
)
<- vroom::vroom(
embedding file = file.path(
PROJECT_DIR,"raw",
"public",
"PRJEB40781",
"clustering",
"PRJEB40781_PRJEB11202_PRJNA555602",
"exploring",
"Scanpy_Harmony",
EMBEDDING_FILE
)|>
) ::mutate(
dplyrstudy = dplyr::case_when(
%in% c("GSM3956280", "GSM3956281") ~ "PRJNA555602",
batch TRUE ~ batch
),study = factor(study)
)
|> head() embedding
<- tibble::tribble(
studies ~bioproject, ~citation,
"PRJEB11202", "Petropoulos et al. 2016",
"PRJEB40781", "Tyser et al. 2021",
"PRJNA555602", "Zheng et al. 2019",
"PRJNA431392", "Zhou et al. 2019",
"PRJNA737139", "Kagawa et al. 2021",
"PRJNA632839", "Yu et al. 2021",
"PRJNA658478", "Liu et al. 2021",
"PRJNA720968", "Yanagida et al. 2021",
"PRJNA667174", "Fan et al. 2021",
"PRJNA738498", "Sozen et al. 2021",
"PRJNA737139", "Kagawa et al. 2021"
)
<- setNames(
studies object = studies$citation,
nm = studies$bioproject
)
|>
embedding ::count(study, name = "num_cells") |>
dplyr::mutate(
dplyrstudy = studies[study]
|>
) ::gt() |>
gt::tab_options(table.font.size = "median") |>
gt::summary_rows(
gtcolumns = c(num_cells),
fns = list(
Sum = ~ sum(.)
),decimals = 0
)
study | num_cells | |
---|---|---|
Petropoulos et al. 2016 | 1529 | |
Tyser et al. 2021 | 1195 | |
Zheng et al. 2019 | 9966 | |
Sum | — | 12,690 |
# PRJEB11202; Petropoulos et al. 2016
<- vroom::vroom(
cell_metadata_PRJEB11202 file = file.path(
PROJECT_DIR,"raw",
"public",
"PRJEB11202",
"matrix/cell_metadata.csv"
)|>
) ::mutate(
dplyrdevelopmental_stage = stringr::str_remove(
string = individual,
pattern = "\\..+$"
),developmental_stage = factor(
developmental_stage,levels = stringr::str_sort(
unique(developmental_stage),
numeric = TRUE
)
),#
lineage = factor(
inferred_lineage,levels = c(
"epiblast",
"primitive_endoderm",
"trophectoderm",
"not_applicable"
)
)|>
) ::select(
dplyr
cell, lineage, developmental_stage
)
# PRJNA555602; Zheng et al. 2019
<- vroom::vroom(
cell_metadata_PRJNA555602 file = file.path(
PROJECT_DIR,"raw",
"public",
"PRJNA555602",
"matrix/cell_metadata.csv"
)|>
) ::mutate(
dplyrlineage = dplyr::case_when(
# amniotic ectoderm-like cells
.3 == 0 ~ "Transwell-AMLC",
rna_snn_res_0# mesoderm-like cell 2
.3 == 1 ~ "MeLC2",
rna_snn_res_0.3 == 2 ~ "Human ES cell",
rna_snn_res_0# mesoderm-like cell 1
.3 == 3 ~ "MeLC1",
rna_snn_res_0# human PGC-like cells
.3 == 4 ~ "hPGCLC",
rna_snn_res_0.3 == 5 ~ "AMLC"
rna_snn_res_0
),lineage = factor(
lineage,levels = c(
"Human ES cell",
"hPGCLC",
"MeLC1",
"MeLC2",
"AMLC",
"Transwell-AMLC"
)
),source = dplyr::case_when(
== "10X_Embryoid" ~ "Embryoid",
orig_ident == "10X_H9_Amnion" ~ "H9_Amnion"
orig_ident
),source = factor(source)
)
<- "x_pacmap"
x_column <- "y_pacmap"
y_column <- "PaCMAP"
EMBEDDING_TITLE_PREFIX <- 0.4
GEOM_POINT_SIZE
<- plot_embedding(
p_embedding_leiden embedding = embedding[, c(x_column, y_column)],
color_values = embedding$leiden |> as.factor(),
label = glue::glue("{EMBEDDING_TITLE_PREFIX}; Leiden"),
label_position = NULL,
show_color_value_labels = TRUE,
show_color_legend = FALSE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = FALSE,
shuffle_values = FALSE,
rasterise = RASTERISED
+
) theme_customized()
<- plot_embedding(
p_embedding_UMI embedding = embedding[, c(x_column, y_column)],
color_values = log10(Matrix::colSums(matrix_readcount_use[, embedding$cell])),
label = glue::glue("{EMBEDDING_TITLE_PREFIX}; UMI"),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE * 1,
geom_point_alpha = 1,
sort_values = TRUE,
shuffle_values = FALSE,
label_size = 2.5,
label_hjust = 0,
label_vjust = 0,
rasterise = RASTERISED,
legend_size = 2,
legend_ncol = 1
+
) theme_customized(
legend_key_size = 2,
legend_text_size = 5
)
<- plot_embedding(
p_embedding_MT embedding = embedding[, c(x_column, y_column)],
color_values = embedding |>
::left_join(
dplyr# |> dplyr::select(-batch)
cell_metadata |>
) ::pull(mt_percentage),
dplyrlabel = glue::glue("{EMBEDDING_TITLE_PREFIX}; MT %"),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE * 1,
geom_point_alpha = 1,
sort_values = TRUE,
shuffle_values = FALSE,
label_size = 2.5,
label_hjust = 0,
label_vjust = 0,
rasterise = RASTERISED,
legend_size = 2,
legend_ncol = 1
+
) theme_customized(
legend_key_size = 2,
legend_text_size = 5
)
<- plot_embedding(
p_embedding_study embedding = embedding[, c(x_column, y_column)],
color_values = embedding$study,
label = glue::glue("{EMBEDDING_TITLE_PREFIX}; Study"),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE / 2,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED,
legend_size = 2
+
) theme_customized(
legend_key_size = 2,
legend_text_size = 5
+
) scale_color_manual(
values = scales::hue_pal()(n = length(unique(embedding$batch))),
labels = studies
)
|>
embedding ::left_join(
dplyr|>
cell_metadata ::select(cell, num_umis:mt_percentage)
dplyr|>
) ::group_by(
dplyr
leiden|>
) ::summarise(
dplyrnum_cells = dplyr::n(),
median_umis = median(num_umis),
median_features = median(num_features),
median_mt_percentage = median(mt_percentage)
|>
) ::gt() |>
gt::tab_options(table.font.size = "median") |>
gt::summary_rows(
gtcolumns = c(num_cells),
fns = list(
Sum = ~ sum(.)
),decimals = 0
|>
) ::summary_rows(
gtcolumns = c("median_umis", "median_features", "median_mt_percentage"),
fns = list(
Median = ~ median(.)
),decimals = 2
)
leiden | num_cells | median_umis | median_features | median_mt_percentage | |
---|---|---|---|---|---|
0 | 1179 | 27676.0 | 5089.0 | 0.025830026 | |
1 | 1156 | 21609.5 | 4597.0 | 0.033986369 | |
2 | 1141 | 22175.0 | 4243.0 | 0.043475207 | |
3 | 1124 | 28028.0 | 5113.0 | 0.043416238 | |
4 | 969 | 23013.0 | 4643.0 | 0.026599176 | |
5 | 892 | 28762.0 | 4966.5 | 0.034163936 | |
6 | 868 | 29193.5 | 5040.0 | 0.025966430 | |
7 | 826 | 23003.0 | 4499.5 | 0.030224185 | |
8 | 797 | 22893.0 | 4362.0 | 0.041594258 | |
9 | 647 | 1348781.0 | 9991.0 | 0.008139240 | |
10 | 602 | 861400.5 | 4880.5 | 0.008097055 | |
11 | 472 | 1787005.5 | 10772.5 | 0.006137011 | |
12 | 434 | 24493.5 | 4661.0 | 0.035983814 | |
13 | 345 | 31267.0 | 5475.0 | 0.039519924 | |
14 | 324 | 1607461.5 | 10278.0 | 0.007017669 | |
15 | 235 | 26117.0 | 4854.0 | 0.047443806 | |
16 | 228 | 881789.5 | 2682.5 | 0.009192114 | |
17 | 178 | 1092848.0 | 3854.0 | 0.005902158 | |
18 | 131 | 883583.0 | 4508.0 | 0.014213556 | |
19 | 84 | 1570985.0 | 10117.5 | 0.007988063 | |
20 | 58 | 958118.0 | 4789.5 | 0.004154967 | |
Sum | — | 12,690 | — | — | — |
Median | — | — | 29,193.50 | 4,854.00 | 0.03 |
::reduce(
purrrlist(
p_embedding_leiden,
p_embedding_study,
p_embedding_UMI,
p_embedding_MT
),`+`
+
) ::plot_layout(ncol = 2) +
patchwork::plot_annotation(
patchworktheme = theme(plot.margin = margin())
)
::map(unique(embedding$study), \(x) {
purrrplot_embedding(
embedding = embedding[, c(x_column, y_column)],
color_values = as.integer(embedding$study == x) |> as.factor(),
label = glue::glue("{EMBEDDING_TITLE_PREFIX}; {studies[x]}, {sum(as.integer(embedding$study == x), na.rm = TRUE)}"),
label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = FALSE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
+
) theme_customized(
legend_key_size = 2,
legend_text_size = 5
+
) scale_color_manual(
values = c("grey70", "salmon")
)|>
}) ::reduce(`+`) +
purrr::plot_layout(ncol = 2) +
patchwork::plot_annotation(
patchworktheme = ggplot2::theme(plot.margin = ggplot2::margin())
)
|>
embedding ::left_join(
dplyr|>
cell_metadata ::select(cell, num_umis:mt_percentage)
dplyr|>
) ::group_by(
dplyr
study|>
) ::summarise(
dplyrnum_cells = dplyr::n(),
median_umis = median(num_umis),
median_features = median(num_features),
median_mt_percentage = median(mt_percentage)
|>
) ::mutate(
dplyrstudy = studies[study],
platform = c("Smart-Seq2", "Smart-Seq2", "10x Genomics")
|>
) ::select(
dplyreverything()
study, platform, |>
) ::gt() |>
gt::tab_options(table.font.size = "median") |>
gt::summary_rows(
gtcolumns = c(num_cells),
fns = list(
Sum = ~ sum(.)
),decimals = 0
|>
) ::summary_rows(
gtcolumns = c("median_umis", "median_features", "median_mt_percentage"),
fns = list(
Median = ~ median(.)
),decimals = 2
)
study | platform | num_cells | median_umis | median_features | median_mt_percentage | |
---|---|---|---|---|---|---|
Petropoulos et al. 2016 | Smart-Seq2 | 1529 | 1551093 | 10305 | 0.007144702 | |
Tyser et al. 2021 | Smart-Seq2 | 1195 | 899241 | 4379 | 0.008164051 | |
Zheng et al. 2019 | 10x Genomics | 9966 | 25264 | 4772 | 0.034485263 | |
Sum | — | — | 12,690 | — | — | — |
Median | — | — | — | 899,241.00 | 4,772.00 | 0.01 |
<- "PRJEB11202"
bioproject <- cell_metadata_PRJEB11202
cell_metadata_selected
<- "developmental_stage"
selected_column <- plot_embedding(
p_embedding_developmental_stage embedding = embedding[, c(x_column, y_column)],
color_values = embedding |>
::left_join(cell_metadata_selected) |>
dplyr::pull(.data[[selected_column]]),
dplyrlabel = glue::glue(
"{EMBEDDING_TITLE_PREFIX}; {studies[bioproject]}; ",
"{selected_column |>
stringr::str_to_title() |>
stringr::str_replace(pattern = \"_\", replacement = \" \")}"
),label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
+
) theme_customized(
legend_key_size = 2,
legend_text_size = 5
+
) scale_color_manual(
na.translate = TRUE,
values = scales::hue_pal()(n = length(unique(cell_metadata_selected[[selected_column]]))),
na.value = "#7F7F7F"
)
<- "lineage"
selected_column <- plot_embedding(
p_embedding_lineage embedding = embedding[, c(x_column, y_column)],
color_values = embedding |>
::left_join(cell_metadata_selected) |>
dplyr::pull(.data[[selected_column]]),
dplyrlabel = glue::glue(
"{EMBEDDING_TITLE_PREFIX}; {studies[bioproject]}; ",
"{selected_column |>
stringr::str_to_title() |>
stringr::str_replace(pattern = \"_\", replacement = \" \")}"
),label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
+
) theme_customized(
legend_key_size = 2,
legend_text_size = 5
+
) scale_color_manual(
na.translate = TRUE,
values = scales::hue_pal()(n = length(unique(cell_metadata_selected[[selected_column]]))),
na.value = "#7F7F7F"
)
list(
p_embedding_lineage,
p_embedding_developmental_stage|>
) ::reduce(`+`) +
purrr::plot_layout(ncol = 2) +
patchwork::plot_annotation(
patchworktheme = ggplot2::theme(plot.margin = ggplot2::margin())
)
Salmon: highlighted group of cells; Light grey: cells belonging to this dataset but not the highlighted group; Dark grey: cells belonging to other datasets.
plot_embedding_highlight(
embedding = embedding,
x = cell_metadata_selected,
y = "lineage",
label = studies[bioproject],
n_cols = 2
)
plot_embedding_highlight(
embedding = embedding,
x = cell_metadata_selected,
y = "developmental_stage",
label = studies[bioproject],
n_cols = 2
)
<- "PRJNA555602"
bioproject <- cell_metadata_PRJNA555602
cell_metadata_selected
<- "source"
selected_column <- plot_embedding(
p_embedding_developmental_stage embedding = embedding[, c(x_column, y_column)],
color_values = embedding |>
::left_join(cell_metadata_selected) |>
dplyr::pull(.data[[selected_column]]),
dplyrlabel = glue::glue(
"{EMBEDDING_TITLE_PREFIX}; {studies[bioproject]}; ",
"{selected_column |>
stringr::str_to_title() |>
stringr::str_replace(pattern = \"_\", replacement = \" \")}"
),label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED,
legend_size = 2
+
) theme_customized(
legend_key_size = 2,
legend_text_size = 5
+
) scale_color_manual(
na.translate = TRUE,
values = scales::hue_pal()(n = length(unique(cell_metadata_selected[[selected_column]]))),
na.value = "#7F7F7F"
)
<- "lineage"
selected_column <- plot_embedding(
p_embedding_lineage embedding = embedding[, c(x_column, y_column)],
color_values = embedding |>
::left_join(cell_metadata_selected) |>
dplyr::pull(.data[[selected_column]]),
dplyrlabel = glue::glue(
"{EMBEDDING_TITLE_PREFIX}; {studies[bioproject]}; ",
"{selected_column |>
stringr::str_to_title() |>
stringr::str_replace(pattern = \"_\", replacement = \" \")}"
),label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED,
legend_size = 2
+
) theme_customized(
legend_key_size = 2,
legend_text_size = 5
+
) scale_color_manual(
na.translate = TRUE,
values = scales::hue_pal()(n = length(unique(cell_metadata_selected[[selected_column]]))),
na.value = "#7F7F7F"
)
list(
p_embedding_lineage,
p_embedding_developmental_stage|>
) ::reduce(`+`) +
purrr::plot_layout(ncol = 2) +
patchwork::plot_annotation(
patchworktheme = ggplot2::theme(plot.margin = ggplot2::margin())
)
<- "PRJEB40781"
bioproject <- cell_metadata_PRJEB40781
cell_metadata_selected
<- "sampling_site"
selected_column <- plot_embedding(
p_embedding_sampling_site embedding = embedding[, c(x_column, y_column)],
color_values = embedding |>
::left_join(cell_metadata_selected) |>
dplyr::pull(.data[[selected_column]]),
dplyrlabel = glue::glue(
"{EMBEDDING_TITLE_PREFIX}; {studies[bioproject]}; ",
"{selected_column |>
stringr::str_to_title() |>
stringr::str_replace(pattern = \"_\", replacement = \" \")}"
),label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
+
) theme_customized(
legend_key_size = 2,
legend_text_size = 5
+
) scale_color_manual(
na.translate = TRUE,
values = scales::hue_pal()(n = length(unique(cell_metadata_selected[[selected_column]]))),
na.value = "grey70"
)
<- "lineage"
selected_column <- plot_embedding(
p_embedding_lineage embedding = embedding[, c(x_column, y_column)],
color_values = embedding |>
::left_join(cell_metadata_selected) |>
dplyr::pull(.data[[selected_column]]),
dplyrlabel = glue::glue(
"{EMBEDDING_TITLE_PREFIX}; {studies[bioproject]}; ",
"{selected_column |>
stringr::str_to_title() |>
stringr::str_replace(pattern = \"_\", replacement = \" \")}"
),label_position = NULL,
show_color_value_labels = FALSE,
show_color_legend = TRUE,
geom_point_size = GEOM_POINT_SIZE,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
legend_size = 2
+
) theme_customized(
legend_key_size = 2,
legend_text_size = 5
+
) scale_color_manual(
na.translate = TRUE,
values = scales::hue_pal()(n = length(unique(cell_metadata_selected[[selected_column]]))),
na.value = "#7F7F7F"
)
list(
p_embedding_lineage,
p_embedding_sampling_site|>
) ::reduce(`+`) +
purrr::plot_layout(ncol = 2) +
patchwork::plot_annotation(
patchworktheme = ggplot2::theme(plot.margin = ggplot2::margin())
)
plot_embedding_highlight(
embedding = embedding,
x = cell_metadata_selected,
y = "lineage",
label = studies[bioproject],
n_cols = 2
)
::session_info()$platform devtools
## setting value
## version R version 4.1.2 (2021-11-01)
## os macOS Monterey 12.1
## system aarch64, darwin20.6.0
## ui unknown
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/Chicago
## date 2022-01-19
## pandoc 2.14.0.3 @ /Applications/RStudio.app/Contents/MacOS/pandoc/ (via rmarkdown)
::session_info()$pack |>
devtoolsas_tibble() |>
::select(
dplyr
package,
loadedversion,
date,`source`
|>
) # print(n = nrow(.))
::gt() |>
gt::tab_options(table.font.size = "median") gt
package | loadedversion | date | source |
---|---|---|---|
assertthat | 0.2.1 | 2019-03-21 | CRAN (R 4.1.1) |
backports | 1.4.1 | 2021-12-13 | CRAN (R 4.1.2) |
beeswarm | 0.4.0 | 2021-06-01 | CRAN (R 4.1.2) |
bit | 4.0.4 | 2020-08-04 | CRAN (R 4.1.1) |
bit64 | 4.0.5 | 2020-08-30 | CRAN (R 4.1.1) |
brio | 1.1.3 | 2021-11-30 | CRAN (R 4.1.2) |
broom | 0.7.11 | 2022-01-03 | CRAN (R 4.1.2) |
bslib | 0.3.1 | 2021-10-06 | CRAN (R 4.1.1) |
cachem | 1.0.6 | 2021-08-19 | CRAN (R 4.1.1) |
callr | 3.7.0 | 2021-04-20 | CRAN (R 4.1.1) |
cellranger | 1.1.0 | 2016-07-27 | CRAN (R 4.1.1) |
checkmate | 2.0.0 | 2020-02-06 | CRAN (R 4.1.1) |
cli | 3.1.0 | 2021-10-27 | CRAN (R 4.1.1) |
codetools | 0.2-18 | 2020-11-04 | CRAN (R 4.1.2) |
colorspace | 2.0-2 | 2021-06-24 | CRAN (R 4.1.1) |
crayon | 1.4.2 | 2021-10-29 | CRAN (R 4.1.1) |
data.table | 1.14.2 | 2021-09-27 | CRAN (R 4.1.1) |
DBI | 1.1.2 | 2021-12-20 | CRAN (R 4.1.2) |
dbplyr | 2.1.1 | 2021-04-06 | CRAN (R 4.1.1) |
desc | 1.4.0 | 2021-09-28 | CRAN (R 4.1.1) |
devtools | 2.4.3.9000 | 2022-01-15 | Github (r-lib/devtools@e2f25cd69031c8d2099106baed894df4109cb7a4) |
digest | 0.6.29 | 2021-12-01 | CRAN (R 4.1.2) |
dplyr | 1.0.7.9000 | 2022-01-12 | Github (tidyverse/dplyr@05013358ace44fe17a51395d49d384232d18d6c1) |
dtplyr | 1.2.0 | 2021-12-05 | CRAN (R 4.1.2) |
ellipsis | 0.3.2 | 2021-04-29 | CRAN (R 4.1.1) |
evaluate | 0.14 | 2019-05-28 | CRAN (R 4.1.1) |
extrafont | 0.17 | 2014-12-08 | CRAN (R 4.1.1) |
extrafontdb | 1.0 | 2012-06-11 | CRAN (R 4.1.1) |
fansi | 1.0.2 | 2022-01-14 | CRAN (R 4.1.2) |
farver | 2.1.0 | 2021-02-28 | CRAN (R 4.1.1) |
fastmap | 1.1.0 | 2021-01-25 | CRAN (R 4.1.1) |
forcats | 0.5.1.9000 | 2021-11-29 | Github (tidyverse/forcats@b4dade0636a46543c30b0b647d97c3ce697c0ada) |
fs | 1.5.2.9000 | 2021-12-09 | Github (r-lib/fs@6d1182fea7e1c1ddbef3b0bba37c0b0a2e09749c) |
gargle | 1.2.0 | 2021-07-02 | CRAN (R 4.1.1) |
generics | 0.1.1 | 2021-10-25 | CRAN (R 4.1.1) |
ggbeeswarm | 0.6.0 | 2017-08-07 | CRAN (R 4.1.2) |
ggplot2 | 3.3.5 | 2021-06-25 | CRAN (R 4.1.1) |
ggrastr | 1.0.1 | 2021-12-08 | Github (VPetukhov/ggrastr@7aed9af2b9cffabda86e6d2af2fa10d4e60cc63d) |
glue | 1.6.0.9000 | 2021-12-21 | Github (tidyverse/glue@76793ef2c376140350c0e1909e66fd404a52b1ef) |
googledrive | 2.0.0 | 2021-07-08 | CRAN (R 4.1.1) |
googlesheets4 | 1.0.0 | 2021-07-21 | CRAN (R 4.1.1) |
gt | 0.3.1.9000 | 2022-01-17 | Github (rstudio/gt@fcabb414c55b70c9e445fbedfb24d52fe394ba61) |
gtable | 0.3.0.9000 | 2021-10-28 | Github (r-lib/gtable@a0bd2721a0a31c8b4391b84aabe98f8c85881140) |
haven | 2.4.3 | 2021-08-04 | CRAN (R 4.1.1) |
highr | 0.9 | 2021-04-16 | CRAN (R 4.1.1) |
hms | 1.1.1 | 2021-09-26 | CRAN (R 4.1.1) |
htmltools | 0.5.2 | 2021-08-25 | CRAN (R 4.1.1) |
httr | 1.4.2 | 2020-07-20 | CRAN (R 4.1.1) |
jquerylib | 0.1.4 | 2021-04-26 | CRAN (R 4.1.1) |
jsonlite | 1.7.3 | 2022-01-17 | CRAN (R 4.1.2) |
knitr | 1.37.1 | 2021-12-21 | https://yihui.r-universe.dev (R 4.1.2) |
labeling | 0.4.2 | 2020-10-20 | CRAN (R 4.1.1) |
lattice | 0.20-45 | 2021-09-22 | CRAN (R 4.1.2) |
lifecycle | 1.0.1 | 2021-09-24 | CRAN (R 4.1.1) |
lubridate | 1.8.0 | 2022-01-15 | Github (tidyverse/lubridate@53e5892a548b3425d6c3bf887542aa105341ab73) |
magrittr | 2.0.1 | 2020-11-17 | CRAN (R 4.1.1) |
Matrix | 1.4-0 | 2021-12-08 | CRAN (R 4.1.2) |
memoise | 2.0.1 | 2021-11-26 | CRAN (R 4.1.2) |
modelr | 0.1.8.9000 | 2021-10-27 | Github (tidyverse/modelr@16168e0624215d9d1a008f3a85de30aeb75302f6) |
munsell | 0.5.0 | 2018-06-12 | CRAN (R 4.1.1) |
patchwork | 1.1.0.9000 | 2021-10-27 | Github (thomasp85/patchwork@79223d3002e7bd7e715a270685c6507d684b2622) |
pillar | 1.6.4 | 2021-10-18 | CRAN (R 4.1.1) |
pkgbuild | 1.3.1 | 2021-12-20 | CRAN (R 4.1.2) |
pkgconfig | 2.0.3 | 2019-09-22 | CRAN (R 4.1.1) |
pkgload | 1.2.4 | 2021-11-30 | CRAN (R 4.1.2) |
png | 0.1-7 | 2013-12-03 | CRAN (R 4.1.1) |
prettyunits | 1.1.1 | 2020-01-24 | CRAN (R 4.1.1) |
processx | 3.5.2 | 2021-04-30 | CRAN (R 4.1.1) |
ps | 1.6.0 | 2021-02-28 | CRAN (R 4.1.1) |
purrr | 0.3.4 | 2020-04-17 | CRAN (R 4.1.1) |
R.cache | 0.15.0 | 2021-04-30 | CRAN (R 4.1.1) |
R.methodsS3 | 1.8.1 | 2020-08-26 | CRAN (R 4.1.1) |
R.oo | 1.24.0 | 2020-08-26 | CRAN (R 4.1.1) |
R.utils | 2.11.0 | 2021-09-26 | CRAN (R 4.1.1) |
R6 | 2.5.1.9000 | 2021-12-09 | Github (r-lib/R6@1b05b89f30fe6713cb9ff51d91fc56bd3016e4b2) |
ragg | 1.2.1.9000 | 2021-12-08 | Github (r-lib/ragg@c68c6665ef894f16c006333658b32bf25d2e9d19) |
Rcpp | 1.0.8 | 2022-01-13 | CRAN (R 4.1.2) |
readr | 2.1.1 | 2021-11-30 | CRAN (R 4.1.2) |
readxl | 1.3.1.9000 | 2022-01-18 | Github (tidyverse/readxl@03258a3b2341ce600ee0af56851c80c35d6245ef) |
remotes | 2.4.2 | 2021-12-02 | Github (r-lib/remotes@fcad17b68b7a19d5363d64adfb0a0426a3a5b3db) |
reprex | 2.0.1 | 2021-08-05 | CRAN (R 4.1.1) |
reticulate | 1.23 | 2022-01-14 | CRAN (R 4.1.2) |
rlang | 0.99.0.9003 | 2022-01-18 | Github (r-lib/rlang@d79ab3a1ab1ce8ca5bb0ebc6ab0454cb10fa4dd1) |
rmarkdown | 2.11.9 | 2022-01-18 | Github (rstudio/rmarkdown@d0d3b08bf78b6cd900d0505fb7141037e117c6b2) |
rprojroot | 2.0.2 | 2020-11-15 | CRAN (R 4.1.1) |
rstudioapi | 0.13.0-9000 | 2022-01-15 | Github (rstudio/rstudioapi@5d0f0873dc160779c71bf4b00d8b016b898f6fb5) |
Rttf2pt1 | 1.3.9 | 2021-07-22 | CRAN (R 4.1.1) |
rvest | 1.0.2 | 2021-10-16 | CRAN (R 4.1.1) |
sass | 0.4.0 | 2021-05-12 | CRAN (R 4.1.1) |
scales | 1.1.1 | 2020-05-11 | CRAN (R 4.1.1) |
sessioninfo | 1.2.2 | 2021-12-06 | CRAN (R 4.1.2) |
stringi | 1.7.6 | 2021-11-29 | CRAN (R 4.1.2) |
stringr | 1.4.0.9000 | 2022-01-17 | Github (tidyverse/stringr@3848cd70b1e331e6c20401e4da518ff4c3725324) |
styler | 1.6.2.9000 | 2022-01-17 | Github (r-lib/styler@9274aed613282eca01909ae8c341224055d9c928) |
systemfonts | 1.0.3.9000 | 2021-12-07 | Github (r-lib/systemfonts@414114e645efb316def3d8de1056d855f92d588e) |
testthat | 3.1.1.9000 | 2022-01-13 | Github (r-lib/testthat@f09df60dd881530332b252474e9f35c97f8640be) |
textshaping | 0.3.6 | 2021-10-13 | CRAN (R 4.1.1) |
tibble | 3.1.6.9000 | 2022-01-18 | Github (tidyverse/tibble@7aa54e67d6ceb31c81172c7d18d28ea9ce088888) |
tidyr | 1.1.4 | 2021-09-27 | CRAN (R 4.1.1) |
tidyselect | 1.1.1 | 2021-04-30 | CRAN (R 4.1.1) |
tidyverse | 1.3.1.9000 | 2021-12-08 | Github (tidyverse/tidyverse@6186fbf09bf359110f8800ff989cbbdd40485eb0) |
tzdb | 0.2.0 | 2021-10-27 | CRAN (R 4.1.1) |
usethis | 2.1.5.9000 | 2022-01-18 | Github (r-lib/usethis@3c4ab669481ab4a11b6426dbc583f05077a4c6db) |
utf8 | 1.2.2 | 2021-07-24 | CRAN (R 4.1.1) |
vctrs | 0.3.8 | 2021-04-29 | CRAN (R 4.1.1) |
vipor | 0.4.5 | 2017-03-22 | CRAN (R 4.1.2) |
viridisLite | 0.4.0 | 2021-04-13 | CRAN (R 4.1.1) |
vroom | 1.5.7 | 2021-11-30 | CRAN (R 4.1.2) |
withr | 2.4.3 | 2021-11-30 | CRAN (R 4.1.2) |
xfun | 0.29 | 2021-12-14 | CRAN (R 4.1.2) |
xml2 | 1.3.3 | 2021-11-30 | CRAN (R 4.1.2) |
yaml | 2.2.1 | 2020-02-01 | CRAN (R 4.1.1) |